├── .gitignore ├── README.adoc ├── code ├── csharp │ └── Example.cs ├── go │ └── example.go ├── java │ └── Example.java ├── javascript │ └── example.js └── python │ └── example.py ├── data ├── network-management-35.dump ├── network-management-40.dump ├── network-management-43.dump └── network-management-50.dump ├── documentation ├── img │ ├── Network-Management-Model.svg │ ├── example.png │ ├── model.png │ ├── network-8000-machines.jpg │ ├── network-alternative-routes.jpg │ ├── network-hardware-arrows.jpg │ ├── network-mode-software.jpg │ ├── network-rack-machines-type.jpg │ ├── network-rack.jpg │ ├── network-schema-arrows.jpg │ ├── network-schema-hardware.jpg │ ├── network-software-arrows.jpg │ ├── network-software-dependency.jpg │ ├── network-software-machine.jpg │ ├── network-zones-2.jpg │ ├── network-zones.jpg │ └── network.svg ├── modelling-decisions.adoc ├── network-management.adoc └── network-management.neo4j-browser-guide ├── relate.project-install.json └── scripts └── network-management.cypher /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | *.class 4 | package-lock.json 5 | code/csharp/bin 6 | code/csharp/debug 7 | code/csharp/obj 8 | code/javascript/node_modules 9 | code/java/target 10 | pythonenv3.8 -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | :name: network-management 2 | :long-name: Network Management 3 | :description: Dependency and root cause analysis and more for network and IT management 4 | :icon: documentation/img/network.svg 5 | :tags: example-data,dataset,network-data,network-management,it-operations,datacenter 6 | :author: Michael Hunger 7 | :demodb: false 8 | :data: false 9 | :use-load-script: scripts/network-management.cypher 10 | :use-dump-file: data/network-management-40.dump 11 | :zip-file: false 12 | :use-plugin: false 13 | 
:target-db-version: 3.5,4.0 14 | :bloom-perspective: bloom/network-management.bloom-perspective 15 | :guide: documentation/network-management.neo4j-browser-guide 16 | :rendered-guide: https://guides.neo4j.com/sandbox/{name}/index.html 17 | :model: documentation/img/model.png 18 | :example: documentation/img/example.png 19 | :nodes: 83847 20 | :relationships: 181995 21 | 22 | :model-guide: documentation/modelling-decisions.adoc 23 | :todo: false 24 | image::{icon}[width=100] 25 | 26 | == {long-name} Graph Example 27 | 28 | Description: _{description}_ 29 | 30 | ifeval::[{todo} != false] 31 | To Do: {todo} 32 | endif::[] 33 | 34 | Nodes {nodes} Relationships {relationships} 35 | 36 | .Model 37 | image::{model}[] 38 | 39 | .Example 40 | image::{example}[width=600] 41 | 42 | :param-name: location 43 | :param-value: Iceland, Rekjavik 44 | 45 | .Example Query: 46 | [source,cypher,role=query-example-params,subs=+attributes] 47 | ---- 48 | :param {param-name} => "{param-value}" 49 | ---- 50 | 51 | [source,cypher,role=query-example,param-name={param-name},param-value={param-value},result-column=ip,expected-result=10.0.0.254] 52 | ---- 53 | MATCH (dc:DataCenter {location: $location})-[:CONTAINS]->(r:Router)-[:ROUTES]->(i:Interface) 54 | RETURN i.ip as ip 55 | ---- 56 | 57 | === Setup 58 | 59 | This is for Neo4j version: {target-db-version} 60 | 61 | ifeval::[{use-plugin} != false] 62 | Required plugins: {use-plugin} 63 | endif::[] 64 | 65 | ifeval::[{demodb} != false] 66 | The database is also available on https://demo.neo4jlabs.com:7473 67 | 68 | Username "{name}", password: "{name}", database: "{name}" 69 | endif::[] 70 | 71 | Rendered guide available via: `:play {rendered-guide}` 72 | 73 | Unrendered guide: link:{guide}[] 74 | 75 | Load graph data via the following: 76 | 77 | ifeval::[{data} != false] 78 | ==== Data files: `{data}` 79 | 80 | Import flat files (csv, json, etc) using Cypher's https://neo4j.com/docs/cypher-manual/current/clauses/load-csv/[`LOAD CSV`], 
https://neo4j.com/labs/apoc/[APOC library], or https://neo4j.com/developer/data-import/[other methods]. 81 | endif::[] 82 | 83 | ifeval::[{use-dump-file} != false] 84 | ==== Dump file: `{use-dump-file}` 85 | 86 | * Drop the file into the `Files` section of a project in Neo4j Desktop. Then choose the `Create new DBMS from dump` option from the file options. 87 | 88 | * Use the neo4j-admin tool to load data from the command line with the command below. 89 | 90 | [source,shell,subs=attributes] 91 | ---- 92 | bin/neo4j-admin load --from {use-dump-file} [--database "database"] 93 | ---- 94 | 95 | * Upload the dump file to Neo4j Aura via https://console.neo4j.io/#import-instructions 96 | endif::[] 97 | 98 | ifeval::[{use-load-script} != false] 99 | ==== Data load script: `{use-load-script}` 100 | 101 | [source,shell,subs=attributes] 102 | ---- 103 | bin/cypher-shell -u neo4j -p "password" -f {use-load-script} [-d "database"] 104 | ---- 105 | 106 | Or import in Neo4j Browser by dragging or pasting the content of {use-load-script}. 107 | endif::[] 108 | 109 | ifeval::[{zip-file} != false] 110 | ==== Zip file 111 | 112 | Download the zip file link:{repo}/raw/master/{name}.zip[{name}.zip] and add it as "project from file" to https://neo4j.com/developer/neo4j-desktop[Neo4j Desktop^]. 113 | endif::[] 114 | 115 | === Code Examples 116 | 117 | * link:code/javascript/example.js[JavaScript] 118 | * link:code/java/Example.java[Java] 119 | * link:code/csharp/Example.cs[C#] 120 | * link:code/python/example.py[Python] 121 | * link:code/go/example.go[Go] 122 | 123 | === Feedback 124 | 125 | Feel free to submit issues or pull requests for improvement on this repository. 126 | -------------------------------------------------------------------------------- /code/csharp/Example.cs: -------------------------------------------------------------------------------- 1 | // install dotnet core on your system 2 | // dotnet new console -o . 
3 | // dotnet add package Neo4j.Driver 4 | // paste in this code into Program.cs 5 | // dotnet run 6 | 7 | using System; 8 | using System.Collections.Generic; 9 | using System.Text; 10 | using System.Threading.Tasks; 11 | using Neo4j.Driver; 12 | 13 | namespace dotnet { 14 | class Example { 15 | static async Task Main() { 16 | var driver = GraphDatabase.Driver("bolt://:", 17 | AuthTokens.Basic("", "")); 18 | 19 | var cypherQuery = 20 | @" 21 | MATCH (dc:DataCenter {location: $location})-[:CONTAINS]->(r:Router)-[:ROUTES]->(i:Interface) 22 | RETURN i.ip as ip 23 | "; 24 | 25 | var session = driver.AsyncSession(o => o.WithDatabase("neo4j")); 26 | var result = await session.ReadTransactionAsync(async tx => { 27 | var r = await tx.RunAsync(cypherQuery, 28 | new { location="Iceland"}); 29 | return await r.ToListAsync(); 30 | }); 31 | 32 | await session?.CloseAsync(); 33 | foreach (var row in result) 34 | Console.WriteLine(row["ip"].As()); 35 | 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /code/go/example.go: -------------------------------------------------------------------------------- 1 | // go mod init main 2 | // go run example.go 3 | package main 4 | 5 | import ( 6 | "fmt" 7 | "github.com/neo4j/neo4j-go-driver/v4/neo4j" 8 | "io" 9 | "reflect" 10 | ) 11 | 12 | func main() { 13 | results, err := runQuery("bolt://:", "neo4j", "", "") 14 | if err != nil { 15 | panic(err) 16 | } 17 | for _, result := range results { 18 | fmt.Println(result) 19 | } 20 | } 21 | 22 | func runQuery(uri, database, username, password string) (result []string, err error) { 23 | driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, "")) 24 | if err != nil { 25 | return nil, err 26 | } 27 | defer func() {err = handleClose(driver, err)}() 28 | session := driver.NewSession(neo4j.SessionConfig{AccessMode: neo4j.AccessModeRead, DatabaseName: database}) 29 | defer func() {err = handleClose(session, err)}() 30 | results, err := 
session.ReadTransaction(func(transaction neo4j.Transaction) (interface{}, error) { 31 | result, err := transaction.Run( 32 | ` 33 | MATCH (dc:DataCenter {location: $location})-[:CONTAINS]->(r:Router)-[:ROUTES]->(i:Interface) 34 | RETURN i.ip as ip 35 | `, map[string]interface{}{ 36 | "location": "Iceland", 37 | }) 38 | if err != nil { 39 | return nil, err 40 | } 41 | var arr []string 42 | for result.Next() { 43 | value, found := result.Record().Get("ip") 44 | if found { 45 | arr = append(arr, value.(string)) 46 | } 47 | } 48 | if err = result.Err(); err != nil { 49 | return nil, err 50 | } 51 | return arr, nil 52 | }) 53 | if err != nil { 54 | return nil, err 55 | } 56 | result = results.([]string) 57 | return result, err 58 | } 59 | 60 | func handleClose(closer io.Closer, previousError error) error { 61 | err := closer.Close() 62 | if err == nil { 63 | return previousError 64 | } 65 | if previousError == nil { 66 | return err 67 | } 68 | return fmt.Errorf("%v closure error occurred:\n%s\ninitial error was:\n%w", reflect.TypeOf(closer), err.Error(), previousError) 69 | } 70 | -------------------------------------------------------------------------------- /code/java/Example.java: -------------------------------------------------------------------------------- 1 | // Add your the driver dependency to your pom.xml build.gradle etc. 
2 | // Java Driver Dependency: http://search.maven.org/#artifactdetails|org.neo4j.driver|neo4j-java-driver|4.0.1|jar 3 | // Reactive Streams http://search.maven.org/#artifactdetails|org.reactivestreams|reactive-streams|1.0.3|jar 4 | // download jars into current directory 5 | // java -cp "*" Example.java 6 | 7 | import org.neo4j.driver.*; 8 | import static org.neo4j.driver.Values.parameters; 9 | 10 | public class Example { 11 | 12 | public static void main(String...args) { 13 | 14 | Driver driver = GraphDatabase.driver("bolt://:", 15 | AuthTokens.basic("","")); 16 | 17 | try (Session session = driver.session(SessionConfig.forDatabase("neo4j"))) { 18 | 19 | String cypherQuery = 20 | "MATCH (dc:DataCenter {location: $location})-[:CONTAINS]->(r:Router)-[:ROUTES]->(i:Interface)\n" + 21 | "RETURN i.ip as ip"; 22 | 23 | var result = session.readTransaction( 24 | tx -> tx.run(cypherQuery, 25 | parameters("location","Iceland")) 26 | .list()); 27 | 28 | for (Record record : result) { 29 | System.out.println(record.get("ip").asString()); 30 | } 31 | } 32 | driver.close(); 33 | } 34 | } 35 | 36 | 37 | -------------------------------------------------------------------------------- /code/javascript/example.js: -------------------------------------------------------------------------------- 1 | // npm install --save neo4j-driver 2 | // node example.js 3 | const neo4j = require('neo4j-driver'); 4 | const driver = neo4j.driver('bolt://:', 5 | neo4j.auth.basic('', ''), 6 | {/* encrypted: 'ENCRYPTION_OFF' */}); 7 | 8 | const query = 9 | ` 10 | MATCH (dc:DataCenter {location: $location})-[:CONTAINS]->(r:Router)-[:ROUTES]->(i:Interface) 11 | RETURN i.ip as ip 12 | `; 13 | 14 | const params = {"location": "Iceland"}; 15 | 16 | const session = driver.session({database:"neo4j"}); 17 | 18 | session.run(query, params) 19 | .then((result) => { 20 | result.records.forEach((record) => { 21 | console.log(record.get('ip')); 22 | }); 23 | session.close(); 24 | driver.close(); 25 | }) 26 | 
.catch((error) => { 27 | console.error(error); 28 | }); 29 | -------------------------------------------------------------------------------- /code/python/example.py: -------------------------------------------------------------------------------- 1 | # pip3 install neo4j-driver 2 | # python3 example.py 3 | 4 | from neo4j import GraphDatabase, basic_auth 5 | 6 | driver = GraphDatabase.driver( 7 | "bolt://:", 8 | auth=basic_auth("", "")) 9 | 10 | cypher_query = ''' 11 | MATCH (dc:DataCenter {location: $location})-[:CONTAINS]->(r:Router)-[:ROUTES]->(i:Interface) 12 | RETURN i.ip as ip 13 | ''' 14 | 15 | with driver.session(database="neo4j") as session: 16 | results = session.read_transaction( 17 | lambda tx: tx.run(cypher_query, 18 | location="Iceland").data()) 19 | for record in results: 20 | print(record['ip']) 21 | 22 | driver.close() 23 | -------------------------------------------------------------------------------- /data/network-management-35.dump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/data/network-management-35.dump -------------------------------------------------------------------------------- /data/network-management-40.dump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/data/network-management-40.dump -------------------------------------------------------------------------------- /data/network-management-43.dump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/data/network-management-43.dump -------------------------------------------------------------------------------- /data/network-management-50.dump: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/data/network-management-50.dump -------------------------------------------------------------------------------- /documentation/img/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/example.png -------------------------------------------------------------------------------- /documentation/img/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/model.png -------------------------------------------------------------------------------- /documentation/img/network-8000-machines.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-8000-machines.jpg -------------------------------------------------------------------------------- /documentation/img/network-alternative-routes.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-alternative-routes.jpg -------------------------------------------------------------------------------- /documentation/img/network-hardware-arrows.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-hardware-arrows.jpg -------------------------------------------------------------------------------- /documentation/img/network-mode-software.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-mode-software.jpg -------------------------------------------------------------------------------- /documentation/img/network-rack-machines-type.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-rack-machines-type.jpg -------------------------------------------------------------------------------- /documentation/img/network-rack.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-rack.jpg -------------------------------------------------------------------------------- /documentation/img/network-schema-arrows.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-schema-arrows.jpg -------------------------------------------------------------------------------- /documentation/img/network-schema-hardware.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-schema-hardware.jpg 
-------------------------------------------------------------------------------- /documentation/img/network-software-arrows.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-software-arrows.jpg -------------------------------------------------------------------------------- /documentation/img/network-software-dependency.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-software-dependency.jpg -------------------------------------------------------------------------------- /documentation/img/network-software-machine.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-software-machine.jpg -------------------------------------------------------------------------------- /documentation/img/network-zones-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-zones-2.jpg -------------------------------------------------------------------------------- /documentation/img/network-zones.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neo4j-graph-examples/network-management/15d4a47c0f579d8f2c73ecd76d4e3d9c74f7ecea/documentation/img/network-zones.jpg -------------------------------------------------------------------------------- /documentation/img/network.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Group 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /documentation/modelling-decisions.adoc: -------------------------------------------------------------------------------- 1 | == Network Management - Introduction 2 | :neo4j-version: 3.0 3 | :author: Michael Hunger 4 | :tags: network-mgt,it-operations,datacenter 5 | :images: https://dl.dropboxusercontent.com/u/14493611 6 | :images: {img} 7 | :experimental: 8 | 9 | Computer networks span all levels of the stack from physical connections up to mobile and web-applications connecting networks of users. 10 | 11 | Graph Databases offer a natural way of modelling, storing and querying all these types of computer networks. 12 | 13 | A graph database like Neo4j can be utilized for: 14 | 15 | * Configuration Management 16 | * Impact Analysis 17 | * Planning 18 | * Security and Hardening of Networks 19 | * Intrusion Detection 20 | * Traffic Analytics 21 | * Analytics of user behavior 22 | 23 | In this example we want to look at Network Management and Impact Analysis from the level of routing (TCP/IP) upwards to managing applications and tracing their dependencies. 24 | 25 | In this guide we are going to look at: 26 | * setting the scene 27 | * outlining the questions we are looking to ask 28 | * describing the modelling decisions taken. 29 | 30 | == Setting the Scene 31 | 32 | Imagine we have a `DataCenter` connected to an Interconnect via an `Egress Router`. 33 | The datacenter uses a `10.x.x.x/8` IP address range. 34 | 35 | The DataCenter consists of several Zones which are connected to the main backbone each via a `Router` (10.zone.*/16). 36 | 37 | From there each zone is broken down into rows of `Racks`. 
38 | 39 | Each `Rack` contains different types of `Servers` and has its own `Switch` to connect to the datacenter routers backplane. 40 | 41 | Each `Server` has external network `Interfaces` that connect to the rack switch, the local networks being `10.zone.rack.*/24`. 42 | 43 | Each machine either runs a real Operating System (`OS`) or a Virtualization Manager that runs a number of Virtual Machines. 44 | 45 | For operational simplicity we only run one `Application` per OS which uses a number of `Ports` on the external interface. 46 | 47 | The datacenter consists of 4 zones, each of which has its own separate `Network` `10.zone.*/16`, and its own `Router`. 48 | 49 | Each zone contains 10 `Racks`, each of which has its own `Switch` and subnet with an IP like this pattern `10.zone.rack.*/24`. 50 | 51 | Each Rack contains 200 machines of the types we just introduced, so that in total we get 8000 servers in our datacenter. 52 | 53 | Each machine runs software. For our software we differentiate between `Operating Systems, Services and Applications` (which could also be micro services). Each of them has a name, version(s) and dependencies. 54 | 55 | Each of our machines is set up to run an OS and a single application, each of which might require other dependencies that are also installed. 56 | 57 | == Questions to ask 58 | 59 | We previously set the scene with the types of devices we're looking to include in the scope of our domain. To be able to determine the best-suited data model for this domain, we also need to consider the types of questions we want to ask. In this Network Management example, we want to understand aspects around connectivity, impact analysis, configuration management and so forth. 
60 | 61 | To enable this, the types of questions we look to answer in this example are: 62 | * Find paths from specific data centers to interfaces, routers and network zones 63 | * Find the shortest paths between certain devices 64 | * Find the impact on network routing when a device goes down 65 | * Explore the contents of a Rack 66 | * Determine the distribution of machine types across a datacenter 67 | * Determine dependencies on software and specific software versions 68 | 69 | == Modeling 70 | 71 | Based on the scene setting of the Network Management domain, along with some sample questions, we can start to make some decisions about what the data model will look like. Each of the elements in our datacenter (and the datacenter itself) is referred to as a potential anchor point. This would make it a sensible decision for each of these elements to be Nodes in their own right. As elements such as operating system and software are also being queried (and dependencies), these are also going to be Nodes rather than the properties on a Machine node. 72 | 73 | Properties used will either describe the names of datacenters or software, or have the IP address for a network device. 74 | 75 | We can model the network endpoints (boxes like servers, routers, firewalls, racks) of the data center as nodes and the "cables" between them as relationships. 76 | 77 | Another type of node represents networks and interfaces. 78 | 79 | On the application level we have the operating system, virtual machines, application and services that are modeled as entities. 80 | 81 | This is the full data model of your graph. 
82 | 83 | image::{images}/network-schema-arrows.jpg[float=right] 84 | 85 | 86 | -------------------------------------------------------------------------------- /documentation/network-management.adoc: -------------------------------------------------------------------------------- 1 | // https://docs.google.com/a/neotechnology.com/presentation/d/1uajyGl64zdpHjD1d92hQVEJErdnnZ9bACJF7J-TnEWI/edit?usp=drive_web 2 | // https://www.vmware.com/pdf/vi_architecture_wp.pdf 3 | // ping Alan, share document 4 | ++++ 5 | 10 | ++++ 11 | 12 | == Network Management - Introduction 13 | :neo4j-version: 3.0 14 | :author: Michael Hunger 15 | :tags: network-mgt,it-operations,datacenter 16 | :images: https://dl.dropboxusercontent.com/u/14493611 17 | :images: {img} 18 | :experimental: 19 | 20 | Computer networks span all levels of the stack from physical connections up to mobile and web-applications connecting networks of users. 21 | 22 | Graph Databases offer a natural way of modelling, storing and querying all these types of computer networks. 23 | 24 | A graph database like Neo4j can be utilized for: 25 | 26 | * Configuration Management 27 | * Impact Analysis 28 | * Planning 29 | * Security and Hardening of Networks 30 | * Intrusion Detection 31 | * Traffic Analytics 32 | * Analytics of user behavior 33 | 34 | In this example we want to look at Network Management and Impact Analysis from the level of routing (TCP/IP) upwards to managing applications and tracing their dependencies. 35 | 36 | Throughout the guide you'll find Cypher statements that you can execute by clicking on them and then hitting the run button. 37 | 38 | == Modeling 39 | 40 | image::{images}/Network-Management-Model.svg[width=700, float=right] 41 | 42 | We can model the network endpoints (boxes like servers, routers, firewalls, racks) of the data center as nodes and the "cables" between them as relationships. 43 | 44 | Another type of node represents networks and interfaces. 
45 | 46 | On the application level we have the operating system, virtual machines, application and services that are modeled as entities. 47 | 48 | Our example data is already set-up, in the "resources" section at the end, you'll find some pointers there. 49 | 50 | == DataCenter 51 | 52 | This is the full data model of your graph. 53 | 54 | image::{images}/network-schema-arrows.jpg[float=right] 55 | 56 | If you want to see it yourself, run 57 | 58 | [source,cypher] 59 | ---- 60 | call db.schema.visualization() 61 | ---- 62 | 63 | Imagine we have a `DataCenter` connected to an Interconnect via an `Egress Router`. 64 | The datacenter uses a `10.x.x.x/8` IP address range. 65 | 66 | The DataCenter consists of several Zones which are connected to the main backbone each via a `Router` (10.zone.*/16). 67 | 68 | From there each zone is broken down into rows of `Racks`. 69 | 70 | Each `Rack` contains different types of `Servers` and has its own `Switch` to connect to the datacenter routers backplane. 71 | 72 | Each `Server` has external network `Interfaces` that connect to the rack switch, the local networks being `10.zone.rack.*/24`. 73 | 74 | Each machine either runs a real Operating System (`OS`) or a Virtualization Manager that runs a number of Virtual Machines. 75 | 76 | For operational simplicity we only run one `Application` per OS which uses a number of `Ports` on the external interface. 77 | 78 | //// 79 | // todo subset / asciidoctor diagram for networks 80 | // image::{images}/Network-Management-Model.svg[width=700] 81 | // image::{images}/network-schema-hardware.jpg[] 82 | image::{images}/network-software-arrows.jpg[float=right] 83 | image::{images}/network-hardware-arrows.jpg[] 84 | //// 85 | 86 | Usually we would get this kind of information from a configuration management database (CMDB), network management tools or agents installed on the machines. 87 | 88 | == Network Exploration: DataCenter and Zones 89 | 90 | Let's walk through the data, step by step. 
Let's start with the DataCenter. 91 | 92 | [source,cypher] 93 | ---- 94 | MATCH network = (dc:DataCenter {name:"DC1",location:"Iceland, Rekjavik"}) 95 | -[:CONTAINS]->(:Router) 96 | -[:ROUTES]->(:Interface) 97 | RETURN network; 98 | ---- 99 | 100 | image::{images}/network-zones.jpg[float=right] 101 | 102 | The datacenter consists of 4 zones, each of which has its own separate `Network` `10.zone.*/16`, and its own `Router`. 103 | 104 | We can draw out that verbal description in a query with patterns matching the network parts. 105 | 106 | [source,cypher] 107 | ---- 108 | MATCH (dc:DataCenter {name:"DC1"})-[:CONTAINS]->(re:Router:Egress)-[:ROUTES]->(rei:Interface) 109 | 110 | MATCH (nr:Network:Zone)<-[:CONNECTS]-(rei) 111 | 112 | // router in DC, connect it via an interface to the zone network 113 | MATCH (dc)-[:CONTAINS]->(r:Router)-[:ROUTES]->(ri:Interface)-[:CONNECTS]->(nr) 114 | 115 | RETURN *; 116 | ---- 117 | 118 | To visualize the DataCenter and its components so far, we can also start at the center and then go 3 hops out. 119 | 120 | [source,cypher] 121 | ---- 122 | MATCH path = (dc:DataCenter)-[*3]-(:Network) 123 | RETURN path; 124 | ---- 125 | 126 | We could also get statistical information, like the addresses of routers and interfaces in each network. 127 | 128 | You can see very well how the graph representation in the match pattern resembles our domain model. 
129 | 130 | [source,cypher] 131 | ---- 132 | MATCH (r:Router)-[:ROUTES]->(ri:Interface)-[:CONNECTS]->(nr:Network) 133 | WHERE r.zone IS NOT NULL 134 | RETURN nr.ip as network_ip, ri.ip as router_if_ip, r.name as router, r.zone as zone; 135 | ---- 136 | 137 | //ifndef::env-guide[] 138 | ---- 139 | ╒════════════╤══════════════╤═════════╤══════╕ 140 | │"network_ip"│"router_if_ip"│"router" │"zone"│ 141 | ╞════════════╪══════════════╪═════════╪══════╡ 142 | │"10.1" │"10.1.0.254" │"DC1-R-1"│1 │ 143 | ├────────────┼──────────────┼─────────┼──────┤ 144 | │"10.2" │"10.2.0.254" │"DC1-R-2"│2 │ 145 | ├────────────┼──────────────┼─────────┼──────┤ 146 | │"10.3" │"10.3.0.254" │"DC1-R-3"│3 │ 147 | ├────────────┼──────────────┼─────────┼──────┤ 148 | │"10.4" │"10.4.0.254" │"DC1-R-4"│4 │ 149 | └────────────┴──────────────┴─────────┴──────┘ 150 | ---- 151 | //endif::env-guide[] 152 | 153 | == Network Exploration: Racks 154 | 155 | image::{images}/network-rack.jpg[width=600,float=right] 156 | 157 | Each zone contains 10 `Racks`, each of which has its own `Switch` and subnet with an IP like this pattern `10.zone.rack.*/24`. 158 | 159 | [source,cypher] 160 | ---- 161 | MATCH (dc:DataCenter {name:"DC1"})-[:CONTAINS]->(rack:Rack)-[:HOLDS]->(s:Switch)-[:ROUTES]->(si:Interface)<-[:ROUTES]-(nr:Network:Zone) 162 | 163 | RETURN *; 164 | ---- 165 | 166 | Now our network has grown quite a bit: 167 | 168 | [source,cypher] 169 | ---- 170 | MATCH network = (dc:DataCenter)-[*6]-(:Rack) 171 | RETURN network; 172 | ---- 173 | 174 | == Network Connectivity 175 | 176 | Now we could already have a look at the network connectivity in our datacenter. 177 | 178 | If we now look at the overall connections, we need to use shortest paths, which represent the most efficient routes. 
179 | 180 | .Connectivity before: 40 routes of 5 hops each 181 | [source,cypher] 182 | ---- 183 | MATCH path = allShortestPaths( (rack:Rack)-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress) ) 184 | RETURN length(path) as hops, count(*) as count; 185 | ---- 186 | 187 | What happens if one of our cables gets loose or cut, i.e. the `ROUTES` relationship between the switch's interface and the network is gone? 188 | 189 | Let's *cut the cable* of this first switch. 190 | 191 | [source,cypher] 192 | ---- 193 | MATCH (:Interface {ip:"10.1.1.254"})<-[rel:ROUTES]-(:Network) 194 | DELETE rel 195 | ---- 196 | 197 | Connectivity after: 39 routes of 5 hops each 198 | 199 | [source,cypher] 200 | ---- 201 | MATCH path = allShortestPaths( (rack:Rack)-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress) ) 202 | RETURN length(path) as hops, count(*) as count; 203 | ---- 204 | 205 | Now all the machines in that Rack are cut off and have no connection anymore, which we can demonstrate by trying to find the shortest path. 206 | 207 | [source,cypher] 208 | ---- 209 | MATCH connection = allShortestPaths( (rack:Rack {name:"DC1-RCK-1-1"})-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress) ) 210 | RETURN connection; 211 | ---- 212 | 213 | How can we fix that? 214 | We could connect each switch to all the other three networks too, so we would survive the loss of 3 of those 4 connections. 
215 | 216 | .Creating new, redundant network connections 217 | [source,cypher] 218 | ---- 219 | // for all zones 220 | MATCH (nr:Network:Zone) 221 | // find *all* switches and their interface 222 | MATCH (s:Switch)-[:ROUTES]->(si:Interface) 223 | // connect them to all the zones, if not yet connected 224 | MERGE (si)<-[:ROUTES]-(nr); 225 | ---- 226 | 227 | [source,cypher] 228 | ---- 229 | MATCH path = allShortestPaths((rack:Rack)-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress)) 230 | RETURN length(path) as hops, count(*) as count; 231 | ---- 232 | 233 | ---- 234 | ╒══════╤═══════╕ 235 | │"hops"│"count"│ 236 | ╞══════╪═══════╡ 237 | │5 │160 │ 238 | └──────┴───────┘ 239 | ---- 240 | 241 | image::{images}/network-alternative-routes.jpg[float=right] 242 | 243 | Cut the first cable of this first switch again. 244 | 245 | [source,cypher] 246 | ---- 247 | MATCH (:Interface {ip:"10.1.1.254"})<-[rel:ROUTES]-(:Network) 248 | WITH rel LIMIT 1 249 | DELETE rel 250 | ---- 251 | 252 | But that Rack is now still connected with 3 alternative routes. 253 | 254 | [source,cypher] 255 | ---- 256 | MATCH path = allShortestPaths((rack:Rack {zone:1,rack:1})-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress)) 257 | RETURN path; 258 | ---- 259 | 260 | 261 | ifndef::env-guide[] 262 | ---- 263 | ╒══════╤═══════╕ 264 | │"hops"│"count"│ 265 | ╞══════╪═══════╡ 266 | │5 │3 │ 267 | └──────┴───────┘ 268 | ---- 269 | endif::env-guide[] 270 | 271 | // what happens if the whole network is cut off 272 | 273 | Now let's look at the servers in those racks. 274 | 275 | == Machine types 276 | 277 | Similar to the machines you can rent on AWS we use machine types, for which we auto-created some reasonable capacities for CPU, RAM and DISK.
278 | 279 | [source,cypher] 280 | ---- 281 | MATCH (t:Type) 282 | RETURN t.name, t.id, t.cpu, t.ram, t.disk; 283 | ---- 284 | 285 | ---- 286 | ╒══════════════════╤══════╤═══════╤═══════╤════════╕ 287 | │"t.name" │"t.id"│"t.cpu"│"t.ram"│"t.disk"│ 288 | ╞══════════════════╪══════╪═══════╪═══════╪════════╡ 289 | │"xs-1/1/1" │0 │1 │1 │1 │ 290 | ├──────────────────┼──────┼───────┼───────┼────────┤ 291 | │"s-2/4/5" │1 │2 │4 │5 │ 292 | ├──────────────────┼──────┼───────┼───────┼────────┤ 293 | │"m-4/16/25" │2 │4 │16 │25 │ 294 | ├──────────────────┼──────┼───────┼───────┼────────┤ 295 | │"l-8/64/125" │3 │8 │64 │125 │ 296 | ├──────────────────┼──────┼───────┼───────┼────────┤ 297 | │"xl-16/256/625" │4 │16 │256 │625 │ 298 | ├──────────────────┼──────┼───────┼───────┼────────┤ 299 | │"xxl-32/1024/3125"│5 │32 │1024 │3125 │ 300 | └──────────────────┴──────┴───────┴───────┴────────┘ 301 | ---- 302 | 303 | == Machines 304 | 305 | Each Rack contains 200 machines of the types we just introduced, so that in total we get 8000 servers in our datacenter. 306 | 307 | As expected, the distribution of the types is inverse to their capabilities. 308 | 309 | As the graph visualization of our full datacenter would be pretty but otherwise useless ... 310 | 311 | image::{images}/network-8000-machines.jpg[] 312 | 313 | image::{images}/network-rack-machines-type.jpg[width=400,float=right] 314 | 315 | We'd rather look at the contents of a single rack 316 | 317 | .Visualization of Rack `DC1-RCK-2-1` 318 | [source,cypher] 319 | ---- 320 | MATCH (r:Rack {name:"DC1-RCK-2-1"})-[:HOLDS]->(m:Machine), 321 | (m)-[:ROUTES]->(i:Interface)-[:CONNECTS]->(si)<-[:ROUTES]-(s:Switch), 322 | (m)-[:TYPE]->(type:Type) 323 | RETURN * 324 | ---- 325 | 326 | or its stats.
327 | 328 | .Contents of Rack `DC1-RCK-2-1` 329 | [source,cypher] 330 | ---- 331 | MATCH (r:Rack {name:"DC1-RCK-2-1"})-[:HOLDS]->(m:Machine), 332 | (m)-[:ROUTES]->(i:Interface)-[:CONNECTS]->(si)<-[:ROUTES]-(s:Switch), 333 | (m)-[:TYPE]->(type:Type) 334 | RETURN r.name as rack, si.ip as switchIp, properties(type) as type, count(m) as machines, min(i.ip) as minIp, max(i.ip) as maxIp 335 | ORDER BY machines DESC; 336 | ---- 337 | 338 | ifndef::env-guide[] 339 | [.smallest] 340 | ---- 341 | ╒═════════════╤════════════╤═══════════════════════════════════════════════════════════════════════════════════════╤══════════╤════════════╤════════════╕ 342 | │"rack" │"switchIp" │"type" │"machines"│"minIp" │"maxIp" │ 343 | ╞═════════════╪════════════╪═══════════════════════════════════════════════════════════════════════════════════════╪══════════╪════════════╪════════════╡ 344 | │"DC1-RCK-2-1"│"10.2.1.254"│{"disk":"5","name":"s-2/4/5","cpu":"2","id":"1","type":"s","ram":"4"} │"94" │"10.2.1.100"│"10.2.1.99" │ 345 | ├─────────────┼────────────┼───────────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┤ 346 | │"DC1-RCK-2-1"│"10.2.1.254"│{"disk":"1","name":"xs-1/1/1","cpu":"1","id":"0","type":"xs","ram":"1"} │"52" │"10.2.1.1" │"10.2.1.9" │ 347 | ├─────────────┼────────────┼───────────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┤ 348 | │"DC1-RCK-2-1"│"10.2.1.254"│{"disk":"25","name":"m-4/16/25","cpu":"4","id":"2","type":"m","ram":"16"} │"34" │"10.2.1.147"│"10.2.1.180"│ 349 | ├─────────────┼────────────┼───────────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┤ 350 | │"DC1-RCK-2-1"│"10.2.1.254"│{"disk":"125","name":"l-8/64/125","cpu":"8","id":"3","type":"l","ram":"64"} │"13" │"10.2.1.181"│"10.2.1.193"│ 351 | 
├─────────────┼────────────┼───────────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┤ 352 | │"DC1-RCK-2-1"│"10.2.1.254"│{"disk":"625","name":"xl-16/256/625","cpu":"16","id":"4","type":"xl","ram":"256"} │"5" │"10.2.1.194"│"10.2.1.198"│ 353 | ├─────────────┼────────────┼───────────────────────────────────────────────────────────────────────────────────────┼──────────┼────────────┼────────────┤ 354 | │"DC1-RCK-2-1"│"10.2.1.254"│{"disk":"3125","name":"xxl-32/1024/3125","cpu":"32","id":"5","type":"xxl","ram":"1024"}│"2" │"10.2.1.199"│"10.2.1.200"│ 355 | └─────────────┴────────────┴───────────────────────────────────────────────────────────────────────────────────────┴──────────┴────────────┴────────────┘ 356 | ---- 357 | endif::env-guide[] 358 | 359 | We can also query for a distribution of machine types across the datacenter. 360 | 361 | [source,cypher] 362 | ---- 363 | MATCH (r:Rack)-[:HOLDS]->(m:Machine)-[:TYPE]->(type:Type) 364 | RETURN properties(type) as type, count(*) as c 365 | ORDER BY c DESC; 366 | ---- 367 | 368 | ---- 369 | ╒══════════════════════════════════════════════════════════════════╤════╕ 370 | │"t" │"c" │ 371 | ╞══════════════════════════════════════════════════════════════════╪════╡ 372 | │{"disk":5,"name":"s-2/4/5","cpu":2,"id":1,"ram":4} │3760│ 373 | ├──────────────────────────────────────────────────────────────────┼────┤ 374 | │{"disk":1,"name":"xs-1/1/1","cpu":1,"id":0,"ram":1} │2080│ 375 | ├──────────────────────────────────────────────────────────────────┼────┤ 376 | │{"disk":25,"name":"m-4/16/25","cpu":4,"id":2,"ram":16} │1360│ 377 | ├──────────────────────────────────────────────────────────────────┼────┤ 378 | │{"disk":125,"name":"l-8/64/125","cpu":8,"id":3,"ram":64} │520 │ 379 | ├──────────────────────────────────────────────────────────────────┼────┤ 380 | │{"disk":625,"name":"xl-16/256/625","cpu":16,"id":4,"ram":256} │200 │ 381 | 
├──────────────────────────────────────────────────────────────────┼────┤ 382 | │{"disk":3125,"name":"xxl-32/1024/3125","cpu":32,"id":5,"ram":1024}│80 │ 383 | └──────────────────────────────────────────────────────────────────┴────┘ 384 | ---- 385 | 386 | Or if we treat our datacenter as a supercomputer, what's the total amount of CPUs, RAM and disk available: 387 | 388 | [source,cypher] 389 | ---- 390 | MATCH (m:Machine)-[:TYPE]->(type:Type) 391 | RETURN count(*) as count, sum(type.cpu) as cpus, sum(type.ram) as ram, sum(type.disk) as disk; 392 | ---- 393 | 394 | .Not bad, that's quite some compute power. 395 | ---- 396 | ╒═══════╤══════╤══════╤══════╕ 397 | │"count"│"cpus"│"ram" │"disk"│ 398 | ╞═══════╪══════╪══════╪══════╡ 399 | │8000 │24960 │205280│494880│ 400 | └───────┴──────┴──────┴──────┘ 401 | ---- 402 | 403 | == Software: Operating Systems and Applications 404 | 405 | Bare-metal hardware is cool, but something has to run on it to make it useable. 406 | 407 | Most likely it will be some kind of virtualization infrastructure that allows dynamic reallocation of the compute, memory and disk resources. 408 | 409 | Because of the added complexity, we skip this for now. 410 | 411 | For our software we differentiate between `Operating Systems, Services and Applications` (which could also be micro services). 412 | 413 | Each of them has a name, version(s) and dependencies. 414 | 415 | In a more elaborate model we could also handle other resource requirements like RAM / CPU / DISK per running software instance. 416 | 417 | Let's look at our available operating systems. 
418 | 419 | [source,cypher] 420 | ---- 421 | MATCH (o:OS:Software)-[:VERSION]->(v) 422 | OPTIONAL MATCH (v)<-[:PREVIOUS]-(vnext) 423 | RETURN o.name as os, v.name as version, vnext.name as next_version 424 | ORDER BY os, version; 425 | ---- 426 | 427 | Similar for our other software 428 | 429 | [source,cypher] 430 | ---- 431 | MATCH (s:Software) WHERE not s:OS 432 | OPTIONAL MATCH (s)-[:VERSION]->(v) 433 | OPTIONAL MATCH (s)-[:DEPENDS_ON]->(dv)<-[:VERSION]-(d) 434 | RETURN s.name, collect(v.name) as versions, [x IN collect([d.name,dv.name]) WHERE x[0] IS NOT NULL] as dependencies, s.ports; 435 | ---- 436 | 437 | == Software: Running on Machines 438 | 439 | Each of our machines is set up to run an OS and a single application, each of which might require other dependencies that are also installed. 440 | 441 | image::{images}/network-software-arrows.jpg[] 442 | 443 | [source,cypher] 444 | ---- 445 | MATCH (m:Machine) WHERE (m)-[:RUNS]->() AND rand() < 0.05 WITH m LIMIT 1 446 | MATCH (m)-[r:RUNS]->(p:Process)-[i:INSTANCE]->(sv) 447 | OPTIONAL MATCH (sv)<-[v:VERSION]-(sw) 448 | RETURN * 449 | ---- 450 | 451 | image::{images}/network-software-machine.jpg[] 452 | 453 | == Dependency Analysis 454 | 455 | We could look at dependencies between data center elements on the physical level, like routers, switches and interfaces. 456 | 457 | Another way to look at it is to determine dependencies between machines based on their internal and external connections. 458 | 459 | But we can also use the software and its dependencies to determine bottlenecks and frequently dependent upon components. 460 | 461 | Let's look at all the software that uses Neo4j and the running Neo4j instances. 
462 | 463 | //// 464 | * TODO distribute services across machines 465 | * TODO lookup services for connecting 466 | * TODO have a depends_on between instances 467 | * have multiple consumers depend on a service / machine 468 | * TODO create pid, start-time for process 469 | //// 470 | 471 | [source,cypher] 472 | ---- 473 | MATCH (s)-[:DEPENDS_ON]->(nv:Version)<-[:VERSION]-(n:Software:Service {name:"neo4j"}) 474 | MATCH (s)<-[:INSTANCE]-(sp)<-[:RUNS]-(sm:Machine) 475 | MATCH (sp)-[DEPENDS_ON]->(np)-[:INSTANCE]->(nv) 476 | MATCH (np)<-[:RUNS]-(nm:Machine) 477 | RETURN sm as software_machine, sp as software_process, s as software, nv as neo_version,np as neo4j_process, np as neo_machine 478 | LIMIT 10 479 | ---- 480 | 481 | //// 482 | TODO 483 | As multiple consumers rely on our services, we can determine which are the most dependent upon components in our system. 484 | 485 | TODO 486 | We could then 487 | //// 488 | 489 | == Configuration Management 490 | 491 | Proper IT infrastructures use a large number of configuration parameters to customize commodity hardware and software. To manage all of the variables, Configuration Management Databases (CMDBs) are used. Systems require certain variables, and can report what is currently configured so that the CMDB can detect issues, and send necessary updates. 492 | 493 | In the past, CMDBs were mostly used for network, hardware and OS level configuration. Today, their use has expanded into services to support modern architectures. A number of related systems have popped up, such as ZooKeeper, Consul, Eureka, and others. 494 | 495 | Due to the variety of systems used for providing configuration to the infrastructure, it is very useful to create a unified, up to date view of the situation in your systems graph. 496 | 497 | === Upgrade OS Version and its Dependencies for a Version Range 498 | 499 | We're looking for machines in our Graph-CMDB whose Operating systems have to be updated.
500 | The OS versions were linked in a list of `:PREVIOUS` connections. 501 | So we can easily determine if a machine runs a version older than the expected one, even if version numbers are not sortable. 502 | Those machines will be marked for an update to the correct version. 503 | 504 | .Mark for update 505 | [source,cypher] 506 | ---- 507 | MATCH (os:OS:Software)-[:VERSION]->(newVersion) WHERE os.name = 'Debian' and newVersion.name = '8-Jessie' 508 | 509 | MATCH (m:Machine)-[:RUNS]->(op:OS:Process)-[:INSTANCE]->(currentVersion) 510 | WHERE (currentVersion)<-[:PREVIOUS*]-(newVersion) 511 | 512 | // create update request 513 | CREATE (m)-[:UPDATE_TO {ts:timestamp()}]->(newVersion) 514 | ---- 515 | 516 | All machines with `UPDATE_TO` requests can be found by tools and operators. 517 | 518 | .Find pending updates 519 | [source,cypher] 520 | ---- 521 | MATCH (r:Rack)-[:HOLDS]->(m:Machine)-[:UPDATE_TO]->(vNew:Version)<-[:VERSION]-(os:OS:Software) 522 | MATCH (m)-[:RUNS]->(:OS:Process)-[:INSTANCE]->(vCurr) 523 | WHERE vCurr <> vNew 524 | RETURN r.name, m.name, os.name, vCurr.name as currentVersion, vNew.name as newVersion 525 | LIMIT 100; 526 | ---- 527 | 528 | When the local OS is physically updated, the old `:OS:Process` will be stopped and the new one will run. 529 | 530 | .Replace old OS instance with new 531 | [source,cypher] 532 | ---- 533 | MATCH (m:Machine)-[:UPDATE_TO]->(vNew:Version)<-[:VERSION]-(os:OS:Software) 534 | MATCH (m)-[:RUNS]->(op:OS:Process)-[:INSTANCE]->(vCurr) 535 | WHERE vCurr <> vNew 536 | CREATE (m)-[:RUNS]->(opNew:OS:Process)-[:INSTANCE]->(vNew) 537 | DETACH DELETE op; 538 | ---- 539 | 540 | After the physical update has been performed, the machines will report the now updated version and the update request can be removed.
541 | 542 | .Remove resolved update requests 543 | [source,cypher] 544 | ---- 545 | MATCH (m:Machine)-[update:UPDATE_TO]->(v:Version)<-[:VERSION]-(os:OS:Software) 546 | WHERE (m)-[:RUNS]->(:OS:Process)-[:INSTANCE]->(v) 547 | 548 | DELETE update; 549 | ---- 550 | 551 | //// 552 | === x 553 | 554 | [source,cypher] 555 | ---- 556 | 557 | ---- 558 | 559 | // lending-club 560 | //// 561 | 562 | == IT-Monitoring and Governance 563 | 564 | Live network operations need to be supervised to ensure smooth operations, prevent bottlenecks, protect from attacks and vulnerabilities and allow maintenance planning and failure handling. 565 | 566 | The information is either acquired by listening on network traffic and inferring running services and user and application activity combined with port-scans. 567 | 568 | Alternatively, agents installed on the machines report the state of each server to the network or centralized databases which update the live state of the network. 569 | 570 | // todo Alan R. Assimilation Systems 571 | // todo shodan & co -> Will 572 | 573 | Based on our existing model, those incoming messages and events can do the following: 574 | 575 | * Create new entries for Servers, Switches, Interfaces 576 | * Track running Services via used ports and traffic 577 | * Infer user and application activity and group by network segment, source, used service 578 | * Detect abnormal operations like attacks or potential bottlenecks and issue warnings 579 | * Track violations of rules, like isolation of the DMZ, certain firewall rules etc. 580 | 581 | Here is an example of a new connection coming in and the graph being updated accordingly. 582 | Subsequent information for that connection will be aggregated until it is closed, then the totals could be added to the general `CONNECTIONS` relationship between the two IPs. 583 | 584 | We could generate some events, by having processes from some machines accessing processes from other (random) Machines. 
585 | 586 | // todo cross machine service dependencies like CRM -> CMS or service1 -> service100 587 | 588 | [source,cypher] 589 | ---- 590 | MATCH (m:Machine) WITH collect(m) as machines 591 | WITH machines, size(machines) as len 592 | UNWIND range(1,10) as idx 593 | WITH machines[toInteger(rand()*len)] as source, machines[toInteger(rand()*len)] as target 594 | MATCH (source)-[:ROUTES]->(si:Interface)-[:EXPOSES]->(sp:Port)<-[:LISTENS]-(sourceAppProcess)-[:INSTANCE]->(sourceApp) 595 | WITH target, source,si,head(collect(sp)) as sp, sourceAppProcess,sourceApp 596 | // todo limit to first port 597 | MATCH (target)-[:ROUTES]->(ti:Interface)-[:EXPOSES]->(tp:Port)<-[:LISTENS]-(targetAppProcess)-[:INSTANCE]->(targetApp) 598 | WITH source,si,sp, sourceAppProcess,sourceApp,target,ti,head(collect(tp)) as tp, targetAppProcess, targetApp 599 | // todo limit to first port 600 | RETURN {id: randomUUID(), type:"OpenConnection",source:{ip:si.ip, port:sp.port},target:{ip:ti.ip,port:tp.port}, 601 | connection: {source:sourceApp.name, target:targetApp.name, user: "user"+toString(toInteger(rand()*1000))+"@"+source.name, 602 | time:timestamp(), packets: 1, mtu: 1500 }} as event 603 | ---- 604 | 605 | 606 | [source,cypher] 607 | ---- 608 | :param events: 609 | [ 610 | {"source":{"ip":"10.1.7.100","port":11210},"id":"3e41d6f0-fdce-48f4-9bff-818359d8f0af","target":{"ip":"10.3.3.112","port":8080}, 611 | "connection":{"source":"couchbase","target":"webapp","user":"user436@DC1-RCK-1-7-M-100","time":1490540382971},"type":"OpenConnection", 612 | "packets": 1, "mtu": 1500, "time": 1490904418539 }, 613 | {"source":{"ip":"10.1.4.91","port":7474},"id":"fed44be6-55f5-4e42-aab1-bebc5c818268","target":{"ip":"10.4.6.7","port":8080}, 614 | "connection":{"source":"neo4j","target":"webapp","user":"user911@DC1-RCK-1-4-M-91","time":1490540382971},"type":"OpenConnection", 615 | "packets": 1, "mtu": 1500, "time": 1490904464824 } 616 | ] 617 | ---- 618 | 619 | // todo add durations of connections 620 | 621 | 
[source, cypher] 622 | ---- 623 | UNWIND $events AS event 624 | WITH event WHERE event.type = 'OpenConnection' 625 | 626 | MERGE (si:Interface {ip:event.source.ip}) 627 | MERGE (si)-[:OPENS]->(sp:Port {port: event.source.port}) 628 | 629 | MERGE (ti:Interface {ip:event.target.ip}) 630 | MERGE (ti)-[:LISTENS]->(tp:Port {port:event.target.port}) 631 | 632 | CREATE (sp)<-[:FROM]-(c:Connection {id:event.id})–[:TO]->(tp) 633 | SET c += event.connection // type, timestamp, user-info, ... 634 | MERGE (si)-[cstats:CONNECTIONS]->(ti) 635 | SET si.count = coalesce(si.count,0) + 1 636 | SET si.packets = coalesce(si.packets,0) + event.packets 637 | SET si.volume = coalesce(si.volume,0) + event.packets * event.mtu 638 | ---- 639 | 640 | All the information is aggregated in a live graph representation which is available for querying for alerts & notifications, dashboards, inventory summaries, reports and more. 641 | 642 | Historic information can be stored as well as a timeline chain of changes attributed to cause. 643 | Both can be queried by operators to drill down into detailed analysis. 644 | 645 | .Connections opened over a time range 646 | [source,cypher] 647 | ---- 648 | MATCH (si:Interface)-[:OPENS]->(sp:Port)<-[:FROM]-(c:Connection)–[:TO]->(tp:Port)<-[:LISTENS]-(ti:Interface) 649 | WHERE c.type = 'OpenConnection' 650 | RETURN si.ip as source, ti.ip as target, apoc.date.format(c.time,'ms','yyyy-MM-dd HH') as hour, count(distinct c) as count 651 | ORDER BY hour ASC, count DESC 652 | LIMIT 100; 653 | ---- 654 | 655 | // TODO examples 656 | 657 | == Examples for graph based Network Management Solutions 658 | 659 | A number of commercial solutions provide this kind of service, some of them are running Neo4j. 660 | 661 | There are also open source solutions like https://github.com/LendingClub/mercator[Mercator from Lending Club] and the http://assimilationsystems.com/[Assimilation Project by Alan Robertson]. 
662 | 663 | This real-time IT inventory information is also required for due diligence, e.g. for corporate investments, mergers or acquisitions. 664 | 665 | // At the FOSDEM conference, https://fosdem.org/2017/schedule/event/graph_traffic_analysis_hadoop_patterns/[Cloudera engineers demonstrated] how they used graph analytics and visualization to make traffic information of a Hadoop cluster accessible. 666 | // todo cloudera project 667 | 668 | 669 | == Monitoring Use-Cases 670 | 671 | Our graph contains both the static topological information and a lot of runtime information using the base topology. 672 | From the runtime data we can retrieve different metrics. 673 | 674 | === For instance, minimal, average and maximal runtimes of software instances per type 675 | 676 | [source,cypher] 677 | ---- 678 | MATCH (v)<-[:INSTANCE]-(sp:Process)<-[:RUNS]-(sm:Machine) 679 | MATCH (s:Software)-[:VERSION]->(v:Version) 680 | WITH s.name as software, v.name as version, timestamp() - sp.startTime as runtime 681 | RETURN software, version, count(*) as instances, { min: min(runtime), max: max(runtime), avg:avg(runtime) } as runtime 682 | ---- 683 | 684 | === Data Transfer Volume between Interfaces 685 | 686 | // todo add durations of connections 687 | 688 | [source,cypher] 689 | ---- 690 | MATCH path = (source:Interface)-[con:CONNECTIONS]->(target:Interface) 691 | RETURN source.ip, target.ip, sum(con.packets) as packets, sum(con.volume) as volume 692 | ---- 693 | 694 | //// 695 | 696 | TODO 697 | === Data Volume Flow over the Network 698 | 699 | [source,cypher] 700 | ---- 701 | MATCH path = allShortestPaths((rr:Router:Egress)-[rel:CONNECTS*]->(i:Interface)) 702 | UNWIND rel as con 703 | RETURN length(path), min(con.flow), max(con.flow), avg(con.flow) 704 | ---- 705 | //// 706 | 707 | 708 | == Resource Management Graph 709 | 710 | If you use a resource manager like Apache Mesos (or DC/OS), Kubernetes etc.
you specify for each piece of software you run not just name, version and dependencies but also resource requirements like cpu, ram, disk, ports and more. 711 | 712 | A scheduler then takes the available resources of a configured machine cluster to schedule and allocate its resources to the needs and numbers of the required instances of software to run. 713 | It also takes care of health checks, and (re)starting / (re)scheduling and (re)routing of individual new or failed instances. 714 | 715 | To model the resource graph of such a system is interesting to look at and reason about, especially if other requirements like indicated co-location or disk-reuse are taken into account. 716 | 717 | // TODO mesos resource graph with Johannes 718 | 719 | == References 720 | 721 | * https://neo4j.com/use-cases/network-and-it-operations/[Neo4j Solutions: Network & IT Operations] 722 | * https://neo4j.com/resources/network-datacenter-white-paper/[WP: Graph Databases Solve Problems in Network and Data Center Management] 723 | // * Orange SFR 724 | * http://assimilationsystems.com/[Assimilation Systems] https://neo4j.com/blog/solve-network-management-problems-with-neo4j/[Interview with Founder Alan Robertson] 725 | * https://neo4j.com/graphgists/?category=network-and-it-operations[Network Management GraphGists] 726 | * Lending Club Engineering created a number of network management projects using Neo4j 727 | ** https://neo4j.com/blog/managing-microservices-neo4j/[Presentation], https://www.slideshare.net/robschoening/managing-microservices-with-neo4j-53389282[Slides] 728 | ** MacGyver: DevOps Multi-Tool https://github.com/LendingClub?q=macgyver[Repositories] https://www.slideshare.net/neo4j/neo4j-for-cloud-management-at-scale[Slides] 729 | ** Mercator: produce graph model projections of infrastructure https://github.com/LendingClub/mercator[Repository] 730 | // ** Protector 731 | // * check other use-cases cisco?
732 | * http://springinpractice.com/2011/12/17/domain-modeling-with-spring-data-neo4j-code[Building the Zkybase CMDB using Neo4j and Spring Data Neo4j] 733 | * http://lightmesh.com/[LightMesh CMDB solution from Neo4j Partner xnlogic] 734 | * https://labs.vmware.com/vmtj/simplifying-virtualization-management-with-graph-databases[Article: Simplifying Virtualization Management with Graph Databases] 735 | * https://www.vmware.com/pdf/vi_architecture_wp.pdf[WhitePaper: VMware Infrastructure Architecture Overview] 736 | 737 | //// 738 | 739 | == Creating Data 740 | 741 | === Data Center 742 | 743 | [source,cypher] 744 | ---- 745 | CREATE (dc:DataCenter {name:"DC1",location:"Iceland, Rekjavik"})-[:CONTAINS]->(re:Router:Egress {name:"DC1-RE"}) 746 | CREATE (re)-[:ROUTES]->(:Interface {ip:"10.0.0.254"}); 747 | ---- 748 | 749 | === Zones 750 | 751 | The datacenter consists of 4 zones, each of which has its own separate `Network` `10.zone.*/16`, and its own `Router`. 752 | 753 | 754 | [source,cypher] 755 | ---- 756 | WITH 4 AS zones 757 | MATCH (dc:DataCenter {name:"DC1"})-[:CONTAINS]->(re:Router:Egress)-[:ROUTES]->(rei:Interface) 758 | 759 | // for each zone 760 | WITH * UNWIND range(1,zones) AS zid 761 | 762 | // create zone network 763 | CREATE (nr:Network:Zone {ip:"10."+zid, size: 16, zone:zid})<-[:CONNECTS]-(rei) 764 | 765 | // create router in DC, connect it via an interface to the zone network 766 | CREATE (dc)-[:CONTAINS]->(r:Router {name:"DC1-R-"+zid, zone:zid})-[:ROUTES]->(ri:Interface {ip:nr.ip+".0.254"})-[:CONNECTS]->(nr); 767 | ---- 768 | 769 | === Racks 770 | 771 | [source,cypher] 772 | ---- 773 | WITH 10 as racks 774 | MATCH (dc:DataCenter {name:"DC1"}) 775 | MATCH (nr:Network:Zone) // one per zone 776 | 777 | WITH * UNWIND range(1,racks) AS rackid 778 | 779 | CREATE (dc)-[:CONTAINS]->(rack:Rack {name:"DC1-RCK-"+nr.zone+"-"+rackid, rack:rackid, zone:nr.zone})-[:HOLDS]->(s:Switch {ip:nr.ip+"."+rackid, rack:rackid})-[:ROUTES]->(si:Interface
{ip:s.ip+".254"})<-[:ROUTES]-(nr); 780 | ---- 781 | 782 | === Machine types 783 | 784 | Similar to the machines you can rent on AWS we use machine types, for which we auto-create some reasonable capacities for CPU, RAM and DISK. 785 | 786 | [source,cypher] 787 | ---- 788 | WITH ["xs","s","m","l","xl","xxl"] as typeNames 789 | UNWIND range(0,size(typeNames)-1) as idx 790 | CREATE (t:Type {id:idx, cpu: toInteger(2^idx), ram:toInteger(4^idx), disk:toInteger(5^idx), type: typeNames[idx]}) 791 | SET t.name = typeNames[idx]+"-"+t.cpu + "/"+t.ram+"/"+t.disk 792 | RETURN t.name, t.id, t.cpu, t.ram, t.disk; 793 | ---- 794 | 795 | === Machines 796 | 797 | Each Rack contains 200 machines of the types we just introduced, so that in total we get 8000 servers in our datacenter. 798 | 799 | The distribution of the types is inverse to their capabilities. 800 | 801 | [source,cypher] 802 | ---- 803 | MATCH (t:Type) 804 | WITH collect(t) as types, 200 as machines 805 | 806 | MATCH (rack:Rack)-[:HOLDS]->(s:Switch)-[:ROUTES]->(si:Interface) 807 | 808 | UNWIND (range(1,machines)) AS machineid 809 | 810 | CREATE (rack)-[:HOLDS]->(m:Machine {id:rack.id * 1000 + machineid, name: rack.name + "-M-" +machineid })-[:ROUTES]->(i:Interface {ip:s.ip+"."+machineid})-[:CONNECTS]->(si) 811 | WITH m,types,size(types)-toInteger(log(machines - machineid + 1)) -1 as idx 812 | WITH m, types[idx] as t 813 | CREATE (m)-[:TYPE]->(t); 814 | ---- 815 | 816 | === Create OS and Software 817 | 818 | // https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux#Version_history 819 | // https://wiki.ubuntu.com/Releases 820 | // https://en.wikipedia.org/wiki/Debian_version_history 821 | 822 | [source,cypher] 823 | ---- 824 | WITH 825 | [{name:"RHEL",versions:["7.1","7.2","7.3"]},{name:"Ubuntu",versions:["14.04","16.04","16.10","17.04"]},{name:"Debian",versions:["6-Squeeze","7-Wheezy","8-Jessie"]}] as osNames, 826 | [ 827 | {name:"java",versions:["8"]}, 828 | 
{name:"neo4j",ports:[7474,7473,7687],versions:["3.0","3.1"],dependencies:["java/8"]}, 829 | {name:"postgres",ports:[5432],versions:["9.4","9.5","9.6"]}, 830 | {name:"couchbase",ports:[8091,8092,11207,11209,11210,11211,11214,11215,18091,18092,4369],versions:["3.0","4.0","4.5","4.6"]}, 831 | {name:"elasticsearch",ports:[9200,9300,9500,9700],versions:["2.4","5.0","5.1","5.2"],dependencies:["java/8"]} 832 | ] as services, 833 | [{name:"webserver",ports:[80,443],dependencies:["postgres/9.4"]}, 834 | {name:"crm",ports:[80,443],dependencies:["java/8","neo4j/3.1"]}, 835 | {name:"cms",ports:[8080],dependencies:["php","webserver","couchbase"]}, 836 | {name:"webapp",ports:[8080],dependencies:["java","neo4j"]}, 837 | {name:"logstash",ports:[5000],dependencies:["elasticsearch/5.2"]} 838 | ] as applications 839 | 840 | UNWIND osNames + services + applications AS sw 841 | 842 | CREATE (s:Software) SET s = sw 843 | FOREACH (sw in filter(x IN osNames where x.name = sw.name) | SET s:OS) 844 | FOREACH (sw in filter(x IN services where x.name = sw.name) | SET s:Service) 845 | FOREACH (sw in filter(x IN applications where x.name = sw.name) | SET s:Application) 846 | 847 | FOREACH (idx in range(0,size(coalesce(sw.versions,[]))-2) | 848 | MERGE (s)-[:VERSION]->(v0:Version {name:sw.versions[idx]}) 849 | MERGE (s)-[:VERSION]->(v:Version {name:sw.versions[idx+1]}) 850 | MERGE (v0)<-[:PREVIOUS]-(v) 851 | ) 852 | WITH * 853 | UNWIND sw.dependencies as dep 854 | WITH *,split(dep,"/") as parts 855 | MERGE (d:Software {name:parts[0]}) 856 | FOREACH (v IN case size(parts) when 1 then [] else [parts[1]] end | 857 | MERGE (d)-[:VERSION]->(:Version {name:v}) 858 | ) 859 | WITH * 860 | OPTIONAL MATCH (d)-[:VERSION]->(v:Version {name:parts[1]}) 861 | WITH s, coalesce(v,d) as d 862 | MERGE (s)-[:DEPENDS_ON]->(d); 863 | ---- 864 | 865 | === Install Software 866 | 867 | [source,cypher] 868 | ---- 869 | create index on :Software(name); 870 | ---- 871 | 872 | [source,cypher] 873 | ---- 874 | profile 875 | 
WITH [(:Software:OS)-[:VERSION]->(v) | v] as osVersions 876 | MATCH (a:Application:Software) 877 | WITH osVersions, collect(a) as apps 878 | MATCH (m:Machine)-[:ROUTES]->(i:Interface) 879 | WITH m,i, osVersions[toInteger(rand()*size(osVersions))] as os, apps[toInteger(rand()*size(apps))] as app 880 | CREATE (m)-[:RUNS]->(op:OS:Process {name:os.name, startTime:timestamp() - toInteger( (rand() * 10 + 5) *24*3600*1000)})-[:INSTANCE]->(os) 881 | CREATE (m)-[:RUNS]->(ap:Application:Process {name: app.name, pid: toInteger(rand()*10000), startTime:timestamp() - toInteger(rand() * 10*24*3600*1000) })-[:INSTANCE]->(app) 882 | 883 | FOREACH (portNo in app.ports | 884 | MERGE (port:Port {port:portNo})<-[:EXPOSES]-(i) 885 | CREATE (ap)-[:LISTENS]->(port) 886 | ) 887 | WITH * 888 | MATCH (app)-[:DEPENDS_ON]->(dep) 889 | CREATE (m)-[:RUNS]->(dp:Service:Process {name: dep.name, pid: toInteger(rand()*10000), startTime:timestamp() - toInteger(rand() * 10*24*3600*1000) })-[:INSTANCE]->(dep) 890 | CREATE (ap)-[:DEPENDS_ON]->(dp) 891 | FOREACH (portNo in dep.ports | 892 | MERGE (port:Port {port:portNo})<-[:EXPOSES]-(i) 893 | CREATE (dp)-[:LISTENS]->(port) 894 | ) 895 | ---- 896 | 897 | //// 898 | 899 | //// 900 |
    901 |
  • 902 | DataCenter 903 |
  • 904 |
  • 905 | Router Egress 906 |
  • 907 |
  • 908 | Interface 909 |
  • 910 |
  • 911 | Network Zone 912 |
  • 913 |
  • 914 | Router 915 |
  • 916 |
  • 917 | Rack 918 |
  • 919 |
  • 920 | Switch 921 |
  • 922 |
  • 923 | Machine 924 |
  • 925 |
  • 926 | Type 927 |
  • 928 |
  • 929 | Software Application 930 |
  • 931 |
  • 932 | Software Service 933 |
  • 934 |
  • 935 | Service Process 936 |
  • 937 |
  • 938 | Application Process 939 |
  • 940 |
  • 941 | Software OS 942 |
  • 943 |
  • 944 | Version 945 |
  • 946 |
  • 947 | OS Process 948 |
  • 949 |
  • 950 | Software 951 |
  • 952 |
  • 953 | Port 954 |
  • 955 |
  • 956 | CONTAINS 957 |
  • 958 |
  • 959 | ROUTES 960 |
  • 961 |
  • 962 | CONNECTS 963 |
  • 964 |
  • 965 | CONTAINS 966 |
  • 967 |
  • 968 | ROUTES 969 |
  • 970 |
  • 971 | CONTAINS 972 |
  • 973 |
  • 974 | HOLDS 975 |
  • 976 |
  • 977 | ROUTES 978 |
  • 979 |
  • 980 | ROUTES 981 |
  • 982 |
  • 983 | TYPE 984 |
  • 985 |
  • 986 | HOLDS 987 |
  • 988 |
  • 989 | ROUTES 990 |
  • 991 |
  • 992 | DEPENDS_ON 993 |
  • 994 |
  • 995 | INSTANCE 996 |
  • 997 |
  • 998 | INSTANCE 999 |
  • 1000 |
  • 1001 | INSTANCE 1002 |
  • 1003 |
  • 1004 | VERSION 1005 |
  • 1006 |
  • 1007 | INSTANCE 1008 |
  • 1009 |
  • 1010 | VERSION 1011 |
  • 1012 |
  • 1013 | DEPENDS_ON 1014 |
  • 1015 |
  • 1016 | INSTANCE 1017 |
  • 1018 |
  • 1019 | DEPENDS_ON 1020 |
  • 1021 |
  • 1022 | INSTANCE 1023 |
  • 1024 |
  • 1025 | DEPENDS_ON 1026 |
  • 1027 |
  • 1028 | RUNS 1029 |
  • 1030 |
  • 1031 | RUNS 1032 |
  • 1033 |
  • 1034 | LISTENS 1035 |
  • 1036 |
  • 1037 | EXPOSES 1038 |
  • 1039 |
  • 1040 | RUNS 1041 |
  • 1042 |
  • 1043 | LISTENS 1044 |
  • 1045 |
1046 | //// 1047 | -------------------------------------------------------------------------------- /documentation/network-management.neo4j-browser-guide: -------------------------------------------------------------------------------- 1 | 21 | 24 |
25 | 26 | 44 | 49 | 50 | 51 | 52 | 53 |
54 |

Network Management - Introduction

55 |
56 |
57 |
58 |

Computer networks span all levels of the stack from physical connections up to mobile and web-applications connecting networks of users.

59 |
60 |
61 |

Graph Databases offer a natural way of modelling, storing and querying all these types of computer networks.

62 |
63 |
64 |

A graph database like Neo4j can be utilized for:

65 |
66 |
67 |
    68 |
  • 69 |

    Configuration Management

    70 |
  • 71 |
  • 72 |

    Impact Analysis

    73 |
  • 74 |
  • 75 |

    Planning

    76 |
  • 77 |
  • 78 |

    Security and Hardening of Networks

    79 |
  • 80 |
  • 81 |

    Intrusion Detection

    82 |
  • 83 |
  • 84 |

    Traffic Analytics

    85 |
  • 86 |
  • 87 |

    Analytics of user behavior

    88 |
  • 89 |
90 |
91 |
92 |

In this example we want to look at Network Management and Impact Analysis from the level of routing (TCP/IP) upwards to managing applications and tracing their dependencies.

93 |
94 |
95 |

Throughout the guide you’ll find Cypher statements that you can execute, by clicking on them and then executing them by hitting the run button.

96 |
97 |
98 |
99 |
100 | 101 | 102 | 103 | 104 |
105 |

Modeling

106 |
107 |
108 |
109 |
110 | Network Management Model 111 |
112 |
113 |
114 |

We can model the network endpoints (boxes like servers, routers, firewalls, racks) of the data center as nodes and the "cables" between them as relationships.

115 |
116 |
117 |

Another type of node represents networks and interfaces.

118 |
119 |
120 |

On the application level we have the operating system, virtual machines, application and services that are modeled as entities.

121 |
122 |
123 |

Our example data is already set up; you’ll find some pointers in the "resources" section at the end.

124 |
125 |
126 |
127 |
128 | 129 | 130 | 131 | 132 |
133 |

DataCenter

134 |
135 |
136 |
137 |

This is the full data model of your graph.

138 |
139 |
140 |
141 | network schema arrows 142 |
143 |
144 |
145 |

If you want to see it yourself, run

146 |
147 |
148 |
149 |
// Calls the built-in db.schema.visualization() procedure.
call db.schema.visualization()
150 |
151 |
152 |
153 |

Imagine we have a DataCenter connected to an Interconnect via an Egress Router. 154 | The datacenter uses a 10.x.x.x/8 IP address range.

155 |
156 |
157 |

The DataCenter consists of several Zones which are connected to the main backbone each via a Router (10.zone.*/16).

158 |
159 |
160 |

From there each zone is broken down into rows of Racks.

161 |
162 |
163 |

Each Rack contains different types of Servers and has its own Switch to connect to the datacenter routers backplane.

164 |
165 |
166 |

Each Server has external network Interfaces that connect to the rack switch, the local networks being 10.zone.rack.*/24.

167 |
168 |
169 |

Each machine either runs a real Operating System (OS) or a Virtualization Manager that runs a number of Virtual Machines.

170 |
171 |
172 |

For operational simplicity we only run one Application per OS which uses a number of Ports on the external interface.

173 |
174 |
175 |

Usually we would get this kind of information from a configuration management database (CMDB), network management tools or agents installed on the machines.

176 |
177 |
178 |
179 |
180 | 181 | 182 | 183 | 184 |
185 |

Network Exploration: DataCenter and Zones

186 |
187 |
188 |
189 |

Let’s walk through the data, step by step. Let’s start with the DataCenter.

190 |
191 |
192 |
193 |
// Returns every path from the DC1 data center through a contained router to an interface that router routes to.
MATCH network = (dc:DataCenter {name:"DC1",location:"Iceland, Rekjavik"})
194 |              -[:CONTAINS]->(:Router)
195 |              -[:ROUTES]->(:Interface)
196 | RETURN network;
197 |
198 |
199 |
200 |
201 | network zones 202 |
203 |
204 |
205 |

The datacenter consists of 4 zones, each of which has its own separate Network 10.zone.*/16 and its own Router.

206 |
207 |
208 |

We can draw out that verbal description in a query with patterns matching the network parts.

209 |
210 |
211 |
212 |
// Starts at DC1's egress router interface, finds the zone networks attached to it,
// then finds the routers in DC1 whose interface connects to each of those networks.
MATCH (dc:DataCenter {name:"DC1"})-[:CONTAINS]->(re:Router:Egress)-[:ROUTES]->(rei:Interface)
213 | 
214 | MATCH (nr:Network:Zone)<-[:CONNECTS]-(rei)
215 | 
216 | // router in DC, connect it via an interface to the zone network
217 | MATCH (dc)-[:CONTAINS]->(r:Router)-[:ROUTES]->(ri:Interface)-[:CONNECTS]->(nr)
218 | 
219 | RETURN *;
220 |
221 |
222 |
223 |

To visualize the DataCenter and its components so far, we can also start at the center and then go 3 hops out.

224 |
225 |
226 |
227 |
// Paths of exactly three relationships (any type, any direction) between a DataCenter and a Network node.
MATCH path = (dc:DataCenter)-[*3]-(:Network)
228 | RETURN path;
229 |
230 |
231 |
232 |

We could also get statistical information, like the addresses of routers and interfaces in each network.

233 |
234 |
235 |

You can see very well how the graph representation in the match pattern resembles our domain model.

236 |
237 |
238 |
239 |
// Tabular view: for every router that has a zone property, the IP of the network it reaches,
// the IP of the connecting router interface, and the router's name and zone.
MATCH (r:Router)-[:ROUTES]->(ri:Interface)-[:CONNECTS]->(nr:Network)
240 | WHERE r.zone IS NOT NULL
241 | RETURN nr.ip as network_ip, ri.ip as router_if_ip, r.name as router, r.zone as zone;
242 |
243 |
244 |
245 |
246 |
╒════════════╤══════════════╤═════════╤══════╕
247 | │"network_ip"│"router_if_ip"│"router" │"zone"│
248 | ╞════════════╪══════════════╪═════════╪══════╡
249 | │"10.1"      │"10.1.0.254"  │"DC1-R-1"│1     │
250 | ├────────────┼──────────────┼─────────┼──────┤
251 | │"10.2"      │"10.2.0.254"  │"DC1-R-2"│2     │
252 | ├────────────┼──────────────┼─────────┼──────┤
253 | │"10.3"      │"10.3.0.254"  │"DC1-R-3"│3     │
254 | ├────────────┼──────────────┼─────────┼──────┤
255 | │"10.4"      │"10.4.0.254"  │"DC1-R-4"│4     │
256 | └────────────┴──────────────┴─────────┴──────┘
257 |
258 |
259 |
260 |
261 |
262 | 263 | 264 | 265 | 266 |
267 |

Network Exploration: Racks

268 |
269 |
270 |
271 |
272 | network rack 273 |
274 |
275 |
276 |

Each zone contains 10 Racks, each of which has its own Switch and a subnet whose IPs follow the pattern 10.zone.rack.*/24.

277 |
278 |
279 |
280 |
// For DC1: each rack, the switch it holds, the switch's interface, and the zone network routing to that interface.
MATCH (dc:DataCenter {name:"DC1"})-[:CONTAINS]->(rack:Rack)-[:HOLDS]->(s:Switch)-[:ROUTES]->(si:Interface)<-[:ROUTES]-(nr:Network:Zone)
281 | 
282 | RETURN *;
283 |
284 |
285 |
286 |

Now our network has grown quite a bit:

287 |
288 |
289 |
290 |
// Paths of exactly six relationships (any type, any direction) between a DataCenter and a Rack.
MATCH network = (dc:DataCenter)-[*6]-(:Rack)
291 | RETURN network;
292 |
293 |
294 |
295 |
296 |
297 | 298 | 299 | 300 | 301 |
302 |

Network Connectivity

303 |
304 |
305 |
306 |

Now we could already have a look at the network connectivity in our datacenter.

307 |
308 |
309 |

To look at the overall connections we use shortest paths, which represent the most efficient routes.

310 |
311 |
312 |
Connectivity before: 40 routes of 5 hops each
313 |
314 |
// All shortest paths between every rack and the egress router, following HOLDS, ROUTES and CONNECTS
// relationships in either direction; the result counts the paths per hop length.
MATCH path = allShortestPaths( (rack:Rack)-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress) )
315 | RETURN length(path) as hops, count(*) as count;
316 |
317 |
318 |
319 |

What happens if one of our cables gets loose or cut, i.e. the ROUTES relationship between the switch’s interface and the network is gone.

320 |
321 |
322 |

Let’s cut the cable of this first switch.

323 |
324 |
325 |
326 |
// Deletes every ROUTES relationship that points from a Network to the interface with IP 10.1.1.254.
MATCH (:Interface {ip:"10.1.1.254"})<-[rel:ROUTES]-(:Network)
327 | DELETE rel
328 |
329 |
330 |
331 |

Connectivity after: 39 routes of 5 hops each

332 |
333 |
334 |
335 |
// Same hop-length histogram as before the cut: shortest rack-to-egress paths grouped by length.
MATCH path = allShortestPaths( (rack:Rack)-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress) )
336 | RETURN length(path) as hops, count(*) as count;
337 |
338 |
339 |
340 |

Now all the machines in that Rack are cut off, no connection anymore, which we can demonstrate by trying to find the shortest path.

341 |
342 |
343 |
344 |
// Shortest paths from the single rack DC1-RCK-1-1 to the egress router; returns no rows when none exists.
MATCH connection = allShortestPaths( (rack:Rack {name:"DC1-RCK-1-1"})-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress) )
345 | RETURN connection;
346 |
347 |
348 |
349 |

How can we fix that? 350 | We could connect each switch to all the other three networks too, so we would survive the loss of 3 of those 4 connections.

351 |
352 |
353 |
Creating new, redundant network connections
354 |
355 |
// The two MATCH clauses share no variable, so every zone network is paired with every switch interface;
// MERGE then only creates the ROUTES relationships that do not exist yet, which makes the statement safe to re-run.
// for all zones
356 | MATCH (nr:Network:Zone)
357 | // find *all* switches and their interface
358 | MATCH (s:Switch)-[:ROUTES]->(si:Interface)
359 | // connect them to all the zones, if not yet connected
360 | MERGE (si)<-[:ROUTES]-(nr);
361 |
362 |
363 |
364 |
365 |
// Hop-length histogram of the shortest rack-to-egress paths, now including the redundant links.
MATCH path = allShortestPaths((rack:Rack)-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress))
366 | RETURN length(path) as hops, count(*) as count;
367 |
368 |
369 |
370 |
371 |
╒══════╤═══════╕
372 | │"hops"│"count"│
373 | ╞══════╪═══════╡
374 | │5     │160    │
375 | └──────┴───────┘
376 |
377 |
378 |
379 |
380 | network alternative routes 381 |
382 |
383 |
384 |

Cut the first cable of this first switch again.

385 |
386 |
387 |
388 |
// Deletes only one of the Network-to-interface ROUTES relationships for 10.1.1.254;
// LIMIT 1 without ORDER BY means which one is removed is not specified.
MATCH (:Interface {ip:"10.1.1.254"})<-[rel:ROUTES]-(:Network)
389 | WITH rel LIMIT 1
390 | DELETE rel
391 |
392 |
393 |
394 |

But that Rack is now still connected with 3 alternative routes.

395 |
396 |
397 |
398 |
// All shortest paths from the rack with zone 1 / rack 1 to the egress router.
MATCH path = allShortestPaths((rack:Rack {zone:1,rack:1})-[:HOLDS|ROUTES|CONNECTS*]-(router:Router:Egress))
399 | RETURN path;
400 |
401 |
402 |
403 |

Now let’s look at the servers in those racks.

404 |
405 |
406 |
407 |
408 | 409 | 410 | 411 | 412 |
413 |

Machine types

414 |
415 |
416 |
417 |

Similar to the machines you can rent on AWS we use machine types, for which we auto-created some reasonable capacities for CPU, RAM and DISK.

418 |
419 |
420 |
421 |
// Lists name, id and the cpu / ram / disk capacities of every machine Type node.
MATCH (t:Type)
422 | RETURN t.name, t.id, t.cpu, t.ram, t.disk;
423 |
424 |
425 |
426 |
427 |
╒══════════════════╤══════╤═══════╤═══════╤════════╕
428 | │"t.name"          │"t.id"│"t.cpu"│"t.ram"│"t.disk"│
429 | ╞══════════════════╪══════╪═══════╪═══════╪════════╡
430 | │"xs-1/1/1"        │0     │1      │1      │1       │
431 | ├──────────────────┼──────┼───────┼───────┼────────┤
432 | │"s-2/4/5"         │1     │2      │4      │5       │
433 | ├──────────────────┼──────┼───────┼───────┼────────┤
434 | │"m-4/16/25"       │2     │4      │16     │25      │
435 | ├──────────────────┼──────┼───────┼───────┼────────┤
436 | │"l-8/64/125"      │3     │8      │64     │125     │
437 | ├──────────────────┼──────┼───────┼───────┼────────┤
438 | │"xl-16/256/625"   │4     │16     │256    │625     │
439 | ├──────────────────┼──────┼───────┼───────┼────────┤
440 | │"xxl-32/1024/3125"│5     │32     │1024   │3125    │
441 | └──────────────────┴──────┴───────┴───────┴────────┘
442 |
443 |
444 |
445 |
446 |
447 | 448 | 449 | 450 | 451 |
452 |

Machines

453 |
454 |
455 |
456 |

Each Rack contains 200 machines of the types we just introduced, so that in total we get 8000 servers in our datacenter.

457 |
458 |
459 |

As expected, the distribution of the types is inverse to their capabilities.

460 |
461 |
462 |

As the graph visualization of our full datacenter would be pretty but otherwise useless …​

463 |
464 |
465 |
466 | network 8000 machines 467 |
468 |
469 |
470 |
471 | network rack machines type 472 |
473 |
474 |
475 |

We’d rather look at the contents of a single rack

476 |
477 |
478 |
Visualization of Rack DC1-RCK-2-1
479 |
480 |
// Graph view of rack DC1-RCK-2-1: its machines, each machine's interface and type,
// and the switch interface (with its switch) that the machine interfaces connect to.
MATCH (r:Rack {name:"DC1-RCK-2-1"})-[:HOLDS]->(m:Machine),
481 |       (m)-[:ROUTES]->(i:Interface)-[:CONNECTS]->(si)<-[:ROUTES]-(s:Switch),
482 |       (m)-[:TYPE]->(type:Type)
483 | RETURN *
484 |
485 |
486 |
487 |

or its stats.

488 |
489 |
490 |
Contents of Rack DC1-RCK-2-1
491 |
492 |
// Same pattern as the rack visualization, aggregated: one row per machine type in rack DC1-RCK-2-1
// with the number of machines and the smallest / largest machine interface IP, most common type first.
// NOTE(review): the guide shows IPs as strings, so min()/max() presumably compare them lexicographically — confirm.
MATCH (r:Rack {name:"DC1-RCK-2-1"})-[:HOLDS]->(m:Machine),
493 |       (m)-[:ROUTES]->(i:Interface)-[:CONNECTS]->(si)<-[:ROUTES]-(s:Switch),
494 |       (m)-[:TYPE]->(type:Type)
495 | RETURN r.name as rack, si.ip as switchIp, properties(type) as type, count(m) as machines, min(i.ip) as minIp, max(i.ip) as maxIp
496 | ORDER BY machines DESC;
497 |
498 |
499 |
500 |

We can also query for a distribution of machine types across the datacenter.

501 |
502 |
503 |
504 |
// Counts rack-held machines per machine type (returned as the type's property map), most common type first.
MATCH (r:Rack)-[:HOLDS]->(m:Machine)-[:TYPE]->(type:Type)
505 | RETURN properties(type) as type, count(*) as c
506 | ORDER BY c DESC;
507 |
508 |
509 |
510 |
511 |
╒══════════════════════════════════════════════════════════════════╤════╕
512 | │"t"                                                               │"c" │
513 | ╞══════════════════════════════════════════════════════════════════╪════╡
514 | │{"disk":5,"name":"s-2/4/5","cpu":2,"id":1,"ram":4}                │3760│
515 | ├──────────────────────────────────────────────────────────────────┼────┤
516 | │{"disk":1,"name":"xs-1/1/1","cpu":1,"id":0,"ram":1}               │2080│
517 | ├──────────────────────────────────────────────────────────────────┼────┤
518 | │{"disk":25,"name":"m-4/16/25","cpu":4,"id":2,"ram":16}            │1360│
519 | ├──────────────────────────────────────────────────────────────────┼────┤
520 | │{"disk":125,"name":"l-8/64/125","cpu":8,"id":3,"ram":64}          │520 │
521 | ├──────────────────────────────────────────────────────────────────┼────┤
522 | │{"disk":625,"name":"xl-16/256/625","cpu":16,"id":4,"ram":256}     │200 │
523 | ├──────────────────────────────────────────────────────────────────┼────┤
524 | │{"disk":3125,"name":"xxl-32/1024/3125","cpu":32,"id":5,"ram":1024}│80  │
525 | └──────────────────────────────────────────────────────────────────┴────┘
526 |
527 |
528 |
529 |

Or if we treat our datacenter as a supercomputer, what’s the total amount of CPUs, RAM and disk available:

530 |
531 |
532 |
533 |
// Totals over all machines: machine count plus the summed cpu, ram and disk values of their types.
MATCH (m:Machine)-[:TYPE]->(type:Type)
534 | RETURN count(*) as count, sum(type.cpu) as cpus, sum(type.ram) as ram, sum(type.disk) as disk;
535 |
536 |
537 |
538 |
Not bad, that’s quite some compute power.
539 |
540 |
╒═══════╤══════╤══════╤══════╕
541 | │"count"│"cpus"│"ram" │"disk"│
542 | ╞═══════╪══════╪══════╪══════╡
543 | │8000   │24960 │205280│494880│
544 | └───────┴──────┴──────┴──────┘
545 |
546 |
547 |
548 |
549 |
550 | 551 | 552 | 553 | 554 |
555 |

Software: Operating Systems and Applications

556 |
557 |
558 |
559 |

Bare-metal hardware is cool, but something has to run on it to make it useable.

560 |
561 |
562 |

Most likely it will be some kind of virtualization infrastructure that allows dynamic reallocation of the compute, memory and disk resources.

563 |
564 |
565 |

Because of the added complexity, we skip this for now.

566 |
567 |
568 |

For our software we differentiate between Operating Systems, Services and Applications (which could also be micro services).

569 |
570 |
571 |

Each of them has a name, version(s) and dependencies.

572 |
573 |
574 |

In a more elaborate model we could also handle other resource requirements like RAM / CPU / DISK per running software instance.

575 |
576 |
577 |

Let’s look at our available operating systems.

578 |
579 |
580 |
581 |
// Lists every OS version and, where a PREVIOUS relationship points at it, the name of the following version.
// OPTIONAL MATCH keeps the newest version of each OS in the result with next_version = null.
MATCH (o:OS:Software)-[:VERSION]->(v)
582 | OPTIONAL MATCH (v)<-[:PREVIOUS]-(vnext)
583 | RETURN o.name as os, v.name as version, vnext.name as next_version
584 | ORDER BY os, version;
585 |
586 |
587 |
588 |

Similar for our other software

589 |
590 |
591 |
592 |
// Lists all non-OS software with its versions, its versioned dependencies ([software, version] pairs) and ports.
// The two OPTIONAL MATCH clauses multiply each other's rows (versions x dependencies), so both collections
// use DISTINCT; without it every version and dependency is repeated in the result lists.
// Only dependencies that point at a Version node are listed, because the pattern requires (dv)<-[:VERSION]-(d).
MATCH (s:Software) WHERE not s:OS
593 | OPTIONAL MATCH (s)-[:VERSION]->(v)
594 | OPTIONAL MATCH (s)-[:DEPENDS_ON]->(dv)<-[:VERSION]-(d)
595 | RETURN s.name, collect(DISTINCT v.name) as versions, [x IN collect(DISTINCT [d.name,dv.name]) WHERE x[0] IS NOT NULL] as dependencies, s.ports;
596 |
597 |
598 |
599 |
600 |
601 | 602 | 603 | 604 | 605 |
606 |

Software: Running on Machines

607 |
608 |
609 |
610 |

Each of our machines is set up to run an OS and a single application, each of which might require other dependencies that are also installed.

611 |
612 |
613 |
614 | network software arrows 615 |
616 |
617 |
618 |
619 |
// Picks one machine that runs something (rand() < 0.05 varies which machine is hit first; LIMIT 1 keeps a single one),
// then returns its processes, what each process is an instance of and, if that is a version, the owning software.
MATCH (m:Machine) WHERE (m)-[:RUNS]->() AND rand() < 0.05 WITH m LIMIT 1
620 | MATCH (m)-[r:RUNS]->(p:Process)-[i:INSTANCE]->(sv)
621 | OPTIONAL MATCH (sv)<-[v:VERSION]-(sw)
622 | RETURN *
623 |
624 |
625 |
626 |
627 | network software machine 628 |
629 |
630 |
631 |
632 |
633 | 634 | 635 | 636 | 637 |
638 |

Dependency Analysis

639 |
640 |
641 |
642 |

We could look at dependencies between data center elements on the physical level, like routers, switches and interfaces.

643 |
644 |
645 |

Another way to look at it is to determine dependencies between machines based on their internal and external connections.

646 |
647 |
648 |

But we can also use the software and its dependencies to determine bottlenecks and components that are frequently depended upon.

649 |
650 |
651 |

Let’s look at all the software that uses Neo4j and the running Neo4j instances.

652 |
653 |
654 |
655 |
// Finds software that depends on a version of the neo4j service, the processes running that software,
// and the neo4j processes (with their machines) those processes depend on.
// Fixes: the process-level pattern now uses the relationship type :DEPENDS_ON (without the colon,
// DEPENDS_ON was a relationship variable matching any type), and neo_machine returns the machine nm
// instead of repeating the process np.
MATCH (s)-[:DEPENDS_ON]->(nv:Version)<-[:VERSION]-(n:Software:Service {name:"neo4j"})
656 | MATCH (s)<-[:INSTANCE]-(sp)<-[:RUNS]-(sm:Machine)
657 | MATCH (sp)-[:DEPENDS_ON]->(np)-[:INSTANCE]->(nv)
658 | MATCH (np)<-[:RUNS]-(nm:Machine)
659 | RETURN sm as software_machine, sp as software_process, s as software, nv as neo_version,np as neo4j_process, nm as neo_machine
660 | LIMIT 10
661 |
662 |
663 |
664 |
665 |
666 | 667 | 668 | 669 | 670 |
671 |

Configuration Management

672 |
673 |
674 |
675 |

Proper IT infrastructures use a large number of configuration parameters to customize commodity hardware and software. To manage all of the variables, Configuration Management Databases (CMDBs) are used. Systems require certain variables, and can report what is currently configured so that the CMDB can detect issues, and send necessary updates.

676 |
677 |
678 |

In the past, CMDBs were mostly used for network, hardware and OS level configuration. Today, their use has expanded into services to support modern architectures. A number of related systems have popped up, such as ZooKeeper, Consul, Eureka, and others.

679 |
680 |
681 |

Due to the variety of systems used for providing configuration to the infrastructure, it is very useful to create a unified, up to date view of the situation in your systems graph.

682 |
683 | 684 | 685 | 686 |

Upgrade OS Version and its Dependencies for a Version Range

687 |
688 |

We’re looking for machines in our Graph-CMDB whose operating systems have to be updated. 689 | The OS versions are linked in a list via :PREVIOUS relationships. 690 | So we can easily determine whether a machine runs a version older than the expected one, even if version numbers are not sortable. 691 | Those machines will be marked for an update to the correct version.

692 |
693 |
694 |
Mark for update
695 |
696 |
// Marks every machine whose running OS version precedes Debian 8-Jessie in the :PREVIOUS chain.
// The request is MERGEd rather than CREATEd so that running the statement again does not add a second
// UPDATE_TO relationship per machine; ts records when the request was first created.
MATCH (os:OS:Software)-[:VERSION]->(newVersion) WHERE os.name = 'Debian' and newVersion.name = '8-Jessie'
697 | 
698 | MATCH (m:Machine)-[:RUNS]->(op:OS:Process)-[:INSTANCE]->(currentVersion)
699 | WHERE (currentVersion)<-[:PREVIOUS*]-(newVersion)
700 | 
701 | // create update request (once per machine and target version)
702 | MERGE (m)-[update:UPDATE_TO]->(newVersion) ON CREATE SET update.ts = timestamp()
703 |
704 |
705 |
706 |

All machines with UPDATE_TO requests can be found by tools and operators.

707 |
708 |
709 |
Find pending updates
710 |
711 |
// Lists up to 100 machines that carry an UPDATE_TO request whose target version differs from the
// version their OS process is currently an instance of, together with rack and OS name.
MATCH (r:Rack)-[:HOLDS]->(m:Machine)-[:UPDATE_TO]->(vNew:Version)<-[:VERSION]-(os:OS:Software)
712 | MATCH (m)-[:RUNS]->(:OS:Process)-[:INSTANCE]->(vCurr)
713 | WHERE vCurr <> vNew
714 | RETURN r.name, m.name, os.name, vCurr.name as currentVersion, vNew.name as newVersion
715 | LIMIT 100;
716 |
717 |
718 |
719 |

When the local OS is physically updated, the old :OS:Process will be stopped and the new one will run.

720 |
721 |
722 |
Replace old OS instance with new
723 |
724 |
// For every machine with a pending UPDATE_TO request, starts an OS process for the requested version
// and removes the process of the outdated version.
// The new process gets the same properties the install script gives OS processes (name of the version
// node and a startTime); without startTime the runtime metrics query would compute null for it.
MATCH (m:Machine)-[:UPDATE_TO]->(vNew:Version)<-[:VERSION]-(os:OS:Software)
725 | MATCH (m)-[:RUNS]->(op:OS:Process)-[:INSTANCE]->(vCurr)
726 | WHERE vCurr <> vNew
727 | CREATE (m)-[:RUNS]->(opNew:OS:Process {name:vNew.name, startTime:timestamp()})-[:INSTANCE]->(vNew)
728 | DETACH DELETE op;
729 |
730 |
731 |
732 |

After the physical update has been performed, the machines will report the now updated version and the update request can be removed.

733 |
734 |
735 |
Remove resolved update requests
736 |
737 |
// Deletes an UPDATE_TO request once the machine runs an OS process that is an instance of the requested version.
MATCH (m:Machine)-[update:UPDATE_TO]->(v:Version)<-[:VERSION]-(os:OS:Software)
738 | WHERE (m)-[:RUNS]->(:OS:Process)-[:INSTANCE]->(v)
739 | 
740 | DELETE update;
741 |
742 |
743 |
744 |
745 |
746 | 747 | 748 | 749 | 750 |
751 |

IT-Monitoring and Governance

752 |
753 |
754 |
755 |

Live network operations need to be supervised to ensure smooth operations, prevent bottlenecks, protect from attacks and vulnerabilities and allow maintenance planning and failure handling.

756 |
757 |
758 |

The information is either acquired by listening on network traffic and inferring running services and user and application activity combined with port-scans.

759 |
760 |
761 |

Alternatively, agents installed on the machines report the state of each server to the network or centralized databases which update the live state of the network.

762 |
763 |
764 |

Based on our existing model, those incoming messages and events can do the following:

765 |
766 |
767 |
    768 |
  • 769 |

    Create new entries for Servers, Switches, Interfaces

    770 |
  • 771 |
  • 772 |

    Track running Services via used ports and traffic

    773 |
  • 774 |
  • 775 |

    Infer user and application activity and group by network segment, source, used service

    776 |
  • 777 |
  • 778 |

    Detect abnormal operations like attacks or potential bottlenecks and issue warnings

    779 |
  • 780 |
  • 781 |

    Track violations of rules, like isolation of the DMZ, certain firewall rules etc.

    782 |
  • 783 |
784 |
785 |
786 |

Here is an example of a new connection coming in and the graph being updated accordingly. 787 | Subsequent information for that connection will be aggregated until it is closed, then the totals could be added to the general CONNECTIONS relationship between the two IPs.

788 |
789 |
790 |

We could generate some events, by having processes from some machines accessing processes from other (random) Machines.

791 |
792 |
793 |
794 |
// Generates sample "OpenConnection" events between randomly chosen source and target machines.
// For each source / target process the first exposed port it listens on is used.
// The event map has the same shape as the sample :param list and as read by the ingest query:
// packets, mtu and time sit at the top level of the event (previously they were nested inside
// `connection`, so event.packets and event.mtu were null for generated events).
MATCH (m:Machine) WITH collect(m) as machines
795 | WITH machines, size(machines) as len
796 | UNWIND range(1,10) as idx
797 | WITH machines[toInteger(rand()*len)] as source, machines[toInteger(rand()*len)] as target
798 | MATCH (source)-[:ROUTES]->(si:Interface)-[:EXPOSES]->(sp:Port)<-[:LISTENS]-(sourceAppProcess)-[:INSTANCE]->(sourceApp)
799 | WITH target, source,si,head(collect(sp)) as sp, sourceAppProcess,sourceApp
800 | // todo limit to first port
801 | MATCH (target)-[:ROUTES]->(ti:Interface)-[:EXPOSES]->(tp:Port)<-[:LISTENS]-(targetAppProcess)-[:INSTANCE]->(targetApp)
802 | WITH source,si,sp, sourceAppProcess,sourceApp,target,ti,head(collect(tp)) as tp, targetAppProcess, targetApp
803 | // todo limit to first port
804 | RETURN {id: randomUUID(), type:"OpenConnection",source:{ip:si.ip, port:sp.port},target:{ip:ti.ip,port:tp.port},
805 |         connection: {source:sourceApp.name, target:targetApp.name, user: "user"+toString(toInteger(rand()*1000))+"@"+source.name,
806 |         time:timestamp()}, packets: 1, mtu: 1500, time:timestamp() } as event
807 |
808 |
809 |
810 |
811 |
:param events:
812 | [
813 | {"source":{"ip":"10.1.7.100","port":11210},"id":"3e41d6f0-fdce-48f4-9bff-818359d8f0af","target":{"ip":"10.3.3.112","port":8080},
814 |  "connection":{"source":"couchbase","target":"webapp","user":"user436@DC1-RCK-1-7-M-100","time":1490540382971},"type":"OpenConnection",
815 |  "packets": 1, "mtu": 1500, "time": 1490904418539 },
816 | {"source":{"ip":"10.1.4.91","port":7474},"id":"fed44be6-55f5-4e42-aab1-bebc5c818268","target":{"ip":"10.4.6.7","port":8080},
817 |  "connection":{"source":"neo4j","target":"webapp","user":"user911@DC1-RCK-1-4-M-91","time":1490540382971},"type":"OpenConnection",
818 |  "packets": 1, "mtu": 1500, "time": 1490904464824 }
819 | ]
820 |
821 |
822 |
823 |
824 |
// Ingests the $events parameter: for each OpenConnection event it ensures the source and target
// interfaces and ports exist, records a Connection node between the two ports and updates the
// aggregated CONNECTIONS relationship between the two interfaces.
// Fixes:
//  - the running totals (count, packets, volume) are stored on the CONNECTIONS relationship `cstats`,
//    which is where the transfer-volume query reads con.packets / con.volume; they were written to the
//    source interface node before, mixing the totals of all its targets.
//  - the event type is copied onto the Connection node; event.connection itself carries no `type`,
//    so the later filter c.type = 'OpenConnection' could never match.
//  - the typographic dash in the FROM/TO pattern is replaced by a plain ASCII hyphen.
UNWIND $events AS event
825 | WITH event WHERE event.type = 'OpenConnection'
826 | 
827 | MERGE (si:Interface {ip:event.source.ip})
828 | MERGE (si)-[:OPENS]->(sp:Port {port: event.source.port})
829 | 
830 | MERGE (ti:Interface {ip:event.target.ip})
831 | MERGE (ti)-[:LISTENS]->(tp:Port {port:event.target.port})
832 | 
833 | CREATE (sp)<-[:FROM]-(c:Connection {id:event.id, type:event.type})-[:TO]->(tp)
834 | SET c += event.connection // source, target, user, time
835 | MERGE (si)-[cstats:CONNECTIONS]->(ti)
836 | SET cstats.count = coalesce(cstats.count,0) + 1
837 | SET cstats.packets = coalesce(cstats.packets,0) + event.packets
838 | SET cstats.volume = coalesce(cstats.volume,0) + event.packets * event.mtu
839 |
840 |
841 |
842 |

All the information is aggregated in a live graph representation which is available for querying for alerts & notifications, dashboards, inventory summaries, reports and more.

843 |
844 |
845 |

Historic information can be stored as well as a timeline chain of changes attributed to cause. 846 | Both can be queried by operators to drill down into detailed analysis.

847 |
848 |
849 |
Connections opened over a time range
850 |
851 |
// Counts distinct connections per source interface, target interface and hour (formatted with
// apoc.date.format, so the APOC plugin is required), earliest hour first, at most 100 rows.
// The typographic dash in the FROM/TO pattern is replaced by a plain ASCII hyphen.
// NOTE(review): the filter relies on Connection nodes carrying a `type` property — confirm that the
// ingest statement stores it, otherwise this query returns no rows.
MATCH (si:Interface)-[:OPENS]->(sp:Port)<-[:FROM]-(c:Connection)-[:TO]->(tp:Port)<-[:LISTENS]-(ti:Interface)
852 | WHERE c.type = 'OpenConnection'
853 | RETURN si.ip as source, ti.ip as target, apoc.date.format(c.time,'ms','yyyy-MM-dd HH') as hour, count(distinct c) as count
854 | ORDER BY hour ASC, count DESC
855 | LIMIT 100;
856 |
857 |
858 |
859 |
860 |
861 | 862 | 863 | 864 | 865 |
866 |

Examples for graph based Network Management Solutions

867 |
868 |
869 |
870 |

A number of commercial solutions provide this kind of service, some of them are running Neo4j.

871 |
872 |
873 |

There are also open source solutions like Mercator from Lending Club and the Assimilation Project by Alan Robertson.

874 |
875 |
876 |

This real-time IT inventory information is also required for due diligence, e.g. for corporate investments, mergers or acquisitions.

877 |
878 |
879 |
880 |
881 | 882 | 883 | 884 | 885 |
886 |

Monitoring Use-Cases

887 |
888 |
889 |
890 |

Our graph contains both the static topological information and a lot of runtime information using the base topology. 891 | From the runtime data we can retrieve different metrics.

892 |
893 | 894 | 895 | 896 |

For instance, minimal, average and maximal runtimes of software instances per type

897 |
898 |
899 |
// Per software and version: number of running process instances and the min / max / average runtime,
// where runtime is the current timestamp() minus the process's startTime.
// Only processes that are instances of a Version node are included (second MATCH binds v:Version).
MATCH (v)<-[:INSTANCE]-(sp:Process)<-[:RUNS]-(sm:Machine)
900 | MATCH (s:Software)-[:VERSION]->(v:Version)
901 | WITH s.name as software, v.name as version, timestamp() - sp.startTime as runtime
902 | RETURN software, version, count(*) as instances, { min: min(runtime), max: max(runtime), avg:avg(runtime) } as runtime
903 |
904 |
905 | 906 | 907 | 908 |

Data Transfer Volume between Interfaces

909 |
910 |
911 |
// Sums the packets and volume properties of the CONNECTIONS relationships per source / target interface pair.
// The `path` variable is bound but not used in the result.
MATCH path = (source:Interface)-[con:CONNECTIONS]->(target:Interface)
912 | RETURN source.ip, target.ip, sum(con.packets) as packets, sum(con.volume) as volume
913 |
914 |
915 |
916 |
917 |
918 | 919 | 920 | 921 | 922 |
923 |

Resource Management Graph

924 |
925 |
926 |
927 |

If you use a resource manager like Apache Mesos (or DC/OS), Kubernetes etc. you specify for each piece of software you run not just name, version and dependencies but also resource requirements like cpu, ram, disk, ports and more.

928 |
929 |
930 |

A scheduler then takes the available resources of a configured machine cluster and allocates them according to the needs and number of the software instances that have to run. 931 | It also takes care of health checks and of (re)starting, (re)scheduling and (re)routing individual new or failed instances.

932 |
933 |
934 |

To model the resource graph of such a system is interesting to look at and reason about, especially if other requirements like indicated co-location or disk-reuse are taken into account.

935 |
936 |
937 |
938 |
939 | 940 | 941 | 942 | 943 |
944 |

References

945 |
946 |
947 |
948 | 990 |
991 |
992 |
993 |
994 |
995 |
-------------------------------------------------------------------------------- /relate.project-install.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "network-management", 3 | "description":"Dependency and root cause analysis and more for network and IT management", 4 | "dbms": [ 5 | { 6 | "dumpFile": "data/network-management-35.dump", 7 | "targetNeo4jVersion":">=3.5.0 <4.0.0", 8 | "plugins": ["apoc"] 9 | }, 10 | { 11 | "dumpFile": "data/network-management-40.dump", 12 | "targetNeo4jVersion": ">=4.0.0 <4.3.0", 13 | "plugins": ["apoc"] 14 | }, 15 | { 16 | "dumpFile": "data/network-management-43.dump", 17 | "targetNeo4jVersion": ">=4.3.0 <5.0.0", 18 | "plugins": ["apoc"] 19 | }, 20 | { 21 | "dumpFile": "data/network-management-50.dump", 22 | "targetNeo4jVersion": ">=5.0.0 <6.0.0", 23 | "plugins": ["apoc"] 24 | }, 25 | { 26 | "scriptFile":"scripts/network-management.cypher", 27 | "targetNeo4jVersion": ">=4.0.0 <6.0.0" 28 | } 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /scripts/network-management.cypher: -------------------------------------------------------------------------------- 1 | // Data Center 2 | CREATE (dc:DataCenter {name:"DC1",location:"Iceland, Rekjavik"})-[:CONTAINS]->(re:Router:Egress {name:"DC1-RE"}) 3 | CREATE (re)-[:ROUTES]->(:Interface {ip:"10.0.0.254"}); 4 | 5 | // Zones 6 | // The datacenter consists of 4 zones, each of which has it's own separate `Network` `10.zone.*/16`, and it's own `Router`. 
WITH 4 AS zones
MATCH (dc:DataCenter {name: "DC1"})-[:CONTAINS]->(re:Router:Egress)-[:ROUTES]->(rei:Interface)
// one iteration per zone
UNWIND range(1, zones) AS zid
// zone network, reachable from the egress router's interface
CREATE (rei)-[:CONNECTS]->(nr:Network:Zone {ip: "10." + zid, size: 16, zone: zid})
// zone router inside the data center, with an interface attached to the zone network
CREATE (dc)-[:CONTAINS]->(r:Router {name: "DC1-R-" + zid, zone: zid})
CREATE (r)-[:ROUTES]->(ri:Interface {ip: nr.ip + ".0.254"})-[:CONNECTS]->(nr);


// Racks
// 10 racks per zone network; every rack holds one switch whose interface is routed to by the zone network.
WITH 10 AS racks
MATCH (dc:DataCenter {name: "DC1"})
MATCH (nr:Network:Zone) // one row per zone
UNWIND range(1, racks) AS rackid
// the switch IP is the zone prefix plus the rack number; its interface uses host .254
WITH dc, nr, rackid, nr.ip + "." + rackid AS switchIp
CREATE (dc)-[:CONTAINS]->(rack:Rack {name: "DC1-RCK-" + nr.zone + "-" + rackid, rack: rackid, zone: nr.zone})
CREATE (rack)-[:HOLDS]->(s:Switch {ip: switchIp, rack: rackid})
CREATE (s)-[:ROUTES]->(si:Interface {ip: switchIp + ".254"})
CREATE (nr)-[:ROUTES]->(si);

// Machine types
// Six types, xs to xxl; capacities grow as powers of 2 (cpu), 4 (ram) and 5 (disk) of the type index.
WITH ["xs","s","m","l","xl","xxl"] AS typeNames
UNWIND range(0, size(typeNames) - 1) AS idx
CREATE (t:Type {id: idx, type: typeNames[idx], cpu: toInteger(2^idx), ram: toInteger(4^idx), disk: toInteger(5^idx)})
SET t.name = typeNames[idx] + "-" + t.cpu + "/" + t.ram + "/" + t.disk
RETURN t.name, t.id, t.cpu, t.ram, t.disk;


// Machines
// Each Rack contains 200 machines of the types we just introduced, so that in total we get 8000 servers in our datacenter.
// The distribution of the types is inverse to their capabilities.
// Creates 200 machines per rack, each with an interface on the rack switch's subnet that connects
// to the switch interface, and links every machine to one of the machine types.
MATCH (t:Type)
WITH collect(t) as types, 200 as machines

MATCH (rack:Rack)-[:HOLDS]->(s:Switch)-[:ROUTES]->(si:Interface)

UNWIND (range(1,machines)) AS machineid
// Rack nodes carry `zone` and `rack` but no `id` property, so the former `rack.id * 1000 + machineid`
// evaluated to null and Machine.id was never stored. The id is now derived from zone and rack number.
CREATE (rack)-[:HOLDS]->(m:Machine {id:(rack.zone * 100 + rack.rack) * 1000 + machineid, name: rack.name + "-M-" +machineid })-[:ROUTES]->(i:Interface {ip:s.ip+"."+machineid})-[:CONNECTS]->(si)
// log() compresses the machine id range, so the high type indexes are assigned to only a few machines per rack
WITH m,types,size(types)-toInteger(log(machines - machineid + 1)) -1 as idx
WITH m, types[idx] as t
CREATE (m)-[:TYPE]->(t);

// Create OS and Software
// https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux#Version_history
// https://wiki.ubuntu.com/Releases
// https://en.wikipedia.org/wiki/Debian_version_history
WITH
[{name:"RHEL",versions:["7.1","7.2","7.3"]},{name:"Ubuntu",versions:["14.04","16.04","16.10","17.04"]},{name:"Debian",versions:["6-Squeeze","7-Wheezy","8-Jessie"]}] as osNames,
[
{name:"java",versions:["8"]},
{name:"neo4j",ports:[7474,7473,7687],versions:["3.0","3.1"],dependencies:["java/8"]},
{name:"postgres",ports:[5432],versions:["9.4","9.5","9.6"]},
{name:"couchbase",ports:[8091,8092,11207,11209,11210,11211,11214,11215,18091,18092,4369],versions:["3.0","4.0","4.5","4.6"]},
{name:"elasticsearch",ports:[9200,9300,9500,9700],versions:["2.4","5.0","5.1","5.2"],dependencies:["java/8"]}
] as services,
[{name:"webserver",ports:[80,443],dependencies:["postgres/9.4"]},
{name:"crm",ports:[80,443],dependencies:["java/8","neo4j/3.1"]},
{name:"cms",ports:[8080],dependencies:["php","webserver","couchbase"]},
{name:"webapp",ports:[8080],dependencies:["java","neo4j"]},
{name:"logstash",ports:[5000],dependencies:["elasticsearch/5.2"]}
] as applications
UNWIND osNames + services + applications AS sw

// one Software node per entry; all map keys (name, ports, versions, dependencies) become properties
CREATE (s:Software) SET s = sw
// add the label of the list the entry came from
FOREACH (sw in [x IN osNames where x.name = sw.name | x] | SET s:OS)
FOREACH (sw in [x IN services where x.name = sw.name | x] | SET s:Service)
FOREACH (sw in [x IN applications where x.name = sw.name | x] | SET s:Application)

// walk consecutive version pairs and chain them: (older)<-[:PREVIOUS]-(newer)
FOREACH (idx in range(0,size(coalesce(sw.versions,[]))-2) |
  MERGE (s)-[:VERSION]->(v0:Version {name:sw.versions[idx]})
  MERGE (s)-[:VERSION]->(v:Version {name:sw.versions[idx+1]})
  MERGE (v0)<-[:PREVIOUS]-(v)
)
WITH *
// dependencies are written as "name" or "name/version"
UNWIND sw.dependencies as dep
WITH *,split(dep,"/") as parts
MERGE (d:Software {name:parts[0]})
FOREACH (v IN case size(parts) when 1 then [] else [parts[1]] end |
  MERGE (d)-[:VERSION]->(:Version {name:v})
)
WITH *
// depend on the specific version when one was given, otherwise on the software itself
OPTIONAL MATCH (d)-[:VERSION]->(v:Version {name:parts[1]})
WITH s, coalesce(v,d) as d
MERGE (s)-[:DEPENDS_ON]->(d);


// Install Software
// `CREATE INDEX ON :Label(prop)` was removed in Neo4j 5; the FOR ... ON form is accepted from 4.0 on,
// which matches the version range this script is declared for.
CREATE INDEX software_name FOR (s:Software) ON (s.name);

// Every machine gets one OS process (random OS version) and one application process (random application),
// plus one process per dependency of that application; the ports of each are exposed on the machine's interface.
WITH [(:Software:OS)-[:VERSION]->(v) | v] as osVersions
MATCH (a:Application:Software)
WITH osVersions, collect(a) as apps
MATCH (m:Machine)-[:ROUTES]->(i:Interface)
WITH m,i, osVersions[toInteger(rand()*size(osVersions))] as os, apps[toInteger(rand()*size(apps))] as app
// startTime lies 5 to 15 days in the past for the OS and up to 10 days in the past for the application
CREATE (m)-[:RUNS]->(op:OS:Process {name:os.name, startTime:timestamp() - toInteger( (rand() * 10 + 5) *24*3600*1000)})-[:INSTANCE]->(os)
CREATE (m)-[:RUNS]->(ap:Application:Process {name: app.name, pid: toInteger(rand()*10000), startTime:timestamp() - toInteger(rand() * 10*24*3600*1000) })-[:INSTANCE]->(app)

FOREACH (portNo in app.ports |
  MERGE (port:Port {port:portNo})<-[:EXPOSES]-(i)
  CREATE (ap)-[:LISTENS]->(port)
)
WITH *
MATCH (app)-[:DEPENDS_ON]->(dep)
CREATE (m)-[:RUNS]->(dp:Service:Process {name: dep.name, pid: toInteger(rand()*10000), startTime:timestamp() - toInteger(rand() * 10*24*3600*1000) })-[:INSTANCE]->(dep)
CREATE (ap)-[:DEPENDS_ON]->(dp)
FOREACH (portNo in dep.ports |
  MERGE (port:Port {port:portNo})<-[:EXPOSES]-(i)
  CREATE (dp)-[:LISTENS]->(port)
);
--------------------------------------------------------------------------------