├── .gitignore
├── LICENSE
├── README.md
├── load.cql
└── query.cql

/.gitignore:
--------------------------------------------------------------------------------
*.DS_Store
*.csv
*.Rhistory
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2014 Nicole

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# consumer_complaints

This code accompanies the webinar [Using LOAD CSV in the Real World](http://watch.neo4j.org/video/112447027).

## Setup

* Download `Consumer_Complaints.csv` [here](http://catalog.data.gov/dataset/consumer-complaint-database). Note that your .csv file might have more rows than mine did; the data appears to be updated regularly.

* The arrows graph-diagramming tool is [here](http://www.apcjones.com/arrows/#).

* [Sublime Text 2](http://www.sublimetext.com/2) is the editor used in the webinar, along with the [Cypher plugin](https://github.com/kollhof/sublime-cypher).

## Import

- Change line 1 of `load.cql` to point to the location of your `Consumer_Complaints.csv` file.
    - OS X and Unix: `file:///path/to/Consumer_Complaints.csv`
    - Windows: `file:C:/path/to/Consumer_Complaints.csv`

- Send `load.cql` to the `neo4j-shell`: `./bin/neo4j-shell -file load.cql`

**Important**
Note that this import was done on a 16GB machine. If you have less RAM, and particularly if you are on Windows, please see these blog posts:

* http://jexp.de/blog/2014/06/load-csv-into-neo4j-quickly-and-successfully/
* http://www.markhneedham.com/blog/2014/10/23/neo4j-cypher-avoiding-the-eager/
* http://jexp.de/blog/2014/10/load-cvs-with-success/
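As a rough way to check ahead of time whether a given `LOAD CSV` statement will trigger the `Eager` operator discussed in those posts, you can prefix the statement with `EXPLAIN` and look for `Eager` in the reported plan. This is only a sketch: it assumes a Neo4j version where `EXPLAIN` is available as a Cypher keyword, and the file path below is a placeholder.

```
EXPLAIN
LOAD CSV WITH HEADERS
FROM 'file:///path/to/Consumer_Complaints.csv' AS line
MERGE (company:Company { name: UPPER(line.Company) });
```

`EXPLAIN` only plans the statement without touching the data, so it is safe to run regardless of how large the CSV is.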
Also note that in the original webinar, I split `Consumer_Complaints.csv` into separate files to deal with rows containing empty strings. Since then, I've modified the script so that it uses only `Consumer_Complaints.csv`, with the empty strings filtered out for the sub-issues and sub-products:

```
FROM {FILEPATH} AS line
WITH line WHERE line.`Sub-issue` <> ''
```

```
FROM {FILEPATH} AS line
WITH line WHERE line.`Sub-product` <> ''
```

## Query

Run all the example queries:

```
./bin/neo4j-shell -file query.cql
```
--------------------------------------------------------------------------------
/load.cql:
--------------------------------------------------------------------------------
export FILEPATH=file:///Users/nicolewhite/Consumer_Complaints.csv

// Complaints, companies, responses.

// Uniqueness constraints.
CREATE CONSTRAINT ON (c:Complaint) ASSERT c.id IS UNIQUE;
CREATE CONSTRAINT ON (c:Company) ASSERT c.name IS UNIQUE;
CREATE CONSTRAINT ON (r:Response) ASSERT r.name IS UNIQUE;

// Load.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS
FROM {FILEPATH} AS line
WITH DISTINCT line, SPLIT(line.`Date received`, '/') AS date
WHERE line.`Company response to consumer` IS NOT NULL AND
      line.Company IS NOT NULL

CREATE (complaint:Complaint { id: TOINT(line.`Complaint ID`) })
SET complaint.year = TOINT(date[2]),
    complaint.month = TOINT(date[0]),
    complaint.day = TOINT(date[1])

MERGE (company:Company { name: UPPER(line.Company) })
MERGE (response:Response { name: UPPER(line.`Company response to consumer`) })

CREATE (complaint)-[:AGAINST]->(company)
CREATE (response)-[r:TO]->(complaint)

SET r.timely = CASE line.`Timely response?` WHEN 'Yes' THEN true ELSE false END,
    r.disputed = CASE line.`Consumer disputed?` WHEN 'Yes' THEN true ELSE false END
;

// Products, issues.

// Uniqueness constraints.
CREATE CONSTRAINT ON (p:Product) ASSERT p.name IS UNIQUE;
CREATE CONSTRAINT ON (i:Issue) ASSERT i.name IS UNIQUE;

// Load.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS
FROM {FILEPATH} AS line
WITH line
WHERE line.Product IS NOT NULL AND
      line.Issue IS NOT NULL

MATCH (complaint:Complaint { id: TOINT(line.`Complaint ID`) })

MERGE (product:Product { name: UPPER(line.Product) })
MERGE (issue:Issue { name: UPPER(line.Issue) })

CREATE (complaint)-[:ABOUT]->(product)
CREATE (complaint)-[:WITH]->(issue)
;

// Sub issues, sub products.

// Uniqueness constraints.
CREATE CONSTRAINT ON (s:SubProduct) ASSERT s.name IS UNIQUE;
CREATE CONSTRAINT ON (s:SubIssue) ASSERT s.name IS UNIQUE;

// Load.
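// The Sub-issue and Sub-product columns are blank for many rows, so the two
// statements below filter out empty strings in addition to NULLs, and each
// sub-category is linked to its parent Issue or Product through the complaint
// that references both.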
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS
FROM {FILEPATH} AS line
WITH line
WHERE line.`Sub-issue` <> '' AND
      line.`Sub-issue` IS NOT NULL

MATCH (complaint:Complaint { id: TOINT(line.`Complaint ID`) })
MATCH (complaint)-[:WITH]->(issue:Issue)

MERGE (subIssue:SubIssue { name: UPPER(line.`Sub-issue`) })
MERGE (subIssue)-[:IN_CATEGORY]->(issue)
CREATE (complaint)-[:WITH]->(subIssue)
;

USING PERIODIC COMMIT
LOAD CSV WITH HEADERS
FROM {FILEPATH} AS line
WITH line
WHERE line.`Sub-product` <> '' AND
      line.`Sub-product` IS NOT NULL

MATCH (complaint:Complaint { id: TOINT(line.`Complaint ID`) })
MATCH (complaint)-[:ABOUT]->(product:Product)

MERGE (subProduct:SubProduct { name: UPPER(line.`Sub-product`) })
MERGE (subProduct)-[:IN_CATEGORY]->(product)
CREATE (complaint)-[:ABOUT]->(subProduct)
;
--------------------------------------------------------------------------------
/query.cql:
--------------------------------------------------------------------------------
// Top types of responses that are disputed.
MATCH (r:Response)-[:TO {disputed:true}]->(:Complaint)
RETURN r.name AS response, COUNT(*) AS count
ORDER BY count DESC;

// Companies with the most disputed responses.
MATCH (:Response)-[:TO {disputed:true}]->(complaint:Complaint)
MATCH (complaint)-[:AGAINST]->(company:Company)
RETURN company.name AS company, COUNT(*) AS count
ORDER BY count DESC
LIMIT 10;

// All issues.
MATCH (i:Issue)
RETURN i.name AS issue
ORDER BY issue;

// All sub-issues within the 'communication tactics' issue.
MATCH (i:Issue {name:'COMMUNICATION TACTICS'})
MATCH (sub:SubIssue)-[:IN_CATEGORY]->(i)
RETURN sub.name AS subissue
ORDER BY subissue;

// Top products and sub-products associated with the obscene / abusive language sub-issue.
MATCH (subIssue:SubIssue {name:'USED OBSCENE/PROFANE/ABUSIVE LANGUAGE'})
MATCH (complaint:Complaint)-[:WITH]->(subIssue)
MATCH (complaint)-[:ABOUT]->(p:Product)
OPTIONAL MATCH (complaint)-[:ABOUT]->(sub:SubProduct)
RETURN p.name AS product, sub.name AS subproduct, COUNT(*) AS count
ORDER BY count DESC;

// Top companies associated with the obscene / abusive language sub-issue.
MATCH (subIssue:SubIssue {name:'USED OBSCENE/PROFANE/ABUSIVE LANGUAGE'})
MATCH (complaint:Complaint)-[:WITH]->(subIssue)
MATCH (complaint)-[:AGAINST]->(company:Company)
RETURN company.name AS company, COUNT(*) AS count
ORDER BY count DESC
LIMIT 10;

// Top product, issue combinations with disputed responses at Wells Fargo.
MATCH (wf:Company {name:'WELLS FARGO'})
MATCH (complaint:Complaint)-[:AGAINST]->(wf)
MATCH (:Response)-[:TO {disputed:true}]->(complaint)
MATCH (complaint)-[:ABOUT]->(p:Product)
MATCH (complaint)-[:WITH]->(i:Issue)
RETURN p.name AS product, i.name AS issue, COUNT(*) AS count
ORDER BY count DESC;

// Sub-products that belong to multiple product categories.
MATCH (sub:SubProduct)-[:IN_CATEGORY]->(p:Product)
WITH sub, COLLECT(p) AS products
WHERE LENGTH(products) > 1
RETURN sub, products;

// Sub-issues that belong to multiple issue categories.
MATCH (sub:SubIssue)-[:IN_CATEGORY]->(i:Issue)
WITH sub, COLLECT(i) AS issues
WHERE LENGTH(issues) > 1
RETURN sub, issues;
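// Complaint volume by year (an added example, not part of the original query set;
// it relies on the year property set on Complaint nodes during the import in load.cql).
MATCH (c:Complaint)
RETURN c.year AS year, COUNT(*) AS complaints
ORDER BY year;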
--------------------------------------------------------------------------------