├── .gitignore
├── All_Mooc
├── Course_csv
│ ├── Coursera_flat.csv
│ ├── Mooc_merge.csv
│ └── ToCsv.py
├── Cypher
│ └── Mooc_merge_cypher.txt
├── Link_Course
│ ├── Catalogue_sim.json
│ ├── Link_course.py
│ ├── inter_link.json
│ ├── inter_link_ID.csv
│ ├── inter_link_Nam.csv
│ └── similarity_compute.ipynb
├── Mooc_json
│ ├── Catalogue_course.json
│ ├── Coursera_flat.json
│ ├── Mooc_merge.json
│ └── Unify.py
└── Program_order
│ ├── Program
│ └── Catalogue.json
│ └── Specialization
│ ├── Coursera_data.json
│ ├── Spe_csv_str.json
│ ├── Spe_lst.json
│ ├── Specialization.csv
│ ├── no_duplicate?
│ ├── Specialization2.csv
│ └── Specialization3.json
│ └── process.py
├── Catalogue
├── Catalogue.json
├── Catalogue_course.json
├── Flatten_course.ipynb
└── USCCatalog_wrapper.ipynb
├── CoursePlanner.pdf
├── Coursera
├── Coursera_data.json
├── Meta_data
│ ├── Coursera.key
│ ├── Coursera_sample.json
│ └── Page_RDF.json
└── Src
│ ├── Coursera_driver.py
│ ├── Crawl_Coursera.py
│ ├── geckodriver.log
│ └── troubleshoot.md
├── Jupyter
├── .ipynb_checkpoints
│ ├── TopoSort_UserInterface-checkpoint.ipynb
│ ├── UserInterface-Copy1-checkpoint.ipynb
│ └── test-checkpoint.ipynb
├── Catalogue.csv
├── Catalogue_sim.csv
├── Mooc_merge.csv
├── TopoSort_UserInterface.ipynb
├── Untitled.ipynb
├── UserInterface-Copy1.ipynb
├── __pycache__
│ └── neo.cpython-36.pyc
├── assets
│ └── index.html
├── cypher.py
├── data (2).json
├── integration.py
├── neo.py
├── test.ipynb
├── toposort.py
└── tpsort.py
├── Khan
├── khan_data.json
└── khan_driver.py
├── README.md
├── Udacity
├── Src
│ ├── Process_endpoint.py
│ └── udacity_endpoint.json
├── Udacity_wrapper
│ └── driver_udacity.py
└── udacity_data.json
├── edX
├── allcourse.html
├── edX.csv
├── edX.json
├── edx_wrapper.ipynb
├── sample.json
└── sample_cypher_edX
├── neo4jupyter
├── UserInterface.ipynb
├── assets
│ └── index.html
└── neo.py
└── rpedsel
├── Catalogue.csv
├── Catalogue_sim.csv
├── Cypher
└── Mooc_merge.csv
/.gitignore:
--------------------------------------------------------------------------------
1 | /.ipynb_checkpoints/
--------------------------------------------------------------------------------
/All_Mooc/Course_csv/Mooc_merge.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/All_Mooc/Course_csv/Mooc_merge.csv
--------------------------------------------------------------------------------
/All_Mooc/Course_csv/ToCsv.py:
--------------------------------------------------------------------------------
"""Export the course JSON dumps in this directory to CSV files.

Python 3 port of the original script: the ``reload(sys)`` /
``sys.setdefaultencoding('utf8')`` hack is replaced by passing an explicit
encoding wherever text is read or written.
"""
import csv
import json
import sys

import pandas as pd

# Each JSON dump is printed and then written next to itself as <stem>.csv.
for stem in ("Coursera_flat", "Catalogue_course", "Mooc_merge"):
    df = pd.read_json(stem + ".json")
    print(df)
    df.to_csv(stem + ".csv", encoding="utf-8")

with open("Catalogue_course.json", encoding="utf-8") as source:
    values = json.load(source)

# NOTE(review): this overwrites the Catalogue_course.csv that pandas wrote
# above with one single-cell row per field value (keys are dropped).  That is
# what the original script did; confirm it is still the intended output.
with open("Catalogue_course.csv", "w", newline="", encoding="utf-8") as sink:
    writer = csv.writer(sink)
    for data in values:
        for value in data.values():
            # List-valued fields are flattened into one comma-joined cell.
            cell = ",".join(value) if isinstance(value, list) else value
            writer.writerow([cell])
25 |
--------------------------------------------------------------------------------
/All_Mooc/Cypher/Mooc_merge_cypher.txt:
--------------------------------------------------------------------------------
1 | *Delete all nodes and edges*
2 | `match (n)-[r]-() delete n, r`
3 |
4 | *Delete all nodes which have no edges*
5 | `match (n) delete n`
6 |
// Import every MOOC and link it to the provider that hosts it.
load csv with headers from "file:///Mooc_merge.csv" as row WITH row
where row.id is not null and row.course_name is not null and row.provenance is not null and row.description is not null
and row.course_url is not null and row.image_url is not null
merge (course:Mooc {Id: row.id, Name: row.course_name, Description: row.description, URL: row.course_url, Img: row.image_url})
merge (provenance:Provenance {name: row.provenance})
merge (course)-[:HostedBy]->(provenance);

// Link each MOOC to its specialization (Topic), when it has one.
// `course` must be carried through WITH; otherwise the final MERGE would
// bind a brand-new anonymous node instead of the course.
load csv with headers from "file:///Mooc_merge.csv" as row WITH row
where row.id is not null and row.course_name is not null and row.provenance is not null and row.description is not null
and row.course_url is not null and row.image_url is not null
merge (course:Mooc {Id: row.id, Name: row.course_name, Description: row.description, URL: row.course_url, Img: row.image_url})
with row, course where row.special_id is not null
merge (topic:Topic {id: row.special_id})
merge (course)-[:OfTopic]->(topic);

// Link each MOOC to every subject listed in its ";"-separated subject column.
load csv with headers from "file:///Mooc_merge.csv" as row WITH row
where row.id is not null and row.course_name is not null and row.provenance is not null and row.description is not null
and row.course_url is not null and row.image_url is not null
merge (course:Mooc {Id: row.id, Name: row.course_name, Description: row.description, URL: row.course_url, Img: row.image_url})
with row, course where row.subject is not null
foreach (subjectName in split(row.subject, ";") |
  merge (subject:Subject {name: subjectName})
  merge (course)-[:OfSubject]->(subject));

// Properties sometimes get out of sync; force-set them from the CSV.
// SET takes a comma-separated list of assignments (not `and`), and the id
// column of Mooc_merge.csv is `id`, as used by the statements above.
load csv with headers from "file:///Mooc_merge.csv" as row WITH row
match (course:Mooc) where row.id = course.Id
set course.Name = row.course_name,
    course.Description = row.description,
    course.URL = row.course_url,
    course.Img = row.image_url;

// Rename the node label Course -> Mooc.
MATCH (course:Course)
SET course:Mooc
REMOVE course:Course;

// Join: a Topic has id (special_id) and Name; copy the name from Specialization.csv.
load csv with headers from "file:///Specialization.csv" as row WITH row
where row.name is not null
match (topic:Topic) where row.special_id = topic.id
set topic.Name = row.name;

// Import inter_link_Nam.csv: recommended-preparation links.
load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
where row.course_id is not null and row.preparation is not null
merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
foreach (pre_course in split(row.preparation, ";") |
  merge (preparation:Preparation {Id: pre_course})
  merge (preparation)-[:PreparationOf]->(c));

// Duplicate-content links.
load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
where row.course_id is not null and row.duplicate is not null
merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
foreach (dup_course in split(row.duplicate, ";") |
  merge (duplicate:Duplicate {Id: dup_course})
  merge (duplicate)-[:SameContentAs]->(c));

// Cross-listed course codes.
load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
where row.course_id is not null and row.crosslist is not null
merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
foreach (nick_name in split(row.crosslist, ";") |
  merge (cross:Cross {Id: nick_name})
  merge (cross)-[:KnowAlso]->(c));

// Prerequisite links.  Every catalogue course is merged, but only rows with
// a prerequisite get edges; `c` is carried through WITH so the edge attaches
// to the catalogue course rather than to a new anonymous node.
load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
where row.course_id is not null
merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
with row, c where row.prerequisite is not null
foreach (pre_course in split(row.prerequisite, ";") |
  merge (p:Prerequisite {Id: pre_course})
  merge (p)-[:PrerequisiteOf]->(c));

// Corequisite links.
load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
where row.course_id is not null and row.corequisite is not null
merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
foreach (co_course in split(row.corequisite, ";") |
  merge (corequisite:Corequisite {Id: co_course})
  merge (corequisite)-[:CorequisiteOf]->(c));

// Catalogue course -> similar MOOC links.
load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
where row.course_id is not null and row.similar_mooc is not null
merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
foreach (mooc in split(row.similar_mooc, ";") |
  merge (m:Mooc {Id: mooc})
  merge (c)-[:CorrespondingTo]->(m));
85 |
--------------------------------------------------------------------------------
/All_Mooc/Link_Course/Link_course.py:
--------------------------------------------------------------------------------
"""Build the inter-course link table from ``Catalogue_sim.json``.

Courses that have at least one prerequisite / preparation / corequisite /
duplicate / crosslist entry are kept, their link lists are de-duplicated and
flattened to strings, and the result is written to ``inter_link.json`` and
``inter_link.csv``.

NOTE(review): the source this was restored from had lost its indentation, so
the nesting below is reconstructed from the logic (everything up to
``inter_link.append`` is taken to apply only to courses that have links).
Confirm against the original file.
"""
import json
import sys

import pandas as pd

LINK_FIELDS = ("prerequisite", "preparation", "corequisite",
               "duplicate", "crosslist")

with open("Catalogue_sim.json") as source:
    catalogue = json.load(source)

inter_link = []
for course in catalogue:
    # Skip courses without any relation to another catalogue course.
    if not any(len(course[field]) > 0 for field in LINK_FIELDS):
        continue
    del course["description"]

    # Multi-valued link lists become one ", "-joined string with duplicates
    # removed (dict.fromkeys keeps first-seen order).  Single-element lists
    # are left alone here and unwrapped in the pass further down.
    for field in LINK_FIELDS:
        if len(course[field]) > 1:
            course[field] = ', '.join(dict.fromkeys(course[field]))

    # Remember the empty fields before "similar_mooc" is added.
    empty = [key for key in course if len(course[key]) == 0]

    if len(course["similarity"]) != 0:
        # Each similarity entry is indexed at [0] for the MOOC id
        # (presumably an (id, score) pair; verify against the producer).
        similar = [item[0] for item in course["similarity"]]
        print(similar)
        course["similar_mooc"] = ', '.join(similar)
        del course["similarity"]

    for key in empty:
        del course[key]
    inter_link.append(course)

# Unwrap every remaining length-1 value (single-element lists; a
# one-character string maps to itself).
for course in inter_link:
    for key in course:
        if len(course[key]) == 1:
            course[key] = course[key][0]

print(len(inter_link))

# Write mode, not append: appending a second JSON document on a re-run would
# make the file unparseable for the read_json call below.
with open("inter_link.json", "w") as sink:
    json.dump(inter_link, sink)

df = pd.read_json("inter_link.json")
df.to_csv('inter_link.csv', encoding='utf-8')
116 |
--------------------------------------------------------------------------------
/All_Mooc/Link_Course/inter_link_Nam.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/All_Mooc/Link_Course/inter_link_Nam.csv
--------------------------------------------------------------------------------
/All_Mooc/Mooc_json/Unify.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | # with open('Coursera_data.json', 'r') as f:
4 | # json.load()
5 |
def _load_json(path):
    """Return the parsed JSON document stored at *path*, closing the file."""
    with open(path) as handle:
        return json.load(handle)


# Raw per-provider course dumps that the rest of the script merges.
coursera = _load_json('Coursera_data.json')
edX = _load_json('edX.json')
khan = _load_json('khan_data.json')
udacity = _load_json('udacity_data.json')
print("%d %d %d %d" % (len(coursera), len(edX), len(khan), len(udacity)))
11 |
12 | # id in 3 digits letter + 5 digits number
13 | # remove unicode, starting words
def Format_desc(s):
    """Strip boilerplate phrases and non-ASCII characters from a description.

    Args:
        s: The raw course description (text).

    Returns:
        The description with every boilerplate phrase removed, non-ASCII
        characters dropped, and surrounding whitespace stripped.
    """
    boilerplate = [
        "About the Course", "about this course ",
        "About this Course\n\n", "This course",
        "About this course: ", "In this course",
        "in this course", "this course",
        # The original list had no comma after "Welcome to ", which silently
        # fused it with "\n\n" into one phrase, so neither was ever removed
        # on its own.
        "Welcome to ",
        "\n\n", " \n", " \n\n", "\n\t", "This is a course",
        "In this course,", "in this course,", "About the course",
    ]
    # Longest phrases first, so that e.g. "In this course," is removed as a
    # whole instead of being shadowed by the shorter "In this course".
    for phrase in sorted(boilerplate, key=len, reverse=True):
        s = s.replace(phrase, "")
    # Drop non-ASCII characters; decode back so the result is text on both
    # Python 2 and Python 3.
    return s.encode('ascii', 'ignore').decode('ascii').strip()
31 |
32 |
def _normalise(record, prefix):
    """Give *record* a ``prefix`` + last-5-characters id and a cleaned description.

    Records without a description get the placeholder "Empty".  The record is
    modified in place and also returned.
    """
    record["id"] = prefix + record["id"][-5:]
    if "description" in record:
        record["description"] = Format_desc(record["description"])
    else:
        record["description"] = "Empty"
    return record


# --- Coursera: flatten specializations into their member courses ----------
csr = []
spe_id = 0
for item in coursera:
    if "courseSet" in item:
        # A specialization: its own description is dropped and each member
        # course inherits the image, provenance and URL of the specialization.
        del item["description"]
        spe_id += 1
        special_id = "spc" + str(spe_id).zfill(5)
        for course in item["courseSet"]:
            course["img"] = item["img"]
            # NOTE(review): the fallback "cousera" looks like a typo for
            # "coursera"; kept as-is because it is emitted data.
            course["provenance"] = item.get("provenance", "cousera")
            course["course_url"] = item["course_url"]
            course["special_id"] = special_id
            course["id"] = "csr" + course["id"][-5:]
            course["description"] = Format_desc(course["description"])
            csr.append(course)
    else:
        # A stand-alone course.
        _normalise(item, "csr")
        del item["specialization"]
        csr.append(item)

print(len(csr))
# Write mode, not append: a re-run in append mode would leave two JSON
# documents in one file, which no JSON parser accepts.
with open("Coursera_flat.json", "w") as f:
    json.dump(csr, f)

# --- Khan Academy and Udacity: ids and descriptions only ------------------
khn = []
for item in khan:
    khn.append(_normalise(item, "khn"))

udc = []
for item in udacity:
    udc.append(_normalise(item, "udc"))
print("%d %d" % (len(udc), len(khn)))

# --- Merge all providers (edX records are used unchanged) -----------------
Mooc_merge = csr + edX + khn + udc
print(len(Mooc_merge))

with open("Mooc_merge.json", "w") as f:
    json.dump(Mooc_merge, f)
105 |
106 |
107 |
108 |
109 |
110 |
--------------------------------------------------------------------------------
/All_Mooc/Program_order/Specialization/Specialization.csv:
--------------------------------------------------------------------------------
1 | special_id,name,provenance,courseList
2 | spc00001,Data Science,Johns Hopkins University,"csr00001, csr00002, csr00003, csr00004, csr00005, csr00006, csr00007, csr00008, csr00009, csr00010"
3 | spc00002,Data Science Math Skills,Duke University,"csr00011, csr00012, csr00013, csr00014, csr00015"
4 | spc00003,Data Analysis and Presentation Skills: the PwC Approach,PwC,"csr00016, csr00017, csr00018, csr00019, csr00020"
5 | spc00004,Data Visualization with Tableau,"University of California, Davis","csr00021, csr00022, csr00023, csr00024, csr00025, csr00026"
6 | spc00005,A Crash Course in Data Science,Johns Hopkins University,"csr00027, csr00028, csr00029, csr00030, csr00031, csr00032"
7 | spc00006,Data Mining ,University of Illinois at Urbana-Champaign,"csr00033, csr00034, csr00035, csr00036, csr00037"
8 | spc00007,Big Data,"University of California, San Diego","csr00038, csr00039, csr00040, csr00041, csr00042"
9 | spc00008,Applied Data Science with Python,University of Michigan,"csr00043, csr00044, csr00045, csr00046, csr00047"
10 | spc00009,Data Analysis and Interpretation,Wesleyan University,"csr00048, csr00049, csr00050, csr00051, csr00052, csr00053"
11 | spc00010,Big Data for Data Engineers,Yandex,"csr00054, csr00055, csr00056, csr00057, csr00058"
12 | spc00011,Data Structures and Algorithms,"University of California, San Diego, Higher School of Economics","csr00059, csr00060, csr00061, csr00062, csr00063, csr00064, csr00065, csr00066"
13 | spc00012,Executive Data Science,Johns Hopkins University,"csr00067, csr00068, csr00069, csr00070, csr00071"
14 | spc00013,Genomic Data Science,Johns Hopkins University,"csr00072, csr00073, csr00074, csr00075, csr00076, csr00077, csr00078"
15 | spc00014,Data Warehousing for Business Intelligence,University of Colorado System,"csr00079, csr00080, csr00081, csr00082"
16 | spc00015,Survey Data Collection and Analytics ,"University of Michigan, University of Maryland, College Park","csr00083, csr00084, csr00085, csr00086, csr00087"
17 | spc00016,Master of Computer Science in Data Science (MCS-DS),,"csr00153, csr00154, csr00155, csr00156"
18 | spc00017,"Algorithms, Part I",Princeton University,"csr00157, csr00158, csr00159, csr00160, csr00161, csr00162"
19 | spc00018,"Algorithms, Part II",Princeton University,"csr00163, csr00164, csr00165, csr00166, csr00167"
20 | spc00019,Master of Computer Science in Data Science (MCS-DS),,"csr00186, csr00187, csr00188, csr00189, csr00190"
21 | spc00020,Object Oriented Java Programming: Data Structures and Beyond,"University of California, San Diego","csr00191, csr00192, csr00193, csr00194, csr00195, csr00196, csr00197"
22 | spc00021,Convolutional Neural Networks,deeplearning.ai,"csr00198, csr00199, csr00200, csr00201"
23 | spc00022,Master of Computer Science in Data Science (MCS-DS),,"csr00239, csr00240, csr00241, csr00242"
24 | spc00023, (MCS-DS),,"csr00262, csr00263, csr00264, csr00265, csr00266"
25 | spc00024,Pattern Discovery in Data Mining,University of Illinois at Urbana-Champaign,"csr00267, csr00268, csr00269, csr00270, csr00271, csr00272, csr00273, csr00274, csr00275, csr00276"
26 | spc00025,Command Line Tools for Genomic Data Science,Johns Hopkins University,"csr00277, csr00278, csr00279, csr00280"
27 | spc00026,Internet Giants: The Law and Economics of Media Platforms,The University of Chicago,"csr00281, csr00282, csr00283, csr00284, csr00285, csr00286"
28 | spc00027,"Nanotechnology and Nanosensors, Part1",Technion - Israel Institute of Technology,"csr00287, csr00288, csr00289, csr00290, csr00291"
29 | spc00028,Master of Computer Science in Data Science (MCS-DS),,"csr00297, csr00298, csr00299, csr00300"
30 | spc00029,Introduction to MongoDB,MongoDB Inc.,"csr00301, csr00302, csr00303, csr00304, csr00305, csr00306"
31 | spc00030,Neural Networks and Deep Learning,deeplearning.ai,"csr00307, csr00308, csr00309, csr00310, csr00311"
32 | spc00031, (MCS-DS),,"csr00312, csr00313, csr00314, csr00315"
33 | spc00032,Spanish Vocabulary: Meeting People,"University of California, Davis","csr00316, csr00317, csr00318, csr00319, csr00320"
34 | spc00033,Master of Computer Science in Data Science (MCS-DS),,"csr00336, csr00337, csr00338, csr00339, csr00340, csr00341"
35 | spc00034,Ruby on Rails Web Development,Johns Hopkins University,"csr00342, csr00343, csr00344"
36 | spc00035,Genome Assembly Programming Challenge,"University of California, San Diego, Higher School of Economics","csr00345, csr00346, csr00347, csr00348, csr00349, csr00350"
37 | spc00036,"Parallel, Concurrent, and Distributed Programming in Java",Rice University,"csr00351, csr00352, csr00353, csr00354, csr00355"
38 | spc00037,A Crash Course in Data Science,Johns Hopkins University,"csr00356, csr00357, csr00358, csr00359, csr00360"
39 | spc00038,Software Design Threats and Mitigations,University of Colorado System,"csr00361, csr00362, csr00363, csr00364, csr00365"
40 | spc00039,Character Design for Video Games,California Institute of the Arts,"csr00366, csr00367, csr00368, csr00369, csr00370"
41 | spc00040,Master of Computer Science in Data Science (MCS-DS),,"csr00379, csr00380, csr00381, csr00382"
42 | spc00041,From Data to Insights with Google Cloud Platform,Google Cloud,"csr00383, csr00384, csr00385, csr00386"
43 | spc00042,Real-Time Cyber Threat Detection and Mitigation,New York University Tandon School of Engineering,"csr00387, csr00388, csr00389, csr00390, csr00391, csr00392, csr00393"
44 | spc00043,Secure Software Design,University of Colorado System,"csr00394, csr00395, csr00396, csr00397, csr00398"
45 | spc00044,Structuring Machine Learning Projects,deeplearning.ai,"csr00399, csr00400, csr00401, csr00402"
46 | spc00045,Internet of Things: How did we get here?,"University of California, San Diego","csr00403, csr00404, csr00405, csr00406, csr00407"
47 | spc00046,,Ludwig-Maximilians-Universität München (LMU),"csr00408, csr00409, csr00410, csr00411, csr00412"
48 | spc00047,"Trabaja inteligentemente, no más duro: Gestión del tiempo para la productividad personal y profesional ","University of California, Irvine","csr00413, csr00414, csr00415, csr00416"
49 | spc00048, (MCS-DS),,"csr00418, csr00419, csr00420, csr00421, csr00422, csr00423, csr00424, csr00425, csr00426, csr00427"
50 | spc00049,Digital Manufacturing & Design Technology ,"The State University of New York, University at Buffalo","csr00428, csr00429, csr00430, csr00431, csr00432, csr00433"
51 | spc00050,Internet of Things,"University of California, San Diego","csr00434, csr00435, csr00436, csr00437"
52 | spc00051,Architecting with Google Cloud Platform,Google Cloud,"csr00438, csr00439, csr00440, csr00441, csr00442"
53 | spc00052,Introduction to Discrete Mathematics for Computer Science,"Higher School of Economics, University of California, San Diego","csr00443, csr00444, csr00445, csr00446, csr00447"
54 | spc00053,Android App Development,Vanderbilt University,"csr00523, csr00524, csr00525, csr00526, csr00527"
55 | spc00054,Java Programming and Software Engineering Fundamentals,Duke University,"csr00562, csr00563, csr00564, csr00565, csr00566"
56 | spc00055,Object Oriented Java Programming: Data Structures and Beyond,"University of California, San Diego","csr00567, csr00568, csr00569, csr00570, csr00571"
57 | spc00056,Learn to Program: The Fundamentals,University of Toronto,"csr00572, csr00573, csr00574"
58 | spc00057,"Algorithms, Part I",Princeton University,"csr00575, csr00576, csr00577, csr00578"
59 | spc00058,Web Applications for Everybody,University of Michigan,"csr00595, csr00596, csr00597, csr00598"
60 | spc00059,Building Web Applications in PHP,University of Michigan,"csr00599, csr00600, csr00601, csr00602, csr00603"
61 | spc00060,Python for Everybody,University of Michigan,"csr00604, csr00605, csr00606, csr00607, csr00608"
62 | spc00061,Using Python to Access Web Data,University of Michigan,"csr00609, csr00610, csr00611, csr00612, csr00613, csr00614"
63 | spc00062,Web Application Development: Basic Concepts,University of New Mexico,"csr00615, csr00616, csr00617, csr00618, csr00619"
64 | spc00063,Web Design for Everybody (Basics of Web Development and Coding),University of Michigan,"csr00620, csr00621, csr00622, csr00623, csr00624, csr00625"
65 | spc00064,Introduction to Discrete Mathematics for Computer Science,"Higher School of Economics, University of California, San Diego","csr00640, csr00641, csr00642, csr00643, csr00644"
66 | spc00065,Mathematical Foundations for Cryptography,University of Colorado System,"csr00645, csr00646, csr00647, csr00648"
67 | spc00066,Applied Cryptography,University of Colorado System,"csr00667, csr00668, csr00669, csr00670"
68 | spc00067,Cryptographic Hash and Integrity Protection,University of Colorado System,"csr00671, csr00672, csr00673, csr00674, csr00675, csr00676, csr00677"
69 | spc00068,Global Challenges in Business,University of Illinois at Urbana-Champaign,"csr00678, csr00679, csr00680, csr00681, csr00682, csr00683"
70 | spc00069,Vital Signs: Understanding What the Body Is Telling Us,University of Pennsylvania,"csr00684, csr00685, csr00686, csr00687, csr00688"
71 | spc00070,Responsive Website Development and Design,"University of London, Goldsmiths, University of London","csr00689, csr00690, csr00691, csr00692, csr00693"
72 | spc00071,Practical Machine Learning,Johns Hopkins University,"csr00694, csr00695, csr00696, csr00697, csr00698"
73 | spc00072,Introduction to the Internet of Things and Embedded Systems,"University of California, Irvine","csr00713, csr00714, csr00715, csr00716, csr00717, csr00718"
74 | spc00073,A developer's guide to the Internet of Things (IoT),IBM,"csr00719, csr00720, csr00721, csr00722, csr00723, csr00724, csr00725, csr00726, csr00727, csr00728"
75 | spc00074,Cybersecurity Policy for Aviation and Internet Infrastructures,University of Colorado System,"csr00729, csr00730, csr00731, csr00732, csr00733"
76 | spc00075,Internet of Things: How did we get here?,"University of California, San Diego","csr00734, csr00735, csr00736, csr00737"
77 | spc00076,Master of Business Administration (iMBA),,"csr00754, csr00755, csr00756, csr00757, csr00758, csr00759, csr00760"
78 | spc00077,Master's in Innovation and Entrepreneurship,,"csr00761, csr00762, csr00763, csr00764"
79 | spc00078,Professional Certificate in Innovation Management and Entrepreneurship,,"csr00765, csr00766, csr00767, csr00768"
80 | spc00079,Managerial Economics and Business Analysis ,University of Illinois at Urbana-Champaign,"csr00769, csr00770, csr00771, csr00772, csr00773, csr00774, csr00775"
81 | spc00080,Strategic Business Analytics,ESSEC Business School,"csr00776, csr00777, csr00778, csr00779, csr00780"
82 | spc00081,Excel Skills for Business,Macquarie University,"csr00781, csr00782, csr00783, csr00784"
83 | spc00082,Global Challenges in Business,University of Illinois at Urbana-Champaign,"csr00785, csr00786, csr00787, csr00788"
84 | spc00083,Statistical Molecular Thermodynamics,University of Minnesota,"csr00825, csr00826, csr00827, csr00828"
85 | spc00084,Statistical Thermodynamics: Molecules to Machines,Carnegie Mellon University,"csr00829, csr00830, csr00831, csr00832, csr00833, csr00834, csr00835"
86 | spc00085,Social Media Marketing,Northwestern University,"csr00838, csr00839, csr00840, csr00841, csr00842, csr00843"
87 | spc00086,Social Media Data Analytics,Rutgers the State University of New Jersey,"csr00844, csr00845, csr00846"
88 | spc00087,What is Social?,Northwestern University,"csr00847, csr00848, csr00849, csr00850, csr00851"
89 | spc00088,Design Patterns,University of Alberta,"csr00855, csr00856, csr00857, csr00858, csr00859"
90 | spc00089,Fashion as Design,The Museum of Modern Art,"csr00860, csr00861, csr00862, csr00863, csr00864, csr00865, csr00866, csr00867"
91 | spc00090,Graphic Design,California Institute of the Arts,"csr00868, csr00869, csr00870, csr00871, csr00872"
92 | spc00091,Introduction to User Experience Design ,Georgia Institute of Technology,"csr00873, csr00874, csr00875, csr00876, csr00877"
93 | spc00092,Cryptography and Information Theory,University of Colorado System,"csr00878, csr00879, csr00880, csr00881, csr00882"
94 | spc00093,Academic Information Seeking,"University of Copenhagen, Technical University of Denmark (DTU)","csr00883, csr00884, csr00885, csr00886, csr00887, csr00888, csr00889, csr00890"
95 | spc00094,Metaliteracy: Empowering Yourself in a Connected World,The State University of New York,"csr00891, csr00892, csr00893, csr00894, csr00895"
96 | spc00095,Mindware: Critical Thinking for the Information Age ,University of Michigan,"csr00896, csr00897, csr00898, csr00899, csr00900"
97 | spc00096,Business Analytics,University of Pennsylvania,"csr00901, csr00902, csr00903, csr00904"
98 | spc00097,Introduction to Complex Analysis,Wesleyan University,"csr00920, csr00921, csr00922, csr00923, csr00924"
99 | spc00098,Analysis of Algorithms,Princeton University,"csr00925, csr00926, csr00927, csr00928, csr00929"
100 | spc00099,Data Analysis and Presentation Skills: the PwC Approach,PwC,"csr00930, csr00931, csr00932, csr00933, csr00934"
101 | spc00100,Bayesian Statistics: From Concept to Data Analysis,"University of California, Santa Cruz","csr00935, csr00936, csr00937, csr00938, csr00939, csr00940, csr00941"
102 | spc00101,Business Statistics and Analysis,Rice University,"csr00942, csr00943, csr00944, csr00945, csr00946"
103 | spc00102,"Internet History, Technology, and Security",University of Michigan,"csr00962, csr00963, csr00964, csr00965, csr00966, csr00967"
104 | spc00103,The Impact of Technology,EIT Digital ,"csr00968, csr00969, csr00970, csr00971, csr00972, csr00973"
105 | spc00104,Get Interactive: Practical Teaching with Technology,"University of London, Bloomsbury Learning Environment","csr00974, csr00975, csr00976, csr00977, csr00978, csr00979, csr00980, csr00981, csr00982, csr00983"
106 | spc00105,Fundamentals of Network Communication,University of Colorado System,"csr01001, csr01002, csr01003, csr01004"
107 | spc00106,Network Protocols and Architecture,Cisco,"csr01005, csr01006, csr01007, csr01008, csr01009"
108 | spc00107,Digital Marketing,University of Illinois at Urbana-Champaign,"csr01028, csr01029, csr01030, csr01031, csr01032, csr01033, csr01034"
109 | spc00108,Introduction to Marketing,University of Pennsylvania,"csr01035, csr01036, csr01037, csr01038, csr01039, csr01040"
110 | spc00109,Business Foundations,University of Pennsylvania,"csr01041, csr01042, csr01043, csr01044, csr01045, csr01046"
111 | spc00110,Marketing in a Digital World,University of Illinois at Urbana-Champaign,"csr01047, csr01048, csr01049, csr01050, csr01051"
112 | spc00111,The Cycle: Management of Successful Arts and Cultural Organizations,"University of Maryland, College Park","csr01052, csr01053, csr01054, csr01055, csr01056, csr01057"
113 | spc00112,Food & Beverage Management,Università Bocconi,"csr01058, csr01059, csr01060, csr01061"
114 | spc00113,International Marketing Entry and Execution,Yonsei University,"csr01062, csr01063, csr01064, csr01065, csr01066"
115 | spc00114,International Marketing & Cross Industry Growth ,Yonsei University,"csr01067, csr01068, csr01069, csr01070, csr01071"
116 | spc00115,Introducción al Marketing,University of Pennsylvania,"csr01072, csr01073, csr01074, csr01075, csr01076, csr01077"
117 | spc00116,The Strategy of Content Marketing,"University of California, Davis","csr01078, csr01079, csr01080, csr01081, csr01082"
118 | spc00117,Digital Product Management: Modern Fundamentals,University of Virginia,"csr01093, csr01094, csr01095, csr01096, csr01097, csr01098, csr01099"
119 | spc00118,Digital Marketing,University of Illinois at Urbana-Champaign,"csr01100, csr01101, csr01102, csr01103"
120 | spc00119,Digital Signal Processing,cole Polytechnique Fdrale de Lausanne,"csr01104, csr01105, csr01106, csr01107, csr01108, csr01109, csr01110, csr01111, csr01112, csr01113"
121 | spc00120,Digital Footprint,The University of Edinburgh,"csr01114, csr01115, csr01116, csr01117, csr01118"
122 | spc00121,Introduction to Electronics,Georgia Institute of Technology,"csr01135, csr01136, csr01137, csr01138, csr01139, csr01140"
123 | spc00122,Software Architecture,University of Alberta,"csr01160, csr01161, csr01162, csr01163"
124 | spc00123,Service-Oriented Architecture,University of Alberta,"csr01164, csr01165, csr01166, csr01167"
125 | spc00124,Computer Architecture,Princeton University,"csr01168, csr01169, csr01170, csr01171"
126 | spc00125,Financing and Investing in Infrastructure,Universit Bocconi,"csr01189, csr01190, csr01191, csr01192, csr01193, csr01194"
127 | spc00126,Smart Cities Management of Smart Urban Infrastructures,cole Polytechnique Fdrale de Lausanne,"csr01195, csr01196, csr01197, csr01198"
128 | spc00127,Computer Architecture,Princeton University,"csr01199, csr01200, csr01201, csr01202"
129 | spc00128,Learn to Program: The Fundamentals,University of Toronto,"csr01220, csr01221, csr01222, csr01223, csr01224"
130 | spc00129,Beginning Game Programming with C#,University of Colorado System,"csr01225, csr01226, csr01227, csr01228, csr01229, csr01230"
131 | spc00130,Java Programming and Software Engineering Fundamentals,Duke University,"csr01231, csr01232, csr01233, csr01234"
132 | spc00131,Programming for Everybody (Getting Started with Python),University of Michigan,"csr01235, csr01236, csr01237, csr01238, csr01239"
133 | spc00132,Code Yourself! An Introduction to Programming,"The University of Edinburgh, Universidad ORT Uruguay","csr01240, csr01241, csr01242, csr01243"
134 | spc00133,Introduction to Programming with MATLAB,Vanderbilt University,"csr01244, csr01245, csr01246"
135 | spc00134,An Introduction to Programming the Internet of Things (IOT),"University of California, Irvine","csr01247, csr01248, csr01249, csr01250, csr01251"
136 | spc00135,Internet Giants: The Law and Economics of Media Platforms,The University of Chicago,"csr01265, csr01266, csr01267, csr01268"
137 | spc00136,Google Cloud Platform Fundamentals: Core Infrastructure,Google Cloud,"csr01269, csr01270, csr01271, csr01272, csr01273, csr01274"
138 | spc00137,Data Science at Scale,University of Washington,"csr01275, csr01276, csr01277, csr01278"
139 | spc00138,An Introduction to Programming the Internet of Things (IOT),"University of California, Irvine","csr01279, csr01280, csr01281, csr01282, csr01283"
140 | spc00139,Architecting with Google Cloud Platform,Google Cloud,"csr01284, csr01285, csr01286, csr01287"
141 | spc00140,"Big Data Essentials: HDFS, MapReduce and Spark RDD",Yandex,"csr01303, csr01304, csr01305, csr01306, csr01307"
142 | spc00141,Maps and the Geospatial Revolution,The Pennsylvania State University,"csr01308, csr01309, csr01310, csr01311"
143 | spc00142,Data Visualization with Tableau,"University of California, Davis","csr01312, csr01313, csr01314, csr01315, csr01316, csr01317"
144 | spc00143,Visual Analytics with Tableau,"University of California, Davis","csr01318, csr01319, csr01320, csr01321, csr01322"
145 | spc00144,Data Science at Scale,University of Washington,"csr01323, csr01324, csr01325, csr01326, csr01327, csr01328"
146 | spc00145,Mindware: Critical Thinking for the Information Age ,University of Michigan,"csr01344, csr01345, csr01346, csr01347"
147 | spc00146,Homeland Security and Cybersecurity,University of Colorado System,"csr01348, csr01349, csr01350, csr01351, csr01352"
148 | spc00147,Philosophy and the Sciences: Introduction to the Philosophy of Cognitive Sciences,The University of Edinburgh,"csr01353, csr01354, csr01355, csr01356"
149 | spc00148,Personality Types at Work,University of Florida,"csr01357, csr01358, csr01359, csr01360"
150 | spc00149,Data Analysis and Presentation Skills: the PwC Approach,PwC,"csr01361, csr01362, csr01363, csr01364, csr01365"
151 | spc00150,Master's in Innovation and Entrepreneurship,,"csr01381, csr01382, csr01383, csr01384, csr01385"
152 | spc00151,Professional Certificate in Innovation Management and Entrepreneurship,,"csr01386, csr01387, csr01388, csr01389, csr01390, csr01391, csr01392, csr01393, csr01394, csr01395"
153 | spc00152,Entrepreneurship,University of Pennsylvania,"csr01396, csr01397, csr01398, csr01399"
154 | spc00153,Entrepreneurship 1: Developing the Opportunity,University of Pennsylvania,"csr01400, csr01401, csr01402, csr01403, csr01404, csr01405"
155 | spc00154,English for Business and Entrepreneurship,University of Pennsylvania,"csr01406, csr01407, csr01408"
156 | spc00155,Entrepreneurship 2: Launching your Start-Up,University of Pennsylvania,"csr01409, csr01410, csr01411, csr01412"
157 | spc00156,Career Success,"University of California, Irvine","csr01413, csr01414, csr01415, csr01416, csr01417, csr01418, csr01419"
158 | spc00157,Cyber Threats and Attack Vectors,University of Colorado System,"csr01433, csr01434, csr01435, csr01436, csr01437"
159 | spc00158,Detecting and Mitigating Cyber Threats and Attacks,University of Colorado System,"csr01438, csr01439, csr01440, csr01441"
160 | spc00159,Cybersecurity,"University of Maryland, College Park","csr01442, csr01443, csr01444"
161 | spc00160,Bitcoin and Cryptocurrency Technologies,Princeton University,"csr01445, csr01446, csr01447, csr01448"
162 | spc00161,"Knowledge Exchange: Using, Protecting and Monetizing Ideas with Third Parties","Universiteit Leiden, Leiden University Medical Center, Luris","csr01465, csr01466, csr01467, csr01468, csr01469"
163 | spc00162, (Journey of the Universe: Weaving Knowledge and Action),Yale University,"csr01470, csr01471, csr01472, csr01473, csr01474"
164 | spc00163,Big History: Connecting Knowledge,Macquarie University,"csr01475, csr01476, csr01477, csr01478, csr01479, csr01480, csr01481"
165 | spc00164,Geographic Information Systems (GIS),"University of California, Davis","csr01482, csr01483, csr01484, csr01485"
166 | spc00165,Web Design for Everybody (Basics of Web Development and Coding),University of Michigan,"csr01486, csr01487, csr01488, csr01489, csr01490"
167 | spc00166,Fundamentals of Computing,Rice University,"csr01491, csr01492, csr01493, csr01494"
168 | spc00167,The Data Scientists Toolbox,Johns Hopkins University,"csr01495, csr01496, csr01497, csr01498"
169 | spc00168,Introduction to Formal Concept Analysis,Higher School of Economics,"csr01499, csr01500, csr01501, csr01502"
170 | spc00169,Journey Conversations: Weaving Knowledge and Action,Yale University,"csr01503, csr01504, csr01505, csr01506, csr01507"
171 | spc00170,Six Sigma Green Belt,University System of Georgia,"csr01508, csr01509, csr01510, csr01511"
172 | spc00171,Introduction to Graph Theory,"University of California, San Diego, Higher School of Economics","csr01522, csr01523, csr01524"
173 | spc00172,"Big Data Analysis: Hive, Spark SQL, DataFrames and GraphFrames",Yandex,"csr01525, csr01526, csr01527, csr01528, csr01529, csr01530"
174 | spc00173,Probabilistic Graphical Models ,Stanford University,"csr01531, csr01532, csr01533, csr01534, csr01535, csr01536"
175 | spc00174,"Algorithms, Part I",Princeton University,"csr01537, csr01538, csr01539, csr01540, csr01541"
176 | spc00175,Combinatorics and Probability,"University of California, San Diego, Higher School of Economics","csr01558, csr01559, csr01560, csr01561, csr01562"
177 | spc00176,An Intuitive Introduction to Probability,University of Zurich,"csr01563, csr01564, csr01565"
178 | spc00177,Data Science Math Skills,Duke University,"csr01566, csr01567, csr01568, csr01569, csr01570"
179 | spc00178,Introduction to Probability and Data,Duke University,"csr01571, csr01572, csr01573, csr01574, csr01575"
180 | spc00179,Statistics with R,Duke University,"csr01576, csr01577, csr01578, csr01579, csr01580"
181 | spc00180,Probabilistic Graphical Models ,Stanford University,"csr01581, csr01582, csr01583, csr01584"
182 | spc00181,Probabilistic Graphical Models 1: Representation,Stanford University,"csr01585, csr01586, csr01587, csr01588, csr01589"
183 | spc00182,Introduction to Engineering Mechanics,Georgia Institute of Technology,"csr01603, csr01604, csr01605, csr01606, csr01607"
184 | spc00183,Materials Science: 10 Things Every Engineer Should Know,"University of California, Davis","csr01608, csr01609, csr01610, csr01611, csr01612, csr01613"
185 | spc00184,Code Yourself! An Introduction to Programming,"The University of Edinburgh, Universidad ORT Uruguay","csr01614, csr01615, csr01616, csr01617, csr01618"
186 | spc00185,Fundamentals of Engineering Exam Review,Georgia Institute of Technology,"csr01619, csr01620, csr01621"
187 | spc00186,Engineering Systems in Motion: Dynamics of Particles and Bodies in 2D Motion,Georgia Institute of Technology,"csr01622, csr01623, csr01624"
188 | spc00187,Bitcoin and Cryptocurrency Technologies,Princeton University,"csr01625, csr01626, csr01627"
189 | spc00188,Applications in Engineering Mechanics,Georgia Institute of Technology,"csr01628, csr01629, csr01630, csr01631, csr01632"
190 | spc00189,Cryptography I,Stanford University,"csr01646, csr01647, csr01648, csr01649"
191 | spc00190,Essential Design Principles for Tableau,"University of California, Davis","csr01650, csr01651, csr01652, csr01653, csr01654, csr01655"
192 | spc00191,Digital Transformation in Financial Services,Copenhagen Business School,"csr01656, csr01657, csr01658, csr01659, csr01660"
193 | spc00192,Systems Biology and Biotechnology,Icahn School of Medicine at Mount Sinai,"csr01661, csr01662, csr01663, csr01664"
194 | spc00193,Global Warming I: The Science and Modeling of Climate Change,The University of Chicago,"csr01665, csr01666, csr01667, csr01668"
195 | spc00194,Business English: Meetings,University of Washington,"csr01684, csr01685, csr01686, csr01687, csr01688"
196 | spc00195,Introduction to Systems Biology,Icahn School of Medicine at Mount Sinai,"csr01689, csr01690, csr01691, csr01692, csr01693"
197 | spc00196,Dynamical Modeling Methods for Systems Biology,Icahn School of Medicine at Mount Sinai,"csr01694, csr01695, csr01696, csr01697"
198 | spc00197,Software Design Methods and Tools,University of Colorado System,"csr01698, csr01699, csr01700, csr01701, csr01702"
199 | spc00198,"HI-FIVE: Health Informatics For Innovation, Value & Enrichment (Social/Peer Perspective)",Columbia University,"csr01719, csr01720, csr01721, csr01722, csr01723"
200 | spc00199,Subsistence Marketplaces,University of Illinois at Urbana-Champaign,"csr01743, csr01744, csr01745, csr01746, csr01747"
201 | spc00200,Applied Data Science with Python,University of Michigan,"csr01748, csr01749, csr01750, csr01751, csr01752"
202 | spc00201,"Espace mondial, une vision franaise des global studies",Sciences Po,"csr01753, csr01754, csr01755, csr01756, csr01757"
203 | spc00202,Virtual Teacher,"University of California, Irvine","csr01758, csr01759, csr01760, csr01761, csr01762"
204 | spc00203,Advanced Search Engine Optimization Strategies,"University of California, Davis","csr01763, csr01764, csr01765, csr01766, csr01767"
205 | spc00204,"Espace mondial, a French vision of Global studies",Sciences Po,"csr01768, csr01769, csr01770, csr01771, csr01772, csr01773, csr01774"
206 | spc00205,Innovative Finance: Hacking finance to change the world ,University of Cape Town,"csr01789, csr01790, csr01791, csr01792, csr01793, csr01794"
207 | spc00206,Content Strategy for Professionals: Expanding Your Contents Reach,Northwestern University,"csr01795, csr01796, csr01797, csr01798, csr01799, csr01800"
208 | spc00207,Survey analysis to Gain Marketing Insights,Emory University,"csr01839, csr01840, csr01841, csr01842"
209 | spc00208,The Importance of Listening,Northwestern University,"csr01862, csr01863, csr01864, csr01865, csr01866"
210 | spc00209,Pricing Strategy,IE Business School,"csr01867, csr01868, csr01869, csr01870, csr01871"
211 | spc00210,The Business of Social,Northwestern University,"csr01872, csr01873, csr01874, csr01875, csr01876"
212 | spc00211,Channel Management and Retailing ,IE Business School,"csr01877, csr01878, csr01879, csr01880"
213 | spc00212,Introduction to Social Media Analytics,Emory University,"csr01881, csr01882, csr01883, csr01884, csr01885"
214 | spc00213,Essentials of Corporate Finance,The University of Melbourne,"csr01886, csr01887, csr01888, csr01889, csr01890, csr01891, csr01892"
215 | spc00214,Investigacin de mercados y comportamiento del consumidor,IE Business School,"csr01893, csr01894, csr01895, csr01896, csr01897"
216 | spc00215,Business Analytics,University of Pennsylvania,"csr01898, csr01899, csr01900, csr01901"
217 | spc00216,Fundamentals of Global Energy Business,University of Colorado System,"csr01902, csr01903, csr01904, csr01905, csr01906"
218 | spc00217,Business Growth Strategy,University of Virginia,"csr01907, csr01908, csr01909, csr01910, csr01911"
219 | spc00218,Questionnaire Design for Social Surveys,University of Michigan,"csr01922, csr01923, csr01924, csr01925, csr01926"
220 | spc00219,Capstone Design Project in Power Electronics,University of Colorado Boulder,"csr01966, csr01967, csr01968, csr01969, csr01970"
221 | spc00220,Competitive Strategy and Organization Design Project,Ludwig-Maximilians-Universitt Mnchen (LMU),"csr01971, csr01972, csr01973, csr01974, csr01975"
222 | spc00221,Design and Build a Data Warehouse for Business Intelligence Implementation,University of Colorado System,"csr01976, csr01977, csr01978, csr01979"
223 | spc00222,Game Design Document: Define the Art & Concepts,California Institute of the Arts,"csr01980, csr01981, csr01982, csr01983, csr01984"
224 | spc00223,Introduction to Typography,California Institute of the Arts,"csr01985, csr01986, csr01987, csr01988, csr01989"
--------------------------------------------------------------------------------
/All_Mooc/Program_order/Specialization/no_duplicate?/Specialization2.csv:
--------------------------------------------------------------------------------
1 | name,provenance,special_id,courseList
2 | Data Science,Johns Hopkins University,spc00001,"csr00001, csr00002, csr00003, csr00004, csr00005, csr00006, csr00007, csr00008, csr00009, csr00010"
3 | Data Science Math Skills,Duke University,spc00002,"csr00011, csr00012, csr00013, csr00014, csr00015"
4 | Data Analysis and Presentation Skills: the PwC Approach,PwC,spc00003,"csr00016, csr00017, csr00018, csr00019, csr00020"
5 | Data Visualization with Tableau,"University of California, Davis",spc00004,"csr00021, csr00022, csr00023, csr00024, csr00025, csr00026"
6 | A Crash Course in Data Science,Johns Hopkins University,spc00005,"csr00027, csr00028, csr00029, csr00030, csr00031, csr00032"
7 | Data Mining ,University of Illinois at Urbana-Champaign,spc00006,"csr00033, csr00034, csr00035, csr00036, csr00037"
8 | Big Data,"University of California, San Diego",spc00007,"csr00038, csr00039, csr00040, csr00041, csr00042"
9 | Applied Data Science with Python,University of Michigan,spc00008,"csr00043, csr00044, csr00045, csr00046, csr00047"
10 | Data Analysis and Interpretation,Wesleyan University,spc00009,"csr00048, csr00049, csr00050, csr00051, csr00052, csr00053"
11 | Big Data for Data Engineers,Yandex,spc00010,"csr00054, csr00055, csr00056, csr00057, csr00058"
12 | Data Structures and Algorithms,"University of California, San Diego, Higher School of Economics",spc00011,"csr00059, csr00060, csr00061, csr00062, csr00063, csr00064, csr00065, csr00066"
13 | Executive Data Science,Johns Hopkins University,spc00012,"csr00067, csr00068, csr00069, csr00070, csr00071"
14 | Genomic Data Science,Johns Hopkins University,spc00013,"csr00072, csr00073, csr00074, csr00075, csr00076, csr00077, csr00078"
15 | Data Warehousing for Business Intelligence,University of Colorado System,spc00014,"csr00079, csr00080, csr00081, csr00082"
16 | Survey Data Collection and Analytics ,"University of Michigan, University of Maryland, College Park",spc00015,"csr00083, csr00084, csr00085, csr00086, csr00087"
17 | Master of Computer Science in Data Science (MCS-DS),,spc00016,"csr00153, csr00154, csr00155, csr00156"
18 | "Algorithms, Part I",Princeton University,spc00017,"csr00157, csr00158, csr00159, csr00160, csr00161, csr00162"
19 | "Algorithms, Part II",Princeton University,spc00018,"csr00163, csr00164, csr00165, csr00166, csr00167"
20 | Object Oriented Java Programming: Data Structures and Beyond,"University of California, San Diego",spc00019,"csr00191, csr00192, csr00193, csr00194, csr00195, csr00196, csr00197"
21 | Convolutional Neural Networks,deeplearning.ai,spc00020,"csr00198, csr00199, csr00200, csr00201"
22 | (MCS-DS),,spc00021,"csr00262, csr00263, csr00264, csr00265, csr00266"
23 | Pattern Discovery in Data Mining,University of Illinois at Urbana-Champaign,spc00022,"csr00267, csr00268, csr00269, csr00270, csr00271, csr00272, csr00273, csr00274, csr00275, csr00276"
24 | Command Line Tools for Genomic Data Science,Johns Hopkins University,spc00023,"csr00277, csr00278, csr00279, csr00280"
25 | Internet Giants: The Law and Economics of Media Platforms,The University of Chicago,spc00024,"csr00281, csr00282, csr00283, csr00284, csr00285, csr00286"
26 | "Nanotechnology and Nanosensors, Part1",Technion - Israel Institute of Technology,spc00025,"csr00287, csr00288, csr00289, csr00290, csr00291"
27 | Introduction to MongoDB,MongoDB Inc.,spc00026,"csr00301, csr00302, csr00303, csr00304, csr00305, csr00306"
28 | Neural Networks and Deep Learning,deeplearning.ai,spc00027,"csr00307, csr00308, csr00309, csr00310, csr00311"
29 | (MCS-DS),,spc00028,"csr00312, csr00313, csr00314, csr00315"
30 | Spanish Vocabulary: Meeting People,"University of California, Davis",spc00029,"csr00316, csr00317, csr00318, csr00319, csr00320"
31 | Ruby on Rails Web Development,Johns Hopkins University,spc00030,"csr00342, csr00343, csr00344"
32 | Genome Assembly Programming Challenge,"University of California, San Diego, Higher School of Economics",spc00031,"csr00345, csr00346, csr00347, csr00348, csr00349, csr00350"
33 | "Parallel, Concurrent, and Distributed Programming in Java",Rice University,spc00032,"csr00351, csr00352, csr00353, csr00354, csr00355"
34 | Software Design Threats and Mitigations,University of Colorado System,spc00033,"csr00361, csr00362, csr00363, csr00364, csr00365"
35 | Character Design for Video Games,California Institute of the Arts,spc00034,"csr00366, csr00367, csr00368, csr00369, csr00370"
36 | From Data to Insights with Google Cloud Platform,Google Cloud,spc00035,"csr00383, csr00384, csr00385, csr00386"
37 | Real-Time Cyber Threat Detection and Mitigation,New York University Tandon School of Engineering,spc00036,"csr00387, csr00388, csr00389, csr00390, csr00391, csr00392, csr00393"
38 | Secure Software Design,University of Colorado System,spc00037,"csr00394, csr00395, csr00396, csr00397, csr00398"
39 | Structuring Machine Learning Projects,deeplearning.ai,spc00038,"csr00399, csr00400, csr00401, csr00402"
40 | Internet of Things: How did we get here?,"University of California, San Diego",spc00039,"csr00403, csr00404, csr00405, csr00406, csr00407"
41 | ,Ludwig-Maximilians-Universitt Mnchen (LMU),spc00040,"csr00408, csr00409, csr00410, csr00411, csr00412"
42 | "Trabaja inteligentemente, no ms duro: Gestin del tiempo para la productividad personal y profesional ","University of California, Irvine",spc00041,"csr00413, csr00414, csr00415, csr00416"
43 | Digital Manufacturing & Design Technology ,"The State University of New York, University at Buffalo",spc00042,"csr00428, csr00429, csr00430, csr00431, csr00432, csr00433"
44 | Internet of Things,"University of California, San Diego",spc00043,"csr00434, csr00435, csr00436, csr00437"
45 | Architecting with Google Cloud Platform,Google Cloud,spc00044,"csr00438, csr00439, csr00440, csr00441, csr00442"
46 | Introduction to Discrete Mathematics for Computer Science,"Higher School of Economics, University of California, San Diego",spc00045,"csr00443, csr00444, csr00445, csr00446, csr00447"
47 | Android App Development,Vanderbilt University,spc00046,"csr00523, csr00524, csr00525, csr00526, csr00527"
48 | Java Programming and Software Engineering Fundamentals,Duke University,spc00047,"csr00562, csr00563, csr00564, csr00565, csr00566"
49 | Learn to Program: The Fundamentals,University of Toronto,spc00048,"csr00572, csr00573, csr00574"
50 | Web Applications for Everybody,University of Michigan,spc00049,"csr00595, csr00596, csr00597, csr00598"
51 | Building Web Applications in PHP,University of Michigan,spc00050,"csr00599, csr00600, csr00601, csr00602, csr00603"
52 | Python for Everybody,University of Michigan,spc00051,"csr00604, csr00605, csr00606, csr00607, csr00608"
53 | Using Python to Access Web Data,University of Michigan,spc00052,"csr00609, csr00610, csr00611, csr00612, csr00613, csr00614"
54 | Web Application Development: Basic Concepts,University of New Mexico,spc00053,"csr00615, csr00616, csr00617, csr00618, csr00619"
55 | Web Design for Everybody (Basics of Web Development and Coding),University of Michigan,spc00054,"csr00620, csr00621, csr00622, csr00623, csr00624, csr00625"
56 | Mathematical Foundations for Cryptography,University of Colorado System,spc00055,"csr00645, csr00646, csr00647, csr00648"
57 | Applied Cryptography,University of Colorado System,spc00056,"csr00667, csr00668, csr00669, csr00670"
58 | Cryptographic Hash and Integrity Protection,University of Colorado System,spc00057,"csr00671, csr00672, csr00673, csr00674, csr00675, csr00676, csr00677"
59 | Global Challenges in Business,University of Illinois at Urbana-Champaign,spc00058,"csr00678, csr00679, csr00680, csr00681, csr00682, csr00683"
60 | Vital Signs: Understanding What the Body Is Telling Us,University of Pennsylvania,spc00059,"csr00684, csr00685, csr00686, csr00687, csr00688"
61 | Responsive Website Development and Design,"University of London, Goldsmiths, University of London",spc00060,"csr00689, csr00690, csr00691, csr00692, csr00693"
62 | Practical Machine Learning,Johns Hopkins University,spc00061,"csr00694, csr00695, csr00696, csr00697, csr00698"
63 | Introduction to the Internet of Things and Embedded Systems,"University of California, Irvine",spc00062,"csr00713, csr00714, csr00715, csr00716, csr00717, csr00718"
64 | A developer's guide to the Internet of Things (IoT),IBM,spc00063,"csr00719, csr00720, csr00721, csr00722, csr00723, csr00724, csr00725, csr00726, csr00727, csr00728"
65 | Cybersecurity Policy for Aviation and Internet Infrastructures,University of Colorado System,spc00064,"csr00729, csr00730, csr00731, csr00732, csr00733"
66 | Master of Business Administration (iMBA),,spc00065,"csr00754, csr00755, csr00756, csr00757, csr00758, csr00759, csr00760"
67 | Master's in Innovation and Entrepreneurship,,spc00066,"csr00761, csr00762, csr00763, csr00764"
68 | Professional Certificate in Innovation Management and Entrepreneurship,,spc00067,"csr00765, csr00766, csr00767, csr00768"
69 | Managerial Economics and Business Analysis ,University of Illinois at Urbana-Champaign,spc00068,"csr00769, csr00770, csr00771, csr00772, csr00773, csr00774, csr00775"
70 | Strategic Business Analytics,ESSEC Business School,spc00069,"csr00776, csr00777, csr00778, csr00779, csr00780"
71 | Excel Skills for Business,Macquarie University,spc00070,"csr00781, csr00782, csr00783, csr00784"
72 | Statistical Molecular Thermodynamics,University of Minnesota,spc00071,"csr00825, csr00826, csr00827, csr00828"
73 | Statistical Thermodynamics: Molecules to Machines,Carnegie Mellon University,spc00072,"csr00829, csr00830, csr00831, csr00832, csr00833, csr00834, csr00835"
74 | Social Media Marketing,Northwestern University,spc00073,"csr00838, csr00839, csr00840, csr00841, csr00842, csr00843"
75 | Social Media Data Analytics,Rutgers the State University of New Jersey,spc00074,"csr00844, csr00845, csr00846"
76 | What is Social?,Northwestern University,spc00075,"csr00847, csr00848, csr00849, csr00850, csr00851"
77 | Design Patterns,University of Alberta,spc00076,"csr00855, csr00856, csr00857, csr00858, csr00859"
78 | Fashion as Design,The Museum of Modern Art,spc00077,"csr00860, csr00861, csr00862, csr00863, csr00864, csr00865, csr00866, csr00867"
79 | Graphic Design,California Institute of the Arts,spc00078,"csr00868, csr00869, csr00870, csr00871, csr00872"
80 | Introduction to User Experience Design ,Georgia Institute of Technology,spc00079,"csr00873, csr00874, csr00875, csr00876, csr00877"
81 | Cryptography and Information Theory,University of Colorado System,spc00080,"csr00878, csr00879, csr00880, csr00881, csr00882"
82 | Academic Information Seeking,"University of Copenhagen, Technical University of Denmark (DTU)",spc00081,"csr00883, csr00884, csr00885, csr00886, csr00887, csr00888, csr00889, csr00890"
83 | Metaliteracy: Empowering Yourself in a Connected World,The State University of New York,spc00082,"csr00891, csr00892, csr00893, csr00894, csr00895"
84 | Mindware: Critical Thinking for the Information Age ,University of Michigan,spc00083,"csr00896, csr00897, csr00898, csr00899, csr00900"
85 | Business Analytics,University of Pennsylvania,spc00084,"csr00901, csr00902, csr00903, csr00904"
86 | Introduction to Complex Analysis,Wesleyan University,spc00085,"csr00920, csr00921, csr00922, csr00923, csr00924"
87 | Analysis of Algorithms,Princeton University,spc00086,"csr00925, csr00926, csr00927, csr00928, csr00929"
88 | Bayesian Statistics: From Concept to Data Analysis,"University of California, Santa Cruz",spc00087,"csr00935, csr00936, csr00937, csr00938, csr00939, csr00940, csr00941"
89 | Business Statistics and Analysis,Rice University,spc00088,"csr00942, csr00943, csr00944, csr00945, csr00946"
90 | "Internet History, Technology, and Security",University of Michigan,spc00089,"csr00962, csr00963, csr00964, csr00965, csr00966, csr00967"
91 | The Impact of Technology,EIT Digital ,spc00090,"csr00968, csr00969, csr00970, csr00971, csr00972, csr00973"
92 | Get Interactive: Practical Teaching with Technology,"University of London, Bloomsbury Learning Environment",spc00091,"csr00974, csr00975, csr00976, csr00977, csr00978, csr00979, csr00980, csr00981, csr00982, csr00983"
93 | Fundamentals of Network Communication,University of Colorado System,spc00092,"csr01001, csr01002, csr01003, csr01004"
94 | Network Protocols and Architecture,Cisco,spc00093,"csr01005, csr01006, csr01007, csr01008, csr01009"
95 | Digital Marketing,University of Illinois at Urbana-Champaign,spc00094,"csr01028, csr01029, csr01030, csr01031, csr01032, csr01033, csr01034"
96 | Introduction to Marketing,University of Pennsylvania,spc00095,"csr01035, csr01036, csr01037, csr01038, csr01039, csr01040"
97 | Business Foundations,University of Pennsylvania,spc00096,"csr01041, csr01042, csr01043, csr01044, csr01045, csr01046"
98 | Marketing in a Digital World,University of Illinois at Urbana-Champaign,spc00097,"csr01047, csr01048, csr01049, csr01050, csr01051"
99 | The Cycle: Management of Successful Arts and Cultural Organizations,"University of Maryland, College Park",spc00098,"csr01052, csr01053, csr01054, csr01055, csr01056, csr01057"
100 | Food & Beverage Management,Universit Bocconi,spc00099,"csr01058, csr01059, csr01060, csr01061"
101 | International Marketing Entry and Execution,Yonsei University,spc00100,"csr01062, csr01063, csr01064, csr01065, csr01066"
102 | International Marketing & Cross Industry Growth ,Yonsei University,spc00101,"csr01067, csr01068, csr01069, csr01070, csr01071"
103 | Introduccin al Marketing,University of Pennsylvania,spc00102,"csr01072, csr01073, csr01074, csr01075, csr01076, csr01077"
104 | The Strategy of Content Marketing,"University of California, Davis",spc00103,"csr01078, csr01079, csr01080, csr01081, csr01082"
105 | Digital Product Management: Modern Fundamentals,University of Virginia,spc00104,"csr01093, csr01094, csr01095, csr01096, csr01097, csr01098, csr01099"
106 | Digital Signal Processing,cole Polytechnique Fdrale de Lausanne,spc00105,"csr01104, csr01105, csr01106, csr01107, csr01108, csr01109, csr01110, csr01111, csr01112, csr01113"
107 | Digital Footprint,The University of Edinburgh,spc00106,"csr01114, csr01115, csr01116, csr01117, csr01118"
108 | Introduction to Electronics,Georgia Institute of Technology,spc00107,"csr01135, csr01136, csr01137, csr01138, csr01139, csr01140"
109 | Software Architecture,University of Alberta,spc00108,"csr01160, csr01161, csr01162, csr01163"
110 | Service-Oriented Architecture,University of Alberta,spc00109,"csr01164, csr01165, csr01166, csr01167"
111 | Computer Architecture,Princeton University,spc00110,"csr01168, csr01169, csr01170, csr01171"
112 | Financing and Investing in Infrastructure,Universit Bocconi,spc00111,"csr01189, csr01190, csr01191, csr01192, csr01193, csr01194"
113 | Smart Cities Management of Smart Urban Infrastructures,cole Polytechnique Fdrale de Lausanne,spc00112,"csr01195, csr01196, csr01197, csr01198"
114 | Beginning Game Programming with C#,University of Colorado System,spc00113,"csr01225, csr01226, csr01227, csr01228, csr01229, csr01230"
115 | Programming for Everybody (Getting Started with Python),University of Michigan,spc00114,"csr01235, csr01236, csr01237, csr01238, csr01239"
116 | Code Yourself! An Introduction to Programming,"The University of Edinburgh, Universidad ORT Uruguay",spc00115,"csr01240, csr01241, csr01242, csr01243"
117 | Introduction to Programming with MATLAB,Vanderbilt University,spc00116,"csr01244, csr01245, csr01246"
118 | An Introduction to Programming the Internet of Things (IOT),"University of California, Irvine",spc00117,"csr01247, csr01248, csr01249, csr01250, csr01251"
119 | Google Cloud Platform Fundamentals: Core Infrastructure,Google Cloud,spc00118,"csr01269, csr01270, csr01271, csr01272, csr01273, csr01274"
120 | Data Science at Scale,University of Washington,spc00119,"csr01275, csr01276, csr01277, csr01278"
121 | "Big Data Essentials: HDFS, MapReduce and Spark RDD",Yandex,spc00120,"csr01303, csr01304, csr01305, csr01306, csr01307"
122 | Maps and the Geospatial Revolution,The Pennsylvania State University,spc00121,"csr01308, csr01309, csr01310, csr01311"
123 | Visual Analytics with Tableau,"University of California, Davis",spc00122,"csr01318, csr01319, csr01320, csr01321, csr01322"
124 | Homeland Security and Cybersecurity,University of Colorado System,spc00123,"csr01348, csr01349, csr01350, csr01351, csr01352"
125 | Philosophy and the Sciences: Introduction to the Philosophy of Cognitive Sciences,The University of Edinburgh,spc00124,"csr01353, csr01354, csr01355, csr01356"
126 | Personality Types at Work,University of Florida,spc00125,"csr01357, csr01358, csr01359, csr01360"
127 | Entrepreneurship,University of Pennsylvania,spc00126,"csr01396, csr01397, csr01398, csr01399"
128 | Entrepreneurship 1: Developing the Opportunity,University of Pennsylvania,spc00127,"csr01400, csr01401, csr01402, csr01403, csr01404, csr01405"
129 | English for Business and Entrepreneurship,University of Pennsylvania,spc00128,"csr01406, csr01407, csr01408"
130 | Entrepreneurship 2: Launching your Start-Up,University of Pennsylvania,spc00129,"csr01409, csr01410, csr01411, csr01412"
131 | Career Success,"University of California, Irvine",spc00130,"csr01413, csr01414, csr01415, csr01416, csr01417, csr01418, csr01419"
132 | Cyber Threats and Attack Vectors,University of Colorado System,spc00131,"csr01433, csr01434, csr01435, csr01436, csr01437"
133 | Detecting and Mitigating Cyber Threats and Attacks,University of Colorado System,spc00132,"csr01438, csr01439, csr01440, csr01441"
134 | Cybersecurity,"University of Maryland, College Park",spc00133,"csr01442, csr01443, csr01444"
135 | Bitcoin and Cryptocurrency Technologies,Princeton University,spc00134,"csr01445, csr01446, csr01447, csr01448"
136 | "Knowledge Exchange: Using, Protecting and Monetizing Ideas with Third Parties","Universiteit Leiden, Leiden University Medical Center, Luris",spc00135,"csr01465, csr01466, csr01467, csr01468, csr01469"
137 | (Journey of the Universe: Weaving Knowledge and Action),Yale University,spc00136,"csr01470, csr01471, csr01472, csr01473, csr01474"
138 | Big History: Connecting Knowledge,Macquarie University,spc00137,"csr01475, csr01476, csr01477, csr01478, csr01479, csr01480, csr01481"
139 | Geographic Information Systems (GIS),"University of California, Davis",spc00138,"csr01482, csr01483, csr01484, csr01485"
140 | Fundamentals of Computing,Rice University,spc00139,"csr01491, csr01492, csr01493, csr01494"
141 | The Data Scientist's Toolbox,Johns Hopkins University,spc00140,"csr01495, csr01496, csr01497, csr01498"
142 | Introduction to Formal Concept Analysis,Higher School of Economics,spc00141,"csr01499, csr01500, csr01501, csr01502"
143 | Journey Conversations: Weaving Knowledge and Action,Yale University,spc00142,"csr01503, csr01504, csr01505, csr01506, csr01507"
144 | Six Sigma Green Belt,University System of Georgia,spc00143,"csr01508, csr01509, csr01510, csr01511"
145 | Introduction to Graph Theory,"University of California, San Diego, Higher School of Economics",spc00144,"csr01522, csr01523, csr01524"
146 | "Big Data Analysis: Hive, Spark SQL, DataFrames and GraphFrames",Yandex,spc00145,"csr01525, csr01526, csr01527, csr01528, csr01529, csr01530"
147 | Probabilistic Graphical Models ,Stanford University,spc00146,"csr01531, csr01532, csr01533, csr01534, csr01535, csr01536"
148 | Combinatorics and Probability,"University of California, San Diego, Higher School of Economics",spc00147,"csr01558, csr01559, csr01560, csr01561, csr01562"
149 | An Intuitive Introduction to Probability,University of Zurich,spc00148,"csr01563, csr01564, csr01565"
150 | Introduction to Probability and Data,Duke University,spc00149,"csr01571, csr01572, csr01573, csr01574, csr01575"
151 | Statistics with R,Duke University,spc00150,"csr01576, csr01577, csr01578, csr01579, csr01580"
152 | Probabilistic Graphical Models 1: Representation,Stanford University,spc00151,"csr01585, csr01586, csr01587, csr01588, csr01589"
153 | Introduction to Engineering Mechanics,Georgia Institute of Technology,spc00152,"csr01603, csr01604, csr01605, csr01606, csr01607"
154 | Materials Science: 10 Things Every Engineer Should Know,"University of California, Davis",spc00153,"csr01608, csr01609, csr01610, csr01611, csr01612, csr01613"
155 | Fundamentals of Engineering Exam Review,Georgia Institute of Technology,spc00154,"csr01619, csr01620, csr01621"
156 | Engineering Systems in Motion: Dynamics of Particles and Bodies in 2D Motion,Georgia Institute of Technology,spc00155,"csr01622, csr01623, csr01624"
157 | Applications in Engineering Mechanics,Georgia Institute of Technology,spc00156,"csr01628, csr01629, csr01630, csr01631, csr01632"
158 | Cryptography I,Stanford University,spc00157,"csr01646, csr01647, csr01648, csr01649"
159 | Essential Design Principles for Tableau,"University of California, Davis",spc00158,"csr01650, csr01651, csr01652, csr01653, csr01654, csr01655"
160 | Digital Transformation in Financial Services,Copenhagen Business School,spc00159,"csr01656, csr01657, csr01658, csr01659, csr01660"
161 | Systems Biology and Biotechnology,Icahn School of Medicine at Mount Sinai,spc00160,"csr01661, csr01662, csr01663, csr01664"
162 | Global Warming I: The Science and Modeling of Climate Change,The University of Chicago,spc00161,"csr01665, csr01666, csr01667, csr01668"
163 | Business English: Meetings,University of Washington,spc00162,"csr01684, csr01685, csr01686, csr01687, csr01688"
164 | Introduction to Systems Biology,Icahn School of Medicine at Mount Sinai,spc00163,"csr01689, csr01690, csr01691, csr01692, csr01693"
165 | Dynamical Modeling Methods for Systems Biology,Icahn School of Medicine at Mount Sinai,spc00164,"csr01694, csr01695, csr01696, csr01697"
166 | Software Design Methods and Tools,University of Colorado System,spc00165,"csr01698, csr01699, csr01700, csr01701, csr01702"
167 | "HI-FIVE: Health Informatics For Innovation, Value & Enrichment (Social/Peer Perspective)",Columbia University,spc00166,"csr01719, csr01720, csr01721, csr01722, csr01723"
168 | Subsistence Marketplaces,University of Illinois at Urbana-Champaign,spc00167,"csr01743, csr01744, csr01745, csr01746, csr01747"
169 | "Espace mondial, une vision française des global studies",Sciences Po,spc00168,"csr01753, csr01754, csr01755, csr01756, csr01757"
170 | Virtual Teacher,"University of California, Irvine",spc00169,"csr01758, csr01759, csr01760, csr01761, csr01762"
171 | Advanced Search Engine Optimization Strategies,"University of California, Davis",spc00170,"csr01763, csr01764, csr01765, csr01766, csr01767"
172 | "Espace mondial, a French vision of Global studies",Sciences Po,spc00171,"csr01768, csr01769, csr01770, csr01771, csr01772, csr01773, csr01774"
173 | Innovative Finance: Hacking finance to change the world ,University of Cape Town,spc00172,"csr01789, csr01790, csr01791, csr01792, csr01793, csr01794"
174 | Content Strategy for Professionals: Expanding Your Content's Reach,Northwestern University,spc00173,"csr01795, csr01796, csr01797, csr01798, csr01799, csr01800"
175 | Survey analysis to Gain Marketing Insights,Emory University,spc00174,"csr01839, csr01840, csr01841, csr01842"
176 | The Importance of Listening,Northwestern University,spc00175,"csr01862, csr01863, csr01864, csr01865, csr01866"
177 | Pricing Strategy,IE Business School,spc00176,"csr01867, csr01868, csr01869, csr01870, csr01871"
178 | The Business of Social,Northwestern University,spc00177,"csr01872, csr01873, csr01874, csr01875, csr01876"
179 | Channel Management and Retailing ,IE Business School,spc00178,"csr01877, csr01878, csr01879, csr01880"
180 | Introduction to Social Media Analytics,Emory University,spc00179,"csr01881, csr01882, csr01883, csr01884, csr01885"
181 | Essentials of Corporate Finance,The University of Melbourne,spc00180,"csr01886, csr01887, csr01888, csr01889, csr01890, csr01891, csr01892"
182 | Investigación de mercados y comportamiento del consumidor,IE Business School,spc00181,"csr01893, csr01894, csr01895, csr01896, csr01897"
183 | Fundamentals of Global Energy Business,University of Colorado System,spc00182,"csr01902, csr01903, csr01904, csr01905, csr01906"
184 | Business Growth Strategy,University of Virginia,spc00183,"csr01907, csr01908, csr01909, csr01910, csr01911"
185 | Questionnaire Design for Social Surveys,University of Michigan,spc00184,"csr01922, csr01923, csr01924, csr01925, csr01926"
186 | Capstone Design Project in Power Electronics,University of Colorado Boulder,spc00185,"csr01966, csr01967, csr01968, csr01969, csr01970"
187 | Competitive Strategy and Organization Design Project,Ludwig-Maximilians-Universität München (LMU),spc00186,"csr01971, csr01972, csr01973, csr01974, csr01975"
188 | Design and Build a Data Warehouse for Business Intelligence Implementation,University of Colorado System,spc00187,"csr01976, csr01977, csr01978, csr01979"
189 | Game Design Document: Define the Art & Concepts,California Institute of the Arts,spc00188,"csr01980, csr01981, csr01982, csr01983, csr01984"
190 | Introduction to Typography,California Institute of the Arts,spc00189,"csr01985, csr01986, csr01987, csr01988, csr01989"
--------------------------------------------------------------------------------
/All_Mooc/Program_order/Specialization/process.py:
--------------------------------------------------------------------------------
"""Build the Coursera specialization table from the crawled course data.

Reads ``Coursera_data.json`` (the merged crawler output), keeps one record
per specialization name, assigns each a ``spcNNNNN`` id, replaces the nested
``courseSet`` with a list of ``csrNNNNN`` course ids, and writes the result
to ``Specialization3.json`` and ``Specialization2.csv``.
"""
import json

import pandas as pd

# Fields of a crawled record that are not carried into the specialization
# table.  Some of them (e.g. "description") are only present when the crawler
# found them on the page, so they are removed with pop() rather than del.
_DROPPED_FIELDS = ("courseSet", "specialization", "description",
                   "course_url", "img")


def build_specializations(courses, join_course_ids=True):
    """Return the de-duplicated specialization records found in *courses*.

    Args:
        courses: iterable of dicts as produced by the Coursera crawler.  A
            record is treated as a specialization when it has a "courseSet"
            key; each entry of that list must carry an "id" whose last five
            characters are the numeric course id.
        join_course_ids: when True (the default, used for the CSV export)
            "courseList" is a single ``", "``-joined string; when False it is
            left as a list of ids.

    Returns:
        A new list of dicts.  Each dict is a copy of the input record without
        the fields in ``_DROPPED_FIELDS`` and with "special_id" and
        "courseList" added.  Only the first record for a given "name" is
        kept.  The input records are not modified.
    """
    records = []
    seen_names = set()
    for item in courses:
        if "courseSet" not in item or item["name"] in seen_names:
            continue
        seen_names.add(item["name"])

        record = dict(item)
        # Ids are consecutive over the records actually kept.
        record["special_id"] = "spc" + str(len(records) + 1).zfill(5)
        course_ids = ["csr" + course["id"][-5:] for course in item["courseSet"]]
        record["courseList"] = ", ".join(course_ids) if join_course_ids else course_ids
        for field in _DROPPED_FIELDS:
            record.pop(field, None)
        records.append(record)
    return records


def main():
    """Convert Coursera_data.json into Specialization3.json / Specialization2.csv."""
    with open("Coursera_data.json") as f:
        coursera = json.load(f)

    csr = build_specializations(coursera)
    print(len(csr))

    # "w", not "a": appending a second JSON array on a re-run would leave a
    # file that neither json.load nor pandas.read_json can parse.
    with open("Specialization3.json", "w") as f:
        json.dump(csr, f)

    df = pd.read_json("Specialization3.json")
    # An explicit encoding replaces the old sys.setdefaultencoding('utf8') hack.
    df.to_csv("Specialization2.csv", encoding="utf-8")


if __name__ == "__main__":
    main()
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/CoursePlanner.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/CoursePlanner.pdf
--------------------------------------------------------------------------------
/Coursera/Meta_data/Coursera.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/Coursera/Meta_data/Coursera.key
--------------------------------------------------------------------------------
/Coursera/Meta_data/Coursera_sample.json:
--------------------------------------------------------------------------------
1 |
2 | // Feed a keyword such as "data" into the search URL on http://www.coursera.org.
3 | // The result is a list of courses and specializations;
4 | // each query page lists 20 of them.
5 | {
6 | "@querypage": "https://www.coursera.org/courses?languages=en&query=data&start=<int>",
7 | "@type" : "ItemList",
8 | "keyWord": "key_input",
9 | "itemListElement":[
10 | {
11 | "@type":"Specialization",
12 | "position":1,
13 | "image": "image_url",
14 | "provenance": "Organization_or_University",
15 | "url":"https://www.coursera.org/specializations/jhu-data-science",
16 | "@type" : "ItemList",
17 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}],
18 | },
19 | {
20 | "@type":"Specialization",
21 | "position":2,
22 | "image": "image_url",
23 | "provenance": "Organization_or_University",
24 | "url":"https://www.coursera.org/specializations/big-data",
25 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}],
26 | },
27 | {
28 | "@type":"Specialization",
29 | "position":3,
30 | "image": "image_url",
31 | "provenance": "Organization_or_University",
32 | "url":"https://www.coursera.org/specializations/data-structures-algorithms",
33 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}],
34 | },
35 | {
36 | "@type":"Specialization",
37 | "position":4,
38 | "image": "image_url",
39 | "provenance": "Organization_or_University",
40 | "url":"https://www.coursera.org/specializations/gcp-data-machine-learning",
41 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}],
42 | },
43 | {
44 | "@type":"Specialization",
45 | "position":5,
46 | "image": "image_url",
47 | "provenance": "Organization_or_University",
48 | "url":"https://www.coursera.org/specializations/data-analysis",
49 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}],
50 | },
51 | {
52 | "@type":"Specialization",
53 | "position":6,
54 | "image": "image_url",
55 | "provenance": "Organization_or_University",
56 | "url":"https://www.coursera.org/specializations/big-data-engineering",
57 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}],
58 | },
59 | {
60 | "@type":"Specialization",
61 | "position":7,
62 | "image": "image_url",
63 | "provenance": "Organization_or_University",
64 | "url":"https://www.coursera.org/specializations/data-science-python",
65 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}],
66 | },
67 | {
68 | "@type":"Course",
69 | "position":8,
70 | "image": "image_url",
71 | "provenance": "Organization_or_University",
72 | "url":"https://www.coursera.org/learn/data-structures"
73 | },
74 | {
75 | "@type":"Course",
76 | "position":9,
77 | "image": "image_url",
78 | "provenance": "Organization_or_University",
79 | "url":"https://www.coursera.org/learn/python-network-data"
80 | },
81 | {
82 | "@type":"Course",
83 | "position":10,
84 | "image": "image_url",
85 | "provenance": "Organization_or_University",
86 | "url":"https://www.coursera.org/learn/big-data-graph-analytics"
87 | },
88 | {
89 | "@type":"Course",
90 | "position":11,
91 | "image": "image_url",
92 | "provenance": "Organization_or_University",
93 | "url":"https://www.coursera.org/learn/gcp-big-data-ml-fundamentals"
94 | },
95 | {
96 | "@type":"Course",
97 | "position":12,
98 | "image": "image_url",
99 | "provenance": "Organization_or_University",
100 | "url":"https://www.coursera.org/learn/big-data-integration-processing"
101 | },
102 | {
103 | "@type":"Course",
104 | "position":13,
105 | "image": "image_url",
106 | "provenance": "Organization_or_University",
107 | "url":"https://www.coursera.org/learn/java-programming-arrays-lists-data"
108 | },
109 | {
110 | "@type":"Course",
111 | "position":14,
112 | "image": "image_url",
113 | "provenance": "Organization_or_University",
114 | "url":"https://www.coursera.org/learn/python-data"
115 | },
116 | {
117 | "@type":"Course",
118 | "position":15,
119 | "image": "image_url",
120 | "provenance": "Organization_or_University",
121 | "url":"https://www.coursera.org/learn/big-data-introduction"
122 | },
123 | {
124 | "@type":"Course",
125 | "position":16,
126 | "image": "image_url",
127 | "provenance": "Organization_or_University",
128 | "url":"https://www.coursera.org/learn/data-manipulation"
129 | },
130 | {
131 | "@type":"Course",
132 | "position":17,
133 | "image": "image_url",
134 | "provenance": "Organization_or_University",
135 | "url":"https://www.coursera.org/learn/big-data-machine-learning"
136 | },
137 | {
138 | "@type":"Course",
139 | "position":18,
140 | "image": "image_url",
141 | "provenance": "Organization_or_University",
142 | "url":"https://www.coursera.org/learn/big-data-management"
143 | },
144 | {
145 | "@type":"Course",
146 | "position":19,
147 | "image": "image_url",
148 | "provenance": "Organization_or_University",
149 | "url":"https://www.coursera.org/learn/python-data-visualization"
150 | },
151 | {
152 | "@type":"Course",
153 | "position":20,
154 | "image": "image_url",
155 | "provenance": "Organization_or_University",
156 | "url":"https://www.coursera.org/learn/datasciencemathskills"
157 | }
158 | ]
159 | }
160 |
--------------------------------------------------------------------------------
/Coursera/Meta_data/Page_RDF.json:
--------------------------------------------------------------------------------
1 | {
2 | "provenance":"Johns Hopkins University",
3 | "name":"Data Science",
4 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/eb/8e18e0a4f111e59ae9c776a3dd0526/jhu-logo-thumb.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
5 | "specialization":"True"
6 | },
7 | {
8 | "provenance":"Duke University",
9 | "name":"Data Science Math Skills",
10 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/08/8c6610c07e11e6a7f5e70b413367a6/DMSIcon.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
11 | "specialization":"False"
12 | },
13 | {
14 | "provenance":"PwC",
15 | "name":"Data Analysis and Presentation Skills: the PwC Approach",
16 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/2a/50c800335611e6a7800fb6ff2f8eb2/PwC_fl_c.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
17 | "specialization":"True"
18 | },
19 | {
20 | "provenance":"University of California, Davis",
21 | "name":"Data Visualization with Tableau",
22 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/41/d326f0af5b11e5ada4195d312ad6aa/davis_data_thumb.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
23 | "specialization":"True"
24 | },
25 | {
26 | "provenance":"Johns Hopkins University",
27 | "name":"A Crash Course in Data Science",
28 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/f0/399ec045ea11e5ba8e4bc295fed462/9990024683_955f8f043b_h.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
29 | "specialization":"False"
30 | },
31 | {
32 | "provenance":"University of Illinois at Urbana-Champaign",
33 | "name":"Data Mining ",
34 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/01/609980fb3311e58521f12840fa495b/DataMining-Specialization_logo.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
35 | "specialization":"True"
36 | },
37 | {
38 | "provenance":"University of California, San Diego",
39 | "name":"Big Data",
40 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/a6/35516029cf11e6b8dfdbd5deea5f21/UCSD_BigData_PartnerProvided.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
41 | "specialization":"True"
42 | },
43 | {
44 | "provenance":"University of Michigan",
45 | "name":"Applied Data Science with Python",
46 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/c8/8d6df01eb311e6b5f5f786b289d8ba/pythondatascience_specialization_final.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
47 | "specialization":"True"
48 | },
49 | {
50 | "provenance":"Wesleyan University",
51 | "name":"Data Analysis and Interpretation",
52 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d2j5ihb19pt1hq.cloudfront.net/sdp_page/s12n_logos/Wesleyan_DataAnalysis_Istock41296982.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
53 | "specialization":"True"
54 | },
55 | {
56 | "provenance":"Yandex",
57 | "name":"Big Data for Data Engineers",
58 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/8c/7d9d20a14411e7843ef75e19b675b3/Yandex-466_______________7.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
59 | "specialization":"True"
60 | },
61 | {
62 | "provenance":"University of California, San Diego, Higher School of Economics",
63 | "name":"Data Structures and Algorithms",
64 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/2a/34a150d9ad11e5bd22cb7d7d7686df/logo3.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
65 | "specialization":"True"
66 | },
67 | {
68 | "provenance":"Johns Hopkins University",
69 | "name":"Executive Data Science",
70 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d396qusza40orc.cloudfront.net/phoenixassets/eds-s12n/eds_thumbnail.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
71 | "specialization":"True"
72 | },
73 | {
74 | "provenance":"Johns Hopkins University",
75 | "name":"Genomic Data Science",
76 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/17/4df3b0a98411e591c0b9c70a6d8700/GBDS.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
77 | "specialization":"True"
78 | },
79 | {
80 | "provenance":"University of Colorado System",
81 | "name":"Data Warehousing for Business Intelligence",
82 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d2j5ihb19pt1hq.cloudfront.net/sdp_page/s12n_logos/Colorado_DataWarehousing_Getty471741796.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
83 | "specialization":"True"
84 | },
85 | {
86 | "provenance":"University of Michigan, University of Maryland, College Park",
87 | "name":"Survey Data Collection and Analytics ",
88 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/e5/f471d0f76811e5a5175d8aed3cf3b0/AdobeStock_83781931.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
89 | "specialization":"True"
90 | },
91 | {
92 | "provenance":"University of Washington",
93 | "name":"Data Science at Scale",
94 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d2j5ihb19pt1hq.cloudfront.net/sdp_page/s12n_logos/UW_PracticalDataScience_Getty479524481.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
95 | "specialization":"True"
96 | },
97 | {
98 | "provenance":"University of Michigan",
99 | "name":"Python Data Structures",
100 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/44/2959b0502911e5851f058ad6ebf936/pythondata_thumbnail_1x1.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
101 | "specialization":"False"
102 | },
103 | {
104 | "provenance":"University of California, San Diego, Higher School of Economics",
105 | "name":"Data Structures",
106 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/2b/96fb00d9ae11e595dfe9e95f32b969/logo3.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
107 | "specialization":"False"
108 | },
109 | {
110 | "provenance":"University of Illinois at Urbana-Champaign",
111 | "name":"Data Visualization",
112 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/f6/4bce20533611e4bc4cff5931b60ef1/Viz_for_Data_Mining.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
113 | "specialization":"False"
114 | },
115 | {
116 | "provenance":"Google Cloud",
117 | "name":"Data Engineering on Google Cloud Platform",
118 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/95/77558056d811e79f95119f98b3ba96/visite-google-datacenter-14.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF",
119 | "specialization":"True"
120 | }
--------------------------------------------------------------------------------
/Coursera/Src/Coursera_driver.py:
--------------------------------------------------------------------------------
"""Crawl Coursera search results for a list of keywords.

For every keyword and result page the script downloads the listing, collects
one record per result card, follows each result's detail page to pick up the
description (and, for specializations, the member courses), and dumps the
records to ``Coursera_<keyword><page>.json``.  The running total of all
records crawled so far is dumped to ``Coursera_<keyword>_content.json``.

All page content is parsed from ``requests`` responses with BeautifulSoup;
the Firefox instance only loads the same URLs alongside.
"""
import itertools
import json
import sys
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str/unicode conversions use UTF-8.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# XPath queries from the earlier Selenium-based extraction; not used by the
# requests/BeautifulSoup code below.
_x_query = {
    "coursename": "//h2[@class = 'color-primary-text headline-1-text flex-1']",
    "specialize": "//span[@class = 'specialization-course-count']/span",
    "image_url": "//div[@class='horizontal-box']/div/img",
    "provenance": ["//span[@class = 'text-light offering-partner-names']/span", "//div[@class = 'text-light offering-partner-names']/span"]
}

SEARCH_URL = "https://www.coursera.org/courses?languages=en&query="

# Last course id handed out by a previous run; new ids continue from here.
LAST_USED_ID = 1718

# Search keywords, each listed once (the original list repeated several of
# them, which crawled the same queries twice).
KEYWORDS = (
    'social', 'marketing', 'design', 'web', 'cyber', 'program', 'platform',
    'map', 'intelligence', 'knowledge', 'graph', 'probability', 'digital',
    'electronic', 'architecture', 'infrastructure', 'entrepreneurship',
    'engineer',
)

# Result pages to fetch per keyword; each page holds 20 results.
PAGE_NUMBERS = range(2, 5)


def _ascii(text):
    """Return *text* with every non-ASCII character dropped."""
    return text.encode('ascii', 'ignore').decode('ascii')


def _parse_listing(soup, page_url):
    """Return one dict (img, name, specialization, provenance) per result card."""
    page = []
    for tag in soup.find_all("div", "offering-content"):
        coursedic = {
            "img": tag.select('img[src]')[0]['src'],
            "name": _ascii(tag.select('h2')[0].text),
            "specialization": bool(tag.select('.specialization-course-count')),
        }
        spans = tag.find_all('span')
        if spans:
            coursedic["provenance"] = _ascii(spans[-1].text)
        else:
            # No partner name on this card; report the page it came from.
            print(page_url)
        page.append(coursedic)
    return page


def _detail_urls(soup):
    """Return the detail-page URLs from the listing's JSON-LD block.

    Returns None when the page has no (or an empty) JSON-LD script tag.
    """
    # select_one() returns None when nothing matches, so test for that before
    # looking at the tag.  The attribute value is quoted because it contains
    # "/" and "+".
    script = soup.select_one('script[type="application/ld+json"]')
    if script is None or len(script) == 0:
        return None
    url_list = json.loads(script.text)
    urls = [_ascii(entry['url']) for entry in url_list["itemListElement"]]
    # NOTE(review): this rewrite of the URL prefix is kept exactly as in the
    # original; confirm against the actual "url" values in the JSON-LD block.
    return ["http:/" + u[4:] for u in urls]


def _fill_specialization(courseObject, soup, course_url, id_counter):
    """Add the description and the "courseSet" of a specialization page."""
    courseObject["courseSet"] = []
    desc = soup.find_all("div", "description subsection")
    if desc:
        courseObject["description"] = desc[0].text
    else:
        print(course_url)

    for tag in soup.find_all('section', 'rc-Course bgcolor-white'):
        course = {}
        name = tag.find_all("h2", "course-name headline-5-text")
        if name:
            course["name"] = _ascii(name[0].text)
        else:
            print("no_name " + course_url)

        description = tag.select('.description-cont')
        if description:
            course["description"] = description[0].text
        else:
            print("no_des " + course_url)

        course["id"] = "coursera" + str(next(id_counter)).zfill(5)
        courseObject["courseSet"].append(course)


def _fill_course(courseObject, soup, course_url, id_counter):
    """Add the description and a fresh id to a stand-alone course record."""
    # The id is consumed even when no description is found.
    course_id = next(id_counter)
    desc = soup.find_all("p", "body-1-text course-description")
    if desc:
        courseObject["description"] = desc[0].text
    else:
        print(course_url)
    courseObject["id"] = "coursera" + str(course_id).zfill(5)


def _crawl_page(browser, url, id_counter):
    """Crawl one search-result page and the detail page of each result.

    Returns the list of completed course records, or None when the listing
    carries no JSON-LD block to take the detail URLs from.
    """
    browser.set_page_load_timeout(1200)
    browser.get(url)
    time.sleep(20)

    soup = BeautifulSoup(requests.get(url).text, 'html.parser')
    page = _parse_listing(soup, url)
    urls = _detail_urls(soup)
    if urls is None:
        return None
    if len(urls) != len(page):
        # Records and URLs are paired by position; anything without a
        # partner is skipped instead of raising IndexError.
        print("result/url count mismatch " + url)

    newpage = []
    for courseObject, course_url in zip(page, urls):
        print(course_url)
        browser.set_page_load_timeout(200000000)
        browser.get(course_url)  # loaded once; the original loaded it twice
        time.sleep(20)

        courseObject["course_url"] = course_url
        detail = BeautifulSoup(requests.get(course_url).text, 'html.parser')
        if 'specializations' in course_url:
            _fill_specialization(courseObject, detail, course_url, id_counter)
        else:
            _fill_course(courseObject, detail, course_url, id_counter)
        newpage.append(courseObject)
    return newpage


def main():
    """Crawl every keyword/page combination and write the JSON dumps."""
    content = []
    id_counter = itertools.count(LAST_USED_ID + 1)

    for keyword in KEYWORDS:
        for j in PAGE_NUMBERS:
            url = SEARCH_URL + keyword + "&start=" + str(j * 20)
            print(url)

            browser = webdriver.Firefox()
            try:
                newpage = _crawl_page(browser, url, id_counter)
            finally:
                # Always release the browser, including on the skip path.
                browser.quit()
            if newpage is None:
                continue

            print(newpage)
            # "w", not "a": appending a second JSON document to the same
            # file would make it unparseable.
            with open('Coursera_' + keyword + str(j) + '.json', 'w') as f:
                json.dump(newpage, f)

            content += newpage
            # NOTE(review): `content` accumulates across keywords, so this
            # file holds everything crawled so far, not just this keyword.
            with open('Coursera_' + keyword + '_content' + '.json', 'w') as f:
                json.dump(content, f)


if __name__ == "__main__":
    main()
146 |
147 |
148 |
149 |
150 |
--------------------------------------------------------------------------------
/Coursera/Src/Crawl_Coursera.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import requests
3 | from bs4 import BeautifulSoup
4 | import json
5 | import time
6 | import sys
7 | reload(sys)
8 | sys.setdefaultencoding('utf-8')
9 |
# XPath selectors for the browser-rendered search page.  Within this script
# they are referenced only by the Selenium-based extraction, which is
# currently disabled; they are kept so that step can be re-enabled.
_x_query = {
    "coursename": "//h2[@class = 'color-primary-text headline-1-text flex-1']",
    "specialize": "//span[@class = 'specialization-course-count']/span",
    "image_url": "//div[@class='horizontal-box']/div/img",
    "provenance": [
        "//span[@class = 'text-light offering-partner-names']/span",
        "//div[@class = 'text-light offering-partner-names']/span",
    ],
}

_SEARCH_URL = "https://www.coursera.org/courses?languages=en&query=web&start="
_PAGE_COUNT = 9            # number of result pages to crawl
_PAGE_SIZE = 20            # each page has 20 courses; also the "start" step
_PAGE_LOAD_TIMEOUT = 30    # seconds, used for both Selenium and requests
_RENDER_WAIT = 20          # seconds to wait after the browser loads a page
_OUTPUT_PATH = 'Coursera_web_content.json'


def _to_ascii(text):
    """Return *text* with every non-ASCII character dropped.

    Encoding straight from the text value avoids the implicit default-codec
    round trip that ``text.decode('utf8')`` performs on Python 2.
    """
    return text.encode('ascii', 'ignore').decode('ascii')


def _summarize_course(card):
    """Build the summary record for one ``div.offering-content`` element.

    Returns a dict with the keys ``img``, ``name``, ``specialization`` and
    ``provenance``.  A field whose element is missing from the card is set
    to ``None`` instead of raising ``IndexError``, so one malformed card
    does not abort the whole crawl.
    """
    images = card.select('img[src]')
    titles = card.select('h2')
    spans = card.find_all('span')
    return {
        "img": images[0]['src'] if images else None,
        "name": _to_ascii(titles[0].text) if titles else None,
        # A card counts as a specialization when it shows a course count.
        "specialization": bool(card.select('.specialization-course-count')),
        # The provider name is taken from the last <span> of the card.
        "provenance": _to_ascii(spans[-1].text) if spans else None,
    }


def _course_urls(soup):
    """Return the course URLs listed in the page's ld+json ``<script>``.

    Returns an empty list when the page has no such script or the script has
    no ``itemListElement`` entry.
    """
    # The attribute value is quoted because it contains '/' and '+'.
    script = soup.select_one('script[type="application/ld+json"]')
    if script is None:
        return []
    listing = json.loads(script.text)
    # NOTE(review): the first four characters of every listed URL are
    # replaced by "http:/", exactly as the original script did -- confirm
    # this still matches the URL format the page emits.
    return ["http:/" + _to_ascii(item['url'])[4:]
            for item in listing.get("itemListElement", [])]


# Accumulated course records of all crawled pages.
content = []

# One browser serves the whole crawl and is always closed, instead of
# starting a new Firefox process for every result page.
browser = webdriver.Firefox()
try:
    browser.set_page_load_timeout(_PAGE_LOAD_TIMEOUT)
    for page_index in range(_PAGE_COUNT):
        page_url = _SEARCH_URL + str(page_index * _PAGE_SIZE)
        print(page_url)

        # NOTE(review): the rendered page is not read by the extraction
        # below (the XPath-based extraction that used it is disabled); the
        # load and wait are kept from the original flow.
        browser.get(page_url)
        time.sleep(_RENDER_WAIT)

        response = requests.get(page_url, timeout=_PAGE_LOAD_TIMEOUT)
        soup = BeautifulSoup(response.text, 'html.parser')

        page = [_summarize_course(card)
                for card in soup.find_all("div", "offering-content")]

        # Attach each course's own URL only when the two lists line up;
        # pairing lists of different length would assign wrong URLs.
        urls = _course_urls(soup)
        if len(urls) == len(page):
            for course, course_url in zip(page, urls):
                course["course_url"] = course_url
        else:
            print("course_url skipped: %d cards but %d listed urls"
                  % (len(page), len(urls)))

        # NOTE(review): the original overwrote ``page`` with the parsed
        # ld+json dict right before this line, so the result list received
        # the dict's keys instead of course records.  Collecting the course
        # records is assumed to be the intent -- confirm.
        content += page

        # NOTE: a per-course detail crawl (description, the course set of a
        # specialization, sequential "courseraNNNNN" ids) used to be
        # sketched here as commented-out code; it was not executed.
finally:
    browser.quit()

# Written once in 'w' mode: appending repeated dumps would leave several
# JSON documents back to back in a file that no JSON parser accepts.
with open(_OUTPUT_PATH, 'w') as f:
    json.dump(content, f)
127 |
128 |
129 |
130 |
131 |
--------------------------------------------------------------------------------
/Coursera/Src/troubleshoot.md:
--------------------------------------------------------------------------------
1 | ## Using Selenium WebDriver on macOS
2 |
3 | 1. Install Selenium: `sudo pip install selenium`
4 |
5 |
6 | 2. Download geckodriver (or chromedriver)
7 |
8 | e.g. `Downloads/geckodriver`
9 |
10 | 3. Find the directory of webdriver/chrome (safari, firefox)
11 |
12 | * Open a Unix system directory from Finder: press Command + Shift + G
13 |
14 | * Trick: run `pip install selenium` again; the terminal will report where it is installed:
15 |
16 | "Requirement already satisfied: selenium in /Library/Python/2.7/site-packages"
17 |
18 | * Go to the selenium folder, open `webdriver`, then drill down to `firefox`
19 |
20 |
21 | 4. Add path for webdriver and geckodriver:
22 |
23 | * `vim ~/.bash_profile`
24 |
25 | `PATH="$PATH:/Library/Python/2.7/site-packages/selenium/webdriver/firefox/:/Users/fp/Downloads/Driver"`
26 | `export PATH`
27 |
28 | * `source ~/.bash_profile`
29 |
30 | 5. Call Selenium Webdriver in Python:
31 |
32 | ```Python
33 | from selenium import webdriver
34 | driver = webdriver.Chrome()
35 | ```
--------------------------------------------------------------------------------
/Jupyter/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import json\n",
19 | "f = open(\"merged_all_no_duplicates-csv.json\", \"r\")\n",
20 | "dict_reports = f.read()\n",
21 | "f.close()"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 3,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "table = pd.read_json(dict_reports)"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 4,
36 | "metadata": {
37 | "scrolled": true
38 | },
39 | "outputs": [
40 | {
41 | "data": {
42 | "text/html": [
43 | "
\n",
44 | "
\n",
45 | " \n",
46 | " \n",
47 | " | \n",
48 | " BODY & INTERIOR QUALITY - DESIGN | \n",
49 | " BODY & INTERIOR QUALITY - MECHANICAL | \n",
50 | " FEATURES & ACCESSORIES QUALITY - DESIGN | \n",
51 | " FEATURES & ACCESSORIES QUALITY - MECHANICAL | \n",
52 | " Mileage | \n",
53 | " OVERALL QUALITY - DESIGN | \n",
54 | " OVERALL QUALITY - MECHANICAL | \n",
55 | " POWERTRAIN QUALITY - DESIGN | \n",
56 | " POWERTRAIN QUALITY - MECHANICAL | \n",
57 | " Price_Dealers_Retail | \n",
58 | " Price_Private_Sale | \n",
59 | " Price_Trade-In | \n",
60 | " Summary_Accidents | \n",
61 | " Summary_Junk_Salvage | \n",
62 | " Summary_Recalls | \n",
63 | " Summary_Theft | \n",
64 | " Summary_Title_Problems | \n",
65 | " year | \n",
66 | "
\n",
67 | " \n",
68 | " \n",
69 | " \n",
70 | " | count | \n",
71 | " 11054.000000 | \n",
72 | " 11054.000000 | \n",
73 | " 11054.000000 | \n",
74 | " 11054.000000 | \n",
75 | " 15892.000000 | \n",
76 | " 11054.000000 | \n",
77 | " 11054.000000 | \n",
78 | " 11054.000000 | \n",
79 | " 11054.000000 | \n",
80 | " 13280.000000 | \n",
81 | " 13280.000000 | \n",
82 | " 13280.000000 | \n",
83 | " 15936.000000 | \n",
84 | " 15936.000000 | \n",
85 | " 15936.000000 | \n",
86 | " 15936.000000 | \n",
87 | " 15936.000000 | \n",
88 | " 15940.000000 | \n",
89 | "
\n",
90 | " \n",
91 | " | mean | \n",
92 | " 3.409399 | \n",
93 | " 3.403157 | \n",
94 | " 3.623711 | \n",
95 | " 3.493713 | \n",
96 | " 42497.701359 | \n",
97 | " 3.541478 | \n",
98 | " 3.378279 | \n",
99 | " 3.335173 | \n",
100 | " 3.220463 | \n",
101 | " 16035.226657 | \n",
102 | " 14560.086446 | \n",
103 | " 12882.327786 | \n",
104 | " 0.011797 | \n",
105 | " 0.002134 | \n",
106 | " 3.367093 | \n",
107 | " 0.000126 | \n",
108 | " 0.001381 | \n",
109 | " 2014.139460 | \n",
110 | "
\n",
111 | " \n",
112 | " | std | \n",
113 | " 0.929235 | \n",
114 | " 0.846629 | \n",
115 | " 0.896583 | \n",
116 | " 0.954322 | \n",
117 | " 31768.787255 | \n",
118 | " 0.977871 | \n",
119 | " 0.890147 | \n",
120 | " 0.934002 | \n",
121 | " 0.892976 | \n",
122 | " 11067.915249 | \n",
123 | " 10724.774459 | \n",
124 | " 10232.956041 | \n",
125 | " 0.132978 | \n",
126 | " 0.073433 | \n",
127 | " 3.158152 | \n",
128 | " 0.015843 | \n",
129 | " 0.051321 | \n",
130 | " 3.025925 | \n",
131 | "
\n",
132 | " \n",
133 | " | min | \n",
134 | " 0.000000 | \n",
135 | " 0.000000 | \n",
136 | " 0.000000 | \n",
137 | " 0.000000 | \n",
138 | " 0.000000 | \n",
139 | " 0.000000 | \n",
140 | " 0.000000 | \n",
141 | " 0.000000 | \n",
142 | " 0.000000 | \n",
143 | " 2000.000000 | \n",
144 | " 1063.000000 | \n",
145 | " 500.000000 | \n",
146 | " 0.000000 | \n",
147 | " 0.000000 | \n",
148 | " 0.000000 | \n",
149 | " 0.000000 | \n",
150 | " 0.000000 | \n",
151 | " 1990.000000 | \n",
152 | "
\n",
153 | " \n",
154 | " | 25% | \n",
155 | " 3.000000 | \n",
156 | " 3.000000 | \n",
157 | " 3.000000 | \n",
158 | " 3.000000 | \n",
159 | " 23142.250000 | \n",
160 | " 3.000000 | \n",
161 | " 3.000000 | \n",
162 | " 2.500000 | \n",
163 | " 2.500000 | \n",
164 | " 12006.000000 | \n",
165 | " 10459.000000 | \n",
166 | " 8837.000000 | \n",
167 | " 0.000000 | \n",
168 | " 0.000000 | \n",
169 | " 1.000000 | \n",
170 | " 0.000000 | \n",
171 | " 0.000000 | \n",
172 | " 2014.000000 | \n",
173 | "
\n",
174 | " \n",
175 | " | 50% | \n",
176 | " 3.500000 | \n",
177 | " 3.500000 | \n",
178 | " 3.500000 | \n",
179 | " 3.500000 | \n",
180 | " 35888.000000 | \n",
181 | " 3.500000 | \n",
182 | " 3.500000 | \n",
183 | " 3.500000 | \n",
184 | " 3.000000 | \n",
185 | " 14930.500000 | \n",
186 | " 13591.000000 | \n",
187 | " 11800.000000 | \n",
188 | " 0.000000 | \n",
189 | " 0.000000 | \n",
190 | " 3.000000 | \n",
191 | " 0.000000 | \n",
192 | " 0.000000 | \n",
193 | " 2015.000000 | \n",
194 | "
\n",
195 | " \n",
196 | " | 75% | \n",
197 | " 4.000000 | \n",
198 | " 4.000000 | \n",
199 | " 4.500000 | \n",
200 | " 4.500000 | \n",
201 | " 49672.250000 | \n",
202 | " 4.500000 | \n",
203 | " 4.000000 | \n",
204 | " 4.000000 | \n",
205 | " 4.000000 | \n",
206 | " 17868.000000 | \n",
207 | " 16421.000000 | \n",
208 | " 14766.000000 | \n",
209 | " 0.000000 | \n",
210 | " 0.000000 | \n",
211 | " 5.000000 | \n",
212 | " 0.000000 | \n",
213 | " 0.000000 | \n",
214 | " 2016.000000 | \n",
215 | "
\n",
216 | " \n",
217 | " | max | \n",
218 | " 5.000000 | \n",
219 | " 5.000000 | \n",
220 | " 5.000000 | \n",
221 | " 5.000000 | \n",
222 | " 722414.000000 | \n",
223 | " 5.000000 | \n",
224 | " 5.000000 | \n",
225 | " 5.000000 | \n",
226 | " 5.000000 | \n",
227 | " 782387.000000 | \n",
228 | " 746898.000000 | \n",
229 | " 703523.000000 | \n",
230 | " 4.000000 | \n",
231 | " 4.000000 | \n",
232 | " 23.000000 | \n",
233 | " 2.000000 | \n",
234 | " 4.000000 | \n",
235 | " 2018.000000 | \n",
236 | "
\n",
237 | " \n",
238 | "
\n",
239 | "
"
240 | ],
241 | "text/plain": [
242 | " BODY & INTERIOR QUALITY - DESIGN BODY & INTERIOR QUALITY - MECHANICAL \\\n",
243 | "count 11054.000000 11054.000000 \n",
244 | "mean 3.409399 3.403157 \n",
245 | "std 0.929235 0.846629 \n",
246 | "min 0.000000 0.000000 \n",
247 | "25% 3.000000 3.000000 \n",
248 | "50% 3.500000 3.500000 \n",
249 | "75% 4.000000 4.000000 \n",
250 | "max 5.000000 5.000000 \n",
251 | "\n",
252 | " FEATURES & ACCESSORIES QUALITY - DESIGN \\\n",
253 | "count 11054.000000 \n",
254 | "mean 3.623711 \n",
255 | "std 0.896583 \n",
256 | "min 0.000000 \n",
257 | "25% 3.000000 \n",
258 | "50% 3.500000 \n",
259 | "75% 4.500000 \n",
260 | "max 5.000000 \n",
261 | "\n",
262 | " FEATURES & ACCESSORIES QUALITY - MECHANICAL Mileage \\\n",
263 | "count 11054.000000 15892.000000 \n",
264 | "mean 3.493713 42497.701359 \n",
265 | "std 0.954322 31768.787255 \n",
266 | "min 0.000000 0.000000 \n",
267 | "25% 3.000000 23142.250000 \n",
268 | "50% 3.500000 35888.000000 \n",
269 | "75% 4.500000 49672.250000 \n",
270 | "max 5.000000 722414.000000 \n",
271 | "\n",
272 | " OVERALL QUALITY - DESIGN OVERALL QUALITY - MECHANICAL \\\n",
273 | "count 11054.000000 11054.000000 \n",
274 | "mean 3.541478 3.378279 \n",
275 | "std 0.977871 0.890147 \n",
276 | "min 0.000000 0.000000 \n",
277 | "25% 3.000000 3.000000 \n",
278 | "50% 3.500000 3.500000 \n",
279 | "75% 4.500000 4.000000 \n",
280 | "max 5.000000 5.000000 \n",
281 | "\n",
282 | " POWERTRAIN QUALITY - DESIGN POWERTRAIN QUALITY - MECHANICAL \\\n",
283 | "count 11054.000000 11054.000000 \n",
284 | "mean 3.335173 3.220463 \n",
285 | "std 0.934002 0.892976 \n",
286 | "min 0.000000 0.000000 \n",
287 | "25% 2.500000 2.500000 \n",
288 | "50% 3.500000 3.000000 \n",
289 | "75% 4.000000 4.000000 \n",
290 | "max 5.000000 5.000000 \n",
291 | "\n",
292 | " Price_Dealers_Retail Price_Private_Sale Price_Trade-In \\\n",
293 | "count 13280.000000 13280.000000 13280.000000 \n",
294 | "mean 16035.226657 14560.086446 12882.327786 \n",
295 | "std 11067.915249 10724.774459 10232.956041 \n",
296 | "min 2000.000000 1063.000000 500.000000 \n",
297 | "25% 12006.000000 10459.000000 8837.000000 \n",
298 | "50% 14930.500000 13591.000000 11800.000000 \n",
299 | "75% 17868.000000 16421.000000 14766.000000 \n",
300 | "max 782387.000000 746898.000000 703523.000000 \n",
301 | "\n",
302 | " Summary_Accidents Summary_Junk_Salvage Summary_Recalls \\\n",
303 | "count 15936.000000 15936.000000 15936.000000 \n",
304 | "mean 0.011797 0.002134 3.367093 \n",
305 | "std 0.132978 0.073433 3.158152 \n",
306 | "min 0.000000 0.000000 0.000000 \n",
307 | "25% 0.000000 0.000000 1.000000 \n",
308 | "50% 0.000000 0.000000 3.000000 \n",
309 | "75% 0.000000 0.000000 5.000000 \n",
310 | "max 4.000000 4.000000 23.000000 \n",
311 | "\n",
312 | " Summary_Theft Summary_Title_Problems year \n",
313 | "count 15936.000000 15936.000000 15940.000000 \n",
314 | "mean 0.000126 0.001381 2014.139460 \n",
315 | "std 0.015843 0.051321 3.025925 \n",
316 | "min 0.000000 0.000000 1990.000000 \n",
317 | "25% 0.000000 0.000000 2014.000000 \n",
318 | "50% 0.000000 0.000000 2015.000000 \n",
319 | "75% 0.000000 0.000000 2016.000000 \n",
320 | "max 2.000000 4.000000 2018.000000 "
321 | ]
322 | },
323 | "execution_count": 4,
324 | "metadata": {},
325 | "output_type": "execute_result"
326 | }
327 | ],
328 | "source": [
329 | "table.describe()"
330 | ]
331 | },
332 | {
333 | "cell_type": "code",
334 | "execution_count": 5,
335 | "metadata": {
336 | "scrolled": true
337 | },
338 | "outputs": [
339 | {
340 | "data": {
341 | "text/plain": [
342 | "array([u'BODY & INTERIOR QUALITY - DESIGN',\n",
343 | " u'BODY & INTERIOR QUALITY - MECHANICAL', u'City MPG', u'Drive Type',\n",
344 | " u'Engine', u'Exterior Color',\n",
345 | " u'FEATURES & ACCESSORIES QUALITY - DESIGN',\n",
346 | " u'FEATURES & ACCESSORIES QUALITY - MECHANICAL', u'Fuel Type',\n",
347 | " u'Hwy MPG', u'Interior Color', u'Issues_Detail', u'Mileage',\n",
348 | " u'OVERALL QUALITY - DESIGN', u'OVERALL QUALITY - MECHANICAL',\n",
349 | " u'Odometer_Mileage', u'POWERTRAIN QUALITY - DESIGN',\n",
350 | " u'POWERTRAIN QUALITY - MECHANICAL', u'Price_Dealers_Retail',\n",
351 | " u'Price_Private_Sale', u'Price_Trade-In', u'Recalls_Detail',\n",
352 | " u'Summary_Accidents', u'Summary_Junk_Salvage', u'Summary_Recalls',\n",
353 | " u'Summary_Theft', u'Summary_Title_Problems', u'Title_Problem_Check',\n",
354 | " u'Total Seating', u'Transmission', u'Trim', u'Vehicle_Use',\n",
355 | " u'Warranty', u'Year_Make_Model', u'address', u'make', u'model',\n",
356 | " u'original-model', u'price', u'space', u'vin', u'year'], dtype=object)"
357 | ]
358 | },
359 | "execution_count": 5,
360 | "metadata": {},
361 | "output_type": "execute_result"
362 | }
363 | ],
364 | "source": [
365 | "table.columns.values"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": 6,
371 | "metadata": {},
372 | "outputs": [],
373 | "source": [
374 | "import numpy as np\n",
375 | "table['price'] = table['price'].replace('Unknown', np.nan)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": 11,
381 | "metadata": {},
382 | "outputs": [
383 | {
384 | "data": {
385 | "text/plain": [
386 | "count 15488\n",
387 | "unique 3739\n",
388 | "top 13995\n",
389 | "freq 132\n",
390 | "Name: price, dtype: object"
391 | ]
392 | },
393 | "execution_count": 11,
394 | "metadata": {},
395 | "output_type": "execute_result"
396 | }
397 | ],
398 | "source": [
399 | "table['price'].describe()"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": 12,
405 | "metadata": {},
406 | "outputs": [],
407 | "source": [
408 | "table['price'] = pd.to_numeric(table['price'])"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 13,
414 | "metadata": {},
415 | "outputs": [
416 | {
417 | "data": {
418 | "text/plain": [
419 | "count 15488.000000\n",
420 | "mean 15706.463197\n",
421 | "std 11499.161402\n",
422 | "min 788.000000\n",
423 | "25% 11980.000000\n",
424 | "50% 14300.000000\n",
425 | "75% 16991.000000\n",
426 | "max 437880.000000\n",
427 | "Name: price, dtype: float64"
428 | ]
429 | },
430 | "execution_count": 13,
431 | "metadata": {},
432 | "output_type": "execute_result"
433 | }
434 | ],
435 | "source": [
436 | "table['price'].describe()"
437 | ]
438 | },
439 | {
440 | "cell_type": "code",
441 | "execution_count": 16,
442 | "metadata": {},
443 | "outputs": [
444 | {
445 | "data": {
446 | "text/plain": [
447 | "count 15936.000000\n",
448 | "mean 3.367093\n",
449 | "std 3.158152\n",
450 | "min 0.000000\n",
451 | "25% 1.000000\n",
452 | "50% 3.000000\n",
453 | "75% 5.000000\n",
454 | "max 23.000000\n",
455 | "Name: Summary_Recalls, dtype: float64"
456 | ]
457 | },
458 | "execution_count": 16,
459 | "metadata": {},
460 | "output_type": "execute_result"
461 | }
462 | ],
463 | "source": [
464 | "table['Summary_Recalls'].describe()"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 19,
470 | "metadata": {},
471 | "outputs": [],
472 | "source": [
473 | "table['price_diff_per'] = (table['price']-table['Price_Dealers_Retail'])/table['Price_Dealers_Retail']"
474 | ]
475 | },
476 | {
477 | "cell_type": "code",
478 | "execution_count": 20,
479 | "metadata": {},
480 | "outputs": [
481 | {
482 | "data": {
483 | "text/plain": [
484 | "count 12907.000000\n",
485 | "mean -0.033474\n",
486 | "std 0.268332\n",
487 | "min -0.757463\n",
488 | "25% -0.176401\n",
489 | "50% -0.058069\n",
490 | "75% 0.075218\n",
491 | "max 11.281399\n",
492 | "Name: price_diff_per, dtype: float64"
493 | ]
494 | },
495 | "execution_count": 20,
496 | "metadata": {},
497 | "output_type": "execute_result"
498 | }
499 | ],
500 | "source": [
501 | "table['price_diff_per'].describe()"
502 | ]
503 | },
504 | {
505 | "cell_type": "code",
506 | "execution_count": 33,
507 | "metadata": {},
508 | "outputs": [],
509 | "source": [
510 | "table['price_score'] = ((table['price_diff_per'].apply(lambda x: 1 if x >= 1 else x)) - table['price_diff_per'].min()) / (1 - table['price_diff_per'].min())"
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "execution_count": 43,
516 | "metadata": {},
517 | "outputs": [],
518 | "source": [
519 | "table['price_score'] = 1 - table['price_score'] # reverse scale"
520 | ]
521 | },
522 | {
523 | "cell_type": "code",
524 | "execution_count": 44,
525 | "metadata": {
526 | "scrolled": true
527 | },
528 | "outputs": [
529 | {
530 | "data": {
531 | "text/html": [
532 | "\n",
533 | "
\n",
534 | " \n",
535 | " \n",
536 | " | \n",
537 | " price_diff_per | \n",
538 | " price_score | \n",
539 | "
\n",
540 | " \n",
541 | " \n",
542 | " \n",
543 | " | 0 | \n",
544 | " -0.279677 | \n",
545 | " 0.728139 | \n",
546 | "
\n",
547 | " \n",
548 | " | 1 | \n",
549 | " 0.193093 | \n",
550 | " 0.459132 | \n",
551 | "
\n",
552 | " \n",
553 | " | 2 | \n",
554 | " 0.012786 | \n",
555 | " 0.561727 | \n",
556 | "
\n",
557 | " \n",
558 | " | 3 | \n",
559 | " 0.090377 | \n",
560 | " 0.517577 | \n",
561 | "
\n",
562 | " \n",
563 | " | 4 | \n",
564 | " -0.291709 | \n",
565 | " 0.734985 | \n",
566 | "
\n",
567 | " \n",
568 | " | 5 | \n",
569 | " 0.041667 | \n",
570 | " 0.545294 | \n",
571 | "
\n",
572 | " \n",
573 | " | 6 | \n",
574 | " NaN | \n",
575 | " NaN | \n",
576 | "
\n",
577 | " \n",
578 | " | 7 | \n",
579 | " -0.157533 | \n",
580 | " 0.658639 | \n",
581 | "
\n",
582 | " \n",
583 | " | 8 | \n",
584 | " -0.060168 | \n",
585 | " 0.603238 | \n",
586 | "
\n",
587 | " \n",
588 | " | 9 | \n",
589 | " -0.007937 | \n",
590 | " 0.573518 | \n",
591 | "
\n",
592 | " \n",
593 | " | 10 | \n",
594 | " 0.615335 | \n",
595 | " 0.218875 | \n",
596 | "
\n",
597 | " \n",
598 | " | 11 | \n",
599 | " -0.171296 | \n",
600 | " 0.666470 | \n",
601 | "
\n",
602 | " \n",
603 | " | 12 | \n",
604 | " 0.055981 | \n",
605 | " 0.537149 | \n",
606 | "
\n",
607 | " \n",
608 | " | 13 | \n",
609 | " -0.163742 | \n",
610 | " 0.662172 | \n",
611 | "
\n",
612 | " \n",
613 | " | 14 | \n",
614 | " -0.125213 | \n",
615 | " 0.640248 | \n",
616 | "
\n",
617 | " \n",
618 | " | 15 | \n",
619 | " NaN | \n",
620 | " NaN | \n",
621 | "
\n",
622 | " \n",
623 | " | 16 | \n",
624 | " -0.057540 | \n",
625 | " 0.601742 | \n",
626 | "
\n",
627 | " \n",
628 | " | 17 | \n",
629 | " NaN | \n",
630 | " NaN | \n",
631 | "
\n",
632 | " \n",
633 | " | 18 | \n",
634 | " -0.112216 | \n",
635 | " 0.632853 | \n",
636 | "
\n",
637 | " \n",
638 | " | 19 | \n",
639 | " NaN | \n",
640 | " NaN | \n",
641 | "
\n",
642 | " \n",
643 | " | 20 | \n",
644 | " 0.051298 | \n",
645 | " 0.539813 | \n",
646 | "
\n",
647 | " \n",
648 | " | 21 | \n",
649 | " -0.182798 | \n",
650 | " 0.673015 | \n",
651 | "
\n",
652 | " \n",
653 | " | 22 | \n",
654 | " NaN | \n",
655 | " NaN | \n",
656 | "
\n",
657 | " \n",
658 | " | 23 | \n",
659 | " -0.211016 | \n",
660 | " 0.689071 | \n",
661 | "
\n",
662 | " \n",
663 | " | 24 | \n",
664 | " 0.070398 | \n",
665 | " 0.528945 | \n",
666 | "
\n",
667 | " \n",
668 | " | 25 | \n",
669 | " NaN | \n",
670 | " NaN | \n",
671 | "
\n",
672 | " \n",
673 | " | 26 | \n",
674 | " -0.064863 | \n",
675 | " 0.605909 | \n",
676 | "
\n",
677 | " \n",
678 | " | 27 | \n",
679 | " NaN | \n",
680 | " NaN | \n",
681 | "
\n",
682 | " \n",
683 | " | 28 | \n",
684 | " 0.159524 | \n",
685 | " 0.478233 | \n",
686 | "
\n",
687 | " \n",
688 | " | 29 | \n",
689 | " 0.050397 | \n",
690 | " 0.540326 | \n",
691 | "
\n",
692 | " \n",
693 | " | ... | \n",
694 | " ... | \n",
695 | " ... | \n",
696 | "
\n",
697 | " \n",
698 | " | 15910 | \n",
699 | " -0.122139 | \n",
700 | " 0.638499 | \n",
701 | "
\n",
702 | " \n",
703 | " | 15911 | \n",
704 | " 0.083035 | \n",
705 | " 0.521755 | \n",
706 | "
\n",
707 | " \n",
708 | " | 15912 | \n",
709 | " 0.126349 | \n",
710 | " 0.497109 | \n",
711 | "
\n",
712 | " \n",
713 | " | 15913 | \n",
714 | " -0.043437 | \n",
715 | " 0.593718 | \n",
716 | "
\n",
717 | " \n",
718 | " | 15914 | \n",
719 | " 0.139688 | \n",
720 | " 0.489519 | \n",
721 | "
\n",
722 | " \n",
723 | " | 15915 | \n",
724 | " -0.161821 | \n",
725 | " 0.661078 | \n",
726 | "
\n",
727 | " \n",
728 | " | 15916 | \n",
729 | " -0.064838 | \n",
730 | " 0.605895 | \n",
731 | "
\n",
732 | " \n",
733 | " | 15917 | \n",
734 | " NaN | \n",
735 | " NaN | \n",
736 | "
\n",
737 | " \n",
738 | " | 15918 | \n",
739 | " 0.258711 | \n",
740 | " 0.421795 | \n",
741 | "
\n",
742 | " \n",
743 | " | 15919 | \n",
744 | " 0.144578 | \n",
745 | " 0.486737 | \n",
746 | "
\n",
747 | " \n",
748 | " | 15920 | \n",
749 | " 0.538314 | \n",
750 | " 0.262701 | \n",
751 | "
\n",
752 | " \n",
753 | " | 15921 | \n",
754 | " -0.137068 | \n",
755 | " 0.646994 | \n",
756 | "
\n",
757 | " \n",
758 | " | 15922 | \n",
759 | " 0.094657 | \n",
760 | " 0.515142 | \n",
761 | "
\n",
762 | " \n",
763 | " | 15923 | \n",
764 | " -0.055116 | \n",
765 | " 0.600363 | \n",
766 | "
\n",
767 | " \n",
768 | " | 15924 | \n",
769 | " 0.171163 | \n",
770 | " 0.471610 | \n",
771 | "
\n",
772 | " \n",
773 | " | 15925 | \n",
774 | " -0.051397 | \n",
775 | " 0.598247 | \n",
776 | "
\n",
777 | " \n",
778 | " | 15926 | \n",
779 | " 0.231017 | \n",
780 | " 0.437553 | \n",
781 | "
\n",
782 | " \n",
783 | " | 15927 | \n",
784 | " -0.239601 | \n",
785 | " 0.705335 | \n",
786 | "
\n",
787 | " \n",
788 | " | 15928 | \n",
789 | " -0.103331 | \n",
790 | " 0.627797 | \n",
791 | "
\n",
792 | " \n",
793 | " | 15929 | \n",
794 | " 0.130418 | \n",
795 | " 0.494794 | \n",
796 | "
\n",
797 | " \n",
798 | " | 15930 | \n",
799 | " 0.030584 | \n",
800 | " 0.551600 | \n",
801 | "
\n",
802 | " \n",
803 | " | 15931 | \n",
804 | " 0.020901 | \n",
805 | " 0.557109 | \n",
806 | "
\n",
807 | " \n",
808 | " | 15932 | \n",
809 | " NaN | \n",
810 | " NaN | \n",
811 | "
\n",
812 | " \n",
813 | " | 15933 | \n",
814 | " -0.073209 | \n",
815 | " 0.610658 | \n",
816 | "
\n",
817 | " \n",
818 | " | 15934 | \n",
819 | " 0.649112 | \n",
820 | " 0.199656 | \n",
821 | "
\n",
822 | " \n",
823 | " | 15935 | \n",
824 | " 1.764894 | \n",
825 | " 0.000000 | \n",
826 | "
\n",
827 | " \n",
828 | " | 15936 | \n",
829 | " -0.050277 | \n",
830 | " 0.597610 | \n",
831 | "
\n",
832 | " \n",
833 | " | 15937 | \n",
834 | " NaN | \n",
835 | " NaN | \n",
836 | "
\n",
837 | " \n",
838 | " | 15938 | \n",
839 | " 0.062363 | \n",
840 | " 0.533517 | \n",
841 | "
\n",
842 | " \n",
843 | " | 15939 | \n",
844 | " NaN | \n",
845 | " NaN | \n",
846 | "
\n",
847 | " \n",
848 | "
\n",
849 | "
15940 rows × 2 columns
\n",
850 | "
"
851 | ],
852 | "text/plain": [
853 | " price_diff_per price_score\n",
854 | "0 -0.279677 0.728139\n",
855 | "1 0.193093 0.459132\n",
856 | "2 0.012786 0.561727\n",
857 | "3 0.090377 0.517577\n",
858 | "4 -0.291709 0.734985\n",
859 | "5 0.041667 0.545294\n",
860 | "6 NaN NaN\n",
861 | "7 -0.157533 0.658639\n",
862 | "8 -0.060168 0.603238\n",
863 | "9 -0.007937 0.573518\n",
864 | "10 0.615335 0.218875\n",
865 | "11 -0.171296 0.666470\n",
866 | "12 0.055981 0.537149\n",
867 | "13 -0.163742 0.662172\n",
868 | "14 -0.125213 0.640248\n",
869 | "15 NaN NaN\n",
870 | "16 -0.057540 0.601742\n",
871 | "17 NaN NaN\n",
872 | "18 -0.112216 0.632853\n",
873 | "19 NaN NaN\n",
874 | "20 0.051298 0.539813\n",
875 | "21 -0.182798 0.673015\n",
876 | "22 NaN NaN\n",
877 | "23 -0.211016 0.689071\n",
878 | "24 0.070398 0.528945\n",
879 | "25 NaN NaN\n",
880 | "26 -0.064863 0.605909\n",
881 | "27 NaN NaN\n",
882 | "28 0.159524 0.478233\n",
883 | "29 0.050397 0.540326\n",
884 | "... ... ...\n",
885 | "15910 -0.122139 0.638499\n",
886 | "15911 0.083035 0.521755\n",
887 | "15912 0.126349 0.497109\n",
888 | "15913 -0.043437 0.593718\n",
889 | "15914 0.139688 0.489519\n",
890 | "15915 -0.161821 0.661078\n",
891 | "15916 -0.064838 0.605895\n",
892 | "15917 NaN NaN\n",
893 | "15918 0.258711 0.421795\n",
894 | "15919 0.144578 0.486737\n",
895 | "15920 0.538314 0.262701\n",
896 | "15921 -0.137068 0.646994\n",
897 | "15922 0.094657 0.515142\n",
898 | "15923 -0.055116 0.600363\n",
899 | "15924 0.171163 0.471610\n",
900 | "15925 -0.051397 0.598247\n",
901 | "15926 0.231017 0.437553\n",
902 | "15927 -0.239601 0.705335\n",
903 | "15928 -0.103331 0.627797\n",
904 | "15929 0.130418 0.494794\n",
905 | "15930 0.030584 0.551600\n",
906 | "15931 0.020901 0.557109\n",
907 | "15932 NaN NaN\n",
908 | "15933 -0.073209 0.610658\n",
909 | "15934 0.649112 0.199656\n",
910 | "15935 1.764894 0.000000\n",
911 | "15936 -0.050277 0.597610\n",
912 | "15937 NaN NaN\n",
913 | "15938 0.062363 0.533517\n",
914 | "15939 NaN NaN\n",
915 | "\n",
916 | "[15940 rows x 2 columns]"
917 | ]
918 | },
919 | "execution_count": 44,
920 | "metadata": {},
921 | "output_type": "execute_result"
922 | }
923 | ],
924 | "source": [
925 | "table[['price_diff_per', 'price_score']]"
926 | ]
927 | },
928 | {
929 | "cell_type": "code",
930 | "execution_count": 45,
931 | "metadata": {},
932 | "outputs": [],
933 | "source": [
934 | "table['mileage_year'] = table['Mileage'] / (2017 - table['year'].apply(lambda x: 2017 if x >= 2017 else x) + 1) \n",
935 | "# maybe year 2018 model"
936 | ]
937 | },
938 | {
939 | "cell_type": "code",
940 | "execution_count": 60,
941 | "metadata": {},
942 | "outputs": [],
943 | "source": [
944 | "table['mileage_year_score'] = 1 - (table['mileage_year'].apply(lambda x: 20000if x>=20000 else x) / 20000)"
945 | ]
946 | },
947 | {
948 | "cell_type": "code",
949 | "execution_count": 64,
950 | "metadata": {},
951 | "outputs": [
952 | {
953 | "data": {
954 | "text/plain": [
955 | "count 15892.000000\n",
956 | "mean 0.412694\n",
957 | "std 0.274199\n",
958 | "min 0.000000\n",
959 | "25% 0.173877\n",
960 | "50% 0.449877\n",
961 | "75% 0.620792\n",
962 | "max 1.000000\n",
963 | "Name: mileage_year_score, dtype: float64"
964 | ]
965 | },
966 | "execution_count": 64,
967 | "metadata": {},
968 | "output_type": "execute_result"
969 | }
970 | ],
971 | "source": [
972 | "table['mileage_year_score'].describe()"
973 | ]
974 | },
975 | {
976 | "cell_type": "code",
977 | "execution_count": 40,
978 | "metadata": {},
979 | "outputs": [
980 | {
981 | "data": {
982 | "text/plain": [
983 | "count 15940.000000\n",
984 | "mean 2014.139460\n",
985 | "std 3.025925\n",
986 | "min 1990.000000\n",
987 | "25% 2014.000000\n",
988 | "50% 2015.000000\n",
989 | "75% 2016.000000\n",
990 | "max 2018.000000\n",
991 | "Name: year, dtype: float64"
992 | ]
993 | },
994 | "execution_count": 40,
995 | "metadata": {},
996 | "output_type": "execute_result"
997 | }
998 | ],
999 | "source": [
1000 | "table['year'].describe()"
1001 | ]
1002 | },
1003 | {
1004 | "cell_type": "code",
1005 | "execution_count": 47,
1006 | "metadata": {},
1007 | "outputs": [],
1008 | "source": [
1009 | "df2 = pd.read_csv(\"truecar_url.csv\")\n",
1010 | "df1 = pd.read_csv(\"edmunds_url.csv\")\n",
1011 | "urls = pd.concat([df1, df2])"
1012 | ]
1013 | },
1014 | {
1015 | "cell_type": "code",
1016 | "execution_count": 53,
1017 | "metadata": {},
1018 | "outputs": [],
1019 | "source": [
1020 | "table = table.join(urls.set_index('vin'), on='vin')"
1021 | ]
1022 | },
1023 | {
1024 | "cell_type": "code",
1025 | "execution_count": 58,
1026 | "metadata": {
1027 | "scrolled": false
1028 | },
1029 | "outputs": [],
1030 | "source": [
1031 | "table = table.drop_duplicates(subset=['vin'])"
1032 | ]
1033 | },
1034 | {
1035 | "cell_type": "code",
1036 | "execution_count": 69,
1037 | "metadata": {},
1038 | "outputs": [],
1039 | "source": [
1040 | "table['recall_score'] = 1 - (table['Summary_Recalls']/(table['Summary_Recalls'].max() - table['Summary_Recalls'].min()))"
1041 | ]
1042 | },
1043 | {
1044 | "cell_type": "code",
1045 | "execution_count": 72,
1046 | "metadata": {
1047 | "scrolled": true
1048 | },
1049 | "outputs": [],
1050 | "source": [
1051 | "table['rating_score'] = (table['BODY & INTERIOR QUALITY - DESIGN'] + table['BODY & INTERIOR QUALITY - MECHANICAL'] + table['FEATURES & ACCESSORIES QUALITY - DESIGN'] + table['FEATURES & ACCESSORIES QUALITY - MECHANICAL'] + table['OVERALL QUALITY - DESIGN'] + table['OVERALL QUALITY - MECHANICAL'] + table['POWERTRAIN QUALITY - DESIGN'] + table['POWERTRAIN QUALITY - MECHANICAL']) / 40"
1052 | ]
1053 | },
1054 | {
1055 | "cell_type": "code",
1056 | "execution_count": 74,
1057 | "metadata": {},
1058 | "outputs": [
1059 | {
1060 | "data": {
1061 | "text/plain": [
1062 | "count 11054.000000\n",
1063 | "mean 0.685134\n",
1064 | "std 0.118719\n",
1065 | "min 0.000000\n",
1066 | "25% 0.600000\n",
1067 | "50% 0.700000\n",
1068 | "75% 0.775000\n",
1069 | "max 0.975000\n",
1070 | "Name: rating_score, dtype: float64"
1071 | ]
1072 | },
1073 | "execution_count": 74,
1074 | "metadata": {},
1075 | "output_type": "execute_result"
1076 | }
1077 | ],
1078 | "source": [
1079 | "table['rating_score'].describe()"
1080 | ]
1081 | },
1082 | {
1083 | "cell_type": "code",
1084 | "execution_count": 76,
1085 | "metadata": {},
1086 | "outputs": [],
1087 | "source": [
1088 | "table['score'] = table['rating_score'].apply(lambda x: 0 if x is np.nan else x) * 20 + \\\n",
1089 | "table['recall_score'].apply(lambda x: 0 if x is np.nan else x) * 10 + \\\n",
1090 | "table['mileage_year_score'].apply(lambda x: 0 if x is np.nan else x) * 30 + \\\n",
1091 | "table['price_score'].apply(lambda x: 0 if x is np.nan else x) * 40"
1092 | ]
1093 | },
1094 | {
1095 | "cell_type": "code",
1096 | "execution_count": 78,
1097 | "metadata": {},
1098 | "outputs": [
1099 | {
1100 | "data": {
1101 | "text/plain": [
1102 | "count 10398.000000\n",
1103 | "mean 58.392275\n",
1104 | "std 8.031637\n",
1105 | "min 31.169745\n",
1106 | "25% 52.439792\n",
1107 | "50% 58.425033\n",
1108 | "75% 64.259776\n",
1109 | "max 84.976546\n",
1110 | "Name: score, dtype: float64"
1111 | ]
1112 | },
1113 | "execution_count": 78,
1114 | "metadata": {},
1115 | "output_type": "execute_result"
1116 | }
1117 | ],
1118 | "source": [
1119 | "table['score'].describe()"
1120 | ]
1121 | },
1122 | {
1123 | "cell_type": "code",
1124 | "execution_count": null,
1125 | "metadata": {},
1126 | "outputs": [],
1127 | "source": []
1128 | }
1129 | ],
1130 | "metadata": {
1131 | "kernelspec": {
1132 | "display_name": "Python 2",
1133 | "language": "python",
1134 | "name": "python2"
1135 | },
1136 | "language_info": {
1137 | "codemirror_mode": {
1138 | "name": "ipython",
1139 | "version": 2
1140 | },
1141 | "file_extension": ".py",
1142 | "mimetype": "text/x-python",
1143 | "name": "python",
1144 | "nbconvert_exporter": "python",
1145 | "pygments_lexer": "ipython2",
1146 | "version": "2.7.10"
1147 | }
1148 | },
1149 | "nbformat": 4,
1150 | "nbformat_minor": 2
1151 | }
1152 |
--------------------------------------------------------------------------------
/Jupyter/__pycache__/neo.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/Jupyter/__pycache__/neo.cpython-36.pyc
--------------------------------------------------------------------------------
/Jupyter/assets/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
47 |
--------------------------------------------------------------------------------
/Jupyter/cypher.py:
--------------------------------------------------------------------------------
1 | from py2neo import Graph
2 | graph = Graph("http://localhost:7474/db/data/")
3 | # remove self loop
4 | graph.data("match (n)-[r]->(n) delete r")
5 | # return graph
6 | graph.data("MATCH p = ()-[r:UnderProgram]->(n {name: program_name}) RETURN p")
7 | # match (n)-[r:HasDuplicate]->(n) delete r
8 | # MATCH (n {name: 'Alice'})->(m)
9 | # "Accounting (BS)"
10 | MATCH p = ()-[r: UnderProgram]->(n {name: "Accounting (BS)"}) RETURN p
11 |
12 | MATCH (m)-[r:UnderProgram]->(n {name: 'Accounting (BS)'})
13 | where (m)-[r:HasPreparation]->()
14 |
15 |
16 | graph_3.data("MATCH (m)-[r:UnderProgram]->(n {name: 'Accounting (BS)'})\
17 | where not (m)-[r:HasPreparation]->() and not (m)-[r:HasPrerequisite]->()\
18 | return m")
19 |
20 |
21 |
22 | MATCH (k)-[r*]->(n:ABC)
23 | with k, r, n, count(k)
24 |
25 | import pandas as pd
26 | courses = list()
27 | courselist = graph_3.data("MATCH p = (m)-[r:UnderProgram]->(n {name: 'Accounting (BS)'}) return p")
28 | for course in courselist:
29 | graph_3.data("Match ")
30 |
--------------------------------------------------------------------------------
/Jupyter/data (2).json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "code":"BUAD 280",
4 | "name":"BUAD 280 Introduction to Financial Accounting",
5 | "mooc":"Accounting: Principles of Financial Accounting",
6 | "url":"http://www.coursera.org/specializations/foundations-management",
7 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/f2/fdfaf0f9a511e6a5f4effa0e2c4d64/investigaci_n-de-mercados-y-comportamiento.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
8 | },
9 | {
10 | "code":"MATH 118",
11 | "name":"MATH 118gx Fundamental Principles of Calculus",
12 | "mooc":"",
13 | "url":"",
14 | "image":""
15 | },
16 | {
17 | "code":"MATH 125",
18 | "name":"MATH 125g Calculus I",
19 | "mooc":"Single Variable Calculus",
20 | "url":"http://www.coursera.org/learn/discrete-calculus",
21 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/05/cda840977511e5aa161903ab17f92e/CSV-logo-chapter-5-copy.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
22 | },
23 | {
24 | "code":"ECON 351",
25 | "name":"ECON 351x Microeconomics for Business",
26 | "mooc":"Strategic Business Management - Microeconomics",
27 | "url":"http://www.coursera.org/learn/parprog1",
28 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/06/ab77207de611e78b8cbf6b7d2487ac/image-1.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
29 | },
30 | {
31 | "code":"BUAD 306",
32 | "name":"BUAD 306 Business Finance",
33 | "mooc":"Business English: Finance and Economics",
34 | "url":"http://www.coursera.org/specializations/english-for-business",
35 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/4c/868cb0658911e6892f017139b2b060/coursera-ma-thumbnail.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
36 | },
37 | {
38 | "code":"BUAD 281",
39 | "name":"BUAD 281 Introduction to Managerial Accounting",
40 | "mooc":"Managerial Accounting: Cost Behaviors, Systems, and Analysis",
41 | "url":"http://www.coursera.org/specializations/value-chain-management",
42 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d396qusza40orc.cloudfront.net/phoenixassets/learn-finance/BNY_Specialization_Banner_logo.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
43 | },
44 | {
45 | "code":"ACCT 370",
46 | "name":"ACCT 370 External Financial Reporting Issues",
47 | "mooc":"",
48 | "url":"",
49 | "image":""
50 | },
51 | {
52 | "code":"ACCT 371",
53 | "name":"ACCT 371 Introduction to Accounting Systems",
54 | "mooc":"Accounting and Finance for IT professionals",
55 | "url":"http://www.coursera.org/specializations/business-technology-managment",
56 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/dd/7ae3d0bd8411e6834f83bd22b5a2e6/PositivePsychology_MOOC_Icons_Specialization.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
57 | },
58 | {
59 | "code":"ACCT 373",
60 | "name":"ACCT 373 Introduction to Auditing and Assurance Services",
61 | "mooc":"",
62 | "url":"",
63 | "image":""
64 | },
65 | {
66 | "code":"ACCT 377",
67 | "name":"ACCT 377 Valuation for Financial Statement Purposes",
68 | "mooc":"",
69 | "url":"",
70 | "image":""
71 | },
72 | {
73 | "code":"ACCT 416",
74 | "name":"ACCT 416 Financial Reporting and Analysis",
75 | "mooc":"More Introduction to Financial Accounting",
76 | "url":"http://www.coursera.org/learn/wharton-financial-accounting",
77 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/3e/b505c0ee7611e4890b43504b150d21/online_learning_slide_vAccounting.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
78 | },
79 | {
80 | "code":"ACCT 462",
81 | "name":"ACCT 462 Detecting Fraudulent Financial Reporting",
82 | "mooc":"",
83 | "url":"",
84 | "image":""
85 | },
86 | {
87 | "code":"ACCT 476",
88 | "name":"ACCT 476 Performance Measurement Issues",
89 | "mooc":"Measurement and data ",
90 | "url":"https://www.khanacademy.org/math/cc-2nd-grade-math/cc-2nd-measurement-data",
91 | "image":"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTWVTAsQKuMSTnREno6zNLCslVti6JN7G9RHpDFXaikAiF36rquQakV1igF"
92 | },
93 | {
94 | "code":"BUAD 302",
95 | "name":"BUAD 302T Communication Strategy in Accounting",
96 | "mooc":"",
97 | "url":"",
98 | "image":""
99 | },
100 | {
101 | "code":"BUAD 307",
102 | "name":"BUAD 307 Marketing Fundamentals",
103 | "mooc":"Digital Marketing Fundamentals",
104 | "url":"https://www.udacity.com/course/digital-marketing-fundamentals--cx11",
105 | "image":"https://s3-us-west-1.amazonaws.com/udacity-content/course-images/cx11-e152d95.jpg"
106 | },
107 | {
108 | "code":"ACCT 477",
109 | "name":"ACCT 477 Intermediate Fair Value Issues in Accounting",
110 | "mooc":"",
111 | "url":"",
112 | "image":""
113 | },
114 | {
115 | "code":"ACCT 478",
116 | "name":"ACCT 478 Accounting Systems Design",
117 | "mooc":"More Introduction to Financial Accounting",
118 | "url":"http://www.coursera.org/learn/wharton-financial-accounting",
119 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/3e/b505c0ee7611e4890b43504b150d21/online_learning_slide_vAccounting.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
120 | },
121 | {
122 | "code":"ACCT 473",
123 | "name":"ACCT 473 Financial Statement Auditing",
124 | "mooc":"",
125 | "url":"",
126 | "image":""
127 | },
128 | {
129 | "code":"ACCT 475",
130 | "name":"ACCT 475 Systems Security and Audit",
131 | "mooc":"Cyber-Physical Systems Security",
132 | "url":"https://www.udacity.com/course/cyber-physical-systems-security--ud279",
133 | "image":"https://lh3.googleusercontent.com/pjV2kaEQlQuzZSTR8YwIYyDieRxdfH6i0QfGzSCI1iTawtnhYJmbn2RC0rXdsnSGf6FaoIuLHE11NyFL=s0#w=1280&h=720=s276#w=1724&h=1060"
134 | },
135 | {
136 | "code":"ACCT 463",
137 | "name":"ACCT 463 Internal Audit",
138 | "mooc":"",
139 | "url":"",
140 | "image":""
141 | },
142 | {
143 | "code":"BUAD 304",
144 | "name":"BUAD 304 Organizational Behavior and Leadership",
145 | "mooc":"International Leadership and Organizational Behavior",
146 | "url":"http://www.coursera.org/learn/academic-discussion-english",
147 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/62/58d990f18cd48083251c3e57fe9e7e/Untitled-1.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
148 | },
149 | {
150 | "code":"BUAD 497",
151 | "name":"BUAD 497 Strategic Management",
152 | "mooc":"Strategic Business Management - Macroeconomics",
153 | "url":"http://www.coursera.org/learn/trigonometry",
154 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/d3/d90f107de511e7b945f75a7283d500/image-2.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
155 | },
156 | {
157 | "code":"ACCT 479",
158 | "name":"ACCT 479 Accounting Systems Development",
159 | "mooc":"Accounting Analytics",
160 | "url":"http://www.coursera.org/specializations/business-analytics",
161 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/ef/485800fcfd11e6b4cc3359e41f40e9/Pricing-Strategy.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
162 | },
163 | {
164 | "code":"ACCT 374",
165 | "name":"ACCT 374 Introduction to Tax Issues",
166 | "mooc":"",
167 | "url":"",
168 | "image":""
169 | },
170 | {
171 | "code":"ACCT 474",
172 | "name":"ACCT 474 Tax Issues for Business",
173 | "mooc":"",
174 | "url":"",
175 | "image":""
176 | },
177 | {
178 | "code":"ACCT 470",
179 | "name":"ACCT 470 Advanced External Financial Reporting Issues",
180 | "mooc":"",
181 | "url":"",
182 | "image":""
183 | },
184 | {
185 | "code":"ACCT 471",
186 | "name":"ACCT 471 Accounting Information Systems",
187 | "mooc":"Accounting Analytics",
188 | "url":"http://www.coursera.org/specializations/business-analytics",
189 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/e7/871c10757e11e7817cc9840feed7fa/enigma_rotors_with_alphabet_rings_cropped.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
190 | },
191 | {
192 | "code":"ACCT 372",
193 | "name":"ACCT 372 Internal Reporting Issues",
194 | "mooc":"",
195 | "url":"",
196 | "image":""
197 | },
198 | {
199 | "code":"ACCT 472",
200 | "name":"ACCT 472 Managerial Accounting",
201 | "mooc":"Managerial Accounting: Cost Behaviors, Systems, and Analysis",
202 | "url":"http://www.coursera.org/specializations/value-chain-management",
203 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d396qusza40orc.cloudfront.net/phoenixassets/learn-finance/BNY_Specialization_Banner_logo.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
204 | },
205 | {
206 | "code":"ACCT 430",
207 | "name":"ACCT 430 Accounting Ethics",
208 | "mooc":"Accounting Analytics",
209 | "url":"http://www.coursera.org/specializations/business-analytics",
210 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/ef/485800fcfd11e6b4cc3359e41f40e9/Pricing-Strategy.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF"
211 | }
212 | ]
--------------------------------------------------------------------------------
/Jupyter/integration.py:
--------------------------------------------------------------------------------
1 | from py2neo import Graph,NodeSelector
2 | import neo
3 | from ipywidgets import *
4 | from IPython.display import display, HTML
5 | neo.init_notebook_mode()
6 | graph = Graph("http://localhost:7474/db/data/")
7 |
# Free-text box for the program search. Its placeholder is the hint shown
# while the box is empty (typo "Prgram" corrected).
searchbox = widgets.Text(
    placeholder='Program Name',
    description='Search:',
    disabled=False)
12 |
def program_result(plist):
    """Build a vertical box holding one toggle button per entry of ``plist``.

    :param plist: iterable of strings used as the button descriptions.
    :return: a ``Box`` whose children are the created ``ToggleButton`` widgets.
    """
    # 'auto' width overrides the default button width so the button can grow.
    button_layout = Layout(flex='1 1 auto', width='auto')
    column_layout = Layout(
        display='flex',
        flex_flow='column',
        align_items='stretch',
        width='60%',
    )
    buttons = []
    for label in plist:
        buttons.append(ToggleButton(description=label, layout=button_layout))
    return Box(children=buttons, layout=column_layout)
25 |
def search_program(keyword):
    """Return the names of up to 10 ``Program`` nodes matching ``keyword``.

    The keyword is embedded in the case-insensitive regular expression
    ``(?i).*<keyword>.*``, so regex metacharacters in it keep their meaning.

    :param keyword: text (or regex fragment) to look for in program names.
    :return: list of the ``name`` property of each matching node.
    """
    # The pattern travels as a query parameter instead of being spliced into
    # the Cypher text, so a quote typed into the search box can no longer
    # terminate the string literal and change (or break) the statement.
    query = "MATCH (p:Program) WHERE p.name =~ {pattern} RETURN p LIMIT 10"
    pattern = "(?i).*" + keyword + ".*"
    rows = graph.data(query, pattern=pattern)
    return [row['p']['name'] for row in rows]
30 |
def value_changed(change):
    """Observer callback: search for the new text and display the matches.

    The children of the displayed box are kept on the function object itself
    as ``value_changed.box``.

    :param change: change notification; only its ``new`` attribute is read.
    """
    matches = search_program(change.new)
    result_box = program_result(matches)
    display(result_box)
    value_changed.box = result_box.children
36 |
37 | searchbox.observe(value_changed, 'value')
38 |
39 | display(searchbox)
40 |
41 | button = widgets.Button(description="See Detail!")
42 |
def on_button_clicked(b):
    """Visualize the program whose toggle button is currently pressed.

    :param b: the clicked button (unused).
    """
    # ``value_changed.box`` only exists once a search has been displayed.
    toggles = getattr(value_changed, 'box', ())
    chosen = [toggle.description for toggle in toggles if toggle.value]
    if not chosen:
        # Nothing searched or nothing pressed: the old code reached
        # ``visualize(selected)`` with ``selected`` never assigned.
        return
    # NOTE(review): with several toggles pressed the last one is used, which
    # assumes the original called visualize() once after the loop - confirm.
    visualize(chosen[-1])
48 |
49 | button.on_click(on_button_clicked)
50 | display(button)
51 |
52 | def refernce_display(clist):
53 | content = '''
54 |
55 |
56 | | Code |
57 | Name |
58 | Image |
59 | MOOC |
60 |
61 | '''
62 | for c in clist:
63 | content += '''
64 |
65 | | ''' + c['code'] + ''' |
66 | ''' + c['name'] + ''' |
67 | ![]() |
68 | ''' + c['mooc'] + ''' |
'''
69 | #return content
70 | display(HTML(content))
71 |
72 |
73 | from collections import defaultdict
74 | from py2neo import Graph
75 | import pandas as pd
76 |
77 |
def TopoSort(program_name):
    """Fetch the prerequisite pairs between courses of one program.

    Runs a query returning ``m.id, n.id`` for every ``HasPrerequisite``
    relationship whose two ends both have an ``UnderProgram`` relationship
    to a node with the given name.

    :param program_name: spliced verbatim into the Cypher map ``{name:...}``.
    """
    graph = Graph("http://localhost:7474/db/data/")
    # NOTE(review): program_name is inserted without surrounding quotes, so
    # the caller apparently has to pass an already-quoted Cypher literal
    # (e.g. "'Accounting (BS)'") - confirm against the call sites.
    adjPair = graph.data("MATCH (m)-[:HasPrerequisite]->(n) \
WHERE (m)-[:UnderProgram]->({name:" + program_name + "}) \
and (n)-[:UnderProgram]->({name:" + program_name + "}) \
RETURN m.id, n.id")
84 |
class Prerequisite:
    """Directed graph of course ids with a depth-first topological ordering.

    An edge ``u -> v`` is stored in ``self.graph[u]``. For acyclic input,
    ``tps`` returns an order in which every ``u`` precedes each ``v`` it
    points to.
    """

    def __init__(self, courselist):
        """:param courselist: iterable of vertex ids used as DFS start points."""
        self.graph = defaultdict(list)
        self.vertices = courselist

    def addEdge(self, u, v):
        """Record a directed edge from ``u`` to ``v``."""
        self.graph[u].append(v)

    def dfs(self, v, visit, order):
        """Visit ``v`` and all vertices reachable from it.

        :param v: vertex to start from.
        :param visit: dict vertex -> bool, updated in place.
        :param order: list; ``v`` is inserted at the front once all of its
            successors have been inserted.
        """
        visit[v] = True
        for u in self.graph[v]:
            # .get() so that an edge target missing from ``vertices`` is
            # treated as unvisited instead of raising KeyError.
            if not visit.get(u, False):
                self.dfs(u, visit, order)
        order.insert(0, v)

    def tps(self):
        """Return all vertices in topological order (sources first)."""
        visit = {v: False for v in self.vertices}
        order = []
        for v in self.vertices:
            if not visit[v]:
                self.dfs(v, visit, order)
        return order
107 |
# Collect every course id that appears on either side of a prerequisite pair.
courselist = []
for i in adjPair:
    courselist += [i["m.id"], i["n.id"]]
courselist = list(set(courselist))

g = Prerequisite(courselist)
for edge in adjPair:
    g.addEdge(edge["m.id"], edge["n.id"])
# MATCH (n:Course{id:"BUAD 280"}) RETURN n
toposort = g.tps()


nodes = []
course = []
relation = []
relat = []
import pandas as pd
# Walk the order back to front so prerequisites are listed first.
for course_id in toposort[::-1]:
    course_match = "(n:Course{id:'" + course_id + "'})"
    node = graph.data("Match " + course_match + " RETURN n")
    info = graph.data("Match " + course_match + " RETURN n.name, n.description")[0]
    # Fetch name, image and url from the SAME MOOC node in one query. Three
    # separate "limit 1" queries could each pick a different similar MOOC,
    # pairing one course's title with another course's link or picture.
    mooc = graph.data("MATCH p = " + course_match + "-[r:SimilarTo]->(m:MOOC) "
                      "RETURN m.name, m.image, m.url limit 1")
    print(mooc)
    dic = {"code": course_id, "name": info["n.name"], "mooc": "", "url": "", "image": ""}
    if mooc:
        rlist = [course_id, info["n.name"],
                 mooc[0]["m.name"], mooc[0]["m.image"], mooc[0]["m.url"]]
        relation.append(rlist)
        dic["mooc"], dic["image"], dic["url"] = rlist[2], rlist[3], rlist[4]
    # Courses without a similar MOOC are still listed, with empty MOOC fields.
    relat.append(dic)
    nodes += node
    course.append([course_id, info["n.name"], info["n.description"]])
course_df = pd.DataFrame(course)
relation_df = pd.DataFrame(relation)
refernce_display(relat)

import json
j = json.dumps(relat)
154 |
155 |
--------------------------------------------------------------------------------
/Jupyter/neo.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import uuid
4 | import tempfile
5 | from IPython.display import HTML, Javascript, display
6 |
7 | DEFAULT_PHYSICS = {
8 | "physics": {
9 | "barnesHut": {
10 | "gravitationalConstant": -15150,
11 | "centralGravity": 3.45,
12 | "springLength": 261,
13 | "damping": 0.3
14 | }
15 | }
16 | }
17 |
18 |
def get_visjs():
    """Placeholder hook; does nothing and yields ``None``."""
    return None
21 |
22 |
def init_notebook_mode():
    """
    Make vis.js available in the notebook front end.

    Displays a ``Javascript`` object whose code configures RequireJS with a
    ``vis`` path pointing at the cdnjs build of vis.js 4.8.2, requires it and
    assigns the module to ``window.vis``. The vis.css URL is passed through
    the ``css`` argument.
    """

    display(
        Javascript(data="require.config({ " +
                        " paths: { " +
                        " vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.8.2/vis.min' " +
                        " } " +
                        "}); " +
                        "require(['vis'], function(vis) { " +
                        " window.vis = vis; " +
                        "}); ",
                   css='https://cdnjs.cloudflare.com/ajax/libs/vis/4.8.2/vis.css')
    )
39 |
def vis_network(nodes, edges, physics=True):
    """
    Fill the ``assets/index.html`` template with the given graph data.

    :param nodes: The nodes to be represented and their information
        (must be JSON-serializable).
    :param edges: The edges to be represented and their information
        (must be JSON-serializable).
    :param physics: The options for the physics of vis.js.
    :return: str - the template text with ``id``, ``nodes``, ``edges`` and
        ``physics`` substituted (not an ``IPython.display.HTML`` object).
    """
    template_path = os.path.join(os.path.dirname(__file__), 'assets/index.html')
    # Read inside a context manager so the handle is closed right away
    # instead of being left for the garbage collector.
    with open(template_path) as template_file:
        base = template_file.read()

    # A fresh id per call lets several renderings coexist in one page.
    unique_id = str(uuid.uuid4())
    html = base.format(id=unique_id, nodes=json.dumps(nodes),
                       edges=json.dumps(edges), physics=json.dumps(physics))

    return html
54 |
55 |
def draw(data, options, physics=True, limit=100):
    """
    Turn query result rows into the vis.js page produced by ``vis_network``.

    The options argument should be a dictionary of node labels and property keys; it determines which property
    is displayed for the node label. For example, in the movie graph, options = {"Movie": "title", "Person": "name"}.
    Omitting a node label from the options dict will leave the node unlabeled in the visualization.
    Setting physics = True makes the nodes bounce around when you touch them!

    :param data: Iterable of rows indexable as
        (source node, source id, relationship, target node, target id).
    :param options: Options for the Nodes.
    :param physics: Physics of the vis.js visualization.
    :param limit: Not used by this function; kept so existing calls still work.
    :return: The value returned by ``vis_network`` for the collected nodes and edges.
    """
    nodes = []
    edges = []

    def get_vis_info(node, id):
        """Describe one node as the dict vis.js expects."""
        labels = list(node.labels())
        # A node without any label gets an empty group instead of IndexError.
        node_label = labels[0] if labels else ""
        prop_key = options.get(node_label)
        vis_label = node.properties.get(prop_key, "")

        return {"id": id, "label": vis_label, "group": node_label, "title": repr(node.properties)}

    for row in data:
        source_node = row[0]
        source_id = row[1]
        rel = row[2]
        target_node = row[3]
        target_id = row[4]

        source_info = get_vis_info(source_node, source_id)

        # The same node can appear in many rows; list it only once.
        if source_info not in nodes:
            nodes.append(source_info)

        if rel is not None:
            target_info = get_vis_info(target_node, target_id)

            if target_info not in nodes:
                nodes.append(target_info)

            edges.append({"from": source_info["id"], "to": target_info["id"], "label": rel.type()})

    return vis_network(nodes, edges, physics=physics)
126 |
--------------------------------------------------------------------------------
/Jupyter/toposort.py:
--------------------------------------------------------------------------------
1 | from py2neo import Graph
2 | graph = Graph("http://localhost:7474/db/data/")
3 |
4 |
def TopoSort(program_name):
    """Return course ids ordered so that prerequisites come first.

    Each returned pair ``(m.id, n.id)`` means "to take m you must first take
    n". Courses that are part of a prerequisite cycle never become ready and
    are left out of the result.

    :param program_name: NOTE(review): currently ignored - the query below
        is hard-coded to 'Accounting (BS)'; confirm how callers quote the
        name before wiring it in.
    :return: list of course ids.
    """
    from collections import deque

    adjPair = graph.data(
        "MATCH (m)-[:HasPrerequisite]->(n) "
        "WHERE (m)-[:UnderProgram]->({name: 'Accounting (BS)'}) "
        "and (n)-[:UnderProgram]->({name: 'Accounting (BS)'}) "
        "RETURN m.id, n.id")

    courselist = []
    for pair in adjPair:
        courselist += [pair["m.id"], pair["n.id"]]
    courselist = list(set(courselist))

    # dependents[n]: courses that list n as a prerequisite.
    # missing[m]: number of m's prerequisites not yet placed in the order.
    dependents = {c: [] for c in courselist}
    missing = {c: 0 for c in courselist}
    for pair in adjPair:
        dependents[pair["n.id"]].append(pair["m.id"])
        missing[pair["m.id"]] += 1

    # Kahn's algorithm. The previous version started from the same courses
    # (those without prerequisites) but then followed their own, empty,
    # prerequisite lists, so nothing else was ever enqueued.
    ready = deque(c for c in courselist if missing[c] == 0)
    order = []
    while ready:
        done = ready.popleft()
        order.append(done)
        for c in dependents[done]:
            missing[c] -= 1
            if missing[c] == 0:
                ready.append(c)

    return order
40 |
41 |
42 | #Python program to print topological sorting of a DAG
43 | from collections import defaultdict
44 |
45 | #Class to represent a graph
# Class to represent a graph
# NOTE(review): this name shadows the ``Graph`` imported from py2neo at the
# top of the file for any code that runs after this definition.
class Graph:
    """Directed graph over the vertices ``0 .. vertices-1``."""

    def __init__(self, vertices):
        """:param vertices: number of vertices."""
        self.graph = defaultdict(list)  # dictionary containing adjacency list
        self.V = vertices  # No. of vertices

    def addEdge(self, u, v):
        """Add a directed edge from ``u`` to ``v``."""
        self.graph[u].append(v)

    def topologicalSortUtil(self, v, visited, stack):
        """Recursive helper: visit ``v`` and everything reachable from it.

        :param visited: list of bool indexed by vertex, updated in place.
        :param stack: list; ``v`` is inserted at the front after all
            vertices adjacent to it have been handled.
        """
        # Mark the current node as visited.
        visited[v] = True

        # Recur for all the vertices adjacent to this vertex
        for i in self.graph[v]:
            if not visited[i]:
                self.topologicalSortUtil(i, visited, stack)

        # Push current vertex to stack which stores result
        stack.insert(0, v)

    def topologicalSort(self):
        """Print the topological order and also return it as a list."""
        # Mark all the vertices as not visited
        visited = [False] * self.V
        stack = []

        # Call the recursive helper function to store Topological
        # Sort starting from all vertices one by one
        for i in range(self.V):
            if not visited[i]:
                self.topologicalSortUtil(i, visited, stack)

        # Function-call form of print: valid on Python 3 and prints the same
        # text on Python 2 for a single argument.
        print(stack)
        return stack
84 |
85 |
def tp(self, v, visited, stack):
    """Mark index ``v`` as visited and insert it at the front of ``stack``.

    NOTE(review): ``adjList`` and ``courselist`` are read as free names, but
    the visible part of this file only assigns them as locals of TopoSort,
    so calling this as written looks like a NameError - confirm.
    """
    visited[v] = True
    for i in adjList[courselist[v]]:
        # NOTE(review): this tests visited[v], which was set to True just
        # above, so the recursive call below is never reached. It presumably
        # was meant to test the neighbour - confirm intent before changing.
        if visited[v] == False:
            self.tp(i, visited, stack)
    stack.insert(0, v)
92 |
def ts(self):
    """Run ``tp`` from every not-yet-visited index of ``courselist``.

    The resulting ``stack`` is built but neither returned nor printed.
    NOTE(review): ``courselist`` is a free name here; see the same caveat
    on ``tp`` - confirm where it is expected to come from.
    """
    visited = [False] * len(courselist)
    stack = []
    for i in range(len(courselist)):
        if visited[i] == False:
            self.tp(i, visited, stack)
99 |
100 |
101 |
--------------------------------------------------------------------------------
/Jupyter/tpsort.py:
--------------------------------------------------------------------------------
1 | import json, jgraph
2 | import pandas as pd
3 | from py2neo import Graph
4 | from collections import defaultdict
5 |
6 |
class Prerequisite:
    """Directed graph of course ids with a depth-first topological ordering.

    An edge ``u -> v`` is stored in ``self.graph[u]``. For acyclic input,
    ``tps`` returns an order in which every ``u`` precedes each ``v`` it
    points to.
    """

    def __init__(self, courselist):
        """:param courselist: iterable of vertex ids used as DFS start points."""
        self.graph = defaultdict(list)
        self.vertices = courselist

    def addEdge(self, u, v):
        """Record a directed edge from ``u`` to ``v``."""
        self.graph[u].append(v)

    def dfs(self, v, visit, order):
        """Visit ``v`` and all vertices reachable from it.

        :param v: vertex to start from.
        :param visit: dict vertex -> bool, updated in place.
        :param order: list; ``v`` is inserted at the front once all of its
            successors have been inserted.
        """
        visit[v] = True
        for u in self.graph[v]:
            # .get() so that an edge target missing from ``vertices`` is
            # treated as unvisited instead of raising KeyError.
            if not visit.get(u, False):
                self.dfs(u, visit, order)
        order.insert(0, v)

    def tps(self):
        """Return all vertices in topological order (sources first)."""
        visit = {v: False for v in self.vertices}
        order = []
        for v in self.vertices:
            if not visit[v]:
                self.dfs(v, visit, order)
        return order
29 |
30 |
def topo_Sort(program_name):
    """Return the course ids of a program, ordered via ``Prerequisite.tps``.

    Queries every ``(m)-[:HasPrerequisite]->(n)`` pair whose two ends both
    have an ``UnderProgram`` relationship to the named node, adds an edge
    m -> n for each pair and returns the reversed ``tps()`` order.

    :param program_name: spliced verbatim into the Cypher map ``{name:...}``.
    :return: list of course ids.
    """
    graph = Graph("http://localhost:7474/db/data/")
    # NOTE(review): program_name is inserted without surrounding quotes, so
    # the caller apparently has to pass an already-quoted Cypher literal
    # (e.g. "'Accounting (BS)'") - confirm against the call sites.
    adjPair = graph.data("MATCH (m)-[:HasPrerequisite]->(n) \
WHERE (m)-[:UnderProgram]->({name:" + program_name + "}) \
and (n)-[:UnderProgram]->({name:" + program_name + "}) \
RETURN m.id, n.id")

    # Every id seen on either side of a pair becomes a vertex (deduplicated).
    courselist = []
    for i in adjPair:
        courselist += [i["m.id"], i["n.id"]]
    courselist = list(set(courselist))

    g = Prerequisite(courselist)
    for edge in adjPair:
        g.addEdge(edge["m.id"], edge["n.id"])
    # MATCH (n:Course{id:"BUAD 280"}) RETURN n
    toposort = g.tps()
    # tps() puts m before the n it points to; reversing puts n first.
    return toposort[::-1]
49 |
50 |
def display_list(order):
    """Display the given courses, each with at most one similar MOOC.

    For every id a dict with the keys ``code``, ``name``, ``mooc``, ``url``
    and ``image`` is built (the last three stay empty when the course has no
    ``SimilarTo`` MOOC) and the list is handed to ``refernce_display``.

    :param order: iterable of course ids, in display order.
    :raises IndexError: if an id matches no ``Course`` node.
    """
    # ``graph`` was only ever a local of topo_Sort, so referring to it here
    # raised NameError; open a connection the same way topo_Sort does.
    graph = Graph("http://localhost:7474/db/data/")

    relat = []
    for course_id in order:
        course_match = "(n:Course{id:'" + course_id + "'})"
        coursename = graph.data("Match " + course_match + " RETURN n.name")[0]["n.name"]
        # Fetch name, image and url from the SAME MOOC node in one query.
        # Three separate "limit 1" queries could each pick a different
        # similar MOOC, mixing one MOOC's title with another's link/picture.
        mooc = graph.data("MATCH p = " + course_match + "-[r:SimilarTo]->(m:MOOC) "
                          "RETURN m.name, m.image, m.url limit 1")
        dic = {"code": course_id, "name": coursename, "mooc": "", "url": "", "image": ""}
        if mooc:
            dic["mooc"] = mooc[0]["m.name"]
            dic["image"] = mooc[0]["m.image"]
            dic["url"] = mooc[0]["m.url"]
        relat.append(dic)

    # NOTE(review): refernce_display is not defined or imported in the
    # visible part of this module - confirm where it is expected to come from.
    refernce_display(relat)
78 |
--------------------------------------------------------------------------------
/Khan/khan_driver.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import requests
3 | from bs4 import BeautifulSoup
4 | import json
5 | import time
6 | import sys
7 | reload(sys)
8 | sys.setdefaultencoding('utf-8')
9 |
10 | # _x_query = {
11 | # "coursetag": "//div[@class = 'gs-webResult gs-result']",
12 | # "coursename": "//a[@class='gs-title']",
13 | # "description": "//div[@class = 'gs-bidi-start-align gs-snippet']"
14 | # "image_url": "//img[@class='gs-image']",
15 | # }
16 |
def wrapper(url, id):
    """Scrape the search results shown at ``url`` into course dicts.

    :param url: search page to open in a fresh Firefox session.
    :param id: integer used for the next ``khanNNNNN`` id; it is advanced
        once per result that has both a name and a description.
    :return: ``(next_id, courses)`` where ``courses`` holds only the results
        for which both a link and an image were found at the same position.
    """
    browser = webdriver.Firefox()
    try:
        browser.set_page_load_timeout(1200)
        browser.get(url)
        results = browser.find_elements_by_xpath('//div[@class = "gs-webResult gs-result"]')
        courses = []
        for result in results:
            lines = result.text.split('\n')
            # First line up to '|' is the title, last line the snippet.
            name = lines[0].split('|')[0]
            description = lines[-1]
            if name == "" or description == "":
                continue
            courses.append({
                "id": "khan" + str(id).zfill(5),
                "description": description,
                "name": name,
                "provenance": "khan Academy",
            })
            id += 1
        urls = [a.get_attribute('href')
                for a in browser.find_elements_by_xpath("//div/a[@class='gs-image']")]
        images = [img.get_attribute('src')
                  for img in browser.find_elements_by_xpath("//img[@class='gs-image']")]
    finally:
        # Close the window even when loading or scraping raised, so a failed
        # page does not leave a Firefox instance behind.
        browser.close()

    # zip() stops at the shortest list, i.e. entries without a link or an
    # image at their position are dropped, as before.
    alist = []
    for course, course_url, image in zip(courses, urls, images):
        course["course_url"] = course_url
        course["img"] = image
        alist.append(course)
    return id, alist
47 | # print data
48 |
49 |
50 |
def driver(domain, keywords, data, i):
    """Search every keyword and append the scraped courses to ``data``.

    :param domain: search URL prefix; the keyword is appended to it.
    :param keywords: iterable of search terms.
    :param data: list extended in place with the course dicts.
    :param i: first free integer for course ids.
    :return: the next free id after all keywords were processed.
    """
    for keyword in keywords:
        print(keyword)
        url = domain + keyword
        print(url)
        browser = webdriver.Firefox()
        try:
            browser.set_page_load_timeout(1200)
            browser.get(url)
            time.sleep(20)
            for j in range(1, 9):
                pages = browser.find_elements_by_xpath('//div[@class = "gsc-cursor-page"]')
                if j >= len(pages):
                    break
                pages[j].click()
                time.sleep(40)
                # NOTE(review): wrapper() opens ``url`` in its own browser, so
                # it does not see the page selected by the click above -
                # confirm whether per-page results are actually collected.
                i, found = wrapper(url, i)
                data += found
        finally:
            # Exactly one close per opened browser, also on errors; the old
            # code ended with two consecutive close() calls.
            browser.close()
    return i
74 |
domain = "https://www.khanacademy.org/search?referer=%2F&page_search_query="
# One search per keyword. A missing comma used to fuse 'social' and
# 'marketing' into the single keyword 'socialmarketing'.
keywords = ['data', 'social', 'marketing', 'design', 'web', 'cyber']
#,\
# 'program', 'platform', 'map', 'intelligence', 'knowledge', 'graph', 'probability',\
# 'digital', 'electronic', 'architecture', 'infrastructure', 'digital', 'electronic',\
# 'architecture', 'infrastructure', 'program', 'platform', 'map', 'intelligence',\
# 'entrepreneurship','cyber', 'knowledge', 'graph', 'probability', "engineer"]
i = 0
data = []
driver(domain, keywords, data, i)
# browser.quit()



# NOTE(review): append mode adds a new JSON array after whatever the file
# already holds, so a second run yields a file that is not one valid JSON
# document - confirm whether batches are merged by hand afterwards.
with open('khan_data.json', 'a') as f:
    json.dump(data, f)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CoursePlanner
2 | ### Knowledge Graph Project
3 |
4 | Introduction: [CoursePlanner](https://github.com/rpedsel/CoursePlanner/blob/master/CoursePlanner.pdf)
5 |
Demonstration Video: [YouTube](https://www.youtube.com/watch?v=L34QmfxO4a8)
7 |
--------------------------------------------------------------------------------
/Udacity/Src/Process_endpoint.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 |
# Convert the raw Udacity catalogue dump into the project's course records.
with open('udacity_endpoint.json') as source:
    # Context manager: the input handle is closed as soon as it is parsed.
    data = json.load(source)
print(len(data["courses"]))

courselist = []
i = 0
for i, entry in enumerate(data["courses"], 1):
    course_obj = {
        "id": "udacity" + str(i).zfill(5),
        "name": entry["title"],
        "course_url": entry["homepage"],
        "description": entry["expected_learning"] + ' ' + entry["summary"],
    }
    # Courses without affiliates are attributed to Udacity itself.
    if len(entry["affiliates"]) != 0:
        course_obj["provenance"] = entry["affiliates"]
    else:
        course_obj["provenance"] = [{"name": "udacity"}]
    courselist.append(course_obj)

# NOTE(review): append mode - running the script twice leaves two JSON
# arrays back to back in the file; confirm this is intended.
with open('udacity_data.json', 'a') as f:
    json.dump(courselist, f)
24 |
25 | # print courselist
26 |
27 | # print data["courses"][0]
--------------------------------------------------------------------------------
/Udacity/Udacity_wrapper/driver_udacity.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import requests
3 | import json
4 | from bs4 import BeautifulSoup
5 | import sys
# The former reload(sys) / sys.setdefaultencoding('utf-8') hack is gone: it
# only existed to make the implicit str<->unicode round trip in the old
# `.decode('utf8')` calls work on Python 2, and `reload` is not a builtin on
# Python 3. Text is now reduced to ASCII explicitly in _ascii_text().

url = 'https://www.udacity.com/courses/all'
domain = 'https://www.udacity.com'


def _ascii_text(tag):
    """Return the tag's stripped text with every non-ASCII character dropped."""
    return tag.text.strip().encode('ascii', errors='ignore').decode('ascii')


courselist = []
driver = webdriver.Firefox()
try:
    driver.set_page_load_timeout(1200)
    driver.get(url)
    # NOTE(review): the page is opened in Firefox above, but the HTML that is
    # parsed comes from a separate requests.get() call, so the browser session
    # does not contribute to the result. Confirm whether driver.page_source
    # was intended here.
    source_code = requests.get(url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, 'html.parser')
    courseTags = soup.find_all("div", "course-summary-card row row-gap-medium")

    # Ids are "udacity" plus a 1-based, 5-digit, zero-padded counter.
    for i, tag in enumerate(courseTags, 1):
        title_link = tag.select('a[data-course-title]')[0]
        summary = tag.select('div[data-course-short-summary]')[0]

        coursedic = {}
        coursedic["id"] = "udacity" + str(i).zfill(5)
        coursedic["provenance"] = "udacity"
        coursedic["img"] = tag.select('img[src]')[0]['data-src']
        coursedic["course_url"] = domain + title_link['href']
        coursedic["name"] = _ascii_text(title_link)
        coursedic["description"] = _ascii_text(summary)
        courselist.append(coursedic)
finally:
    # Always close the browser, even when fetching or parsing raises.
    driver.quit()

print(courselist)
# NOTE(review): mode 'a' appends to an existing file; a second run leaves two
# JSON arrays back to back, which is not valid JSON. Kept as-is to avoid
# overwriting earlier output -- confirm the intent.
with open('../udacity_data.json', 'a') as f:
    json.dump(courselist, f)
--------------------------------------------------------------------------------
/edX/sample.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "id":"csr00000",
4 | "course_name":"SomeCourse Blah",
5 | "subject":"Computer Science",
6 | "description":"Blah blah blah blah",
7 | "image_url":"http://example.jpg",
8 | "provenance":"Coursera"
9 | },
10 | {
11 | "id":"edx00320",
12 | "course_name":"Intro to SomeOtherCourse",
13 | "subject":"Art & History",
14 | "description":"Blah blah blah blah blah",
15 | "image_url":"http://anotherexample.jpg",
16 | "provenance":"edX"
17 | }
18 | ]
19 |
--------------------------------------------------------------------------------
/edX/sample_cypher_edX:
--------------------------------------------------------------------------------
// Sample load script: for each CSV row, merges a Course node, links it to a
// Provenance node via HostedBy, and links it to one Subject node per
// ';'-separated entry of row.subject via OfSubject.
// NOTE(review): the bracketed file path on the first line and the
// parenthesised row limit on the second look like placeholders / optional
// hints rather than literal Cypher -- adapt them before running.
load csv with headers from ['file path:/edX.csv'] as row
(with row limit 100)
// The course is matched on id, name, description and url together.
merge (course:Course{
id:row.id,
name:row.course_name,
description:row.description,
url:row.course_url})
// The image is assigned whether the course was just created or already existed.
ON CREATE SET course.image = row.image_url
ON MATCH SET course.image = row.image_url
merge (provenance:Provenance{name:row.provenance})
merge (course) - [:HostedBy] -> (provenance)
// row.subject is split on ';' so a course can belong to several subjects.
foreach (subjectName in split(row.subject,";") |
merge (subject:Subject{name:subjectName})
merge (course) - [:OfSubject] -> (subject));
15 |
--------------------------------------------------------------------------------
/neo4jupyter/assets/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
47 |
--------------------------------------------------------------------------------
/neo4jupyter/neo.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import uuid
4 | import tempfile
5 | from IPython.display import HTML, Javascript, display
6 |
# Default "physics" options block with the barnesHut settings used for the
# network visualisation.
DEFAULT_PHYSICS = {
    "physics": {
        "barnesHut": dict(
            gravitationalConstant=-15150,
            centralGravity=3.45,
            springLength=261,
            damping=0.3,
        ),
    },
}
17 |
18 |
def get_visjs():
    """Placeholder that does nothing and returns ``None``."""
    return None
21 |
22 |
def init_notebook_mode():
    """
    Displays a Javascript snippet that configures require.js to load vis
    4.8.2 from cdnjs and stores the loaded module on ``window.vis``; the
    matching vis stylesheet is passed along as ``css``.
    """
    # The pieces are joined verbatim, so the emitted script is unchanged.
    loader_js = "".join((
        "require.config({ ",
        " paths: { ",
        " vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.8.2/vis.min' ",
        " } ",
        "}); ",
        "require(['vis'], function(vis) { ",
        " window.vis = vis; ",
        "}); ",
    ))
    stylesheet = 'https://cdnjs.cloudflare.com/ajax/libs/vis/4.8.2/vis.css'

    snippet = Javascript(data=loader_js, css=stylesheet)
    display(snippet)
39 |
def vis_network(nodes, edges, physics=True):
    """
    Renders the HTML page for a network by filling in the template stored in
    ``assets/index.html`` next to this module.

    The template's ``id``, ``nodes``, ``edges`` and ``physics`` format fields
    receive a freshly generated UUID and the JSON-encoded arguments.

    :param nodes: The nodes to be represented and their information.
    :param edges: The edges to be represented and their information.
    :param physics: The options for the physics of vis.js.
    :return: The rendered page as a ``str`` (not an IPython.display.HTML object).
    """
    template_path = os.path.join(os.path.dirname(__file__), 'assets/index.html')
    # Read through a context manager so the template file handle is closed
    # instead of being left for the garbage collector.
    with open(template_path) as template_file:
        base = template_file.read()

    # A new id per call; the template receives it as its ``id`` field.
    unique_id = str(uuid.uuid4())
    html = base.format(
        id=unique_id,
        nodes=json.dumps(nodes),
        edges=json.dumps(edges),
        physics=json.dumps(physics),
    )

    return html
54 |
55 |
def draw(data, options, physics=True, limit=100):
    """
    Builds the vis.js node and edge lists from query result rows and renders
    them with vis_network().

    The options argument should be a dictionary of node labels and property keys; it determines which property
    is displayed for the node label. For example, in the movie graph, options = {"Movie": "title", "Person": "name"}.
    Omitting a node label from the options dict will leave the node unlabeled in the visualization.
    Setting physics = True makes the nodes bounce around when you touch them!

    Each row of ``data`` is read by position: ``row[0]`` source node,
    ``row[1]`` source id, ``row[2]`` relationship (or None), ``row[3]``
    target node, ``row[4]`` target id. This is the shape produced by a query
    such as::

        MATCH (n:Course{id:'edx00010'})-[r]->(m)
        RETURN n AS source_node, id(n) AS source_id, r,
               m AS target_node, id(m) AS target_id

    Nodes must provide ``labels()`` and ``properties``; relationships must
    provide ``type()``.

    :param data: Iterable of result rows as described above.
    :param options: Options for the Nodes.
    :param physics: Physics of the vis.js visualization.
    :param limit: Currently unused; kept so existing callers keep working.
    :return: Whatever vis_network() returns for the collected nodes and edges.
    """
    nodes = []
    edges = []
    # Ids of the nodes already collected, so each node is emitted once without
    # rescanning the whole node list for every row.
    seen_ids = set()

    def get_vis_info(node, node_id):
        # The first label picks both the vis.js group and, through `options`,
        # the property shown as the node's caption.
        node_label = list(node.labels())[0]
        prop_key = options.get(node_label)
        vis_label = node.properties.get(prop_key, "")

        return {"id": node_id, "label": vis_label, "group": node_label, "title": repr(node.properties)}

    def add_node(info):
        if info["id"] not in seen_ids:
            seen_ids.add(info["id"])
            nodes.append(info)

    for row in data:
        source_node = row[0]
        source_id = row[1]
        rel = row[2]
        target_node = row[3]
        target_id = row[4]

        source_info = get_vis_info(source_node, source_id)
        add_node(source_info)

        # Rows without a relationship contribute only their source node.
        if rel is not None:
            target_info = get_vis_info(target_node, target_id)
            add_node(target_info)

            edges.append({"from": source_info["id"], "to": target_info["id"], "label": rel.type()})

    return vis_network(nodes, edges, physics=physics)
126 |
--------------------------------------------------------------------------------
/rpedsel/Cypher:
--------------------------------------------------------------------------------
1 |
2 | # Load processed CSV files into Neo4j Database:
3 |
4 | ===== MOOC Entities =====
5 |
// For every row of Mooc_merge.csv that has a provenance value: merge the MOOC
// node (matched on id, name and description), refresh its url / image /
// special_id, link it to its Provenance, and link it to one Subject per
// ';'-separated entry of row.subject.
LOAD CSV WITH HEADERS FROM 'file:/Mooc_merge.csv' AS row
WITH row
WHERE row.provenance IS NOT NULL
MERGE (mooc:MOOC {id: row.id, name: row.name, description: row.description})
  // The same three properties are written on creation and on match.
  ON CREATE SET mooc.url = row.course_url,
                mooc.image = row.image_url,
                mooc.special_id = row.special_id
  ON MATCH SET mooc.url = row.course_url,
               mooc.image = row.image_url,
               mooc.special_id = row.special_id
MERGE (provenance:Provenance {pname: row.provenance})
MERGE (mooc)-[:HostedBy]->(provenance)
FOREACH (subjectName IN split(row.subject, ";") |
  MERGE (subject:Subject {sname: subjectName})
  MERGE (mooc)-[:OfSubject]->(subject));
22 |
23 |
24 | ===== USC Course Entities =====
25 |
// For every row of Catalogue_sim.csv: merge the Course node, then link it to
// the courses named in its ';'-separated prerequisite / preparation /
// corequisite / crosslist / duplicate columns and to its similar MOOCs.
//
// FIX: the course is now merged on its id alone and name / description are
// assigned with SET. MERGE matches on the whole pattern, so the previous
// MERGE on {id, name, description} never matched a Course node that an
// earlier row had already created from an id only (in the FOREACH clauses
// below), and a second node with the same id was created.
LOAD CSV WITH HEADERS FROM 'file:/Catalogue_sim.csv' AS row
MERGE (course:Course {id: row.id})
SET course.name = row.name,
    course.description = row.description
FOREACH (prerequisiteID IN split(row.prerequisite, ";") |
  MERGE (pcourse:Course {id: prerequisiteID})
  MERGE (course)-[:HasPrerequisite]->(pcourse))
FOREACH (preparationID IN split(row.preparation, ";") |
  MERGE (prcourse:Course {id: preparationID})
  MERGE (course)-[:HasPreparation]->(prcourse))
FOREACH (corequisiteID IN split(row.corequisite, ";") |
  MERGE (ccourse:Course {id: corequisiteID})
  MERGE (course)-[:HasCorequisite]->(ccourse))
FOREACH (crosslistID IN split(row.crosslist, ";") |
  MERGE (crcourse:Course {id: crosslistID})
  MERGE (course)-[:HasCrosslist]->(crcourse))
FOREACH (duplicateID IN split(row.duplicate, ";") |
  MERGE (dcourse:Course {id: duplicateID})
  MERGE (course)-[:HasDuplicate]->(dcourse))

// Each similarity entry is "<mooc id>/<value>"; an empty column yields no links.
FOREACH (p IN CASE row.similarity WHEN "" THEN [] ELSE split(row.similarity, ';') END |
  MERGE (smooc:MOOC {id: split(p, "/")[0]})
  MERGE (course)-[:SimilarTo {value: split(p, "/")[1]}]->(smooc));
50 |
51 |
52 | ===== USC Program Entities =====
53 |
// For every row of Catalogue.csv: merge the Program node (matched on id,
// type, name and url) and attach each course listed in the ';'-separated
// row.courses column to it via UnderProgram.
LOAD CSV WITH HEADERS FROM 'file:/Catalogue.csv' AS row
MERGE (program:Program {id: row.id, type: row.type, name: row.name, url: row.url})
FOREACH (courseID IN split(row.courses, ";") |
  MERGE (course:Course {id: courseID})
  MERGE (course)-[:UnderProgram]->(program));
63 |
--------------------------------------------------------------------------------