├── .gitignore ├── All_Mooc ├── Course_csv │ ├── Coursera_flat.csv │ ├── Mooc_merge.csv │ └── ToCsv.py ├── Cypher │ └── Mooc_merge_cypher.txt ├── Link_Course │ ├── Catalogue_sim.json │ ├── Link_course.py │ ├── inter_link.json │ ├── inter_link_ID.csv │ ├── inter_link_Nam.csv │ └── similarity_compute.ipynb ├── Mooc_json │ ├── Catalogue_course.json │ ├── Coursera_flat.json │ ├── Mooc_merge.json │ └── Unify.py └── Program_order │ ├── Program │ └── Catalogue.json │ └── Specialization │ ├── Coursera_data.json │ ├── Spe_csv_str.json │ ├── Spe_lst.json │ ├── Specialization.csv │ ├── no_duplicate? │ ├── Specialization2.csv │ └── Specialization3.json │ └── process.py ├── Catalogue ├── Catalogue.json ├── Catalogue_course.json ├── Flatten_course.ipynb └── USCCatalog_wrapper.ipynb ├── CoursePlanner.pdf ├── Coursera ├── Coursera_data.json ├── Meta_data │ ├── Coursera.key │ ├── Coursera_sample.json │ └── Page_RDF.json └── Src │ ├── Coursera_driver.py │ ├── Crawl_Coursera.py │ ├── geckodriver.log │ └── troubleshoot.md ├── Jupyter ├── .ipynb_checkpoints │ ├── TopoSort_UserInterface-checkpoint.ipynb │ ├── UserInterface-Copy1-checkpoint.ipynb │ └── test-checkpoint.ipynb ├── Catalogue.csv ├── Catalogue_sim.csv ├── Mooc_merge.csv ├── TopoSort_UserInterface.ipynb ├── Untitled.ipynb ├── UserInterface-Copy1.ipynb ├── __pycache__ │ └── neo.cpython-36.pyc ├── assets │ └── index.html ├── cypher.py ├── data (2).json ├── integration.py ├── neo.py ├── test.ipynb ├── toposort.py └── tpsort.py ├── Khan ├── khan_data.json └── khan_driver.py ├── README.md ├── Udacity ├── Src │ ├── Process_endpoint.py │ └── udacity_endpoint.json ├── Udacity_wrapper │ └── driver_udacity.py └── udacity_data.json ├── edX ├── allcourse.html ├── edX.csv ├── edX.json ├── edx_wrapper.ipynb ├── sample.json └── sample_cypher_edX ├── neo4jupyter ├── UserInterface.ipynb ├── assets │ └── index.html └── neo.py └── rpedsel ├── Catalogue.csv ├── Catalogue_sim.csv ├── Cypher └── Mooc_merge.csv /.gitignore: 
-------------------------------------------------------------------------------- 1 | /.ipynb_checkpoints/ -------------------------------------------------------------------------------- /All_Mooc/Course_csv/Mooc_merge.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/All_Mooc/Course_csv/Mooc_merge.csv -------------------------------------------------------------------------------- /All_Mooc/Course_csv/ToCsv.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | import sys 4 | reload(sys) 5 | sys.setdefaultencoding('utf8') 6 | 7 | df = pd.read_json("Coursera_flat.json") 8 | print(df) 9 | df.to_csv('Coursera_flat.csv') 10 | 11 | df = pd.read_json("Catalogue_course.json") 12 | print(df) 13 | df.to_csv('Catalogue_course.csv') 14 | 15 | df = pd.read_json("Mooc_merge.json") 16 | print(df) 17 | df.to_csv('Mooc_merge.csv') 18 | values = json.load(open("Catalogue_course.json")) 19 | import csv 20 | with open("Catalogue_course.csv", "wb") as f: 21 | wr = csv.writer(f) 22 | for data in values: 23 | for key, value in data.iteritems(): 24 | wr.writerow([",".join([v.encode("utf-8") for v in value]) if isinstance(value, list) else value.encode("utf8")]) 25 | -------------------------------------------------------------------------------- /All_Mooc/Cypher/Mooc_merge_cypher.txt: -------------------------------------------------------------------------------- 1 | *Delete all nodes and edges* 2 | `match (n)-[r]-() delete n, r` 3 | 4 | *Delete all nodes which have no edges* 5 | `match (n) delete n` 6 | 7 | load csv with headers from "file:///Mooc_merge.csv" as row WITH row 8 | where row.id is not null and row.course_name is not null and row.provenance is not null and row.description is not null 9 | and row.course_url is not null and row.image_url is not null 10 | merge 
(course:Mooc {Id:row.id, Name:row.course_name, Description:row.description, URL: row.course_url, Img:row.image_url})
11 | merge (provenance:Provenance{name:row.provenance})
12 | merge (course) - [:HostedBy] -> (provenance);
13 | // Attach each Mooc to its specialization Topic. `course` is listed in WITH so it is still bound in the final MERGE.
14 | load csv with headers from "file:///Mooc_merge.csv" as row WITH row
15 | where row.id is not null and row.course_name is not null and row.provenance is not null and row.description is not null
16 | and row.course_url is not null and row.image_url is not null
17 | merge (course:Mooc {Id: row.id, Name: row.course_name, Description: row.description, URL: row.course_url, Img: row.image_url})
18 | with row, course where row.special_id is not null
19 | merge (topic:Topic{id:row.special_id})
20 | merge (course) - [:OfTopic] -> (topic);
21 | // Attach each Mooc to every Subject named in its ";"-separated subject column (again keeping `course` in scope).
22 | load csv with headers from "file:///Mooc_merge.csv" as row WITH row
23 | where row.id is not null and row.course_name is not null and row.provenance is not null and row.description is not null
24 | and row.course_url is not null and row.image_url is not null
25 | merge (course:Mooc {Id: row.id,Name: row.course_name, Description: row.description, URL: row.course_url, Img: row.image_url})
26 | with row, course where row.subject is not null
27 | foreach (subjectName in split(row.subject,";") |
28 | merge (subject:Subject{name:subjectName})
29 | merge (course) - [:OfSubject] -> (subject));
30 |
31 | // Properties sometimes get out of sync, so force them from the CSV. Matches on row.id (the column stored as Mooc.Id above); SET assignments are comma-separated.
32 | load csv with headers from "file:///Mooc_merge.csv" as row WITH row match (course:Mooc) where row.id = course.Id set course.Name = row.course_name, course.Description = row.description, course.URL = row.course_url, course.Img = row.image_url;
33 |
34 | // Rename the node label Course -> Mooc
35 | MATCH (course:Course)
36 | SET course:Mooc
37 | REMOVE course:Course;
38 |
39 | // Join with Specialization.csv: Topic has id (special_id) and Name
40 | load csv with headers from "file:///Specialization.csv" as row WITH row where row.name is not null match (topic:Topic) where row.special_id = topic.id set topic.Name = row.name;
41 |
42 | // Import inter_link_Nam.csv: one statement per relation column, each value list is ";"-separated
43 | load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
44 | where row.course_id is not null and row.preparation is not null
45 | merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
46 | foreach (pre_course in split(row.preparation,";") |
47 | merge (preparation:Preparation{Id:pre_course})
48 | merge (preparation)-[:PreparationOf]->(c));
49 |
50 | load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
51 | where row.course_id is not null and row.duplicate is not null
52 | merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
53 | foreach (dup_course in split(row.duplicate,";") |
54 | merge (duplicate:Duplicate{Id:dup_course})
55 | merge (duplicate)-[:SameContentAs]->(c));
56 |
57 | load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
58 | where row.course_id is not null and row.crosslist is not null
59 | merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
60 | foreach (nick_name in split(row.crosslist,";") |
61 | merge (cross:Cross{Id:nick_name})
62 | merge (cross)-[:KnowAlso]->(c));
63 |
64 | load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
65 | where row.course_id is not null
66 | merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
67 | with row, c where row.prerequisite is not null
68 | foreach (pre_course in split(row.prerequisite,";") |
69 | merge (p:Prerequisite{Id:pre_course})
70 | merge (p)-[:PrerequisiteOf]->(c));
71 |
72 | load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
73 | where row.course_id is not null and row.corequisite is not null
74 | merge (c:Catalog_course {Id: row.course_id, Name: row.course_name})
75 | foreach (co_course in split(row.corequisite,";") |
76 | merge (corequisite:Corequisite{Id:co_course})
77 | merge (corequisite)-[:CorequisiteOf]->(c));
78 |
79 | load csv with headers from "file:///inter_link_Nam.csv" as row WITH row
80 | where row.course_id is not 
null and row.similar_mooc is not null 81 | merge (c:Catalog_course {Id: row.course_id, Name: row.course_name}) 82 | foreach (mooc in split(row.similar_mooc,";") | 83 | merge (m:Mooc{Id:mooc}) 84 | merge (c)-[:CorrespondingTo]->(m)); 85 | -------------------------------------------------------------------------------- /All_Mooc/Link_Course/Link_course.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | catalogue = json.load(open("Catalogue_sim.json")) 4 | inter_link = [] 5 | 6 | # for course in catalogue: 7 | # if len(course["prerequisite"]) > 0 or len(course["preparation"]) > 0\ 8 | # or len(course["corequisite"]) > 0 or len(course["duplicate"]) > 0\ 9 | # or len(course["crosslist"]) > 0: 10 | # del course["description"] 11 | 12 | # if len(course["prerequisite"]) >= 1: 13 | # se1 = course["prerequisite"] 14 | # course["prerequisite"] = [i.encode('UTF8') for i in dict.fromkeys(se1).keys()] 15 | 16 | # if len(course["preparation"]) >= 1: 17 | # se2 = course["preparation"] 18 | # course["preparation"] = [i.encode('UTF8') for i in dict.fromkeys(se2).keys()] 19 | 20 | # if len(course["corequisite"]) >= 1: 21 | # se3 = course["corequisite"] 22 | # course["corequisite"] = [i.encode('UTF8') for i in dict.fromkeys(se3).keys()] 23 | 24 | # if len(course["duplicate"]) >= 1: 25 | # se4 = course["duplicate"] 26 | # course["duplicate"] = [i.encode('UTF8') for i in dict.fromkeys(se4).keys()] 27 | 28 | # if len(course["crosslist"]) >= 1: 29 | # se5 = course["crosslist"] 30 | # course["crosslist"] = [i.encode('UTF8') for i in dict.fromkeys(se5).keys()] 31 | 32 | # empty = [] 33 | # for key in course: 34 | # if len(course[key]) == 0: 35 | # empty.append(key) 36 | # # if len(course[key]) == 1: 37 | # # course[key] = course[key][0] 38 | # if len(course["similarity"]) != 0: 39 | # course["similar_mooc"] = [] 40 | # for item in course["similarity"]: 41 | # course["similar_mooc"] += [item[0]] 42 | # print 
course["similar_mooc"] 43 | # list = course["similar_mooc"] 44 | # course["similar_mooc"] = ', '.join([i.encode('UTF8') for i in list]) 45 | # del course["similarity"] 46 | # for key in empty: 47 | # del course[key] 48 | # inter_link.append(course) 49 | 50 | # print len(inter_link) 51 | # # print inter_link 52 | # with open("inter_link0.json", "a") as f: 53 | # json.dump(inter_link, f) 54 | 55 | 56 | 57 | 58 | for course in catalogue: 59 | if len(course["prerequisite"]) > 0 or len(course["preparation"]) > 0\ 60 | or len(course["corequisite"]) > 0 or len(course["duplicate"]) > 0\ 61 | or len(course["crosslist"]) > 0: 62 | del course["description"] 63 | 64 | if len(course["prerequisite"]) > 1: 65 | se1 = course["prerequisite"] 66 | course["prerequisite"] = ', '.join([i.encode('UTF8') for i in dict.fromkeys(se1).keys()]) 67 | 68 | if len(course["preparation"]) > 1: 69 | se2 = course["preparation"] 70 | course["preparation"] = ', '.join([i.encode('UTF8') for i in dict.fromkeys(se2).keys()]) 71 | 72 | if len(course["corequisite"]) > 1: 73 | se3 = course["corequisite"] 74 | course["corequisite"] = ', '.join([i.encode('UTF8') for i in dict.fromkeys(se3).keys()]) 75 | 76 | if len(course["duplicate"]) > 1: 77 | se4 = course["duplicate"] 78 | course["duplicate"] = ', '.join([i.encode('UTF8') for i in dict.fromkeys(se4).keys()]) 79 | 80 | if len(course["crosslist"]) > 1: 81 | se5 = course["crosslist"] 82 | course["crosslist"] = ', '.join([i.encode('UTF8') for i in dict.fromkeys(se5).keys()]) 83 | 84 | empty = [] 85 | for key in course: 86 | if len(course[key]) == 0: 87 | empty.append(key) 88 | if len(course["similarity"]) != 0: 89 | course["similar_mooc"] = [] 90 | for item in course["similarity"]: 91 | course["similar_mooc"] += [item[0]] 92 | print course["similar_mooc"] 93 | list = course["similar_mooc"] 94 | course["similar_mooc"] = ', '.join([i.encode('UTF8') for i in list]) 95 | del course["similarity"] 96 | for key in empty: 97 | del course[key] 98 | 
inter_link.append(course) 99 | 100 | for course in inter_link: 101 | for key in course: 102 | if len(course[key]) == 1: 103 | course[key] = course[key][0] 104 | 105 | print len(inter_link) 106 | # print inter_link 107 | with open("inter_link.json", "a") as f: 108 | json.dump(inter_link, f) 109 | import sys 110 | reload(sys) 111 | sys.setdefaultencoding('utf8') 112 | 113 | df = pd.read_json("inter_link.json") 114 | # print(df) 115 | df.to_csv('inter_link.csv') 116 | -------------------------------------------------------------------------------- /All_Mooc/Link_Course/inter_link_Nam.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/All_Mooc/Link_Course/inter_link_Nam.csv -------------------------------------------------------------------------------- /All_Mooc/Mooc_json/Unify.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | # with open('Coursera_data.json', 'r') as f: 4 | # json.load() 5 | 6 | coursera = json.load(open('Coursera_data.json')) 7 | edX = json.load(open('edX.json')) 8 | khan = json.load(open('khan_data.json')) 9 | udacity = json.load(open('udacity_data.json')) 10 | print len(coursera), len(edX), len(khan), len(udacity) 11 | 12 | # id in 3 digits letter + 5 digits number 13 | # remove unicode, starting words 14 | def Format_desc(s): 15 | # s = item[unicode("description")] 16 | # s = s.replace("About this course: ", "") 17 | # s = s.replace("About the Course", "") 18 | Useless = ["About the Course", "about this course ", \ 19 | "About this Course\n\n", "This course",\ 20 | "About this course: ", "In this course",\ 21 | "in this course", "this course", "Welcome to "\ 22 | "\n\n", " \n", " \n\n", "\n\t", "This is a course",\ 23 | "In this course,", "in this course,", "About the course"] 24 | # s = s.replace("In this course", "") 25 | # s = s.replace("in this course", 
"") 26 | for ul in Useless: 27 | s = s.replace(ul, "") 28 | s = s.encode('ascii','ignore') 29 | s = s.encode('ascii','replace') 30 | return s.strip() 31 | 32 | 33 | csr = [] 34 | spe_id = 0 35 | for item in coursera: 36 | # if unicode("provenance") not in item: 37 | # print item 38 | # else: 39 | # print item[unicode("provenance")] 40 | if unicode("courseSet") in item: 41 | # if len(item[unicode("courseSet")]) > 1: 42 | del item[unicode("description")] 43 | spe_id += 1 44 | list = item[unicode("courseSet")] 45 | for course in list: 46 | course["img"] = item[unicode("img")] 47 | if unicode("provenance") in item: 48 | course["provenance"] = item[unicode("provenance")] 49 | else: 50 | course["provenance"] = "cousera" 51 | course[unicode("course_url")] = item[unicode("course_url")] 52 | course[unicode("special_id")] = "spc" + str(spe_id).zfill(5) 53 | original = course[unicode("id")] 54 | course["id"] = "csr" + original[-5:] 55 | s = course[unicode("description")] 56 | # s = s.replace("About this course: ", "") 57 | # s = s.replace("About the Course", "") 58 | del course[unicode("description")] 59 | course["description"] = Format_desc(s) 60 | csr.append(course) 61 | else: 62 | original = item[unicode("id")] 63 | item[unicode("id")] = "csr" + original[-5:] 64 | if unicode("description") in item: 65 | s = item[unicode("description")] 66 | item["description"] = Format_desc(s) 67 | else: 68 | item["description"] = "Empty" 69 | del item["specialization"] 70 | csr.append(item) 71 | 72 | print len(csr) 73 | with open("Coursera_flat.json", "a") as f: 74 | json.dump(csr, f) 75 | 76 | 77 | khn = [] 78 | for item in khan: 79 | original = item[unicode("id")] 80 | item[unicode("id")] = "khn" + original[-5:] 81 | if unicode("description") in item: 82 | s = item[unicode("description")] 83 | item["description"] = Format_desc(s) 84 | else: 85 | item["description"] = "Empty" 86 | khn.append(item) 87 | 88 | udc = [] 89 | for item in udacity: 90 | original = item[unicode("id")] 91 | 
item[unicode("id")] = "udc" + original[-5:] 92 | if unicode("description") in item: 93 | s = item[unicode("description")] 94 | item["description"] = Format_desc(s) 95 | else: 96 | item["description"] = "Empty" 97 | udc.append(item) 98 | print len(udc), len(khn) 99 | 100 | Mooc_merge = csr + edX + khn + udc 101 | print len(Mooc_merge) 102 | 103 | with open("Mooc_merge.json", "a") as f: 104 | json.dump(Mooc_merge, f) 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /All_Mooc/Program_order/Specialization/Specialization.csv: -------------------------------------------------------------------------------- 1 | special_id,name,provenance,courseList 2 | spc00001,Data Science,Johns Hopkins University,"csr00001, csr00002, csr00003, csr00004, csr00005, csr00006, csr00007, csr00008, csr00009, csr00010" 3 | spc00002,Data Science Math Skills,Duke University,"csr00011, csr00012, csr00013, csr00014, csr00015" 4 | spc00003,Data Analysis and Presentation Skills: the PwC Approach,PwC,"csr00016, csr00017, csr00018, csr00019, csr00020" 5 | spc00004,Data Visualization with Tableau,"University of California, Davis","csr00021, csr00022, csr00023, csr00024, csr00025, csr00026" 6 | spc00005,A Crash Course in Data Science,Johns Hopkins University,"csr00027, csr00028, csr00029, csr00030, csr00031, csr00032" 7 | spc00006,Data Mining ,University of Illinois at Urbana-Champaign,"csr00033, csr00034, csr00035, csr00036, csr00037" 8 | spc00007,Big Data,"University of California, San Diego","csr00038, csr00039, csr00040, csr00041, csr00042" 9 | spc00008,Applied Data Science with Python,University of Michigan,"csr00043, csr00044, csr00045, csr00046, csr00047" 10 | spc00009,Data Analysis and Interpretation,Wesleyan University,"csr00048, csr00049, csr00050, csr00051, csr00052, csr00053" 11 | spc00010,Big Data for Data Engineers,Yandex,"csr00054, csr00055, csr00056, csr00057, csr00058" 12 | spc00011,Data Structures and 
Algorithms,"University of California, San Diego, Higher School of Economics","csr00059, csr00060, csr00061, csr00062, csr00063, csr00064, csr00065, csr00066" 13 | spc00012,Executive Data Science,Johns Hopkins University,"csr00067, csr00068, csr00069, csr00070, csr00071" 14 | spc00013,Genomic Data Science,Johns Hopkins University,"csr00072, csr00073, csr00074, csr00075, csr00076, csr00077, csr00078" 15 | spc00014,Data Warehousing for Business Intelligence,University of Colorado System,"csr00079, csr00080, csr00081, csr00082" 16 | spc00015,Survey Data Collection and Analytics ,"University of Michigan, University of Maryland, College Park","csr00083, csr00084, csr00085, csr00086, csr00087" 17 | spc00016,Master of Computer Science in Data Science (MCS-DS),,"csr00153, csr00154, csr00155, csr00156" 18 | spc00017,"Algorithms, Part I",Princeton University,"csr00157, csr00158, csr00159, csr00160, csr00161, csr00162" 19 | spc00018,"Algorithms, Part II",Princeton University,"csr00163, csr00164, csr00165, csr00166, csr00167" 20 | spc00019,Master of Computer Science in Data Science (MCS-DS),,"csr00186, csr00187, csr00188, csr00189, csr00190" 21 | spc00020,Object Oriented Java Programming: Data Structures and Beyond,"University of California, San Diego","csr00191, csr00192, csr00193, csr00194, csr00195, csr00196, csr00197" 22 | spc00021,Convolutional Neural Networks,deeplearning.ai,"csr00198, csr00199, csr00200, csr00201" 23 | spc00022,Master of Computer Science in Data Science (MCS-DS),,"csr00239, csr00240, csr00241, csr00242" 24 | spc00023, (MCS-DS),,"csr00262, csr00263, csr00264, csr00265, csr00266" 25 | spc00024,Pattern Discovery in Data Mining,University of Illinois at Urbana-Champaign,"csr00267, csr00268, csr00269, csr00270, csr00271, csr00272, csr00273, csr00274, csr00275, csr00276" 26 | spc00025,Command Line Tools for Genomic Data Science,Johns Hopkins University,"csr00277, csr00278, csr00279, csr00280" 27 | spc00026,Internet Giants: The Law and Economics of Media 
Platforms,The University of Chicago,"csr00281, csr00282, csr00283, csr00284, csr00285, csr00286" 28 | spc00027,"Nanotechnology and Nanosensors, Part1",Technion - Israel Institute of Technology,"csr00287, csr00288, csr00289, csr00290, csr00291" 29 | spc00028,Master of Computer Science in Data Science (MCS-DS),,"csr00297, csr00298, csr00299, csr00300" 30 | spc00029,Introduction to MongoDB,MongoDB Inc.,"csr00301, csr00302, csr00303, csr00304, csr00305, csr00306" 31 | spc00030,Neural Networks and Deep Learning,deeplearning.ai,"csr00307, csr00308, csr00309, csr00310, csr00311" 32 | spc00031, (MCS-DS),,"csr00312, csr00313, csr00314, csr00315" 33 | spc00032,Spanish Vocabulary: Meeting People,"University of California, Davis","csr00316, csr00317, csr00318, csr00319, csr00320" 34 | spc00033,Master of Computer Science in Data Science (MCS-DS),,"csr00336, csr00337, csr00338, csr00339, csr00340, csr00341" 35 | spc00034,Ruby on Rails Web Development,Johns Hopkins University,"csr00342, csr00343, csr00344" 36 | spc00035,Genome Assembly Programming Challenge,"University of California, San Diego, Higher School of Economics","csr00345, csr00346, csr00347, csr00348, csr00349, csr00350" 37 | spc00036,"Parallel, Concurrent, and Distributed Programming in Java",Rice University,"csr00351, csr00352, csr00353, csr00354, csr00355" 38 | spc00037,A Crash Course in Data Science,Johns Hopkins University,"csr00356, csr00357, csr00358, csr00359, csr00360" 39 | spc00038,Software Design Threats and Mitigations,University of Colorado System,"csr00361, csr00362, csr00363, csr00364, csr00365" 40 | spc00039,Character Design for Video Games,California Institute of the Arts,"csr00366, csr00367, csr00368, csr00369, csr00370" 41 | spc00040,Master of Computer Science in Data Science (MCS-DS),,"csr00379, csr00380, csr00381, csr00382" 42 | spc00041,From Data to Insights with Google Cloud Platform,Google Cloud,"csr00383, csr00384, csr00385, csr00386" 43 | spc00042,Real-Time Cyber Threat Detection and 
Mitigation,New York University Tandon School of Engineering,"csr00387, csr00388, csr00389, csr00390, csr00391, csr00392, csr00393" 44 | spc00043,Secure Software Design,University of Colorado System,"csr00394, csr00395, csr00396, csr00397, csr00398" 45 | spc00044,Structuring Machine Learning Projects,deeplearning.ai,"csr00399, csr00400, csr00401, csr00402" 46 | spc00045,Internet of Things: How did we get here?,"University of California, San Diego","csr00403, csr00404, csr00405, csr00406, csr00407" 47 | spc00046,,Ludwig-Maximilians-Universität München (LMU),"csr00408, csr00409, csr00410, csr00411, csr00412" 48 | spc00047,"Trabaja inteligentemente, no más duro: Gestión del tiempo para la productividad personal y profesional ","University of California, Irvine","csr00413, csr00414, csr00415, csr00416" 49 | spc00048, (MCS-DS),,"csr00418, csr00419, csr00420, csr00421, csr00422, csr00423, csr00424, csr00425, csr00426, csr00427" 50 | spc00049,Digital Manufacturing & Design Technology ,"The State University of New York, University at Buffalo","csr00428, csr00429, csr00430, csr00431, csr00432, csr00433" 51 | spc00050,Internet of Things,"University of California, San Diego","csr00434, csr00435, csr00436, csr00437" 52 | spc00051,Architecting with Google Cloud Platform,Google Cloud,"csr00438, csr00439, csr00440, csr00441, csr00442" 53 | spc00052,Introduction to Discrete Mathematics for Computer Science,"Higher School of Economics, University of California, San Diego","csr00443, csr00444, csr00445, csr00446, csr00447" 54 | spc00053,Android App Development,Vanderbilt University,"csr00523, csr00524, csr00525, csr00526, csr00527" 55 | spc00054,Java Programming and Software Engineering Fundamentals,Duke University,"csr00562, csr00563, csr00564, csr00565, csr00566" 56 | spc00055,Object Oriented Java Programming: Data Structures and Beyond,"University of California, San Diego","csr00567, csr00568, csr00569, csr00570, csr00571" 57 | spc00056,Learn to Program: The Fundamentals,University of 
Toronto,"csr00572, csr00573, csr00574" 58 | spc00057,"Algorithms, Part I",Princeton University,"csr00575, csr00576, csr00577, csr00578" 59 | spc00058,Web Applications for Everybody,University of Michigan,"csr00595, csr00596, csr00597, csr00598" 60 | spc00059,Building Web Applications in PHP,University of Michigan,"csr00599, csr00600, csr00601, csr00602, csr00603" 61 | spc00060,Python for Everybody,University of Michigan,"csr00604, csr00605, csr00606, csr00607, csr00608" 62 | spc00061,Using Python to Access Web Data,University of Michigan,"csr00609, csr00610, csr00611, csr00612, csr00613, csr00614" 63 | spc00062,Web Application Development: Basic Concepts,University of New Mexico,"csr00615, csr00616, csr00617, csr00618, csr00619" 64 | spc00063,Web Design for Everybody (Basics of Web Development and Coding),University of Michigan,"csr00620, csr00621, csr00622, csr00623, csr00624, csr00625" 65 | spc00064,Introduction to Discrete Mathematics for Computer Science,"Higher School of Economics, University of California, San Diego","csr00640, csr00641, csr00642, csr00643, csr00644" 66 | spc00065,Mathematical Foundations for Cryptography,University of Colorado System,"csr00645, csr00646, csr00647, csr00648" 67 | spc00066,Applied Cryptography,University of Colorado System,"csr00667, csr00668, csr00669, csr00670" 68 | spc00067,Cryptographic Hash and Integrity Protection,University of Colorado System,"csr00671, csr00672, csr00673, csr00674, csr00675, csr00676, csr00677" 69 | spc00068,Global Challenges in Business,University of Illinois at Urbana-Champaign,"csr00678, csr00679, csr00680, csr00681, csr00682, csr00683" 70 | spc00069,Vital Signs: Understanding What the Body Is Telling Us,University of Pennsylvania,"csr00684, csr00685, csr00686, csr00687, csr00688" 71 | spc00070,Responsive Website Development and Design,"University of London, Goldsmiths, University of London","csr00689, csr00690, csr00691, csr00692, csr00693" 72 | spc00071,Practical Machine Learning,Johns Hopkins 
University,"csr00694, csr00695, csr00696, csr00697, csr00698" 73 | spc00072,Introduction to the Internet of Things and Embedded Systems,"University of California, Irvine","csr00713, csr00714, csr00715, csr00716, csr00717, csr00718" 74 | spc00073,A developer's guide to the Internet of Things (IoT),IBM,"csr00719, csr00720, csr00721, csr00722, csr00723, csr00724, csr00725, csr00726, csr00727, csr00728" 75 | spc00074,Cybersecurity Policy for Aviation and Internet Infrastructures,University of Colorado System,"csr00729, csr00730, csr00731, csr00732, csr00733" 76 | spc00075,Internet of Things: How did we get here?,"University of California, San Diego","csr00734, csr00735, csr00736, csr00737" 77 | spc00076,Master of Business Administration (iMBA),,"csr00754, csr00755, csr00756, csr00757, csr00758, csr00759, csr00760" 78 | spc00077,Master's in Innovation and Entrepreneurship,,"csr00761, csr00762, csr00763, csr00764" 79 | spc00078,Professional Certificate in Innovation Management and Entrepreneurship,,"csr00765, csr00766, csr00767, csr00768" 80 | spc00079,Managerial Economics and Business Analysis ,University of Illinois at Urbana-Champaign,"csr00769, csr00770, csr00771, csr00772, csr00773, csr00774, csr00775" 81 | spc00080,Strategic Business Analytics,ESSEC Business School,"csr00776, csr00777, csr00778, csr00779, csr00780" 82 | spc00081,Excel Skills for Business,Macquarie University,"csr00781, csr00782, csr00783, csr00784" 83 | spc00082,Global Challenges in Business,University of Illinois at Urbana-Champaign,"csr00785, csr00786, csr00787, csr00788" 84 | spc00083,Statistical Molecular Thermodynamics,University of Minnesota,"csr00825, csr00826, csr00827, csr00828" 85 | spc00084,Statistical Thermodynamics: Molecules to Machines,Carnegie Mellon University,"csr00829, csr00830, csr00831, csr00832, csr00833, csr00834, csr00835" 86 | spc00085,Social Media Marketing,Northwestern University,"csr00838, csr00839, csr00840, csr00841, csr00842, csr00843" 87 | spc00086,Social Media Data 
Analytics,Rutgers the State University of New Jersey,"csr00844, csr00845, csr00846" 88 | spc00087,What is Social?,Northwestern University,"csr00847, csr00848, csr00849, csr00850, csr00851" 89 | spc00088,Design Patterns,University of Alberta,"csr00855, csr00856, csr00857, csr00858, csr00859" 90 | spc00089,Fashion as Design,The Museum of Modern Art,"csr00860, csr00861, csr00862, csr00863, csr00864, csr00865, csr00866, csr00867" 91 | spc00090,Graphic Design,California Institute of the Arts,"csr00868, csr00869, csr00870, csr00871, csr00872" 92 | spc00091,Introduction to User Experience Design ,Georgia Institute of Technology,"csr00873, csr00874, csr00875, csr00876, csr00877" 93 | spc00092,Cryptography and Information Theory,University of Colorado System,"csr00878, csr00879, csr00880, csr00881, csr00882" 94 | spc00093,Academic Information Seeking,"University of Copenhagen, Technical University of Denmark (DTU)","csr00883, csr00884, csr00885, csr00886, csr00887, csr00888, csr00889, csr00890" 95 | spc00094,Metaliteracy: Empowering Yourself in a Connected World,The State University of New York,"csr00891, csr00892, csr00893, csr00894, csr00895" 96 | spc00095,Mindware: Critical Thinking for the Information Age ,University of Michigan,"csr00896, csr00897, csr00898, csr00899, csr00900" 97 | spc00096,Business Analytics,University of Pennsylvania,"csr00901, csr00902, csr00903, csr00904" 98 | spc00097,Introduction to Complex Analysis,Wesleyan University,"csr00920, csr00921, csr00922, csr00923, csr00924" 99 | spc00098,Analysis of Algorithms,Princeton University,"csr00925, csr00926, csr00927, csr00928, csr00929" 100 | spc00099,Data Analysis and Presentation Skills: the PwC Approach,PwC,"csr00930, csr00931, csr00932, csr00933, csr00934" 101 | spc00100,Bayesian Statistics: From Concept to Data Analysis,"University of California, Santa Cruz","csr00935, csr00936, csr00937, csr00938, csr00939, csr00940, csr00941" 102 | spc00101,Business Statistics and Analysis,Rice University,"csr00942, 
csr00943, csr00944, csr00945, csr00946" 103 | spc00102,"Internet History, Technology, and Security",University of Michigan,"csr00962, csr00963, csr00964, csr00965, csr00966, csr00967" 104 | spc00103,The Impact of Technology,EIT Digital ,"csr00968, csr00969, csr00970, csr00971, csr00972, csr00973" 105 | spc00104,Get Interactive: Practical Teaching with Technology,"University of London, Bloomsbury Learning Environment","csr00974, csr00975, csr00976, csr00977, csr00978, csr00979, csr00980, csr00981, csr00982, csr00983" 106 | spc00105,Fundamentals of Network Communication,University of Colorado System,"csr01001, csr01002, csr01003, csr01004" 107 | spc00106,Network Protocols and Architecture,Cisco,"csr01005, csr01006, csr01007, csr01008, csr01009" 108 | spc00107,Digital Marketing,University of Illinois at Urbana-Champaign,"csr01028, csr01029, csr01030, csr01031, csr01032, csr01033, csr01034" 109 | spc00108,Introduction to Marketing,University of Pennsylvania,"csr01035, csr01036, csr01037, csr01038, csr01039, csr01040" 110 | spc00109,Business Foundations,University of Pennsylvania,"csr01041, csr01042, csr01043, csr01044, csr01045, csr01046" 111 | spc00110,Marketing in a Digital World,University of Illinois at Urbana-Champaign,"csr01047, csr01048, csr01049, csr01050, csr01051" 112 | spc00111,The Cycle: Management of Successful Arts and Cultural Organizations,"University of Maryland, College Park","csr01052, csr01053, csr01054, csr01055, csr01056, csr01057" 113 | spc00112,Food & Beverage Management,Università Bocconi,"csr01058, csr01059, csr01060, csr01061" 114 | spc00113,International Marketing Entry and Execution,Yonsei University,"csr01062, csr01063, csr01064, csr01065, csr01066" 115 | spc00114,International Marketing & Cross Industry Growth ,Yonsei University,"csr01067, csr01068, csr01069, csr01070, csr01071" 116 | spc00115,Introducción al Marketing,University of Pennsylvania,"csr01072, csr01073, csr01074, csr01075, csr01076, csr01077" 117 | spc00116,The Strategy of 
Content Marketing,"University of California, Davis","csr01078, csr01079, csr01080, csr01081, csr01082" 118 | spc00117,Digital Product Management: Modern Fundamentals,University of Virginia,"csr01093, csr01094, csr01095, csr01096, csr01097, csr01098, csr01099" 119 | spc00118,Digital Marketing,University of Illinois at Urbana-Champaign,"csr01100, csr01101, csr01102, csr01103" 120 | spc00119,Digital Signal Processing,École Polytechnique Fédérale de Lausanne,"csr01104, csr01105, csr01106, csr01107, csr01108, csr01109, csr01110, csr01111, csr01112, csr01113" 121 | spc00120,Digital Footprint,The University of Edinburgh,"csr01114, csr01115, csr01116, csr01117, csr01118" 122 | spc00121,Introduction to Electronics,Georgia Institute of Technology,"csr01135, csr01136, csr01137, csr01138, csr01139, csr01140" 123 | spc00122,Software Architecture,University of Alberta,"csr01160, csr01161, csr01162, csr01163" 124 | spc00123,Service-Oriented Architecture,University of Alberta,"csr01164, csr01165, csr01166, csr01167" 125 | spc00124,Computer Architecture,Princeton University,"csr01168, csr01169, csr01170, csr01171" 126 | spc00125,Financing and Investing in Infrastructure,Università Bocconi,"csr01189, csr01190, csr01191, csr01192, csr01193, csr01194" 127 | spc00126,Smart Cities – Management of Smart Urban Infrastructures,École Polytechnique Fédérale de Lausanne,"csr01195, csr01196, csr01197, csr01198" 128 | spc00127,Computer Architecture,Princeton University,"csr01199, csr01200, csr01201, csr01202" 129 | spc00128,Learn to Program: The Fundamentals,University of Toronto,"csr01220, csr01221, csr01222, csr01223, csr01224" 130 | spc00129,Beginning Game Programming with C#,University of Colorado System,"csr01225, csr01226, csr01227, csr01228, csr01229, csr01230" 131 | spc00130,Java Programming and Software Engineering Fundamentals,Duke University,"csr01231, csr01232, csr01233, csr01234" 132 | spc00131,Programming for Everybody (Getting Started with Python),University of Michigan,"csr01235, 
csr01236, csr01237, csr01238, csr01239" 133 | spc00132,Code Yourself! An Introduction to Programming,"The University of Edinburgh, Universidad ORT Uruguay","csr01240, csr01241, csr01242, csr01243" 134 | spc00133,Introduction to Programming with MATLAB,Vanderbilt University,"csr01244, csr01245, csr01246" 135 | spc00134,An Introduction to Programming the Internet of Things (IOT),"University of California, Irvine","csr01247, csr01248, csr01249, csr01250, csr01251" 136 | spc00135,Internet Giants: The Law and Economics of Media Platforms,The University of Chicago,"csr01265, csr01266, csr01267, csr01268" 137 | spc00136,Google Cloud Platform Fundamentals: Core Infrastructure,Google Cloud,"csr01269, csr01270, csr01271, csr01272, csr01273, csr01274" 138 | spc00137,Data Science at Scale,University of Washington,"csr01275, csr01276, csr01277, csr01278" 139 | spc00138,An Introduction to Programming the Internet of Things (IOT),"University of California, Irvine","csr01279, csr01280, csr01281, csr01282, csr01283" 140 | spc00139,Architecting with Google Cloud Platform,Google Cloud,"csr01284, csr01285, csr01286, csr01287" 141 | spc00140,"Big Data Essentials: HDFS, MapReduce and Spark RDD",Yandex,"csr01303, csr01304, csr01305, csr01306, csr01307" 142 | spc00141,Maps and the Geospatial Revolution,The Pennsylvania State University,"csr01308, csr01309, csr01310, csr01311" 143 | spc00142,Data Visualization with Tableau,"University of California, Davis","csr01312, csr01313, csr01314, csr01315, csr01316, csr01317" 144 | spc00143,Visual Analytics with Tableau,"University of California, Davis","csr01318, csr01319, csr01320, csr01321, csr01322" 145 | spc00144,Data Science at Scale,University of Washington,"csr01323, csr01324, csr01325, csr01326, csr01327, csr01328" 146 | spc00145,Mindware: Critical Thinking for the Information Age ,University of Michigan,"csr01344, csr01345, csr01346, csr01347" 147 | spc00146,Homeland Security and Cybersecurity,University of Colorado System,"csr01348, 
csr01349, csr01350, csr01351, csr01352" 148 | spc00147,Philosophy and the Sciences: Introduction to the Philosophy of Cognitive Sciences,The University of Edinburgh,"csr01353, csr01354, csr01355, csr01356" 149 | spc00148,Personality Types at Work,University of Florida,"csr01357, csr01358, csr01359, csr01360" 150 | spc00149,Data Analysis and Presentation Skills: the PwC Approach,PwC,"csr01361, csr01362, csr01363, csr01364, csr01365" 151 | spc00150,Master's in Innovation and Entrepreneurship,,"csr01381, csr01382, csr01383, csr01384, csr01385" 152 | spc00151,Professional Certificate in Innovation Management and Entrepreneurship,,"csr01386, csr01387, csr01388, csr01389, csr01390, csr01391, csr01392, csr01393, csr01394, csr01395" 153 | spc00152,Entrepreneurship,University of Pennsylvania,"csr01396, csr01397, csr01398, csr01399" 154 | spc00153,Entrepreneurship 1: Developing the Opportunity,University of Pennsylvania,"csr01400, csr01401, csr01402, csr01403, csr01404, csr01405" 155 | spc00154,English for Business and Entrepreneurship,University of Pennsylvania,"csr01406, csr01407, csr01408" 156 | spc00155,Entrepreneurship 2: Launching your Start-Up,University of Pennsylvania,"csr01409, csr01410, csr01411, csr01412" 157 | spc00156,Career Success,"University of California, Irvine","csr01413, csr01414, csr01415, csr01416, csr01417, csr01418, csr01419" 158 | spc00157,Cyber Threats and Attack Vectors,University of Colorado System,"csr01433, csr01434, csr01435, csr01436, csr01437" 159 | spc00158,Detecting and Mitigating Cyber Threats and Attacks,University of Colorado System,"csr01438, csr01439, csr01440, csr01441" 160 | spc00159,Cybersecurity,"University of Maryland, College Park","csr01442, csr01443, csr01444" 161 | spc00160,Bitcoin and Cryptocurrency Technologies,Princeton University,"csr01445, csr01446, csr01447, csr01448" 162 | spc00161,"Knowledge Exchange: Using, Protecting and Monetizing Ideas with Third Parties","Universiteit Leiden, Leiden University Medical Center, 
Luris","csr01465, csr01466, csr01467, csr01468, csr01469" 163 | spc00162, (Journey of the Universe: Weaving Knowledge and Action),Yale University,"csr01470, csr01471, csr01472, csr01473, csr01474" 164 | spc00163,Big History: Connecting Knowledge,Macquarie University,"csr01475, csr01476, csr01477, csr01478, csr01479, csr01480, csr01481" 165 | spc00164,Geographic Information Systems (GIS),"University of California, Davis","csr01482, csr01483, csr01484, csr01485" 166 | spc00165,Web Design for Everybody (Basics of Web Development and Coding),University of Michigan,"csr01486, csr01487, csr01488, csr01489, csr01490" 167 | spc00166,Fundamentals of Computing,Rice University,"csr01491, csr01492, csr01493, csr01494" 168 | spc00167,The Data Scientists Toolbox,Johns Hopkins University,"csr01495, csr01496, csr01497, csr01498" 169 | spc00168,Introduction to Formal Concept Analysis,Higher School of Economics,"csr01499, csr01500, csr01501, csr01502" 170 | spc00169,Journey Conversations: Weaving Knowledge and Action,Yale University,"csr01503, csr01504, csr01505, csr01506, csr01507" 171 | spc00170,Six Sigma Green Belt,University System of Georgia,"csr01508, csr01509, csr01510, csr01511" 172 | spc00171,Introduction to Graph Theory,"University of California, San Diego, Higher School of Economics","csr01522, csr01523, csr01524" 173 | spc00172,"Big Data Analysis: Hive, Spark SQL, DataFrames and GraphFrames",Yandex,"csr01525, csr01526, csr01527, csr01528, csr01529, csr01530" 174 | spc00173,Probabilistic Graphical Models ,Stanford University,"csr01531, csr01532, csr01533, csr01534, csr01535, csr01536" 175 | spc00174,"Algorithms, Part I",Princeton University,"csr01537, csr01538, csr01539, csr01540, csr01541" 176 | spc00175,Combinatorics and Probability,"University of California, San Diego, Higher School of Economics","csr01558, csr01559, csr01560, csr01561, csr01562" 177 | spc00176,An Intuitive Introduction to Probability,University of Zurich,"csr01563, csr01564, csr01565" 178 | 
spc00177,Data Science Math Skills,Duke University,"csr01566, csr01567, csr01568, csr01569, csr01570" 179 | spc00178,Introduction to Probability and Data,Duke University,"csr01571, csr01572, csr01573, csr01574, csr01575" 180 | spc00179,Statistics with R,Duke University,"csr01576, csr01577, csr01578, csr01579, csr01580" 181 | spc00180,Probabilistic Graphical Models ,Stanford University,"csr01581, csr01582, csr01583, csr01584" 182 | spc00181,Probabilistic Graphical Models 1: Representation,Stanford University,"csr01585, csr01586, csr01587, csr01588, csr01589" 183 | spc00182,Introduction to Engineering Mechanics,Georgia Institute of Technology,"csr01603, csr01604, csr01605, csr01606, csr01607" 184 | spc00183,Materials Science: 10 Things Every Engineer Should Know,"University of California, Davis","csr01608, csr01609, csr01610, csr01611, csr01612, csr01613" 185 | spc00184,Code Yourself! An Introduction to Programming,"The University of Edinburgh, Universidad ORT Uruguay","csr01614, csr01615, csr01616, csr01617, csr01618" 186 | spc00185,Fundamentals of Engineering Exam Review,Georgia Institute of Technology,"csr01619, csr01620, csr01621" 187 | spc00186,Engineering Systems in Motion: Dynamics of Particles and Bodies in 2D Motion,Georgia Institute of Technology,"csr01622, csr01623, csr01624" 188 | spc00187,Bitcoin and Cryptocurrency Technologies,Princeton University,"csr01625, csr01626, csr01627" 189 | spc00188,Applications in Engineering Mechanics,Georgia Institute of Technology,"csr01628, csr01629, csr01630, csr01631, csr01632" 190 | spc00189,Cryptography I,Stanford University,"csr01646, csr01647, csr01648, csr01649" 191 | spc00190,Essential Design Principles for Tableau,"University of California, Davis","csr01650, csr01651, csr01652, csr01653, csr01654, csr01655" 192 | spc00191,Digital Transformation in Financial Services,Copenhagen Business School,"csr01656, csr01657, csr01658, csr01659, csr01660" 193 | spc00192,Systems Biology and Biotechnology,Icahn School of 
Medicine at Mount Sinai,"csr01661, csr01662, csr01663, csr01664" 194 | spc00193,Global Warming I: The Science and Modeling of Climate Change,The University of Chicago,"csr01665, csr01666, csr01667, csr01668" 195 | spc00194,Business English: Meetings,University of Washington,"csr01684, csr01685, csr01686, csr01687, csr01688" 196 | spc00195,Introduction to Systems Biology,Icahn School of Medicine at Mount Sinai,"csr01689, csr01690, csr01691, csr01692, csr01693" 197 | spc00196,Dynamical Modeling Methods for Systems Biology,Icahn School of Medicine at Mount Sinai,"csr01694, csr01695, csr01696, csr01697" 198 | spc00197,Software Design Methods and Tools,University of Colorado System,"csr01698, csr01699, csr01700, csr01701, csr01702" 199 | spc00198,"HI-FIVE: Health Informatics For Innovation, Value & Enrichment (Social/Peer Perspective)",Columbia University,"csr01719, csr01720, csr01721, csr01722, csr01723" 200 | spc00199,Subsistence Marketplaces,University of Illinois at Urbana-Champaign,"csr01743, csr01744, csr01745, csr01746, csr01747" 201 | spc00200,Applied Data Science with Python,University of Michigan,"csr01748, csr01749, csr01750, csr01751, csr01752" 202 | spc00201,"Espace mondial, une vision franaise des global studies",Sciences Po,"csr01753, csr01754, csr01755, csr01756, csr01757" 203 | spc00202,Virtual Teacher,"University of California, Irvine","csr01758, csr01759, csr01760, csr01761, csr01762" 204 | spc00203,Advanced Search Engine Optimization Strategies,"University of California, Davis","csr01763, csr01764, csr01765, csr01766, csr01767" 205 | spc00204,"Espace mondial, a French vision of Global studies",Sciences Po,"csr01768, csr01769, csr01770, csr01771, csr01772, csr01773, csr01774" 206 | spc00205,Innovative Finance: Hacking finance to change the world ,University of Cape Town,"csr01789, csr01790, csr01791, csr01792, csr01793, csr01794" 207 | spc00206,Content Strategy for Professionals: Expanding Your Contents Reach,Northwestern University,"csr01795, 
csr01796, csr01797, csr01798, csr01799, csr01800" 208 | spc00207,Survey analysis to Gain Marketing Insights,Emory University,"csr01839, csr01840, csr01841, csr01842" 209 | spc00208,The Importance of Listening,Northwestern University,"csr01862, csr01863, csr01864, csr01865, csr01866" 210 | spc00209,Pricing Strategy,IE Business School,"csr01867, csr01868, csr01869, csr01870, csr01871" 211 | spc00210,The Business of Social,Northwestern University,"csr01872, csr01873, csr01874, csr01875, csr01876" 212 | spc00211,Channel Management and Retailing ,IE Business School,"csr01877, csr01878, csr01879, csr01880" 213 | spc00212,Introduction to Social Media Analytics,Emory University,"csr01881, csr01882, csr01883, csr01884, csr01885" 214 | spc00213,Essentials of Corporate Finance,The University of Melbourne,"csr01886, csr01887, csr01888, csr01889, csr01890, csr01891, csr01892" 215 | spc00214,Investigacin de mercados y comportamiento del consumidor,IE Business School,"csr01893, csr01894, csr01895, csr01896, csr01897" 216 | spc00215,Business Analytics,University of Pennsylvania,"csr01898, csr01899, csr01900, csr01901" 217 | spc00216,Fundamentals of Global Energy Business,University of Colorado System,"csr01902, csr01903, csr01904, csr01905, csr01906" 218 | spc00217,Business Growth Strategy,University of Virginia,"csr01907, csr01908, csr01909, csr01910, csr01911" 219 | spc00218,Questionnaire Design for Social Surveys,University of Michigan,"csr01922, csr01923, csr01924, csr01925, csr01926" 220 | spc00219,Capstone Design Project in Power Electronics,University of Colorado Boulder,"csr01966, csr01967, csr01968, csr01969, csr01970" 221 | spc00220,Competitive Strategy and Organization Design Project,Ludwig-Maximilians-Universitt Mnchen (LMU),"csr01971, csr01972, csr01973, csr01974, csr01975" 222 | spc00221,Design and Build a Data Warehouse for Business Intelligence Implementation,University of Colorado System,"csr01976, csr01977, csr01978, csr01979" 223 | spc00222,Game Design Document: 
Define the Art & Concepts,California Institute of the Arts,"csr01980, csr01981, csr01982, csr01983, csr01984" 224 | spc00223,Introduction to Typography,California Institute of the Arts,"csr01985, csr01986, csr01987, csr01988, csr01989" -------------------------------------------------------------------------------- /All_Mooc/Program_order/Specialization/no_duplicate?/Specialization2.csv: -------------------------------------------------------------------------------- 1 | name,provenance,special_id,courseList 2 | Data Science,Johns Hopkins University,spc00001,"csr00001, csr00002, csr00003, csr00004, csr00005, csr00006, csr00007, csr00008, csr00009, csr00010" 3 | Data Science Math Skills,Duke University,spc00002,"csr00011, csr00012, csr00013, csr00014, csr00015" 4 | Data Analysis and Presentation Skills: the PwC Approach,PwC,spc00003,"csr00016, csr00017, csr00018, csr00019, csr00020" 5 | Data Visualization with Tableau,"University of California, Davis",spc00004,"csr00021, csr00022, csr00023, csr00024, csr00025, csr00026" 6 | A Crash Course in Data Science,Johns Hopkins University,spc00005,"csr00027, csr00028, csr00029, csr00030, csr00031, csr00032" 7 | Data Mining ,University of Illinois at Urbana-Champaign,spc00006,"csr00033, csr00034, csr00035, csr00036, csr00037" 8 | Big Data,"University of California, San Diego",spc00007,"csr00038, csr00039, csr00040, csr00041, csr00042" 9 | Applied Data Science with Python,University of Michigan,spc00008,"csr00043, csr00044, csr00045, csr00046, csr00047" 10 | Data Analysis and Interpretation,Wesleyan University,spc00009,"csr00048, csr00049, csr00050, csr00051, csr00052, csr00053" 11 | Big Data for Data Engineers,Yandex,spc00010,"csr00054, csr00055, csr00056, csr00057, csr00058" 12 | Data Structures and Algorithms,"University of California, San Diego, Higher School of Economics",spc00011,"csr00059, csr00060, csr00061, csr00062, csr00063, csr00064, csr00065, csr00066" 13 | Executive Data Science,Johns Hopkins 
University,spc00012,"csr00067, csr00068, csr00069, csr00070, csr00071" 14 | Genomic Data Science,Johns Hopkins University,spc00013,"csr00072, csr00073, csr00074, csr00075, csr00076, csr00077, csr00078" 15 | Data Warehousing for Business Intelligence,University of Colorado System,spc00014,"csr00079, csr00080, csr00081, csr00082" 16 | Survey Data Collection and Analytics ,"University of Michigan, University of Maryland, College Park",spc00015,"csr00083, csr00084, csr00085, csr00086, csr00087" 17 | Master of Computer Science in Data Science (MCS-DS),,spc00016,"csr00153, csr00154, csr00155, csr00156" 18 | "Algorithms, Part I",Princeton University,spc00017,"csr00157, csr00158, csr00159, csr00160, csr00161, csr00162" 19 | "Algorithms, Part II",Princeton University,spc00018,"csr00163, csr00164, csr00165, csr00166, csr00167" 20 | Object Oriented Java Programming: Data Structures and Beyond,"University of California, San Diego",spc00019,"csr00191, csr00192, csr00193, csr00194, csr00195, csr00196, csr00197" 21 | Convolutional Neural Networks,deeplearning.ai,spc00020,"csr00198, csr00199, csr00200, csr00201" 22 | (MCS-DS),,spc00021,"csr00262, csr00263, csr00264, csr00265, csr00266" 23 | Pattern Discovery in Data Mining,University of Illinois at Urbana-Champaign,spc00022,"csr00267, csr00268, csr00269, csr00270, csr00271, csr00272, csr00273, csr00274, csr00275, csr00276" 24 | Command Line Tools for Genomic Data Science,Johns Hopkins University,spc00023,"csr00277, csr00278, csr00279, csr00280" 25 | Internet Giants: The Law and Economics of Media Platforms,The University of Chicago,spc00024,"csr00281, csr00282, csr00283, csr00284, csr00285, csr00286" 26 | "Nanotechnology and Nanosensors, Part1",Technion - Israel Institute of Technology,spc00025,"csr00287, csr00288, csr00289, csr00290, csr00291" 27 | Introduction to MongoDB,MongoDB Inc.,spc00026,"csr00301, csr00302, csr00303, csr00304, csr00305, csr00306" 28 | Neural Networks and Deep Learning,deeplearning.ai,spc00027,"csr00307, 
csr00308, csr00309, csr00310, csr00311" 29 | (MCS-DS),,spc00028,"csr00312, csr00313, csr00314, csr00315" 30 | Spanish Vocabulary: Meeting People,"University of California, Davis",spc00029,"csr00316, csr00317, csr00318, csr00319, csr00320" 31 | Ruby on Rails Web Development,Johns Hopkins University,spc00030,"csr00342, csr00343, csr00344" 32 | Genome Assembly Programming Challenge,"University of California, San Diego, Higher School of Economics",spc00031,"csr00345, csr00346, csr00347, csr00348, csr00349, csr00350" 33 | "Parallel, Concurrent, and Distributed Programming in Java",Rice University,spc00032,"csr00351, csr00352, csr00353, csr00354, csr00355" 34 | Software Design Threats and Mitigations,University of Colorado System,spc00033,"csr00361, csr00362, csr00363, csr00364, csr00365" 35 | Character Design for Video Games,California Institute of the Arts,spc00034,"csr00366, csr00367, csr00368, csr00369, csr00370" 36 | From Data to Insights with Google Cloud Platform,Google Cloud,spc00035,"csr00383, csr00384, csr00385, csr00386" 37 | Real-Time Cyber Threat Detection and Mitigation,New York University Tandon School of Engineering,spc00036,"csr00387, csr00388, csr00389, csr00390, csr00391, csr00392, csr00393" 38 | Secure Software Design,University of Colorado System,spc00037,"csr00394, csr00395, csr00396, csr00397, csr00398" 39 | Structuring Machine Learning Projects,deeplearning.ai,spc00038,"csr00399, csr00400, csr00401, csr00402" 40 | Internet of Things: How did we get here?,"University of California, San Diego",spc00039,"csr00403, csr00404, csr00405, csr00406, csr00407" 41 | ,Ludwig-Maximilians-Universitt Mnchen (LMU),spc00040,"csr00408, csr00409, csr00410, csr00411, csr00412" 42 | "Trabaja inteligentemente, no ms duro: Gestin del tiempo para la productividad personal y profesional ","University of California, Irvine",spc00041,"csr00413, csr00414, csr00415, csr00416" 43 | Digital Manufacturing & Design Technology ,"The State University of New York, University at 
Buffalo",spc00042,"csr00428, csr00429, csr00430, csr00431, csr00432, csr00433" 44 | Internet of Things,"University of California, San Diego",spc00043,"csr00434, csr00435, csr00436, csr00437" 45 | Architecting with Google Cloud Platform,Google Cloud,spc00044,"csr00438, csr00439, csr00440, csr00441, csr00442" 46 | Introduction to Discrete Mathematics for Computer Science,"Higher School of Economics, University of California, San Diego",spc00045,"csr00443, csr00444, csr00445, csr00446, csr00447" 47 | Android App Development,Vanderbilt University,spc00046,"csr00523, csr00524, csr00525, csr00526, csr00527" 48 | Java Programming and Software Engineering Fundamentals,Duke University,spc00047,"csr00562, csr00563, csr00564, csr00565, csr00566" 49 | Learn to Program: The Fundamentals,University of Toronto,spc00048,"csr00572, csr00573, csr00574" 50 | Web Applications for Everybody,University of Michigan,spc00049,"csr00595, csr00596, csr00597, csr00598" 51 | Building Web Applications in PHP,University of Michigan,spc00050,"csr00599, csr00600, csr00601, csr00602, csr00603" 52 | Python for Everybody,University of Michigan,spc00051,"csr00604, csr00605, csr00606, csr00607, csr00608" 53 | Using Python to Access Web Data,University of Michigan,spc00052,"csr00609, csr00610, csr00611, csr00612, csr00613, csr00614" 54 | Web Application Development: Basic Concepts,University of New Mexico,spc00053,"csr00615, csr00616, csr00617, csr00618, csr00619" 55 | Web Design for Everybody (Basics of Web Development and Coding),University of Michigan,spc00054,"csr00620, csr00621, csr00622, csr00623, csr00624, csr00625" 56 | Mathematical Foundations for Cryptography,University of Colorado System,spc00055,"csr00645, csr00646, csr00647, csr00648" 57 | Applied Cryptography,University of Colorado System,spc00056,"csr00667, csr00668, csr00669, csr00670" 58 | Cryptographic Hash and Integrity Protection,University of Colorado System,spc00057,"csr00671, csr00672, csr00673, csr00674, csr00675, csr00676, 
csr00677" 59 | Global Challenges in Business,University of Illinois at Urbana-Champaign,spc00058,"csr00678, csr00679, csr00680, csr00681, csr00682, csr00683" 60 | Vital Signs: Understanding What the Body Is Telling Us,University of Pennsylvania,spc00059,"csr00684, csr00685, csr00686, csr00687, csr00688" 61 | Responsive Website Development and Design,"University of London, Goldsmiths, University of London",spc00060,"csr00689, csr00690, csr00691, csr00692, csr00693" 62 | Practical Machine Learning,Johns Hopkins University,spc00061,"csr00694, csr00695, csr00696, csr00697, csr00698" 63 | Introduction to the Internet of Things and Embedded Systems,"University of California, Irvine",spc00062,"csr00713, csr00714, csr00715, csr00716, csr00717, csr00718" 64 | A developer's guide to the Internet of Things (IoT),IBM,spc00063,"csr00719, csr00720, csr00721, csr00722, csr00723, csr00724, csr00725, csr00726, csr00727, csr00728" 65 | Cybersecurity Policy for Aviation and Internet Infrastructures,University of Colorado System,spc00064,"csr00729, csr00730, csr00731, csr00732, csr00733" 66 | Master of Business Administration (iMBA),,spc00065,"csr00754, csr00755, csr00756, csr00757, csr00758, csr00759, csr00760" 67 | Master's in Innovation and Entrepreneurship,,spc00066,"csr00761, csr00762, csr00763, csr00764" 68 | Professional Certificate in Innovation Management and Entrepreneurship,,spc00067,"csr00765, csr00766, csr00767, csr00768" 69 | Managerial Economics and Business Analysis ,University of Illinois at Urbana-Champaign,spc00068,"csr00769, csr00770, csr00771, csr00772, csr00773, csr00774, csr00775" 70 | Strategic Business Analytics,ESSEC Business School,spc00069,"csr00776, csr00777, csr00778, csr00779, csr00780" 71 | Excel Skills for Business,Macquarie University,spc00070,"csr00781, csr00782, csr00783, csr00784" 72 | Statistical Molecular Thermodynamics,University of Minnesota,spc00071,"csr00825, csr00826, csr00827, csr00828" 73 | Statistical Thermodynamics: Molecules to 
Machines,Carnegie Mellon University,spc00072,"csr00829, csr00830, csr00831, csr00832, csr00833, csr00834, csr00835" 74 | Social Media Marketing,Northwestern University,spc00073,"csr00838, csr00839, csr00840, csr00841, csr00842, csr00843" 75 | Social Media Data Analytics,Rutgers the State University of New Jersey,spc00074,"csr00844, csr00845, csr00846" 76 | What is Social?,Northwestern University,spc00075,"csr00847, csr00848, csr00849, csr00850, csr00851" 77 | Design Patterns,University of Alberta,spc00076,"csr00855, csr00856, csr00857, csr00858, csr00859" 78 | Fashion as Design,The Museum of Modern Art,spc00077,"csr00860, csr00861, csr00862, csr00863, csr00864, csr00865, csr00866, csr00867" 79 | Graphic Design,California Institute of the Arts,spc00078,"csr00868, csr00869, csr00870, csr00871, csr00872" 80 | Introduction to User Experience Design ,Georgia Institute of Technology,spc00079,"csr00873, csr00874, csr00875, csr00876, csr00877" 81 | Cryptography and Information Theory,University of Colorado System,spc00080,"csr00878, csr00879, csr00880, csr00881, csr00882" 82 | Academic Information Seeking,"University of Copenhagen, Technical University of Denmark (DTU)",spc00081,"csr00883, csr00884, csr00885, csr00886, csr00887, csr00888, csr00889, csr00890" 83 | Metaliteracy: Empowering Yourself in a Connected World,The State University of New York,spc00082,"csr00891, csr00892, csr00893, csr00894, csr00895" 84 | Mindware: Critical Thinking for the Information Age ,University of Michigan,spc00083,"csr00896, csr00897, csr00898, csr00899, csr00900" 85 | Business Analytics,University of Pennsylvania,spc00084,"csr00901, csr00902, csr00903, csr00904" 86 | Introduction to Complex Analysis,Wesleyan University,spc00085,"csr00920, csr00921, csr00922, csr00923, csr00924" 87 | Analysis of Algorithms,Princeton University,spc00086,"csr00925, csr00926, csr00927, csr00928, csr00929" 88 | Bayesian Statistics: From Concept to Data Analysis,"University of California, Santa 
Cruz",spc00087,"csr00935, csr00936, csr00937, csr00938, csr00939, csr00940, csr00941" 89 | Business Statistics and Analysis,Rice University,spc00088,"csr00942, csr00943, csr00944, csr00945, csr00946" 90 | "Internet History, Technology, and Security",University of Michigan,spc00089,"csr00962, csr00963, csr00964, csr00965, csr00966, csr00967" 91 | The Impact of Technology,EIT Digital ,spc00090,"csr00968, csr00969, csr00970, csr00971, csr00972, csr00973" 92 | Get Interactive: Practical Teaching with Technology,"University of London, Bloomsbury Learning Environment",spc00091,"csr00974, csr00975, csr00976, csr00977, csr00978, csr00979, csr00980, csr00981, csr00982, csr00983" 93 | Fundamentals of Network Communication,University of Colorado System,spc00092,"csr01001, csr01002, csr01003, csr01004" 94 | Network Protocols and Architecture,Cisco,spc00093,"csr01005, csr01006, csr01007, csr01008, csr01009" 95 | Digital Marketing,University of Illinois at Urbana-Champaign,spc00094,"csr01028, csr01029, csr01030, csr01031, csr01032, csr01033, csr01034" 96 | Introduction to Marketing,University of Pennsylvania,spc00095,"csr01035, csr01036, csr01037, csr01038, csr01039, csr01040" 97 | Business Foundations,University of Pennsylvania,spc00096,"csr01041, csr01042, csr01043, csr01044, csr01045, csr01046" 98 | Marketing in a Digital World,University of Illinois at Urbana-Champaign,spc00097,"csr01047, csr01048, csr01049, csr01050, csr01051" 99 | The Cycle: Management of Successful Arts and Cultural Organizations,"University of Maryland, College Park",spc00098,"csr01052, csr01053, csr01054, csr01055, csr01056, csr01057" 100 | Food & Beverage Management,Universit Bocconi,spc00099,"csr01058, csr01059, csr01060, csr01061" 101 | International Marketing Entry and Execution,Yonsei University,spc00100,"csr01062, csr01063, csr01064, csr01065, csr01066" 102 | International Marketing & Cross Industry Growth ,Yonsei University,spc00101,"csr01067, csr01068, csr01069, csr01070, csr01071" 103 | 
Introduccin al Marketing,University of Pennsylvania,spc00102,"csr01072, csr01073, csr01074, csr01075, csr01076, csr01077" 104 | The Strategy of Content Marketing,"University of California, Davis",spc00103,"csr01078, csr01079, csr01080, csr01081, csr01082" 105 | Digital Product Management: Modern Fundamentals,University of Virginia,spc00104,"csr01093, csr01094, csr01095, csr01096, csr01097, csr01098, csr01099" 106 | Digital Signal Processing,cole Polytechnique Fdrale de Lausanne,spc00105,"csr01104, csr01105, csr01106, csr01107, csr01108, csr01109, csr01110, csr01111, csr01112, csr01113" 107 | Digital Footprint,The University of Edinburgh,spc00106,"csr01114, csr01115, csr01116, csr01117, csr01118" 108 | Introduction to Electronics,Georgia Institute of Technology,spc00107,"csr01135, csr01136, csr01137, csr01138, csr01139, csr01140" 109 | Software Architecture,University of Alberta,spc00108,"csr01160, csr01161, csr01162, csr01163" 110 | Service-Oriented Architecture,University of Alberta,spc00109,"csr01164, csr01165, csr01166, csr01167" 111 | Computer Architecture,Princeton University,spc00110,"csr01168, csr01169, csr01170, csr01171" 112 | Financing and Investing in Infrastructure,Universit Bocconi,spc00111,"csr01189, csr01190, csr01191, csr01192, csr01193, csr01194" 113 | Smart Cities Management of Smart Urban Infrastructures,cole Polytechnique Fdrale de Lausanne,spc00112,"csr01195, csr01196, csr01197, csr01198" 114 | Beginning Game Programming with C#,University of Colorado System,spc00113,"csr01225, csr01226, csr01227, csr01228, csr01229, csr01230" 115 | Programming for Everybody (Getting Started with Python),University of Michigan,spc00114,"csr01235, csr01236, csr01237, csr01238, csr01239" 116 | Code Yourself! 
An Introduction to Programming,"The University of Edinburgh, Universidad ORT Uruguay",spc00115,"csr01240, csr01241, csr01242, csr01243" 117 | Introduction to Programming with MATLAB,Vanderbilt University,spc00116,"csr01244, csr01245, csr01246" 118 | An Introduction to Programming the Internet of Things (IOT),"University of California, Irvine",spc00117,"csr01247, csr01248, csr01249, csr01250, csr01251" 119 | Google Cloud Platform Fundamentals: Core Infrastructure,Google Cloud,spc00118,"csr01269, csr01270, csr01271, csr01272, csr01273, csr01274" 120 | Data Science at Scale,University of Washington,spc00119,"csr01275, csr01276, csr01277, csr01278" 121 | "Big Data Essentials: HDFS, MapReduce and Spark RDD",Yandex,spc00120,"csr01303, csr01304, csr01305, csr01306, csr01307" 122 | Maps and the Geospatial Revolution,The Pennsylvania State University,spc00121,"csr01308, csr01309, csr01310, csr01311" 123 | Visual Analytics with Tableau,"University of California, Davis",spc00122,"csr01318, csr01319, csr01320, csr01321, csr01322" 124 | Homeland Security and Cybersecurity,University of Colorado System,spc00123,"csr01348, csr01349, csr01350, csr01351, csr01352" 125 | Philosophy and the Sciences: Introduction to the Philosophy of Cognitive Sciences,The University of Edinburgh,spc00124,"csr01353, csr01354, csr01355, csr01356" 126 | Personality Types at Work,University of Florida,spc00125,"csr01357, csr01358, csr01359, csr01360" 127 | Entrepreneurship,University of Pennsylvania,spc00126,"csr01396, csr01397, csr01398, csr01399" 128 | Entrepreneurship 1: Developing the Opportunity,University of Pennsylvania,spc00127,"csr01400, csr01401, csr01402, csr01403, csr01404, csr01405" 129 | English for Business and Entrepreneurship,University of Pennsylvania,spc00128,"csr01406, csr01407, csr01408" 130 | Entrepreneurship 2: Launching your Start-Up,University of Pennsylvania,spc00129,"csr01409, csr01410, csr01411, csr01412" 131 | Career Success,"University of California, 
Irvine",spc00130,"csr01413, csr01414, csr01415, csr01416, csr01417, csr01418, csr01419" 132 | Cyber Threats and Attack Vectors,University of Colorado System,spc00131,"csr01433, csr01434, csr01435, csr01436, csr01437" 133 | Detecting and Mitigating Cyber Threats and Attacks,University of Colorado System,spc00132,"csr01438, csr01439, csr01440, csr01441" 134 | Cybersecurity,"University of Maryland, College Park",spc00133,"csr01442, csr01443, csr01444" 135 | Bitcoin and Cryptocurrency Technologies,Princeton University,spc00134,"csr01445, csr01446, csr01447, csr01448" 136 | "Knowledge Exchange: Using, Protecting and Monetizing Ideas with Third Parties","Universiteit Leiden, Leiden University Medical Center, Luris",spc00135,"csr01465, csr01466, csr01467, csr01468, csr01469" 137 | (Journey of the Universe: Weaving Knowledge and Action),Yale University,spc00136,"csr01470, csr01471, csr01472, csr01473, csr01474" 138 | Big History: Connecting Knowledge,Macquarie University,spc00137,"csr01475, csr01476, csr01477, csr01478, csr01479, csr01480, csr01481" 139 | Geographic Information Systems (GIS),"University of California, Davis",spc00138,"csr01482, csr01483, csr01484, csr01485" 140 | Fundamentals of Computing,Rice University,spc00139,"csr01491, csr01492, csr01493, csr01494" 141 | The Data Scientists Toolbox,Johns Hopkins University,spc00140,"csr01495, csr01496, csr01497, csr01498" 142 | Introduction to Formal Concept Analysis,Higher School of Economics,spc00141,"csr01499, csr01500, csr01501, csr01502" 143 | Journey Conversations: Weaving Knowledge and Action,Yale University,spc00142,"csr01503, csr01504, csr01505, csr01506, csr01507" 144 | Six Sigma Green Belt,University System of Georgia,spc00143,"csr01508, csr01509, csr01510, csr01511" 145 | Introduction to Graph Theory,"University of California, San Diego, Higher School of Economics",spc00144,"csr01522, csr01523, csr01524" 146 | "Big Data Analysis: Hive, Spark SQL, DataFrames and GraphFrames",Yandex,spc00145,"csr01525, 
csr01526, csr01527, csr01528, csr01529, csr01530" 147 | Probabilistic Graphical Models ,Stanford University,spc00146,"csr01531, csr01532, csr01533, csr01534, csr01535, csr01536" 148 | Combinatorics and Probability,"University of California, San Diego, Higher School of Economics",spc00147,"csr01558, csr01559, csr01560, csr01561, csr01562" 149 | An Intuitive Introduction to Probability,University of Zurich,spc00148,"csr01563, csr01564, csr01565" 150 | Introduction to Probability and Data,Duke University,spc00149,"csr01571, csr01572, csr01573, csr01574, csr01575" 151 | Statistics with R,Duke University,spc00150,"csr01576, csr01577, csr01578, csr01579, csr01580" 152 | Probabilistic Graphical Models 1: Representation,Stanford University,spc00151,"csr01585, csr01586, csr01587, csr01588, csr01589" 153 | Introduction to Engineering Mechanics,Georgia Institute of Technology,spc00152,"csr01603, csr01604, csr01605, csr01606, csr01607" 154 | Materials Science: 10 Things Every Engineer Should Know,"University of California, Davis",spc00153,"csr01608, csr01609, csr01610, csr01611, csr01612, csr01613" 155 | Fundamentals of Engineering Exam Review,Georgia Institute of Technology,spc00154,"csr01619, csr01620, csr01621" 156 | Engineering Systems in Motion: Dynamics of Particles and Bodies in 2D Motion,Georgia Institute of Technology,spc00155,"csr01622, csr01623, csr01624" 157 | Applications in Engineering Mechanics,Georgia Institute of Technology,spc00156,"csr01628, csr01629, csr01630, csr01631, csr01632" 158 | Cryptography I,Stanford University,spc00157,"csr01646, csr01647, csr01648, csr01649" 159 | Essential Design Principles for Tableau,"University of California, Davis",spc00158,"csr01650, csr01651, csr01652, csr01653, csr01654, csr01655" 160 | Digital Transformation in Financial Services,Copenhagen Business School,spc00159,"csr01656, csr01657, csr01658, csr01659, csr01660" 161 | Systems Biology and Biotechnology,Icahn School of Medicine at Mount Sinai,spc00160,"csr01661, 
csr01662, csr01663, csr01664" 162 | Global Warming I: The Science and Modeling of Climate Change,The University of Chicago,spc00161,"csr01665, csr01666, csr01667, csr01668" 163 | Business English: Meetings,University of Washington,spc00162,"csr01684, csr01685, csr01686, csr01687, csr01688" 164 | Introduction to Systems Biology,Icahn School of Medicine at Mount Sinai,spc00163,"csr01689, csr01690, csr01691, csr01692, csr01693" 165 | Dynamical Modeling Methods for Systems Biology,Icahn School of Medicine at Mount Sinai,spc00164,"csr01694, csr01695, csr01696, csr01697" 166 | Software Design Methods and Tools,University of Colorado System,spc00165,"csr01698, csr01699, csr01700, csr01701, csr01702" 167 | "HI-FIVE: Health Informatics For Innovation, Value & Enrichment (Social/Peer Perspective)",Columbia University,spc00166,"csr01719, csr01720, csr01721, csr01722, csr01723" 168 | Subsistence Marketplaces,University of Illinois at Urbana-Champaign,spc00167,"csr01743, csr01744, csr01745, csr01746, csr01747" 169 | "Espace mondial, une vision franaise des global studies",Sciences Po,spc00168,"csr01753, csr01754, csr01755, csr01756, csr01757" 170 | Virtual Teacher,"University of California, Irvine",spc00169,"csr01758, csr01759, csr01760, csr01761, csr01762" 171 | Advanced Search Engine Optimization Strategies,"University of California, Davis",spc00170,"csr01763, csr01764, csr01765, csr01766, csr01767" 172 | "Espace mondial, a French vision of Global studies",Sciences Po,spc00171,"csr01768, csr01769, csr01770, csr01771, csr01772, csr01773, csr01774" 173 | Innovative Finance: Hacking finance to change the world ,University of Cape Town,spc00172,"csr01789, csr01790, csr01791, csr01792, csr01793, csr01794" 174 | Content Strategy for Professionals: Expanding Your Contents Reach,Northwestern University,spc00173,"csr01795, csr01796, csr01797, csr01798, csr01799, csr01800" 175 | Survey analysis to Gain Marketing Insights,Emory University,spc00174,"csr01839, csr01840, csr01841, 
csr01842" 176 | The Importance of Listening,Northwestern University,spc00175,"csr01862, csr01863, csr01864, csr01865, csr01866" 177 | Pricing Strategy,IE Business School,spc00176,"csr01867, csr01868, csr01869, csr01870, csr01871" 178 | The Business of Social,Northwestern University,spc00177,"csr01872, csr01873, csr01874, csr01875, csr01876" 179 | Channel Management and Retailing ,IE Business School,spc00178,"csr01877, csr01878, csr01879, csr01880" 180 | Introduction to Social Media Analytics,Emory University,spc00179,"csr01881, csr01882, csr01883, csr01884, csr01885" 181 | Essentials of Corporate Finance,The University of Melbourne,spc00180,"csr01886, csr01887, csr01888, csr01889, csr01890, csr01891, csr01892" 182 | Investigacin de mercados y comportamiento del consumidor,IE Business School,spc00181,"csr01893, csr01894, csr01895, csr01896, csr01897" 183 | Fundamentals of Global Energy Business,University of Colorado System,spc00182,"csr01902, csr01903, csr01904, csr01905, csr01906" 184 | Business Growth Strategy,University of Virginia,spc00183,"csr01907, csr01908, csr01909, csr01910, csr01911" 185 | Questionnaire Design for Social Surveys,University of Michigan,spc00184,"csr01922, csr01923, csr01924, csr01925, csr01926" 186 | Capstone Design Project in Power Electronics,University of Colorado Boulder,spc00185,"csr01966, csr01967, csr01968, csr01969, csr01970" 187 | Competitive Strategy and Organization Design Project,Ludwig-Maximilians-Universitt Mnchen (LMU),spc00186,"csr01971, csr01972, csr01973, csr01974, csr01975" 188 | Design and Build a Data Warehouse for Business Intelligence Implementation,University of Colorado System,spc00187,"csr01976, csr01977, csr01978, csr01979" 189 | Game Design Document: Define the Art & Concepts,California Institute of the Arts,spc00188,"csr01980, csr01981, csr01982, csr01983, csr01984" 190 | Introduction to Typography,California Institute of the Arts,spc00189,"csr01985, csr01986, csr01987, csr01988, csr01989" 
-------------------------------------------------------------------------------- /All_Mooc/Program_order/Specialization/process.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | # coursera = json.load(open('Coursera_data.json')) 4 | 5 | # csr = [] 6 | # spe_id = 0 7 | # for item in coursera: 8 | # if unicode("courseSet") in item: 9 | # spe_id += 1 10 | # item["special_id"] = "spc" + str(spe_id).zfill(5) 11 | # list = item[unicode("courseSet")] 12 | # clist = [] 13 | # for course in list: 14 | # c = {} 15 | # original = course[unicode("id")] 16 | # c["id"] = "csr" + original[-5:] 17 | # c["course_name"] = course[unicode("name")] 18 | # clist.append(c["id"]) 19 | # item[unicode("courseList")] = clist 20 | # del item[unicode("courseSet")] 21 | # del item[unicode("specialization")] 22 | # del item[unicode("description")] 23 | # del item[unicode("course_url")] 24 | # del item[unicode("img")] 25 | # # if unicode("description") in item: 26 | # # s = item[unicode("description")] 27 | # # item["description"] = Format_desc(s) 28 | # # else: 29 | # # item["description"] = "Empty" 30 | # csr.append(item) 31 | 32 | # print len(csr) 33 | # with open("Specialization.json", "a") as f: 34 | # json.dump(csr, f) 35 | 36 | coursera = json.load(open('Coursera_data.json')) 37 | 38 | csr = [] 39 | spe_id = 0 40 | duplicate = [] 41 | for item in coursera: 42 | if unicode("courseSet") in item and item["name"] not in duplicate: 43 | spe_id += 1 44 | item["special_id"] = "spc" + str(spe_id).zfill(5) 45 | list = item[unicode("courseSet")] 46 | clist = [] 47 | for course in list: 48 | c = {} 49 | original = course[unicode("id")] 50 | c["id"] = "csr" + original[-5:] 51 | c["course_name"] = course[unicode("name")] 52 | clist.append(c["id"]) 53 | item[unicode("courseList")] = ', '.join(clist) 54 | del item[unicode("courseSet")] 55 | del item[unicode("specialization")] 56 | del item[unicode("description")] 57 | del 
item[unicode("course_url")] 58 | del item[unicode("img")] 59 | csr.append(item) 60 | duplicate.append(item["name"]) 61 | 62 | print len(csr) 63 | with open("Specialization3.json", "a") as f: 64 | json.dump(csr, f) 65 | import pandas as pd 66 | import sys 67 | reload(sys) 68 | sys.setdefaultencoding('utf8') 69 | 70 | df = pd.read_json("Specialization3.json") 71 | # print(df) 72 | df.to_csv('Specialization2.csv') 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /CoursePlanner.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/CoursePlanner.pdf -------------------------------------------------------------------------------- /Coursera/Meta_data/Coursera.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/Coursera/Meta_data/Coursera.key -------------------------------------------------------------------------------- /Coursera/Meta_data/Coursera_sample.json: -------------------------------------------------------------------------------- 1 | 2 | // feed key words like "data" in url: http://www.coursera.org 3 | // list of courses and specializations 4 | // each query page has 20 courses list 5 | { 6 | "@querypage": "https://www.coursera.org/courses?languages=en&query=data&start=" "+ int", 7 | "@type" : "ItemList", 8 | "keyWord": "key_input", 9 | "itemListElement":[ 10 | { 11 | "@type":"Specialization", 12 | "position":1, 13 | "image": "image_url", 14 | "provenance": "Organization_or_University", 15 | "url":"https://www.coursera.org/specializations/jhu-data-science", 16 | "@type" : "ItemList", 17 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}], 18 | }, 19 | { 20 | "@type":"Specialization", 21 | "position":2, 22 | "image": 
"image_url", 23 | "provenance": "Organization_or_University", 24 | "url":"https://www.coursera.org/specializations/big-data", 25 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}], 26 | }, 27 | { 28 | "@type":"Specialization", 29 | "position":3, 30 | "image": "image_url", 31 | "provenance": "Organization_or_University", 32 | "url":"https://www.coursera.org/specializations/data-structures-algorithms", 33 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}], 34 | }, 35 | { 36 | "@type":"Specialization", 37 | "position":4, 38 | "image": "image_url", 39 | "provenance": "Organization_or_University", 40 | "url":"https://www.coursera.org/specializations/gcp-data-machine-learning", 41 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}], 42 | }, 43 | { 44 | "@type":"Specialization", 45 | "position":5, 46 | "image": "image_url", 47 | "provenance": "Organization_or_University", 48 | "url":"https://www.coursera.org/specializations/data-analysis", 49 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}], 50 | }, 51 | { 52 | "@type":"Specialization", 53 | "position":6, 54 | "image": "image_url", 55 | "provenance": "Organization_or_University", 56 | "url":"https://www.coursera.org/specializations/big-data-engineering", 57 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}], 58 | }, 59 | { 60 | "@type":"Specialization", 61 | "position":7, 62 | "image": "image_url", 63 | "provenance": "Organization_or_University", 64 | "url":"https://www.coursera.org/specializations/data-science-python", 65 | "itemListElement":[{"@type":"Course"}, {"@type":"Course"}, {"@type":"Course"}], 66 | }, 67 | { 68 | "@type":"Course", 69 | "position":8, 70 | "image": "image_url", 71 | "provenance": "Organization_or_University", 72 | "url":"https://www.coursera.org/learn/data-structures" 73 | }, 74 | { 75 | "@type":"Course", 76 | "position":9, 77 | "image": 
"image_url", 78 | "provenance": "Organization_or_University", 79 | "url":"https://www.coursera.org/learn/python-network-data" 80 | }, 81 | { 82 | "@type":"Course", 83 | "position":10, 84 | "image": "image_url", 85 | "provenance": "Organization_or_University", 86 | "url":"https://www.coursera.org/learn/big-data-graph-analytics" 87 | }, 88 | { 89 | "@type":"Course", 90 | "position":11, 91 | "image": "image_url", 92 | "provenance": "Organization_or_University", 93 | "url":"https://www.coursera.org/learn/gcp-big-data-ml-fundamentals" 94 | }, 95 | { 96 | "@type":"Course", 97 | "position":12, 98 | "image": "image_url", 99 | "provenance": "Organization_or_University", 100 | "url":"https://www.coursera.org/learn/big-data-integration-processing" 101 | }, 102 | { 103 | "@type":"Course", 104 | "position":13, 105 | "image": "image_url", 106 | "provenance": "Organization_or_University", 107 | "url":"https://www.coursera.org/learn/java-programming-arrays-lists-data" 108 | }, 109 | { 110 | "@type":"Course", 111 | "position":14, 112 | "image": "image_url", 113 | "provenance": "Organization_or_University", 114 | "url":"https://www.coursera.org/learn/python-data" 115 | }, 116 | { 117 | "@type":"Course", 118 | "position":15, 119 | "image": "image_url", 120 | "provenance": "Organization_or_University", 121 | "url":"https://www.coursera.org/learn/big-data-introduction" 122 | }, 123 | { 124 | "@type":"Course", 125 | "position":16, 126 | "image": "image_url", 127 | "provenance": "Organization_or_University", 128 | "url":"https://www.coursera.org/learn/data-manipulation" 129 | }, 130 | { 131 | "@type":"Course", 132 | "position":17, 133 | "image": "image_url", 134 | "provenance": "Organization_or_University", 135 | "url":"https://www.coursera.org/learn/big-data-machine-learning" 136 | }, 137 | { 138 | "@type":"Course", 139 | "position":18, 140 | "image": "image_url", 141 | "provenance": "Organization_or_University", 142 | "url":"https://www.coursera.org/learn/big-data-management" 143 | }, 
144 | { 145 | "@type":"Course", 146 | "position":19, 147 | "image": "image_url", 148 | "provenance": "Organization_or_University", 149 | "url":"https://www.coursera.org/learn/python-data-visualization" 150 | }, 151 | { 152 | "@type":"Course", 153 | "position":20, 154 | "image": "image_url", 155 | "provenance": "Organization_or_University", 156 | "url":"https://www.coursera.org/learn/datasciencemathskills" 157 | } 158 | ] 159 | } 160 | -------------------------------------------------------------------------------- /Coursera/Meta_data/Page_RDF.json: -------------------------------------------------------------------------------- 1 | { 2 | "provenance":"Johns Hopkins University", 3 | "name":"Data Science", 4 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/eb/8e18e0a4f111e59ae9c776a3dd0526/jhu-logo-thumb.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 5 | "specialization":"True" 6 | }, 7 | { 8 | "provenance":"Duke University", 9 | "name":"Data Science Math Skills", 10 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/08/8c6610c07e11e6a7f5e70b413367a6/DMSIcon.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 11 | "specialization":"False" 12 | }, 13 | { 14 | "provenance":"PwC", 15 | "name":"Data Analysis and Presentation Skills: the PwC Approach", 16 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/2a/50c800335611e6a7800fb6ff2f8eb2/PwC_fl_c.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 17 | "specialization":"True" 18 | }, 19 | { 20 | "provenance":"University of California, Davis", 21 | "name":"Data Visualization with Tableau", 22 | 
"img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/41/d326f0af5b11e5ada4195d312ad6aa/davis_data_thumb.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 23 | "specialization":"True" 24 | }, 25 | { 26 | "provenance":"Johns Hopkins University", 27 | "name":"A Crash Course in Data Science", 28 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/f0/399ec045ea11e5ba8e4bc295fed462/9990024683_955f8f043b_h.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 29 | "specialization":"False" 30 | }, 31 | { 32 | "provenance":"University of Illinois at Urbana-Champaign", 33 | "name":"Data Mining ", 34 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/01/609980fb3311e58521f12840fa495b/DataMining-Specialization_logo.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 35 | "specialization":"True" 36 | }, 37 | { 38 | "provenance":"University of California, San Diego", 39 | "name":"Big Data", 40 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/a6/35516029cf11e6b8dfdbd5deea5f21/UCSD_BigData_PartnerProvided.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 41 | "specialization":"True" 42 | }, 43 | { 44 | "provenance":"University of Michigan", 45 | "name":"Applied Data Science with Python", 46 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/c8/8d6df01eb311e6b5f5f786b289d8ba/pythondatascience_specialization_final.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 47 | "specialization":"True" 48 | }, 49 | { 50 | "provenance":"Wesleyan University", 51 | "name":"Data Analysis and Interpretation", 52 | 
"img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d2j5ihb19pt1hq.cloudfront.net/sdp_page/s12n_logos/Wesleyan_DataAnalysis_Istock41296982.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 53 | "specialization":"True" 54 | }, 55 | { 56 | "provenance":"Yandex", 57 | "name":"Big Data for Data Engineers", 58 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/8c/7d9d20a14411e7843ef75e19b675b3/Yandex-466_______________7.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 59 | "specialization":"True" 60 | }, 61 | { 62 | "provenance":"University of California, San Diego, Higher School of Economics", 63 | "name":"Data Structures and Algorithms", 64 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/2a/34a150d9ad11e5bd22cb7d7d7686df/logo3.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 65 | "specialization":"True" 66 | }, 67 | { 68 | "provenance":"Johns Hopkins University", 69 | "name":"Executive Data Science", 70 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d396qusza40orc.cloudfront.net/phoenixassets/eds-s12n/eds_thumbnail.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 71 | "specialization":"True" 72 | }, 73 | { 74 | "provenance":"Johns Hopkins University", 75 | "name":"Genomic Data Science", 76 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/17/4df3b0a98411e591c0b9c70a6d8700/GBDS.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 77 | "specialization":"True" 78 | }, 79 | { 80 | "provenance":"University of Colorado System", 81 | "name":"Data Warehousing for Business Intelligence", 82 | 
"img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d2j5ihb19pt1hq.cloudfront.net/sdp_page/s12n_logos/Colorado_DataWarehousing_Getty471741796.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 83 | "specialization":"True" 84 | }, 85 | { 86 | "provenance":"University of Michigan, University of Maryland, College Park", 87 | "name":"Survey Data Collection and Analytics ", 88 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/e5/f471d0f76811e5a5175d8aed3cf3b0/AdobeStock_83781931.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 89 | "specialization":"True" 90 | }, 91 | { 92 | "provenance":"University of Washington", 93 | "name":"Data Science at Scale", 94 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d2j5ihb19pt1hq.cloudfront.net/sdp_page/s12n_logos/UW_PracticalDataScience_Getty479524481.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 95 | "specialization":"True" 96 | }, 97 | { 98 | "provenance":"University of Michigan", 99 | "name":"Python Data Structures", 100 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/44/2959b0502911e5851f058ad6ebf936/pythondata_thumbnail_1x1.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 101 | "specialization":"False" 102 | }, 103 | { 104 | "provenance":"University of California, San Diego, Higher School of Economics", 105 | "name":"Data Structures", 106 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/2b/96fb00d9ae11e595dfe9e95f32b969/logo3.png?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 107 | "specialization":"False" 108 | }, 109 | { 110 | "provenance":"University of Illinois at Urbana-Champaign", 111 | "name":"Data Visualization", 112 | 
"img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/f6/4bce20533611e4bc4cff5931b60ef1/Viz_for_Data_Mining.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 113 | "specialization":"False" 114 | }, 115 | { 116 | "provenance":"Google Cloud", 117 | "name":"Data Engineering on Google Cloud Platform", 118 | "img":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/95/77558056d811e79f95119f98b3ba96/visite-google-datacenter-14.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF", 119 | "specialization":"True" 120 | } -------------------------------------------------------------------------------- /Coursera/Src/Coursera_driver.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import requests 3 | from bs4 import BeautifulSoup 4 | import json 5 | import time 6 | import sys 7 | reload(sys) 8 | sys.setdefaultencoding('utf-8') 9 | 10 | _x_query = { 11 | "coursename": "//h2[@class = 'color-primary-text headline-1-text flex-1']", 12 | "specialize": "//span[@class = 'specialization-course-count']/span", 13 | "image_url": "//div[@class='horizontal-box']/div/img", 14 | "provenance": ["//span[@class = 'text-light offering-partner-names']/span", "//div[@class = 'text-light offering-partner-names']/span"] 15 | } 16 | 17 | content = [] 18 | id = 1718 19 | # 4-skip 6 20 | # keyword = "math" 21 | # ['math', 'social','math', 'marketing', design', 'web', 'cyber', 'program', 'platform', 'map', 'intelligence', 'knowledge', 'graph', 'probability'] 22 | # 'digital', 'electronic', 'architecture', 'infrastructure', 23 | keywords = [ 'social', 'marketing', 'design', 'web', 'cyber', 'program', 'platform', 'map', 'intelligence', 'knowledge', 'graph', 'probability', \ 24 | 'digital', 'electronic', 'architecture', 'infrastructure', 'digital', 'electronic', 'architecture', 
'infrastructure', 'program', 'platform', 'map', 'intelligence', 'entrepreneurship','cyber', 'knowledge', 'graph', 'probability', "engineer"] 25 | for keyword in keywords: 26 | 27 | for j in xrange(2, 5): 28 | url = "https://www.coursera.org/courses?languages=en&query=" + keyword + "&start=" + str(j * 20) 29 | print url 30 | browser = webdriver.Firefox() 31 | browser.set_page_load_timeout(1200) 32 | browser.get(url) 33 | time.sleep(20) 34 | # each page has 20 courses 35 | # coursename = [item.text.encode('ascii','ignore') for item in browser.find_elements_by_xpath(_x_query["coursename"])] 36 | # image = [item.get_attribute('src').encode('ascii','ignore') for item in browser.find_elements_by_xpath(_x_query["image_url"])] 37 | # provenance = [item.text.encode('ascii','ignore') for item in browser.find_elements_by_xpath(_x_query["provenance"][0])] 38 | # # provenance += [item.text.encode('ascii','ignore') for item in browser.find_elements_by_xpath(i for i in _x_query["provenance"[1]])] 39 | # specialize = [item.text.encode('ascii','ignore') for item in browser.find_elements_by_xpath(_x_query["specialize"])] 40 | # # store course info for each page 41 | 42 | 43 | # for i in range(len(coursename)): 44 | # coursedic[coursename[i]] = {"image": image[i], "provenance": provenance[i]} 45 | 46 | # track the links corresponding to 20 courses 47 | 48 | source_code = requests.get(url) 49 | plain_text = source_code.text 50 | soup = BeautifulSoup(plain_text, 'html.parser') 51 | courseTags = soup.find_all("div", "offering-content") 52 | page = [] 53 | for tag in courseTags: 54 | coursedic = {} 55 | coursedic["img"] = tag.select('img[src]')[0]['src'] 56 | coursedic["name"] = tag.select('h2')[0].text.decode('utf8').encode('ascii', errors='ignore') 57 | specialization = tag.select('.specialization-course-count') 58 | if len(specialization) != 0: 59 | coursedic["specialization"] = True 60 | else: 61 | coursedic["specialization"] = False 62 | if len(tag.find_all('span')) != 0: 63 | 
coursedic["provenance"] = tag.find_all('span')[-1].text.decode('utf8').encode('ascii', errors='ignore') 64 | else: 65 | print url 66 | page.append(coursedic) 67 | 68 | newpage = [] 69 | 70 | 71 | url = soup.select_one("script[type=application/ld+json]") 72 | if len(url) == 0: 73 | continue 74 | if len(url) != 0: 75 | url_list = json.loads(url.text) 76 | urls = [object['url'].encode('ascii','ignore') for object in url_list["itemListElement"]] 77 | urls = ["http:/" + url[4:] for url in urls] 78 | # urls of 20 courses respectively 79 | for i in range(len(urls)): 80 | print urls[i] 81 | browser.set_page_load_timeout(200000000) 82 | erroe = browser.get(urls[i]) 83 | print erroe 84 | browser.get(urls[i]) 85 | time.sleep(20) 86 | courseObject = page[i] 87 | courseObject["course_url"] = urls[i] 88 | # specialization 89 | source_code = requests.get(urls[i]) 90 | plain_text = source_code.text 91 | soup = BeautifulSoup(plain_text, 'html.parser') 92 | if 'specializations' in urls[i]: 93 | courseObject["courseSet"] = [] 94 | # if courseObject["specialization"]: 95 | # descriptions = browser.find_elements_by_xpath('//div[@class="description-cont"]/div/div/span') 96 | desc = soup.find_all("div", "description subsection") 97 | if len(desc) != 0: 98 | courseObject["description"] = soup.find_all("div", "description subsection")[0].text 99 | else: 100 | print urls[i] 101 | # provenance = browser.find_element_by_xpath('//p[@class = "headline-1-text created-by"]/following-sibling::*[1]/img')\ 102 | # .get_attribute('alt') 103 | # coursenames = browser.find_elements_by_xpath('//h2[@class="course-name headline-5-text"]') 104 | CourseTags = soup.find_all('section', 'rc-Course bgcolor-white') 105 | for tag in CourseTags: 106 | id += 1 107 | course = {} 108 | 109 | name = tag.find_all("h2", "course-name headline-5-text") 110 | if len(name) != 0: 111 | course["name"] = name[0].text.decode('utf8').encode('ascii', errors='ignore') 112 | else: 113 | print "no_name", urls[i] 114 | 115 | if 
len(tag.select('.description-cont')) != 0: 116 | course["description"] = tag.select('.description-cont')[0].text 117 | else: 118 | print "no_des", urls[i] 119 | course["id"] = "coursera" + str(id).zfill(5) 120 | courseObject["courseSet"].append(course) 121 | # descriptions = [item.text.encode('ascii','ignore') for item in descriptions] 122 | # cousenames = [item.text.encode('ascii','ignore') for item in cousenames] 123 | # coursedic["subject"] = cousenames 124 | else: 125 | id += 1 126 | desc = soup.find_all("p", "body-1-text course-description") 127 | if len(desc) != 0: 128 | courseObject["description"] = soup.find_all("p", "body-1-text course-description")[0].text 129 | else: 130 | print urls[i] 131 | # description = browser.find_element_by_xpath('//p[@class="body-1-text course-description"]') 132 | courseObject["id"] = "coursera" + str(id).zfill(5) 133 | # provenance = browser.find_element_by_xpath('//div[@class = "headline-1-text creator-names"]/span[2]') 134 | # cousename = browser.find_elements_by_xpath('//h1[@class="title display-3-text"]') 135 | # description = description.text.encode('ascii','ignore') 136 | newpage.append(courseObject) 137 | print newpage 138 | with open('Coursera_'+ keyword + str(j) + '.json', 'a') as f: 139 | json.dump(newpage, f) 140 | 141 | content += newpage 142 | with open('Coursera_'+ keyword + '_content' + '.json', 'a') as f: 143 | json.dump(content, f) 144 | 145 | browser.quit() 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /Coursera/Src/Crawl_Coursera.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import requests 3 | from bs4 import BeautifulSoup 4 | import json 5 | import time 6 | import sys 7 | reload(sys) 8 | sys.setdefaultencoding('utf-8') 9 | 10 | _x_query = { 11 | "coursename": "//h2[@class = 'color-primary-text headline-1-text flex-1']", 12 | "specialize": "//span[@class = 
'specialization-course-count']/span", 13 | "image_url": "//div[@class='horizontal-box']/div/img", 14 | "provenance": ["//span[@class = 'text-light offering-partner-names']/span", "//div[@class = 'text-light offering-partner-names']/span"] 15 | } 16 | 17 | 18 | content = [] 19 | id = 0 20 | # 4-skip 5 21 | for j in range(0, 9): 22 | url = "https://www.coursera.org/courses?languages=en&query=web&start=" + str(j * 20) 23 | print url 24 | browser = webdriver.Firefox() 25 | browser.set_page_load_timeout(30) 26 | browser.get(url) 27 | time.sleep(20) 28 | # each page has 20 courses 29 | # coursename = [item.text.encode('ascii','ignore') for item in browser.find_elements_by_xpath(_x_query["coursename"])] 30 | # image = [item.get_attribute('src').encode('ascii','ignore') for item in browser.find_elements_by_xpath(_x_query["image_url"])] 31 | # provenance = [item.text.encode('ascii','ignore') for item in browser.find_elements_by_xpath(_x_query["provenance"][0])] 32 | # # provenance += [item.text.encode('ascii','ignore') for item in browser.find_elements_by_xpath(i for i in _x_query["provenance"[1]])] 33 | # specialize = [item.text.encode('ascii','ignore') for item in browser.find_elements_by_xpath(_x_query["specialize"])] 34 | # # store course info for each page 35 | 36 | 37 | # for i in range(len(coursename)): 38 | # coursedic[coursename[i]] = {"image": image[i], "provenance": provenance[i]} 39 | 40 | # track the links corresponding to 20 courses 41 | 42 | source_code = requests.get(url) 43 | plain_text = source_code.text 44 | soup = BeautifulSoup(plain_text, 'html.parser') 45 | courseTags = soup.find_all("div", "offering-content") 46 | page = [] 47 | for tag in courseTags: 48 | coursedic = {} 49 | coursedic["img"] = tag.select('img[src]')[0]['src'] 50 | coursedic["name"] = tag.select('h2')[0].text.decode('utf8').encode('ascii', errors='ignore') 51 | specialization = tag.select('.specialization-course-count') 52 | if len(specialization) != 0: 53 | 
coursedic["specialization"] = True 54 | else: 55 | coursedic["specialization"] = False 56 | coursedic["provenance"] = tag.find_all('span')[-1].text.decode('utf8').encode('ascii', errors='ignore') 57 | page.append(coursedic) 58 | 59 | 60 | url_list = json.loads(soup.select_one("script[type=application/ld+json]").text) 61 | urls = [object['url'].encode('ascii','ignore') for object in url_list["itemListElement"]] 62 | urls = ["http:/" + url[4:] for url in urls] 63 | # urls of 20 courses respectively 64 | # for i in range(len(urls)): 65 | # browser.set_page_load_timeout(60) 66 | # browser.get(urls[i]) 67 | # time.sleep(20) 68 | # courseObject = page[i] 69 | # courseObject["course_url"] = urls[i] 70 | # # specialization 71 | # source_code = requests.get(urls[i]) 72 | # plain_text = source_code.text 73 | # soup = BeautifulSoup(plain_text, 'html.parser') 74 | # if 'specializations' in urls[i]: 75 | # courseObject["courseSet"] = [] 76 | # # if courseObject["specialization"]: 77 | # # descriptions = browser.find_elements_by_xpath('//div[@class="description-cont"]/div/div/span') 78 | # desc = soup.find_all("div", "description subsection") 79 | # if len(desc) != 0: 80 | # courseObject["description"] = soup.find_all("div", "description subsection")[0].text 81 | # else: 82 | # print urls[i] 83 | # # provenance = browser.find_element_by_xpath('//p[@class = "headline-1-text created-by"]/following-sibling::*[1]/img')\ 84 | # # .get_attribute('alt') 85 | # # coursenames = browser.find_elements_by_xpath('//h2[@class="course-name headline-5-text"]') 86 | # CourseTags = soup.find_all('section', 'rc-Course bgcolor-white') 87 | # for tag in CourseTags: 88 | # id += 1 89 | # course = {} 90 | 91 | # name = tag.find_all("h2", "course-name headline-5-text") 92 | # if len(name) != 0: 93 | # course["name"] = name[0].text.decode('utf8').encode('ascii', errors='ignore') 94 | # else: 95 | # print "no_name", urls[i] 96 | 97 | # if len(tag.select('.description-cont')) != 0: 98 | # 
course["description"] = tag.select('.description-cont')[0].text 99 | # else: 100 | # print "no_des", urls[i] 101 | # course["id"] = "coursera" + str(id).zfill(5) 102 | # courseObject["courseSet"].append(course) 103 | # # descriptions = [item.text.encode('ascii','ignore') for item in descriptions] 104 | # # cousenames = [item.text.encode('ascii','ignore') for item in cousenames] 105 | # # coursedic["subject"] = cousenames 106 | # else: 107 | # id += 1 108 | # desc = soup.find_all("p", "body-1-text course-description") 109 | # if len(desc) != 0: 110 | # courseObject["description"] = soup.find_all("p", "body-1-text course-description")[0].text 111 | # else: 112 | # print urls[i] 113 | # # description = browser.find_element_by_xpath('//p[@class="body-1-text course-description"]') 114 | # courseObject["id"] = "coursera" + str(id).zfill(5) 115 | # # provenance = browser.find_element_by_xpath('//div[@class = "headline-1-text creator-names"]/span[2]') 116 | # # cousename = browser.find_elements_by_xpath('//h1[@class="title display-3-text"]') 117 | # # description = description.text.encode('ascii','ignore') 118 | page = url_list 119 | # with open('Coursera_web'+ str(j) + '.json', 'a') as f: 120 | # json.dump(page, f) 121 | 122 | content += page 123 | with open('Coursera_web'+ '_content' + '.json', 'a') as f: 124 | json.dump(content, f) 125 | 126 | browser.quit() 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /Coursera/Src/troubleshoot.md: -------------------------------------------------------------------------------- 1 | \#\# Using Selenium Webdriver MacOS 2 | 3 | 1. Install Selenium: `sudo pip install selenium` 4 | 5 | 6 | 2. Downloads geckodriver (or chromedriber) 7 | 8 | e.g. `Downloads/geckodriver` 9 | 10 | 3. 
Find the directory of webdriver/chrome (safari, firefox) 11 | 12 | * Open the system directory: in Finder, hit (command + shift + G) 13 | 14 | * Trick: run `pip install selenium` again; the terminal will print: 15 | 16 | "Requirement already satisfied: selenium in /Library/Python/2.7/site-packages" 17 | 18 | * Go to the selenium folder, find webdriver, and drill down to firefox 19 | 20 | 21 | 4. Add path for webdriver and geckodriver: 22 | 23 | * `vim ~/.bash_profile` 24 | 25 | `PATH="/Library/Python/2.7/site-packages/selenium/webdriver/firefox/:/Users/fp/Downloads/Driver:$PATH"` 26 | `export PATH` 27 | 28 | * `source ~/.bash_profile` 29 | 30 | 5. Call Selenium Webdriver in Python: 31 | 32 | ```Python 33 | from selenium import webdriver 34 | driver = webdriver.Chrome() 35 | ``` -------------------------------------------------------------------------------- /Jupyter/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import json\n", 19 | "f = open(\"merged_all_no_duplicates-csv.json\", \"r\")\n", 20 | "dict_reports = f.read()\n", 21 | "f.close()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "table = pd.read_json(dict_reports)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 4, 36 | "metadata": { 37 | "scrolled": true 38 | }, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/html": [ 43 | "
\n", 44 | "\n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " 
\n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | "
BODY & INTERIOR QUALITY - DESIGNBODY & INTERIOR QUALITY - MECHANICALFEATURES & ACCESSORIES QUALITY - DESIGNFEATURES & ACCESSORIES QUALITY - MECHANICALMileageOVERALL QUALITY - DESIGNOVERALL QUALITY - MECHANICALPOWERTRAIN QUALITY - DESIGNPOWERTRAIN QUALITY - MECHANICALPrice_Dealers_RetailPrice_Private_SalePrice_Trade-InSummary_AccidentsSummary_Junk_SalvageSummary_RecallsSummary_TheftSummary_Title_Problemsyear
count11054.00000011054.00000011054.00000011054.00000015892.00000011054.00000011054.00000011054.00000011054.00000013280.00000013280.00000013280.00000015936.00000015936.00000015936.00000015936.00000015936.00000015940.000000
mean3.4093993.4031573.6237113.49371342497.7013593.5414783.3782793.3351733.22046316035.22665714560.08644612882.3277860.0117970.0021343.3670930.0001260.0013812014.139460
std0.9292350.8466290.8965830.95432231768.7872550.9778710.8901470.9340020.89297611067.91524910724.77445910232.9560410.1329780.0734333.1581520.0158430.0513213.025925
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000002000.0000001063.000000500.0000000.0000000.0000000.0000000.0000000.0000001990.000000
25%3.0000003.0000003.0000003.00000023142.2500003.0000003.0000002.5000002.50000012006.00000010459.0000008837.0000000.0000000.0000001.0000000.0000000.0000002014.000000
50%3.5000003.5000003.5000003.50000035888.0000003.5000003.5000003.5000003.00000014930.50000013591.00000011800.0000000.0000000.0000003.0000000.0000000.0000002015.000000
75%4.0000004.0000004.5000004.50000049672.2500004.5000004.0000004.0000004.00000017868.00000016421.00000014766.0000000.0000000.0000005.0000000.0000000.0000002016.000000
max5.0000005.0000005.0000005.000000722414.0000005.0000005.0000005.0000005.000000782387.000000746898.000000703523.0000004.0000004.00000023.0000002.0000004.0000002018.000000
\n", 239 | "
" 240 | ], 241 | "text/plain": [ 242 | " BODY & INTERIOR QUALITY - DESIGN BODY & INTERIOR QUALITY - MECHANICAL \\\n", 243 | "count 11054.000000 11054.000000 \n", 244 | "mean 3.409399 3.403157 \n", 245 | "std 0.929235 0.846629 \n", 246 | "min 0.000000 0.000000 \n", 247 | "25% 3.000000 3.000000 \n", 248 | "50% 3.500000 3.500000 \n", 249 | "75% 4.000000 4.000000 \n", 250 | "max 5.000000 5.000000 \n", 251 | "\n", 252 | " FEATURES & ACCESSORIES QUALITY - DESIGN \\\n", 253 | "count 11054.000000 \n", 254 | "mean 3.623711 \n", 255 | "std 0.896583 \n", 256 | "min 0.000000 \n", 257 | "25% 3.000000 \n", 258 | "50% 3.500000 \n", 259 | "75% 4.500000 \n", 260 | "max 5.000000 \n", 261 | "\n", 262 | " FEATURES & ACCESSORIES QUALITY - MECHANICAL Mileage \\\n", 263 | "count 11054.000000 15892.000000 \n", 264 | "mean 3.493713 42497.701359 \n", 265 | "std 0.954322 31768.787255 \n", 266 | "min 0.000000 0.000000 \n", 267 | "25% 3.000000 23142.250000 \n", 268 | "50% 3.500000 35888.000000 \n", 269 | "75% 4.500000 49672.250000 \n", 270 | "max 5.000000 722414.000000 \n", 271 | "\n", 272 | " OVERALL QUALITY - DESIGN OVERALL QUALITY - MECHANICAL \\\n", 273 | "count 11054.000000 11054.000000 \n", 274 | "mean 3.541478 3.378279 \n", 275 | "std 0.977871 0.890147 \n", 276 | "min 0.000000 0.000000 \n", 277 | "25% 3.000000 3.000000 \n", 278 | "50% 3.500000 3.500000 \n", 279 | "75% 4.500000 4.000000 \n", 280 | "max 5.000000 5.000000 \n", 281 | "\n", 282 | " POWERTRAIN QUALITY - DESIGN POWERTRAIN QUALITY - MECHANICAL \\\n", 283 | "count 11054.000000 11054.000000 \n", 284 | "mean 3.335173 3.220463 \n", 285 | "std 0.934002 0.892976 \n", 286 | "min 0.000000 0.000000 \n", 287 | "25% 2.500000 2.500000 \n", 288 | "50% 3.500000 3.000000 \n", 289 | "75% 4.000000 4.000000 \n", 290 | "max 5.000000 5.000000 \n", 291 | "\n", 292 | " Price_Dealers_Retail Price_Private_Sale Price_Trade-In \\\n", 293 | "count 13280.000000 13280.000000 13280.000000 \n", 294 | "mean 16035.226657 14560.086446 12882.327786 \n", 295 | 
"std 11067.915249 10724.774459 10232.956041 \n", 296 | "min 2000.000000 1063.000000 500.000000 \n", 297 | "25% 12006.000000 10459.000000 8837.000000 \n", 298 | "50% 14930.500000 13591.000000 11800.000000 \n", 299 | "75% 17868.000000 16421.000000 14766.000000 \n", 300 | "max 782387.000000 746898.000000 703523.000000 \n", 301 | "\n", 302 | " Summary_Accidents Summary_Junk_Salvage Summary_Recalls \\\n", 303 | "count 15936.000000 15936.000000 15936.000000 \n", 304 | "mean 0.011797 0.002134 3.367093 \n", 305 | "std 0.132978 0.073433 3.158152 \n", 306 | "min 0.000000 0.000000 0.000000 \n", 307 | "25% 0.000000 0.000000 1.000000 \n", 308 | "50% 0.000000 0.000000 3.000000 \n", 309 | "75% 0.000000 0.000000 5.000000 \n", 310 | "max 4.000000 4.000000 23.000000 \n", 311 | "\n", 312 | " Summary_Theft Summary_Title_Problems year \n", 313 | "count 15936.000000 15936.000000 15940.000000 \n", 314 | "mean 0.000126 0.001381 2014.139460 \n", 315 | "std 0.015843 0.051321 3.025925 \n", 316 | "min 0.000000 0.000000 1990.000000 \n", 317 | "25% 0.000000 0.000000 2014.000000 \n", 318 | "50% 0.000000 0.000000 2015.000000 \n", 319 | "75% 0.000000 0.000000 2016.000000 \n", 320 | "max 2.000000 4.000000 2018.000000 " 321 | ] 322 | }, 323 | "execution_count": 4, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "table.describe()" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 5, 335 | "metadata": { 336 | "scrolled": true 337 | }, 338 | "outputs": [ 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "array([u'BODY & INTERIOR QUALITY - DESIGN',\n", 343 | " u'BODY & INTERIOR QUALITY - MECHANICAL', u'City MPG', u'Drive Type',\n", 344 | " u'Engine', u'Exterior Color',\n", 345 | " u'FEATURES & ACCESSORIES QUALITY - DESIGN',\n", 346 | " u'FEATURES & ACCESSORIES QUALITY - MECHANICAL', u'Fuel Type',\n", 347 | " u'Hwy MPG', u'Interior Color', u'Issues_Detail', u'Mileage',\n", 348 | " u'OVERALL QUALITY - DESIGN', u'OVERALL 
QUALITY - MECHANICAL',\n", 349 | " u'Odometer_Mileage', u'POWERTRAIN QUALITY - DESIGN',\n", 350 | " u'POWERTRAIN QUALITY - MECHANICAL', u'Price_Dealers_Retail',\n", 351 | " u'Price_Private_Sale', u'Price_Trade-In', u'Recalls_Detail',\n", 352 | " u'Summary_Accidents', u'Summary_Junk_Salvage', u'Summary_Recalls',\n", 353 | " u'Summary_Theft', u'Summary_Title_Problems', u'Title_Problem_Check',\n", 354 | " u'Total Seating', u'Transmission', u'Trim', u'Vehicle_Use',\n", 355 | " u'Warranty', u'Year_Make_Model', u'address', u'make', u'model',\n", 356 | " u'original-model', u'price', u'space', u'vin', u'year'], dtype=object)" 357 | ] 358 | }, 359 | "execution_count": 5, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "table.columns.values" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 6, 371 | "metadata": {}, 372 | "outputs": [], 373 | "source": [ 374 | "import numpy as np\n", 375 | "table['price'] = table['price'].replace('Unknown', np.nan)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 11, 381 | "metadata": {}, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "count 15488\n", 387 | "unique 3739\n", 388 | "top 13995\n", 389 | "freq 132\n", 390 | "Name: price, dtype: object" 391 | ] 392 | }, 393 | "execution_count": 11, 394 | "metadata": {}, 395 | "output_type": "execute_result" 396 | } 397 | ], 398 | "source": [ 399 | "table['price'].describe()" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 12, 405 | "metadata": {}, 406 | "outputs": [], 407 | "source": [ 408 | "table['price'] = pd.to_numeric(table['price'])" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 13, 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "count 15488.000000\n", 420 | "mean 15706.463197\n", 421 | "std 11499.161402\n", 422 | "min 788.000000\n", 423 | "25% 
11980.000000\n", 424 | "50% 14300.000000\n", 425 | "75% 16991.000000\n", 426 | "max 437880.000000\n", 427 | "Name: price, dtype: float64" 428 | ] 429 | }, 430 | "execution_count": 13, 431 | "metadata": {}, 432 | "output_type": "execute_result" 433 | } 434 | ], 435 | "source": [ 436 | "table['price'].describe()" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": 16, 442 | "metadata": {}, 443 | "outputs": [ 444 | { 445 | "data": { 446 | "text/plain": [ 447 | "count 15936.000000\n", 448 | "mean 3.367093\n", 449 | "std 3.158152\n", 450 | "min 0.000000\n", 451 | "25% 1.000000\n", 452 | "50% 3.000000\n", 453 | "75% 5.000000\n", 454 | "max 23.000000\n", 455 | "Name: Summary_Recalls, dtype: float64" 456 | ] 457 | }, 458 | "execution_count": 16, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "table['Summary_Recalls'].describe()" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 19, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "table['price_diff_per'] = (table['price']-table['Price_Dealers_Retail'])/table['Price_Dealers_Retail']" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 20, 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "data": { 483 | "text/plain": [ 484 | "count 12907.000000\n", 485 | "mean -0.033474\n", 486 | "std 0.268332\n", 487 | "min -0.757463\n", 488 | "25% -0.176401\n", 489 | "50% -0.058069\n", 490 | "75% 0.075218\n", 491 | "max 11.281399\n", 492 | "Name: price_diff_per, dtype: float64" 493 | ] 494 | }, 495 | "execution_count": 20, 496 | "metadata": {}, 497 | "output_type": "execute_result" 498 | } 499 | ], 500 | "source": [ 501 | "table['price_diff_per'].describe()" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 33, 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [ 510 | "table['price_score'] = ((table['price_diff_per'].apply(lambda x: 1 if x >= 1 else 
x)) - table['price_diff_per'].min()) / (1 - table['price_diff_per'].min())" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 43, 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [ 519 | "table['price_score'] = 1 - table['price_score'] # reverse scale" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 44, 525 | "metadata": { 526 | "scrolled": true 527 | }, 528 | "outputs": [ 529 | { 530 | "data": { 531 | "text/html": [ 532 | "
\n", 533 | "\n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | 
" \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 
| " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | "
price_diff_perprice_score
0-0.2796770.728139
10.1930930.459132
20.0127860.561727
30.0903770.517577
4-0.2917090.734985
50.0416670.545294
6NaNNaN
7-0.1575330.658639
8-0.0601680.603238
9-0.0079370.573518
100.6153350.218875
11-0.1712960.666470
120.0559810.537149
13-0.1637420.662172
14-0.1252130.640248
15NaNNaN
16-0.0575400.601742
17NaNNaN
18-0.1122160.632853
19NaNNaN
200.0512980.539813
21-0.1827980.673015
22NaNNaN
23-0.2110160.689071
240.0703980.528945
25NaNNaN
26-0.0648630.605909
27NaNNaN
280.1595240.478233
290.0503970.540326
.........
15910-0.1221390.638499
159110.0830350.521755
159120.1263490.497109
15913-0.0434370.593718
159140.1396880.489519
15915-0.1618210.661078
15916-0.0648380.605895
15917NaNNaN
159180.2587110.421795
159190.1445780.486737
159200.5383140.262701
15921-0.1370680.646994
159220.0946570.515142
15923-0.0551160.600363
159240.1711630.471610
15925-0.0513970.598247
159260.2310170.437553
15927-0.2396010.705335
15928-0.1033310.627797
159290.1304180.494794
159300.0305840.551600
159310.0209010.557109
15932NaNNaN
15933-0.0732090.610658
159340.6491120.199656
159351.7648940.000000
15936-0.0502770.597610
15937NaNNaN
159380.0623630.533517
15939NaNNaN
\n", 849 | "

15940 rows × 2 columns

\n", 850 | "
" 851 | ], 852 | "text/plain": [ 853 | " price_diff_per price_score\n", 854 | "0 -0.279677 0.728139\n", 855 | "1 0.193093 0.459132\n", 856 | "2 0.012786 0.561727\n", 857 | "3 0.090377 0.517577\n", 858 | "4 -0.291709 0.734985\n", 859 | "5 0.041667 0.545294\n", 860 | "6 NaN NaN\n", 861 | "7 -0.157533 0.658639\n", 862 | "8 -0.060168 0.603238\n", 863 | "9 -0.007937 0.573518\n", 864 | "10 0.615335 0.218875\n", 865 | "11 -0.171296 0.666470\n", 866 | "12 0.055981 0.537149\n", 867 | "13 -0.163742 0.662172\n", 868 | "14 -0.125213 0.640248\n", 869 | "15 NaN NaN\n", 870 | "16 -0.057540 0.601742\n", 871 | "17 NaN NaN\n", 872 | "18 -0.112216 0.632853\n", 873 | "19 NaN NaN\n", 874 | "20 0.051298 0.539813\n", 875 | "21 -0.182798 0.673015\n", 876 | "22 NaN NaN\n", 877 | "23 -0.211016 0.689071\n", 878 | "24 0.070398 0.528945\n", 879 | "25 NaN NaN\n", 880 | "26 -0.064863 0.605909\n", 881 | "27 NaN NaN\n", 882 | "28 0.159524 0.478233\n", 883 | "29 0.050397 0.540326\n", 884 | "... ... ...\n", 885 | "15910 -0.122139 0.638499\n", 886 | "15911 0.083035 0.521755\n", 887 | "15912 0.126349 0.497109\n", 888 | "15913 -0.043437 0.593718\n", 889 | "15914 0.139688 0.489519\n", 890 | "15915 -0.161821 0.661078\n", 891 | "15916 -0.064838 0.605895\n", 892 | "15917 NaN NaN\n", 893 | "15918 0.258711 0.421795\n", 894 | "15919 0.144578 0.486737\n", 895 | "15920 0.538314 0.262701\n", 896 | "15921 -0.137068 0.646994\n", 897 | "15922 0.094657 0.515142\n", 898 | "15923 -0.055116 0.600363\n", 899 | "15924 0.171163 0.471610\n", 900 | "15925 -0.051397 0.598247\n", 901 | "15926 0.231017 0.437553\n", 902 | "15927 -0.239601 0.705335\n", 903 | "15928 -0.103331 0.627797\n", 904 | "15929 0.130418 0.494794\n", 905 | "15930 0.030584 0.551600\n", 906 | "15931 0.020901 0.557109\n", 907 | "15932 NaN NaN\n", 908 | "15933 -0.073209 0.610658\n", 909 | "15934 0.649112 0.199656\n", 910 | "15935 1.764894 0.000000\n", 911 | "15936 -0.050277 0.597610\n", 912 | "15937 NaN NaN\n", 913 | "15938 0.062363 0.533517\n", 914 | "15939 
NaN NaN\n", 915 | "\n", 916 | "[15940 rows x 2 columns]" 917 | ] 918 | }, 919 | "execution_count": 44, 920 | "metadata": {}, 921 | "output_type": "execute_result" 922 | } 923 | ], 924 | "source": [ 925 | "table[['price_diff_per', 'price_score']]" 926 | ] 927 | }, 928 | { 929 | "cell_type": "code", 930 | "execution_count": 45, 931 | "metadata": {}, 932 | "outputs": [], 933 | "source": [ 934 | "table['mileage_year'] = table['Mileage'] / (2017 - table['year'].apply(lambda x: 2017 if x >= 2017 else x) + 1) \n", 935 | "# maybe year 2018 model" 936 | ] 937 | }, 938 | { 939 | "cell_type": "code", 940 | "execution_count": 60, 941 | "metadata": {}, 942 | "outputs": [], 943 | "source": [ 944 | "table['mileage_year_score'] = 1 - (table['mileage_year'].apply(lambda x: 20000if x>=20000 else x) / 20000)" 945 | ] 946 | }, 947 | { 948 | "cell_type": "code", 949 | "execution_count": 64, 950 | "metadata": {}, 951 | "outputs": [ 952 | { 953 | "data": { 954 | "text/plain": [ 955 | "count 15892.000000\n", 956 | "mean 0.412694\n", 957 | "std 0.274199\n", 958 | "min 0.000000\n", 959 | "25% 0.173877\n", 960 | "50% 0.449877\n", 961 | "75% 0.620792\n", 962 | "max 1.000000\n", 963 | "Name: mileage_year_score, dtype: float64" 964 | ] 965 | }, 966 | "execution_count": 64, 967 | "metadata": {}, 968 | "output_type": "execute_result" 969 | } 970 | ], 971 | "source": [ 972 | "table['mileage_year_score'].describe()" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 40, 978 | "metadata": {}, 979 | "outputs": [ 980 | { 981 | "data": { 982 | "text/plain": [ 983 | "count 15940.000000\n", 984 | "mean 2014.139460\n", 985 | "std 3.025925\n", 986 | "min 1990.000000\n", 987 | "25% 2014.000000\n", 988 | "50% 2015.000000\n", 989 | "75% 2016.000000\n", 990 | "max 2018.000000\n", 991 | "Name: year, dtype: float64" 992 | ] 993 | }, 994 | "execution_count": 40, 995 | "metadata": {}, 996 | "output_type": "execute_result" 997 | } 998 | ], 999 | "source": [ 1000 | 
"table['year'].describe()" 1001 | ] 1002 | }, 1003 | { 1004 | "cell_type": "code", 1005 | "execution_count": 47, 1006 | "metadata": {}, 1007 | "outputs": [], 1008 | "source": [ 1009 | "df2 = pd.read_csv(\"truecar_url.csv\")\n", 1010 | "df1 = pd.read_csv(\"edmunds_url.csv\")\n", 1011 | "urls = pd.concat([df1, df2])" 1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": 53, 1017 | "metadata": {}, 1018 | "outputs": [], 1019 | "source": [ 1020 | "table = table.join(urls.set_index('vin'), on='vin')" 1021 | ] 1022 | }, 1023 | { 1024 | "cell_type": "code", 1025 | "execution_count": 58, 1026 | "metadata": { 1027 | "scrolled": false 1028 | }, 1029 | "outputs": [], 1030 | "source": [ 1031 | "table = table.drop_duplicates(subset=['vin'])" 1032 | ] 1033 | }, 1034 | { 1035 | "cell_type": "code", 1036 | "execution_count": 69, 1037 | "metadata": {}, 1038 | "outputs": [], 1039 | "source": [ 1040 | "table['recall_score'] = 1 - (table['Summary_Recalls']/(table['Summary_Recalls'].max() - table['Summary_Recalls'].min()))" 1041 | ] 1042 | }, 1043 | { 1044 | "cell_type": "code", 1045 | "execution_count": 72, 1046 | "metadata": { 1047 | "scrolled": true 1048 | }, 1049 | "outputs": [], 1050 | "source": [ 1051 | "table['rating_score'] = (table['BODY & INTERIOR QUALITY - DESIGN'] + table['BODY & INTERIOR QUALITY - MECHANICAL'] + table['FEATURES & ACCESSORIES QUALITY - DESIGN'] + table['FEATURES & ACCESSORIES QUALITY - MECHANICAL'] + table['OVERALL QUALITY - DESIGN'] + table['OVERALL QUALITY - MECHANICAL'] + table['POWERTRAIN QUALITY - DESIGN'] + table['POWERTRAIN QUALITY - MECHANICAL']) / 40" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": 74, 1057 | "metadata": {}, 1058 | "outputs": [ 1059 | { 1060 | "data": { 1061 | "text/plain": [ 1062 | "count 11054.000000\n", 1063 | "mean 0.685134\n", 1064 | "std 0.118719\n", 1065 | "min 0.000000\n", 1066 | "25% 0.600000\n", 1067 | "50% 0.700000\n", 1068 | "75% 0.775000\n", 1069 | "max 
0.975000\n", 1070 | "Name: rating_score, dtype: float64" 1071 | ] 1072 | }, 1073 | "execution_count": 74, 1074 | "metadata": {}, 1075 | "output_type": "execute_result" 1076 | } 1077 | ], 1078 | "source": [ 1079 | "table['rating_score'].describe()" 1080 | ] 1081 | }, 1082 | { 1083 | "cell_type": "code", 1084 | "execution_count": 76, 1085 | "metadata": {}, 1086 | "outputs": [], 1087 | "source": [ 1088 | "table['score'] = table['rating_score'].apply(lambda x: 0 if x is np.nan else x) * 20 + \\\n", 1089 | "table['recall_score'].apply(lambda x: 0 if x is np.nan else x) * 10 + \\\n", 1090 | "table['mileage_year_score'].apply(lambda x: 0 if x is np.nan else x) * 30 + \\\n", 1091 | "table['price_score'].apply(lambda x: 0 if x is np.nan else x) * 40" 1092 | ] 1093 | }, 1094 | { 1095 | "cell_type": "code", 1096 | "execution_count": 78, 1097 | "metadata": {}, 1098 | "outputs": [ 1099 | { 1100 | "data": { 1101 | "text/plain": [ 1102 | "count 10398.000000\n", 1103 | "mean 58.392275\n", 1104 | "std 8.031637\n", 1105 | "min 31.169745\n", 1106 | "25% 52.439792\n", 1107 | "50% 58.425033\n", 1108 | "75% 64.259776\n", 1109 | "max 84.976546\n", 1110 | "Name: score, dtype: float64" 1111 | ] 1112 | }, 1113 | "execution_count": 78, 1114 | "metadata": {}, 1115 | "output_type": "execute_result" 1116 | } 1117 | ], 1118 | "source": [ 1119 | "table['score'].describe()" 1120 | ] 1121 | }, 1122 | { 1123 | "cell_type": "code", 1124 | "execution_count": null, 1125 | "metadata": {}, 1126 | "outputs": [], 1127 | "source": [] 1128 | } 1129 | ], 1130 | "metadata": { 1131 | "kernelspec": { 1132 | "display_name": "Python 2", 1133 | "language": "python", 1134 | "name": "python2" 1135 | }, 1136 | "language_info": { 1137 | "codemirror_mode": { 1138 | "name": "ipython", 1139 | "version": 2 1140 | }, 1141 | "file_extension": ".py", 1142 | "mimetype": "text/x-python", 1143 | "name": "python", 1144 | "nbconvert_exporter": "python", 1145 | "pygments_lexer": "ipython2", 1146 | "version": "2.7.10" 1147 | } 1148 
| }, 1149 | "nbformat": 4, 1150 | "nbformat_minor": 2 1151 | } 1152 | -------------------------------------------------------------------------------- /Jupyter/__pycache__/neo.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rpedsel/CoursePlanner/7e1aae619740fc6c8eb36b7cb6983aa984e5d503/Jupyter/__pycache__/neo.cpython-36.pyc -------------------------------------------------------------------------------- /Jupyter/assets/index.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 47 | -------------------------------------------------------------------------------- /Jupyter/cypher.py: -------------------------------------------------------------------------------- 1 | from py2neo import Graph 2 | graph = Graph("http://localhost:7474/db/data/") 3 | # remove self loop 4 | graph.data("match (n)-[r]->(n) delete r") 5 | # return graph 6 | graph.data("MATCH p = ()-[r:UnderProgram]->(n {name: program_name}) RETURN p") 7 | # match (n)-[r:HasDuplicate]->(n) delete r 8 | # MATCH (n {name: 'Alice'})->(m) 9 | # "Accounting (BS)" 10 | MATCH p = ()-[r: UnderProgram]->(n {name: "Accounting (BS)"}) RETURN p 11 | 12 | MATCH (m)-[r:UnderProgram]->(n {name: 'Accounting (BS)'}) 13 | where (m)-[r:HasPreparation]->() 14 | 15 | 16 | graph_3.data("MATCH (m)-[r:UnderProgram]->(n {name: 'Accounting (BS)'})\ 17 | where not (m)-[r:HasPreparation]->() and not (m)-[r:HasPrerequisite]->()\ 18 | return m") 19 | 20 | 21 | 22 | MATCH (k)-[r*]->(n:ABC) 23 | with k, r, n, count(k) 24 | 25 | import pandas as pd 26 | courses = list() 27 | courselist = graph_3.data("MATCH p = (m)-[r:UnderProgram]->(n {name: 'Accounting (BS)'}) return p") 28 | for course in courselist: 29 | graph_3.data("Match ") 30 | -------------------------------------------------------------------------------- /Jupyter/data (2).json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "code":"BUAD 280", 4 | "name":"BUAD 280 Introduction to Financial Accounting", 5 | "mooc":"Accounting: Principles of Financial Accounting", 6 | "url":"http://www.coursera.org/specializations/foundations-management", 7 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/f2/fdfaf0f9a511e6a5f4effa0e2c4d64/investigaci_n-de-mercados-y-comportamiento.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 8 | }, 9 | { 10 | "code":"MATH 118", 11 | "name":"MATH 118gx Fundamental Principles of Calculus", 12 | 
"mooc":"", 13 | "url":"", 14 | "image":"" 15 | }, 16 | { 17 | "code":"MATH 125", 18 | "name":"MATH 125g Calculus I", 19 | "mooc":"Single Variable Calculus", 20 | "url":"http://www.coursera.org/learn/discrete-calculus", 21 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/05/cda840977511e5aa161903ab17f92e/CSV-logo-chapter-5-copy.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 22 | }, 23 | { 24 | "code":"ECON 351", 25 | "name":"ECON 351x Microeconomics for Business", 26 | "mooc":"Strategic Business Management - Microeconomics", 27 | "url":"http://www.coursera.org/learn/parprog1", 28 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/06/ab77207de611e78b8cbf6b7d2487ac/image-1.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 29 | }, 30 | { 31 | "code":"BUAD 306", 32 | "name":"BUAD 306 Business Finance", 33 | "mooc":"Business English: Finance and Economics", 34 | "url":"http://www.coursera.org/specializations/english-for-business", 35 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/4c/868cb0658911e6892f017139b2b060/coursera-ma-thumbnail.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 36 | }, 37 | { 38 | "code":"BUAD 281", 39 | "name":"BUAD 281 Introduction to Managerial Accounting", 40 | "mooc":"Managerial Accounting: Cost Behaviors, Systems, and Analysis", 41 | "url":"http://www.coursera.org/specializations/value-chain-management", 42 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d396qusza40orc.cloudfront.net/phoenixassets/learn-finance/BNY_Specialization_Banner_logo.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 43 | }, 44 | { 45 | "code":"ACCT 370", 46 | "name":"ACCT 370 External Financial Reporting Issues", 47 | "mooc":"", 48 | "url":"", 49 | 
"image":"" 50 | }, 51 | { 52 | "code":"ACCT 371", 53 | "name":"ACCT 371 Introduction to Accounting Systems", 54 | "mooc":"Accounting and Finance for IT professionals", 55 | "url":"http://www.coursera.org/specializations/business-technology-managment", 56 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d15cw65ipctsrr.cloudfront.net/dd/7ae3d0bd8411e6834f83bd22b5a2e6/PositivePsychology_MOOC_Icons_Specialization.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 57 | }, 58 | { 59 | "code":"ACCT 373", 60 | "name":"ACCT 373 Introduction to Auditing and Assurance Services", 61 | "mooc":"", 62 | "url":"", 63 | "image":"" 64 | }, 65 | { 66 | "code":"ACCT 377", 67 | "name":"ACCT 377 Valuation for Financial Statement Purposes", 68 | "mooc":"", 69 | "url":"", 70 | "image":"" 71 | }, 72 | { 73 | "code":"ACCT 416", 74 | "name":"ACCT 416 Financial Reporting and Analysis", 75 | "mooc":"More Introduction to Financial Accounting", 76 | "url":"http://www.coursera.org/learn/wharton-financial-accounting", 77 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/3e/b505c0ee7611e4890b43504b150d21/online_learning_slide_vAccounting.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 78 | }, 79 | { 80 | "code":"ACCT 462", 81 | "name":"ACCT 462 Detecting Fraudulent Financial Reporting", 82 | "mooc":"", 83 | "url":"", 84 | "image":"" 85 | }, 86 | { 87 | "code":"ACCT 476", 88 | "name":"ACCT 476 Performance Measurement Issues", 89 | "mooc":"Measurement and data ", 90 | "url":"https://www.khanacademy.org/math/cc-2nd-grade-math/cc-2nd-measurement-data", 91 | "image":"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTWVTAsQKuMSTnREno6zNLCslVti6JN7G9RHpDFXaikAiF36rquQakV1igF" 92 | }, 93 | { 94 | "code":"BUAD 302", 95 | "name":"BUAD 302T Communication Strategy in Accounting", 96 | "mooc":"", 97 | "url":"", 98 | "image":"" 99 | }, 100 | { 101 | "code":"BUAD 307", 102 | 
"name":"BUAD 307 Marketing Fundamentals", 103 | "mooc":"Digital Marketing Fundamentals", 104 | "url":"https://www.udacity.com/course/digital-marketing-fundamentals--cx11", 105 | "image":"https://s3-us-west-1.amazonaws.com/udacity-content/course-images/cx11-e152d95.jpg" 106 | }, 107 | { 108 | "code":"ACCT 477", 109 | "name":"ACCT 477 Intermediate Fair Value Issues in Accounting", 110 | "mooc":"", 111 | "url":"", 112 | "image":"" 113 | }, 114 | { 115 | "code":"ACCT 478", 116 | "name":"ACCT 478 Accounting Systems Design", 117 | "mooc":"More Introduction to Financial Accounting", 118 | "url":"http://www.coursera.org/learn/wharton-financial-accounting", 119 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/3e/b505c0ee7611e4890b43504b150d21/online_learning_slide_vAccounting.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 120 | }, 121 | { 122 | "code":"ACCT 473", 123 | "name":"ACCT 473 Financial Statement Auditing", 124 | "mooc":"", 125 | "url":"", 126 | "image":"" 127 | }, 128 | { 129 | "code":"ACCT 475", 130 | "name":"ACCT 475 Systems Security and Audit", 131 | "mooc":"Cyber-Physical Systems Security", 132 | "url":"https://www.udacity.com/course/cyber-physical-systems-security--ud279", 133 | "image":"https://lh3.googleusercontent.com/pjV2kaEQlQuzZSTR8YwIYyDieRxdfH6i0QfGzSCI1iTawtnhYJmbn2RC0rXdsnSGf6FaoIuLHE11NyFL=s0#w=1280&h=720=s276#w=1724&h=1060" 134 | }, 135 | { 136 | "code":"ACCT 463", 137 | "name":"ACCT 463 Internal Audit", 138 | "mooc":"", 139 | "url":"", 140 | "image":"" 141 | }, 142 | { 143 | "code":"BUAD 304", 144 | "name":"BUAD 304 Organizational Behavior and Leadership", 145 | "mooc":"International Leadership and Organizational Behavior", 146 | "url":"http://www.coursera.org/learn/academic-discussion-english", 147 | 
"image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/62/58d990f18cd48083251c3e57fe9e7e/Untitled-1.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 148 | }, 149 | { 150 | "code":"BUAD 497", 151 | "name":"BUAD 497 Strategic Management", 152 | "mooc":"Strategic Business Management - Macroeconomics", 153 | "url":"http://www.coursera.org/learn/trigonometry", 154 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/d3/d90f107de511e7b945f75a7283d500/image-2.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 155 | }, 156 | { 157 | "code":"ACCT 479", 158 | "name":"ACCT 479 Accounting Systems Development", 159 | "mooc":"Accounting Analytics", 160 | "url":"http://www.coursera.org/specializations/business-analytics", 161 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/ef/485800fcfd11e6b4cc3359e41f40e9/Pricing-Strategy.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 162 | }, 163 | { 164 | "code":"ACCT 374", 165 | "name":"ACCT 374 Introduction to Tax Issues", 166 | "mooc":"", 167 | "url":"", 168 | "image":"" 169 | }, 170 | { 171 | "code":"ACCT 474", 172 | "name":"ACCT 474 Tax Issues for Business", 173 | "mooc":"", 174 | "url":"", 175 | "image":"" 176 | }, 177 | { 178 | "code":"ACCT 470", 179 | "name":"ACCT 470 Advanced External Financial Reporting Issues", 180 | "mooc":"", 181 | "url":"", 182 | "image":"" 183 | }, 184 | { 185 | "code":"ACCT 471", 186 | "name":"ACCT 471 Accounting Information Systems", 187 | "mooc":"Accounting Analytics", 188 | "url":"http://www.coursera.org/specializations/business-analytics", 189 | 
"image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/e7/871c10757e11e7817cc9840feed7fa/enigma_rotors_with_alphabet_rings_cropped.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 190 | }, 191 | { 192 | "code":"ACCT 372", 193 | "name":"ACCT 372 Internal Reporting Issues", 194 | "mooc":"", 195 | "url":"", 196 | "image":"" 197 | }, 198 | { 199 | "code":"ACCT 472", 200 | "name":"ACCT 472 Managerial Accounting", 201 | "mooc":"Managerial Accounting: Cost Behaviors, Systems, and Analysis", 202 | "url":"http://www.coursera.org/specializations/value-chain-management", 203 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://d396qusza40orc.cloudfront.net/phoenixassets/learn-finance/BNY_Specialization_Banner_logo.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 204 | }, 205 | { 206 | "code":"ACCT 430", 207 | "name":"ACCT 430 Accounting Ethics", 208 | "mooc":"Accounting Analytics", 209 | "url":"http://www.coursera.org/specializations/business-analytics", 210 | "image":"https://d3njjcbhbojbot.cloudfront.net/api/utilities/v1/imageproxy/https://coursera-course-photos.s3.amazonaws.com/ef/485800fcfd11e6b4cc3359e41f40e9/Pricing-Strategy.jpg?auto=format%2Ccompress&dpr=1&w=100&h=100&fit=fill&bg=FFF" 211 | } 212 | ] -------------------------------------------------------------------------------- /Jupyter/integration.py: -------------------------------------------------------------------------------- 1 | from py2neo import Graph,NodeSelector 2 | import neo 3 | from ipywidgets import * 4 | from IPython.display import display, HTML 5 | neo.init_notebook_mode() 6 | graph = Graph("http://localhost:7474/db/data/") 7 | 8 | searchbox = widgets.Text( 9 | placeholder='Prgram Name', 10 | description='Search:', 11 | disabled=False) 12 | 13 | def program_result(plist): 14 | items_layout = Layout( 15 | flex='1 1 auto', 16 | width='auto') # override the default width of the 
button to 'auto' to let the button grow 17 | 18 | box_layout = Layout( 19 | display='flex', 20 | flex_flow='column', 21 | align_items='stretch', 22 | width='60%') 23 | items = [ToggleButton(description=w, layout=items_layout) for w in plist] 24 | return Box(children=items, layout=box_layout) 25 | 26 | def search_program(keyword): 27 | query = "MATCH (p:Program) WHERE p.name =~ '(?i).*"+keyword+".*' RETURN p LIMIT 10" 28 | data = graph.data(query) 29 | return [p['p']['name'] for p in data] 30 | 31 | def value_changed(change): 32 | res = search_program(change.new) 33 | box = program_result(res) 34 | display(box) 35 | value_changed.box = box.children 36 | 37 | searchbox.observe(value_changed, 'value') 38 | 39 | display(searchbox) 40 | 41 | button = widgets.Button(description="See Detail!") 42 | 43 | def on_button_clicked(b): 44 | for box in value_changed.box: 45 | if box.value == True: 46 | selected = box.description 47 | visualize(selected) 48 | 49 | button.on_click(on_button_clicked) 50 | display(button) 51 | 52 | def refernce_display(clist): 53 | content = ''' 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | ''' 62 | for c in clist: 63 | content += ''' 64 | 65 | 66 | 67 | 68 | ''' 69 | #return content 70 | display(HTML(content)) 71 | 72 | 73 | from collections import defaultdict 74 | from py2neo import Graph 75 | import pandas as pd 76 | 77 | 78 | def TopoSort(program_name): 79 | graph = Graph("http://localhost:7474/db/data/") 80 | adjPair = graph.data("MATCH (m)-[:HasPrerequisite]->(n) \ 81 | WHERE (m)-[:UnderProgram]->({name:" + program_name + "}) \ 82 | and (n)-[:UnderProgram]->({name:" + program_name + "}) \ 83 | RETURN m.id, n.id") 84 | 85 | class Prerequisite: 86 | def __init__(self, courselist): 87 | self.graph = defaultdict(list) 88 | self.vertices = courselist 89 | 90 | def addEdge(self, u, v): 91 | self.graph[u].append(v) 92 | 93 | def dfs(self, v, visit, order): 94 | visit[v] = True 95 | for u in self.graph[v]: 96 | if visit[u] == False: 97 | self.dfs(u, visit, 
order) 98 | order.insert(0, v) 99 | 100 | def tps(self): 101 | visit = {v: False for v in self.vertices} 102 | order =[] 103 | for v in self.vertices: 104 | if visit[v] == False: 105 | self.dfs(v, visit, order) 106 | return order 107 | 108 | courselist = [] 109 | for i in adjPair: 110 | courselist += [i["m.id"], i["n.id"]] 111 | courselist = list(set(courselist)) 112 | 113 | g = Prerequisite(courselist) 114 | for edge in adjPair: 115 | g.addEdge(edge["m.id"], edge["n.id"]) 116 | # MATCH (n:Course{id:"BUAD 280"}) RETURN n 117 | toposort = g.tps() 118 | 119 | 120 | nodes = [] 121 | course = list() 122 | relation = list() 123 | relat = [] 124 | import pandas as pd 125 | for id in toposort[::-1]: 126 | dic = {} 127 | node = graph.data("Match (n:Course{id:'" + id + "'}) RETURN n") 128 | coursename = graph.data("Match (n:Course{id:'" + id + "'}) RETURN n.name")[0] 129 | coursedesc = graph.data("Match (n:Course{id:'" + id + "'}) RETURN n.description")[0] 130 | mooc = graph.data("MATCH p = (n:Course{id:'"+ id + "'})-[r:SimilarTo]->(m:MOOC) RETURN m.name limit 1") 131 | image = graph.data("MATCH p = (n:Course{id:'"+ id + "'})-[r:SimilarTo]->(m:MOOC) RETURN m.image limit 1") 132 | url = graph.data("MATCH p = (n:Course{id:'"+ id + "'})-[r:SimilarTo]->(m:MOOC) RETURN m.url limit 1") 133 | print(mooc) 134 | dic["code"] = id 135 | dic["name"] = coursename["n.name"] 136 | dic["mooc"] = "" 137 | dic["url"] = "" 138 | dic["image"] = "" 139 | if len(mooc) != 0: 140 | rlist = [id, coursename["n.name"], mooc[0]["m.name"], image[0]["m.image"], url[0]["m.url"]] 141 | relation.append(rlist) 142 | dic["mooc"], dic["image"], dic["url"] = rlist[2], rlist[3], rlist[4] 143 | relat.append(dic) 144 | nodes += node 145 | course.append([id, coursename["n.name"], coursedesc["n.description"]]) 146 | course_df = pd.DataFrame(course) 147 | relation_df = pd.DataFrame(relation) 148 | # relat 149 | relation_df 150 | refernce_display(relat) 151 | 152 | import json 153 | j = json.dumps(relat) 154 | 155 | 
# /Jupyter/neo.py
"""Render Neo4j query rows as an interactive vis.js network inside Jupyter."""
import os
import json
import uuid
import tempfile
from IPython.display import HTML, Javascript, display

# Barnes-Hut physics settings for vis.js.  Nothing in this module reads the
# constant; presumably callers pass it as ``physics=`` -- TODO confirm.
DEFAULT_PHYSICS = {
    "physics": {
        "barnesHut": {
            "gravitationalConstant": -15150,
            "centralGravity": 3.45,
            "springLength": 261,
            "damping": 0.3
        }
    }
}


def get_visjs():
    """Placeholder kept for API compatibility; always returns ``None``."""
    return


def init_notebook_mode():
    """
    Configure require.js in the notebook so that vis.js is loaded from the CDN.

    Once loaded, the library is exposed to the page as ``window.vis``; the
    matching vis.js stylesheet is attached through the ``css`` argument.
    """
    display(
        Javascript(data="require.config({ " +
                        " paths: { " +
                        " vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.8.2/vis.min' " +
                        " } " +
                        "}); " +
                        "require(['vis'], function(vis) { " +
                        " window.vis = vis; " +
                        "}); ",
                   css='https://cdnjs.cloudflare.com/ajax/libs/vis/4.8.2/vis.css')
    )


def vis_network(nodes, edges, physics=True):
    """
    Fill the ``assets/index.html`` template with the graph data.

    :param nodes: The nodes to be represented and their information.
    :param edges: The edges to be represented and their information.
    :param physics: Value serialised to JSON for the template's ``physics`` slot.
    :return: The rendered page as a ``str`` (it is not wrapped in
        ``IPython.display.HTML``; callers do that themselves if needed).
    :raises IOError: If the template next to this module cannot be read.
    """
    template_path = os.path.join(os.path.dirname(__file__), 'assets/index.html')
    # Use a context manager so the template handle is always closed.
    with open(template_path) as template_file:
        base = template_file.read()

    unique_id = str(uuid.uuid4())
    return base.format(id=unique_id, nodes=json.dumps(nodes), edges=json.dumps(edges), physics=json.dumps(physics))


def _node_to_vis(node, node_id, options):
    """Translate one graph node into the dict that vis.js expects."""
    labels = list(node.labels())
    # A node without labels is drawn in an empty group instead of raising.
    node_label = labels[0] if labels else ""
    prop_key = options.get(node_label)
    vis_label = node.properties.get(prop_key, "")

    return {"id": node_id, "label": vis_label, "group": node_label, "title": repr(node.properties)}


def draw(data, options, physics=True, limit=100):
    """
    Build a vis.js network from query rows and return the rendered HTML.

    Every row of ``data`` is read positionally as
    ``(source_node, source_id, relationship, target_node, target_id)``.
    A row whose relationship is ``None`` contributes only its source node.

    The options argument should be a dictionary of node labels and property keys; it determines which property
    is displayed for the node label. For example, in the movie graph, options = {"Movie": "title", "Person": "name"}.
    Omitting a node label from the options dict will leave the node unlabeled in the visualization.

    :param data: Iterable of result rows in the layout described above.
    :param options: Options for the Nodes.
    :param physics: Physics of the vis.js visualization (forwarded to ``vis_network``).
    :param limit: Currently unused; kept so existing callers keep working.
    :return: The HTML string produced by ``vis_network``.
    """
    nodes = []
    edges = []

    for row in data:
        source_node = row[0]
        source_id = row[1]
        rel = row[2]
        target_node = row[3]
        target_id = row[4]

        source_info = _node_to_vis(source_node, source_id, options)
        if source_info not in nodes:
            nodes.append(source_info)

        if rel is None:
            continue

        target_info = _node_to_vis(target_node, target_id, options)
        if target_info not in nodes:
            nodes.append(target_info)

        edges.append({"from": source_info["id"], "to": target_info["id"], "label": rel.type()})

    return vis_network(nodes, edges, physics=physics)
/Jupyter/toposort.py: -------------------------------------------------------------------------------- 1 | from py2neo import Graph 2 | graph = Graph("http://localhost:7474/db/data/") 3 | 4 | 5 | def TopoSort(program_name): 6 | adjPair = graph.data("MATCH (m)-[:HasPrerequisite]->(n) \ 7 | WHERE (m)-[:UnderProgram]->({name: 'Accounting (BS)'}) \ 8 | and (n)-[:UnderProgram]->({name: 'Accounting (BS)'}) \ 9 | RETURN m.id, n.id") 10 | # if you take m, must first take n 11 | courselist = [] 12 | for i in adjPair: 13 | courselist += [i["m.id"], i["n.id"]] 14 | courselist = list(set(courselist)) 15 | adjList = {i:[] for i in courselist} 16 | indegree = {i: 0 for i in courselist} 17 | # for post in adjList: 18 | # for edge in adjPair: 19 | # if edge["m.id"] == post: 20 | # adjList[post].append(edge["n.id"]) 21 | for edge in adjPair: 22 | adjList[edge["m.id"]].append(edge["n.id"]) 23 | indegree[edge["n.id"]] += 1 24 | # startnodes = [i for i in adjList if len(adjList[i]) == 0] 25 | import queue 26 | queue = queue.Queue(maxsize = len(courselist)) 27 | for i in adjList: 28 | if len(adjList[i]) == 0: 29 | queue.put(i) 30 | order = [] 31 | while not queue.empty(): 32 | node = queue.get() 33 | order.append(node) 34 | for x in adjList[node]: 35 | indegree[x] -= 1 36 | if indegree[x] == 0: 37 | queue.put(x) 38 | 39 | return order 40 | 41 | 42 | #Python program to print topological sorting of a DAG 43 | from collections import defaultdict 44 | 45 | #Class to represent a graph 46 | class Graph: 47 | def __init__(self,vertices): 48 | self.graph = defaultdict(list) #dictionary containing adjacency List 49 | self.V = vertices #No. of vertices 50 | 51 | # function to add an edge to graph 52 | def addEdge(self,u,v): 53 | self.graph[u].append(v) 54 | 55 | # A recursive function used by topologicalSort 56 | def topologicalSortUtil(self,v,visited,stack): 57 | 58 | # Mark the current node as visited. 
59 | visited[v] = True 60 | 61 | # Recur for all the vertices adjacent to this vertex 62 | for i in self.graph[v]: 63 | if visited[i] == False: 64 | self.topologicalSortUtil(i,visited,stack) 65 | 66 | # Push current vertex to stack which stores result 67 | stack.insert(0,v) 68 | 69 | # The function to do Topological Sort. It uses recursive 70 | # topologicalSortUtil() 71 | def topologicalSort(self): 72 | # Mark all the vertices as not visited 73 | visited = [False]*self.V 74 | stack =[] 75 | 76 | # Call the recursive helper function to store Topological 77 | # Sort starting from all vertices one by one 78 | for i in range(self.V): 79 | if visited[i] == False: 80 | self.topologicalSortUtil(i,visited,stack) 81 | 82 | # Print contents of stack 83 | print stack 84 | 85 | 86 | def tp(self, v, visited, stack): 87 | visited[v] = True 88 | for i in adjList[courselist[v]]: 89 | if visited[v] == False: 90 | self.tp(i, visited, stack) 91 | stack.insert(0, v) 92 | 93 | def ts(self): 94 | visited = [False] * len(courselist) 95 | stack = [] 96 | for i in range(len(courselist)): 97 | if visited[i] == False: 98 | self.tp(i, visited, stack) 99 | 100 | 101 | -------------------------------------------------------------------------------- /Jupyter/tpsort.py: -------------------------------------------------------------------------------- 1 | import json, jgraph 2 | import pandas as pd 3 | from py2neo import Graph 4 | from collections import defaultdict 5 | 6 | 7 | class Prerequisite: 8 | def __init__(self, courselist): 9 | self.graph = defaultdict(list) 10 | self.vertices = courselist 11 | 12 | def addEdge(self, u, v): 13 | self.graph[u].append(v) 14 | 15 | def dfs(self, v, visit, order): 16 | visit[v] = True 17 | for u in self.graph[v]: 18 | if visit[u] == False: 19 | self.dfs(u, visit, order) 20 | order.insert(0, v) 21 | 22 | def tps(self): 23 | visit = {v: False for v in self.vertices} 24 | order =[] 25 | for v in self.vertices: 26 | if visit[v] == False: 27 | self.dfs(v, visit, 
order) 28 | return order 29 | 30 | 31 | def topo_Sort(program_name): 32 | graph = Graph("http://localhost:7474/db/data/") 33 | adjPair = graph.data("MATCH (m)-[:HasPrerequisite]->(n) \ 34 | WHERE (m)-[:UnderProgram]->({name:" + program_name + "}) \ 35 | and (n)-[:UnderProgram]->({name:" + program_name + "}) \ 36 | RETURN m.id, n.id") 37 | 38 | courselist = [] 39 | for i in adjPair: 40 | courselist += [i["m.id"], i["n.id"]] 41 | courselist = list(set(courselist)) 42 | 43 | g = Prerequisite(courselist) 44 | for edge in adjPair: 45 | g.addEdge(edge["m.id"], edge["n.id"]) 46 | # MATCH (n:Course{id:"BUAD 280"}) RETURN n 47 | toposort = g.tps() 48 | return toposort[::-1] 49 | 50 | 51 | def display_list(order): 52 | nodes, relat = [], [] 53 | course, relation = list(), list() 54 | 55 | for id in order: 56 | dic = {} 57 | dic["code"], dic["mooc"], dic["url"], dic["image"] = id, "", "", "" 58 | node = graph.data("Match (n:Course{id:'" + id + "'}) RETURN n") 59 | coursename = graph.data("Match (n:Course{id:'" + id + "'}) RETURN n.name")[0] 60 | coursedesc = graph.data("Match (n:Course{id:'" + id + "'}) RETURN n.description")[0] 61 | mooc = graph.data("MATCH p = (n:Course{id:'"+ id + "'})-[r:SimilarTo]->(m:MOOC) RETURN m.name limit 1") 62 | image = graph.data("MATCH p = (n:Course{id:'"+ id + "'})-[r:SimilarTo]->(m:MOOC) RETURN m.image limit 1") 63 | url = graph.data("MATCH p = (n:Course{id:'"+ id + "'})-[r:SimilarTo]->(m:MOOC) RETURN m.url limit 1") 64 | dic["name"] = coursename["n.name"] 65 | if len(mooc) != 0: 66 | rlist = [id, coursename["n.name"], mooc[0]["m.name"], image[0]["m.image"], url[0]["m.url"]] 67 | relation.append(rlist) 68 | dic["mooc"], dic["image"], dic["url"] = rlist[2], rlist[3], rlist[4] 69 | relat.append(dic) 70 | nodes += node 71 | course.append([id, coursename["n.name"], coursedesc["n.description"]]) 72 | course_df = pd.DataFrame(course) 73 | relation_df = pd.DataFrame(relation) 74 | # relat 75 | relation_df 76 | refernce_display(relat) 77 | j = 
json.dumps(relat) 78 | -------------------------------------------------------------------------------- /Khan/khan_driver.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import requests 3 | from bs4 import BeautifulSoup 4 | import json 5 | import time 6 | import sys 7 | reload(sys) 8 | sys.setdefaultencoding('utf-8') 9 | 10 | # _x_query = { 11 | # "coursetag": "//div[@class = 'gs-webResult gs-result']", 12 | # "coursename": "//a[@class='gs-title']", 13 | # "description": "//div[@class = 'gs-bidi-start-align gs-snippet']" 14 | # "image_url": "//img[@class='gs-image']", 15 | # } 16 | 17 | def wrapper(url, id): 18 | # source_code = requests.get(url) 19 | # plain_text = source_code.text 20 | # soup = BeautifulSoup(plain_text, 'html.parser') 21 | browser = webdriver.Firefox() 22 | browser.set_page_load_timeout(1200) 23 | browser.get(url) 24 | courselist = browser.find_elements_by_xpath('//div[@class = "gs-webResult gs-result"]') 25 | list = [] 26 | for course in courselist: 27 | coursedic = {} 28 | coursedic["id"] = "khan" + str(id).zfill(5) 29 | coursedic["description"] = course.text.split('\n')[-1] 30 | coursedic["name"] = course.text.split('\n')[0].split('|')[0] 31 | coursedic["provenance"] = "khan Academy" 32 | if coursedic["name"] == "" or coursedic["description"] == "": 33 | continue 34 | id += 1 35 | list.append(coursedic) 36 | urls = [i.get_attribute('href') for i in browser.find_elements_by_xpath("//div/a[@class='gs-image']")] 37 | images = [i.get_attribute('src') for i in browser.find_elements_by_xpath("//img[@class='gs-image']")] 38 | alist = [] 39 | for i in range(len(list)): 40 | if i < len(urls) and i < len(images): 41 | course = list[i] 42 | course["course_url"] = urls[i] 43 | course["img"] = images[i] 44 | alist.append(course) 45 | browser.close() 46 | return id, alist 47 | # print data 48 | 49 | 50 | 51 | def driver(domain, keywords, data, i): 52 | for keyword in keywords: 53 | 
print keyword 54 | url = domain + keyword 55 | print url 56 | browser = webdriver.Firefox() 57 | browser.set_page_load_timeout(1200) 58 | browser.get(url) 59 | time.sleep(20) 60 | for j in range(1, 9): 61 | pages = browser.find_elements_by_xpath('//div[@class = "gsc-cursor-page"]') 62 | if j < len(pages): 63 | page = pages[j] 64 | page.click() 65 | time.sleep(40) 66 | result = wrapper(url, i) 67 | data += result[1] 68 | i = result[0] 69 | # pages = browser.find_elements_by_xpath('//div[@class = "gsc-cursor-page"]') 70 | else: 71 | break 72 | browser.close() 73 | browser.close() 74 | 75 | domain = "https://www.khanacademy.org/search?referer=%2F&page_search_query=" 76 | keywords = ['data', 'social' 'marketing', 'design', 'web', 'cyber'] 77 | #,\ 78 | # 'program', 'platform', 'map', 'intelligence', 'knowledge', 'graph', 'probability',\ 79 | # 'digital', 'electronic', 'architecture', 'infrastructure', 'digital', 'electronic',\ 80 | # 'architecture', 'infrastructure', 'program', 'platform', 'map', 'intelligence',\ 81 | # 'entrepreneurship','cyber', 'knowledge', 'graph', 'probability', "engineer"] 82 | i = 0 83 | data = [] 84 | driver(domain, keywords, data, i) 85 | # browser.quit() 86 | 87 | 88 | 89 | with open('khan_data.json', 'a') as f: 90 | json.dump(data, f) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CoursePlanner 2 | ### Knowledge Graph Project 3 | 4 | Introduction: [CoursePlanner](https://github.com/rpedsel/CoursePlanner/blob/master/CoursePlanner.pdf) 5 | 6 | Demonstration Video: [Youtube](https://www.youtube.com/watch?v=L34QmfxO4a8&t) 7 | -------------------------------------------------------------------------------- /Udacity/Src/Process_endpoint.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | data = json.load(open('udacity_endpoint.json')) 5 | print 
len(data["courses"]) 6 | 7 | courselist = [] 8 | i = 0 9 | for object in data["courses"]: 10 | i += 1 11 | course_obj = {} 12 | course_obj["id"] = "udacity" + str(i).zfill(5) 13 | course_obj["name"] = object["title"] 14 | course_obj["course_url"] = object["homepage"] 15 | if len(object["affiliates"]) != 0: 16 | course_obj["provenance"] = object["affiliates"] 17 | else: 18 | course_obj["provenance"] = [{"name": "udacity"}] 19 | course_obj["description"] = object["expected_learning"] + ' ' + object["summary"] 20 | courselist.append(course_obj) 21 | 22 | with open('udacity_data.json', 'a') as f: 23 | json.dump(courselist, f) 24 | 25 | # print courselist 26 | 27 | # print data["courses"][0] -------------------------------------------------------------------------------- /Udacity/Udacity_wrapper/driver_udacity.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import requests 3 | import json 4 | from bs4 import BeautifulSoup 5 | import sys 6 | reload(sys) 7 | sys.setdefaultencoding('utf-8') 8 | 9 | url = 'https://www.udacity.com/courses/all' 10 | driver = webdriver.Firefox() 11 | driver.set_page_load_timeout(1200) 12 | driver.get(url) 13 | source_code = requests.get(url) 14 | plain_text = source_code.text 15 | soup = BeautifulSoup(plain_text, 'html.parser') 16 | courseTags = soup.find_all("div", "course-summary-card row row-gap-medium") 17 | courselist = [] 18 | domain = 'https://www.udacity.com' 19 | i = 0 20 | for tag in courseTags: 21 | coursedic = {} 22 | i += 1 23 | coursedic["id"] = "udacity" + str(i).zfill(5) 24 | coursedic["provenance"] = "udacity" 25 | coursedic["img"] = tag.select('img[src]')[0]['data-src'] 26 | coursedic["course_url"] = domain + tag.select('a[data-course-title]')[0]['href'] 27 | coursedic["name"] = tag.select('a[data-course-title]')[0]\ 28 | .text.strip().decode('utf8').encode('ascii', errors='ignore') 29 | coursedic["description"] = 
tag.select('div[data-course-short-summary]')[0]\ 30 | .text.strip().decode('utf8').encode('ascii', errors='ignore') 31 | courselist.append(coursedic) 32 | driver.quit() 33 | 34 | print courselist 35 | with open('../udacity_data.json', 'a') as f: 36 | json.dump(courselist, f) -------------------------------------------------------------------------------- /edX/sample.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id":"csr00000", 4 | "course_name":"SomeCourse Blah", 5 | "subject":"Computer Science", 6 | "description":"Blah blah blah blah", 7 | "image_url":"http://example.jpg", 8 | "provenance":"Coursera" 9 | }, 10 | { 11 | "id":"edx00320", 12 | "course_name":"Intro to SomeOtherCourse", 13 | "subject":"Art & History", 14 | "description":"Blah blah blah blah blah", 15 | "image_url":"http://anotherexample.jpg", 16 | "provenance":"edX" 17 | } 18 | ] 19 | -------------------------------------------------------------------------------- /edX/sample_cypher_edX: -------------------------------------------------------------------------------- 1 | load csv with headers from ['file path:/edX.csv'] as row 2 | (with row limit 100) 3 | merge (course:Course{ 4 | id:row.id, 5 | name:row.course_name, 6 | description:row.description, 7 | url:row.course_url}) 8 | ON CREATE SET course.image = row.image_url 9 | ON MATCH SET course.image = row.image_url 10 | merge (provenance:Provenance{name:row.provenance}) 11 | merge (course) - [:HostedBy] -> (provenance) 12 | foreach (subjectName in split(row.subject,";") | 13 | merge (subject:Subject{name:subjectName}) 14 | merge (course) - [:OfSubject] -> (subject)); 15 | -------------------------------------------------------------------------------- /neo4jupyter/assets/index.html: -------------------------------------------------------------------------------- 1 |
# /neo4jupyter/neo.py
"""Render Neo4j query rows as an interactive vis.js network inside Jupyter."""
import os
import json
import uuid
import tempfile
from IPython.display import HTML, Javascript, display

# Barnes-Hut physics settings for vis.js.  Nothing in this module reads the
# constant; presumably callers pass it as ``physics=`` -- TODO confirm.
DEFAULT_PHYSICS = {
    "physics": {
        "barnesHut": {
            "gravitationalConstant": -15150,
            "centralGravity": 3.45,
            "springLength": 261,
            "damping": 0.3
        }
    }
}


def get_visjs():
    """Placeholder kept for API compatibility; always returns ``None``."""
    return


def init_notebook_mode():
    """
    Configure require.js in the notebook so that vis.js is loaded from the CDN.

    Once loaded, the library is exposed to the page as ``window.vis``; the
    matching vis.js stylesheet is attached through the ``css`` argument.
    """
    display(
        Javascript(data="require.config({ " +
                        " paths: { " +
                        " vis: '//cdnjs.cloudflare.com/ajax/libs/vis/4.8.2/vis.min' " +
                        " } " +
                        "}); " +
                        "require(['vis'], function(vis) { " +
                        " window.vis = vis; " +
                        "}); ",
                   css='https://cdnjs.cloudflare.com/ajax/libs/vis/4.8.2/vis.css')
    )


def vis_network(nodes, edges, physics=True):
    """
    Fill the ``assets/index.html`` template with the graph data.

    :param nodes: The nodes to be represented and their information.
    :param edges: The edges to be represented and their information.
    :param physics: Value serialised to JSON for the template's ``physics`` slot.
    :return: The rendered page as a ``str`` (it is not wrapped in
        ``IPython.display.HTML``; callers do that themselves if needed).
    :raises IOError: If the template next to this module cannot be read.
    """
    template_path = os.path.join(os.path.dirname(__file__), 'assets/index.html')
    # Use a context manager so the template handle is always closed.
    with open(template_path) as template_file:
        base = template_file.read()

    unique_id = str(uuid.uuid4())
    return base.format(id=unique_id, nodes=json.dumps(nodes), edges=json.dumps(edges), physics=json.dumps(physics))


def _node_to_vis(node, node_id, options):
    """Translate one graph node into the dict that vis.js expects."""
    labels = list(node.labels())
    # A node without labels is drawn in an empty group instead of raising.
    node_label = labels[0] if labels else ""
    prop_key = options.get(node_label)
    vis_label = node.properties.get(prop_key, "")

    return {"id": node_id, "label": vis_label, "group": node_label, "title": repr(node.properties)}


def draw(data, options, physics=True, limit=100):
    """
    Build a vis.js network from query rows and return the rendered HTML.

    Every row of ``data`` is read positionally as
    ``(source_node, source_id, relationship, target_node, target_id)``.
    A row whose relationship is ``None`` contributes only its source node.

    The options argument should be a dictionary of node labels and property keys; it determines which property
    is displayed for the node label. For example, in the movie graph, options = {"Movie": "title", "Person": "name"}.
    Omitting a node label from the options dict will leave the node unlabeled in the visualization.

    :param data: Iterable of result rows in the layout described above.
    :param options: Options for the Nodes.
    :param physics: Physics of the vis.js visualization (forwarded to ``vis_network``).
    :param limit: Currently unused; kept so existing callers keep working.
    :return: The HTML string produced by ``vis_network``.
    """
    nodes = []
    edges = []

    for row in data:
        source_node = row[0]
        source_id = row[1]
        rel = row[2]
        target_node = row[3]
        target_id = row[4]

        source_info = _node_to_vis(source_node, source_id, options)
        if source_info not in nodes:
            nodes.append(source_info)

        if rel is None:
            continue

        target_info = _node_to_vis(target_node, target_id, options)
        if target_info not in nodes:
            nodes.append(target_info)

        edges.append({"from": source_info["id"], "to": target_info["id"], "label": rel.type()})

    return vis_network(nodes, edges, physics=physics)
/rpedsel/Cypher: -------------------------------------------------------------------------------- 1 | 2 | # Load processed CSV files into Neo4j Database: 3 | 4 | ===== MOOC Entities ===== 5 | 6 | load csv with headers from 'file:/Mooc_merge.csv' as row WITH row WHERE row.provenance is not null 7 | merge (mooc:MOOC{ 8 | id:row.id, 9 | name:row.name, 10 | description:row.description}) 11 | ON CREATE SET mooc.url = row.course_url 12 | ON MATCH SET mooc.url = row.course_url 13 | ON CREATE SET mooc.image = row.image_url 14 | ON MATCH SET mooc.image = row.image_url 15 | ON CREATE SET mooc.special_id = row.special_id 16 | ON MATCH SET mooc.special_id = row.special_id 17 | merge (provenance:Provenance{pname:row.provenance}) 18 | merge (mooc) - [:HostedBy] -> (provenance) 19 | foreach (subjectName in split(row.subject,";") | 20 | merge (subject:Subject{sname:subjectName}) 21 | merge (mooc) - [:OfSubject] -> (subject)); 22 | 23 | 24 | ===== USC Course Entities ===== 25 | 26 | load csv with headers from 'file:/Catalogue_sim.csv' as row 27 | merge (course:Course{ 28 | id:row.id, 29 | name:row.name, 30 | description:row.description}) 31 | foreach (prerequisiteID in split(row.prerequisite,";") | 32 | merge (pcourse:Course{id:prerequisiteID}) 33 | merge (course) - [:HasPrerequisite] -> (pcourse)) 34 | foreach (preparationID in split(row.preparation,";") | 35 | merge (prcourse:Course{id:preparationID}) 36 | merge (course) - [:HasPreparation] -> (prcourse)) 37 | foreach (corequisiteID in split(row.corequisite,";") | 38 | merge (ccourse:Course{id:corequisiteID}) 39 | merge (course) - [:HasCorequisite] -> (ccourse)) 40 | foreach (crosslistID in split(row.crosslist,";") | 41 | merge (crcourse:Course{id:crosslistID}) 42 | merge (course) - [:HasCrosslist] -> (crcourse)) 43 | foreach (duplicateID in split(row.duplicate,";") | 44 | merge (dcourse:Course{id:duplicateID}) 45 | merge (course) - [:HasDuplicate] -> (dcourse)) 46 | 47 | FOREACH (p IN CASE row.similarity WHEN "" THEN [] ELSE
split(row.similarity,';') END | 48 | merge (smooc:MOOC{id:split(p,"/")[0]}) 49 | merge (course) - [:SimilarTo{value:split(p,"/")[1]}] -> (smooc)); 50 | 51 | 52 | ===== USC Program Entities ===== 53 | 54 | load csv with headers from 'file:/Catalogue.csv' as row 55 | merge (program:Program{ 56 | id:row.id, 57 | type:row.type, 58 | name:row.name, 59 | url:row.url}) 60 | foreach (courseID in split(row.courses,";") | 61 | merge (course:Course{id:courseID}) 62 | merge (course) - [:UnderProgram] -> (program)); 63 | --------------------------------------------------------------------------------
CodeNameImageMOOC
''' + c['code'] + '''''' + c['name'] + ''' ''' + c['mooc'] + '''