275 | ],
276 | "text/plain": [
277 | " Job_title Job_Desc\n",
278 | "901 Data Scientist We are looking for Data Scientists who are int...\n",
279 | "910 Data Scientist The world's largest and fastest-growing compan...\n",
280 | "916 Data Scientist \\nRole: Data Scientist.\\n\\nLocation: Foster Ci...\n",
281 | "920 Data Scientist Upstart is the leading AI lending platform par...\n",
282 | "938 Data Scientist Why Divvy?Over the past decade, millions of Am...\n",
283 | "935 Data Engineer About Rocket LawyerWe believe everyone deserve...\n",
284 | "1068 Data Engineer Our mission is to create a world where mental ...\n",
285 | "1089 Data Engineer Data Engineer \\nIf you are a Data Engineer wit...\n",
286 | "1100 Data Engineer Prabhav Services Inc. is one of the premier pr...\n",
287 | "1105 Data Engineer About Skupos\\nSkupos is the data platform for ..."
288 | ]
289 | },
290 | "execution_count": 135,
291 | "metadata": {},
292 | "output_type": "execute_result"
293 | }
294 | ],
295 | "source": [
296 | "sample_jobs = jobs_df.loc[[901, 910, 916, 920, 938, 935, 1068, 1089, 1100, 1105], ['Job_title', 'Job_Desc']]\n",
297 | "sample_jobs"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": 145,
303 | "metadata": {},
304 | "outputs": [
305 | {
306 | "data": {
307 | "text/html": [
308 | "
\n",
309 | "\n",
322 | "
\n",
323 | " \n",
324 | "
\n",
325 | "
\n",
326 | "
Job_title
\n",
327 | "
Job_Desc
\n",
328 | "
Job_id
\n",
329 | "
\n",
330 | " \n",
331 | " \n",
332 | "
\n",
333 | "
901
\n",
334 | "
Data Scientist
\n",
335 | "
We are looking for Data Scientists who are int...
\n",
336 | "
901
\n",
337 | "
\n",
338 | "
\n",
339 | "
910
\n",
340 | "
Data Scientist
\n",
341 | "
The world's largest and fastest-growing compan...
\n",
342 | "
910
\n",
343 | "
\n",
344 | "
\n",
345 | "
916
\n",
346 | "
Data Scientist
\n",
347 | "
\\nRole: Data Scientist.\\n\\nLocation: Foster Ci...
\n",
348 | "
916
\n",
349 | "
\n",
350 | "
\n",
351 | "
920
\n",
352 | "
Data Scientist
\n",
353 | "
Upstart is the leading AI lending platform par...
\n",
354 | "
920
\n",
355 | "
\n",
356 | "
\n",
357 | "
938
\n",
358 | "
Data Scientist
\n",
359 | "
Why Divvy?Over the past decade, millions of Am...
\n",
360 | "
938
\n",
361 | "
\n",
362 | "
\n",
363 | "
935
\n",
364 | "
Data Engineer
\n",
365 | "
About Rocket LawyerWe believe everyone deserve...
\n",
366 | "
935
\n",
367 | "
\n",
368 | "
\n",
369 | "
1068
\n",
370 | "
Data Engineer
\n",
371 | "
Our mission is to create a world where mental ...
\n",
372 | "
1068
\n",
373 | "
\n",
374 | "
\n",
375 | "
1089
\n",
376 | "
Data Engineer
\n",
377 | "
Data Engineer \\nIf you are a Data Engineer wit...
\n",
378 | "
1089
\n",
379 | "
\n",
380 | "
\n",
381 | "
1100
\n",
382 | "
Data Engineer
\n",
383 | "
Prabhav Services Inc. is one of the premier pr...
\n",
384 | "
1100
\n",
385 | "
\n",
386 | "
\n",
387 | "
1105
\n",
388 | "
Data Engineer
\n",
389 | "
About Skupos\\nSkupos is the data platform for ...
\n",
390 | "
1105
\n",
391 | "
\n",
392 | " \n",
393 | "
\n",
394 | "
"
395 | ],
396 | "text/plain": [
397 | " Job_title Job_Desc \\\n",
398 | "901 Data Scientist We are looking for Data Scientists who are int... \n",
399 | "910 Data Scientist The world's largest and fastest-growing compan... \n",
400 | "916 Data Scientist \\nRole: Data Scientist.\\n\\nLocation: Foster Ci... \n",
401 | "920 Data Scientist Upstart is the leading AI lending platform par... \n",
402 | "938 Data Scientist Why Divvy?Over the past decade, millions of Am... \n",
403 | "935 Data Engineer About Rocket LawyerWe believe everyone deserve... \n",
404 | "1068 Data Engineer Our mission is to create a world where mental ... \n",
405 | "1089 Data Engineer Data Engineer \\nIf you are a Data Engineer wit... \n",
406 | "1100 Data Engineer Prabhav Services Inc. is one of the premier pr... \n",
407 | "1105 Data Engineer About Skupos\\nSkupos is the data platform for ... \n",
408 | "\n",
409 | " Job_id \n",
410 | "901 901 \n",
411 | "910 910 \n",
412 | "916 916 \n",
413 | "920 920 \n",
414 | "938 938 \n",
415 | "935 935 \n",
416 | "1068 1068 \n",
417 | "1089 1089 \n",
418 | "1100 1100 \n",
419 | "1105 1105 "
420 | ]
421 | },
422 | "execution_count": 145,
423 | "metadata": {},
424 | "output_type": "execute_result"
425 | }
426 | ],
427 | "source": [
428 | "sample_jobs['Job_id'] = sample_jobs.index\n",
429 | "sample_jobs"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": 147,
435 | "metadata": {},
436 | "outputs": [],
437 | "source": [
438 | "sample_jobs.to_csv('jobs_test_sample.csv', index=False)"
439 | ]
440 | },
441 | {
442 | "cell_type": "markdown",
443 | "metadata": {},
444 | "source": [
445 | "## Course test data"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": 137,
451 | "metadata": {},
452 | "outputs": [
453 | {
454 | "name": "stdout",
455 | "output_type": "stream",
456 | "text": [
457 | "(4416, 10)\n"
458 | ]
459 | },
460 | {
461 | "data": {
462 | "text/html": [
463 | "
\"A picture is worth a thousand words\". We are ...
\n",
626 | "
\n",
627 | "
\n",
628 | "
2517
\n",
629 | "
Machine Learning with Python
\n",
630 | "
This course dives into the basics of machine l...
\n",
631 | "
\n",
632 | "
\n",
633 | "
545
\n",
634 | "
Databases and SQL for Data Science
\n",
635 | "
Much of the world's data resides in databases....
\n",
636 | "
\n",
637 | "
\n",
638 | "
1015
\n",
639 | "
Google Cloud Platform Big Data and Machine Lea...
\n",
640 | "
This 2-week accelerated on-demand course intro...
\n",
641 | "
\n",
642 | "
\n",
643 | "
4233
\n",
644 | "
Big Data Modeling and Management Systems
\n",
645 | "
Once you’ve identified a big data issue to ana...
\n",
646 | "
\n",
647 | "
\n",
648 | "
3763
\n",
649 | "
Database Management Essentials
\n",
650 | "
Database Management Essentials provides the fo...
\n",
651 | "
\n",
652 | "
\n",
653 | "
1311
\n",
654 | "
Data Warehouse Concepts, Design, and Data Inte...
\n",
655 | "
This is the second course in the Data Warehous...
\n",
656 | "
\n",
657 | " \n",
658 | "
\n",
659 | "
"
660 | ],
661 | "text/plain": [
662 | " name \\\n",
663 | "3823 The Data Scientist’s Toolbox \n",
664 | "143 Machine Learning \n",
665 | "3165 Applied Machine Learning in Python \n",
666 | "3588 Data Visualization with Python \n",
667 | "2517 Machine Learning with Python \n",
668 | "545 Databases and SQL for Data Science \n",
669 | "1015 Google Cloud Platform Big Data and Machine Lea... \n",
670 | "4233 Big Data Modeling and Management Systems \n",
671 | "3763 Database Management Essentials \n",
672 | "1311 Data Warehouse Concepts, Design, and Data Inte... \n",
673 | "\n",
674 | " description \n",
675 | "3823 In this course you will get an introduction to... \n",
676 | "143 Machine learning is the science of getting com... \n",
677 | "3165 This course will introduce the learner to appl... \n",
678 | "3588 \"A picture is worth a thousand words\". We are ... \n",
679 | "2517 This course dives into the basics of machine l... \n",
680 | "545 Much of the world's data resides in databases.... \n",
681 | "1015 This 2-week accelerated on-demand course intro... \n",
682 | "4233 Once you’ve identified a big data issue to ana... \n",
683 | "3763 Database Management Essentials provides the fo... \n",
684 | "1311 This is the second course in the Data Warehous... "
685 | ]
686 | },
687 | "execution_count": 140,
688 | "metadata": {},
689 | "output_type": "execute_result"
690 | }
691 | ],
692 | "source": [
693 | "sample_courses = courses_df.loc[[3823, 143, 3165, 3588, 2517, 545, 1015, 4233, 3763, 1311], ['name', 'description']]\n",
694 | "sample_courses"
695 | ]
696 | },
697 | {
698 | "cell_type": "code",
699 | "execution_count": 141,
700 | "metadata": {},
701 | "outputs": [
702 | {
703 | "data": {
704 | "text/html": [
705 | "
\n",
706 | "\n",
719 | "
\n",
720 | " \n",
721 | "
\n",
722 | "
\n",
723 | "
name
\n",
724 | "
description
\n",
725 | "
job_title
\n",
726 | "
\n",
727 | " \n",
728 | " \n",
729 | "
\n",
730 | "
3823
\n",
731 | "
The Data Scientist’s Toolbox
\n",
732 | "
In this course you will get an introduction to...
\n",
733 | "
None
\n",
734 | "
\n",
735 | "
\n",
736 | "
143
\n",
737 | "
Machine Learning
\n",
738 | "
Machine learning is the science of getting com...
\n",
739 | "
None
\n",
740 | "
\n",
741 | "
\n",
742 | "
3165
\n",
743 | "
Applied Machine Learning in Python
\n",
744 | "
This course will introduce the learner to appl...
\n",
745 | "
None
\n",
746 | "
\n",
747 | "
\n",
748 | "
3588
\n",
749 | "
Data Visualization with Python
\n",
750 | "
\"A picture is worth a thousand words\". We are ...
\n",
751 | "
None
\n",
752 | "
\n",
753 | "
\n",
754 | "
2517
\n",
755 | "
Machine Learning with Python
\n",
756 | "
This course dives into the basics of machine l...
\n",
757 | "
None
\n",
758 | "
\n",
759 | "
\n",
760 | "
545
\n",
761 | "
Databases and SQL for Data Science
\n",
762 | "
Much of the world's data resides in databases....
\n",
763 | "
None
\n",
764 | "
\n",
765 | "
\n",
766 | "
1015
\n",
767 | "
Google Cloud Platform Big Data and Machine Lea...
\n",
768 | "
This 2-week accelerated on-demand course intro...
\n",
769 | "
None
\n",
770 | "
\n",
771 | "
\n",
772 | "
4233
\n",
773 | "
Big Data Modeling and Management Systems
\n",
774 | "
Once you’ve identified a big data issue to ana...
\n",
775 | "
None
\n",
776 | "
\n",
777 | "
\n",
778 | "
3763
\n",
779 | "
Database Management Essentials
\n",
780 | "
Database Management Essentials provides the fo...
\n",
781 | "
None
\n",
782 | "
\n",
783 | "
\n",
784 | "
1311
\n",
785 | "
Data Warehouse Concepts, Design, and Data Inte...
\n",
786 | "
This is the second course in the Data Warehous...
\n",
787 | "
None
\n",
788 | "
\n",
789 | " \n",
790 | "
\n",
791 | "
"
792 | ],
793 | "text/plain": [
794 | " name \\\n",
795 | "3823 The Data Scientist’s Toolbox \n",
796 | "143 Machine Learning \n",
797 | "3165 Applied Machine Learning in Python \n",
798 | "3588 Data Visualization with Python \n",
799 | "2517 Machine Learning with Python \n",
800 | "545 Databases and SQL for Data Science \n",
801 | "1015 Google Cloud Platform Big Data and Machine Lea... \n",
802 | "4233 Big Data Modeling and Management Systems \n",
803 | "3763 Database Management Essentials \n",
804 | "1311 Data Warehouse Concepts, Design, and Data Inte... \n",
805 | "\n",
806 | " description job_title \n",
807 | "3823 In this course you will get an introduction to... None \n",
808 | "143 Machine learning is the science of getting com... None \n",
809 | "3165 This course will introduce the learner to appl... None \n",
810 | "3588 \"A picture is worth a thousand words\". We are ... None \n",
811 | "2517 This course dives into the basics of machine l... None \n",
812 | "545 Much of the world's data resides in databases.... None \n",
813 | "1015 This 2-week accelerated on-demand course intro... None \n",
814 | "4233 Once you’ve identified a big data issue to ana... None \n",
815 | "3763 Database Management Essentials provides the fo... None \n",
816 | "1311 This is the second course in the Data Warehous... None "
817 | ]
818 | },
819 | "execution_count": 141,
820 | "metadata": {},
821 | "output_type": "execute_result"
822 | }
823 | ],
824 | "source": [
825 | "sample_courses['job_title'] = None\n",
826 | "sample_courses"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": 142,
832 | "metadata": {},
833 | "outputs": [
834 | {
835 | "data": {
836 | "text/html": [
837 | "
\n",
838 | "\n",
851 | "
\n",
852 | " \n",
853 | "
\n",
854 | "
\n",
855 | "
name
\n",
856 | "
description
\n",
857 | "
job_title
\n",
858 | "
\n",
859 | " \n",
860 | " \n",
861 | "
\n",
862 | "
3823
\n",
863 | "
The Data Scientist’s Toolbox
\n",
864 | "
In this course you will get an introduction to...
\n",
865 | "
Data Scientist
\n",
866 | "
\n",
867 | "
\n",
868 | "
143
\n",
869 | "
Machine Learning
\n",
870 | "
Machine learning is the science of getting com...
\n",
871 | "
Data Scientist
\n",
872 | "
\n",
873 | "
\n",
874 | "
3165
\n",
875 | "
Applied Machine Learning in Python
\n",
876 | "
This course will introduce the learner to appl...
\n",
877 | "
Data Scientist
\n",
878 | "
\n",
879 | "
\n",
880 | "
3588
\n",
881 | "
Data Visualization with Python
\n",
882 | "
\"A picture is worth a thousand words\". We are ...
\n",
883 | "
Data Scientist
\n",
884 | "
\n",
885 | "
\n",
886 | "
2517
\n",
887 | "
Machine Learning with Python
\n",
888 | "
This course dives into the basics of machine l...
\n",
889 | "
Data Scientist
\n",
890 | "
\n",
891 | "
\n",
892 | "
545
\n",
893 | "
Databases and SQL for Data Science
\n",
894 | "
Much of the world's data resides in databases....
\n",
895 | "
Data Engineer
\n",
896 | "
\n",
897 | "
\n",
898 | "
1015
\n",
899 | "
Google Cloud Platform Big Data and Machine Lea...
\n",
900 | "
This 2-week accelerated on-demand course intro...
\n",
901 | "
Data Engineer
\n",
902 | "
\n",
903 | "
\n",
904 | "
4233
\n",
905 | "
Big Data Modeling and Management Systems
\n",
906 | "
Once you’ve identified a big data issue to ana...
\n",
907 | "
Data Engineer
\n",
908 | "
\n",
909 | "
\n",
910 | "
3763
\n",
911 | "
Database Management Essentials
\n",
912 | "
Database Management Essentials provides the fo...
\n",
913 | "
Data Engineer
\n",
914 | "
\n",
915 | "
\n",
916 | "
1311
\n",
917 | "
Data Warehouse Concepts, Design, and Data Inte...
\n",
918 | "
This is the second course in the Data Warehous...
\n",
919 | "
Data Engineer
\n",
920 | "
\n",
921 | " \n",
922 | "
\n",
923 | "
"
924 | ],
925 | "text/plain": [
926 | " name \\\n",
927 | "3823 The Data Scientist’s Toolbox \n",
928 | "143 Machine Learning \n",
929 | "3165 Applied Machine Learning in Python \n",
930 | "3588 Data Visualization with Python \n",
931 | "2517 Machine Learning with Python \n",
932 | "545 Databases and SQL for Data Science \n",
933 | "1015 Google Cloud Platform Big Data and Machine Lea... \n",
934 | "4233 Big Data Modeling and Management Systems \n",
935 | "3763 Database Management Essentials \n",
936 | "1311 Data Warehouse Concepts, Design, and Data Inte... \n",
937 | "\n",
938 | " description job_title \n",
939 | "3823 In this course you will get an introduction to... Data Scientist \n",
940 | "143 Machine learning is the science of getting com... Data Scientist \n",
941 | "3165 This course will introduce the learner to appl... Data Scientist \n",
942 | "3588 \"A picture is worth a thousand words\". We are ... Data Scientist \n",
943 | "2517 This course dives into the basics of machine l... Data Scientist \n",
944 | "545 Much of the world's data resides in databases.... Data Engineer \n",
945 | "1015 This 2-week accelerated on-demand course intro... Data Engineer \n",
946 | "4233 Once you’ve identified a big data issue to ana... Data Engineer \n",
947 | "3763 Database Management Essentials provides the fo... Data Engineer \n",
948 | "1311 This is the second course in the Data Warehous... Data Engineer "
949 | ]
950 | },
951 | "execution_count": 142,
952 | "metadata": {},
953 | "output_type": "execute_result"
954 | }
955 | ],
956 | "source": [
957 | "sample_courses.loc[ds_courses, 'job_title'] = 'Data Scientist'\n",
958 | "sample_courses.loc[de_courses, 'job_title'] = 'Data Engineer'\n",
959 | "sample_courses"
960 | ]
961 | },
962 | {
963 | "cell_type": "code",
964 | "execution_count": 148,
965 | "metadata": {},
966 | "outputs": [
967 | {
968 | "data": {
969 | "text/html": [
970 | "
\n",
971 | "\n",
984 | "
\n",
985 | " \n",
986 | "
\n",
987 | "
\n",
988 | "
name
\n",
989 | "
description
\n",
990 | "
job_title
\n",
991 | "
course_id
\n",
992 | "
\n",
993 | " \n",
994 | " \n",
995 | "
\n",
996 | "
3823
\n",
997 | "
The Data Scientist’s Toolbox
\n",
998 | "
In this course you will get an introduction to...
\n",
999 | "
Data Scientist
\n",
1000 | "
3823
\n",
1001 | "
\n",
1002 | "
\n",
1003 | "
143
\n",
1004 | "
Machine Learning
\n",
1005 | "
Machine learning is the science of getting com...
\n",
1006 | "
Data Scientist
\n",
1007 | "
143
\n",
1008 | "
\n",
1009 | "
\n",
1010 | "
3165
\n",
1011 | "
Applied Machine Learning in Python
\n",
1012 | "
This course will introduce the learner to appl...
\n",
1013 | "
Data Scientist
\n",
1014 | "
3165
\n",
1015 | "
\n",
1016 | "
\n",
1017 | "
3588
\n",
1018 | "
Data Visualization with Python
\n",
1019 | "
\"A picture is worth a thousand words\". We are ...
\n",
1020 | "
Data Scientist
\n",
1021 | "
3588
\n",
1022 | "
\n",
1023 | "
\n",
1024 | "
2517
\n",
1025 | "
Machine Learning with Python
\n",
1026 | "
This course dives into the basics of machine l...
\n",
1027 | "
Data Scientist
\n",
1028 | "
2517
\n",
1029 | "
\n",
1030 | "
\n",
1031 | "
545
\n",
1032 | "
Databases and SQL for Data Science
\n",
1033 | "
Much of the world's data resides in databases....
\n",
1034 | "
Data Engineer
\n",
1035 | "
545
\n",
1036 | "
\n",
1037 | "
\n",
1038 | "
1015
\n",
1039 | "
Google Cloud Platform Big Data and Machine Lea...
\n",
1040 | "
This 2-week accelerated on-demand course intro...
\n",
1041 | "
Data Engineer
\n",
1042 | "
1015
\n",
1043 | "
\n",
1044 | "
\n",
1045 | "
4233
\n",
1046 | "
Big Data Modeling and Management Systems
\n",
1047 | "
Once you’ve identified a big data issue to ana...
\n",
1048 | "
Data Engineer
\n",
1049 | "
4233
\n",
1050 | "
\n",
1051 | "
\n",
1052 | "
3763
\n",
1053 | "
Database Management Essentials
\n",
1054 | "
Database Management Essentials provides the fo...
\n",
1055 | "
Data Engineer
\n",
1056 | "
3763
\n",
1057 | "
\n",
1058 | "
\n",
1059 | "
1311
\n",
1060 | "
Data Warehouse Concepts, Design, and Data Inte...
\n",
1061 | "
This is the second course in the Data Warehous...
\n",
1062 | "
Data Engineer
\n",
1063 | "
1311
\n",
1064 | "
\n",
1065 | " \n",
1066 | "
\n",
1067 | "
"
1068 | ],
1069 | "text/plain": [
1070 | " name \\\n",
1071 | "3823 The Data Scientist’s Toolbox \n",
1072 | "143 Machine Learning \n",
1073 | "3165 Applied Machine Learning in Python \n",
1074 | "3588 Data Visualization with Python \n",
1075 | "2517 Machine Learning with Python \n",
1076 | "545 Databases and SQL for Data Science \n",
1077 | "1015 Google Cloud Platform Big Data and Machine Lea... \n",
1078 | "4233 Big Data Modeling and Management Systems \n",
1079 | "3763 Database Management Essentials \n",
1080 | "1311 Data Warehouse Concepts, Design, and Data Inte... \n",
1081 | "\n",
1082 | " description job_title \\\n",
1083 | "3823 In this course you will get an introduction to... Data Scientist \n",
1084 | "143 Machine learning is the science of getting com... Data Scientist \n",
1085 | "3165 This course will introduce the learner to appl... Data Scientist \n",
1086 | "3588 \"A picture is worth a thousand words\". We are ... Data Scientist \n",
1087 | "2517 This course dives into the basics of machine l... Data Scientist \n",
1088 | "545 Much of the world's data resides in databases.... Data Engineer \n",
1089 | "1015 This 2-week accelerated on-demand course intro... Data Engineer \n",
1090 | "4233 Once you’ve identified a big data issue to ana... Data Engineer \n",
1091 | "3763 Database Management Essentials provides the fo... Data Engineer \n",
1092 | "1311 This is the second course in the Data Warehous... Data Engineer \n",
1093 | "\n",
1094 | " course_id \n",
1095 | "3823 3823 \n",
1096 | "143 143 \n",
1097 | "3165 3165 \n",
1098 | "3588 3588 \n",
1099 | "2517 2517 \n",
1100 | "545 545 \n",
1101 | "1015 1015 \n",
1102 | "4233 4233 \n",
1103 | "3763 3763 \n",
1104 | "1311 1311 "
1105 | ]
1106 | },
1107 | "execution_count": 148,
1108 | "metadata": {},
1109 | "output_type": "execute_result"
1110 | }
1111 | ],
1112 | "source": [
1113 | "sample_courses['course_id'] = sample_courses.index\n",
1114 | "sample_courses"
1115 | ]
1116 | },
1117 | {
1118 | "cell_type": "code",
1119 | "execution_count": 149,
1120 | "metadata": {},
1121 | "outputs": [],
1122 | "source": [
1123 | "sample_courses.to_csv('courses_test_sample.csv', index=False)"
1124 | ]
1125 | },
1126 | {
1127 | "cell_type": "code",
1128 | "execution_count": null,
1129 | "metadata": {},
1130 | "outputs": [],
1131 | "source": []
1132 | }
1133 | ],
1134 | "metadata": {
1135 | "kernelspec": {
1136 | "display_name": "Python 3",
1137 | "language": "python",
1138 | "name": "python3"
1139 | },
1140 | "language_info": {
1141 | "codemirror_mode": {
1142 | "name": "ipython",
1143 | "version": 3
1144 | },
1145 | "file_extension": ".py",
1146 | "mimetype": "text/x-python",
1147 | "name": "python",
1148 | "nbconvert_exporter": "python",
1149 | "pygments_lexer": "ipython3",
1150 | "version": "3.7.6"
1151 | }
1152 | },
1153 | "nbformat": 4,
1154 | "nbformat_minor": 4
1155 | }
1156 |
--------------------------------------------------------------------------------
/Exploratory Data Analysis/Job_Posts_EDA.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Job Listings EDA\n",
8 | "\n",
9 | "This notebook examines the dataset of job posts from Glassdoor. It finds that there are issues in this dataset, such as duplicate rows and adverstisements mixed in. However, because I have decided only to use a small portion of this data for testing out the recommender model, these issues will not affect the project and do not need to be fixed here."
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "# Import libraries\n",
19 | "\n",
20 | "import pandas as pd"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 2,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "(3324, 12)\n"
33 | ]
34 | },
35 | {
36 | "data": {
37 | "text/html": [
38 | "
\n",
39 | "\n",
52 | "
\n",
53 | " \n",
54 | "
\n",
55 | "
\n",
56 | "
Job_title
\n",
57 | "
Company
\n",
58 | "
State
\n",
59 | "
City
\n",
60 | "
Min_Salary
\n",
61 | "
Max_Salary
\n",
62 | "
Job_Desc
\n",
63 | "
Industry
\n",
64 | "
Rating
\n",
65 | "
Date_Posted
\n",
66 | "
Valid_until
\n",
67 | "
Job_Type
\n",
68 | "
\n",
69 | " \n",
70 | " \n",
71 | "
\n",
72 | "
0
\n",
73 | "
Chief Marketing Officer (CMO)
\n",
74 | "
National Debt Relief
\n",
75 | "
NY
\n",
76 | "
New York
\n",
77 | "
-1
\n",
78 | "
-1
\n",
79 | "
Who We're Looking For:\\n\\nThe Chief Marketing ...
"
149 | ],
150 | "text/plain": [
151 | " Job_title \\\n",
152 | "0 Chief Marketing Officer (CMO) \n",
153 | "1 Registered Nurse \n",
154 | "2 Dental Hygienist \n",
155 | "3 Senior Salesforce Developer \n",
156 | "4 DEPUTY EXECUTIVE DIRECTOR, PROGRAM AND LEGAL A... \n",
157 | "\n",
158 | " Company State City Min_Salary \\\n",
159 | "0 National Debt Relief NY New York -1 \n",
160 | "1 Queens Boulevard Endoscopy Center NY Rego Park -1 \n",
161 | "2 Batista Dental NJ West New York -1 \n",
162 | "3 National Debt Relief NY New York 44587 \n",
163 | "4 National Advocates for Pregnant Women NY New York 125410 \n",
164 | "\n",
165 | " Max_Salary Job_Desc Industry \\\n",
166 | "0 -1 Who We're Looking For:\\n\\nThe Chief Marketing ... Finance \n",
167 | "1 -1 Queens Boulevard Endoscopy Center, an endoscop... NaN \n",
168 | "2 -1 Part-time or Full-timedental hygienist positio... NaN \n",
169 | "3 82162 Principle Duties & Responsibilities:\\n\\nAnalyz... Finance \n",
170 | "4 212901 For FULL Job Announcement, visit our website: ... NaN \n",
171 | "\n",
172 | " Rating Date_Posted Valid_until Job_Type \n",
173 | "0 4.0 2020-05-08 2020-06-07 FULL_TIME \n",
174 | "1 3.0 2020-04-25 2020-06-07 FULL_TIME \n",
175 | "2 NaN 2020-05-02 2020-06-07 PART_TIME \n",
176 | "3 4.0 2020-05-08 2020-06-07 FULL_TIME \n",
177 | "4 NaN 2020-04-28 2020-06-07 FULL_TIME "
178 | ]
179 | },
180 | "execution_count": 2,
181 | "metadata": {},
182 | "output_type": "execute_result"
183 | }
184 | ],
185 | "source": [
186 | "# Read in the dataset\n",
187 | "\n",
188 | "df = pd.read_csv('../Data/Job_Data/Glassdoor_Joblist.csv')\n",
189 | "print(df.shape)\n",
190 | "df.head()"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": 3,
196 | "metadata": {},
197 | "outputs": [
198 | {
199 | "data": {
200 | "text/plain": [
201 | "Job_title 0\n",
202 | "Company 0\n",
203 | "State 2\n",
204 | "City 6\n",
205 | "Min_Salary 0\n",
206 | "Max_Salary 0\n",
207 | "Job_Desc 0\n",
208 | "Industry 624\n",
209 | "Rating 475\n",
210 | "Date_Posted 0\n",
211 | "Valid_until 0\n",
212 | "Job_Type 0\n",
213 | "dtype: int64"
214 | ]
215 | },
216 | "execution_count": 3,
217 | "metadata": {},
218 | "output_type": "execute_result"
219 | }
220 | ],
221 | "source": [
222 | "# Check for missing values:\n",
223 | "# No missing values in key columns of job title and description.\n",
224 | "\n",
225 | "df.isna().sum()"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 4,
231 | "metadata": {},
232 | "outputs": [
233 | {
234 | "data": {
235 | "text/plain": [
236 | "Data Scientist 186\n",
237 | "Data Engineer 129\n",
238 | "Data Analyst 69\n",
239 | "Senior Data Engineer 44\n",
240 | "Senior Data Scientist 39\n",
241 | " ... \n",
242 | "Spectral Research Scientist with Security Clearance 1\n",
243 | "Senior Medical Scientist 1\n",
244 | "Senior Scientist, Oncology BioMarker Development 1\n",
245 | "Data Scientist, AMP Commerce/ Payments/ Subscription Analytics 1\n",
246 | "Innovation - Data Science Manager 1\n",
247 | "Name: Job_title, Length: 1619, dtype: int64"
248 | ]
249 | },
250 | "execution_count": 4,
251 | "metadata": {},
252 | "output_type": "execute_result"
253 | }
254 | ],
255 | "source": [
256 | "# Examine the key category of job title:\n",
257 | "# might need to consolidate these; leave it for now...\n",
258 | "\n",
259 | "df['Job_title'].value_counts()"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 5,
265 | "metadata": {},
266 | "outputs": [
267 | {
268 | "data": {
269 | "text/plain": [
270 | "'Queens Boulevard Endoscopy Center, an endoscopy ASC located in Rego Park, has an exciting opportunity for Full-Time Registered Nurse! Successful candidates will provide quality nursing care in all areas of the Center including pre-assessment, pre-op and pacu Qualified candidates must possess the following:\\n\\nCurrent NY state RN license\\nBLS Certification, ACLS preferred\\nMust be a team-player with excellent multi-tasking and interpersonal skills\\nCompassion for patient needs and a high degree of professionalism\\nChinese Speaking and Spanish Preferred\\n\\nQueens Boulevard Endoscopy Center offers a pleasant professional work environment and no evening or holiday work hours. Drug-free work environment and EOE.'"
271 | ]
272 | },
273 | "execution_count": 5,
274 | "metadata": {},
275 | "output_type": "execute_result"
276 | }
277 | ],
278 | "source": [
279 | "# Examine an example of a job description:\n",
280 | "# Other than \\n line breaks, the text is pretty clean.\n",
281 | "\n",
282 | "df['Job_Desc'][1]"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 18,
288 | "metadata": {},
289 | "outputs": [
290 | {
291 | "data": {
292 | "text/html": [
293 | "
\n",
294 | "\n",
307 | "
\n",
308 | " \n",
309 | "
\n",
310 | "
\n",
311 | "
Job_title
\n",
312 | "
Company
\n",
313 | "
State
\n",
314 | "
City
\n",
315 | "
Min_Salary
\n",
316 | "
Max_Salary
\n",
317 | "
Job_Desc
\n",
318 | "
Industry
\n",
319 | "
Rating
\n",
320 | "
Date_Posted
\n",
321 | "
Valid_until
\n",
322 | "
Job_Type
\n",
323 | "
\n",
324 | " \n",
325 | " \n",
326 | "
\n",
327 | "
901
\n",
328 | "
Data Scientist
\n",
329 | "
GovTech
\n",
330 | "
CA
\n",
331 | "
San Francisco
\n",
332 | "
78594
\n",
333 | "
147225
\n",
334 | "
We are looking for Data Scientists who are int...
\n",
335 | "
Government
\n",
336 | "
3.6
\n",
337 | "
2020-05-01
\n",
338 | "
2020-06-05
\n",
339 | "
FULL_TIME
\n",
340 | "
\n",
341 | "
\n",
342 | "
902
\n",
343 | "
Data Scientist
\n",
344 | "
Triplebyte
\n",
345 | "
CA
\n",
346 | "
San Francisco
\n",
347 | "
145000
\n",
348 | "
225000
\n",
349 | "
This company is in a hiring surge in response ...
\n",
350 | "
Information Technology
\n",
351 | "
3.6
\n",
352 | "
2020-04-28
\n",
353 | "
2020-06-05
\n",
354 | "
FULL_TIME
\n",
355 | "
\n",
356 | "
\n",
357 | "
903
\n",
358 | "
Data Scientist
\n",
359 | "
Notion Labs
\n",
360 | "
CA
\n",
361 | "
San Francisco
\n",
362 | "
105765
\n",
363 | "
142959
\n",
364 | "
So, what will you do as a Data Scientist at No...
\n",
365 | "
Information Technology
\n",
366 | "
5.0
\n",
367 | "
2020-05-04
\n",
368 | "
2020-06-05
\n",
369 | "
FULL_TIME
\n",
370 | "
\n",
371 | "
\n",
372 | "
904
\n",
373 | "
Data Scientist
\n",
374 | "
Seen by Indeed
\n",
375 | "
CA
\n",
376 | "
San Francisco
\n",
377 | "
110377
\n",
378 | "
143329
\n",
379 | "
With one application you can be considered for...
\n",
380 | "
NaN
\n",
381 | "
NaN
\n",
382 | "
2020-04-25
\n",
383 | "
2020-06-05
\n",
384 | "
FULL_TIME
\n",
385 | "
\n",
386 | "
\n",
387 | "
907
\n",
388 | "
Data Scientist
\n",
389 | "
Formation
\n",
390 | "
CA
\n",
391 | "
San Francisco
\n",
392 | "
119642
\n",
393 | "
135250
\n",
394 | "
Formation provides personalization for the lar...
\n",
395 | "
Information Technology
\n",
396 | "
3.1
\n",
397 | "
2020-04-29
\n",
398 | "
2020-06-05
\n",
399 | "
FULL_TIME
\n",
400 | "
\n",
401 | "
\n",
402 | "
909
\n",
403 | "
Data Scientist
\n",
404 | "
Duetto
\n",
405 | "
CA
\n",
406 | "
San Francisco
\n",
407 | "
108809
\n",
408 | "
173353
\n",
409 | "
We are an ambitious, well-funded, high-growth ...
\n",
410 | "
Information Technology
\n",
411 | "
4.4
\n",
412 | "
2020-04-24
\n",
413 | "
2020-06-05
\n",
414 | "
FULL_TIME
\n",
415 | "
\n",
416 | "
\n",
417 | "
910
\n",
418 | "
Data Scientist
\n",
419 | "
Demandbase
\n",
420 | "
CA
\n",
421 | "
San Francisco
\n",
422 | "
148171
\n",
423 | "
160387
\n",
424 | "
The world's largest and fastest-growing compan...
\n",
425 | "
Information Technology
\n",
426 | "
4.5
\n",
427 | "
2020-04-29
\n",
428 | "
2020-06-05
\n",
429 | "
FULL_TIME
\n",
430 | "
\n",
431 | "
\n",
432 | "
916
\n",
433 | "
Data Scientist
\n",
434 | "
Centraprise
\n",
435 | "
CA
\n",
436 | "
Foster City
\n",
437 | "
116415
\n",
438 | "
143186
\n",
439 | "
\\nRole: Data Scientist.\\n\\nLocation: Foster Ci...
\n",
440 | "
Information Technology
\n",
441 | "
4.2
\n",
442 | "
2020-05-02
\n",
443 | "
2020-06-05
\n",
444 | "
FULL_TIME
\n",
445 | "
\n",
446 | "
\n",
447 | "
918
\n",
448 | "
Data Scientist
\n",
449 | "
CyberCoders
\n",
450 | "
CA
\n",
451 | "
San Francisco
\n",
452 | "
-1
\n",
453 | "
-1
\n",
454 | "
Data Scientist \\nJob Title: Data ScientistLoca...
\n",
455 | "
Business Services
\n",
456 | "
4.1
\n",
457 | "
2020-05-06
\n",
458 | "
2020-06-05
\n",
459 | "
FULL_TIME
\n",
460 | "
\n",
461 | "
\n",
462 | "
920
\n",
463 | "
Data Scientist
\n",
464 | "
Upstart
\n",
465 | "
CA
\n",
466 | "
San Mateo
\n",
467 | "
124204
\n",
468 | "
139717
\n",
469 | "
Upstart is the leading AI lending platform par...
\n",
470 | "
Finance
\n",
471 | "
4.0
\n",
472 | "
2020-04-24
\n",
473 | "
2020-06-05
\n",
474 | "
FULL_TIME
\n",
475 | "
\n",
476 | " \n",
477 | "
\n",
478 | "
"
479 | ],
480 | "text/plain": [
481 | " Job_title Company State City Min_Salary \\\n",
482 | "901 Data Scientist GovTech CA San Francisco 78594 \n",
483 | "902 Data Scientist Triplebyte CA San Francisco 145000 \n",
484 | "903 Data Scientist Notion Labs CA San Francisco 105765 \n",
485 | "904 Data Scientist Seen by Indeed CA San Francisco 110377 \n",
486 | "907 Data Scientist Formation CA San Francisco 119642 \n",
487 | "909 Data Scientist Duetto CA San Francisco 108809 \n",
488 | "910 Data Scientist Demandbase CA San Francisco 148171 \n",
489 | "916 Data Scientist Centraprise CA Foster City 116415 \n",
490 | "918 Data Scientist CyberCoders CA San Francisco -1 \n",
491 | "920 Data Scientist Upstart CA San Mateo 124204 \n",
492 | "\n",
493 | " Max_Salary Job_Desc \\\n",
494 | "901 147225 We are looking for Data Scientists who are int... \n",
495 | "902 225000 This company is in a hiring surge in response ... \n",
496 | "903 142959 So, what will you do as a Data Scientist at No... \n",
497 | "904 143329 With one application you can be considered for... \n",
498 | "907 135250 Formation provides personalization for the lar... \n",
499 | "909 173353 We are an ambitious, well-funded, high-growth ... \n",
500 | "910 160387 The world's largest and fastest-growing compan... \n",
501 | "916 143186 \\nRole: Data Scientist.\\n\\nLocation: Foster Ci... \n",
502 | "918 -1 Data Scientist \\nJob Title: Data ScientistLoca... \n",
503 | "920 139717 Upstart is the leading AI lending platform par... \n",
504 | "\n",
505 | " Industry Rating Date_Posted Valid_until Job_Type \n",
506 | "901 Government 3.6 2020-05-01 2020-06-05 FULL_TIME \n",
507 | "902 Information Technology 3.6 2020-04-28 2020-06-05 FULL_TIME \n",
508 | "903 Information Technology 5.0 2020-05-04 2020-06-05 FULL_TIME \n",
509 | "904 NaN NaN 2020-04-25 2020-06-05 FULL_TIME \n",
510 | "907 Information Technology 3.1 2020-04-29 2020-06-05 FULL_TIME \n",
511 | "909 Information Technology 4.4 2020-04-24 2020-06-05 FULL_TIME \n",
512 | "910 Information Technology 4.5 2020-04-29 2020-06-05 FULL_TIME \n",
513 | "916 Information Technology 4.2 2020-05-02 2020-06-05 FULL_TIME \n",
514 | "918 Business Services 4.1 2020-05-06 2020-06-05 FULL_TIME \n",
515 | "920 Finance 4.0 2020-04-24 2020-06-05 FULL_TIME "
516 | ]
517 | },
518 | "execution_count": 18,
519 | "metadata": {},
520 | "output_type": "execute_result"
521 | }
522 | ],
523 | "source": [
524 | "df.loc[df['Job_title'] == 'Data Scientist'].head(10)"
525 | ]
526 | },
527 | {
528 | "cell_type": "code",
529 | "execution_count": 22,
530 | "metadata": {},
531 | "outputs": [
532 | {
533 | "data": {
534 | "text/plain": [
535 | "'The world\\'s largest and fastest-growing companies such as Accenture, Adobe, DocuSign and Salesforce rely on Demandbase to drive their Account-Based Marketing strategy and maximize their B2B marketing performance. We pioneered the ABM category nearly a decade ago, and today we lead the category as an indispensable part of the B2B MarTech stack. Our achievements and innovation would not be possible without the driven and collaborative teams here at Demandbase. As a company, we\\'re as committed to growing careers as we are to building word-class technology. We invest heavily in people, our culture and the community around us, and have continuously been recognized as one of the best places to work in the Bay Area.\\n\\nDemandbase is currently looking for a Staff Data Scientist to develop ground-breaking insights from our data sets and create a completely new way of data-driven thinking in B2B marketing —providing Sales and Marketing users with unique approaches for account-based advertising and web engagement.\\n\\nAs a Staff Data Scientist, you\\'ll be responsible for developing and testing hypotheses on behavioral responses in B2B marketing, creating models that extract data from, among others, website, digital advertising, and CRM solutions into actionable insights, and defining leading edge thinking on how analytical frameworks can be applied to predictive marketing. You\\'ll engage closely with product managers, engineers, customers and others to turn your models into products that delight customers and create \"A-HA\" moments. You will engage with industry peers and experts and showcase your findings (of course while maintaining company and client confidentiality!). You are both hands-on and strategic—with both a broad ecosystem-level understanding of our market space and the ability to work closely with engineering and product teams to deliver software in an iterative, continual-release environment. 
This is a high-visibility position involving close collaboration across functional groups and with executive stakeholders at customers like the above.\\n\\nWhat you\\'ll be doing:\\n\\nOwn: Be the functional owner of the Data Science role\\nFrame: Use data and insights to explore questions our customers and product team can and should be asking but never asked before.\\nDefine: Work with customers and internal stakeholders to define hypotheses and models, and with engineering teams to define productionalization of data science system\\nDocument: Write clear, concise descriptions of how insights can be converted into repeatable actions.\\nBuild: Write robust machine learning pipelines and data science systems that interface with production infrastructure and APIs\\nTest: Continually test your models and refine assumptions, data sources and more.\\nDrive: Work to spread understanding and buy-in among all stakeholders at all levels.\\nOther duties as assigned\\n\\nWhat we\\'re looking for:\\n\\n2-4 years of data science experience—you have driven more than one greenfield project from concept to production release\\nStrong quantitative and data analysis abilities (statistics, engineering, or financial academic background preferred)—making data actionable must be your thing!\\nGood working knowledge of Spark is a must (we use Scala heavily)\\nAny experience with Google Cloud (especially BQML) and AWS is a huge plus.\\nExperience defining products & solutions containing large data sets from diverse sources— preferably in sales and/or marketing situations.\\nPrior experience in the marketing or sales analytics/data science space desired\\nKnowledge of web site, digital marketing, and CRM technologies and companies a big plus\\n\\nOther important qualities:\\n\\nYou are perfectly comfortable working in a fast paced, market making environment\\nYou love data and data visualization—you love making data actionable for customers\\nYou are a driver and a doer\\nYou are truly passionate about asking and answering questions – some never asked before\\nYou have a strong sense of ownership for the products you help build\\n\\nBenefits:\\n\\nOur benefits include 100% paid for Medical, Dental and Vision for you and your entire family, 100% paid for short-term and long-term disability, 100% paid for life insurance, 401k, flexible vacation\\n\\nAbout Demandbase:\\n\\nDemandbase is the leader in Account-Based Marketing (ABM) and an indispensable part of the B2B tech stack. The company offers the only end-to-end ABM platform that helps B2B marketers identify, engage, close and measure progress against best-fit accounts. The biggest and fastest growing companies in the world, such as Accenture, Adobe, DocuSign, GE, Salesforce and others rely on Demandbase to drive their ABM strategy and maximize their marketing performance. The company has been named to the JMP Securities list \"The Hot 100: The Best Privately Held Software Companies,\" the Deloitte Fast 500 and named a Gartner Cool Vendor for Tech Go-To Market. In 2019, Demandbase executives authored the definitive book on ABM, Account-Based Marketing: How to Target and Engage the Companies That Will Grow Your Revenue. For more information, please visit www.demandbase.com or follow the company on Twitter @Demandbase.'"
536 | ]
537 | },
538 | "execution_count": 22,
539 | "metadata": {},
540 | "output_type": "execute_result"
541 | }
542 | ],
543 | "source": [
544 | "df['Job_Desc'][910]"
545 | ]
546 | },
547 | {
548 | "cell_type": "code",
549 | "execution_count": 23,
550 | "metadata": {},
551 | "outputs": [
552 | {
553 | "data": {
554 | "text/html": [
555 | "
\n",
556 | "\n",
569 | "
\n",
570 | " \n",
571 | "
\n",
572 | "
\n",
573 | "
Job_title
\n",
574 | "
Company
\n",
575 | "
State
\n",
576 | "
City
\n",
577 | "
Min_Salary
\n",
578 | "
Max_Salary
\n",
579 | "
Job_Desc
\n",
580 | "
Industry
\n",
581 | "
Rating
\n",
582 | "
Date_Posted
\n",
583 | "
Valid_until
\n",
584 | "
Job_Type
\n",
585 | "
\n",
586 | " \n",
587 | " \n",
588 | "
\n",
589 | "
935
\n",
590 | "
Data Engineer
\n",
591 | "
Rocket Lawyer
\n",
592 | "
CA
\n",
593 | "
San Francisco
\n",
594 | "
116784
\n",
595 | "
118008
\n",
596 | "
About Rocket LawyerWe believe everyone deserve...
\n",
597 | "
Information Technology
\n",
598 | "
3.5
\n",
599 | "
2020-04-23
\n",
600 | "
2020-06-05
\n",
601 | "
FULL_TIME
\n",
602 | "
\n",
603 | "
\n",
604 | "
1015
\n",
605 | "
Data Engineer
\n",
606 | "
Seen by Indeed
\n",
607 | "
CA
\n",
608 | "
San Francisco
\n",
609 | "
100959
\n",
610 | "
124595
\n",
611 | "
With one application you can be considered for...
\n",
612 | "
NaN
\n",
613 | "
NaN
\n",
614 | "
2020-04-25
\n",
615 | "
2020-06-05
\n",
616 | "
FULL_TIME
\n",
617 | "
\n",
618 | "
\n",
619 | "
1068
\n",
620 | "
Data Engineer
\n",
621 | "
Ginger
\n",
622 | "
CA
\n",
623 | "
San Francisco
\n",
624 | "
102913
\n",
625 | "
155464
\n",
626 | "
Our mission is to create a world where mental ...
\n",
627 | "
Health Care
\n",
628 | "
4.1
\n",
629 | "
2020-04-29
\n",
630 | "
2020-06-05
\n",
631 | "
FULL_TIME
\n",
632 | "
\n",
633 | "
\n",
634 | "
1081
\n",
635 | "
Data Engineer
\n",
636 | "
Allstate
\n",
637 | "
CA
\n",
638 | "
San Francisco
\n",
639 | "
97656
\n",
640 | "
112456
\n",
641 | "
Avail is a new car sharing platform focused on...
\n",
642 | "
Insurance
\n",
643 | "
3.4
\n",
644 | "
2020-05-06
\n",
645 | "
2020-06-05
\n",
646 | "
FULL_TIME
\n",
647 | "
\n",
648 | "
\n",
649 | "
1089
\n",
650 | "
Data Engineer
\n",
651 | "
CyberCoders
\n",
652 | "
CA
\n",
653 | "
San Francisco
\n",
654 | "
-1
\n",
655 | "
-1
\n",
656 | "
Data Engineer \\nIf you are a Data Engineer wit...
\n",
657 | "
Business Services
\n",
658 | "
4.1
\n",
659 | "
2020-05-02
\n",
660 | "
2020-06-05
\n",
661 | "
FULL_TIME
\n",
662 | "
\n",
663 | "
\n",
664 | "
1100
\n",
665 | "
Data Engineer
\n",
666 | "
Prabhav Services Inc
\n",
667 | "
CA
\n",
668 | "
San Francisco
\n",
669 | "
-1
\n",
670 | "
-1
\n",
671 | "
Prabhav Services Inc. is one of the premier pr...
\n",
672 | "
Information Technology
\n",
673 | "
4.6
\n",
674 | "
2020-05-06
\n",
675 | "
2020-06-05
\n",
676 | "
FULL_TIME
\n",
677 | "
\n",
678 | "
\n",
679 | "
1105
\n",
680 | "
Data Engineer
\n",
681 | "
Skupos
\n",
682 | "
CA
\n",
683 | "
San Francisco
\n",
684 | "
83068
\n",
685 | "
99451
\n",
686 | "
About Skupos\\nSkupos is the data platform for ...
\n",
687 | "
Information Technology
\n",
688 | "
5.0
\n",
689 | "
2020-04-24
\n",
690 | "
2020-06-05
\n",
691 | "
FULL_TIME
\n",
692 | "
\n",
693 | "
\n",
694 | "
1140
\n",
695 | "
Data Engineer
\n",
696 | "
Modern Health
\n",
697 | "
CA
\n",
698 | "
San Francisco
\n",
699 | "
100959
\n",
700 | "
124595
\n",
701 | "
Modern Health-Modern Health is a mental health...
\n",
702 | "
Information Technology
\n",
703 | "
5.0
\n",
704 | "
2020-04-30
\n",
705 | "
2020-06-05
\n",
706 | "
FULL_TIME
\n",
707 | "
\n",
708 | "
\n",
709 | "
1144
\n",
710 | "
Data Engineer
\n",
711 | "
Zypmedia
\n",
712 | "
CA
\n",
713 | "
San Francisco
\n",
714 | "
99278
\n",
715 | "
122333
\n",
716 | "
Data Engineer\\n\\nZypMedia has built an enterpr...
\n",
717 | "
Business Services
\n",
718 | "
4.2
\n",
719 | "
2020-05-01
\n",
720 | "
2020-06-05
\n",
721 | "
FULL_TIME
\n",
722 | "
\n",
723 | "
\n",
724 | "
1165
\n",
725 | "
Data Engineer
\n",
726 | "
DotSolved Systems, Inc.
\n",
727 | "
CA
\n",
728 | "
San Francisco
\n",
729 | "
-1
\n",
730 | "
-1
\n",
731 | "
Data Engineer Minimum 7- 8 years experience No...
\n",
732 | "
Information Technology
\n",
733 | "
4.9
\n",
734 | "
2020-05-05
\n",
735 | "
2020-06-05
\n",
736 | "
FULL_TIME
\n",
737 | "
\n",
738 | " \n",
739 | "
\n",
740 | "
"
741 | ],
742 | "text/plain": [
743 | " Job_title Company State City Min_Salary \\\n",
744 | "935 Data Engineer Rocket Lawyer CA San Francisco 116784 \n",
745 | "1015 Data Engineer Seen by Indeed CA San Francisco 100959 \n",
746 | "1068 Data Engineer Ginger CA San Francisco 102913 \n",
747 | "1081 Data Engineer Allstate CA San Francisco 97656 \n",
748 | "1089 Data Engineer CyberCoders CA San Francisco -1 \n",
749 | "1100 Data Engineer Prabhav Services Inc CA San Francisco -1 \n",
750 | "1105 Data Engineer Skupos CA San Francisco 83068 \n",
751 | "1140 Data Engineer Modern Health CA San Francisco 100959 \n",
752 | "1144 Data Engineer Zypmedia CA San Francisco 99278 \n",
753 | "1165 Data Engineer DotSolved Systems, Inc. CA San Francisco -1 \n",
754 | "\n",
755 | " Max_Salary Job_Desc \\\n",
756 | "935 118008 About Rocket LawyerWe believe everyone deserve... \n",
757 | "1015 124595 With one application you can be considered for... \n",
758 | "1068 155464 Our mission is to create a world where mental ... \n",
759 | "1081 112456 Avail is a new car sharing platform focused on... \n",
760 | "1089 -1 Data Engineer \\nIf you are a Data Engineer wit... \n",
761 | "1100 -1 Prabhav Services Inc. is one of the premier pr... \n",
762 | "1105 99451 About Skupos\\nSkupos is the data platform for ... \n",
763 | "1140 124595 Modern Health-Modern Health is a mental health... \n",
764 | "1144 122333 Data Engineer\\n\\nZypMedia has built an enterpr... \n",
765 | "1165 -1 Data Engineer Minimum 7- 8 years experience No... \n",
766 | "\n",
767 | " Industry Rating Date_Posted Valid_until Job_Type \n",
768 | "935 Information Technology 3.5 2020-04-23 2020-06-05 FULL_TIME \n",
769 | "1015 NaN NaN 2020-04-25 2020-06-05 FULL_TIME \n",
770 | "1068 Health Care 4.1 2020-04-29 2020-06-05 FULL_TIME \n",
771 | "1081 Insurance 3.4 2020-05-06 2020-06-05 FULL_TIME \n",
772 | "1089 Business Services 4.1 2020-05-02 2020-06-05 FULL_TIME \n",
773 | "1100 Information Technology 4.6 2020-05-06 2020-06-05 FULL_TIME \n",
774 | "1105 Information Technology 5.0 2020-04-24 2020-06-05 FULL_TIME \n",
775 | "1140 Information Technology 5.0 2020-04-30 2020-06-05 FULL_TIME \n",
776 | "1144 Business Services 4.2 2020-05-01 2020-06-05 FULL_TIME \n",
777 | "1165 Information Technology 4.9 2020-05-05 2020-06-05 FULL_TIME "
778 | ]
779 | },
780 | "execution_count": 23,
781 | "metadata": {},
782 | "output_type": "execute_result"
783 | }
784 | ],
785 | "source": [
786 | "df.loc[df['Job_title'] == 'Data Engineer'].head(10)"
787 | ]
788 | },
789 | {
790 | "cell_type": "code",
791 | "execution_count": 24,
792 | "metadata": {},
793 | "outputs": [
794 | {
795 | "data": {
796 | "text/plain": [
797 | "\"Our mission is to create a world where mental health is never an obstacle.\\n\\nGinger is transforming how behavioral healthcare is delivered by making it easy for people to get the support they need, when they need it, through on-demand coaching, teletherapy, telepsychiatry, and guided self-care.\\n\\nBusinesses purchase Ginger for their employee's or member's benefit. Through Ginger's secure mobile app, people around the world receive immediate emotional support that's personalized to their needs, and completely confidential. Ginger's high-quality, team-based care works — 70% of people show significant improvement after 12 weeks of using Ginger.\\n\\nAt Ginger, people are at the heart of what we do. We believe that diverse and inclusive teams make our company better. Teams with individuals that bring different perspectives to challenges are more innovative, collaborative, and create better solutions. We're building a workplace that actively embraces a diversity of people, ideas, talents, and experiences. Come join us!\\n\\nFast Company called Ginger one of The World's Top 10 Most Innovative Companies in Healthcare and the World Economic Forum named us a Technology Pioneer.\\n\\nAbout the Role:\\n\\nAt Ginger, we aim to provide better mental health care to humanity at a scale larger than has ever been possible before. This is no small task and as an expanding team we are working on a number of initiatives to achieve this, including aggressively building tools to simultaneously grow our reach and improve quality of care.\\n\\nWhat You'll Do:\\n\\nStanding at the center of multiple teams (data science, engineering) and core systems, you'll..\\n\\n\\nOpen up our data to uncover important patterns at the level of individuals and sub-populations.\\nSurface, serve and persist key actionable insights in mental health, healthy habit formation, goal pursuit, and care efficacy.\\nHelp us scale our services using modern distributed processing tools and GPUs in the cloud (AWS)\\nCollaborate with product to ideate and unlock features which derive as much actionable information from our data (text, media, activity etc) as possible.\\nHelp architect systems for near-real-time delivery of recommendations, care insights and other time-critical information to coaches and members.\\nDesign lightweight data schemas appropriate for storing, organizing and joining processed communication and care analytics.\\nDevise the tooling that takes us from algorithm prototype to production and can track data/model lineage and statistical drift through time.\\nDevelop pipelines that efficiently and reliably route output of machine learning algorithms to consumer processes and persistence mechanisms.\\nOwn operational scalability of our algorithms, systems and data models.\\nStand up infrastructure for optimal extraction, transformation, and loading of data from a wide variety of data sources using SQL, Python and AWS tools.\\nWork with a variety of stakeholders including the Data, Product, Engineering, Security and Executive teams to support their data accessibility needs.\\n\\nNecessary Skills:\\n\\nDatabases SQL/NoSQL 4+ years\\nCloud platform experience 3+ years\\nSQL 4+ years\\nSchema design 2+ years\\nAmazon Web Services (AWS) 2+ years\\nDeployment pipelines 2+ years\\nPython 2+ years\\nDeploying to production systems with active customers 2+ years\\nDistributed computing (e.g Spark, Hadoop etc.) 
3+ years\\nInfrastructure monitoring 1+ years\\nWide variety of data warehouse, data lake (s3) etc familiarity\\nAnalytics experience working with structured and unstructured data\\nProject lead (self-managing) 1+ years\\nBachelors in technical field or experiential equivalent\\n\\nIdeal Skills:\\n\\nAmazon Web Services (AWS) 3+ years\\nAWS Lambda, Sagemaker\\nDocker / Kubernetes\\nDB performance engineering\\nMachine Learning (ML) 1+ years\\nRunning ML on GPUs 1+ years\\nPython 3+ years\\nStrong analytics intuition grounded in significant experience\\nExperience in the healthcare space\\nMasters in technical field or experiential equivalent\\n\""
798 | ]
799 | },
800 | "execution_count": 24,
801 | "metadata": {},
802 | "output_type": "execute_result"
803 | }
804 | ],
805 | "source": [
806 | "df['Job_Desc'][1068]"
807 | ]
808 | },
809 | {
810 | "cell_type": "code",
811 | "execution_count": null,
812 | "metadata": {},
813 | "outputs": [],
814 | "source": []
815 | }
816 | ],
817 | "metadata": {
818 | "kernelspec": {
819 | "display_name": "Python 3",
820 | "language": "python",
821 | "name": "python3"
822 | },
823 | "language_info": {
824 | "codemirror_mode": {
825 | "name": "ipython",
826 | "version": 3
827 | },
828 | "file_extension": ".py",
829 | "mimetype": "text/x-python",
830 | "name": "python",
831 | "nbconvert_exporter": "python",
832 | "pygments_lexer": "ipython3",
833 | "version": "3.7.3"
834 | }
835 | },
836 | "nbformat": 4,
837 | "nbformat_minor": 4
838 | }
839 |
--------------------------------------------------------------------------------
/Exploratory Data Analysis/README.md:
--------------------------------------------------------------------------------
1 | # Exploratory Data Analysis
2 |
3 | In this module, test datasets are created to check how well the recommender model performs. The notebook `Job_Posts_EDA.ipynb` examines the dataset of
4 | job posts from Glassdoor. It finds issues in this dataset, such as duplicate rows and advertisements mixed in with genuine listings (a quick check is sketched below). However, because
5 | only a small portion of this data is used for testing the recommender model, these issues do not affect the project and are not fixed here.
6 |
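A minimal sketch of the kind of checks described above, assuming the relative CSV path and the `Job_Desc` column name used inside `Job_Posts_EDA.ipynb` (the advert filter phrase is one that appears in several aggregator posts in this dataset and is only an illustrative heuristic):

```python
import pandas as pd

# Path as used in Job_Posts_EDA.ipynb
df = pd.read_csv('../Data/Job_Data/Glassdoor_Joblist.csv')

# Fully duplicated postings
print(df.duplicated().sum(), 'duplicate rows out of', len(df))

# Rough screen for aggregator adverts rather than real listings
ad_like = df['Job_Desc'].str.contains('With one application you can be considered',
                                      case=False, na=False)
print(ad_like.sum(), 'advert-like postings')
```
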
7 | A small set of labeled data is then built in `Create_Test_Set.ipynb` to test the Doc2Vec model. Specifically, 10 sample job descriptions under 2 job titles (Data Scientist and Data
8 | Engineer) are selected, and each of these 2 job titles is matched with 5 courses that the model should ideally recommend (see the sketch below).
9 | This sample data is then used to measure the accuracy of the model.
10 |
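For reference, a condensed and lightly reorganized sketch of how `Create_Test_Set.ipynb` builds these two files. It assumes `jobs_df` and `courses_df` are the job-posting and course tables loaded earlier in that notebook; the `ds_courses`/`de_courses` index lists are reconstructed here from the labeled output shown in the notebook:

```python
# Job postings sampled by index and labeled by their original titles
ds_jobs = [901, 910, 916, 920, 938]        # Data Scientist postings
de_jobs = [935, 1068, 1089, 1100, 1105]    # Data Engineer postings
sample_jobs = jobs_df.loc[ds_jobs + de_jobs, ['Job_title', 'Job_Desc']]
sample_jobs['Job_id'] = sample_jobs.index
sample_jobs.to_csv('jobs_test_sample.csv', index=False)

# Courses labeled with the job title they should be recommended for
ds_courses = [3823, 143, 3165, 3588, 2517]
de_courses = [545, 1015, 4233, 3763, 1311]
sample_courses = courses_df.loc[ds_courses + de_courses, ['name', 'description']]
sample_courses['job_title'] = None
sample_courses.loc[ds_courses, 'job_title'] = 'Data Scientist'
sample_courses.loc[de_courses, 'job_title'] = 'Data Engineer'
sample_courses['course_id'] = sample_courses.index
sample_courses.to_csv('courses_test_sample.csv', index=False)
```

The resulting `jobs_test_sample.csv` and `courses_test_sample.csv` are the files kept in this folder.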
11 |
--------------------------------------------------------------------------------
/Exploratory Data Analysis/courses_test_sample.csv:
--------------------------------------------------------------------------------
1 | name,description,job_title,course_id
2 | The Data Scientist’s Toolbox,"In this course you will get an introduction to the main tools and ideas in the data scientist's toolbox. The course gives an overview of the data, questions, and tools that data analysts and data scientists work with. There are two components to this course. The first is a conceptual introduction to the ideas behind turning data into actionable knowledge. The second is a practical introduction to the tools that will be used in the program like version control, markdown, git, GitHub, R, and RStudio.",Data Scientist,3823
3 | Machine Learning,"Machine learning is the science of getting computers to act without being explicitly programmed. In the past decade, machine learning has given us self-driving cars, practical speech recognition, effective web search, and a vastly improved understanding of the human genome. Machine learning is so pervasive today that you probably use it dozens of times a day without knowing it. Many researchers also think it is the best way to make progress towards human-level AI. In this class, you will learn about the most effective machine learning techniques, and gain practice implementing them and getting them to work for yourself. More importantly, you'll learn about not only the theoretical underpinnings of learning, but also gain the practical know-how needed to quickly and powerfully apply these techniques to new problems. Finally, you'll learn about some of Silicon Valley's best practices in innovation as it pertains to machine learning and AI.
4 |
5 | This course provides a broad introduction to machine learning, datamining, and statistical pattern recognition. Topics include: (i) Supervised learning (parametric/non-parametric algorithms, support vector machines, kernels, neural networks). (ii) Unsupervised learning (clustering, dimensionality reduction, recommender systems, deep learning). (iii) Best practices in machine learning (bias/variance theory; innovation process in machine learning and AI). The course will also draw from numerous case studies and applications, so that you'll also learn how to apply learning algorithms to building smart robots (perception, control), text understanding (web search, anti-spam), computer vision, medical informatics, audio, database mining, and other areas.",Data Scientist,143
6 | Applied Machine Learning in Python,"This course will introduce the learner to applied machine learning, focusing more on the techniques and methods than on the statistics behind these methods. The course will start with a discussion of how machine learning is different than descriptive statistics, and introduce the scikit learn toolkit through a tutorial. The issue of dimensionality of data will be discussed, and the task of clustering data, as well as evaluating those clusters, will be tackled. Supervised approaches for creating predictive models will be described, and learners will be able to apply the scikit learn predictive modelling methods while understanding process issues related to data generalizability (e.g. cross validation, overfitting). The course will end with a look at more advanced techniques, such as building ensembles, and practical limitations of predictive models. By the end of this course, students will be able to identify the difference between a supervised (classification) and unsupervised (clustering) technique, identify which technique they need to apply for a particular dataset and need, engineer features to meet that need, and write python code to carry out an analysis.
7 |
8 | This course should be taken after Introduction to Data Science in Python and Applied Plotting, Charting & Data Representation in Python and before Applied Text Mining in Python and Applied Social Analysis in Python.",Data Scientist,3165
9 | Data Visualization with Python,"""A picture is worth a thousand words"". We are all familiar with this expression. It especially applies when trying to explain the insight obtained from the analysis of increasingly large datasets. Data visualization plays an essential role in the representation of both small and large-scale data.
10 |
11 | One of the key skills of a data scientist is the ability to tell a compelling story, visualizing data and findings in an approachable and stimulating way. Learning how to leverage a software tool to visualize data will also enable you to extract information, better understand the data, and make more effective decisions.
12 |
13 | The main goal of this Data Visualization with Python course is to teach you how to take data that at first glance has little meaning and present that data in a form that makes sense to people. Various techniques have been developed for presenting data visually but in this course, we will be using several data visualization libraries in Python, namely Matplotlib, Seaborn, and Folium.
14 |
15 | LIMITED TIME OFFER: Subscription is only $39 USD per month for access to graded materials and a certificate.",Data Scientist,3588
16 | Machine Learning with Python,"This course dives into the basics of machine learning using an approachable, and well-known programming language, Python.
17 | In this course, we will be reviewing two main components:
18 | First, you will be learning about the purpose of Machine Learning and where it applies to the real world.
19 | Second, you will get a general overview of Machine Learning topics such as supervised vs unsupervised learning, model evaluation, and Machine Learning algorithms.
20 |
21 | In this course, you practice with real-life examples of Machine learning and see how it affects society in ways you may not have guessed!
22 |
23 | By just putting in a few hours a week for the next few weeks, this is what you’ll get.
24 | 1) New skills to add to your resume, such as regression, classification, clustering, sci-kit learn and SciPy
25 | 2) New projects that you can add to your portfolio, including cancer detection, predicting economic trends, predicting customer churn, recommendation engines, and many more.
26 | 3) And a certificate in machine learning to prove your competency, and share it anywhere you like online or offline, such as LinkedIn profiles and social media.
27 |
28 | If you choose to take this course and earn the Coursera course certificate, you will also earn an IBM digital badge upon successful completion of the course.",Data Scientist,2517
29 | Databases and SQL for Data Science,"Much of the world's data resides in databases. SQL (or Structured Query Language) is a powerful language which is used for communicating with and extracting data from databases. A working knowledge of databases and SQL is a must if you want to become a data scientist.
30 |
31 | The purpose of this course is to introduce relational database concepts and help you learn and apply foundational knowledge of the SQL language. It is also intended to get you started with performing SQL access in a data science environment.
32 |
33 | The emphasis in this course is on hands-on and practical learning . As such, you will work with real databases, real data science tools, and real-world datasets. You will create a database instance in the cloud. Through a series of hands-on labs you will practice building and running SQL queries. You will also learn how to access databases from Jupyter notebooks using SQL and Python.
34 |
35 | No prior knowledge of databases, SQL, Python, or programming is required.
36 |
37 | Anyone can audit this course at no-charge. If you choose to take this course and earn the Coursera course certificate, you can also earn an IBM digital badge upon successful completion of the course.
38 |
39 | LIMITED TIME OFFER: Subscription is only $39 USD per month for access to graded materials and a certificate.",Data Engineer,545
40 | Google Cloud Platform Big Data and Machine Learning Fundamentals,"This 2-week accelerated on-demand course introduces participants to the Big Data and Machine Learning capabilities of Google Cloud Platform (GCP). It provides a quick overview of the Google Cloud Platform and a deeper dive of the data processing capabilities.
41 |
42 | At the end of this course, participants will be able to:
43 | • Identify the purpose and value of the key Big Data and Machine Learning products in the Google Cloud Platform
44 | • Use CloudSQL and Cloud Dataproc to migrate existing MySQL and Hadoop/Pig/Spark/Hive workloads to Google Cloud Platform
45 | • Employ BigQuery and Cloud Datalab to carry out interactive data analysis
46 | • Choose between Cloud SQL, BigTable and Datastore
47 | • Train and use a neural network using TensorFlow
48 | • Choose between different data processing products on the Google Cloud Platform
49 |
50 | Before enrolling in this course, participants should have roughly one (1) year of experience with one or more of the following:
51 | • A common query language such as SQL
52 | • Extract, transform, load activities
53 | • Data modeling
54 | • Machine learning and/or statistics
55 | • Programming in Python
56 |
57 | Google Account Notes:
58 | • Google services are currently unavailable in China.",Data Engineer,1015
59 | Big Data Modeling and Management Systems,"Once you’ve identified a big data issue to analyze, how do you collect, store and organize your data using Big Data solutions? In this course, you will experience various data genres and management tools appropriate for each. You will be able to describe the reasons behind the evolving plethora of new big data platforms from the perspective of big data management systems and analytical tools. Through guided hands-on tutorials, you will become familiar with techniques using real-time and semi-structured data examples. Systems and tools discussed include: AsterixDB, HP Vertica, Impala, Neo4j, Redis, SparkSQL. This course provides techniques to extract value from existing untapped data sources and discovering new data sources.
60 |
61 | At the end of this course, you will be able to:
62 | * Recognize different data elements in your own work and in everyday life problems
63 | * Explain why your team needs to design a Big Data Infrastructure Plan and Information System Design
64 | * Identify the frequent data operations required for various types of data
65 | * Select a data model to suit the characteristics of your data
66 | * Apply techniques to handle streaming data
67 | * Differentiate between a traditional Database Management System and a Big Data Management System
68 | * Appreciate why there are so many data management systems
69 | * Design a big data information system for an online game company
70 |
71 | This course is for those new to data science. Completion of Intro to Big Data is recommended. No prior programming experience is needed, although the ability to install applications and utilize a virtual machine is necessary to complete the hands-on assignments. Refer to the specialization technical requirements for complete hardware and software specifications.
72 |
73 | Hardware Requirements:
74 | (A) Quad Core Processor (VT-x or AMD-V support recommended), 64-bit; (B) 8 GB RAM; (C) 20 GB disk free. How to find your hardware information: (Windows): Open System by clicking the Start button, right-clicking Computer, and then clicking Properties; (Mac): Open Overview by clicking on the Apple menu and clicking “About This Mac.” Most computers with 8 GB RAM purchased in the last 3 years will meet the minimum requirements.You will need a high speed internet connection because you will be downloading files up to 4 Gb in size.
75 |
76 | Software Requirements:
77 | This course relies on several open-source software tools, including Apache Hadoop. All required software can be downloaded and installed free of charge (except for data charges from your internet provider). Software requirements include: Windows 7+, Mac OS X 10.10+, Ubuntu 14.04+ or CentOS 6+ VirtualBox 5+.",Data Engineer,4233
78 | Database Management Essentials,"Database Management Essentials provides the foundation you need for a career in database development, data warehousing, or business intelligence, as well as for the entire Data Warehousing for Business Intelligence specialization. In this course, you will create relational databases, write SQL statements to extract information to satisfy business reporting requests, create entity relationship diagrams (ERDs) to design databases, and analyze table designs for excessive redundancy. As you develop these skills, you will use either Oracle, MySQL, or PostgreSQL to execute SQL statements and a database diagramming tool such as the ER Assistant or Visual Paradigm to create ERDs. We’ve designed this course to ensure a common foundation for specialization learners. Everyone taking the course can jump right in with writing SQL statements in Oracle, MySQL, or PostgreSQL.",Data Engineer,3763
79 | "Data Warehouse Concepts, Design, and Data Integration","This is the second course in the Data Warehousing for Business Intelligence specialization. Ideally, the courses should be taken in sequence.
80 |
81 | In this course, you will learn exciting concepts and skills for designing data warehouses and creating data integration workflows. These are fundamental skills for data warehouse developers and administrators. You will have hands-on experience for data warehouse design and use open source products for manipulating pivot tables and creating data integration workflows. In the data integration assignment, you can use either Oracle, MySQL, or PostgreSQL databases. You will also gain conceptual background about maturity models, architectures, multidimensional models, and management practices, providing an organizational perspective about data warehouse development. If you are currently a business or information technology professional and want to become a data warehouse designer or administrator, this course will give you the knowledge and skills to do that. By the end of the course, you will have the design experience, software background, and organizational context that prepares you to succeed with data warehouse development projects.
82 |
83 | In this course, you will create data warehouse designs and data integration workflows that satisfy the business intelligence needs of organizations. When you’re done with this course, you’ll be able to:
84 | * Evaluate an organization for data warehouse maturity and business architecture alignment;
85 | * Create a data warehouse design and reflect on alternative design methodologies and design goals;
86 | * Create data integration workflows using prominent open source software;
87 | * Reflect on the role of change data, refresh constraints, refresh frequency trade-offs, and data quality goals in data integration process design; and
88 | * Perform operations on pivot tables to satisfy typical business analysis requests using prominent open source software",Data Engineer,1311
89 |
--------------------------------------------------------------------------------
/Exploratory Data Analysis/jobs_test_sample.csv:
--------------------------------------------------------------------------------
1 | Job_title,Job_Desc,Job_id
2 | Data Scientist,"We are looking for Data Scientists who are interested in using data to draw insights that will result in policy changes or business process optimisation, benefiting the public. The applicant will be scoping projects with stakeholders, using data sets across Government Agencies, applying business acumen to tease out relevant impactful insights, and presenting insights in a clear, concise manner by using appropriate visualisations.
3 |
4 | He/she should have some training and working experiences on data analytics, and should be comfortable with hands-on data manipulation, data modelling and data visualisation. He/she should also be comfortable with engaging stakeholders on sharpening their business problems.
5 |
6 | The analytics work that we do are typically action oriented and cross-cutting across various domains such as social, economic and infrastructure sectors. Over time, he/she will gain exposure to various policy and ops domains and become more adept in bridging between business users and technical expertise.
7 |
8 | What to Expect:
9 |
10 | Work closely with stakeholders to understand their business challenges, scope the problem and develop business case on how to turn data into critical information and knowledge that are actionable and impactful,. Perform data cleaning, pre-processing, feature engineering and build relevant models to conduct meaningful analysis. Apply appropriate visualisation techniques to communicate the insight effectively. Iterate with the stakeholders to perform subsequent deep dives based on the initial insights.Depending on the use case, design of dashboards and interactive visualisations as tools for data exploration and storytelling may be expected. Potentially deployed to other Government Agencies to be their resident Data Scientist. This will involve formulating and implementing strategies to build strong pipeline of impactful projects at the Agency and executing these projects.
11 |
12 | How to Succeed:
13 |
14 |
15 |
16 | Bachelor Degree in Computer Science, Statistics, Economics, Quantitative Social Science, or related degrees. Advanced degrees preferred. We will also factor in relevant certifications (e.g., Coursera)Minimum 2 years of relevant working experience, preferably in public sector or data science fieldAbility to take a broad, strategic perspective as well as drill deep to understand business needs and challengesUnderstand key concepts, techniques and considerations in machine learning and data analyticsTraining and relevant experience in one or more of the following areas:
17 |
18 |
19 | Data science tools such as R, PythonVisual analytics technologies like Tableau, Qlik
20 | Excellent communication skills, both oral and written, with ability to pitch ideas and influence stakeholdersStrong analytical, conceptualisation and problem solving skillsTeam player with strong organization and people handling skillsPassion for the use of analytics and data to improve Public Service
21 | ",901
22 | Data Scientist,"The world's largest and fastest-growing companies such as Accenture, Adobe, DocuSign and Salesforce rely on Demandbase to drive their Account-Based Marketing strategy and maximize their B2B marketing performance. We pioneered the ABM category nearly a decade ago, and today we lead the category as an indispensable part of the B2B MarTech stack. Our achievements and innovation would not be possible without the driven and collaborative teams here at Demandbase. As a company, we're as committed to growing careers as we are to building word-class technology. We invest heavily in people, our culture and the community around us, and have continuously been recognized as one of the best places to work in the Bay Area.
23 |
24 | Demandbase is currently looking for a Staff Data Scientist to develop ground-breaking insights from our data sets and create a completely new way of data-driven thinking in B2B marketing —providing Sales and Marketing users with unique approaches for account-based advertising and web engagement.
25 |
26 | As a Staff Data Scientist, you'll be responsible for developing and testing hypotheses on behavioral responses in B2B marketing, creating models that extract data from, among others, website, digital advertising, and CRM solutions into actionable insights, and defining leading edge thinking on how analytical frameworks can be applied to predictive marketing. You'll engage closely with product managers, engineers, customers and others to turn your models into products that delight customers and create ""A-HA"" moments. You will engage with industry peers and experts and showcase your findings (of course while maintaining company and client confidentiality!). You are both hands-on and strategic—with both a broad ecosystem-level understanding of our market space and the ability to work closely with engineering and product teams to deliver software in an iterative, continual-release environment. This is a high-visibility position involving close collaboration across functional groups and with executive stakeholders at customers like the above.
27 |
28 | What you'll be doing:
29 |
30 | Own: Be the functional owner of the Data Science role
31 | Frame: Use data and insights to explore questions our customers and product team can and should be asking but never asked before.
32 | Define: Work with customers and internal stakeholders to define hypotheses and models, and with engineering teams to define productionalization of data science system
33 | Document: Write clear, concise descriptions of how insights can be converted into repeatable actions.
34 | Build: Write robust machine learning pipelines and data science systems that interface with production infrastructure and APIs
35 | Test: Continually test your models and refine assumptions, data sources and more.
36 | Drive: Work to spread understanding and buy-in among all stakeholders at all levels.
37 | Other duties as assigned
38 |
39 | What we're looking for:
40 |
41 | 2-4 years of data science experience—you have driven more than one greenfield project from concept to production release
42 | Strong quantitative and data analysis abilities (statistics, engineering, or financial academic background preferred)—making data actionable must be your thing!
43 | Good working knowledge of Spark is a must (we use Scala heavily)
44 | Any experience with Google Cloud (especially BQML) and AWS is a huge plus.
45 | Experience defining products & solutions containing large data sets from diverse sources— preferably in sales and/or marketing situations.
46 | Prior experience in the marketing or sales analytics/data science space desired
47 | Knowledge of web site, digital marketing, and CRM technologies and companies a big plus
48 |
49 | Other important qualities:
50 |
51 | You are perfectly comfortable working in a fast paced, market making environment
52 | You love data and data visualization—you love making data actionable for customers
53 | You are a driver and a doer
54 | You are truly passionate about asking and answering questions – some never asked before
55 | You have a strong sense of ownership for the products you help build
56 |
57 | Benefits:
58 |
59 | Our benefits include 100% paid for Medical, Dental and Vision for you and your entire family, 100% paid for short-term and long-term disability, 100% paid for life insurance, 401k, flexible vacation
60 |
61 | About Demandbase:
62 |
63 | Demandbase is the leader in Account-Based Marketing (ABM) and an indispensable part of the B2B tech stack. The company offers the only end-to-end ABM platform that helps B2B marketers identify, engage, close and measure progress against best-fit accounts. The biggest and fastest growing companies in the world, such as Accenture, Adobe, DocuSign, GE, Salesforce and others rely on Demandbase to drive their ABM strategy and maximize their marketing performance. The company has been named to the JMP Securities list ""The Hot 100: The Best Privately Held Software Companies,"" the Deloitte Fast 500 and named a Gartner Cool Vendor for Tech Go-To Market. In 2019, Demandbase executives authored the definitive book on ABM, Account-Based Marketing: How to Target and Engage the Companies That Will Grow Your Revenue. For more information, please visit www.demandbase.com or follow the company on Twitter @Demandbase.",910
64 | Data Scientist,"
65 | Role: Data Scientist.
66 |
67 | Location: Foster City, CA
68 |
69 | Hire Type: 12 Months Contract
70 |
71 | Job Description:
72 |
73 | Advanced degree in Data Science, Statistics, Computer Science, or similar.
74 |
75 | Extensive experience as a Data Scientist.
76 |
77 | Proficiency in R or Python, where the former is preferred.
78 |
79 | In-depth understanding of SQL.
80 |
81 | Competent in machine learning principles and techniques.
82 |
83 | Demonstrable history of devising and overseeing data-centered projects.
84 |
85 | Ability to relay insights in layman's terms, such that these can be used to inform business decisions.
86 |
87 | Outstanding supervision and mentorship abilities.
88 |
89 | Capacity to foster a healthy, stimulating work environment that frequently harnesses teamwork.
90 |
91 | ",916
92 | Data Scientist,"Upstart is the leading AI lending platform partnering with banks to expand access to affordable credit. Forbes recently ranked Upstart #12 on its list of ""most promising AI companies in America."" By leveraging Upstart's AI platform, Upstart-powered banks can have higher approval rates and lower loss rates, while simultaneously delivering the exceptional digital-first lending experience their customers demand. Upstart's patent-pending platform is the first to receive a no-action letter from the Consumer Financial Protection Bureau related to fair lending. Upstart is based in San Mateo, California and Columbus, Ohio.
93 |
94 | The Role
95 |
96 | Our data science team consists of full-stack generalists as well as specialists in statistical modeling or machine learning. Because our challenges are so new, data scientists at Upstart need strong creative problem-solving skills and the technical background to implement solutions. Our research environment affords team members the opportunity to utilize a variety of statistical and machine learning methods with the freedom and encouragement to pursue alternative approaches to solving problems. Whether developing new products or identifying novel approaches to core models, we are continuously seeking the next big ideas to move our business forward.
97 |
98 | Our current Data Scientists summarize some of their favorite aspects of our team as:
99 |
100 |
101 | Having a direct impact on the company's success
102 | Collaborative, intelligent and open team
103 | Mentorship, growth and friendship
104 | Leaders committed to challenging and growing team members
105 | Feeling safe asking for help when it's necessary; feeling trusted to get the job done when it's not
106 |
107 | Hiring Profile
108 |
109 | Strong academic credentials with a M.S. in Computer Science, Statistics, Data Science or a related field of study with a preference for Ph.D.
110 | Comfort with programming (ideally in Python and R)
111 | Rigorous quantitative background
112 | Predictive modeling experience is preferred
113 | Enthusiasm for and alignment with Upstart's mission and values
114 | Strong sense of intellectual curiosity balanced with humility
115 | Numerically-savvy with ability to operate at a speedy pace
116 |
117 | Most Upstarters join us because they connect with our mission of enabling access to effortless credit based on true risk. If you are energized by the impact you can make at Upstart, we would love to hear from you!",920
118 | Data Scientist,"Why Divvy?Over the past decade, millions of Americans have been forced to put their dreams of homeownership on hold. Home prices have outpaced wage growth while mortgage requirements continue to tighten. As a result, renters are missing out on a critical wealth-building opportunity: owning a home.At Divvy, we're building an on-ramp to homeownership – one that's more affordable, more flexible, and an overall better fit for the modern American family – and it’s working.We’re looking for a Data Scientist to join our growing company. In this role, you’ll ensure the financial viability of our business by developing our underwriting and/or pricing models. Developing this model will also mean simulating new financial product offerings which match customer needs to Divvy’s capacities. Day to day, this will include a mix of dataset acquisition, statistical modeling, exploratory data analysis, and software engineering. You’ll report directly to Divvy’s Head of Data Science and work alongside a team of 8-10 software engineers and data scientists.ResponsibilitiesBuild and refine our default and/or pricing models using structured dataIdentify, analyze, and acquire new data sources to improve model accuracyInfluence Divvy’s product offerings based on quantitative insightsBecome a domain expert in risk and/or pricingWork ExperienceYou have 3+ years of experience in machine learning, data science or analyticsYou have experience in either R or PythonYou have a strong understanding of statistical modeling techniquesYou demonstrate the ability to clearly communicate analysisBonus points for previous credit default modeling experience, risk management experience, and/or real estate pricing (AVM) experiencePerksCompetitive salary + equity Full benefits (medical, dental, vision, 401k, commuter) A beautiful dog-friendly office Diverse, smart, and witty co-workersCommitment to Diversity & InclusionWe prioritize a commitment to diversity in our team building process. We enthusiastically encourage individuals from a variety of lived experiences to reach out.",938
119 | Data Engineer,"About Rocket LawyerWe believe everyone deserves access to simple and affordable legal services.
120 | Founded in 2008, Rocket Lawyer is the largest and most widely used online legal service platform in the world. With offices in North America and Europe, Rocket Lawyer has helped over 20 million people create over 50 million legal documents, and get their legal questions answered.
121 | We are in a unique position to enhance and expand the Rocket Lawyer platform to a scale never seen before in the company’s history, to capture audiences worldwide. We are expanding our team to take on this challenge!
122 | About the RoleRocket Lawyer is looking for a Data Engineer that will contribute in all aspects of creating an analytical data driven environment. The core data engineering team is responsible for the building out the data pipeline, gathering internal and external data, generating metrics, managing and monitoring batch and streaming jobs, and implementing analytical tools to drive strategic decision making.A Day in the Life
123 |
124 |
125 | Evangelize Modern Big Data Practices Design warehouse schemas that accurately represent our business, and facilitate analysis and building of reportsHelp build batch and streaming data ingestion pipeline using Hadoop, Hive, Pig, Storm, and Kafka StreamsWrite ETL jobs to transform raw data into business information to drive decision makingDevelop analytical environment using internal and external reporting toolsIntegrate internal and external data with warehouse and external tools
126 |
127 |
128 | Experience
129 |
130 |
131 | Excellent technical skills including expert knowledge of the Hadoop ecosystemExperience of the analysis, design and development of Data Warehouse and Big Data solutions, including analyzing source systems, developing ETL design patterns and templates, ETL development, data profiling and data quality issues resolution.Project and team management experience Excellent communication skills and presentation skillsStrong SQL, Java, and Python skillsDatabase (relational & NoSQL), Data Warehouse knowledgeStream processing experience (Storm, Kafka Streams)Passion and enthusiasm for learning new technologies and techniqueComfortable with LinuxBS or MS in computer scienceDetail oriented and organizedDesire to learn broad set of technologies
132 |
133 |
134 | Benefits and Perks
135 |
136 |
137 | Comprehensive health plans (including Medical, Dental and Vision insurance for full-time employees)Unlimited PTOCompetitive salary packages401k programLife insuranceDisability benefitsFlexible Spending AccountsCommuter/Transit ProgramYour choice of a MAC or PCMonthly onsite masseuse sessionsWeekly Friday catered lunchesCompany sponsored events, both on- and off-site
138 |
139 |
140 | ",935
141 | Data Engineer,"Our mission is to create a world where mental health is never an obstacle.
142 |
143 | Ginger is transforming how behavioral healthcare is delivered by making it easy for people to get the support they need, when they need it, through on-demand coaching, teletherapy, telepsychiatry, and guided self-care.
144 |
145 | Businesses purchase Ginger for their employee's or member's benefit. Through Ginger's secure mobile app, people around the world receive immediate emotional support that's personalized to their needs, and completely confidential. Ginger's high-quality, team-based care works — 70% of people show significant improvement after 12 weeks of using Ginger.
146 |
147 | At Ginger, people are at the heart of what we do. We believe that diverse and inclusive teams make our company better. Teams with individuals that bring different perspectives to challenges are more innovative, collaborative, and create better solutions. We're building a workplace that actively embraces a diversity of people, ideas, talents, and experiences. Come join us!
148 |
149 | Fast Company called Ginger one of The World's Top 10 Most Innovative Companies in Healthcare and the World Economic Forum named us a Technology Pioneer.
150 |
151 | About the Role:
152 |
153 | At Ginger, we aim to provide better mental health care to humanity at a scale larger than has ever been possible before. This is no small task and as an expanding team we are working on a number of initiatives to achieve this, including aggressively building tools to simultaneously grow our reach and improve quality of care.
154 |
155 | What You'll Do:
156 |
157 | Standing at the center of multiple teams (data science, engineering) and core systems, you'll..
158 |
159 |
160 | Open up our data to uncover important patterns at the level of individuals and sub-populations.
161 | Surface, serve and persist key actionable insights in mental health, healthy habit formation, goal pursuit, and care efficacy.
162 | Help us scale our services using modern distributed processing tools and GPUs in the cloud (AWS)
163 | Collaborate with product to ideate and unlock features which derive as much actionable information from our data (text, media, activity etc) as possible.
164 | Help architect systems for near-real-time delivery of recommendations, care insights and other time-critical information to coaches and members.
165 | Design lightweight data schemas appropriate for storing, organizing and joining processed communication and care analytics.
166 | Devise the tooling that takes us from algorithm prototype to production and can track data/model lineage and statistical drift through time.
167 | Develop pipelines that efficiently and reliably route output of machine learning algorithms to consumer processes and persistence mechanisms.
168 | Own operational scalability of our algorithms, systems and data models.
169 | Stand up infrastructure for optimal extraction, transformation, and loading of data from a wide variety of data sources using SQL, Python and AWS tools.
170 | Work with a variety of stakeholders including the Data, Product, Engineering, Security and Executive teams to support their data accessibility needs.
171 |
172 | Necessary Skills:
173 |
174 | Databases SQL/NoSQL 4+ years
175 | Cloud platform experience 3+ years
176 | SQL 4+ years
177 | Schema design 2+ years
178 | Amazon Web Services (AWS) 2+ years
179 | Deployment pipelines 2+ years
180 | Python 2+ years
181 | Deploying to production systems with active customers 2+ years
182 | Distributed computing (e.g Spark, Hadoop etc.) 3+ years
183 | Infrastructure monitoring 1+ years
184 | Wide variety of data warehouse, data lake (s3) etc familiarity
185 | Analytics experience working with structured and unstructured data
186 | Project lead (self-managing) 1+ years
187 | Bachelors in technical field or experiential equivalent
188 |
189 | Ideal Skills:
190 |
191 | Amazon Web Services (AWS) 3+ years
192 | AWS Lambda, Sagemaker
193 | Docker / Kubernetes
194 | DB performance engineering
195 | Machine Learning (ML) 1+ years
196 | Running ML on GPUs 1+ years
197 | Python 3+ years
198 | Strong analytics intuition grounded in significant experience
199 | Experience in the healthcare space
200 | Masters in technical field or experiential equivalent
201 | ",1068
202 | Data Engineer,"Data Engineer
203 | If you are a Data Engineer with several years of relevant experience, please read on!We are poised to triple our customer base AGAIN in 2020 and we need a Data Engineer to help us manage the growth! Our tech stack: AWS, Aptible, Postgres, Redis, Rails, Python, Airflow, Mode Analytics, Android, React, and React Native.
204 |
205 | What You Will Be Doing
206 | - Maintain our current ETL-lite while scaling it for the future- Create and maintain views and expand use of rollup tables- Identify opportunities to improve the integrity of our datasets and implement the fixes- Assist in building out our payments platform for managing medical claims- Help explore options for delivering data to clients, including possible API access- Inform our 2020 objectives and key results around scaling and data needs
207 | What You Need for this Position
208 | Requirements: - Bachelors degree in C.S. or comparable degree preferred- Minimum of 3 years relevant experience in data engineering- Ability to collaborate and problem solve across teams- Excellent communication skills, both written and verbal- Python: using community-standards, linting, and testing at all appropriate levels.- SQL: comfort with joins, unions, views, rollups, windowing functions, testing- JSON parsing and fluency with RESTful APIs- Operational competency with cloud-hosted systems such as AWS, Aptible, or Heroku- Ability to correlate data across multiple sources: RDBs, csv, json- Understands how to write efficient code and can optimize existing software and queriesNice to Have: - Prior experience with healthcare data (PHI/PII/HIPAA requirements)- Experience developing software in Ruby on Rails- Understanding of user experience principles- History of technical writing
209 | What's In It for You
210 | - Competitive compensation with meaningful stock options- Medical, dental, vision- 401K match - 3 months paid parental leave- Daily lunch- Professional development budget - Monthly fitness/gym reimbursement - Annual mental wellness benefit - Noise-cancelling headphones - Work from home policy- Opportunity to join a fantastically talented, diverse, and passionate team at a pivotal time in the companys lifecycle
211 |
212 | So, if you are a Data Engineer with the required experience, please apply today!
213 | - Applicants must be authorized to work in the U.S.
214 | CyberCoders, Inc is proud to be an Equal Opportunity Employer
215 |
216 | All qualified applicants will receive consideration for employment without regard to race, color, religion, sex, national origin, disability, protected veteran status, or any other characteristic protected by law.
217 |
218 | Your Right to Work In compliance with federal law, all persons hired will be required to verify identity and eligibility to work in the United States and to complete the required employment eligibility verification document form upon hire.",1089
219 | Data Engineer,"Prabhav Services Inc. is one of the premier preferred vendor with many end clients, we have offices in USA, Canada and India, we do sponsor H1B for right candidate and do the Greencard immediately as required, we are looking for candidates for next year as well so if you or any of your friends are looking for job feel free to refer to me on parinprabhavonline.com Currently we are seeking the candidates for Data Engineer with excellent in implement automation, data modeling, data wrangling, data analysis, and data vision solutions to complex problems, processes, and scenarios. Familiarity with common data structures and languages. Responsibilities Perform data stream design, integration engineering including a full understanding and support for a typical Capture-Ingestion-Storage-Validation-Analysis-Visualization process Process mapping and automation Wrangling structured, unstructured and poorly structured data into appropriate data structures Develop data architectures that improve automation, processes, data flow and analyses (including recommendations in systems owned by other organizations) Establish objectives, formulate methodologies, and help coordinate fusion of data science, data architecture, data visualization, and data management streams and teams Identify opportunities to further build out our IoT strategy ExperienceSkillsAbilities At least 3 years of experience working as a Systems Integrator, Data Engineer, Software Engineer or similar position demonstrating the ability to design and implement automation, data modeling, data wrangling, data analysis, and data vision solutions to complex problems, processes, and scenarios. Familiarity with common data structures and languages. BS Computer Science, ComputerElectrical Engineering, or Math Degree or relevant experience. Experience with IoT, cloud computing, distributed data systems Experience working with statistical teams andor data scientists ToolsProgramming Experience SQL, Python, R, JS, HTML, CSS, BI, Tableau, AWS (to work and reorder this), excel, R, DB conceptsprogramming, object-orientated languages(e.g. Java, C++), other scripting languages, programming skills Friendly and approachable, with strong communication and presentation skills Desire to keep current with a challenging and evolving environment Team focused and self-motivated. Able to work as part of a coordinated team, yet independently when necessary Proven abilities to take initiative and to be innovative have an analytical mind with a problem-solving aptitude",1100
220 | Data Engineer,"About Skupos
221 | Skupos is the data platform for the convenience retail industry. Retailers, distributors, and brands connect to the Skupos network to create value from disparate data. Convenience retail is a long-standing industry with limited technology adoption, but is responsible for more revenue annually than all of e-commerce in the United States. Skupos leverages our massive datasets to build tools that help the industry succeed.
222 | Skupos software integrates at a retailer’s point of sale, generates analytical insights, and automates the inventory and ordering process. For distributors and CPG brands, we provide real-time visibility into consumer purchasing decisions and enable automated promotional discounts at the point of sale. We view our company as revolutionizing a brick-and-mortar industry by bringing cutting-edge technology to physical stores, and helping harness data to create a frictionless connection between millions of people and the products they need.
223 | What You'll Do
224 | Skupos is seeking a Data Engineer to help build the foundation of our big data platform. As we pave the way for our data product offerings, you will architect, develop and deploy data solutions at scale using modern data technologies. You will have an opportunity to drive the tech stack for this platform. Come, join us and be in charge of your career trajectory and leverage coaching/mentorship opportunities with your manager to write your own success story at Skupos and beyond.
225 |
226 | Build data pipelines for the end to end data ecosystem:Data integration and Ingestion from multiple external data providers/partners.Data processing in accordance with product requirements, ensuring data security and compliance throughout the pipeline. Data Storage layer - Maintain a Single Source of Truth. Data Access layer - Make data available for reporting, dashboards, analytics, business intelligence needs.Data Science and Machine Learning modeling. Collaborate with cross-functional technical, product and business teams to take ownership of data projects to ensure a complete end to end customer experience.Research and recommend technologies to build data solutions at scale with near real-time processing of data using service oriented architecture. Improve project delivery and decrease process redundancy and overhead.Foster a lean agile development culture within the team with emphasis on code quality and software best practices. Join the foundational core data engineering team and play an instrumental role in hiring your future teammates.
227 |
228 |
229 | What You Should Have
230 |
231 |
232 | BA/BS in business, computer science; or similar degree in a related field or equivalent experience with demonstrated proficiency3+ years of hands-on experience building big data pipelines using streaming technologies (Kafka, Spark, or similar) in cloud environments. 5+ years experience in database technologies, including RDBMS, NoSQL, Document storage, graphs, and distributed file systems.Advanced skills with functional programming languages (Scala, Python, R, Java, or similar)Experience with data warehouse architecture and data modeling for Business Intelligence.Working knowledge of BI tools (Tableau, Looker, Snowflake, or similar)Excellent attention to detail and focused on execution through rapid iterations. Self Motivated individual with strong ethics who brings the best version of themselves to raise the bar for the entire team.
233 |
234 |
235 | What Makes You A Great Fit
236 |
237 |
238 | Startup experienceSubject matter knowledge on retail industryExperience building SAAS software
239 |
240 |
241 | What We Offer
242 | • Competitive salary• Healthcare benefits• 401K• Commuter benefits• Major role in a strong, small and growing development team• Be a part of a key platform of product offerings to the retail convenience store industry
243 | What are your goals and aspirations? Build your technical skills, business acumen, and leadership with Skupos.",1105
244 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Harsh Bardhan Mishra
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Other/Course_webpages.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "The purpose of this notebook is to try to retrieve the web addresses for the courses. They do not seem to be available through the API."
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 3,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "import requests\n",
18 | "import time"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 8,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "name": "stdout",
28 | "output_type": "stream",
29 | "text": [
30 | "Request status for page 0 is 200.\n",
31 | "Request status for page 1 is 200.\n",
32 | "Finished. The number of courses gotten from the catalog is 200\n"
33 | ]
34 | }
35 | ],
36 | "source": [
37 | "# Get the entire Coursera catalog.\n",
38 | "\n",
39 | "# Instantiate a list to hold the courses\n",
40 | "courses = []\n",
41 | "\n",
42 | "# Set the base url for making get requests\n",
43 | "base_url = 'https://api.coursera.org/api/courses.v1'\n",
44 | "\n",
45 | "# Add the fields I want to include in my requests\n",
46 | "fields = \"&fields=previewLink,photoURL\"\n",
47 | "\n",
48 | "# Loop through all 45 pages of the catalog\n",
49 | "for page in range(2):\n",
50 | " \n",
51 | " # set pagination\n",
52 | " pagination = f\"?start={page*100}&limit=100\"\n",
53 | "\n",
54 | " # make a request\n",
55 | " res = requests.get(base_url + pagination + fields)\n",
56 | " print(f'Request status for page {page} is {res.status_code}.')\n",
57 | " \n",
58 | " # convert from json\n",
59 | " dict = res.json()\n",
60 | " \n",
61 | " # add to the catalog dictionary\n",
62 | " for course in dict['elements']:\n",
63 | " courses.append(course)\n",
64 | " \n",
65 | " # delay time to next request\n",
66 | " time.sleep(2)\n",
67 | "\n",
68 | "print(f'Finished. The number of courses gotten from the catalog is {len(courses)}')"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 9,
74 | "metadata": {},
75 | "outputs": [
76 | {
77 | "name": "stdout",
78 | "output_type": "stream",
79 | "text": [
80 | "(200, 4)\n"
81 | ]
82 | },
83 | {
84 | "data": {
85 | "text/html": [
86 | "