├── .gitignore
├── Brief_Data_Splitter
├── data
│ ├── data.csv
│ └── data.json
├── repo_contributors_05.csv
└── script.ipynb
├── Brief_analyse_opportunités_emploi
├── bubble_chart.py
├── data
│ ├── cleaned_jobs.csv
│ └── jobs.csv
├── query.sql
└── script.ipynb
├── Brief_cleaning_data_of_github
└── cleaning_data_github.ipynb
├── Brief_insertion_data_in_database_sql
├── insertion_data.ipynb
└── reposdb.sql
├── Brief_scraping_github
└── scraping_github.ipynb
├── Brief_visualization_data_of_github
└── vusialization_data_github.ipynb
├── Challenge_CIty_Map
├── data
│ └── youssoufia.json
└── script.ipynb
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/Brief_Data_Splitter/script.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 251,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import pyodbc\n"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 260,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "#function to split data set\n",
20 | "def dataSplitter(path_csv,perJSon,perDataBase):\n",
21 | "\n",
22 | " data = pd.read_csv(path_csv)\n",
23 | "\n",
24 | " #shuffle the rows randomly.\n",
25 | " data_randed = data.sample(frac=1)\n",
26 | "\n",
27 | " #len of data \n",
28 | " lenOfData = len(data_randed)\n",
29 | "\n",
30 | " if(perJSon + perDataBase) == 100:\n",
31 | "\n",
32 | " p1 = round((lenOfData * perJSon) / 100)\n",
33 | " p2 = lenOfData - p1\n",
34 | "\n",
35 | " return data_randed[:p1], data_randed[p1:],[]\n",
36 | " #display(lenOfData,f'{perJSon}% = {p1}',f'{perDataBase}% = {p2}', p1+p2)\n",
37 | " \n",
38 | " elif(perJSon + perDataBase) < 100:\n",
39 | " p1 = round((lenOfData * perJSon) / 100)\n",
40 | " p2 = round((lenOfData * perDataBase) / 100)\n",
41 | " #p3 = lenOfData - (p1 + p2)\n",
42 | "\n",
43 | " return data_randed[:p1],data_randed[p1:p1+p2],data_randed[p1+p2:]\n",
44 | " #display(lenOfData,f'{perJSon}% = {p1}',f'{perDataBase}% = {p2}', f'{100 - (perDataBase + perJSon)}% = {p3}',p1+p2+p3)\n",
45 | " else:\n",
46 | " raise Exception('Saisir un pourcentage valide!')\n",
47 | "\n",
48 | "\n",
49 | "def goDataToJSon(dataToJson,fileName):\n",
50 | " dataToJson.to_json(path_or_buf=f'data/{fileName}.json',orient='records')\n",
51 | "\n",
52 | "def goDataToCsv(dataToCsv,fileName):\n",
53 | " dataToCsv.to_csv(f'data/{fileName}.csv')\n",
54 | "\n",
55 | "def goDataToDataBase(dataToDb):\n",
56 | " #connection to db\n",
57 | " cnxn = pyodbc.connect('Driver={SQL Server};'\n",
58 | " 'Server=XXXXXX-XXX\\SQLEXPRESS;'\n",
59 | " 'Database=splitterdb;'\n",
60 | " 'Trusted_Connection=yes;')\n",
61 | "\n",
62 | " cursor = cnxn.cursor()\n",
63 | " for index,row in dataToDb.iterrows():\n",
64 | " cursor.execute('INSERT INTO contributors VALUES(?,?,?)',row['full_name'],row['name'],row['commits'])\n",
65 | " cnxn.commit()\n",
66 | " cnxn.close()\n",
67 | " \n",
68 | "\n",
69 | "try:\n",
70 | " allDataFrame = dataSplitter('repo_contributors_05.csv',perJSon=30,perDataBase=20)\n",
71 | " \n",
72 | " dataToJson,dataToDatabase,dataToCsv = allDataFrame\n",
73 | "\n",
74 | " '''\n",
75 | " Création du fichier JSON\n",
76 | " '''\n",
77 | " if(len(dataToJson) > 0):\n",
78 | " goDataToJSon(dataToJson,'data')\n",
79 | "\n",
80 | " '''\n",
81 | " Création du fichier CSv\n",
82 | " '''\n",
83 | " if((len(dataToCsv)) > 0):\n",
84 | " goDataToCsv(dataToCsv,'data')\n",
85 | "\n",
86 | " '''\n",
87 | " Transfert vers la base de données\n",
88 | " '''\n",
89 | " if(len(dataToDatabase) > 0):\n",
90 | " goDataToDataBase(dataToDatabase)\n",
91 | "\n",
92 | "except Exception as e:\n",
93 | " print(e.args[0])\n"
94 | ]
95 | }
96 | ],
97 | "metadata": {
98 | "kernelspec": {
99 | "display_name": "base",
100 | "language": "python",
101 | "name": "python3"
102 | },
103 | "language_info": {
104 | "codemirror_mode": {
105 | "name": "ipython",
106 | "version": 3
107 | },
108 | "file_extension": ".py",
109 | "mimetype": "text/x-python",
110 | "name": "python",
111 | "nbconvert_exporter": "python",
112 | "pygments_lexer": "ipython3",
113 | "version": "3.10.10"
114 | },
115 | "orig_nbformat": 4
116 | },
117 | "nbformat": 4,
118 | "nbformat_minor": 2
119 | }
120 |
--------------------------------------------------------------------------------
/Brief_analyse_opportunités_emploi/bubble_chart.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 |
class BubbleChart:
    """Packed-bubble layout.

    Bubbles are placed on a square grid and then moved, step by step, towards
    their area-weighted center of mass without ever overlapping.

    Each row of ``self.bubbles`` describes one bubble as
    ``[x, y, radius, area]``.
    """

    def __init__(self, area, bubble_spacing=0):
        """
        Setup for bubble collapse.

        Parameters
        ----------
        area : array-like
            Area of the bubbles.
        bubble_spacing : float, default: 0
            Minimal spacing between bubbles after collapsing.

        Notes
        -----
        If "area" is sorted, the results might look weird.
        """
        area = np.asarray(area)
        r = np.sqrt(area / np.pi)

        self.bubble_spacing = bubble_spacing
        # columns: x, y, radius, area
        self.bubbles = np.ones((len(area), 4))
        self.bubbles[:, 2] = r
        self.bubbles[:, 3] = area
        # grid pitch: the largest diameter plus the spacing, so that no two
        # bubbles overlap in the initial layout
        self.maxstep = 2 * self.bubbles[:, 2].max() + self.bubble_spacing
        self.step_dist = self.maxstep / 2

        # calculate initial grid layout for bubbles
        length = np.ceil(np.sqrt(len(self.bubbles)))
        grid = np.arange(length) * self.maxstep
        gx, gy = np.meshgrid(grid, grid)
        self.bubbles[:, 0] = gx.flatten()[:len(self.bubbles)]
        self.bubbles[:, 1] = gy.flatten()[:len(self.bubbles)]

        self.com = self.center_of_mass()

    @staticmethod
    def _unit_vector(vec):
        """Return ``vec`` scaled to length 1, or None if its length is zero."""
        norm = np.sqrt(vec.dot(vec))
        if norm == 0:
            return None
        return vec / norm

    def center_of_mass(self):
        """Return the area-weighted mean ``[x, y]`` of all bubble centers."""
        return np.average(
            self.bubbles[:, :2], axis=0, weights=self.bubbles[:, 3]
        )

    def center_distance(self, bubble, bubbles):
        """Return the distances from ``bubble``'s center to each center in
        ``bubbles`` (only the first two entries, x and y, are used)."""
        return np.hypot(bubble[0] - bubbles[:, 0],
                        bubble[1] - bubbles[:, 1])

    def outline_distance(self, bubble, bubbles):
        """Return the gap between ``bubble``'s outline and each outline in
        ``bubbles``, reduced by ``bubble_spacing``; negative means too close."""
        center_distance = self.center_distance(bubble, bubbles)
        return center_distance - bubble[2] - \
            bubbles[:, 2] - self.bubble_spacing

    def check_collisions(self, bubble, bubbles):
        """Return how many of ``bubbles`` are closer to ``bubble`` than the
        required spacing allows."""
        distance = self.outline_distance(bubble, bubbles)
        return len(distance[distance < 0])

    def collides_with(self, bubble, bubbles):
        """Return the index of the bubble in ``bubbles`` nearest to ``bubble``
        (by outline distance), wrapped in a list unless it is already an
        array."""
        distance = self.outline_distance(bubble, bubbles)
        idx_min = np.argmin(distance)
        return idx_min if isinstance(idx_min, np.ndarray) else [idx_min]

    def collapse(self, n_iterations=50):
        """
        Move bubbles to the center of mass.

        Parameters
        ----------
        n_iterations : int, default: 50
            Number of moves to perform.
        """
        for _i in range(n_iterations):
            moves = 0
            for i in range(len(self.bubbles)):
                rest_bub = np.delete(self.bubbles, i, 0)
                # try to move directly towards the center of mass
                # unit direction vector from bubble to the center of mass
                dir_vec = self._unit_vector(self.com - self.bubbles[i, :2])
                if dir_vec is None:
                    # The bubble already sits on the center of mass.
                    # Normalising a zero vector would yield NaN coordinates
                    # that pass the collision check and corrupt the layout.
                    continue

                # calculate new bubble position
                new_point = self.bubbles[i, :2] + dir_vec * self.step_dist
                new_bubble = np.append(new_point, self.bubbles[i, 2:4])

                # check whether new bubble collides with other bubbles
                if not self.check_collisions(new_bubble, rest_bub):
                    self.bubbles[i, :] = new_bubble
                    self.com = self.center_of_mass()
                    moves += 1
                else:
                    # try to move around a bubble that you collide with
                    # find colliding bubble
                    for colliding in self.collides_with(new_bubble, rest_bub):
                        # calculate direction vector
                        dir_vec = self._unit_vector(
                            rest_bub[colliding, :2] - self.bubbles[i, :2])
                        if dir_vec is None:
                            # coincident centers: no sideways direction exists
                            continue
                        # calculate orthogonal vector
                        orth = np.array([dir_vec[1], -dir_vec[0]])
                        # test which direction to go
                        new_point1 = (self.bubbles[i, :2] + orth *
                                      self.step_dist)
                        new_point2 = (self.bubbles[i, :2] - orth *
                                      self.step_dist)
                        dist1 = self.center_distance(
                            self.com, np.array([new_point1]))
                        dist2 = self.center_distance(
                            self.com, np.array([new_point2]))
                        new_point = new_point1 if dist1 < dist2 else new_point2
                        new_bubble = np.append(new_point, self.bubbles[i, 2:4])
                        if not self.check_collisions(new_bubble, rest_bub):
                            self.bubbles[i, :] = new_bubble
                            self.com = self.center_of_mass()

            # fewer than 10% of the bubbles moved directly: refine the step
            if moves / len(self.bubbles) < 0.1:
                self.step_dist = self.step_dist / 2

    def plot(self, ax, labels, colors):
        """
        Draw the bubble plot.

        Parameters
        ----------
        ax : matplotlib.axes.Axes
        labels : list
            Labels of the bubbles.
        colors : list
            Colors of the bubbles.
        """
        for i in range(len(self.bubbles)):
            circ = plt.Circle(
                self.bubbles[i, :2], self.bubbles[i, 2], color=colors[i])
            ax.add_patch(circ)
            ax.text(*self.bubbles[i, :2], labels[i],
                    horizontalalignment='center', verticalalignment='center')
--------------------------------------------------------------------------------
/Brief_analyse_opportunités_emploi/data/jobs.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hnadir-dev/Youcode-Data-Dev/d71fe2f2da3dcae7f0c925e9a9f9849c02ce32f9/Brief_analyse_opportunités_emploi/data/jobs.csv
--------------------------------------------------------------------------------
/Brief_analyse_opportunités_emploi/query.sql:
--------------------------------------------------------------------------------
-- Schema and exploratory queries for the job-opportunities database (T-SQL).
-- GO is the batch separator of SSMS / sqlcmd, not a T-SQL statement: it makes
-- sure the database exists before the next batch switches to it.

CREATE DATABASE oppodb;
GO

USE oppodb;
GO

-- Lookup tables referenced by jobs -------------------------------------------

CREATE TABLE countries(
    countryID INT PRIMARY KEY NOT NULL,
    countryName VARCHAR(100)
);

CREATE TABLE experienceLevels(
    levelID INT PRIMARY KEY NOT NULL,
    levelName VARCHAR(100)
);

CREATE TABLE jobTypes(
    typeID INT PRIMARY KEY NOT NULL,
    typeName VARCHAR(100)
);

CREATE TABLE companies(
    companyID INT PRIMARY KEY NOT NULL,
    companyName VARCHAR(100)
);

-- One row per job offer; every lookup column is a foreign key.
CREATE TABLE jobs(
    jobID INT PRIMARY KEY NOT NULL,
    jobName TEXT,
    jobLocation TEXT,
    salary FLOAT,
    companyID INT FOREIGN KEY REFERENCES companies(companyID),
    levelID INT FOREIGN KEY REFERENCES experienceLevels(levelID),
    countryID INT FOREIGN KEY REFERENCES countries(countryID),
    typeID INT FOREIGN KEY REFERENCES jobTypes(typeID)
);

-- Facilities offered by a job (one row per job/facility pair).
CREATE TABLE facilities(
    jobID INT,
    facilityName VARCHAR(100),
    PRIMARY KEY (jobID, facilityName)
);

-- Requirements of a job (one row per job/requirement pair).
-- NOTE(review): unlike facilities, jobID gets no foreign key to jobs anywhere
-- in this script -- confirm whether that is intended.
CREATE TABLE requirments(
    jobID INT,
    requirmentName VARCHAR(100),
    PRIMARY KEY (jobID, requirmentName)
);

----- Test queries

SELECT * FROM countries;

SELECT * FROM experienceLevels;

SELECT * FROM companies;

SELECT * FROM jobTypes;

SELECT * FROM jobs;


SELECT * FROM requirments;

SELECT * FROM facilities;


-- Link facilities to jobs.
ALTER TABLE facilities
ADD FOREIGN KEY (jobID) REFERENCES jobs(jobID);


-- Every job/facility pair with its company and country.
SELECT companies.companyName,
       jobs.jobName,
       jobs.jobLocation,
       countries.countryName,
       facilities.facilityName
FROM jobs
INNER JOIN companies ON jobs.companyID = companies.companyID
INNER JOIN countries ON jobs.countryID = countries.countryID
INNER JOIN facilities ON facilities.jobID = jobs.jobID;


-- Number of jobs offering each facility.
SELECT facilities.facilityName, COUNT(jobID) AS jobCount
FROM facilities
GROUP BY facilities.facilityName;

-- Jobs without any requirement row.
SELECT * FROM jobs
WHERE NOT EXISTS (
    SELECT 1 FROM requirments WHERE requirments.jobID = jobs.jobID
);
81 |
82 |
--------------------------------------------------------------------------------
/Brief_insertion_data_in_database_sql/insertion_data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "df_data_without_watchers_outliers = pd.read_csv('data/data_cleaned/data_without_watchers_outliers_2023_05.csv')\n",
19 | "df_watchers_outlires = pd.read_csv('data/data_cleaned/watchers_outliers_2023_05.csv')\n",
20 | "\n",
21 | "contributors = pd.read_csv('data/data_cleaned/repo_contributors_05.csv')\n",
22 | "\n",
23 | "data_c = pd.concat([df_data_without_watchers_outliers,df_watchers_outlires])\n",
24 | "\n",
25 | "#data_c['full_name'].drop_duplicates(data_c['full_name'].index,inplace=True)\n",
26 | "data_c.drop(columns=['Unnamed: 0.1','Unnamed: 0'],inplace=True)\n",
27 | "\n",
28 | "#data_c['license'].replace('No License',{'key': 0,'name': 'No License','spdx_id': '','url': '','node_id': ''},inplace=True)\n",
29 | "\n",
30 | "data_c.drop_duplicates(inplace=True)\n",
31 | "#data_c.info()"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 3,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "#Replace 'No License' with a stringified license dict\n",
41 | "data_c['license'].replace('No License',str(dict(key= '',name= 'No License',spdx_id= '',url= '',node_id= '')),inplace=True)"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "
Technologies
"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 4,
54 | "metadata": {},
55 | "outputs": [
56 | {
57 | "data": {
58 | "text/html": [
59 | "\n",
60 | "\n",
73 | "
\n",
74 | " \n",
75 | " \n",
76 | " | \n",
77 | " id | \n",
78 | " name | \n",
79 | "
\n",
80 | " \n",
81 | " \n",
82 | " \n",
83 | " 0 | \n",
84 | " 1 | \n",
85 | " C++ | \n",
86 | "
\n",
87 | " \n",
88 | " 1 | \n",
89 | " 2 | \n",
90 | " Kotlin | \n",
91 | "
\n",
92 | " \n",
93 | " 2 | \n",
94 | " 3 | \n",
95 | " Jupyter Notebook | \n",
96 | "
\n",
97 | " \n",
98 | " 3 | \n",
99 | " 4 | \n",
100 | " Java | \n",
101 | "
\n",
102 | " \n",
103 | " 4 | \n",
104 | " 5 | \n",
105 | " TypeScript | \n",
106 | "
\n",
107 | " \n",
108 | " ... | \n",
109 | " ... | \n",
110 | " ... | \n",
111 | "
\n",
112 | " \n",
113 | " 183 | \n",
114 | " 184 | \n",
115 | " Bikeshed | \n",
116 | "
\n",
117 | " \n",
118 | " 184 | \n",
119 | " 185 | \n",
120 | " Smali | \n",
121 | "
\n",
122 | " \n",
123 | " 185 | \n",
124 | " 186 | \n",
125 | " Vim Snippet | \n",
126 | "
\n",
127 | " \n",
128 | " 186 | \n",
129 | " 187 | \n",
130 | " YARA | \n",
131 | "
\n",
132 | " \n",
133 | " 187 | \n",
134 | " 188 | \n",
135 | " Hy | \n",
136 | "
\n",
137 | " \n",
138 | "
\n",
139 | "
188 rows × 2 columns
\n",
140 | "
"
141 | ],
142 | "text/plain": [
143 | " id name\n",
144 | "0 1 C++\n",
145 | "1 2 Kotlin\n",
146 | "2 3 Jupyter Notebook\n",
147 | "3 4 Java\n",
148 | "4 5 TypeScript\n",
149 | ".. ... ...\n",
150 | "183 184 Bikeshed\n",
151 | "184 185 Smali\n",
152 | "185 186 Vim Snippet\n",
153 | "186 187 YARA\n",
154 | "187 188 Hy\n",
155 | "\n",
156 | "[188 rows x 2 columns]"
157 | ]
158 | },
159 | "execution_count": 4,
160 | "metadata": {},
161 | "output_type": "execute_result"
162 | }
163 | ],
164 | "source": [
165 | "technoligies = pd.DataFrame(list(data_c['language'].unique())).rename(columns={0:'name'})\n",
166 | "techs = []\n",
167 | "for index,tech in technoligies.iterrows():\n",
168 | " techs.append(dict(id=index+1,name=tech['name']))\n",
169 | "\n",
170 | "technoligies = pd.DataFrame(techs)\n",
171 | "technoligies"
172 | ]
173 | },
174 | {
175 | "cell_type": "markdown",
176 | "metadata": {},
177 | "source": [
178 | "Owners
"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 5,
184 | "metadata": {},
185 | "outputs": [
186 | {
187 | "data": {
188 | "text/html": [
189 | "\n",
190 | "\n",
203 | "
\n",
204 | " \n",
205 | " \n",
206 | " | \n",
207 | " id | \n",
208 | " name | \n",
209 | "
\n",
210 | " \n",
211 | " \n",
212 | " \n",
213 | " 0 | \n",
214 | " 1 | \n",
215 | " User | \n",
216 | "
\n",
217 | " \n",
218 | " 1 | \n",
219 | " 2 | \n",
220 | " Organization | \n",
221 | "
\n",
222 | " \n",
223 | "
\n",
224 | "
"
225 | ],
226 | "text/plain": [
227 | " id name\n",
228 | "0 1 User\n",
229 | "1 2 Organization"
230 | ]
231 | },
232 | "execution_count": 5,
233 | "metadata": {},
234 | "output_type": "execute_result"
235 | }
236 | ],
237 | "source": [
238 | "owners = pd.DataFrame(list(data_c['type'].unique())).rename(columns={0:'name'})\n",
239 | "ors = []\n",
240 | "for index,otype in owners.iterrows():\n",
241 | " ors.append(dict(id=index+1,name=otype['name']))\n",
242 | "\n",
243 | "owners = pd.DataFrame(ors)\n",
244 | "owners"
245 | ]
246 | },
247 | {
248 | "cell_type": "markdown",
249 | "metadata": {},
250 | "source": [
251 | "License
"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 6,
257 | "metadata": {},
258 | "outputs": [
259 | {
260 | "data": {
261 | "text/html": [
262 | "\n",
263 | "\n",
276 | "
\n",
277 | " \n",
278 | " \n",
279 | " | \n",
280 | " id | \n",
281 | " key | \n",
282 | " name | \n",
283 | " spdx_id | \n",
284 | " url | \n",
285 | " node_id | \n",
286 | "
\n",
287 | " \n",
288 | " \n",
289 | " \n",
290 | " 0 | \n",
291 | " 1 | \n",
292 | " | \n",
293 | " No License | \n",
294 | " | \n",
295 | " | \n",
296 | " | \n",
297 | "
\n",
298 | " \n",
299 | " 1 | \n",
300 | " 2 | \n",
301 | " gpl-3.0 | \n",
302 | " GNU General Public License v3.0 | \n",
303 | " GPL-3.0 | \n",
304 | " https://api.github.com/licenses/gpl-3.0 | \n",
305 | " MDc6TGljZW5zZTk= | \n",
306 | "
\n",
307 | " \n",
308 | " 2 | \n",
309 | " 3 | \n",
310 | " mit | \n",
311 | " MIT License | \n",
312 | " MIT | \n",
313 | " https://api.github.com/licenses/mit | \n",
314 | " MDc6TGljZW5zZTEz | \n",
315 | "
\n",
316 | " \n",
317 | " 3 | \n",
318 | " 4 | \n",
319 | " agpl-3.0 | \n",
320 | " GNU Affero General Public License v3.0 | \n",
321 | " AGPL-3.0 | \n",
322 | " https://api.github.com/licenses/agpl-3.0 | \n",
323 | " MDc6TGljZW5zZTE= | \n",
324 | "
\n",
325 | " \n",
326 | " 4 | \n",
327 | " 5 | \n",
328 | " 0bsd | \n",
329 | " BSD Zero Clause License | \n",
330 | " 0BSD | \n",
331 | " https://api.github.com/licenses/0bsd | \n",
332 | " MDc6TGljZW5zZTM1 | \n",
333 | "
\n",
334 | " \n",
335 | " 5 | \n",
336 | " 6 | \n",
337 | " bsd-3-clause | \n",
338 | " BSD 3-Clause \"New\" or \"Revised\" License | \n",
339 | " BSD-3-Clause | \n",
340 | " https://api.github.com/licenses/bsd-3-clause | \n",
341 | " MDc6TGljZW5zZTU= | \n",
342 | "
\n",
343 | " \n",
344 | " 6 | \n",
345 | " 7 | \n",
346 | " mit-0 | \n",
347 | " MIT No Attribution | \n",
348 | " MIT-0 | \n",
349 | " https://api.github.com/licenses/mit-0 | \n",
350 | " MDc6TGljZW5zZTQx | \n",
351 | "
\n",
352 | " \n",
353 | " 7 | \n",
354 | " 8 | \n",
355 | " other | \n",
356 | " Other | \n",
357 | " NOASSERTION | \n",
358 | " None | \n",
359 | " MDc6TGljZW5zZTA= | \n",
360 | "
\n",
361 | " \n",
362 | " 8 | \n",
363 | " 9 | \n",
364 | " apache-2.0 | \n",
365 | " Apache License 2.0 | \n",
366 | " Apache-2.0 | \n",
367 | " https://api.github.com/licenses/apache-2.0 | \n",
368 | " MDc6TGljZW5zZTI= | \n",
369 | "
\n",
370 | " \n",
371 | " 9 | \n",
372 | " 10 | \n",
373 | " lgpl-3.0 | \n",
374 | " GNU Lesser General Public License v3.0 | \n",
375 | " LGPL-3.0 | \n",
376 | " https://api.github.com/licenses/lgpl-3.0 | \n",
377 | " MDc6TGljZW5zZTEy | \n",
378 | "
\n",
379 | " \n",
380 | " 10 | \n",
381 | " 11 | \n",
382 | " gpl-2.0 | \n",
383 | " GNU General Public License v2.0 | \n",
384 | " GPL-2.0 | \n",
385 | " https://api.github.com/licenses/gpl-2.0 | \n",
386 | " MDc6TGljZW5zZTg= | \n",
387 | "
\n",
388 | " \n",
389 | " 11 | \n",
390 | " 12 | \n",
391 | " cc0-1.0 | \n",
392 | " Creative Commons Zero v1.0 Universal | \n",
393 | " CC0-1.0 | \n",
394 | " https://api.github.com/licenses/cc0-1.0 | \n",
395 | " MDc6TGljZW5zZTY= | \n",
396 | "
\n",
397 | " \n",
398 | " 12 | \n",
399 | " 13 | \n",
400 | " lgpl-2.1 | \n",
401 | " GNU Lesser General Public License v2.1 | \n",
402 | " LGPL-2.1 | \n",
403 | " https://api.github.com/licenses/lgpl-2.1 | \n",
404 | " MDc6TGljZW5zZTEx | \n",
405 | "
\n",
406 | " \n",
407 | " 13 | \n",
408 | " 14 | \n",
409 | " cc-by-4.0 | \n",
410 | " Creative Commons Attribution 4.0 International | \n",
411 | " CC-BY-4.0 | \n",
412 | " https://api.github.com/licenses/cc-by-4.0 | \n",
413 | " MDc6TGljZW5zZTI1 | \n",
414 | "
\n",
415 | " \n",
416 | " 14 | \n",
417 | " 15 | \n",
418 | " unlicense | \n",
419 | " The Unlicense | \n",
420 | " Unlicense | \n",
421 | " https://api.github.com/licenses/unlicense | \n",
422 | " MDc6TGljZW5zZTE1 | \n",
423 | "
\n",
424 | " \n",
425 | " 15 | \n",
426 | " 16 | \n",
427 | " isc | \n",
428 | " ISC License | \n",
429 | " ISC | \n",
430 | " https://api.github.com/licenses/isc | \n",
431 | " MDc6TGljZW5zZTEw | \n",
432 | "
\n",
433 | " \n",
434 | " 16 | \n",
435 | " 17 | \n",
436 | " bsd-2-clause | \n",
437 | " BSD 2-Clause \"Simplified\" License | \n",
438 | " BSD-2-Clause | \n",
439 | " https://api.github.com/licenses/bsd-2-clause | \n",
440 | " MDc6TGljZW5zZTQ= | \n",
441 | "
\n",
442 | " \n",
443 | " 17 | \n",
444 | " 18 | \n",
445 | " zlib | \n",
446 | " zlib License | \n",
447 | " Zlib | \n",
448 | " https://api.github.com/licenses/zlib | \n",
449 | " MDc6TGljZW5zZTI3 | \n",
450 | "
\n",
451 | " \n",
452 | " 18 | \n",
453 | " 19 | \n",
454 | " mpl-2.0 | \n",
455 | " Mozilla Public License 2.0 | \n",
456 | " MPL-2.0 | \n",
457 | " https://api.github.com/licenses/mpl-2.0 | \n",
458 | " MDc6TGljZW5zZTE0 | \n",
459 | "
\n",
460 | " \n",
461 | " 19 | \n",
462 | " 20 | \n",
463 | " epl-2.0 | \n",
464 | " Eclipse Public License 2.0 | \n",
465 | " EPL-2.0 | \n",
466 | " https://api.github.com/licenses/epl-2.0 | \n",
467 | " MDc6TGljZW5zZTMy | \n",
468 | "
\n",
469 | " \n",
470 | " 20 | \n",
471 | " 21 | \n",
472 | " bsd-4-clause | \n",
473 | " BSD 4-Clause \"Original\" or \"Old\" License | \n",
474 | " BSD-4-Clause | \n",
475 | " https://api.github.com/licenses/bsd-4-clause | \n",
476 | " MDc6TGljZW5zZTM5 | \n",
477 | "
\n",
478 | " \n",
479 | " 21 | \n",
480 | " 22 | \n",
481 | " osl-3.0 | \n",
482 | " Open Software License 3.0 | \n",
483 | " OSL-3.0 | \n",
484 | " https://api.github.com/licenses/osl-3.0 | \n",
485 | " MDc6TGljZW5zZTE2 | \n",
486 | "
\n",
487 | " \n",
488 | " 22 | \n",
489 | " 23 | \n",
490 | " mulanpsl-2.0 | \n",
491 | " Mulan Permissive Software License, Version 2 | \n",
492 | " MulanPSL-2.0 | \n",
493 | " https://api.github.com/licenses/mulanpsl-2.0 | \n",
494 | " L_kgCsbXVsYW5wc2wtMi4w | \n",
495 | "
\n",
496 | " \n",
497 | " 23 | \n",
498 | " 24 | \n",
499 | " wtfpl | \n",
500 | " Do What The F*ck You Want To Public License | \n",
501 | " WTFPL | \n",
502 | " https://api.github.com/licenses/wtfpl | \n",
503 | " MDc6TGljZW5zZTE4 | \n",
504 | "
\n",
505 | " \n",
506 | " 24 | \n",
507 | " 25 | \n",
508 | " cc-by-sa-4.0 | \n",
509 | " Creative Commons Attribution Share Alike 4.0 I... | \n",
510 | " CC-BY-SA-4.0 | \n",
511 | " https://api.github.com/licenses/cc-by-sa-4.0 | \n",
512 | " MDc6TGljZW5zZTI2 | \n",
513 | "
\n",
514 | " \n",
515 | " 25 | \n",
516 | " 26 | \n",
517 | " bsl-1.0 | \n",
518 | " Boost Software License 1.0 | \n",
519 | " BSL-1.0 | \n",
520 | " https://api.github.com/licenses/bsl-1.0 | \n",
521 | " MDc6TGljZW5zZTI4 | \n",
522 | "
\n",
523 | " \n",
524 | " 26 | \n",
525 | " 27 | \n",
526 | " upl-1.0 | \n",
527 | " Universal Permissive License v1.0 | \n",
528 | " UPL-1.0 | \n",
529 | " https://api.github.com/licenses/upl-1.0 | \n",
530 | " MDc6TGljZW5zZTMz | \n",
531 | "
\n",
532 | " \n",
533 | " 27 | \n",
534 | " 28 | \n",
535 | " lppl-1.3c | \n",
536 | " LaTeX Project Public License v1.3c | \n",
537 | " LPPL-1.3c | \n",
538 | " https://api.github.com/licenses/lppl-1.3c | \n",
539 | " MDc6TGljZW5zZTIz | \n",
540 | "
\n",
541 | " \n",
542 | " 28 | \n",
543 | " 29 | \n",
544 | " ncsa | \n",
545 | " University of Illinois/NCSA Open Source License | \n",
546 | " NCSA | \n",
547 | " https://api.github.com/licenses/ncsa | \n",
548 | " MDc6TGljZW5zZTI5 | \n",
549 | "
\n",
550 | " \n",
551 | " 29 | \n",
552 | " 30 | \n",
553 | " bsd-3-clause-clear | \n",
554 | " BSD 3-Clause Clear License | \n",
555 | " BSD-3-Clause-Clear | \n",
556 | " https://api.github.com/licenses/bsd-3-clause-c... | \n",
557 | " MDc6TGljZW5zZTIx | \n",
558 | "
\n",
559 | " \n",
560 | " 30 | \n",
561 | " 31 | \n",
562 | " epl-1.0 | \n",
563 | " Eclipse Public License 1.0 | \n",
564 | " EPL-1.0 | \n",
565 | " https://api.github.com/licenses/epl-1.0 | \n",
566 | " MDc6TGljZW5zZTc= | \n",
567 | "
\n",
568 | " \n",
569 | " 31 | \n",
570 | " 32 | \n",
571 | " cern-ohl-w-2.0 | \n",
572 | " CERN Open Hardware Licence Version 2 - Weakly ... | \n",
573 | " CERN-OHL-W-2.0 | \n",
574 | " https://api.github.com/licenses/cern-ohl-w-2.0 | \n",
575 | " L_kgCuY2Vybi1vaGwtdy0yLjA | \n",
576 | "
\n",
577 | " \n",
578 | " 32 | \n",
579 | " 33 | \n",
580 | " cern-ohl-p-2.0 | \n",
581 | " CERN Open Hardware Licence Version 2 - Permissive | \n",
582 | " CERN-OHL-P-2.0 | \n",
583 | " https://api.github.com/licenses/cern-ohl-p-2.0 | \n",
584 | " L_kgCuY2Vybi1vaGwtcC0yLjA | \n",
585 | "
\n",
586 | " \n",
587 | " 33 | \n",
588 | " 34 | \n",
589 | " artistic-2.0 | \n",
590 | " Artistic License 2.0 | \n",
591 | " Artistic-2.0 | \n",
592 | " https://api.github.com/licenses/artistic-2.0 | \n",
593 | " MDc6TGljZW5zZTM= | \n",
594 | "
\n",
595 | " \n",
596 | " 34 | \n",
597 | " 35 | \n",
598 | " ofl-1.1 | \n",
599 | " SIL Open Font License 1.1 | \n",
600 | " OFL-1.1 | \n",
601 | " https://api.github.com/licenses/ofl-1.1 | \n",
602 | " MDc6TGljZW5zZTE3 | \n",
603 | "
\n",
604 | " \n",
605 | " 35 | \n",
606 | " 36 | \n",
607 | " odbl-1.0 | \n",
608 | " Open Data Commons Open Database License v1.0 | \n",
609 | " ODbL-1.0 | \n",
610 | " https://api.github.com/licenses/odbl-1.0 | \n",
611 | " MDc6TGljZW5zZTM3 | \n",
612 | "
\n",
613 | " \n",
614 | " 36 | \n",
615 | " 37 | \n",
616 | " eupl-1.2 | \n",
617 | " European Union Public License 1.2 | \n",
618 | " EUPL-1.2 | \n",
619 | " https://api.github.com/licenses/eupl-1.2 | \n",
620 | " MDc6TGljZW5zZTM0 | \n",
621 | "
\n",
622 | " \n",
623 | "
\n",
624 | "
"
625 | ],
626 | "text/plain": [
627 | " id key name \\\n",
628 | "0 1 No License \n",
629 | "1 2 gpl-3.0 GNU General Public License v3.0 \n",
630 | "2 3 mit MIT License \n",
631 | "3 4 agpl-3.0 GNU Affero General Public License v3.0 \n",
632 | "4 5 0bsd BSD Zero Clause License \n",
633 | "5 6 bsd-3-clause BSD 3-Clause \"New\" or \"Revised\" License \n",
634 | "6 7 mit-0 MIT No Attribution \n",
635 | "7 8 other Other \n",
636 | "8 9 apache-2.0 Apache License 2.0 \n",
637 | "9 10 lgpl-3.0 GNU Lesser General Public License v3.0 \n",
638 | "10 11 gpl-2.0 GNU General Public License v2.0 \n",
639 | "11 12 cc0-1.0 Creative Commons Zero v1.0 Universal \n",
640 | "12 13 lgpl-2.1 GNU Lesser General Public License v2.1 \n",
641 | "13 14 cc-by-4.0 Creative Commons Attribution 4.0 International \n",
642 | "14 15 unlicense The Unlicense \n",
643 | "15 16 isc ISC License \n",
644 | "16 17 bsd-2-clause BSD 2-Clause \"Simplified\" License \n",
645 | "17 18 zlib zlib License \n",
646 | "18 19 mpl-2.0 Mozilla Public License 2.0 \n",
647 | "19 20 epl-2.0 Eclipse Public License 2.0 \n",
648 | "20 21 bsd-4-clause BSD 4-Clause \"Original\" or \"Old\" License \n",
649 | "21 22 osl-3.0 Open Software License 3.0 \n",
650 | "22 23 mulanpsl-2.0 Mulan Permissive Software License, Version 2 \n",
651 | "23 24 wtfpl Do What The F*ck You Want To Public License \n",
652 | "24 25 cc-by-sa-4.0 Creative Commons Attribution Share Alike 4.0 I... \n",
653 | "25 26 bsl-1.0 Boost Software License 1.0 \n",
654 | "26 27 upl-1.0 Universal Permissive License v1.0 \n",
655 | "27 28 lppl-1.3c LaTeX Project Public License v1.3c \n",
656 | "28 29 ncsa University of Illinois/NCSA Open Source License \n",
657 | "29 30 bsd-3-clause-clear BSD 3-Clause Clear License \n",
658 | "30 31 epl-1.0 Eclipse Public License 1.0 \n",
659 | "31 32 cern-ohl-w-2.0 CERN Open Hardware Licence Version 2 - Weakly ... \n",
660 | "32 33 cern-ohl-p-2.0 CERN Open Hardware Licence Version 2 - Permissive \n",
661 | "33 34 artistic-2.0 Artistic License 2.0 \n",
662 | "34 35 ofl-1.1 SIL Open Font License 1.1 \n",
663 | "35 36 odbl-1.0 Open Data Commons Open Database License v1.0 \n",
664 | "36 37 eupl-1.2 European Union Public License 1.2 \n",
665 | "\n",
666 | " spdx_id url \\\n",
667 | "0 \n",
668 | "1 GPL-3.0 https://api.github.com/licenses/gpl-3.0 \n",
669 | "2 MIT https://api.github.com/licenses/mit \n",
670 | "3 AGPL-3.0 https://api.github.com/licenses/agpl-3.0 \n",
671 | "4 0BSD https://api.github.com/licenses/0bsd \n",
672 | "5 BSD-3-Clause https://api.github.com/licenses/bsd-3-clause \n",
673 | "6 MIT-0 https://api.github.com/licenses/mit-0 \n",
674 | "7 NOASSERTION None \n",
675 | "8 Apache-2.0 https://api.github.com/licenses/apache-2.0 \n",
676 | "9 LGPL-3.0 https://api.github.com/licenses/lgpl-3.0 \n",
677 | "10 GPL-2.0 https://api.github.com/licenses/gpl-2.0 \n",
678 | "11 CC0-1.0 https://api.github.com/licenses/cc0-1.0 \n",
679 | "12 LGPL-2.1 https://api.github.com/licenses/lgpl-2.1 \n",
680 | "13 CC-BY-4.0 https://api.github.com/licenses/cc-by-4.0 \n",
681 | "14 Unlicense https://api.github.com/licenses/unlicense \n",
682 | "15 ISC https://api.github.com/licenses/isc \n",
683 | "16 BSD-2-Clause https://api.github.com/licenses/bsd-2-clause \n",
684 | "17 Zlib https://api.github.com/licenses/zlib \n",
685 | "18 MPL-2.0 https://api.github.com/licenses/mpl-2.0 \n",
686 | "19 EPL-2.0 https://api.github.com/licenses/epl-2.0 \n",
687 | "20 BSD-4-Clause https://api.github.com/licenses/bsd-4-clause \n",
688 | "21 OSL-3.0 https://api.github.com/licenses/osl-3.0 \n",
689 | "22 MulanPSL-2.0 https://api.github.com/licenses/mulanpsl-2.0 \n",
690 | "23 WTFPL https://api.github.com/licenses/wtfpl \n",
691 | "24 CC-BY-SA-4.0 https://api.github.com/licenses/cc-by-sa-4.0 \n",
692 | "25 BSL-1.0 https://api.github.com/licenses/bsl-1.0 \n",
693 | "26 UPL-1.0 https://api.github.com/licenses/upl-1.0 \n",
694 | "27 LPPL-1.3c https://api.github.com/licenses/lppl-1.3c \n",
695 | "28 NCSA https://api.github.com/licenses/ncsa \n",
696 | "29 BSD-3-Clause-Clear https://api.github.com/licenses/bsd-3-clause-c... \n",
697 | "30 EPL-1.0 https://api.github.com/licenses/epl-1.0 \n",
698 | "31 CERN-OHL-W-2.0 https://api.github.com/licenses/cern-ohl-w-2.0 \n",
699 | "32 CERN-OHL-P-2.0 https://api.github.com/licenses/cern-ohl-p-2.0 \n",
700 | "33 Artistic-2.0 https://api.github.com/licenses/artistic-2.0 \n",
701 | "34 OFL-1.1 https://api.github.com/licenses/ofl-1.1 \n",
702 | "35 ODbL-1.0 https://api.github.com/licenses/odbl-1.0 \n",
703 | "36 EUPL-1.2 https://api.github.com/licenses/eupl-1.2 \n",
704 | "\n",
705 | " node_id \n",
706 | "0 \n",
707 | "1 MDc6TGljZW5zZTk= \n",
708 | "2 MDc6TGljZW5zZTEz \n",
709 | "3 MDc6TGljZW5zZTE= \n",
710 | "4 MDc6TGljZW5zZTM1 \n",
711 | "5 MDc6TGljZW5zZTU= \n",
712 | "6 MDc6TGljZW5zZTQx \n",
713 | "7 MDc6TGljZW5zZTA= \n",
714 | "8 MDc6TGljZW5zZTI= \n",
715 | "9 MDc6TGljZW5zZTEy \n",
716 | "10 MDc6TGljZW5zZTg= \n",
717 | "11 MDc6TGljZW5zZTY= \n",
718 | "12 MDc6TGljZW5zZTEx \n",
719 | "13 MDc6TGljZW5zZTI1 \n",
720 | "14 MDc6TGljZW5zZTE1 \n",
721 | "15 MDc6TGljZW5zZTEw \n",
722 | "16 MDc6TGljZW5zZTQ= \n",
723 | "17 MDc6TGljZW5zZTI3 \n",
724 | "18 MDc6TGljZW5zZTE0 \n",
725 | "19 MDc6TGljZW5zZTMy \n",
726 | "20 MDc6TGljZW5zZTM5 \n",
727 | "21 MDc6TGljZW5zZTE2 \n",
728 | "22 L_kgCsbXVsYW5wc2wtMi4w \n",
729 | "23 MDc6TGljZW5zZTE4 \n",
730 | "24 MDc6TGljZW5zZTI2 \n",
731 | "25 MDc6TGljZW5zZTI4 \n",
732 | "26 MDc6TGljZW5zZTMz \n",
733 | "27 MDc6TGljZW5zZTIz \n",
734 | "28 MDc6TGljZW5zZTI5 \n",
735 | "29 MDc6TGljZW5zZTIx \n",
736 | "30 MDc6TGljZW5zZTc= \n",
737 | "31 L_kgCuY2Vybi1vaGwtdy0yLjA \n",
738 | "32 L_kgCuY2Vybi1vaGwtcC0yLjA \n",
739 | "33 MDc6TGljZW5zZTM= \n",
740 | "34 MDc6TGljZW5zZTE3 \n",
741 | "35 MDc6TGljZW5zZTM3 \n",
742 | "36 MDc6TGljZW5zZTM0 "
743 | ]
744 | },
745 | "execution_count": 6,
746 | "metadata": {},
747 | "output_type": "execute_result"
748 | }
749 | ],
750 | "source": [
751 | "import ast\n",
752 | "\n",
753 | "#table license\n",
754 | "licenses = pd.DataFrame(list(data_c['license'].unique())).rename(columns={0:'name'})\n",
755 | "lics = []\n",
756 | "for index,l in licenses.iterrows():\n",
757 | " lics.append(ast.literal_eval(l['name']))\n",
758 | "\n",
759 | "\n",
760 | "licenses = pd.DataFrame(lics)\n",
761 | "licenses.insert(0,\"id\", list(range(1,len(lics)+1)),True)\n",
762 | "licenses\n"
763 | ]
764 | },
765 | {
766 | "cell_type": "markdown",
767 | "metadata": {},
768 | "source": [
769 | "Contributors
"
770 | ]
771 | },
772 | {
773 | "cell_type": "code",
774 | "execution_count": 7,
775 | "metadata": {},
776 | "outputs": [
777 | {
778 | "data": {
779 | "text/html": [
780 | "\n",
781 | "\n",
794 | "
\n",
795 | " \n",
796 | " \n",
797 | " | \n",
798 | " id | \n",
799 | " name | \n",
800 | "
\n",
801 | " \n",
802 | " \n",
803 | " \n",
804 | " 0 | \n",
805 | " 1 | \n",
806 | " biuuu | \n",
807 | "
\n",
808 | " \n",
809 | " 1 | \n",
810 | " 2 | \n",
811 | " cntvc | \n",
812 | "
\n",
813 | " \n",
814 | " 2 | \n",
815 | " 3 | \n",
816 | " TremblingMoeNew | \n",
817 | "
\n",
818 | " \n",
819 | " 3 | \n",
820 | " 4 | \n",
821 | " LaiYueTing | \n",
822 | "
\n",
823 | " \n",
824 | " 4 | \n",
825 | " 5 | \n",
826 | " vdohney | \n",
827 | "
\n",
828 | " \n",
829 | " ... | \n",
830 | " ... | \n",
831 | " ... | \n",
832 | "
\n",
833 | " \n",
834 | " 12566 | \n",
835 | " 12567 | \n",
836 | " nijuy | \n",
837 | "
\n",
838 | " \n",
839 | " 12567 | \n",
840 | " 12568 | \n",
841 | " jingliu9 | \n",
842 | "
\n",
843 | " \n",
844 | " 12568 | \n",
845 | " 12569 | \n",
846 | " RogerPu | \n",
847 | "
\n",
848 | " \n",
849 | " 12569 | \n",
850 | " 12570 | \n",
851 | " tumGER | \n",
852 | "
\n",
853 | " \n",
854 | " 12570 | \n",
855 | " 12571 | \n",
856 | " NicholeMattera | \n",
857 | "
\n",
858 | " \n",
859 | "
\n",
860 | "
12571 rows × 2 columns
\n",
861 | "
"
862 | ],
863 | "text/plain": [
864 | " id name\n",
865 | "0 1 biuuu\n",
866 | "1 2 cntvc\n",
867 | "2 3 TremblingMoeNew\n",
868 | "3 4 LaiYueTing\n",
869 | "4 5 vdohney\n",
870 | "... ... ...\n",
871 | "12566 12567 nijuy\n",
872 | "12567 12568 jingliu9\n",
873 | "12568 12569 RogerPu\n",
874 | "12569 12570 tumGER\n",
875 | "12570 12571 NicholeMattera\n",
876 | "\n",
877 | "[12571 rows x 2 columns]"
878 | ]
879 | },
880 | "execution_count": 7,
881 | "metadata": {},
882 | "output_type": "execute_result"
883 | }
884 | ],
885 | "source": [
886 | "cs = pd.DataFrame(contributors['name'].unique()).rename(columns={0:'name'})\n",
887 | "\n",
888 | "cs.insert(0,'id',range(1,len(cs)+1),True)\n",
889 | "\n",
890 | "cs"
891 | ]
892 | },
893 | {
894 | "cell_type": "markdown",
895 | "metadata": {},
896 | "source": [
897 | "Repositories
"
898 | ]
899 | },
900 | {
901 | "cell_type": "code",
902 | "execution_count": 8,
903 | "metadata": {},
904 | "outputs": [
905 | {
906 | "data": {
907 | "text/html": [
908 | "\n",
909 | "\n",
922 | "
\n",
923 | " \n",
924 | " \n",
925 | " | \n",
926 | " id | \n",
927 | " full_name | \n",
928 | "
\n",
929 | " \n",
930 | " \n",
931 | " \n",
932 | " 0 | \n",
933 | " 1 | \n",
934 | " 477132016yb/plant-vs-zombies | \n",
935 | "
\n",
936 | " \n",
937 | " 1 | \n",
938 | " 2 | \n",
939 | " ParveshSandila/CountryCodeChooser | \n",
940 | "
\n",
941 | " \n",
942 | " 2 | \n",
943 | " 3 | \n",
944 | " ozodbek-sobirjonovich/sqlite3_tutorial | \n",
945 | "
\n",
946 | " \n",
947 | " 3 | \n",
948 | " 4 | \n",
949 | " mahomaps/mm-v1 | \n",
950 | "
\n",
951 | " \n",
952 | " 4 | \n",
953 | " 5 | \n",
954 | " cruip/cruip-tutorials-next | \n",
955 | "
\n",
956 | " \n",
957 | " ... | \n",
958 | " ... | \n",
959 | " ... | \n",
960 | "
\n",
961 | " \n",
962 | " 25485 | \n",
963 | " 25486 | \n",
964 | " gptlink/gptlink-deploy | \n",
965 | "
\n",
966 | " \n",
967 | " 25486 | \n",
968 | " 25487 | \n",
969 | " akashusr/assignment-solution-batch-2 | \n",
970 | "
\n",
971 | " \n",
972 | " 25487 | \n",
973 | " 25488 | \n",
974 | " smile1130/laravel9_CMS | \n",
975 | "
\n",
976 | " \n",
977 | " 25488 | \n",
978 | " 25489 | \n",
979 | " asaotomo/makephonedict | \n",
980 | "
\n",
981 | " \n",
982 | " 25489 | \n",
983 | " 25490 | \n",
984 | " ACodeDaily/AcodeDaily | \n",
985 | "
\n",
986 | " \n",
987 | "
\n",
988 | "
25490 rows × 2 columns
\n",
989 | "
"
990 | ],
991 | "text/plain": [
992 | " id full_name\n",
993 | "0 1 477132016yb/plant-vs-zombies\n",
994 | "1 2 ParveshSandila/CountryCodeChooser\n",
995 | "2 3 ozodbek-sobirjonovich/sqlite3_tutorial\n",
996 | "3 4 mahomaps/mm-v1\n",
997 | "4 5 cruip/cruip-tutorials-next\n",
998 | "... ... ...\n",
999 | "25485 25486 gptlink/gptlink-deploy\n",
1000 | "25486 25487 akashusr/assignment-solution-batch-2\n",
1001 | "25487 25488 smile1130/laravel9_CMS\n",
1002 | "25488 25489 asaotomo/makephonedict\n",
1003 | "25489 25490 ACodeDaily/AcodeDaily\n",
1004 | "\n",
1005 | "[25490 rows x 2 columns]"
1006 | ]
1007 | },
1008 | "execution_count": 8,
1009 | "metadata": {},
1010 | "output_type": "execute_result"
1011 | }
1012 | ],
1013 | "source": [
1014 | "repositories = pd.DataFrame(data_c['full_name'].unique()).rename(columns={0:'full_name'})\n",
1015 | "\n",
1016 | "repositories.insert(0,'id',range(1,len(repositories)+1),True)\n",
1017 | "repositories"
1018 | ]
1019 | },
1020 | {
1021 | "cell_type": "code",
1022 | "execution_count": 9,
1023 | "metadata": {},
1024 | "outputs": [
1025 | {
1026 | "data": {
1027 | "text/html": [
1028 | "\n",
1029 | "\n",
1042 | "
\n",
1043 | " \n",
1044 | " \n",
1045 | " | \n",
1046 | " full_name | \n",
1047 | " url | \n",
1048 | " clone_url | \n",
1049 | " watchers_count | \n",
1050 | " stargazers_count | \n",
1051 | " language | \n",
1052 | " forks | \n",
1053 | " description | \n",
1054 | " type | \n",
1055 | " license | \n",
1056 | " fork | \n",
1057 | " created_at | \n",
1058 | " updated_at | \n",
1059 | " pushed_at | \n",
1060 | "
\n",
1061 | " \n",
1062 | " \n",
1063 | " \n",
1064 | " 0 | \n",
1065 | " 477132016yb/plant-vs-zombies | \n",
1066 | " https://github.com/477132016yb/plant-vs-zombies | \n",
1067 | " https://github.com/477132016yb/plant-vs-zombie... | \n",
1068 | " 12 | \n",
1069 | " 12 | \n",
1070 | " 1 | \n",
1071 | " 0 | \n",
1072 | " NaN | \n",
1073 | " 1 | \n",
1074 | " {'key': '', 'name': 'No License', 'spdx_id': '... | \n",
1075 | " False | \n",
1076 | " 2023-05-01 | \n",
1077 | " 2023-06-15 | \n",
1078 | " 2023-06-13 | \n",
1079 | "
\n",
1080 | " \n",
1081 | " 1 | \n",
1082 | " ParveshSandila/CountryCodeChooser | \n",
1083 | " https://github.com/ParveshSandila/CountryCodeC... | \n",
1084 | " https://github.com/ParveshSandila/CountryCodeC... | \n",
1085 | " 12 | \n",
1086 | " 12 | \n",
1087 | " 2 | \n",
1088 | " 1 | \n",
1089 | " Country code chooser in Jetpack compose | \n",
1090 | " 1 | \n",
1091 | " {'key': '', 'name': 'No License', 'spdx_id': '... | \n",
1092 | " False | \n",
1093 | " 2023-05-01 | \n",
1094 | " 2023-06-06 | \n",
1095 | " 2023-06-09 | \n",
1096 | "
\n",
1097 | " \n",
1098 | "
\n",
1099 | "
"
1100 | ],
1101 | "text/plain": [
1102 | " full_name \\\n",
1103 | "0 477132016yb/plant-vs-zombies \n",
1104 | "1 ParveshSandila/CountryCodeChooser \n",
1105 | "\n",
1106 | " url \\\n",
1107 | "0 https://github.com/477132016yb/plant-vs-zombies \n",
1108 | "1 https://github.com/ParveshSandila/CountryCodeC... \n",
1109 | "\n",
1110 | " clone_url watchers_count \\\n",
1111 | "0 https://github.com/477132016yb/plant-vs-zombie... 12 \n",
1112 | "1 https://github.com/ParveshSandila/CountryCodeC... 12 \n",
1113 | "\n",
1114 | " stargazers_count language forks description \\\n",
1115 | "0 12 1 0 NaN \n",
1116 | "1 12 2 1 Country code chooser in Jetpack compose \n",
1117 | "\n",
1118 | " type license fork created_at \\\n",
1119 | "0 1 {'key': '', 'name': 'No License', 'spdx_id': '... False 2023-05-01 \n",
1120 | "1 1 {'key': '', 'name': 'No License', 'spdx_id': '... False 2023-05-01 \n",
1121 | "\n",
1122 | " updated_at pushed_at \n",
1123 | "0 2023-06-15 2023-06-13 \n",
1124 | "1 2023-06-06 2023-06-09 "
1125 | ]
1126 | },
1127 | "execution_count": 9,
1128 | "metadata": {},
1129 | "output_type": "execute_result"
1130 | }
1131 | ],
1132 | "source": [
1133 | "dataToInserted = data_c\n",
1134 | "\n",
1135 | "#--Replace technologies\n",
1136 | "for index,tech in technoligies.iterrows():\n",
1137 | " dataToInserted['language'].replace(tech['name'],tech['id'],inplace=True)\n",
1138 | "\n",
1139 | "\n",
1140 | "#--Replace owners\n",
1141 | "for index,tech in owners.iterrows():\n",
1142 | " dataToInserted['type'].replace(tech['name'],tech['id'],inplace=True)\n",
1143 | "\n",
1144 | "dataToInserted.head(2)\n"
1145 | ]
1146 | },
1147 | {
1148 | "cell_type": "code",
1149 | "execution_count": 10,
1150 | "metadata": {},
1151 | "outputs": [
1152 | {
1153 | "data": {
1154 | "text/html": [
1155 | "\n",
1156 | "\n",
1169 | "
\n",
1170 | " \n",
1171 | " \n",
1172 | " | \n",
1173 | " full_name | \n",
1174 | " url | \n",
1175 | " clone_url | \n",
1176 | " watchers_count | \n",
1177 | " stargazers_count | \n",
1178 | " language | \n",
1179 | " forks | \n",
1180 | " description | \n",
1181 | " type | \n",
1182 | " license | \n",
1183 | " fork | \n",
1184 | " created_at | \n",
1185 | " updated_at | \n",
1186 | " pushed_at | \n",
1187 | "
\n",
1188 | " \n",
1189 | " \n",
1190 | " \n",
1191 | " 0 | \n",
1192 | " 477132016yb/plant-vs-zombies | \n",
1193 | " https://github.com/477132016yb/plant-vs-zombies | \n",
1194 | " https://github.com/477132016yb/plant-vs-zombie... | \n",
1195 | " 12 | \n",
1196 | " 12 | \n",
1197 | " 1 | \n",
1198 | " 0 | \n",
1199 | " NaN | \n",
1200 | " 1 | \n",
1201 | " 1 | \n",
1202 | " False | \n",
1203 | " 2023-05-01 | \n",
1204 | " 2023-06-15 | \n",
1205 | " 2023-06-13 | \n",
1206 | "
\n",
1207 | " \n",
1208 | " 1 | \n",
1209 | " ParveshSandila/CountryCodeChooser | \n",
1210 | " https://github.com/ParveshSandila/CountryCodeC... | \n",
1211 | " https://github.com/ParveshSandila/CountryCodeC... | \n",
1212 | " 12 | \n",
1213 | " 12 | \n",
1214 | " 2 | \n",
1215 | " 1 | \n",
1216 | " Country code chooser in Jetpack compose | \n",
1217 | " 1 | \n",
1218 | " 1 | \n",
1219 | " False | \n",
1220 | " 2023-05-01 | \n",
1221 | " 2023-06-06 | \n",
1222 | " 2023-06-09 | \n",
1223 | "
\n",
1224 | " \n",
1225 | "
\n",
1226 | "
"
1227 | ],
1228 | "text/plain": [
1229 | " full_name \\\n",
1230 | "0 477132016yb/plant-vs-zombies \n",
1231 | "1 ParveshSandila/CountryCodeChooser \n",
1232 | "\n",
1233 | " url \\\n",
1234 | "0 https://github.com/477132016yb/plant-vs-zombies \n",
1235 | "1 https://github.com/ParveshSandila/CountryCodeC... \n",
1236 | "\n",
1237 | " clone_url watchers_count \\\n",
1238 | "0 https://github.com/477132016yb/plant-vs-zombie... 12 \n",
1239 | "1 https://github.com/ParveshSandila/CountryCodeC... 12 \n",
1240 | "\n",
1241 | " stargazers_count language forks description \\\n",
1242 | "0 12 1 0 NaN \n",
1243 | "1 12 2 1 Country code chooser in Jetpack compose \n",
1244 | "\n",
1245 | " type license fork created_at updated_at pushed_at \n",
1246 | "0 1 1 False 2023-05-01 2023-06-15 2023-06-13 \n",
1247 | "1 1 1 False 2023-05-01 2023-06-06 2023-06-09 "
1248 | ]
1249 | },
1250 | "execution_count": 10,
1251 | "metadata": {},
1252 | "output_type": "execute_result"
1253 | }
1254 | ],
1255 | "source": [
1256 | "#--Replace license\n",
1257 | "for index,l in licenses.iterrows():\n",
1258 | " #print(str(dict(l[['key','name','spdx_id','url','node_id']])))\n",
1259 | " dataToInserted['license'].replace(str(dict(l[['key','name','spdx_id','url','node_id']])),l['id'],inplace=True)\n",
1260 | "\n",
1261 | "#dataToInserted[dataToInserted['license'] == str(dict(licenses[['key','name','spdx_id','url','node_id']].head(1)))]\n",
1262 | "\n",
1263 | "#dict(licenses[['key','name','spdx_id','url','node_id']].head(1))\n",
1264 | "dataToInserted.head(2)"
1265 | ]
1266 | },
1267 | {
1268 | "cell_type": "code",
1269 | "execution_count": 11,
1270 | "metadata": {},
1271 | "outputs": [],
1272 | "source": [
1273 | "# add id to each repos\n",
1274 | "#add columns ID\n",
1275 | "dataToInserted['id'] = dataToInserted['full_name']\n",
1276 | "\n",
1277 | "for index, r in repositories.iterrows():\n",
1278 | " dataToInserted['id'].replace(r['full_name'],r['id'],inplace=True)\n"
1279 | ]
1280 | },
1281 | {
1282 | "cell_type": "code",
1283 | "execution_count": 12,
1284 | "metadata": {},
1285 | "outputs": [],
1286 | "source": [
1287 | "dataToInserted.to_csv('dataToInserted.csv')"
1288 | ]
1289 | },
1290 | {
1291 | "cell_type": "markdown",
1292 | "metadata": {},
1293 | "source": [
1294 | "L'insertion des données en base de données"
1295 | ]
1296 | },
1297 | {
1298 | "cell_type": "code",
1299 | "execution_count": 12,
1300 | "metadata": {},
1301 | "outputs": [],
1302 | "source": [
1303 | "import pyodbc\n",
1304 | "\n",
1305 | "cnxn = pyodbc.connect('Driver={SQL Server};'\n",
1306 | " 'Server=LAPTOP-B5O30HDH\\SQLEXPRESS;'\n",
1307 | " 'Database=githubdb;'\n",
1308 | " 'Trusted_Connection=yes;')"
1309 | ]
1310 | },
1311 | {
1312 | "cell_type": "markdown",
1313 | "metadata": {},
1314 | "source": [
1315 | "Technologies
"
1316 | ]
1317 | },
1318 | {
1319 | "cell_type": "code",
1320 | "execution_count": 16,
1321 | "metadata": {},
1322 | "outputs": [],
1323 | "source": [
1324 | "cursor = cnxn.cursor()\n",
1325 | "\n",
1326 | "for index,tech in technoligies.iterrows():\n",
1327 | " cursor.execute('INSERT INTO technologies (name) values (?)',tech['name'])\n",
1328 | "\n",
1329 | "cursor.commit()\n",
1330 | "cursor.close()"
1331 | ]
1332 | },
1333 | {
1334 | "cell_type": "markdown",
1335 | "metadata": {},
1336 | "source": [
1337 | "Owners
"
1338 | ]
1339 | },
1340 | {
1341 | "cell_type": "code",
1342 | "execution_count": 17,
1343 | "metadata": {},
1344 | "outputs": [],
1345 | "source": [
1346 | "cursor = cnxn.cursor()\n",
1347 | "\n",
1348 | "for index, owner in owners.iterrows():\n",
1349 | " cursor.execute('INSERT INTO owners (type) values (?)',owner['name'])\n",
1350 | "cursor.commit()\n",
1351 | "cursor.close()"
1352 | ]
1353 | },
1354 | {
1355 | "cell_type": "markdown",
1356 | "metadata": {},
1357 | "source": [
1358 | "Licenses
"
1359 | ]
1360 | },
1361 | {
1362 | "cell_type": "code",
1363 | "execution_count": 18,
1364 | "metadata": {},
1365 | "outputs": [],
1366 | "source": [
1367 | "\n",
1368 | "\n",
1369 | "cursor = cnxn.cursor()\n",
1370 | "\n",
1371 | "for index, lic in licenses.iterrows():\n",
1372 | " cursor.execute(\"INSERT INTO licenses (lkey,name,spdxID,url,nodeID) values (?,?,?,?,?)\",lic['key'],lic['name'],lic['spdx_id'],lic['url'],lic['node_id'])\n",
1373 | "cursor.commit()\n",
1374 | "cursor.close()"
1375 | ]
1376 | },
1377 | {
1378 | "cell_type": "markdown",
1379 | "metadata": {},
1380 | "source": [
1381 | "Contributors
"
1382 | ]
1383 | },
1384 | {
1385 | "cell_type": "code",
1386 | "execution_count": 19,
1387 | "metadata": {},
1388 | "outputs": [],
1389 | "source": [
1390 | "cursor = cnxn.cursor()\n",
1391 | "\n",
1392 | "for index, c in cs.iterrows():\n",
1393 | " cursor.execute(\"INSERT INTO contributors (name) values (?)\",c['name'])\n",
1394 | "cursor.commit()\n",
1395 | "cursor.close()"
1396 | ]
1397 | },
1398 | {
1399 | "cell_type": "code",
1400 | "execution_count": 27,
1401 | "metadata": {},
1402 | "outputs": [
1403 | {
1404 | "data": {
1405 | "text/html": [
1406 | "\n",
1407 | "\n",
1420 | "
\n",
1421 | " \n",
1422 | " \n",
1423 | " | \n",
1424 | " full_name | \n",
1425 | " url | \n",
1426 | " clone_url | \n",
1427 | " watchers_count | \n",
1428 | " stargazers_count | \n",
1429 | " language | \n",
1430 | " forks | \n",
1431 | " description | \n",
1432 | " type | \n",
1433 | " license | \n",
1434 | " fork | \n",
1435 | " created_at | \n",
1436 | " updated_at | \n",
1437 | " pushed_at | \n",
1438 | " id | \n",
1439 | "
\n",
1440 | " \n",
1441 | " \n",
1442 | " \n",
1443 | " 0 | \n",
1444 | " 477132016yb/plant-vs-zombies | \n",
1445 | " https://github.com/477132016yb/plant-vs-zombies | \n",
1446 | " https://github.com/477132016yb/plant-vs-zombie... | \n",
1447 | " 12 | \n",
1448 | " 12 | \n",
1449 | " 1 | \n",
1450 | " 0 | \n",
1451 | " No Description | \n",
1452 | " 1 | \n",
1453 | " 1 | \n",
1454 | " False | \n",
1455 | " 2023-05-01 | \n",
1456 | " 2023-06-15 | \n",
1457 | " 2023-06-13 | \n",
1458 | " 1 | \n",
1459 | "
\n",
1460 | " \n",
1461 | " 1 | \n",
1462 | " ParveshSandila/CountryCodeChooser | \n",
1463 | " https://github.com/ParveshSandila/CountryCodeC... | \n",
1464 | " https://github.com/ParveshSandila/CountryCodeC... | \n",
1465 | " 12 | \n",
1466 | " 12 | \n",
1467 | " 2 | \n",
1468 | " 1 | \n",
1469 | " Country code chooser in Jetpack compose | \n",
1470 | " 1 | \n",
1471 | " 1 | \n",
1472 | " False | \n",
1473 | " 2023-05-01 | \n",
1474 | " 2023-06-06 | \n",
1475 | " 2023-06-09 | \n",
1476 | " 2 | \n",
1477 | "
\n",
1478 | " \n",
1479 | " 2 | \n",
1480 | " ozodbek-sobirjonovich/sqlite3_tutorial | \n",
1481 | " https://github.com/ozodbek-sobirjonovich/sqlit... | \n",
1482 | " https://github.com/ozodbek-sobirjonovich/sqlit... | \n",
1483 | " 12 | \n",
1484 | " 12 | \n",
1485 | " 3 | \n",
1486 | " 0 | \n",
1487 | " No Description | \n",
1488 | " 1 | \n",
1489 | " 1 | \n",
1490 | " False | \n",
1491 | " 2023-05-01 | \n",
1492 | " 2023-06-10 | \n",
1493 | " 2023-05-01 | \n",
1494 | " 3 | \n",
1495 | "
\n",
1496 | " \n",
1497 | " 3 | \n",
1498 | " mahomaps/mm-v1 | \n",
1499 | " https://github.com/mahomaps/mm-v1 | \n",
1500 | " https://github.com/mahomaps/mm-v1.git | \n",
1501 | " 12 | \n",
1502 | " 12 | \n",
1503 | " 4 | \n",
1504 | " 2 | \n",
1505 | " MahoMaps: Yandex.Maps client for MIDP2 | \n",
1506 | " 2 | \n",
1507 | " 2 | \n",
1508 | " False | \n",
1509 | " 2023-05-01 | \n",
1510 | " 2023-06-04 | \n",
1511 | " 2023-06-18 | \n",
1512 | " 4 | \n",
1513 | "
\n",
1514 | " \n",
1515 | " 4 | \n",
1516 | " cruip/cruip-tutorials-next | \n",
1517 | " https://github.com/cruip/cruip-tutorials-next | \n",
1518 | " https://github.com/cruip/cruip-tutorials-next.git | \n",
1519 | " 12 | \n",
1520 | " 12 | \n",
1521 | " 5 | \n",
1522 | " 4 | \n",
1523 | " A repository of Cruip's guides & tutorials mad... | \n",
1524 | " 2 | \n",
1525 | " 1 | \n",
1526 | " False | \n",
1527 | " 2023-05-01 | \n",
1528 | " 2023-06-18 | \n",
1529 | " 2023-06-18 | \n",
1530 | " 5 | \n",
1531 | "
\n",
1532 | " \n",
1533 | "
\n",
1534 | "
"
1535 | ],
1536 | "text/plain": [
1537 | " full_name \\\n",
1538 | "0 477132016yb/plant-vs-zombies \n",
1539 | "1 ParveshSandila/CountryCodeChooser \n",
1540 | "2 ozodbek-sobirjonovich/sqlite3_tutorial \n",
1541 | "3 mahomaps/mm-v1 \n",
1542 | "4 cruip/cruip-tutorials-next \n",
1543 | "\n",
1544 | " url \\\n",
1545 | "0 https://github.com/477132016yb/plant-vs-zombies \n",
1546 | "1 https://github.com/ParveshSandila/CountryCodeC... \n",
1547 | "2 https://github.com/ozodbek-sobirjonovich/sqlit... \n",
1548 | "3 https://github.com/mahomaps/mm-v1 \n",
1549 | "4 https://github.com/cruip/cruip-tutorials-next \n",
1550 | "\n",
1551 | " clone_url watchers_count \\\n",
1552 | "0 https://github.com/477132016yb/plant-vs-zombie... 12 \n",
1553 | "1 https://github.com/ParveshSandila/CountryCodeC... 12 \n",
1554 | "2 https://github.com/ozodbek-sobirjonovich/sqlit... 12 \n",
1555 | "3 https://github.com/mahomaps/mm-v1.git 12 \n",
1556 | "4 https://github.com/cruip/cruip-tutorials-next.git 12 \n",
1557 | "\n",
1558 | " stargazers_count language forks \\\n",
1559 | "0 12 1 0 \n",
1560 | "1 12 2 1 \n",
1561 | "2 12 3 0 \n",
1562 | "3 12 4 2 \n",
1563 | "4 12 5 4 \n",
1564 | "\n",
1565 | " description type license fork \\\n",
1566 | "0 No Description 1 1 False \n",
1567 | "1 Country code chooser in Jetpack compose 1 1 False \n",
1568 | "2 No Description 1 1 False \n",
1569 | "3 MahoMaps: Yandex.Maps client for MIDP2 2 2 False \n",
1570 | "4 A repository of Cruip's guides & tutorials mad... 2 1 False \n",
1571 | "\n",
1572 | " created_at updated_at pushed_at id \n",
1573 | "0 2023-05-01 2023-06-15 2023-06-13 1 \n",
1574 | "1 2023-05-01 2023-06-06 2023-06-09 2 \n",
1575 | "2 2023-05-01 2023-06-10 2023-05-01 3 \n",
1576 | "3 2023-05-01 2023-06-04 2023-06-18 4 \n",
1577 | "4 2023-05-01 2023-06-18 2023-06-18 5 "
1578 | ]
1579 | },
1580 | "execution_count": 27,
1581 | "metadata": {},
1582 | "output_type": "execute_result"
1583 | }
1584 | ],
1585 | "source": [
1586 | "dataToInserted.head()"
1587 | ]
1588 | },
1589 | {
1590 | "cell_type": "markdown",
1591 | "metadata": {},
1592 | "source": [
1593 | "Repositories
"
1594 | ]
1595 | },
1596 | {
1597 | "cell_type": "code",
1598 | "execution_count": 13,
1599 | "metadata": {},
1600 | "outputs": [],
1601 | "source": [
1602 | "dataToInserted['description'].fillna('No Description',inplace=True)\n",
1603 | "#dataToInserted['description'].isnull().sum()"
1604 | ]
1605 | },
1606 | {
1607 | "cell_type": "code",
1608 | "execution_count": 14,
1609 | "metadata": {},
1610 | "outputs": [],
1611 | "source": [
1612 | "cursor = cnxn.cursor()\n",
1613 | "\n",
1614 | "for index, d in dataToInserted.iterrows():\n",
1615 | " #print(d['full_name'],d['description'],d['url'],d['clone_url'],d['fork'],d['watchers_count'],d['forks'],d['created_at'],d['updated_at'],d['pushed_at'],d['language'],d['type'],d['license'])\n",
1616 | " cursor.execute(\"INSERT INTO repositories (fullName,description,url,cloneUrl,watchersCount,forks,createdAt,updatedAt,pushedAt,langID,ownerID,licenseID) values (?,?,?,?,?,?,?,?,?,?,?,?)\",d['full_name'],d['description'],d['url'],d['clone_url'],d['watchers_count'],d['forks'],d['created_at'],d['updated_at'],d['pushed_at'],d['language'],d['type'],d['license'])\n",
1617 | "cursor.commit()\n",
1618 | "cursor.close()"
1619 | ]
1620 | },
1621 | {
1622 | "cell_type": "markdown",
1623 | "metadata": {},
1624 | "source": [
1625 | "Commits
"
1626 | ]
1627 | },
1628 | {
1629 | "cell_type": "code",
1630 | "execution_count": 15,
1631 | "metadata": {},
1632 | "outputs": [
1633 | {
1634 | "name": "stderr",
1635 | "output_type": "stream",
1636 | "text": [
1637 | "C:\\Users\\Youcode\\AppData\\Local\\Temp\\ipykernel_14064\\542819858.py:7: SettingWithCopyWarning: \n",
1638 | "A value is trying to be set on a copy of a slice from a DataFrame\n",
1639 | "\n",
1640 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
1641 | " commits['name'].replace(c['name'],c['id'],inplace=True)\n"
1642 | ]
1643 | }
1644 | ],
1645 | "source": [
1646 | "#commits = dataToInserted[['id','full_name',]]\n",
1647 | "\n",
1648 | "commits = contributors[['full_name','name','commits']]\n",
1649 | "\n",
1650 | "#replace name contributors by id\n",
1651 | "for index, c in cs.iterrows():\n",
1652 | " commits['name'].replace(c['name'],c['id'],inplace=True)\n"
1653 | ]
1654 | },
1655 | {
1656 | "cell_type": "code",
1657 | "execution_count": 17,
1658 | "metadata": {},
1659 | "outputs": [
1660 | {
1661 | "name": "stderr",
1662 | "output_type": "stream",
1663 | "text": [
1664 | "C:\\Users\\Youcode\\AppData\\Local\\Temp\\ipykernel_14064\\3544187285.py:3: SettingWithCopyWarning: \n",
1665 | "A value is trying to be set on a copy of a slice from a DataFrame\n",
1666 | "\n",
1667 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
1668 | " commits['full_name'].replace(c['full_name'],c['id'],inplace=True)\n"
1669 | ]
1670 | }
1671 | ],
1672 | "source": [
1673 | "#replace repository full names by id\n",
1674 | "for index, c in repositories.iterrows():\n",
1675 | " commits['full_name'].replace(c['full_name'],c['id'],inplace=True)"
1676 | ]
1677 | },
1678 | {
1679 | "cell_type": "code",
1680 | "execution_count": 24,
1681 | "metadata": {},
1682 | "outputs": [
1683 | {
1684 | "data": {
1685 | "text/html": [
1686 | "\n",
1687 | "\n",
1700 | "
\n",
1701 | " \n",
1702 | " \n",
1703 | " | \n",
1704 | " full_name | \n",
1705 | " name | \n",
1706 | " commits | \n",
1707 | "
\n",
1708 | " \n",
1709 | " \n",
1710 | " \n",
1711 | " 0 | \n",
1712 | " 22222 | \n",
1713 | " 1 | \n",
1714 | " 38 | \n",
1715 | "
\n",
1716 | " \n",
1717 | " 1 | \n",
1718 | " 22222 | \n",
1719 | " 2 | \n",
1720 | " 1 | \n",
1721 | "
\n",
1722 | " \n",
1723 | " 2 | \n",
1724 | " 22222 | \n",
1725 | " 3 | \n",
1726 | " 1 | \n",
1727 | "
\n",
1728 | " \n",
1729 | " 3 | \n",
1730 | " 22222 | \n",
1731 | " 4 | \n",
1732 | " 1 | \n",
1733 | "
\n",
1734 | " \n",
1735 | " 4 | \n",
1736 | " 22223 | \n",
1737 | " 5 | \n",
1738 | " 25 | \n",
1739 | "
\n",
1740 | " \n",
1741 | " ... | \n",
1742 | " ... | \n",
1743 | " ... | \n",
1744 | " ... | \n",
1745 | "
\n",
1746 | " \n",
1747 | " 13914 | \n",
1748 | " 14805 | \n",
1749 | " 12567 | \n",
1750 | " 240 | \n",
1751 | "
\n",
1752 | " \n",
1753 | " 13915 | \n",
1754 | " 14806 | \n",
1755 | " 12568 | \n",
1756 | " 14 | \n",
1757 | "
\n",
1758 | " \n",
1759 | " 13916 | \n",
1760 | " 14807 | \n",
1761 | " 12569 | \n",
1762 | " 13 | \n",
1763 | "
\n",
1764 | " \n",
1765 | " 13917 | \n",
1766 | " 14808 | \n",
1767 | " 12570 | \n",
1768 | " 25 | \n",
1769 | "
\n",
1770 | " \n",
1771 | " 13918 | \n",
1772 | " 14808 | \n",
1773 | " 12571 | \n",
1774 | " 19 | \n",
1775 | "
\n",
1776 | " \n",
1777 | "
\n",
1778 | "
13811 rows × 3 columns
\n",
1779 | "
"
1780 | ],
1781 | "text/plain": [
1782 | " full_name name commits\n",
1783 | "0 22222 1 38\n",
1784 | "1 22222 2 1\n",
1785 | "2 22222 3 1\n",
1786 | "3 22222 4 1\n",
1787 | "4 22223 5 25\n",
1788 | "... ... ... ...\n",
1789 | "13914 14805 12567 240\n",
1790 | "13915 14806 12568 14\n",
1791 | "13916 14807 12569 13\n",
1792 | "13917 14808 12570 25\n",
1793 | "13918 14808 12571 19\n",
1794 | "\n",
1795 | "[13811 rows x 3 columns]"
1796 | ]
1797 | },
1798 | "execution_count": 24,
1799 | "metadata": {},
1800 | "output_type": "execute_result"
1801 | }
1802 | ],
1803 | "source": [
1804 | "commits = pd.read_csv('commits.csv',index_col=0)\n",
1805 | "\n",
1806 | "commits['full_name'] = commits['full_name'].astype('int')\n",
1807 | "commits['name'] = commits['name'].astype('int')\n",
1808 | "\n",
1809 | "commits"
1810 | ]
1811 | },
1812 | {
1813 | "cell_type": "code",
1814 | "execution_count": null,
1815 | "metadata": {},
1816 | "outputs": [],
1817 | "source": [
1818 | "cursor = cnxn.cursor()\n",
1819 | "for row in commits.itertuples():\n",
1820 | " cursor.execute(\n",
1821 | " f'''INSERT INTO Commits VALUES (?,?,?)''',\n",
1822 | " row.full_name,\n",
1823 | " row.name,\n",
1824 | " row.commits\n",
1825 | " )\n",
1826 | "cursor.commit()\n",
1827 | "cursor.close()"
1828 | ]
1829 | }
1830 | ],
1831 | "metadata": {
1832 | "kernelspec": {
1833 | "display_name": "base",
1834 | "language": "python",
1835 | "name": "python3"
1836 | },
1837 | "language_info": {
1838 | "codemirror_mode": {
1839 | "name": "ipython",
1840 | "version": 3
1841 | },
1842 | "file_extension": ".py",
1843 | "mimetype": "text/x-python",
1844 | "name": "python",
1845 | "nbconvert_exporter": "python",
1846 | "pygments_lexer": "ipython3",
1847 | "version": "3.10.10"
1848 | },
1849 | "orig_nbformat": 4
1850 | },
1851 | "nbformat": 4,
1852 | "nbformat_minor": 2
1853 | }
1854 |
--------------------------------------------------------------------------------
/Brief_insertion_data_in_database_sql/reposdb.sql:
--------------------------------------------------------------------------------
1 |
2 | CREATE DATABASE githubdb
3 |
4 | use githubdb
5 |
6 | -- Table Technologies
7 | CREATE TABLE technologies(
8 | "langID" INT PRIMARY KEY IDENTITY,
9 | "name" VARCHAR(55) NOT NULL
10 | )
11 |
12 | -- Table Owners
13 | CREATE TABLE owners(
14 | "ownerID" INT PRIMARY KEY IDENTITY,
15 | "type" VARCHAR(55) NOT NULL
16 | )
17 |
18 | -- Table License
19 | CREATE TABLE licenses(
20 | "licenseID" INT PRIMARY KEY IDENTITY,
21 | "lkey" VARCHAR(255),
22 | "name" VARCHAR(255),
23 | "url" VARCHAR(255),
24 | "spdxID" VARCHAR(255),
25 | "nodeID" VARCHAR(255)
26 | )
27 |
28 | -- Table Contributors
29 | CREATE TABLE contributors(
30 | "contributorID" INT PRIMARY KEY IDENTITY,
31 | "name" VARCHAR(255) NOT NULL
32 | )
33 |
34 | -- repositories: one row per repository, linked to technologies, owners and licenses by foreign key
35 | CREATE TABLE repositories(
36 |     "repoID" INT PRIMARY KEY IDENTITY,
37 |     "fullName" VARCHAR(MAX) NOT NULL, -- was TEXT, a deprecated type; VARCHAR(MAX) is the documented replacement
38 |     "description" VARCHAR(MAX), -- was TEXT, replaced for the same reason
39 |     "url" VARCHAR(255),
40 |     "cloneUrl" VARCHAR(255),
41 |     "fork" BIT,
42 |     "watchersCount" INT,
43 |     "forks" INT,
44 |     "createdAt" DATE,
45 |     "updatedAt" DATE,
46 |     "pushedAt" DATE,
47 |     "langID" INT FOREIGN KEY REFERENCES technologies(langID),
48 |     "ownerID" INT FOREIGN KEY REFERENCES owners(ownerID),
49 |     "licenseID" INT FOREIGN KEY REFERENCES licenses(licenseID)
50 | )
51 |
52 | -- commits: one row per (repoID, contributorID) pair, enforced by the composite primary key
53 | CREATE TABLE commits (
54 |     "repoID"        INT,
55 |     "contributorID" INT,
56 |     counts          INT,
57 |     PRIMARY KEY CLUSTERED ("repoID", "contributorID"),
58 |     FOREIGN KEY ("repoID") REFERENCES repositories ("repoID"),
59 |     FOREIGN KEY ("contributorID") REFERENCES contributors ("contributorID")
60 | )
61 | -- Create indexes
62 | 
63 | CREATE INDEX reposNameIndex on repositories ("repoID") -- NOTE(review): despite its name this indexes repoID, which the primary key presumably already covers; confirm intent
64 | 
65 | CREATE INDEX licenseIdIndex on licenses ("licenseID") -- NOTE(review): licenseID is the primary key, so this index looks redundant; confirm
66 | 
67 | CREATE INDEX contributorNameIndex on contributors ("name")
68 |
69 | -- Full-table reads that force a named index through a table hint
70 | 
71 | SELECT *
72 | FROM repositories WITH (INDEX(reposNameIndex))
73 | 
74 | SELECT *
75 | FROM contributors WITH (INDEX(contributorNameIndex))
76 |
77 |
78 | -- Select all rows from each table
79 | 
80 | SELECT * FROM technologies
81 | 
82 | SELECT * FROM owners
83 | 
84 | SELECT * FROM licenses
85 | 
86 | SELECT * FROM contributors
87 | 
88 | SELECT * FROM repositories
89 | 
90 | SELECT * FROM commits
91 |
92 |
--------------------------------------------------------------------------------
/Challenge_CIty_Map/data/youssoufia.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "FeatureCollection",
3 | "features": [
4 | {
5 | "type": "Feature",
6 | "properties": {
7 | "title": "OCY"
8 | },
9 | "geometry": {
10 | "coordinates": [
11 | -8.535287255587662,
12 | 32.23776969283607
13 | ],
14 | "type": "Point"
15 | },
16 | "id": 0
17 | },
18 | {
19 | "type": "Feature",
20 | "properties": {
21 | "title": "Youcode"
22 | },
23 | "geometry": {
24 | "coordinates": [
25 | -8.521753524447945,
26 | 32.24763669446159
27 | ],
28 | "type": "Point"
29 | },
30 | "id": 1
31 | },
32 | {
33 | "type": "Feature",
34 | "properties": {
35 | "title": "cafe"
36 | },
37 | "geometry": {
38 | "coordinates": [
39 | -8.534590862329196,
40 | 32.24722278148822
41 | ],
42 | "type": "Point"
43 | },
44 | "id": 2
45 | },
46 | {
47 | "type": "Feature",
48 | "properties": {
49 | "title": "hopital cheikh ZAID"
50 | },
51 | "geometry": {
52 | "coordinates": [
53 | -8.521920677256531,
54 | 32.24864658138999
55 | ],
56 | "type": "Point"
57 | },
58 | "id": 3
59 | },
60 | {
61 | "type": "Feature",
62 | "properties": {
63 | "title": "la gare des voyageurs"
64 | },
65 | "geometry": {
66 | "coordinates": [
67 | -8.540009132881096,
68 | 32.239437494319276
69 | ],
70 | "type": "Point"
71 | },
72 | "id": 4
73 | },
74 | {
75 | "type": "Feature",
76 | "properties": {
77 | "title": "Hotel Atlas"
78 | },
79 | "geometry": {
80 | "coordinates": [
81 | -8.536129605308304,
82 | 32.242233862139784
83 | ],
84 | "type": "Point"
85 | },
86 | "id": 5
87 | },
88 | {
89 | "type": "Feature",
90 | "properties": {
91 | "title": "Attijariwafa bank"
92 | },
93 | "geometry": {
94 | "coordinates": [
95 | -8.533854145768288,
96 | 32.246190520166536
97 | ],
98 | "type": "Point"
99 | },
100 | "id": 6
101 | },
102 | {
103 | "type": "Feature",
104 | "properties": {
105 | "title": "مسجد العتيق"
106 | },
107 | "geometry": {
108 | "coordinates": [
109 | -8.536531512363297,
110 | 32.24278344535385
111 | ],
112 | "type": "Point"
113 | },
114 | "id": 7
115 | },
116 | {
117 | "type": "Feature",
118 | "properties": {
119 | "title": "BANQUE POPULAIRE"
120 | },
121 | "geometry": {
122 | "coordinates": [
123 | -8.537708923537252,
124 | 32.24123515696351
125 | ],
126 | "type": "Point"
127 | },
128 | "id": 8
129 | },
130 | {
131 | "type": "Feature",
132 | "properties": {
133 | "title": "OFPPT YOUSSOUFIA"
134 | },
135 | "geometry": {
136 | "coordinates": [
137 | -8.533018562846507,
138 | 32.24170730167809
139 | ],
140 | "type": "Point"
141 | },
142 | "id": 9
143 | },
144 | {
145 | "type": "Feature",
146 | "properties": {
147 | "title": "Marché"
148 | },
149 | "geometry": {
150 | "coordinates": [
151 | -8.537243951910256,
152 | 32.23931145808096
153 | ],
154 | "type": "Point"
155 | },
156 | "id": 10
157 | }
158 | ]
159 | }
--------------------------------------------------------------------------------
/Challenge_CIty_Map/script.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 246,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#pip install geopandas\n",
10 | "#pip install folium\n",
11 | "#pip install geopy\n",
12 | "#!pip install openrouteservice"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {},
19 | "outputs": [
20 | {
21 | "data": {
22 | "application/vnd.jupyter.widget-view+json": {
23 | "model_id": "c8d2861b60a74835a52ec290b4a28fcb",
24 | "version_major": 2,
25 | "version_minor": 0
26 | },
27 | "text/plain": [
28 | "Dropdown(description='From:', index=1, options=('OCY', 'Youcode', 'cafe', 'hopital cheikh ZAID', 'la gare des …"
29 | ]
30 | },
31 | "metadata": {},
32 | "output_type": "display_data"
33 | },
34 | {
35 | "data": {
36 | "application/vnd.jupyter.widget-view+json": {
37 | "model_id": "73fb1fcfc924459992ab8efaa1add9d1",
38 | "version_major": 2,
39 | "version_minor": 0
40 | },
41 | "text/plain": [
42 | "Dropdown(description='To:', index=4, options=('OCY', 'Youcode', 'cafe', 'hopital cheikh ZAID', 'la gare des vo…"
43 | ]
44 | },
45 | "metadata": {},
46 | "output_type": "display_data"
47 | },
48 | {
49 | "data": {
50 | "application/vnd.jupyter.widget-view+json": {
51 | "model_id": "3e2e4266b9724aca91be2b9a7c6740a2",
52 | "version_major": 2,
53 | "version_minor": 0
54 | },
55 | "text/plain": [
56 | "Button(button_style='success', description='Calcule', icon='check', style=ButtonStyle(), tooltip='Description'…"
57 | ]
58 | },
59 | "metadata": {},
60 | "output_type": "display_data"
61 | },
62 | {
63 | "data": {
64 | "text/html": [
65 | "Make this Notebook Trusted to load map: File -> Trust Notebook
"
446 | ],
447 | "text/plain": [
448 | ""
449 | ]
450 | },
451 | "metadata": {},
452 | "output_type": "display_data"
453 | }
454 | ],
455 | "source": [
456 | "import geopandas as gpd\n",
457 | "import folium\n",
458 | "from folium.plugins import AntPath\n",
459 | "from geopy.distance import distance\n",
460 | "import ipywidgets as widgets\n",
461 | "from IPython.display import clear_output\n",
462 | "\n",
463 | "# Base map at a fixed centre and zoom; the places are read from the GeoJSON file.\n",
464 | "m = folium.Map(location=[32.24556456855282, -8.532570502180334], zoom_start=15)\n",
465 | "df_places = gpd.read_file('data/youssoufia.json')\n",
466 | "\n",
467 | "\n",
468 | "def getMap(fromPos,toPos):\n",
469 | "    \"\"\"Redraw the map: one marker per place plus an animated path between two places.\n",
470 | "\n",
471 | "    fromPos and toPos are place titles, matched against df_places['title'].\n",
472 | "    \"\"\"\n",
473 | "    clear_output()\n",
474 | "    # Start from a fresh map: adding to the global `m` kept stale paths and duplicate markers from earlier clicks.\n",
475 | "    route_map = folium.Map(location=m.location, zoom_start=15)\n",
476 | "    for _, place in df_places.iterrows():\n",
477 | "        folium.Marker(\n",
478 | "            location=[place['geometry'].y, place['geometry'].x],\n",
479 | "            popup=place['title'],\n",
480 | "            icon=folium.Icon(icon='home'),\n",
481 | "        ).add_to(route_map)\n",
482 | "    # Compare titles rather than longitudes: two different places can share an x coordinate.\n",
483 | "    if fromPos != toPos:\n",
484 | "        # .iloc[0] takes the matching point; float() on a one-element Series is deprecated in pandas.\n",
485 | "        start = df_places.loc[df_places['title'] == fromPos, 'geometry'].iloc[0]\n",
486 | "        end = df_places.loc[df_places['title'] == toPos, 'geometry'].iloc[0]\n",
487 | "        pos1, pos2 = (start.y, start.x), (end.y, end.x)\n",
488 | "        label = f'{round(distance(pos1, pos2).km, 2)} KM'\n",
489 | "        AntPath((pos1, pos2), color='blue', dash_array=[30, 15], tooltip=label, weight=5, opacity=.85).add_to(route_map)\n",
490 | "    display(w1, w2, btn, route_map)\n",
491 | "\n",
492 | "\n",
493 | "# Default selection: the first two places in the dataset (positional, independent of index labels).\n",
494 | "titleFromPosition = df_places['title'].iloc[0]\n",
495 | "titletoPosition = df_places['title'].iloc[1]\n",
496 | "\n",
497 | "# Dropdowns for the start and end place; the options are the place titles.\n",
498 | "w1 = widgets.Dropdown(\n",
499 | "    options=df_places['title'],\n",
500 | "    value=titleFromPosition,\n",
501 | "    description='From:',\n",
502 | "    disabled=False,\n",
503 | ")\n",
504 | "w2 = widgets.Dropdown(\n",
505 | "    options=df_places['title'],\n",
506 | "    value=titletoPosition,\n",
507 | "    description='To:',\n",
508 | "    disabled=False,\n",
509 | ")\n",
510 | "# Button has no 'value' trait (that belongs to ToggleButton), so none is passed.\n",
511 | "btn = widgets.Button(\n",
512 | "    description='Calcule',\n",
513 | "    disabled=False,\n",
514 | "    button_style='success',\n",
515 | "    tooltip='Description',\n",
516 | "    icon='check'\n",
517 | ")\n",
518 | "\n",
519 | "def getFirstPosition(title):\n",
520 | " global titleFromPosition\n",
521 | " titleFromPosition = title\n",
522 | "\n",
523 | "def getSecondPosition(title):\n",
524 | " global titletoPosition\n",
525 | " titletoPosition = title\n",
526 | "\n",
527 | "# Mirror each dropdown's new value into the stored titles, and redraw the map on click.\n",
528 | "w1.observe(lambda change: getFirstPosition(change.new), 'value')\n",
529 | "w2.observe(lambda change: getSecondPosition(change.new), 'value')\n",
530 | "btn.on_click(lambda _button: getMap(titleFromPosition, titletoPosition))\n",
531 | "\n",
532 | "display(w1, w2, btn)\n"
533 | ]
534 | }
535 | ],
536 | "metadata": {
537 | "kernelspec": {
538 | "display_name": "base",
539 | "language": "python",
540 | "name": "python3"
541 | },
542 | "language_info": {
543 | "codemirror_mode": {
544 | "name": "ipython",
545 | "version": 3
546 | },
547 | "file_extension": ".py",
548 | "mimetype": "text/x-python",
549 | "name": "python",
550 | "nbconvert_exporter": "python",
551 | "pygments_lexer": "ipython3",
552 | "version": "3.10.10"
553 | },
554 | "orig_nbformat": 4
555 | },
556 | "nbformat": 4,
557 | "nbformat_minor": 2
558 | }
559 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Youcode-Data-Dev
--------------------------------------------------------------------------------