├── .gitignore ├── Brief_Data_Splitter ├── data │ ├── data.csv │ └── data.json ├── repo_contributors_05.csv └── script.ipynb ├── Brief_analyse_opportunités_emploi ├── bubble_chart.py ├── data │ ├── cleaned_jobs.csv │ └── jobs.csv ├── query.sql └── script.ipynb ├── Brief_cleaning_data_of_github └── cleaning_data_github.ipynb ├── Brief_insertion_data_in_database_sql ├── insertion_data.ipynb └── reposdb.sql ├── Brief_scraping_github └── scraping_github.ipynb ├── Brief_visualization_data_of_github └── vusialization_data_github.ipynb ├── Challenge_CIty_Map ├── data │ └── youssoufia.json └── script.ipynb └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /Brief_Data_Splitter/script.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 251, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import pyodbc\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 260, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "#function to split data set\n", 20 | "def dataSplitter(path_csv,perJSon,perDataBase):\n", 21 | "\n", 22 | " data = pd.read_csv(path_csv)\n", 23 | "\n", 24 | " #répartition aléatoire des données. \n", 25 | " data_randed = data.sample(frac=1)\n", 26 | "\n", 27 | " #len of data \n", 28 | " lenOfData = len(data_randed)\n", 29 | "\n", 30 | " if(perJSon + perDataBase) == 100:\n", 31 | "\n", 32 | " p1 = round((lenOfData * perJSon) / 100)\n", 33 | " p2 = lenOfData - p1\n", 34 | "\n", 35 | " return data_randed[:p1], data_randed[p1:],[]\n", 36 | " #display(lenOfData,f'{perJSon}% = {p1}',f'{perDataBase}% = {p2}', p1+p2)\n", 37 | " \n", 38 | " elif(perJSon + perDataBase) < 100:\n", 39 | " p1 = round((lenOfData * perJSon) / 100)\n", 40 | " p2 = round((lenOfData * perDataBase) / 100)\n", 41 | " #p3 = lenOfData - (p1 + p2)\n", 42 | "\n", 43 | " return data_randed[:p1],data_randed[p1:p1+p2],data_randed[p1+p2:]\n", 44 | " #display(lenOfData,f'{perJSon}% = {p1}',f'{perDataBase}% = {p2}', f'{100 - (perDataBase + perJSon)}% = {p3}',p1+p2+p3)\n", 45 | " else:\n", 46 | " raise Exception('Saisir un pourcentage valide!')\n", 47 | "\n", 48 | "\n", 49 | "def goDataToJSon(dataToJson,fileName):\n", 50 | " dataToJson.to_json(path_or_buf=f'data/{fileName}.json',orient='records')\n", 51 | "\n", 52 | "def goDataToCsv(dataToCsv,fileName):\n", 53 | " dataToCsv.to_csv(f'data/{fileName}.csv')\n", 54 | "\n", 55 | "def goDataToDataBase(dataToDb):\n", 56 | " 
#coonection to db\n", 57 | " cnxn = pyodbc.connect('Driver={SQL Server};'\n", 58 | " 'Server=XXXXXX-XXX\\SQLEXPRESS;'\n", 59 | " 'Database=splitterdb;'\n", 60 | " 'Trusted_Connection=yes;')\n", 61 | "\n", 62 | " cursor = cnxn.cursor()\n", 63 | " for index,row in dataToDb.iterrows():\n", 64 | " cursor.execute('INSERT INTO contributors VALUES(?,?,?)',row['full_name'],row['name'],row['commits'])\n", 65 | " cnxn.commit()\n", 66 | " cnxn.close()\n", 67 | " \n", 68 | "\n", 69 | "try:\n", 70 | " allDataFrame = dataSplitter('repo_contributors_05.csv',perJSon=30,perDataBase=20)\n", 71 | " \n", 72 | " dataToJson,dataToDatabase,dataToCsv = allDataFrame\n", 73 | "\n", 74 | " '''\n", 75 | " Création du fichier JSON\n", 76 | " '''\n", 77 | " if(len(dataToJson) > 0):\n", 78 | " goDataToJSon(dataToJson,'data')\n", 79 | "\n", 80 | " '''\n", 81 | " Création du fichier CSv\n", 82 | " '''\n", 83 | " if((len(dataToCsv)) > 0):\n", 84 | " goDataToCsv(dataToCsv,'data')\n", 85 | "\n", 86 | " '''\n", 87 | " Transfert vers la base de données\n", 88 | " '''\n", 89 | " if(len(dataToDatabase) > 0):\n", 90 | " goDataToDataBase(dataToDatabase)\n", 91 | "\n", 92 | "except Exception as e:\n", 93 | " print(e.args[0])\n" 94 | ] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "base", 100 | "language": "python", 101 | "name": "python3" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.10.10" 114 | }, 115 | "orig_nbformat": 4 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 2 119 | } 120 | -------------------------------------------------------------------------------- /Brief_analyse_opportunités_emploi/bubble_chart.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
class BubbleChart:
    """
    Packed-bubble chart layout.

    Bubbles are first placed on a square grid and can then be pulled
    towards their common, area-weighted center of mass with `collapse`.

    Attributes
    ----------
    bubbles : numpy.ndarray
        Array with one row per bubble; the columns are x, y, radius, area.
    bubble_spacing : float
        Minimal spacing between bubbles after collapsing.
    maxstep : float
        Grid pitch of the initial layout (largest diameter plus spacing).
    step_dist : float
        Current distance a bubble travels per move; halved by `collapse`
        whenever fewer than 10% of the bubbles could move directly.
    com : numpy.ndarray
        Current area-weighted center of mass (x, y).
    """

    def __init__(self, area, bubble_spacing=0):
        """
        Setup for bubble collapse.

        Parameters
        ----------
        area : array-like
            Area of the bubbles.
        bubble_spacing : float, default: 0
            Minimal spacing between bubbles after collapsing.

        Raises
        ------
        ValueError
            If *area* is empty.

        Notes
        -----
        If "area" is sorted, the results might look weird.
        """
        area = np.asarray(area)
        if area.size == 0:
            # Fail early with a clear message instead of an opaque
            # "zero-size array to reduction operation" error from max().
            raise ValueError("area must contain at least one bubble")
        r = np.sqrt(area / np.pi)

        self.bubble_spacing = bubble_spacing
        self.bubbles = np.ones((len(area), 4))
        self.bubbles[:, 2] = r
        self.bubbles[:, 3] = area
        self.maxstep = 2 * self.bubbles[:, 2].max() + self.bubble_spacing
        self.step_dist = self.maxstep / 2

        # calculate initial grid layout for bubbles
        length = np.ceil(np.sqrt(len(self.bubbles)))
        grid = np.arange(length) * self.maxstep
        gx, gy = np.meshgrid(grid, grid)
        self.bubbles[:, 0] = gx.flatten()[:len(self.bubbles)]
        self.bubbles[:, 1] = gy.flatten()[:len(self.bubbles)]

        self.com = self.center_of_mass()

    @staticmethod
    def _unit_vector(vec):
        """Return *vec* scaled to length 1, or None if its length is 0."""
        norm = np.sqrt(vec.dot(vec))
        if norm == 0:
            return None
        return vec / norm

    def center_of_mass(self):
        """Return the area-weighted mean (x, y) position of all bubbles."""
        return np.average(
            self.bubbles[:, :2], axis=0, weights=self.bubbles[:, 3]
        )

    def center_distance(self, bubble, bubbles):
        """Return the distances from *bubble*'s center to each row of *bubbles*."""
        return np.hypot(bubble[0] - bubbles[:, 0],
                        bubble[1] - bubbles[:, 1])

    def outline_distance(self, bubble, bubbles):
        """
        Return the gaps between *bubble*'s outline and the other outlines.

        The required ``bubble_spacing`` is subtracted as well, so a negative
        value means the two bubbles are closer than allowed.
        """
        center_distance = self.center_distance(bubble, bubbles)
        return (center_distance - bubble[2]
                - bubbles[:, 2] - self.bubble_spacing)

    def check_collisions(self, bubble, bubbles):
        """Return how many rows of *bubbles* are too close to *bubble*."""
        distance = self.outline_distance(bubble, bubbles)
        return int(np.count_nonzero(distance < 0))

    def collides_with(self, bubble, bubbles):
        """
        Return a one-element list with the index of the nearest bubble.

        The index refers to a row of *bubbles* and identifies the bubble
        whose outline is closest to (or overlaps most with) *bubble*.
        """
        distance = self.outline_distance(bubble, bubbles)
        # argmin over the 1-D distance array is always a scalar index.
        return [int(np.argmin(distance))]

    def collapse(self, n_iterations=50):
        """
        Move bubbles to the center of mass.

        Parameters
        ----------
        n_iterations : int, default: 50
            Number of moves to perform.
        """
        for _ in range(n_iterations):
            moves = 0
            for i in range(len(self.bubbles)):
                rest_bub = np.delete(self.bubbles, i, 0)
                # try to move directly towards the center of mass
                # unit direction vector from bubble to the center of mass
                dir_vec = self._unit_vector(self.com - self.bubbles[i, :2])
                if dir_vec is None:
                    # The bubble already sits on the center of mass.
                    # Normalising would divide by zero and the resulting
                    # NaN position would pass the collision check and
                    # corrupt the whole layout.
                    continue

                # calculate new bubble position
                new_point = self.bubbles[i, :2] + dir_vec * self.step_dist
                new_bubble = np.append(new_point, self.bubbles[i, 2:4])

                # check whether new bubble collides with other bubbles
                if not self.check_collisions(new_bubble, rest_bub):
                    self.bubbles[i, :] = new_bubble
                    self.com = self.center_of_mass()
                    moves += 1
                    continue

                # try to move around a bubble that you collide with
                for colliding in self.collides_with(new_bubble, rest_bub):
                    # unit vector towards the colliding bubble
                    dir_vec = self._unit_vector(
                        rest_bub[colliding, :2] - self.bubbles[i, :2])
                    if dir_vec is None:
                        # Coincident centers: no direction to side-step.
                        continue
                    # calculate orthogonal vector
                    orth = np.array([dir_vec[1], -dir_vec[0]])
                    # test which direction to go
                    new_point1 = self.bubbles[i, :2] + orth * self.step_dist
                    new_point2 = self.bubbles[i, :2] - orth * self.step_dist
                    dist1 = self.center_distance(
                        self.com, np.array([new_point1]))
                    dist2 = self.center_distance(
                        self.com, np.array([new_point2]))
                    new_point = new_point1 if dist1 < dist2 else new_point2
                    new_bubble = np.append(new_point, self.bubbles[i, 2:4])
                    if not self.check_collisions(new_bubble, rest_bub):
                        self.bubbles[i, :] = new_bubble
                        self.com = self.center_of_mass()

            # Refine the step once hardly any bubble can move directly.
            if moves / len(self.bubbles) < 0.1:
                self.step_dist = self.step_dist / 2

    def plot(self, ax, labels, colors):
        """
        Draw the bubble plot.

        Parameters
        ----------
        ax : matplotlib.axes.Axes
        labels : list
            Labels of the bubbles.
        colors : list
            Colors of the bubbles.
        """
        # Imported here so the layout can be computed without a plotting
        # backend being installed or initialised.
        import matplotlib.pyplot as plt

        for bubble, label, color in zip(self.bubbles, labels, colors):
            circ = plt.Circle(bubble[:2], bubble[2], color=color)
            ax.add_patch(circ)
            ax.text(*bubble[:2], label,
                    horizontalalignment='center', verticalalignment='center')
jobID INT, 38 | facilityName VARCHAR(100), 39 | PRIMARY KEY (jobID,facilityName), 40 | ) 41 | 42 | CREATE TABLE requirments( 43 | jobID INT, 44 | requirmentName VARCHAR(100), 45 | PRIMARY KEY (jobID,requirmentName) 46 | ) 47 | 48 | ----- Test queries 49 | 50 | SELECT * FROM countries 51 | 52 | SELECT * FROM experienceLevels 53 | 54 | SELECT * FROM companies 55 | 56 | SELECT * FROM jobTypes 57 | 58 | SELECT * FROM jobs 59 | 60 | 61 | SELECT * FROM requirments 62 | 63 | SELECT * FROM facilities 64 | 65 | 66 | ALTER TABLE facilities 67 | ADD FOREIGN KEY (jobID) REFERENCES jobs(JobID) 68 | 69 | 70 | SELECT companies.companyName, jobs.jobName,jobs.jobLocation,countries.countryName,facilities.facilityName FROM companies,jobs,countries,facilities 71 | where jobs.companyID = companies.companyID 72 | and jobs.countryID = countries.countryID 73 | and facilities.jobID = jobs.jobID 74 | 75 | 76 | select facilities.facilityName,COUNT(jobID) from facilities 77 | group by facilities.facilityName 78 | 79 | SELECT * FROM jobs 80 | where jobs.jobID NOT IN (SELECT jobID from requirments) 81 | 82 | -------------------------------------------------------------------------------- /Brief_insertion_data_in_database_sql/insertion_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "df_data_without_watchers_outliers = pd.read_csv('data/data_cleaned/data_without_watchers_outliers_2023_05.csv')\n", 19 | "df_watchers_outlires = pd.read_csv('data/data_cleaned/watchers_outliers_2023_05.csv')\n", 20 | "\n", 21 | "contributors = pd.read_csv('data/data_cleaned/repo_contributors_05.csv')\n", 22 | "\n", 23 | "data_c = 
pd.concat([df_data_without_watchers_outliers,df_watchers_outlires])\n", 24 | "\n", 25 | "#data_c['full_name'].drop_duplicates(data_c['full_name'].index,inplace=True)\n", 26 | "data_c.drop(columns=['Unnamed: 0.1','Unnamed: 0'],inplace=True)\n", 27 | "\n", 28 | "#data_c['license'].replace('No License',{'key': 0,'name': 'No License','spdx_id': '','url': '','node_id': ''},inplace=True)\n", 29 | "\n", 30 | "data_c.drop_duplicates(inplace=True)\n", 31 | "#data_c.info()" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "#Replace No License to dict\n", 41 | "data_c['license'].replace('No License',str(dict(key= '',name= 'No License',spdx_id= '',url= '',node_id= '')),inplace=True)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "

Technologies

" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/html": [ 59 | "
\n", 60 | "\n", 73 | "\n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | "
idname
01C++
12Kotlin
23Jupyter Notebook
34Java
45TypeScript
.........
183184Bikeshed
184185Smali
185186Vim Snippet
186187YARA
187188Hy
\n", 139 | "

188 rows × 2 columns

\n", 140 | "
" 141 | ], 142 | "text/plain": [ 143 | " id name\n", 144 | "0 1 C++\n", 145 | "1 2 Kotlin\n", 146 | "2 3 Jupyter Notebook\n", 147 | "3 4 Java\n", 148 | "4 5 TypeScript\n", 149 | ".. ... ...\n", 150 | "183 184 Bikeshed\n", 151 | "184 185 Smali\n", 152 | "185 186 Vim Snippet\n", 153 | "186 187 YARA\n", 154 | "187 188 Hy\n", 155 | "\n", 156 | "[188 rows x 2 columns]" 157 | ] 158 | }, 159 | "execution_count": 4, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "technoligies = pd.DataFrame(list(data_c['language'].unique())).rename(columns={0:'name'})\n", 166 | "techs = []\n", 167 | "for index,tech in technoligies.iterrows():\n", 168 | " techs.append(dict(id=index+1,name=tech['name']))\n", 169 | "\n", 170 | "technoligies = pd.DataFrame(techs)\n", 171 | "technoligies" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "

Owners

" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 5, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/html": [ 189 | "
\n", 190 | "\n", 203 | "\n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | "
idname
01User
12Organization
\n", 224 | "
" 225 | ], 226 | "text/plain": [ 227 | " id name\n", 228 | "0 1 User\n", 229 | "1 2 Organization" 230 | ] 231 | }, 232 | "execution_count": 5, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "owners = pd.DataFrame(list(data_c['type'].unique())).rename(columns={0:'name'})\n", 239 | "ors = []\n", 240 | "for index,otype in owners.iterrows():\n", 241 | " ors.append(dict(id=index+1,name=otype['name']))\n", 242 | "\n", 243 | "owners = pd.DataFrame(ors)\n", 244 | "owners" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "

License

" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 6, 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/html": [ 262 | "
\n", 263 | "\n", 276 | "\n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " 
\n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | 
" \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | "
idkeynamespdx_idurlnode_id
01No License
12gpl-3.0GNU General Public License v3.0GPL-3.0https://api.github.com/licenses/gpl-3.0MDc6TGljZW5zZTk=
23mitMIT LicenseMIThttps://api.github.com/licenses/mitMDc6TGljZW5zZTEz
34agpl-3.0GNU Affero General Public License v3.0AGPL-3.0https://api.github.com/licenses/agpl-3.0MDc6TGljZW5zZTE=
450bsdBSD Zero Clause License0BSDhttps://api.github.com/licenses/0bsdMDc6TGljZW5zZTM1
56bsd-3-clauseBSD 3-Clause \"New\" or \"Revised\" LicenseBSD-3-Clausehttps://api.github.com/licenses/bsd-3-clauseMDc6TGljZW5zZTU=
67mit-0MIT No AttributionMIT-0https://api.github.com/licenses/mit-0MDc6TGljZW5zZTQx
78otherOtherNOASSERTIONNoneMDc6TGljZW5zZTA=
89apache-2.0Apache License 2.0Apache-2.0https://api.github.com/licenses/apache-2.0MDc6TGljZW5zZTI=
910lgpl-3.0GNU Lesser General Public License v3.0LGPL-3.0https://api.github.com/licenses/lgpl-3.0MDc6TGljZW5zZTEy
1011gpl-2.0GNU General Public License v2.0GPL-2.0https://api.github.com/licenses/gpl-2.0MDc6TGljZW5zZTg=
1112cc0-1.0Creative Commons Zero v1.0 UniversalCC0-1.0https://api.github.com/licenses/cc0-1.0MDc6TGljZW5zZTY=
1213lgpl-2.1GNU Lesser General Public License v2.1LGPL-2.1https://api.github.com/licenses/lgpl-2.1MDc6TGljZW5zZTEx
1314cc-by-4.0Creative Commons Attribution 4.0 InternationalCC-BY-4.0https://api.github.com/licenses/cc-by-4.0MDc6TGljZW5zZTI1
1415unlicenseThe UnlicenseUnlicensehttps://api.github.com/licenses/unlicenseMDc6TGljZW5zZTE1
1516iscISC LicenseISChttps://api.github.com/licenses/iscMDc6TGljZW5zZTEw
1617bsd-2-clauseBSD 2-Clause \"Simplified\" LicenseBSD-2-Clausehttps://api.github.com/licenses/bsd-2-clauseMDc6TGljZW5zZTQ=
1718zlibzlib LicenseZlibhttps://api.github.com/licenses/zlibMDc6TGljZW5zZTI3
1819mpl-2.0Mozilla Public License 2.0MPL-2.0https://api.github.com/licenses/mpl-2.0MDc6TGljZW5zZTE0
1920epl-2.0Eclipse Public License 2.0EPL-2.0https://api.github.com/licenses/epl-2.0MDc6TGljZW5zZTMy
2021bsd-4-clauseBSD 4-Clause \"Original\" or \"Old\" LicenseBSD-4-Clausehttps://api.github.com/licenses/bsd-4-clauseMDc6TGljZW5zZTM5
2122osl-3.0Open Software License 3.0OSL-3.0https://api.github.com/licenses/osl-3.0MDc6TGljZW5zZTE2
2223mulanpsl-2.0Mulan Permissive Software License, Version 2MulanPSL-2.0https://api.github.com/licenses/mulanpsl-2.0L_kgCsbXVsYW5wc2wtMi4w
2324wtfplDo What The F*ck You Want To Public LicenseWTFPLhttps://api.github.com/licenses/wtfplMDc6TGljZW5zZTE4
2425cc-by-sa-4.0Creative Commons Attribution Share Alike 4.0 I...CC-BY-SA-4.0https://api.github.com/licenses/cc-by-sa-4.0MDc6TGljZW5zZTI2
2526bsl-1.0Boost Software License 1.0BSL-1.0https://api.github.com/licenses/bsl-1.0MDc6TGljZW5zZTI4
2627upl-1.0Universal Permissive License v1.0UPL-1.0https://api.github.com/licenses/upl-1.0MDc6TGljZW5zZTMz
2728lppl-1.3cLaTeX Project Public License v1.3cLPPL-1.3chttps://api.github.com/licenses/lppl-1.3cMDc6TGljZW5zZTIz
2829ncsaUniversity of Illinois/NCSA Open Source LicenseNCSAhttps://api.github.com/licenses/ncsaMDc6TGljZW5zZTI5
2930bsd-3-clause-clearBSD 3-Clause Clear LicenseBSD-3-Clause-Clearhttps://api.github.com/licenses/bsd-3-clause-c...MDc6TGljZW5zZTIx
3031epl-1.0Eclipse Public License 1.0EPL-1.0https://api.github.com/licenses/epl-1.0MDc6TGljZW5zZTc=
3132cern-ohl-w-2.0CERN Open Hardware Licence Version 2 - Weakly ...CERN-OHL-W-2.0https://api.github.com/licenses/cern-ohl-w-2.0L_kgCuY2Vybi1vaGwtdy0yLjA
3233cern-ohl-p-2.0CERN Open Hardware Licence Version 2 - PermissiveCERN-OHL-P-2.0https://api.github.com/licenses/cern-ohl-p-2.0L_kgCuY2Vybi1vaGwtcC0yLjA
3334artistic-2.0Artistic License 2.0Artistic-2.0https://api.github.com/licenses/artistic-2.0MDc6TGljZW5zZTM=
3435ofl-1.1SIL Open Font License 1.1OFL-1.1https://api.github.com/licenses/ofl-1.1MDc6TGljZW5zZTE3
3536odbl-1.0Open Data Commons Open Database License v1.0ODbL-1.0https://api.github.com/licenses/odbl-1.0MDc6TGljZW5zZTM3
3637eupl-1.2European Union Public License 1.2EUPL-1.2https://api.github.com/licenses/eupl-1.2MDc6TGljZW5zZTM0
\n", 624 | "
" 625 | ], 626 | "text/plain": [ 627 | " id key name \\\n", 628 | "0 1 No License \n", 629 | "1 2 gpl-3.0 GNU General Public License v3.0 \n", 630 | "2 3 mit MIT License \n", 631 | "3 4 agpl-3.0 GNU Affero General Public License v3.0 \n", 632 | "4 5 0bsd BSD Zero Clause License \n", 633 | "5 6 bsd-3-clause BSD 3-Clause \"New\" or \"Revised\" License \n", 634 | "6 7 mit-0 MIT No Attribution \n", 635 | "7 8 other Other \n", 636 | "8 9 apache-2.0 Apache License 2.0 \n", 637 | "9 10 lgpl-3.0 GNU Lesser General Public License v3.0 \n", 638 | "10 11 gpl-2.0 GNU General Public License v2.0 \n", 639 | "11 12 cc0-1.0 Creative Commons Zero v1.0 Universal \n", 640 | "12 13 lgpl-2.1 GNU Lesser General Public License v2.1 \n", 641 | "13 14 cc-by-4.0 Creative Commons Attribution 4.0 International \n", 642 | "14 15 unlicense The Unlicense \n", 643 | "15 16 isc ISC License \n", 644 | "16 17 bsd-2-clause BSD 2-Clause \"Simplified\" License \n", 645 | "17 18 zlib zlib License \n", 646 | "18 19 mpl-2.0 Mozilla Public License 2.0 \n", 647 | "19 20 epl-2.0 Eclipse Public License 2.0 \n", 648 | "20 21 bsd-4-clause BSD 4-Clause \"Original\" or \"Old\" License \n", 649 | "21 22 osl-3.0 Open Software License 3.0 \n", 650 | "22 23 mulanpsl-2.0 Mulan Permissive Software License, Version 2 \n", 651 | "23 24 wtfpl Do What The F*ck You Want To Public License \n", 652 | "24 25 cc-by-sa-4.0 Creative Commons Attribution Share Alike 4.0 I... \n", 653 | "25 26 bsl-1.0 Boost Software License 1.0 \n", 654 | "26 27 upl-1.0 Universal Permissive License v1.0 \n", 655 | "27 28 lppl-1.3c LaTeX Project Public License v1.3c \n", 656 | "28 29 ncsa University of Illinois/NCSA Open Source License \n", 657 | "29 30 bsd-3-clause-clear BSD 3-Clause Clear License \n", 658 | "30 31 epl-1.0 Eclipse Public License 1.0 \n", 659 | "31 32 cern-ohl-w-2.0 CERN Open Hardware Licence Version 2 - Weakly ... 
\n", 660 | "32 33 cern-ohl-p-2.0 CERN Open Hardware Licence Version 2 - Permissive \n", 661 | "33 34 artistic-2.0 Artistic License 2.0 \n", 662 | "34 35 ofl-1.1 SIL Open Font License 1.1 \n", 663 | "35 36 odbl-1.0 Open Data Commons Open Database License v1.0 \n", 664 | "36 37 eupl-1.2 European Union Public License 1.2 \n", 665 | "\n", 666 | " spdx_id url \\\n", 667 | "0 \n", 668 | "1 GPL-3.0 https://api.github.com/licenses/gpl-3.0 \n", 669 | "2 MIT https://api.github.com/licenses/mit \n", 670 | "3 AGPL-3.0 https://api.github.com/licenses/agpl-3.0 \n", 671 | "4 0BSD https://api.github.com/licenses/0bsd \n", 672 | "5 BSD-3-Clause https://api.github.com/licenses/bsd-3-clause \n", 673 | "6 MIT-0 https://api.github.com/licenses/mit-0 \n", 674 | "7 NOASSERTION None \n", 675 | "8 Apache-2.0 https://api.github.com/licenses/apache-2.0 \n", 676 | "9 LGPL-3.0 https://api.github.com/licenses/lgpl-3.0 \n", 677 | "10 GPL-2.0 https://api.github.com/licenses/gpl-2.0 \n", 678 | "11 CC0-1.0 https://api.github.com/licenses/cc0-1.0 \n", 679 | "12 LGPL-2.1 https://api.github.com/licenses/lgpl-2.1 \n", 680 | "13 CC-BY-4.0 https://api.github.com/licenses/cc-by-4.0 \n", 681 | "14 Unlicense https://api.github.com/licenses/unlicense \n", 682 | "15 ISC https://api.github.com/licenses/isc \n", 683 | "16 BSD-2-Clause https://api.github.com/licenses/bsd-2-clause \n", 684 | "17 Zlib https://api.github.com/licenses/zlib \n", 685 | "18 MPL-2.0 https://api.github.com/licenses/mpl-2.0 \n", 686 | "19 EPL-2.0 https://api.github.com/licenses/epl-2.0 \n", 687 | "20 BSD-4-Clause https://api.github.com/licenses/bsd-4-clause \n", 688 | "21 OSL-3.0 https://api.github.com/licenses/osl-3.0 \n", 689 | "22 MulanPSL-2.0 https://api.github.com/licenses/mulanpsl-2.0 \n", 690 | "23 WTFPL https://api.github.com/licenses/wtfpl \n", 691 | "24 CC-BY-SA-4.0 https://api.github.com/licenses/cc-by-sa-4.0 \n", 692 | "25 BSL-1.0 https://api.github.com/licenses/bsl-1.0 \n", 693 | "26 UPL-1.0 
https://api.github.com/licenses/upl-1.0 \n", 694 | "27 LPPL-1.3c https://api.github.com/licenses/lppl-1.3c \n", 695 | "28 NCSA https://api.github.com/licenses/ncsa \n", 696 | "29 BSD-3-Clause-Clear https://api.github.com/licenses/bsd-3-clause-c... \n", 697 | "30 EPL-1.0 https://api.github.com/licenses/epl-1.0 \n", 698 | "31 CERN-OHL-W-2.0 https://api.github.com/licenses/cern-ohl-w-2.0 \n", 699 | "32 CERN-OHL-P-2.0 https://api.github.com/licenses/cern-ohl-p-2.0 \n", 700 | "33 Artistic-2.0 https://api.github.com/licenses/artistic-2.0 \n", 701 | "34 OFL-1.1 https://api.github.com/licenses/ofl-1.1 \n", 702 | "35 ODbL-1.0 https://api.github.com/licenses/odbl-1.0 \n", 703 | "36 EUPL-1.2 https://api.github.com/licenses/eupl-1.2 \n", 704 | "\n", 705 | " node_id \n", 706 | "0 \n", 707 | "1 MDc6TGljZW5zZTk= \n", 708 | "2 MDc6TGljZW5zZTEz \n", 709 | "3 MDc6TGljZW5zZTE= \n", 710 | "4 MDc6TGljZW5zZTM1 \n", 711 | "5 MDc6TGljZW5zZTU= \n", 712 | "6 MDc6TGljZW5zZTQx \n", 713 | "7 MDc6TGljZW5zZTA= \n", 714 | "8 MDc6TGljZW5zZTI= \n", 715 | "9 MDc6TGljZW5zZTEy \n", 716 | "10 MDc6TGljZW5zZTg= \n", 717 | "11 MDc6TGljZW5zZTY= \n", 718 | "12 MDc6TGljZW5zZTEx \n", 719 | "13 MDc6TGljZW5zZTI1 \n", 720 | "14 MDc6TGljZW5zZTE1 \n", 721 | "15 MDc6TGljZW5zZTEw \n", 722 | "16 MDc6TGljZW5zZTQ= \n", 723 | "17 MDc6TGljZW5zZTI3 \n", 724 | "18 MDc6TGljZW5zZTE0 \n", 725 | "19 MDc6TGljZW5zZTMy \n", 726 | "20 MDc6TGljZW5zZTM5 \n", 727 | "21 MDc6TGljZW5zZTE2 \n", 728 | "22 L_kgCsbXVsYW5wc2wtMi4w \n", 729 | "23 MDc6TGljZW5zZTE4 \n", 730 | "24 MDc6TGljZW5zZTI2 \n", 731 | "25 MDc6TGljZW5zZTI4 \n", 732 | "26 MDc6TGljZW5zZTMz \n", 733 | "27 MDc6TGljZW5zZTIz \n", 734 | "28 MDc6TGljZW5zZTI5 \n", 735 | "29 MDc6TGljZW5zZTIx \n", 736 | "30 MDc6TGljZW5zZTc= \n", 737 | "31 L_kgCuY2Vybi1vaGwtdy0yLjA \n", 738 | "32 L_kgCuY2Vybi1vaGwtcC0yLjA \n", 739 | "33 MDc6TGljZW5zZTM= \n", 740 | "34 MDc6TGljZW5zZTE3 \n", 741 | "35 MDc6TGljZW5zZTM3 \n", 742 | "36 MDc6TGljZW5zZTM0 " 743 | ] 744 | }, 745 | "execution_count": 6, 746 | 
"metadata": {}, 747 | "output_type": "execute_result" 748 | } 749 | ], 750 | "source": [ 751 | "import ast\n", 752 | "\n", 753 | "#table license\n", 754 | "licenses = pd.DataFrame(list(data_c['license'].unique())).rename(columns={0:'name'})\n", 755 | "lics = []\n", 756 | "for index,l in licenses.iterrows():\n", 757 | " lics.append(ast.literal_eval(l['name']))\n", 758 | "\n", 759 | "\n", 760 | "licenses = pd.DataFrame(lics)\n", 761 | "licenses.insert(0,\"id\", list(range(1,len(lics)+1)),True)\n", 762 | "licenses\n" 763 | ] 764 | }, 765 | { 766 | "cell_type": "markdown", 767 | "metadata": {}, 768 | "source": [ 769 | "

Contributors

" 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "execution_count": 7, 775 | "metadata": {}, 776 | "outputs": [ 777 | { 778 | "data": { 779 | "text/html": [ 780 | "
\n", 781 | "\n", 794 | "\n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | "
idname
01biuuu
12cntvc
23TremblingMoeNew
34LaiYueTing
45vdohney
.........
1256612567nijuy
1256712568jingliu9
1256812569RogerPu
1256912570tumGER
1257012571NicholeMattera
\n", 860 | "

12571 rows × 2 columns

\n", 861 | "
" 862 | ], 863 | "text/plain": [ 864 | " id name\n", 865 | "0 1 biuuu\n", 866 | "1 2 cntvc\n", 867 | "2 3 TremblingMoeNew\n", 868 | "3 4 LaiYueTing\n", 869 | "4 5 vdohney\n", 870 | "... ... ...\n", 871 | "12566 12567 nijuy\n", 872 | "12567 12568 jingliu9\n", 873 | "12568 12569 RogerPu\n", 874 | "12569 12570 tumGER\n", 875 | "12570 12571 NicholeMattera\n", 876 | "\n", 877 | "[12571 rows x 2 columns]" 878 | ] 879 | }, 880 | "execution_count": 7, 881 | "metadata": {}, 882 | "output_type": "execute_result" 883 | } 884 | ], 885 | "source": [ 886 | "cs = pd.DataFrame(contributors['name'].unique()).rename(columns={0:'name'})\n", 887 | "\n", 888 | "cs.insert(0,'id',range(1,len(cs)+1),True)\n", 889 | "\n", 890 | "cs" 891 | ] 892 | }, 893 | { 894 | "cell_type": "markdown", 895 | "metadata": {}, 896 | "source": [ 897 | "

Repositories

" 898 | ] 899 | }, 900 | { 901 | "cell_type": "code", 902 | "execution_count": 8, 903 | "metadata": {}, 904 | "outputs": [ 905 | { 906 | "data": { 907 | "text/html": [ 908 | "
\n", 909 | "\n", 922 | "\n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | "
idfull_name
01477132016yb/plant-vs-zombies
12ParveshSandila/CountryCodeChooser
23ozodbek-sobirjonovich/sqlite3_tutorial
34mahomaps/mm-v1
45cruip/cruip-tutorials-next
.........
2548525486gptlink/gptlink-deploy
2548625487akashusr/assignment-solution-batch-2
2548725488smile1130/laravel9_CMS
2548825489asaotomo/makephonedict
2548925490ACodeDaily/AcodeDaily
\n", 988 | "

25490 rows × 2 columns

\n", 989 | "
" 990 | ], 991 | "text/plain": [ 992 | " id full_name\n", 993 | "0 1 477132016yb/plant-vs-zombies\n", 994 | "1 2 ParveshSandila/CountryCodeChooser\n", 995 | "2 3 ozodbek-sobirjonovich/sqlite3_tutorial\n", 996 | "3 4 mahomaps/mm-v1\n", 997 | "4 5 cruip/cruip-tutorials-next\n", 998 | "... ... ...\n", 999 | "25485 25486 gptlink/gptlink-deploy\n", 1000 | "25486 25487 akashusr/assignment-solution-batch-2\n", 1001 | "25487 25488 smile1130/laravel9_CMS\n", 1002 | "25488 25489 asaotomo/makephonedict\n", 1003 | "25489 25490 ACodeDaily/AcodeDaily\n", 1004 | "\n", 1005 | "[25490 rows x 2 columns]" 1006 | ] 1007 | }, 1008 | "execution_count": 8, 1009 | "metadata": {}, 1010 | "output_type": "execute_result" 1011 | } 1012 | ], 1013 | "source": [ 1014 | "repositories = pd.DataFrame(data_c['full_name'].unique()).rename(columns={0:'full_name'})\n", 1015 | "\n", 1016 | "repositories.insert(0,'id',range(1,len(repositories)+1),True)\n", 1017 | "repositories" 1018 | ] 1019 | }, 1020 | { 1021 | "cell_type": "code", 1022 | "execution_count": 9, 1023 | "metadata": {}, 1024 | "outputs": [ 1025 | { 1026 | "data": { 1027 | "text/html": [ 1028 | "
\n", 1029 | "\n", 1042 | "\n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | "
full_nameurlclone_urlwatchers_countstargazers_countlanguageforksdescriptiontypelicenseforkcreated_atupdated_atpushed_at
0477132016yb/plant-vs-zombieshttps://github.com/477132016yb/plant-vs-zombieshttps://github.com/477132016yb/plant-vs-zombie...121210NaN1{'key': '', 'name': 'No License', 'spdx_id': '...False2023-05-012023-06-152023-06-13
1ParveshSandila/CountryCodeChooserhttps://github.com/ParveshSandila/CountryCodeC...https://github.com/ParveshSandila/CountryCodeC...121221Country code chooser in Jetpack compose1{'key': '', 'name': 'No License', 'spdx_id': '...False2023-05-012023-06-062023-06-09
\n", 1099 | "
" 1100 | ], 1101 | "text/plain": [ 1102 | " full_name \\\n", 1103 | "0 477132016yb/plant-vs-zombies \n", 1104 | "1 ParveshSandila/CountryCodeChooser \n", 1105 | "\n", 1106 | " url \\\n", 1107 | "0 https://github.com/477132016yb/plant-vs-zombies \n", 1108 | "1 https://github.com/ParveshSandila/CountryCodeC... \n", 1109 | "\n", 1110 | " clone_url watchers_count \\\n", 1111 | "0 https://github.com/477132016yb/plant-vs-zombie... 12 \n", 1112 | "1 https://github.com/ParveshSandila/CountryCodeC... 12 \n", 1113 | "\n", 1114 | " stargazers_count language forks description \\\n", 1115 | "0 12 1 0 NaN \n", 1116 | "1 12 2 1 Country code chooser in Jetpack compose \n", 1117 | "\n", 1118 | " type license fork created_at \\\n", 1119 | "0 1 {'key': '', 'name': 'No License', 'spdx_id': '... False 2023-05-01 \n", 1120 | "1 1 {'key': '', 'name': 'No License', 'spdx_id': '... False 2023-05-01 \n", 1121 | "\n", 1122 | " updated_at pushed_at \n", 1123 | "0 2023-06-15 2023-06-13 \n", 1124 | "1 2023-06-06 2023-06-09 " 1125 | ] 1126 | }, 1127 | "execution_count": 9, 1128 | "metadata": {}, 1129 | "output_type": "execute_result" 1130 | } 1131 | ], 1132 | "source": [ 1133 | "dataToInserted = data_c\n", 1134 | "\n", 1135 | "#--Replace technologies\n", 1136 | "for index,tech in technoligies.iterrows():\n", 1137 | " dataToInserted['language'].replace(tech['name'],tech['id'],inplace=True)\n", 1138 | "\n", 1139 | "\n", 1140 | "#--Replace owners\n", 1141 | "for index,tech in owners.iterrows():\n", 1142 | " dataToInserted['type'].replace(tech['name'],tech['id'],inplace=True)\n", 1143 | "\n", 1144 | "dataToInserted.head(2)\n" 1145 | ] 1146 | }, 1147 | { 1148 | "cell_type": "code", 1149 | "execution_count": 10, 1150 | "metadata": {}, 1151 | "outputs": [ 1152 | { 1153 | "data": { 1154 | "text/html": [ 1155 | "
\n", 1156 | "\n", 1169 | "\n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | "
full_nameurlclone_urlwatchers_countstargazers_countlanguageforksdescriptiontypelicenseforkcreated_atupdated_atpushed_at
0477132016yb/plant-vs-zombieshttps://github.com/477132016yb/plant-vs-zombieshttps://github.com/477132016yb/plant-vs-zombie...121210NaN11False2023-05-012023-06-152023-06-13
1ParveshSandila/CountryCodeChooserhttps://github.com/ParveshSandila/CountryCodeC...https://github.com/ParveshSandila/CountryCodeC...121221Country code chooser in Jetpack compose11False2023-05-012023-06-062023-06-09
\n", 1226 | "
" 1227 | ], 1228 | "text/plain": [ 1229 | " full_name \\\n", 1230 | "0 477132016yb/plant-vs-zombies \n", 1231 | "1 ParveshSandila/CountryCodeChooser \n", 1232 | "\n", 1233 | " url \\\n", 1234 | "0 https://github.com/477132016yb/plant-vs-zombies \n", 1235 | "1 https://github.com/ParveshSandila/CountryCodeC... \n", 1236 | "\n", 1237 | " clone_url watchers_count \\\n", 1238 | "0 https://github.com/477132016yb/plant-vs-zombie... 12 \n", 1239 | "1 https://github.com/ParveshSandila/CountryCodeC... 12 \n", 1240 | "\n", 1241 | " stargazers_count language forks description \\\n", 1242 | "0 12 1 0 NaN \n", 1243 | "1 12 2 1 Country code chooser in Jetpack compose \n", 1244 | "\n", 1245 | " type license fork created_at updated_at pushed_at \n", 1246 | "0 1 1 False 2023-05-01 2023-06-15 2023-06-13 \n", 1247 | "1 1 1 False 2023-05-01 2023-06-06 2023-06-09 " 1248 | ] 1249 | }, 1250 | "execution_count": 10, 1251 | "metadata": {}, 1252 | "output_type": "execute_result" 1253 | } 1254 | ], 1255 | "source": [ 1256 | "#--Replace license\n", 1257 | "for index,l in licenses.iterrows():\n", 1258 | " #print(str(dict(l[['key','name','spdx_id','url','node_id']])))\n", 1259 | " dataToInserted['license'].replace(str(dict(l[['key','name','spdx_id','url','node_id']])),l['id'],inplace=True)\n", 1260 | "\n", 1261 | "#dataToInserted[dataToInserted['license'] == str(dict(licenses[['key','name','spdx_id','url','node_id']].head(1)))]\n", 1262 | "\n", 1263 | "#dict(licenses[['key','name','spdx_id','url','node_id']].head(1))\n", 1264 | "dataToInserted.head(2)" 1265 | ] 1266 | }, 1267 | { 1268 | "cell_type": "code", 1269 | "execution_count": 11, 1270 | "metadata": {}, 1271 | "outputs": [], 1272 | "source": [ 1273 | "# add id to each repos\n", 1274 | "#add columns ID\n", 1275 | "dataToInserted['id'] = dataToInserted['full_name']\n", 1276 | "\n", 1277 | "for index, r in repositories.iterrows():\n", 1278 | " dataToInserted['id'].replace(r['full_name'],r['id'],inplace=True)\n" 1279 | ] 1280 | }, 1281 | { 
1282 | "cell_type": "code", 1283 | "execution_count": 12, 1284 | "metadata": {}, 1285 | "outputs": [], 1286 | "source": [ 1287 | "dataToInserted.to_csv('dataToInserted.csv')" 1288 | ] 1289 | }, 1290 | { 1291 | "cell_type": "markdown", 1292 | "metadata": {}, 1293 | "source": [ 1294 | "

L'insertion des données en base de données

" 1295 | ] 1296 | }, 1297 | { 1298 | "cell_type": "code", 1299 | "execution_count": 12, 1300 | "metadata": {}, 1301 | "outputs": [], 1302 | "source": [ 1303 | "import pyodbc\n", 1304 | "\n", 1305 | "cnxn = pyodbc.connect('Driver={SQL Server};'\n", 1306 | " r'Server=LAPTOP-B5O30HDH\\SQLEXPRESS;'\n", 1307 | " 'Database=githubdb;'\n", 1308 | " 'Trusted_Connection=yes;')" 1309 | ] 1310 | }, 1311 | { 1312 | "cell_type": "markdown", 1313 | "metadata": {}, 1314 | "source": [ 1315 | "

Technologies

" 1316 | ] 1317 | }, 1318 | { 1319 | "cell_type": "code", 1320 | "execution_count": 16, 1321 | "metadata": {}, 1322 | "outputs": [], 1323 | "source": [ 1324 | "cursor = cnxn.cursor()\n", 1325 | "\n", 1326 | "for index,tech in technoligies.iterrows():\n", 1327 | " cursor.execute('INSERT INTO technologies (name) values (?)',tech['name'])\n", 1328 | "\n", 1329 | "cursor.commit()\n", 1330 | "cursor.close()" 1331 | ] 1332 | }, 1333 | { 1334 | "cell_type": "markdown", 1335 | "metadata": {}, 1336 | "source": [ 1337 | "

Owners

" 1338 | ] 1339 | }, 1340 | { 1341 | "cell_type": "code", 1342 | "execution_count": 17, 1343 | "metadata": {}, 1344 | "outputs": [], 1345 | "source": [ 1346 | "cursor = cnxn.cursor()\n", 1347 | "\n", 1348 | "for index, owner in owners.iterrows():\n", 1349 | " cursor.execute('INSERT INTO owners (type) values (?)',owner['name'])\n", 1350 | "cursor.commit()\n", 1351 | "cursor.close()" 1352 | ] 1353 | }, 1354 | { 1355 | "cell_type": "markdown", 1356 | "metadata": {}, 1357 | "source": [ 1358 | "

Licenses

" 1359 | ] 1360 | }, 1361 | { 1362 | "cell_type": "code", 1363 | "execution_count": 18, 1364 | "metadata": {}, 1365 | "outputs": [], 1366 | "source": [ 1367 | "\n", 1368 | "\n", 1369 | "cursor = cnxn.cursor()\n", 1370 | "\n", 1371 | "for index, lic in licenses.iterrows():\n", 1372 | " cursor.execute(\"INSERT INTO licenses (lkey,name,url,spdxID,nodeID) values (?,?,?,?,?)\",lic['key'],lic['name'],lic['url'],lic['spdx_id'],lic['node_id'])\n", 1373 | "cursor.commit()\n", 1374 | "cursor.close()" 1375 | ] 1376 | }, 1377 | { 1378 | "cell_type": "markdown", 1379 | "metadata": {}, 1380 | "source": [ 1381 | "

Contributors

" 1382 | ] 1383 | }, 1384 | { 1385 | "cell_type": "code", 1386 | "execution_count": 19, 1387 | "metadata": {}, 1388 | "outputs": [], 1389 | "source": [ 1390 | "cursor = cnxn.cursor()\n", 1391 | "\n", 1392 | "for index, c in cs.iterrows():\n", 1393 | " cursor.execute(\"INSERT INTO contributors (name) values (?)\",c['name'])\n", 1394 | "cursor.commit()\n", 1395 | "cursor.close()" 1396 | ] 1397 | }, 1398 | { 1399 | "cell_type": "code", 1400 | "execution_count": 27, 1401 | "metadata": {}, 1402 | "outputs": [ 1403 | { 1404 | "data": { 1405 | "text/html": [ 1406 | "
\n", 1407 | "\n", 1420 | "\n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | "
full_nameurlclone_urlwatchers_countstargazers_countlanguageforksdescriptiontypelicenseforkcreated_atupdated_atpushed_atid
0477132016yb/plant-vs-zombieshttps://github.com/477132016yb/plant-vs-zombieshttps://github.com/477132016yb/plant-vs-zombie...121210No Description11False2023-05-012023-06-152023-06-131
1ParveshSandila/CountryCodeChooserhttps://github.com/ParveshSandila/CountryCodeC...https://github.com/ParveshSandila/CountryCodeC...121221Country code chooser in Jetpack compose11False2023-05-012023-06-062023-06-092
2ozodbek-sobirjonovich/sqlite3_tutorialhttps://github.com/ozodbek-sobirjonovich/sqlit...https://github.com/ozodbek-sobirjonovich/sqlit...121230No Description11False2023-05-012023-06-102023-05-013
3mahomaps/mm-v1https://github.com/mahomaps/mm-v1https://github.com/mahomaps/mm-v1.git121242MahoMaps: Yandex.Maps client for MIDP222False2023-05-012023-06-042023-06-184
4cruip/cruip-tutorials-nexthttps://github.com/cruip/cruip-tutorials-nexthttps://github.com/cruip/cruip-tutorials-next.git121254A repository of Cruip's guides & tutorials mad...21False2023-05-012023-06-182023-06-185
\n", 1534 | "
" 1535 | ], 1536 | "text/plain": [ 1537 | " full_name \\\n", 1538 | "0 477132016yb/plant-vs-zombies \n", 1539 | "1 ParveshSandila/CountryCodeChooser \n", 1540 | "2 ozodbek-sobirjonovich/sqlite3_tutorial \n", 1541 | "3 mahomaps/mm-v1 \n", 1542 | "4 cruip/cruip-tutorials-next \n", 1543 | "\n", 1544 | " url \\\n", 1545 | "0 https://github.com/477132016yb/plant-vs-zombies \n", 1546 | "1 https://github.com/ParveshSandila/CountryCodeC... \n", 1547 | "2 https://github.com/ozodbek-sobirjonovich/sqlit... \n", 1548 | "3 https://github.com/mahomaps/mm-v1 \n", 1549 | "4 https://github.com/cruip/cruip-tutorials-next \n", 1550 | "\n", 1551 | " clone_url watchers_count \\\n", 1552 | "0 https://github.com/477132016yb/plant-vs-zombie... 12 \n", 1553 | "1 https://github.com/ParveshSandila/CountryCodeC... 12 \n", 1554 | "2 https://github.com/ozodbek-sobirjonovich/sqlit... 12 \n", 1555 | "3 https://github.com/mahomaps/mm-v1.git 12 \n", 1556 | "4 https://github.com/cruip/cruip-tutorials-next.git 12 \n", 1557 | "\n", 1558 | " stargazers_count language forks \\\n", 1559 | "0 12 1 0 \n", 1560 | "1 12 2 1 \n", 1561 | "2 12 3 0 \n", 1562 | "3 12 4 2 \n", 1563 | "4 12 5 4 \n", 1564 | "\n", 1565 | " description type license fork \\\n", 1566 | "0 No Description 1 1 False \n", 1567 | "1 Country code chooser in Jetpack compose 1 1 False \n", 1568 | "2 No Description 1 1 False \n", 1569 | "3 MahoMaps: Yandex.Maps client for MIDP2 2 2 False \n", 1570 | "4 A repository of Cruip's guides & tutorials mad... 
2 1 False \n", 1571 | "\n", 1572 | " created_at updated_at pushed_at id \n", 1573 | "0 2023-05-01 2023-06-15 2023-06-13 1 \n", 1574 | "1 2023-05-01 2023-06-06 2023-06-09 2 \n", 1575 | "2 2023-05-01 2023-06-10 2023-05-01 3 \n", 1576 | "3 2023-05-01 2023-06-04 2023-06-18 4 \n", 1577 | "4 2023-05-01 2023-06-18 2023-06-18 5 " 1578 | ] 1579 | }, 1580 | "execution_count": 27, 1581 | "metadata": {}, 1582 | "output_type": "execute_result" 1583 | } 1584 | ], 1585 | "source": [ 1586 | "dataToInserted.head()" 1587 | ] 1588 | }, 1589 | { 1590 | "cell_type": "markdown", 1591 | "metadata": {}, 1592 | "source": [ 1593 | "

Repositories

" 1594 | ] 1595 | }, 1596 | { 1597 | "cell_type": "code", 1598 | "execution_count": 13, 1599 | "metadata": {}, 1600 | "outputs": [], 1601 | "source": [ 1602 | "dataToInserted['description'].fillna('No Description',inplace=True)\n", 1603 | "#dataToInserted['description'].isnull().sum()" 1604 | ] 1605 | }, 1606 | { 1607 | "cell_type": "code", 1608 | "execution_count": 14, 1609 | "metadata": {}, 1610 | "outputs": [], 1611 | "source": [ 1612 | "cursor = cnxn.cursor()\n", 1613 | "\n", 1614 | "for index, d in dataToInserted.iterrows():\n", 1615 | " #print(d['full_name'],d['description'],d['url'],d['clone_url'],d['fork'],d['watchers_count'],d['forks'],d['created_at'],d['updated_at'],d['pushed_at'],d['language'],d['type'],d['license'])\n", 1616 | " cursor.execute(\"INSERT INTO repositories (fullName,description,url,cloneUrl,fork,watchersCount,forks,createdAt,updatedAt,pushedAt,langID,ownerID,licenseID) values (?,?,?,?,?,?,?,?,?,?,?,?,?)\",d['full_name'],d['description'],d['url'],d['clone_url'],bool(d['fork']),d['watchers_count'],d['forks'],d['created_at'],d['updated_at'],d['pushed_at'],d['language'],d['type'],d['license'])\n", 1617 | "cursor.commit()\n", 1618 | "cursor.close()" 1619 | ] 1620 | }, 1621 | { 1622 | "cell_type": "markdown", 1623 | "metadata": {}, 1624 | "source": [ 1625 | "

Commits

" 1626 | ] 1627 | }, 1628 | { 1629 | "cell_type": "code", 1630 | "execution_count": 15, 1631 | "metadata": {}, 1632 | "outputs": [ 1633 | { 1634 | "name": "stderr", 1635 | "output_type": "stream", 1636 | "text": [ 1637 | "C:\\Users\\Youcode\\AppData\\Local\\Temp\\ipykernel_14064\\542819858.py:7: SettingWithCopyWarning: \n", 1638 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 1639 | "\n", 1640 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 1641 | " commits['name'].replace(c['name'],c['id'],inplace=True)\n" 1642 | ] 1643 | } 1644 | ], 1645 | "source": [ 1646 | "#commits = dataToInserted[['id','full_name',]]\n", 1647 | "\n", 1648 | "commits = contributors[['full_name','name','commits']]\n", 1649 | "\n", 1650 | "#replace contributor names by id\n", 1651 | "for index, c in cs.iterrows():\n", 1652 | " commits['name'].replace(c['name'],c['id'],inplace=True)\n" 1653 | ] 1654 | }, 1655 | { 1656 | "cell_type": "code", 1657 | "execution_count": 17, 1658 | "metadata": {}, 1659 | "outputs": [ 1660 | { 1661 | "name": "stderr", 1662 | "output_type": "stream", 1663 | "text": [ 1664 | "C:\\Users\\Youcode\\AppData\\Local\\Temp\\ipykernel_14064\\3544187285.py:3: SettingWithCopyWarning: \n", 1665 | "A value is trying to be set on a copy of a slice from a DataFrame\n", 1666 | "\n", 1667 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 1668 | " commits['full_name'].replace(c['full_name'],c['id'],inplace=True)\n" 1669 | ] 1670 | } 1671 | ], 1672 | "source": [ 1673 | "#replace repository full_name by id\n", 1674 | "for index, c in repositories.iterrows():\n", 1675 | " commits['full_name'].replace(c['full_name'],c['id'],inplace=True)" 1676 | ] 1677 | }, 1678 | { 1679 | "cell_type": "code", 1680 | "execution_count": 24, 1681 | "metadata": {}, 1682 | "outputs": [ 1683 | {
1684 | "data": { 1685 | "text/html": [ 1686 | "
\n", 1687 | "\n", 1700 | "\n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | "
full_namenamecommits
022222138
12222221
22222231
32222241
422223525
............
139141480512567240
13915148061256814
13916148071256913
13917148081257025
13918148081257119
\n", 1778 | "

13811 rows × 3 columns

\n", 1779 | "
" 1780 | ], 1781 | "text/plain": [ 1782 | " full_name name commits\n", 1783 | "0 22222 1 38\n", 1784 | "1 22222 2 1\n", 1785 | "2 22222 3 1\n", 1786 | "3 22222 4 1\n", 1787 | "4 22223 5 25\n", 1788 | "... ... ... ...\n", 1789 | "13914 14805 12567 240\n", 1790 | "13915 14806 12568 14\n", 1791 | "13916 14807 12569 13\n", 1792 | "13917 14808 12570 25\n", 1793 | "13918 14808 12571 19\n", 1794 | "\n", 1795 | "[13811 rows x 3 columns]" 1796 | ] 1797 | }, 1798 | "execution_count": 24, 1799 | "metadata": {}, 1800 | "output_type": "execute_result" 1801 | } 1802 | ], 1803 | "source": [ 1804 | "commits = pd.read_csv('commits.csv',index_col=0)\n", 1805 | "\n", 1806 | "commits['full_name'] = commits['full_name'].astype('int')\n", 1807 | "commits['name'] = commits['name'].astype('int')\n", 1808 | "\n", 1809 | "commits" 1810 | ] 1811 | }, 1812 | { 1813 | "cell_type": "code", 1814 | "execution_count": null, 1815 | "metadata": {}, 1816 | "outputs": [], 1817 | "source": [ 1818 | "cursor = cnxn.cursor()\n", 1819 | "for row in commits.itertuples():\n", 1820 | " cursor.execute(\n", 1821 | " '''INSERT INTO commits (repoID, contributorID, counts) VALUES (?,?,?)''',\n", 1822 | " row.full_name,\n", 1823 | " row.name,\n", 1824 | " row.commits\n", 1825 | " )\n", 1826 | "cursor.commit()\n", 1827 | "cursor.close()" 1828 | ] 1829 | } 1830 | ], 1831 | "metadata": { 1832 | "kernelspec": { 1833 | "display_name": "base", 1834 | "language": "python", 1835 | "name": "python3" 1836 | }, 1837 | "language_info": { 1838 | "codemirror_mode": { 1839 | "name": "ipython", 1840 | "version": 3 1841 | }, 1842 | "file_extension": ".py", 1843 | "mimetype": "text/x-python", 1844 | "name": "python", 1845 | "nbconvert_exporter": "python", 1846 | "pygments_lexer": "ipython3", 1847 | "version": "3.10.10" 1848 | }, 1849 | "orig_nbformat": 4 1850 | }, 1851 | "nbformat": 4, 1852 | "nbformat_minor": 2 1853 | } 1854 | -------------------------------------------------------------------------------- /Brief_insertion_data_in_database_sql/reposdb.sql:
-------------------------------------------------------------------------------- 1 | 2 | CREATE DATABASE githubdb 3 | 4 | use githubdb 5 | 6 | -- Table Technologies 7 | CREATE TABLE technologies( 8 | "langID" INT PRIMARY KEY IDENTITY, 9 | "name" VARCHAR(55) NOT NULL 10 | ) 11 | 12 | -- Table Owners 13 | CREATE TABLE owners( 14 | "ownerID" INT PRIMARY KEY IDENTITY, 15 | "type" VARCHAR(55) NOT NULL 16 | ) 17 | 18 | -- Table License 19 | CREATE TABLE licenses( 20 | "licenseID" INT PRIMARY KEY IDENTITY, 21 | "lkey" VARCHAR(255), 22 | "name" VARCHAR(255), 23 | "url" VARCHAR(255), 24 | "spdxID" VARCHAR(255), 25 | "nodeID" VARCHAR(255) 26 | ) 27 | 28 | -- Table Contributors 29 | CREATE TABLE contributors( 30 | "contributorID" INT PRIMARY KEY IDENTITY, 31 | "name" VARCHAR(255) NOT NULL 32 | ) 33 | 34 | -- Table Repositories 35 | CREATE TABLE repositories( 36 | "repoID" INT PRIMARY KEY IDENTITY, 37 | "fullName" TEXT NOT NULL, 38 | "description" TEXT, 39 | "url" VARCHAR(255), 40 | "cloneUrl" VARCHAR(255), 41 | "fork" BIT, 42 | "watchersCount" INT, 43 | "forks" INT, 44 | "createdAt" DATE, 45 | "updatedAt" DATE, 46 | "pushedAt" DATE, 47 | "langID" INT FOREIGN KEY REFERENCES technologies(langID), 48 | "ownerID" INT FOREIGN KEY REFERENCES owners(ownerID), 49 | "licenseID" INT FOREIGN KEY REFERENCES licenses(licenseID) 50 | ) 51 | 52 | -- Table Commits 53 | CREATE TABLE commits( 54 | "repoID" int, 55 | "contributorID" int, 56 | counts int, 57 | PRIMARY KEY CLUSTERED ("repoID", "contributorID"), 58 | foreign key("repoID") references repositories("repoID"), 59 | foreign key("contributorID") references contributors("contributorID") 60 | ) 61 | --Create indexs 62 | 63 | CREATE INDEX reposNameIndex on repositories ("repoID") 64 | 65 | CREATE INDEX licenseIdIndex on licenses ("licenseID") 66 | 67 | CREATE INDEX contributorNameIndex on contributors ("name") 68 | 69 | -- Select with index 70 | 71 | SELECT * from repositories 72 | WITH (INDEX(reposNameIndex)) 73 | 74 | SELECT * from 
contributors 75 | WITH (INDEX(contributorNameIndex)) 76 | 77 | 78 | --Select 79 | 80 | select * from technologies 81 | 82 | select * from owners 83 | 84 | select * from licenses 85 | 86 | select * from contributors 87 | 88 | select * from repositories 89 | 90 | select * from commits 91 | 92 | -------------------------------------------------------------------------------- /Challenge_CIty_Map/data/youssoufia.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": { 7 | "title": "OCY" 8 | }, 9 | "geometry": { 10 | "coordinates": [ 11 | -8.535287255587662, 12 | 32.23776969283607 13 | ], 14 | "type": "Point" 15 | }, 16 | "id": 0 17 | }, 18 | { 19 | "type": "Feature", 20 | "properties": { 21 | "title": "Youcode" 22 | }, 23 | "geometry": { 24 | "coordinates": [ 25 | -8.521753524447945, 26 | 32.24763669446159 27 | ], 28 | "type": "Point" 29 | }, 30 | "id": 1 31 | }, 32 | { 33 | "type": "Feature", 34 | "properties": { 35 | "title": "cafe" 36 | }, 37 | "geometry": { 38 | "coordinates": [ 39 | -8.534590862329196, 40 | 32.24722278148822 41 | ], 42 | "type": "Point" 43 | }, 44 | "id": 2 45 | }, 46 | { 47 | "type": "Feature", 48 | "properties": { 49 | "title": "hopital cheikh ZAID" 50 | }, 51 | "geometry": { 52 | "coordinates": [ 53 | -8.521920677256531, 54 | 32.24864658138999 55 | ], 56 | "type": "Point" 57 | }, 58 | "id": 3 59 | }, 60 | { 61 | "type": "Feature", 62 | "properties": { 63 | "title": "la gare des voyageurs" 64 | }, 65 | "geometry": { 66 | "coordinates": [ 67 | -8.540009132881096, 68 | 32.239437494319276 69 | ], 70 | "type": "Point" 71 | }, 72 | "id": 4 73 | }, 74 | { 75 | "type": "Feature", 76 | "properties": { 77 | "title": "Hotel Atlas" 78 | }, 79 | "geometry": { 80 | "coordinates": [ 81 | -8.536129605308304, 82 | 32.242233862139784 83 | ], 84 | "type": "Point" 85 | }, 86 | "id": 5 87 | }, 88 | { 89 | "type": "Feature", 90 | 
"properties": { 91 | "title": "Attijariwafa bank" 92 | }, 93 | "geometry": { 94 | "coordinates": [ 95 | -8.533854145768288, 96 | 32.246190520166536 97 | ], 98 | "type": "Point" 99 | }, 100 | "id": 6 101 | }, 102 | { 103 | "type": "Feature", 104 | "properties": { 105 | "title": "مسجد العتيق" 106 | }, 107 | "geometry": { 108 | "coordinates": [ 109 | -8.536531512363297, 110 | 32.24278344535385 111 | ], 112 | "type": "Point" 113 | }, 114 | "id": 7 115 | }, 116 | { 117 | "type": "Feature", 118 | "properties": { 119 | "title": "BANQUE POPULAIRE" 120 | }, 121 | "geometry": { 122 | "coordinates": [ 123 | -8.537708923537252, 124 | 32.24123515696351 125 | ], 126 | "type": "Point" 127 | }, 128 | "id": 8 129 | }, 130 | { 131 | "type": "Feature", 132 | "properties": { 133 | "title": "OFPPT YOUSSOUFIA" 134 | }, 135 | "geometry": { 136 | "coordinates": [ 137 | -8.533018562846507, 138 | 32.24170730167809 139 | ], 140 | "type": "Point" 141 | }, 142 | "id": 9 143 | }, 144 | { 145 | "type": "Feature", 146 | "properties": { 147 | "title": "Marché" 148 | }, 149 | "geometry": { 150 | "coordinates": [ 151 | -8.537243951910256, 152 | 32.23931145808096 153 | ], 154 | "type": "Point" 155 | }, 156 | "id": 10 157 | } 158 | ] 159 | } -------------------------------------------------------------------------------- /Challenge_CIty_Map/script.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 246, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#pip install geopandas\n", 10 | "#pip install folium\n", 11 | "#pip install geopy\n", 12 | "#!pip install openrouteservice" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "application/vnd.jupyter.widget-view+json": { 23 | "model_id": "c8d2861b60a74835a52ec290b4a28fcb", 24 | "version_major": 2, 25 | "version_minor": 0 26 | }, 27 | 
"text/plain": [ 28 | "Dropdown(description='From:', index=1, options=('OCY', 'Youcode', 'cafe', 'hopital cheikh ZAID', 'la gare des …" 29 | ] 30 | }, 31 | "metadata": {}, 32 | "output_type": "display_data" 33 | }, 34 | { 35 | "data": { 36 | "application/vnd.jupyter.widget-view+json": { 37 | "model_id": "73fb1fcfc924459992ab8efaa1add9d1", 38 | "version_major": 2, 39 | "version_minor": 0 40 | }, 41 | "text/plain": [ 42 | "Dropdown(description='To:', index=4, options=('OCY', 'Youcode', 'cafe', 'hopital cheikh ZAID', 'la gare des vo…" 43 | ] 44 | }, 45 | "metadata": {}, 46 | "output_type": "display_data" 47 | }, 48 | { 49 | "data": { 50 | "application/vnd.jupyter.widget-view+json": { 51 | "model_id": "3e2e4266b9724aca91be2b9a7c6740a2", 52 | "version_major": 2, 53 | "version_minor": 0 54 | }, 55 | "text/plain": [ 56 | "Button(button_style='success', description='Calcule', icon='check', style=ButtonStyle(), tooltip='Description'…" 57 | ] 58 | }, 59 | "metadata": {}, 60 | "output_type": "display_data" 61 | }, 62 | { 63 | "data": { 64 | "text/html": [ 65 | "
Make this Notebook Trusted to load map: File -> Trust Notebook
" 446 | ], 447 | "text/plain": [ 448 | "" 449 | ] 450 | }, 451 | "metadata": {}, 452 | "output_type": "display_data" 453 | } 454 | ], 455 | "source": [ 456 | "import geopandas as gpd\n", 457 | "import folium\n", 458 | "from folium.plugins import AntPath\n", 459 | "from geopy.distance import distance\n", 460 | "import ipywidgets as widgets\n", 461 | "from IPython.display import clear_output\n", 462 | "\n", 463 | "m = folium.Map(location=[32.24556456855282,-8.532570502180334],zoom_start=15,)\n", 464 | "\n", 465 | "df_places = gpd.read_file('data/youssoufia.json')\n", 466 | "\n", 467 | "\n", 468 | "def getMap(fromPos,toPos):\n", 469 | "\n", 470 | " clear_output()\n", 471 | "\n", 472 | " fromPosition = df_places.loc[df_places['title'] == fromPos ]['geometry']\n", 473 | " toPosition = df_places.loc[df_places['title'] == toPos ]['geometry']\n", 474 | " \n", 475 | " for _,p in df_places.iterrows():\n", 476 | " marker = folium.Marker(\n", 477 | " location=[p['geometry'].y,p['geometry'].x],\n", 478 | " popup=p['title'],\n", 479 | " icon=folium.Icon(icon=\"home\"),\n", 480 | " )\n", 481 | " marker.add_to(m)\n", 482 | " \n", 483 | " if float(fromPosition.x) != float(toPosition.x):\n", 484 | " pos1 = (float(fromPosition.y),float(fromPosition.x))\n", 485 | " pos2 = (float(toPosition.y),float(toPosition.x))\n", 486 | " coords = (pos1,pos2)\n", 487 | " \n", 488 | " AntPath(coords,color='blue',dash_array=[30,15],tooltip=f'{round(distance(pos1,pos2).km,2)} KM',weight=5, opacity=.85).add_to(m)\n", 489 | "\n", 490 | " display(w1,w2,btn,m)\n", 491 | "\n", 492 | "\n", 493 | "titleFromPosition = df_places['title'][0]\n", 494 | "titletoPosition = df_places['title'][1]\n", 495 | "\n", 496 | "w1 = widgets.Dropdown(\n", 497 | " options=df_places['title'],\n", 498 | " value=titleFromPosition,\n", 499 | " description='From:',\n", 500 | " disabled=False,\n", 501 | ")\n", 502 | "\n", 503 | "w2 = widgets.Dropdown(\n", 504 | " options=df_places['title'],\n", 505 | " value=titletoPosition,\n", 506 
| " description='To:',\n", 507 | " disabled=False,\n", 508 | ")\n", 509 | "# Button whose click handler (registered below with btn.on_click) redraws the map.\n", 510 | "btn = widgets.Button(\n", 511 | " value=False,\n", 512 | " description='Calcule',\n", 513 | " disabled=False,\n", 514 | " button_style='success',\n", 515 | " tooltip='Description',\n", 516 | " icon='check'\n", 517 | ")\n", 518 | "# Callbacks that store the selected titles in the module-level globals.\n", 519 | "def getFirstPosition(title):\n", 520 | " global titleFromPosition\n", 521 | " titleFromPosition = title\n", 522 | "\n", 523 | "def getSecondPosition(title):\n", 524 | " global titletoPosition\n", 525 | " titletoPosition = title\n", 526 | "# Mirror each dropdown's new value into the matching global.\n", 527 | "w1.observe(lambda x:getFirstPosition(x.new),'value')\n", 528 | "w2.observe(lambda x:getSecondPosition(x.new),'value')\n", 529 | "# On click, call getMap with the currently stored From/To titles.\n", 530 | "btn.on_click(lambda x:getMap(titleFromPosition,titletoPosition))\n", 531 | "# Initial render shows only the widgets; the map is displayed by getMap.\n", 532 | "display(w1,w2,btn)\n" 533 | ] 534 | } 535 | ], 536 | "metadata": { 537 | "kernelspec": { 538 | "display_name": "base", 539 | "language": "python", 540 | "name": "python3" 541 | }, 542 | "language_info": { 543 | "codemirror_mode": { 544 | "name": "ipython", 545 | "version": 3 546 | }, 547 | "file_extension": ".py", 548 | "mimetype": "text/x-python", 549 | "name": "python", 550 | "nbconvert_exporter": "python", 551 | "pygments_lexer": "ipython3", 552 | "version": "3.10.10" 553 | }, 554 | "orig_nbformat": 4 555 | }, 556 | "nbformat": 4, 557 | "nbformat_minor": 2 558 | } 559 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Youcode-Data-Dev --------------------------------------------------------------------------------