├── .gitignore ├── Public └── Images │ └── github_web_licence.png ├── Config ├── __pycache__ │ ├── conf.cpython-311.pyc │ └── helpers.cpython-311.pyc ├── conf.py └── helpers.py ├── Src ├── __pycache__ │ ├── model.cpython-311.pyc │ └── controller.cpython-311.pyc ├── model.py └── controller.py ├── __pycache__ └── functions.cpython-311.pyc ├── main.py ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | Config/__pycache__ 3 | Src/__pycache__ 4 | -------------------------------------------------------------------------------- /Public/Images/github_web_licence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sefdine/Fetch_github_api/HEAD/Public/Images/github_web_licence.png -------------------------------------------------------------------------------- /Config/__pycache__/conf.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sefdine/Fetch_github_api/HEAD/Config/__pycache__/conf.cpython-311.pyc -------------------------------------------------------------------------------- /Src/__pycache__/model.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sefdine/Fetch_github_api/HEAD/Src/__pycache__/model.cpython-311.pyc -------------------------------------------------------------------------------- /__pycache__/functions.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sefdine/Fetch_github_api/HEAD/__pycache__/functions.cpython-311.pyc -------------------------------------------------------------------------------- /Config/__pycache__/helpers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sefdine/Fetch_github_api/HEAD/Config/__pycache__/helpers.cpython-311.pyc -------------------------------------------------------------------------------- /Src/__pycache__/controller.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sefdine/Fetch_github_api/HEAD/Src/__pycache__/controller.cpython-311.pyc -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | 2 | # Import modules 3 | from Src.controller import * 4 | 5 | # Get data from github 6 | data = fetch_data() 7 | 8 | # Save data to csv file 9 | if data: 10 | clean_data(data) 11 | 12 | -------------------------------------------------------------------------------- /Config/conf.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Define my github access token 4 | access_token = input('Veuillez entrer votre token : ') 5 | ACCESS_TOKEN = 'Bearer '+access_token 6 | 7 | # Define the url(based on the search repositories) 8 | URL = "https://api.github.com/search/repositories" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Nom du Projet 2 | 3 | Fetch_github_api 4 | 5 | ## Description 6 | 7 | Ce projet se base sur le web scraping de données depuis l'API de GitHub. Les données sont ensuite nettoyées, analysées et visualisées pour fournir des informations utiles. L'objectif principal de ce projet est de déterminer les langages de programmation les plus utilisés, d'identifier les meilleures technologies récentes et de mettre en évidence des collaborations intéressantes entre les développeurs. 8 | 9 | ## Fonctionnalités principales 10 | 11 | - Scrapping de données depuis l'API de GitHub pour collecter des informations sur les dépôts et les contributeurs. 12 | - Nettoyage et prétraitement des données pour les rendre exploitables. 13 | - Analyse des données pour identifier les langages de programmation les plus populaires et les tendances actuelles. 14 | - Visualisations graphiques pour représenter les résultats de l'analyse de manière claire et informative. 15 | 16 | ## Utilisation 17 | 18 | Télecharger le projet avec ``git clone https://github.com/Sefdine/Fetch_github_api.git`` et lancer le fichier main.py 19 | 20 | ## Contributions 21 | 22 | Vous pouvez faire un fork du projet et envoyer vos modifications par pull-request. 23 | 24 | ## Licence 25 | 26 | La licence utilisée dans le projet est Creative Commons Zero v1.0 Universal https://github.com/Sefdine/Fetch_github_api/blame/main/LICENSE 27 | 28 | -------------------------------------------------------------------------------- /Src/model.py: -------------------------------------------------------------------------------- 1 | # Import necessary python packages 2 | import requests 3 | 4 | # Import config 5 | from Config.conf import * 6 | 7 | # Create function that connect and fetch data from Github API 8 | def get_github_repositories(query): 9 | 10 | # Define headers 11 | headers = { 12 | "Authorization": ACCESS_TOKEN 13 | } 14 | 15 | # Define parameters 16 | params = { 17 | 'q': query, 18 | 'sort': 'stars', 19 | 'order': 'desc', 20 | 'per_page': 100, 21 | 'fork': False 22 | } 23 | 24 | # Create a request 25 | response = requests.get(URL, headers=headers, params=params) 26 | 27 | # Retrieve data in json format 28 | data = response.json() 29 | 30 | # Implementing pagination logic 31 | while 'next' in response.links: 32 | response = requests.get(response.links['next']['url'], headers=headers) 33 | response_data = response.json() 34 | 35 | # When items doesn't exist then return the response and false 36 | if 'items' not in response_data: 37 | return {'data': False, 'request': response} 38 | else: 39 | # Extend new results to the existed data 40 | data['items'].extend(response_data['items']) 41 | 42 | # Return data['items'] that contains repo's informations if it exists 43 | return {'data': data.get('items', []), 'request': response} -------------------------------------------------------------------------------- /Config/helpers.py: -------------------------------------------------------------------------------- 1 | # Import python packages 2 | import pandas as pd 3 | 4 | # Save data to csv 5 | def save_data_to_csv(data, filename): 6 | df = pd.DataFrame(data) 7 | df.to_csv(filename) 8 | 9 | # Concatenate files 10 | def concatenate_files(file1, file2, exportFile): 11 | df1 = pd.read_csv(file1) 12 | df2 = pd.read_csv(file2) 13 | 14 | concatenate_df = pd.concat([df1, df2], ignore_index=True) 15 | result = concatenate_df.to_csv(exportFile, index=False) 16 | 17 | if not result: 18 | print('CSV files concatenated successfully, size to ', concatenate_df.shape[0]) 19 | else: 20 | print('Error :',result) 21 | 22 | # Answer 1 or 0 23 | def handle_answer(answer): 24 | while answer != '1' and answer != '0': 25 | answer = input('Veuillez choisir 1 pour oui, 0 pour non : ') 26 | return answer 27 | 28 | # handle csv creating file 29 | def handle_csv_create_file(answer): 30 | if not answer: 31 | print('\nLe fichier github_repos.csv a bien été créer') 32 | else: 33 | print('Erreur : ',answer) 34 | 35 | # Display size of data 36 | def display_data_size(data): 37 | print('La taille des données récuperer est de ',len(data)) 38 | 39 | # Display error message github api 40 | def display_error_message_api(): 41 | print('Erreur: Nous sommes désolé mais l\'API de github subi de nombreuses demande.') 42 | print('Veuillez réessayer ultérieurement') 43 | 44 | # Ask to continue fetching data from github api 45 | def continue_fetching_data_from_api(): 46 | print('\nVoulez vous continuer ?') 47 | continue_res = input('Taper 1 pour oui 0 pour non : ') 48 | continue_res = handle_answer(continue_res) 49 | return continue_res 50 | 51 | # Get columns name with dict or list dtypes 52 | def get_columns_name_type_dict_list(df): 53 | # Get columns with object dtype 54 | object_columns = df.select_dtypes(include='object').columns 55 | 56 | # Create a list that will contain dict and list dtype 57 | columns_with_objects = [] 58 | for column in object_columns: 59 | if df[column].apply(lambda x: isinstance(x, (dict, list))).any(): 60 | columns_with_objects.append(column) 61 | return columns_with_objects 62 | 63 | # Has null values in columns 64 | def has_null_values_column(df): 65 | has_null_columns = {} 66 | for column in df.columns: 67 | null_length = df.loc[df[column].isnull()].shape[0] 68 | if null_length > 0: 69 | has_null_columns[column] = null_length 70 | return has_null_columns -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /Src/controller.py: -------------------------------------------------------------------------------- 1 | # Import python packages 2 | import time 3 | import pandas as pd 4 | import missingno as msno 5 | 6 | # Import model 7 | from Src.model import * 8 | from Config.helpers import * 9 | 10 | # Fetch data 11 | def fetch_data(): 12 | print('Chargement...') 13 | query = 'stars:>0' 14 | res = get_github_repositories(query) 15 | data = res['data'] 16 | final_data = data 17 | 18 | if data and len(data)>0: 19 | display_data_size(final_data) 20 | continue_res = continue_fetching_data_from_api() 21 | 22 | while data[-1]['stargazers_count'] > 10 and continue_res == '1': 23 | print('Chargement...') 24 | time.sleep(60) 25 | query = f"stars:<{data[-1]['stargazers_count']}" 26 | res = get_github_repositories(query) 27 | data = res['data'] 28 | if not data or len(data)<=0: 29 | display_error_message_api() 30 | break 31 | else: 32 | final_data.extend(data) 33 | display_data_size(final_data) 34 | continue_res = continue_fetching_data_from_api() 35 | else: 36 | display_error_message_api() 37 | print('\nFin du processus') 38 | return final_data 39 | 40 | # Mirror_url null values 41 | def handle_mirror_url_null_values(df, has_null_columns): 42 | if 'mirror_url' in has_null_columns: 43 | print('La colonne mirroir represente une copie du dépot dans un autre emplacement.\nCette colonne ne nous sera pas utile pour ce projet.') 44 | print('Voulez vous supprimer la supprimé ?') 45 | delete_mirror_url_res = input('Taper 1 pour oui 0 pour non : ') 46 | delete_mirror_url_res = handle_answer(delete_mirror_url_res) 47 | if delete_mirror_url_res == '1': 48 | df.drop('mirror_url', axis=1, inplace=True) 49 | print('La suppression a reussi\n') 50 | return df 51 | 52 | # Homepage null values 53 | def handle_homepage_null_values(df, has_null_columns): 54 | if 'homepage' in has_null_columns: 55 | print('La colonne homepage fait reference aux pages d\'accueil de contribution.\nCette colonne ne nous sera pas utile pour ce projet') 56 | print('Voulez vous supprimer la supprimé ?') 57 | delete_homepage_res = input('Taper 1 pour oui 0 pour non : ') 58 | delete_homepage_res = handle_answer(delete_homepage_res) 59 | if delete_homepage_res == '1': 60 | df.drop('homepage', axis=1, inplace=True) 61 | print('La suppression a reussi\n') 62 | return df 63 | 64 | # License null values 65 | def handle_license_null_values(df, has_null_columns): 66 | if 'license' in has_null_columns: 67 | print('La colonne license décrit les licences utilisé dans un dépôt. Elle contient dans notre dataframe, un objet de type dictionnaire.\n') 68 | print('Voici un example d\'un element dans la colonne license') 69 | license_example = df.loc[df['license'].notnull(), 'license'][0] 70 | print(license_example) 71 | print('\nOn vous propose de prendre uniquement les noms pour chaque licence\n') 72 | print('Voulez vous garder la colonne license en entier ?') 73 | licence_all_res = input('Taper 1 pour oui 0 pour non : ') 74 | licence_all_res = handle_answer(licence_all_res) 75 | if licence_all_res == '0': 76 | print('Veuillez écrire les noms exactes des champs que vous voulez garder : \n') 77 | license_columns_to_keep = [] 78 | while True: 79 | print('Entrer q pour quitter') 80 | column = input(f"Entrer le champ n°{len(license_columns_to_keep)+1} : ") 81 | if column == 'q': 82 | if len(license_columns_to_keep) == 0: 83 | print('Vous n\'avez choisis aucun champ. La colonne license va être supprimée.\n') 84 | print('Êtes vous sur de vouloir supprimé la colonne license en entier ?') 85 | licence_all_delete_res = input('Taper 1 pour oui 0 pour non : ') 86 | licence_all_delete_res = handle_answer(licence_all_delete_res) 87 | if licence_all_delete_res == '1': 88 | # Drop license column 89 | df.drop('license', axis=1, inplace=True) 90 | print('La colonne license a bien été supprimée\n') 91 | break 92 | else: 93 | print('Voici les champs choisi : ',license_columns_to_keep) 94 | break 95 | elif column in license_example.keys(): 96 | if column not in license_columns_to_keep: 97 | license_columns_to_keep.append(column) 98 | else: 99 | print('Vous avez déjà choisi ce champs. Veuillez indiquer un autre\n.') 100 | else: 101 | print('Le champs choisi n\'exist pas en tant que clé de la colonne license\n') 102 | # Create new columns based on license 103 | if len(license_columns_to_keep) > 0: 104 | for name in license_columns_to_keep: 105 | df['license'+str.capitalize(name)] = df['license'].apply(lambda x: x[name] if pd.notnull(x) else None) 106 | print(f"La colonne {'license'+str.capitalize(name)} a bien été crée") 107 | df.drop('license', axis=1, inplace=True) 108 | return df 109 | 110 | # Language null values 111 | def handle_language_null_values(df, has_null_columns): 112 | if 'language' in has_null_columns: 113 | print('\nLa colonne language montre le language principale utilisé dans un dépôt') 114 | print('Les lignes n\'ayant pas de language sont font reference à des dépôts d\'annuaire pour des livres ou autres chose') 115 | print('Il est essentiel dans notre projet d\'avoir un language de programmation.\n') 116 | print('\nDe ce fait, nous te conseillons de supprimer tous les lignes n\'ayant pas de language.') 117 | print('\nVoulez vous supprimé les lignes qui n\'ont pas de language ?') 118 | 119 | delete_language_res = input('Taper 1 pour oui 0 pour non : ') 120 | delete_language_res = handle_answer(delete_language_res) 121 | if delete_language_res == '1': 122 | # Delete language null values 123 | df.dropna(subset=['language'], inplace=True) 124 | print('La suppression a bien été effectué\n') 125 | return df 126 | 127 | # Language null values 128 | def handle_description_null_values(df, has_null_columns): 129 | if 'description' in has_null_columns: 130 | print('La colonne description montre une description du projet.\nCe n\'est pas pertinent de supprimer les lignes ne contenant pas de description car vous riquez de perdre d\'autres informations pertinentes') 131 | print('Nous vous proposons de changer les valeurs nulles de cette colonne par "No description"') 132 | print('\nVoulez vous changer les descriptions nulles par "No description" ?') 133 | change_description_res = input('Taper 1 pour oui 0 pour non : ') 134 | change_description_res = handle_answer(change_description_res) 135 | if change_description_res == '1': 136 | # Fill null descriptions 137 | df['description'].fillna('No description', inplace=True) 138 | print('Les descriptions nulles ont bien été changé\n') 139 | return df 140 | 141 | # Display percentages of null values in columns 142 | def display_null_values_column(df): 143 | has_null_columns = has_null_values_column(df) 144 | if len(has_null_columns) > 0: 145 | print('\nCes colonnes contient n% de valeurs nulles.') 146 | for i in has_null_columns: 147 | print(i, '\t => ',round((has_null_columns[i] / len(df)*100), 2),'%') 148 | return has_null_columns 149 | 150 | # Clean and save data 151 | def clean_data(data): 152 | df = pd.DataFrame(data) 153 | # Display data size 154 | print(f"La taille de vos données est de {df.shape[0]} lignes et {df.shape[1]} colonnes") 155 | # Drop unnamed column 156 | if 'Unnamed: 0' in df.columns: 157 | df.drop('Unnamed: 0', axis=1, inplace=True) 158 | 159 | # Check column that contain object 160 | object_columns = get_columns_name_type_dict_list(df) 161 | 162 | # Display missing values 163 | has_null_columns = display_null_values_column(df) 164 | if len(has_null_columns) > 0: 165 | print('\n----------------- Traitement des valeurs nulles --------------\n') 166 | # ************ Mirror_url ************* 167 | df = handle_mirror_url_null_values(df, has_null_columns) 168 | # ************ Homepage ************* 169 | df = handle_homepage_null_values(df, has_null_columns) 170 | # ************ Licence ************* 171 | df = handle_license_null_values(df, has_null_columns) 172 | # ************ Language ************* 173 | df = handle_language_null_values(df, has_null_columns) 174 | # ************ Description ************* 175 | df = handle_description_null_values(df, has_null_columns) 176 | 177 | has_null_columns = display_null_values_column(df) 178 | 179 | # # Display columns with list or dict dtype 180 | # object_columns = get_columns_name_type_dict_list(df) 181 | # if object_columns: 182 | # print('Ces colonnes de votre dataset contiennet des objets') 183 | # print(df[object_columns].head()) 184 | # print('Voulez vous les supprimés ?') 185 | # # Ask to delete columns with object dict and list 186 | # dict_list_res = input('Taper 1 pour oui 0 pour non : ') 187 | # dict_list_res = handle_answer(dict_list_res) 188 | 189 | # if dict_list_res == '1': 190 | # df.drop(object_columns, axis=1, inplace=True) 191 | 192 | # # Display available columns 193 | # print('\nVoici les colonnes de vos données') 194 | # time.sleep(1) 195 | # print(df.columns) 196 | 197 | # # Ask to keep with all columns 198 | # print('\nSouhaitez vous conserver toutes les colonnes ?') 199 | # keep_columns_re = input('Taper 1 pour oui 0 pour non : ') 200 | # keep_columns_re = handle_answer(keep_columns_re) 201 | # if keep_columns_re == '0': 202 | # # Demand of the needed columns 203 | # print('\nVeuillez entrez les noms exacts des colonnes que vous voulez conserver') 204 | # conserved_columns = [] 205 | # while True: 206 | # conserved_column = input(f"\nTaper q pour quitter\nEntrez le nom exact de la colonne N° {len(conserved_columns)+1}: ") 207 | # if conserved_column in df.columns: 208 | # conserved_columns.append(conserved_column) 209 | # elif conserved_column == 'q': 210 | # if len(conserved_columns) > 0: 211 | # print('\nVoici les colonnes choisis :') 212 | # print(conserved_columns) 213 | # time.sleep(1) 214 | # else: 215 | # print('\nVous n\'avez choisis aucune colonne.\nVos données contiennent donc toutes les colonnes initiales') 216 | # break 217 | # else: 218 | # print('\nErreur: la colonne choisis n\'existe pas') 219 | # else: 220 | # conserved_columns = df.columns 221 | # # Create final dataframe 222 | # df_final = df[conserved_columns] 223 | 224 | # # Drop duplicates 225 | # object_columns = list(set(object_columns).intersection(df_final.columns)) 226 | # duplicated_rows = df_final[df_final.duplicated(subset=df_final.columns.difference(object_columns))].shape[0] 227 | 228 | # if duplicated_rows > 0: 229 | # print(f"\nVous avez {duplicated_rows} lignes dupliquées. \nVoulez vous les supprimées ?") 230 | # duplicated_res = input('Taper 1 pour Oui, 0 pour non : ') 231 | # duplicated_res = handle_answer(duplicated_res) 232 | 233 | # if duplicated_res == '1': 234 | # df_final.drop_duplicates(subset=df_final.columns.difference(object_columns), inplace=True) 235 | # print('\nLes données dupliquées ont été suprimés') 236 | # print('La taille de vos données sont de ',df_final.shape[0]) 237 | # time.sleep(1) 238 | 239 | # print(f"\nLa taille de vos données est de {df_final.shape[0]} lignes et de {df_final.shape[1]} colonnes") 240 | # print('Voulez vous sauvergarder vos données sous forme csv ?') 241 | # save_to_csv_res = input('Taper 1 pour oui 0 pour non : ') 242 | # save_to_csv_res = handle_answer(save_to_csv_res) 243 | 244 | # if save_to_csv_res == '0': 245 | # print('\nVos données ne seront pas sauvegarder. Etes vous sur ?') 246 | # final_res = input('Taper 1 pour oui 0 pour non : ') 247 | # final_res = handle_answer(final_res) 248 | # if final_res == '0': 249 | # # Save data to csv 250 | # csv_save_res = save_data_to_csv(df_final, 'github_repos.csv') 251 | # handle_csv_create_file(csv_save_res) 252 | # else: 253 | # print('\nTres bien, au revoir !') 254 | # else: 255 | # # Save data to csv 256 | # csv_save_res = save_data_to_csv(df_final, 'github_repos.csv') 257 | # handle_csv_create_file(csv_save_res) --------------------------------------------------------------------------------