├── .gitignore ├── analysis ├── __init__.py ├── csv.py └── xml.py ├── data └── current_mps.csv ├── parite.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | env/* 2 | __pycache__/ 3 | data/* 4 | analysis/__pycache__/ 5 | analysis/__init__.pyc 6 | analysis/csv.pyc 7 | analysis/xml.pyc 8 | -------------------------------------------------------------------------------- /analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oc-courses/perfectionnez_vous_en_python/8a4647806d6c795da0ca68c06a6d8801bf8a3eff/analysis/__init__.py -------------------------------------------------------------------------------- /analysis/csv.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # coding: utf-8 3 | 4 | import os 5 | import pprint 6 | import logging as lg 7 | 8 | import pandas as pd 9 | import matplotlib 10 | matplotlib.use('TkAgg') # you need this if you are on MacOS 11 | import matplotlib.pyplot as plt 12 | import numpy as np 13 | 14 | class SetOfParliamentMember: 15 | def __init__(self, name): 16 | self.name = name 17 | 18 | def data_from_csv(self, csv_file): 19 | lg.info("Opening data file {}".format(csv_file)) 20 | self.dataframe = pd.read_csv(csv_file, sep=";") 21 | 22 | def data_from_dataframe(self, dataframe): 23 | self.dataframe = dataframe 24 | 25 | def display_chart(self): 26 | data = self.dataframe 27 | female_mps = data[data.sexe == "F"] 28 | male_mps = data[data.sexe == "H"] 29 | 30 | counts = [len(female_mps), len(male_mps)] 31 | counts = np.array(counts) 32 | nb_mps = counts.sum() 33 | proportions = counts / nb_mps 34 | 35 | labels = ["Female ({})".format(counts[0]), "Male ({})".format(counts[1])] 36 | 37 | fig, ax = plt.subplots() 38 | ax.axis("equal") 39 | ax.pie( 40 | proportions, 41 | labels=labels, 42 | autopct="%1.1f%%" 43 | ) 44 | plt.title("{} ({} MPs)".format(self.name, nb_mps)) 45 | plt.show() 46 | 47 | def split_by_political_party(self): 48 | result = {} 49 | data = self.dataframe 50 | 51 | # These 2 syntaxes are equivalent : data.parti_ratt_financier and data['parti_ratt_financier'] 52 | all_parties = data["parti_ratt_financier"].dropna().unique() 53 | 54 | for party in all_parties: 55 | data_subset = data[data.parti_ratt_financier == party] 56 | subset = SetOfParliamentMember('MPs from party "{}"'.format(party)) 57 | subset.data_from_dataframe(data_subset) 58 | result[party] = subset 59 | 60 | return result 61 | 62 | def __str__(self): 63 | names = [] ## todo: remplacer a la fin par une comprehension 64 | for row_index, mp in self.dataframe.iterrows(): ##todo: ici il y a du packing/unpacking 65 | names += [mp.nom] 66 | return str(names) # Python knows how to convert a list into a string 67 | 68 | def __repr__(self): 69 | return "SetOfParliamentMember: {} members".format(len(self.dataframe)) 70 | 71 | def __len__(self): 72 | return self.number_of_mps 73 | 74 | def __contains__(self, mp_name): 75 | return mp_name in self.dataframe["nom"].values 76 | 77 | def __getitem__(self, index): 78 | try: 79 | result = dict(self.dataframe.iloc[index]) 80 | except: 81 | if index < 0: 82 | raise Exception("Please select a positive index") 83 | elif index >= len(self.dataframe): 84 | raise Exception("There are only {} MPs!".format(len(self.dataframe))) 85 | else: 86 | raise Exception("Wrong index") 87 | return result 88 | 89 | def __add__(self, other): 90 | if not isinstance(other, SetOfParliamentMember): 91 | raise Exception("Can not add a SetOfParliamentMember with an object of type {}".format(type(other))) 92 | 93 | df1, df2 = self.dataframe, other.dataframe ##todo: ici il y a du packing/unpacking 94 | df = df1.append(df2) 95 | df = df.drop_duplicates() 96 | 97 | s = SetOfParliamentMember("{} - {}".format(self.name, other.name)) 98 | s.data_from_dataframe(df) 99 | return s 100 | 101 | def __radd__(self, other): ## todo: l'implementation de cette methode ne suit a mon avis pas les bonnes pratiques 102 | return self 103 | 104 | def __lt__(self, other): 105 | return self.number_of_mps < other.number_of_mps 106 | 107 | def __gt__(self, other): 108 | return self.number_of_mps > other.number_of_mps 109 | 110 | # The following 2 methods are a way to simulate a calculated attribute 111 | # (attribute 'number_of_mps' is calculated from attribute 'seld.dataframe') 112 | # There is a much better way to do it, using decorator '@property' 113 | def __getattr__(self, attr): 114 | if attr == "number_of_mps": ##todo: faire la version avec @property 115 | return len(self.dataframe) 116 | 117 | def __setattr__(self, attr, value): 118 | if attr == "number_of_mps": 119 | raise Exception("You can not set the number of MPs!") 120 | self.__dict__[attr] = value ## todo: c'est l'occasion de parler de __dict__ dans le cours ;) 121 | 122 | def launch_analysis(data_file, 123 | by_party = False, info = False, displaynames = False, 124 | searchname = None, index = None, groupfirst = None): 125 | 126 | sopm = SetOfParliamentMember("All MPs") 127 | sopm.data_from_csv(os.path.join("data",data_file)) 128 | sopm.display_chart() 129 | 130 | if by_party: 131 | for party, s in sopm.split_by_political_party().items(): 132 | s.display_chart() 133 | 134 | if info: 135 | print() 136 | print(repr(sopm)) 137 | 138 | if displaynames: 139 | print() 140 | print(sopm) 141 | 142 | if searchname != None: 143 | is_present = searchname in sopm 144 | print() 145 | print("Testing if {} is present: {}".format(searchname, is_present)) 146 | 147 | if index is not None: 148 | index = int(index) 149 | print() 150 | pprint.pprint(sopm[index]) # prints the dict a nice way 151 | 152 | if groupfirst is not None: 153 | groupfirst = int(groupfirst) 154 | parties = sopm.split_by_political_party() 155 | parties = parties.values() 156 | parties_by_size = sorted(parties, reverse = True) 157 | 158 | print() 159 | print("Info: the {} biggest groups are :".format(groupfirst)) 160 | for p in parties_by_size[0:groupfirst]: 161 | print(p.name) 162 | 163 | s = sum(parties_by_size[0:groupfirst]) 164 | 165 | s.display_chart() 166 | 167 | if __name__ == "__main__": 168 | launch_analysis('current_mps.csv') 169 | -------------------------------------------------------------------------------- /analysis/xml.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # coding: utf-8 3 | 4 | import os 5 | import logging as lg 6 | 7 | def launch_analysis(data_file): 8 | path_to_file = os.path.join("data", data_file) 9 | 10 | file_name = os.path.basename(path_to_file) 11 | directory = os.path.dirname(path_to_file) 12 | lg.info("Opening data file {} from directory '{}'".format(file_name,directory)) 13 | 14 | try: 15 | with open(path_to_file,"r") as f: 16 | preview = f.readline() 17 | lg.debug("Yeah! We managed to read the file. Here is a preview: {%s}" % preview) 18 | except FileNotFoundError as e: 19 | lg.critical("Ow :( The file was not found. Here is the original message of the exception : {%s}" % e) 20 | except: 21 | lg.critical('Destination unknown') 22 | 23 | if __name__ == "__main__": 24 | launch_analysis("SyceronBrut.xml") 25 | -------------------------------------------------------------------------------- /parite.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # coding: utf-8 3 | 4 | import argparse 5 | import logging as lg 6 | 7 | import analysis.csv as c_an 8 | import analysis.xml as x_an 9 | 10 | lg.basicConfig(level=lg.DEBUG) 11 | 12 | def parse_arguments(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument("-d","--datafile",help="""CSV file containing pieces of 15 | information about the members of parliament""") 16 | parser.add_argument("-e", "--extension", help="""Kind of file to analyse. Is it a CSV or an XML?""") 17 | parser.add_argument("-p","--byparty", action='store_true', help="""displays 18 | a graph for each political party""") 19 | parser.add_argument("-i","--info", action='store_true', help="""information about 20 | the file""") 21 | parser.add_argument("-n","--displaynames", action='store_true', help="""displays 22 | the names of all the mps""") 23 | parser.add_argument("-s","--searchname", help="""search for a mp name""") 24 | parser.add_argument("-I","--index", help="""displays information about the Ith mp""") 25 | parser.add_argument("-g","--groupfirst", help="""displays a graph groupping all the 'g' 26 | biggest political parties""") 27 | return parser.parse_args() 28 | 29 | def main(): 30 | args = parse_arguments() 31 | try: 32 | datafile = args.datafile 33 | if datafile == None: 34 | raise Warning('You must indicate a datafile!') 35 | except Warning as e: 36 | lg.warning(e) 37 | else: 38 | if args.extension == 'xml': 39 | x_an.launch_analysis(datafile) 40 | elif args.extension == 'csv': 41 | c_an.launch_analysis(datafile, args.byparty, args.info, args.displaynames, 42 | args.searchname, args.index, args.groupfirst) 43 | finally: 44 | lg.info('#################### Analysis is over ######################') 45 | 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | matplotlib 3 | numpy 4 | seaborn -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oc-courses/perfectionnez_vous_en_python/8a4647806d6c795da0ca68c06a6d8801bf8a3eff/setup.py --------------------------------------------------------------------------------