├── .gitignore
├── analysis
    ├── __init__.py
    ├── csv.py
    └── xml.py
├── data
    └── current_mps.csv
├── parite.py
├── requirements.txt
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
1 | env/*
2 | __pycache__/
3 | data/*
4 | analysis/__pycache__/
5 | analysis/__init__.pyc
6 | analysis/csv.pyc
7 | analysis/xml.pyc
8 | 


--------------------------------------------------------------------------------
/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oc-courses/perfectionnez_vous_en_python/8a4647806d6c795da0ca68c06a6d8801bf8a3eff/analysis/__init__.py


--------------------------------------------------------------------------------
/analysis/csv.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python3
  2 | # coding: utf-8
  3 | 
  4 | import os
  5 | import pprint
  6 | import logging as lg
  7 | 
  8 | import pandas as pd
  9 | import matplotlib
 10 | matplotlib.use('TkAgg') # you need this if you are on MacOS
 11 | import matplotlib.pyplot as plt
 12 | import numpy as np
 13 | 
 14 | class SetOfParliamentMember:
 15 |     def __init__(self, name):
 16 |         self.name = name
 17 | 
 18 |     def data_from_csv(self, csv_file):
 19 |         lg.info("Opening data file {}".format(csv_file))
 20 |         self.dataframe = pd.read_csv(csv_file, sep=";")
 21 | 
 22 |     def data_from_dataframe(self, dataframe):
 23 |         self.dataframe = dataframe
 24 | 
 25 |     def display_chart(self):
 26 |         data = self.dataframe
 27 |         female_mps = data[data.sexe == "F"]
 28 |         male_mps = data[data.sexe == "H"]
 29 | 
 30 |         counts = [len(female_mps), len(male_mps)]
 31 |         counts = np.array(counts)
 32 |         nb_mps = counts.sum()
 33 |         proportions = counts / nb_mps
 34 | 
 35 |         labels = ["Female ({})".format(counts[0]), "Male ({})".format(counts[1])]
 36 | 
 37 |         fig, ax = plt.subplots()
 38 |         ax.axis("equal")
 39 |         ax.pie(
 40 |                 proportions,
 41 |                 labels=labels,
 42 |                 autopct="%1.1f%%"
 43 |                 )
 44 |         plt.title("{} ({} MPs)".format(self.name, nb_mps))
 45 |         plt.show()
 46 | 
 47 |     def split_by_political_party(self):
 48 |         result = {}
 49 |         data = self.dataframe
 50 | 
 51 |         # These 2 syntaxes are equivalent : data.parti_ratt_financier and data['parti_ratt_financier']
 52 |         all_parties = data["parti_ratt_financier"].dropna().unique()
 53 | 
 54 |         for party in all_parties:
 55 |             data_subset = data[data.parti_ratt_financier == party]
 56 |             subset = SetOfParliamentMember('MPs from party "{}"'.format(party))
 57 |             subset.data_from_dataframe(data_subset)
 58 |             result[party] = subset
 59 | 
 60 |         return result
 61 | 
 62 |     def __str__(self):
 63 |         names = [] ## todo: remplacer a la fin par une comprehension
 64 |         for row_index, mp in self.dataframe.iterrows(): ##todo: ici il y a du packing/unpacking
 65 |             names += [mp.nom]
 66 |         return str(names) # Python knows how to convert a list into a string
 67 | 
 68 |     def __repr__(self):
 69 |         return "SetOfParliamentMember: {} members".format(len(self.dataframe))
 70 | 
 71 |     def __len__(self):
 72 |         return self.number_of_mps
 73 | 
 74 |     def __contains__(self, mp_name):
 75 |         return mp_name in self.dataframe["nom"].values
 76 | 
 77 |     def __getitem__(self, index):
 78 |         try:
 79 |             result = dict(self.dataframe.iloc[index])
 80 |         except:
 81 |             if index < 0:
 82 |                 raise Exception("Please select a positive index")
 83 |             elif index >= len(self.dataframe):
 84 |                 raise Exception("There are only {} MPs!".format(len(self.dataframe)))
 85 |             else:
 86 |                 raise Exception("Wrong index")
 87 |         return result
 88 | 
 89 |     def __add__(self, other):
 90 |         if not isinstance(other, SetOfParliamentMember):
 91 |             raise Exception("Can not add a SetOfParliamentMember with an object of type {}".format(type(other)))
 92 | 
 93 |         df1, df2 = self.dataframe, other.dataframe ##todo: ici il y a du packing/unpacking
 94 |         df = df1.append(df2)
 95 |         df = df.drop_duplicates()
 96 | 
 97 |         s = SetOfParliamentMember("{} - {}".format(self.name, other.name))
 98 |         s.data_from_dataframe(df)
 99 |         return s
100 | 
101 |     def __radd__(self, other): ## todo: l'implementation de cette methode ne suit a mon avis pas les bonnes pratiques
102 |         return self
103 | 
104 |     def __lt__(self, other):
105 |         return self.number_of_mps < other.number_of_mps
106 | 
107 |     def __gt__(self, other):
108 |         return self.number_of_mps > other.number_of_mps
109 | 
110 |     # The following 2 methods are a way to simulate a calculated attribute
111 |     # (attribute 'number_of_mps' is calculated from attribute 'seld.dataframe')
112 |     # There is a much better way to do it, using decorator '@property'
113 |     def __getattr__(self, attr):
114 |         if attr == "number_of_mps": ##todo: faire la version avec @property
115 |             return len(self.dataframe)
116 | 
117 |     def __setattr__(self, attr, value):
118 |         if attr == "number_of_mps":
119 |             raise Exception("You can not set the number of MPs!")
120 |         self.__dict__[attr] = value ## todo: c'est l'occasion de parler de __dict__ dans le cours ;)
121 | 
def launch_analysis(data_file,
                    by_party=False, info=False, displaynames=False,
                    searchname=None, index=None, groupfirst=None):
    """Load ``data/<data_file>`` and run the requested analyses.

    The gender pie chart of the whole file is always displayed; every other
    output is optional.

    Args:
        data_file: name of a ';'-separated CSV file inside directory "data".
        by_party: also display one chart per political party.
        info: print a short summary of the data set.
        displaynames: print the names of all the MPs.
        searchname: when given, print whether this name is in the data set.
        index: when given, pretty-print the MP at this position (converted
            with ``int``).
        groupfirst: when given, print the names of that many biggest parties
            and display the chart of their union (converted with ``int``).
    """
    sopm = SetOfParliamentMember("All MPs")
    sopm.data_from_csv(os.path.join("data", data_file))
    sopm.display_chart()

    if by_party:
        for party_members in sopm.split_by_political_party().values():
            party_members.display_chart()

    if info:
        print()
        print(repr(sopm))

    if displaynames:
        print()
        print(sopm)

    if searchname is not None:
        is_present = searchname in sopm
        print()
        print("Testing if {} is present: {}".format(searchname, is_present))

    if index is not None:
        index = int(index)
        print()
        pprint.pprint(sopm[index]) # prints the dict a nice way

    if groupfirst is not None:
        groupfirst = int(groupfirst)
        parties_by_size = sorted(sopm.split_by_political_party().values(),
                                 reverse=True)
        biggest = parties_by_size[0:groupfirst]

        print()
        print("Info: the {} biggest groups are :".format(groupfirst))
        for p in biggest:
            print(p.name)

        # sum() of an empty list is the integer 0, which has no chart:
        # only draw when at least one party was selected.
        if biggest:
            sum(biggest).display_chart()
166 | 
# When this module is executed directly, analyse data/current_mps.csv with
# every optional output left at its default.
if __name__ == "__main__":
    launch_analysis('current_mps.csv')
169 | 


--------------------------------------------------------------------------------
/analysis/xml.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | # coding: utf-8
 3 | 
 4 | import os
 5 | import logging as lg
 6 | 
 7 | def launch_analysis(data_file):
 8 |     path_to_file = os.path.join("data", data_file)
 9 | 
10 |     file_name = os.path.basename(path_to_file)
11 |     directory = os.path.dirname(path_to_file)
12 |     lg.info("Opening data file {} from directory '{}'".format(file_name,directory))
13 | 
14 |     try:
15 |         with open(path_to_file,"r") as f:
16 |             preview = f.readline()
17 |             lg.debug("Yeah! We managed to read the file. Here is a preview: {%s}" % preview)
18 |     except FileNotFoundError as e:
19 |         lg.critical("Ow :( The file was not found. Here is the original message of the exception : {%s}" % e)
20 |     except:
21 |         lg.critical('Destination unknown')
22 | 
# When this module is executed directly, try to preview data/SyceronBrut.xml.
if __name__ == "__main__":
    launch_analysis("SyceronBrut.xml")
25 | 


--------------------------------------------------------------------------------
/parite.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python3
 2 | # coding: utf-8
 3 | 
 4 | import argparse
 5 | import logging as lg
 6 | 
 7 | import analysis.csv as c_an
 8 | import analysis.xml as x_an
 9 | 
10 | lg.basicConfig(level=lg.DEBUG)
11 | 
def parse_arguments(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: list of argument strings to parse; ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with attributes ``datafile``,
        ``extension``, ``byparty``, ``info``, ``displaynames``,
        ``searchname``, ``index`` and ``groupfirst``. Options that were not
        given are ``None`` (``False`` for the flags).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d", "--datafile",
        help="CSV file containing pieces of information about the members of parliament")
    parser.add_argument(
        "-e", "--extension",
        help="Kind of file to analyse. Is it a CSV or an XML?")
    parser.add_argument(
        "-p", "--byparty", action='store_true',
        help="displays a graph for each political party")
    parser.add_argument(
        "-i", "--info", action='store_true',
        help="information about the file")
    parser.add_argument(
        "-n", "--displaynames", action='store_true',
        help="displays the names of all the mps")
    parser.add_argument(
        "-s", "--searchname",
        help="search for a mp name")
    # type=int: a non-numeric value is reported as a usage error by argparse
    # instead of failing later with a ValueError traceback.
    parser.add_argument(
        "-I", "--index", type=int,
        help="displays information about the Ith mp")
    parser.add_argument(
        "-g", "--groupfirst", type=int,
        help="displays a graph grouping all the 'g' biggest political parties")
    return parser.parse_args(argv)
28 | 
def main():
    """Entry point: parse the options and run the matching analysis.

    A warning is logged, and no analysis is run, when no data file is given
    or when the extension is neither 'csv' nor 'xml'. The closing banner is
    logged in every case, even if the analysis raises.
    """
    args = parse_arguments()
    try:
        datafile = args.datafile
        if datafile is None:
            lg.warning('You must indicate a datafile!')
        elif args.extension == 'xml':
            x_an.launch_analysis(datafile)
        elif args.extension == 'csv':
            c_an.launch_analysis(datafile, args.byparty, args.info, args.displaynames,
                                 args.searchname, args.index, args.groupfirst)
        else:
            # Previously a missing or unknown extension did nothing, silently.
            lg.warning("Unknown extension {!r}: expected 'csv' or 'xml'".format(args.extension))
    finally:
        lg.info('#################### Analysis is over ######################')
45 | 
# Run the command-line interface only when the file is executed as a script.
if __name__ == '__main__':
    main()
48 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas
2 | matplotlib
3 | numpy
4 | seaborn


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oc-courses/perfectionnez_vous_en_python/8a4647806d6c795da0ca68c06a6d8801bf8a3eff/setup.py


--------------------------------------------------------------------------------