# This file was recovered from a flattened dump of a three-file repository.
# The non-Python files are reproduced here for reference.
#
# Repository layout:
#   |-- fetch_data.sh
#   |-- README.md
#   `-- voteparser.py
#
# /fetch_data.sh:
#   #!/bin/sh
#
#   wget 'http://parltrack.euwiki.org/dumps/ep_votes.json.xz' && unxz ep_votes.json.xz
#
# /README.md:
#   Extracting European Parliament vote similarities
#
#   datasource: [parltrack](https://parltrack.euwiki.org/)
#
# /voteparser.py:

from json import loads
from itertools import combinations
from sys import stderr, argv, exit
from dateutil import parser

# Maps an MEP id to a list of three sets, one per entry of ``vote_types``
# (same order). Each set holds the line indexes of the votes in which that
# MEP voted that way.
# NOTE: this is module-level state that parse() fills but never clears, so
# calling parse() more than once in one process accumulates votes.
meps = {}

vote_types = ('For', 'Against', 'Abstain')


def _is_selected(vote_data, from_date, to_date, ep_refs):
    """Return True when a vote passes the date-range and epref filters."""
    vote_date = parser.parse(vote_data['ts'])
    if vote_date < from_date:
        return False
    if to_date and vote_date > to_date:
        return False
    if ep_refs and vote_data.get('epref') not in ep_refs:
        return False
    return True


def _record_vote(vote_data, vote_index):
    """Add ``vote_index`` to the matching category set of every MEP in the vote."""
    for vid, vote_type in enumerate(vote_types):
        if vote_type not in vote_data:
            continue
        for group in vote_data[vote_type]['groups']:
            for vote in group['votes']:
                # Entries that are not dicts carry no 'id' and are ignored.
                if not isinstance(vote, dict):
                    continue
                categories = meps.setdefault(vote['id'], [set(), set(), set()])
                categories[vid].add(vote_index)


def parse(outprefix, from_date, to_date=None, ep_refs=None):
    """Extract per-MEP vote counts and pairwise agreement counts.

    Reads ``ep_votes.json`` from the current working directory. The first
    character of the file is skipped, every remaining non-blank line that is
    not exactly ``,`` or ``]`` is decoded as one JSON vote object, and the
    votes passing the filters are recorded in the module-level ``meps``.

    Two tab-separated files are then written:

    * ``<outprefix>_vote_counts.csv`` -- ``mep``, ``vote_count``
    * ``<outprefix>_same_votes.csv`` -- ``mep1``, ``mep2``, ``same_vote_count``
      for every unordered pair of MEPs.

    Progress is reported on stderr.

    Args:
        outprefix: Path prefix of the two output files.
        from_date: Votes whose parsed ``ts`` is earlier than this are skipped.
            Must be comparable with the datetimes ``dateutil`` produces from
            the ``ts`` field.
        to_date: Votes whose parsed ``ts`` is later than this are skipped;
            a falsy value disables the upper bound.
        ep_refs: Container of accepted ``epref`` values; a falsy value
            disables this filter.
    """
    with open('ep_votes.json') as infile:
        # Skip the first character of the file (presumably the '[' that
        # opens the JSON array -- confirm against the dump format).
        infile.seek(1)
        # The line index doubles as the vote identifier; separator lines
        # consume an index too, so identifiers are unique but not contiguous.
        for i, raw_line in enumerate(infile):
            line = raw_line.strip()
            # Blank lines are skipped rather than treated as end of input.
            if not line or line in (',', ']'):
                continue
            vote_data = loads(line)
            if not _is_selected(vote_data, from_date, to_date, ep_refs):
                continue
            stderr.write('.')
            stderr.flush()
            _record_vote(vote_data, i)

    stderr.write('[parsing done]\n')

    with open(outprefix + '_vote_counts.csv', 'w') as outfile:
        outfile.write('mep\tvote_count\n')
        for mep in meps:
            vote_count = sum(len(x) for x in meps[mep])
            outfile.write('{0}\t{1}\n'.format(mep, vote_count))

    stderr.write('[vote counts done]\n')

    with open(outprefix + '_same_votes.csv', 'w') as outfile:
        outfile.write('mep1\tmep2\tsame_vote_count\n')
        for mep1, mep2 in combinations(meps.keys(), 2):
            # Two MEPs agree on a vote when its index is in the same
            # category set for both of them.
            same_votes = sum(
                len(own & other)
                for own, other in zip(meps[mep1], meps[mep2])
            )
            outfile.write('{0}\t{1}\t{2}\n'.format(mep1, mep2, same_votes))

    stderr.write('[same votes done]\n')


if __name__ == '__main__':
    if len(argv) not in (4, 5):
        print('''[Error] wrong parameters
run
    python voteparser.py <output_prefix> <from_date> <to_date>
or
    python voteparser.py <output_prefix> <from_date> <to_date> <ep_refs_file>''')
        exit(1)

    prefix = argv[1]
    from_date = parser.parse(argv[2])
    to_date = parser.parse(argv[3])
    # Embed the requested date range in the output file names; '/' would
    # otherwise be taken as a path separator.
    prefix += '_{0}_-_{1}'.format(argv[2].replace('/', '-'),
                                  argv[3].replace('/', '-'))
    print(prefix)
    if len(argv) == 5:
        # One accepted epref per line of the given file.
        with open(argv[4]) as refs_file:
            ep_refs = set(ref.strip() for ref in refs_file)
    else:
        ep_refs = None

    parse(prefix, from_date, to_date, ep_refs)