├── .gitignore ├── LICENSE ├── pdpy ├── __init__.py ├── combine.py ├── constants.py ├── core.py ├── elections.py ├── errors.py ├── filter.py ├── lords.py ├── members.py ├── mps.py ├── settings.py └── utils.py ├── readme.md ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── data ├── commons_memberships_raw.pkl ├── fetch_commons_memberships.pkl ├── fetch_commons_memberships_from_to.pkl ├── fetch_lords.pkl ├── fetch_lords_committee_memberships.pkl ├── fetch_lords_committee_memberships_from_to.pkl ├── fetch_lords_committee_memberships_while_lord.pkl ├── fetch_lords_from_to.pkl ├── fetch_lords_government_roles.pkl ├── fetch_lords_government_roles_from_to.pkl ├── fetch_lords_government_roles_while_lord.pkl ├── fetch_lords_memberships.pkl ├── fetch_lords_memberships_from_to.pkl ├── fetch_lords_opposition_roles.pkl ├── fetch_lords_opposition_roles_from_to.pkl ├── fetch_lords_opposition_roles_while_lord.pkl ├── fetch_lords_party_memberships.pkl ├── fetch_lords_party_memberships_collapse.pkl ├── fetch_lords_party_memberships_from_to.pkl ├── fetch_lords_party_memberships_while_lord.pkl ├── fetch_mps.pkl ├── fetch_mps_committee_memberships.pkl ├── fetch_mps_committee_memberships_from_to.pkl ├── fetch_mps_committee_memberships_while_mp.pkl ├── fetch_mps_from_to.pkl ├── fetch_mps_government_roles.pkl ├── fetch_mps_government_roles_from_to.pkl ├── fetch_mps_government_roles_while_mp.pkl ├── fetch_mps_opposition_roles.pkl ├── fetch_mps_opposition_roles_from_to.pkl ├── fetch_mps_opposition_roles_while_mp.pkl ├── fetch_mps_party_memberships.pkl ├── fetch_mps_party_memberships_collapse.pkl ├── fetch_mps_party_memberships_from_to.pkl ├── fetch_mps_party_memberships_while_mp.pkl ├── lords_committee_memberships_raw.pkl ├── lords_government_roles_raw.pkl ├── lords_memberships_raw.pkl ├── lords_opposition_roles_raw.pkl ├── lords_party_memberships_raw.pkl ├── lords_raw.pkl ├── mps_committee_memberships_raw.pkl ├── mps_government_roles_raw.pkl ├── mps_opposition_roles_raw.pkl ├── 
mps_party_memberships_raw.pkl └── mps_raw.pkl ├── test_combine.py ├── test_core.py ├── test_elections.py ├── test_filter.py ├── test_lords.py ├── test_mps.py ├── test_settings.py ├── validate.py ├── validate_lords.py └── validate_mps.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Directories 2 | __pycache__ 3 | dist 4 | docs 5 | pdpy.egg-info 6 | 7 | # Files 8 | .DS_Store 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Oliver Hawkins 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software without 15 | specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /pdpy/__init__.py: -------------------------------------------------------------------------------- 1 | """pdpy: A package for downloading data from the Parliamentary Data Platform. 2 | 3 | The pdpy package provides a suite of functions for downloading data from 4 | the data platform for the UK Parliament. 5 | """ 6 | 7 | from . import core 8 | from .core import sparql_select 9 | 10 | from . import elections 11 | from .elections import get_general_elections 12 | from .elections import get_general_elections_dict 13 | 14 | from . import lords 15 | from .lords import fetch_lords 16 | from .lords import fetch_lords_memberships 17 | from .lords import fetch_lords_party_memberships 18 | from .lords import fetch_lords_government_roles 19 | from .lords import fetch_lords_opposition_roles 20 | from .lords import fetch_lords_committee_memberships 21 | 22 | from . import mps 23 | from .mps import fetch_mps 24 | from .mps import fetch_commons_memberships 25 | from .mps import fetch_mps_party_memberships 26 | from .mps import fetch_mps_government_roles 27 | from .mps import fetch_mps_opposition_roles 28 | from .mps import fetch_mps_committee_memberships 29 | 30 | from . import settings 31 | from .settings import get_api_url 32 | from .settings import set_api_url 33 | from .settings import reset_api_url 34 | 35 | from . 
def combine_party_memberships(pm):

    """Combine consecutive records in a dataframe of party memberships.

    combine_party_memberships takes a dataframe of party memberships and
    combines historically consecutive memberships of the same party into a
    single continuous membership with the start date of the first membership
    and the end date of the last. Combining the memberships in this way means
    that party membership ids from the data platform are not included in the
    dataframe returned. The input dataframe is not modified.

    Parameters
    ----------
    pm : DataFrame
        A pandas dataframe containing party memberships as returned by one of
        the fetch party membership functions. Its columns must be exactly the
        eleven columns listed in required_columns below, in that order.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of party memberships, with one row per party
        membership. The memberships are processed and combined so that there is
        only one party membership for a period of continuous membership within
        the same party. The rows are sorted by family name and membership
        start date. An empty input returns an empty dataframe with the output
        columns.

    Raises
    ------
    ValueError
        If pm does not have exactly the expected columns in the expected
        order.

    """

    # Check the party memberships dataframe has the expected structure
    required_columns = [
        'person_id',
        'mnis_id',
        'given_name',
        'family_name',
        'display_name',
        'party_id',
        'party_mnis_id',
        'party_name',
        'party_membership_id',
        'party_membership_start_date',
        'party_membership_end_date']

    if list(pm.columns) != required_columns:
        raise ValueError('pm does not have the expected columns')

    # The output has every input column except the membership id, which has
    # no meaning once several memberships have been merged into one
    output_columns = [
        column for column in required_columns
        if column != 'party_membership_id']

    # Nothing to combine: return an empty frame with the output structure
    if pm.empty:
        return pm[output_columns].reset_index(drop=True)

    # Sort by person id and membership start date; sort_values returns a new
    # dataframe so the caller's data is left untouched
    pm = pm.sort_values(by=['person_id', 'party_membership_start_date'])

    # Create unique combination of person_id and party_id
    per_par_id = pm['person_id'].astype(str) + '-' + pm['party_id'].astype(str)

    # Number each run of consecutive rows with the same person and party: the
    # counter increases by one every time the combination changes from the
    # previous row, so a person who leaves and later rejoins a party gets a
    # different id for each period
    run_number = (per_par_id != per_par_id.shift()).cumsum()
    pm['per_par_mem_id'] = per_par_id + '-' + run_number.astype(str)

    # Group by person, party and consecutive membership, then take the
    # earliest start date and latest end date
    aggregation = {
        'party_membership_start_date': utils.min_date_nan,
        'party_membership_end_date': utils.max_date_nan
    }

    spans = pm.groupby([
        'person_id',
        'party_id',
        'per_par_mem_id']).agg(aggregation).reset_index()

    # Person and party details, de-duplicated before the merge so that the
    # join does not multiply every span by the number of raw memberships
    details = pm[[
        'person_id',
        'party_id',
        'mnis_id',
        'given_name',
        'family_name',
        'display_name',
        'party_mnis_id',
        'party_name']].drop_duplicates()

    combined = spans.merge(
        details,
        how='left',
        on=['person_id', 'party_id']).drop_duplicates()

    # Tidy up and return
    return combined[output_columns].sort_values(
        by=['family_name',
            'party_membership_start_date']).reset_index(drop=True)
def sparql_select(query):

    """Send a select query and return the response as a DataFrame.

    sparql_select sends a SPARQL query to the api endpoint and returns the
    response as a DataFrame. The SPARQL should be a SELECT query as the
    response is processed as tabular data. The function will convert datatypes
    that it recognises. It currently recognises date types. All other data
    returned in the DataFrame will be strings. If the query syntax is not valid
    or the request fails for any other reason a RequestError will be raised
    with the response text.

    Parameters
    ----------
    query : str
        A SPARQL SELECT query as a string.

    Returns
    -------
    out : DataFrame
        A pandas dataframe containing the results of the query, with one
        column per variable in the response head. Variables that are unbound
        in a record are returned as NaN.

    Raises
    ------
    RequestError
        If the server response is not ok.

    """

    # Send the query and get the response
    response = request(query)

    # If the server returned an error raise it with the response text
    if not response.ok:
        raise errors.RequestError(response.text)

    # Process the response as tabular data
    payload = response.json()
    headers = payload['head']['vars']
    records = payload['results']['bindings']

    def parse_binding(binding):
        # Dates are converted to datetime.date. Only the leading YYYY-MM-DD
        # is parsed, so any trailing timezone marker ('+01:00', 'Z', or none)
        # is accepted and ignored
        if binding.get('datatype') == constants.XML_DATE:
            return datetime.datetime.strptime(
                binding['value'][:10], '%Y-%m-%d').date()
        # Everything else is returned as a stripped string
        return binding['value'].strip()

    # For each record build a row, using None where a variable is unbound
    rows = [
        [parse_binding(record[header]) if header in record else None
            for header in headers]
        for record in records]

    # np.nan rather than the np.NaN alias, which was removed in NumPy 2.0
    return pd.DataFrame(data=rows, columns=headers).fillna(value=np.nan)
def get_general_elections_dict():

    """Return the dates of UK general elections since 1929 as a dict.

    get_general_elections_dict returns a dict containing the dissolution and
    election dates for each general election since 1929, as returned by
    get_general_elections. Each item in the dict is keyed with the election
    name and contains a dict of two values: one named "dissolution"
    containing the dissolution date and the other named "election" containing
    the election date.

    Returns
    -------
    out : dict
        A dictionary containing the dissolution and election dates for each
        general election.

    """

    election_dates = get_general_elections()

    # Walk the three columns in step and build one entry per election
    return {
        name: {'dissolution': dissolution, 'election': election}
        for name, dissolution, election in zip(
            election_dates['name'],
            election_dates['dissolution'],
            election_dates['election'])}
def filter_dates(df,
                 start_col,
                 end_col,
                 from_date=np.nan,
                 to_date=np.nan):

    """Filter a dataframe of data based on the given from and to dates.

    filter_dates takes a dataframe which contains data on a time bound
    activity and returns the subset of rows where that activity took place
    within a given period. The dataframe must contain two columns of
    datetime.date objects, which record the start and end dates of an
    activity. The from and to dates provided are used to find all rows where
    some part of the period of activity took place within the period of
    filtering. The filtering process is inclusive: as long as at least one day
    of activity falls within the filtering period, the row is returned. A
    missing (NaN) start or end date in a row never excludes that row.

    Parameters
    ----------
    df : DataFrame
        A pandas dataframe containing data on a time bound activity.
    start_col : str
        The name of the column that contains the start date for the activity.
    end_col : str
        The name of the column that contains the end date for the activity.
    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.

    Returns
    -------
    out : DataFrame
        A dataframe with the same structure as the input df containing the
        rows that meet the filtering criteria. If df has no rows, or neither
        date is given, df itself is returned.

    Raises
    ------
    MissingColumnError
        If start_col or end_col is not a column in df.
    ValueError
        If both dates are given and to_date is before from_date.

    """

    # Check the start and end columns exist
    for col in (start_col, end_col):
        if col not in df.columns:
            raise errors.MissingColumnError(col)

    # Check the dataframe has rows
    if df.shape[0] == 0:
        return df

    # Check there are dates to filter
    if pd.isna(from_date) and pd.isna(to_date):
        return df

    # Handle from and to dates
    from_date = handle_date(from_date)
    to_date = handle_date(to_date)
    has_from = not pd.isna(from_date)
    has_to = not pd.isna(to_date)

    # Check from date is before to date
    if has_from and has_to and from_date > to_date:
        raise ValueError('to_date is before from_date')

    # Start by keeping every row, then drop the rows that fall wholly outside
    # the filtering period
    keep = pd.Series(True, index=df.index)

    # Drop rows whose activity ended before the from date
    if has_from:
        keep &= ~df[end_col].map(
            lambda d: False if pd.isna(d) else from_date > d)

    # Drop rows whose activity started after the to date
    if has_to:
        keep &= ~df[start_col].map(
            lambda d: False if pd.isna(d) else to_date < d)

    return df[keep]
def filter_memberships(tm,
                       fm,
                       tm_id_col,
                       tm_start_col,
                       tm_end_col,
                       fm_start_col,
                       fm_end_col,
                       join_col):

    """Filter a dataframe of memberships to include only the rows whose period
    of membership intersects with those in another dataframe of memberships.

    filter_memberships is a function to find all memberships in one dataframe
    that intersect with those in another dataframe for each person, or other
    entity. This function lets you find things like all committee memberships
    for Commons Members during the period they have served as an MP, or all
    government roles held by Members of the House of Lords while they have
    served in the Lords.

    A missing (NaN) date is treated as open-ended: it never rules out an
    intersection. As a consequence, a target membership whose entity has no
    rows at all in the filter memberships is kept.

    Parameters
    ----------
    tm : DataFrame
        A pandas dataframe containing the target memberships. These are the
        memberships to be filtered.
    fm : DataFrame
        A pandas dataframe containing the filter memberships. These are the
        memberships that are used to filter the target memberships.
    tm_id_col : str
        The name of the column in the target memberships that contains the
        target membership id.
    tm_start_col : str
        The name of the column in target memberships that contains the start
        date for the membership.
    tm_end_col : str
        The name of the column in target memberships that contains the end
        date for the membership.
    fm_start_col : str
        The name of the column in filter memberships that contains the start
        date for the membership.
    fm_end_col : str
        The name of the column in filter memberships that contains the end
        date for the membership.
    join_col : str
        The name of the column in both the target and filter memberships that
        contains the id of the entity that is common to both tables. Where the
        entity is a person this will be the person id.

    Returns
    -------
    out : DataFrame
        A dataframe with the same structure as the input tm containing the rows
        that meet the filtering criteria, with a fresh integer index. If tm
        has no rows, tm itself is returned.

    Raises
    ------
    MissingColumnError
        If any of the named columns is missing from the dataframe it is
        expected in.

    """

    # Check the target memberships dataframe has rows
    if tm.shape[0] == 0:
        return tm

    # Check the columns exist in each dataframe
    for col in (tm_id_col, tm_start_col, tm_end_col, join_col):
        if col not in tm.columns:
            raise errors.MissingColumnError(col)

    for col in (fm_start_col, fm_end_col, join_col):
        if col not in fm.columns:
            raise errors.MissingColumnError(col)

    # Create abstract copies of tm and fm with fixed column names; explicit
    # copies so renaming the columns never touches the callers' dataframes
    tma = tm[[join_col, tm_id_col, tm_start_col, tm_end_col]].copy()
    tma.columns = ['join_col', 'tm_id_col', 'tm_start_col', 'tm_end_col']

    fma = fm[[join_col, fm_start_col, fm_end_col]].copy()
    fma.columns = ['join_col', 'fm_start_col', 'fm_end_col']

    # Join the target memberships with the filter membership dates on join_col
    tm_fm = tma.merge(
        fma,
        how='left',
        on='join_col')

    # Function to test if a target membership and filter membership intersect
    def intersects(tm_start, tm_end, fm_start, fm_end):
        # The two periods miss each other only if the target starts after the
        # filter ends, or ends before the filter starts; a missing date on
        # either side of a comparison means that comparison cannot fail
        starts_after_end = (
            not pd.isna(tm_start) and
            not pd.isna(fm_end) and
            tm_start > fm_end)
        ends_before_start = (
            not pd.isna(tm_end) and
            not pd.isna(fm_start) and
            tm_end < fm_start)
        return not (starts_after_end or ends_before_start)

    # Apply the function to each combination of target and filter membership
    in_membership = [
        intersects(tm_start, tm_end, fm_start, fm_end)
        for tm_start, tm_end, fm_start, fm_end in zip(
            tm_fm['tm_start_col'],
            tm_fm['tm_end_col'],
            tm_fm['fm_start_col'],
            tm_fm['fm_end_col'])]

    # A target membership is kept if it intersected with any filter membership
    matched_ids = tm_fm.loc[in_membership, 'tm_id_col'].unique()

    # Return the target memberships after filtering
    return tm[tm[tm_id_col].isin(matched_ids)].reset_index(drop=True)
def fetch_lords_raw():
    """Fetch key details for all Lords.

    A thin wrapper that passes the House of Lords id from the package
    constants to members.fetch_members_raw and returns its result unchanged.
    """
    lords_house_id = constants.PDP_ID_HOUSE_OF_LORDS
    return members.fetch_members_raw(house=lords_house_id)
def fetch_lords(from_date=np.nan,
                to_date=np.nan,
                on_date=np.nan):

    """Fetch key details for all Lords.

    fetch_lords fetches data from the data platform showing key details about
    each Lord, with one row per Lord.

    The from_date and to_date arguments can be used to filter the Lords
    returned based on the dates of their Lords memberships. The on_date
    argument is a convenience that sets the from_date and to_date to the same
    given date. The on_date has priority: if the on_date is set, the from_date
    and to_date are ignored.

    The filtering is inclusive: a Lord is returned if any part of one of their
    Lords memberships falls within the period specified with the from and to
    dates.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of key details for each Lord, with one row per Lord,
        sorted by family name.

    """

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch key details
    lords = fetch_lords_raw()

    # Filter based on membership dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        lords_memberships = fetch_lords_memberships()
        matching_memberships = filter.filter_dates(
            lords_memberships,
            start_col='seat_incumbency_start_date',
            end_col='seat_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)
        # Keep only the Lords with at least one membership in the period
        lords = lords[lords['person_id'].isin(
            matching_memberships['person_id'])]

    # Tidy up and return; sorting into a new dataframe avoids modifying a
    # filtered slice in place
    return lords.sort_values(by=['family_name']).reset_index(drop=True)
170 | 171 | fetch_lords_memberships fetches data from the data platform showing 172 | Lords memberships for each Lord. 173 | 174 | The from_date and to_date arguments can be used to filter the memberships 175 | returned. The on_date argument is a convenience that sets the from_date and 176 | to_date to the same given date. The on_date has priority: if the on_date is 177 | set, the from_date and to_date are ignored. 178 | 179 | The filtering is inclusive: a membership is returned if any part of it 180 | falls within the period specified with the from and to dates. 181 | 182 | Note that a membership with a NaN end date is still open. 183 | 184 | Parameters 185 | ---------- 186 | 187 | from_date : str or date or NaN, optional 188 | A string or datetime.date representing a date. If a string is used it 189 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 190 | default value is numpy.NaN, which means no records are excluded on the 191 | basis of the from_date. 192 | to_date : str or date or NaN, optional 193 | A string or datetime.date representing a date. If a string is used it 194 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 195 | default value is np.NaN, which means no records are excluded on the 196 | basis of the to_date. 197 | on_date : str or date or NaN, optional 198 | A string or datetime.date representing a date. If a string is used it 199 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 200 | default value is np.NaN, which means no records are excluded on the 201 | basis of the on_date. 202 | 203 | Returns 204 | ------- 205 | out : DataFrame 206 | A pandas dataframe of Lords memberships for each Lord, with one row 207 | per Lords membership. 
def fetch_lords_party_memberships(from_date=np.nan,
                                  to_date=np.nan,
                                  on_date=np.nan,
                                  while_lord=True,
                                  collapse=False):

    """Fetch party memberships for all Lords.

    fetch_lords_party_memberships fetches data from the data platform showing
    party memberships for each Lord.

    The from_date and to_date arguments can be used to filter the memberships
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The while_lord argument can be used to filter the memberships to include
    only those that occurred during the period when each individual was a Lord.

    The filtering is inclusive: a membership is returned if any part
    of it falls within the period specified with the from and to dates.

    The collapse argument controls whether memberships are combined so that
    there is only one row for each period of continuous membership within the
    same party. Combining the memberships in this way means that party
    membership ids from the data platform are not included in the dataframe
    returned.

    Note that a membership with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_lord : bool, optional
        A boolean indicating whether to filter the party memberships to include
        only those memberships that were held while each individual was serving
        as a Lord. The default value is True.
    collapse : bool, optional
        Determines whether to collapse consecutive memberships within the same
        party into a single period of continuous party membership. Setting this
        to True means that party membership ids are not returned in the
        dataframe. The default value is False.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of party memberships for each Lord. When collapse
        is False (the default) there is one row per party membership, sorted
        by family_name and party_membership_start_date with a fresh zero-based
        index. When collapse is True the result of
        combine.combine_party_memberships is returned as is. A membership with
        a NaN end date is still open.

    """

    # NOTE: the defaults use np.nan rather than np.NaN because the np.NaN
    # alias was removed in NumPy 2.0; the value is the same.

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the party memberships
    party_memberships = fetch_lords_party_memberships_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        party_memberships = filter.filter_dates(
            party_memberships,
            start_col='party_membership_start_date',
            end_col='party_membership_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Lords memberships if requested
    if while_lord:
        lords_memberships = fetch_lords_memberships()
        party_memberships = filter.filter_memberships(
            tm=party_memberships,
            fm=lords_memberships,
            tm_id_col='party_membership_id',
            tm_start_col='party_membership_start_date',
            tm_end_col='party_membership_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Collapse consecutive memberships and return if requested
    if collapse:
        return combine.combine_party_memberships(party_memberships)

    # Otherwise tidy up and return. A new dataframe is built rather than
    # sorting in place, since the filtered result may be a selection from
    # another dataframe and in-place operations on such a selection can
    # trigger pandas' SettingWithCopyWarning.
    sort_columns = ['family_name', 'party_membership_start_date']
    return party_memberships.sort_values(
        by=sort_columns).reset_index(drop=True)
355 | 356 | fetch_lords_government_roles fetches data from the data platform showing 357 | government roles for each Lord. 358 | 359 | The from_date and to_date arguments can be used to filter the roles 360 | returned. The on_date argument is a convenience that sets the from_date and 361 | to_date to the same given date. The on_date has priority: if the on_date is 362 | set, the from_date and to_date are ignored. 363 | 364 | The while_lord argument can be used to filter the roles to include only 365 | those that occurred during the period when each individual was a Lord. 366 | 367 | The filtering is inclusive: a role is returned if any part of it falls 368 | within the period specified with the from and to dates. 369 | 370 | Note that a role with a NaN end date is still open. 371 | 372 | Parameters 373 | ---------- 374 | 375 | from_date : str or date or NaN, optional 376 | A string or datetime.date representing a date. If a string is used it 377 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 378 | default value is numpy.NaN, which means no records are excluded on the 379 | basis of the from_date. 380 | to_date : str or date or NaN, optional 381 | A string or datetime.date representing a date. If a string is used it 382 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 383 | default value is np.NaN, which means no records are excluded on the 384 | basis of the to_date. 385 | on_date : str or date or NaN, optional 386 | A string or datetime.date representing a date. If a string is used it 387 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 388 | default value is np.NaN, which means no records are excluded on the 389 | basis of the on_date. 390 | while_lord : bool, optional 391 | A boolean indicating whether to filter the government roles to include 392 | only those roles that were held while each individual was serving as a 393 | Lord. The default value is True. 
def fetch_lords_opposition_roles(from_date=np.nan,
                                 to_date=np.nan,
                                 on_date=np.nan,
                                 while_lord=True):

    """Fetch opposition roles for all Lords.

    fetch_lords_opposition_roles fetches data from the data platform showing
    opposition roles for each Lord.

    The from_date and to_date arguments can be used to filter the roles
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The while_lord argument can be used to filter the roles to include only
    those that occurred during the period when each individual was a Lord.

    The filtering is inclusive: a role is returned if any part of it falls
    within the period specified with the from and to dates.

    Note that a role with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_lord : bool, optional
        A boolean indicating whether to filter the opposition roles to include
        only those roles that were held while each individual was serving as a
        Lord. The default value is True.

    Returns
    -------
    out : DataFrame
        A dataframe of opposition roles for each Lord, with one row per role,
        sorted by family_name and opposition_incumbency_start_date with a
        fresh zero-based index.

    """

    # NOTE: the defaults use np.nan rather than np.NaN because the np.NaN
    # alias was removed in NumPy 2.0; the value is the same.

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the opposition roles
    opposition_roles = fetch_lords_opposition_roles_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        opposition_roles = filter.filter_dates(
            opposition_roles,
            start_col='opposition_incumbency_start_date',
            end_col='opposition_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Lords memberships if requested
    if while_lord:
        lords_memberships = fetch_lords_memberships()
        opposition_roles = filter.filter_memberships(
            tm=opposition_roles,
            fm=lords_memberships,
            tm_id_col='opposition_incumbency_id',
            tm_start_col='opposition_incumbency_start_date',
            tm_end_col='opposition_incumbency_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Tidy up and return. A new dataframe is built rather than sorting in
    # place, since the filtered result may be a selection from another
    # dataframe and in-place operations on such a selection can trigger
    # pandas' SettingWithCopyWarning.
    sort_columns = ['family_name', 'opposition_incumbency_start_date']
    return opposition_roles.sort_values(
        by=sort_columns).reset_index(drop=True)
547 | 548 | The while_lord argument can be used to filter the memberships to include 549 | only those that occurred during the period when each individual was a Lord. 550 | 551 | The filtering is inclusive: a membership is returned if any part of it 552 | falls within the period specified with the from and to dates. 553 | 554 | Note that a membership with a NaN end date is still open. 555 | 556 | Parameters 557 | ---------- 558 | 559 | from_date : str or date or NaN, optional 560 | A string or datetime.date representing a date. If a string is used it 561 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 562 | default value is numpy.NaN, which means no records are excluded on the 563 | basis of the from_date. 564 | to_date : str or date or NaN, optional 565 | A string or datetime.date representing a date. If a string is used it 566 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 567 | default value is np.NaN, which means no records are excluded on the 568 | basis of the to_date. 569 | on_date : str or date or NaN, optional 570 | A string or datetime.date representing a date. If a string is used it 571 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 572 | default value is np.NaN, which means no records are excluded on the 573 | basis of the on_date. 574 | while_lord : bool, optional 575 | A boolean indicating whether to filter the committee memberships to 576 | include only those memberships that were held while each individual was 577 | serving as a Lord. The default value is True. 578 | 579 | Returns 580 | ------- 581 | out : DataFrame 582 | A dataframe of committee memberships for each Lord, with one row per 583 | membership. 
584 | 585 | """ 586 | 587 | # Set from_date and to_date to on_date if set 588 | if not pd.isna(on_date): 589 | from_date = on_date 590 | to_date = on_date 591 | 592 | # Fetch the committee memberships 593 | committee_memberships = fetch_lords_committee_memberships_raw() 594 | 595 | # Filter on dates if requested 596 | if not pd.isna(from_date) or not pd.isna(to_date): 597 | committee_memberships = filter.filter_dates( 598 | committee_memberships, 599 | start_col='committee_membership_start_date', 600 | end_col='committee_membership_end_date', 601 | from_date=from_date, 602 | to_date=to_date) 603 | 604 | # Filter on Lords memberships if requested 605 | if while_lord: 606 | lords_memberships = fetch_lords_memberships() 607 | committee_memberships = filter.filter_memberships( 608 | tm=committee_memberships, 609 | fm=lords_memberships, 610 | tm_id_col='committee_membership_id', 611 | tm_start_col='committee_membership_start_date', 612 | tm_end_col='committee_membership_end_date', 613 | fm_start_col='seat_incumbency_start_date', 614 | fm_end_col='seat_incumbency_end_date', 615 | join_col='person_id') 616 | 617 | # Tidy up and return 618 | committee_memberships.sort_values( 619 | by=['family_name', 620 | 'committee_membership_start_date'], 621 | inplace=True) 622 | committee_memberships.reset_index(drop=True, inplace=True) 623 | return committee_memberships 624 | -------------------------------------------------------------------------------- /pdpy/members.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Functions for downloading and analysing data on Members of either House.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | from . import constants 7 | from . 
import core 8 | 9 | # Raw Members queries --------------------------------------------------------- 10 | 11 | def fetch_members_raw(house=None): 12 | 13 | """Fetch key details for Members.""" 14 | 15 | # Initialise house constraint 16 | house_constraint = '' 17 | 18 | # If a house is specified set the house constraint 19 | if house == constants.PDP_ID_HOUSE_OF_COMMONS or \ 20 | house == constants.PDP_ID_HOUSE_OF_LORDS: 21 | house_constraint = 'BIND(d:{0} AS ?house)'.format(house) 22 | 23 | # Build the query 24 | members_query = """ 25 | PREFIX : 26 | PREFIX d: 27 | SELECT DISTINCT 28 | 29 | ?person_id 30 | ?mnis_id 31 | ?given_name 32 | ?family_name 33 | ?other_names 34 | ?display_name 35 | ?full_title 36 | ?gender 37 | ?date_of_birth 38 | ?date_of_death 39 | 40 | WHERE {{ 41 | 42 | # House constraint 43 | {0} 44 | 45 | ?person_id :memberMnisId ?mnis_id ; 46 | :personGivenName ?given_name ; 47 | :personFamilyName ?family_name ; 48 | ?display_name ; 49 | ?full_title ; 50 | :personHasGenderIdentity/:genderIdentityHasGender/:genderName ?gender ; 51 | :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house . 52 | OPTIONAL {{ ?person_id :personOtherNames ?other_names . }} 53 | OPTIONAL {{ ?person_id :personDateOfBirth ?date_of_birth . }} 54 | OPTIONAL {{ ?person_id :personDateOfDeath ?date_of_death . 
}} 55 | }} 56 | """.format(house_constraint) 57 | 58 | return core.sparql_select(members_query) 59 | 60 | 61 | def fetch_party_memberships_raw(house=None): 62 | 63 | """Fetch party memberships for Members.""" 64 | 65 | # Initialise house constraint 66 | house_constraint = '' 67 | 68 | # If a house is specified set the house constraint 69 | if house == constants.PDP_ID_HOUSE_OF_COMMONS or \ 70 | house == constants.PDP_ID_HOUSE_OF_LORDS: 71 | house_constraint = 'BIND(d:{0} AS ?house)'.format(house) 72 | 73 | party_memberships_query = """ 74 | PREFIX : 75 | PREFIX d: 76 | SELECT DISTINCT 77 | 78 | ?person_id 79 | ?mnis_id 80 | ?given_name 81 | ?family_name 82 | ?display_name 83 | ?party_id 84 | ?party_mnis_id 85 | ?party_name 86 | ?party_membership_id 87 | ?party_membership_start_date 88 | ?party_membership_end_date 89 | 90 | WHERE {{ 91 | 92 | # House constraint 93 | {0} 94 | 95 | ?person_id :memberMnisId ?mnis_id; 96 | :personGivenName ?given_name ; 97 | :personFamilyName ?family_name ; 98 | ?display_name ; 99 | :partyMemberHasPartyMembership ?party_membership_id ; 100 | :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house . 101 | ?party_membership_id a :PartyMembership ; 102 | :partyMembershipHasParty ?party_id ; 103 | :partyMembershipStartDate ?party_membership_start_date . 104 | OPTIONAL {{ ?party_membership_id :partyMembershipEndDate ?party_membership_end_date . }} 105 | ?party_id :partyMnisId ?party_mnis_id ; 106 | :partyName ?party_name . 
107 | }} 108 | """.format(house_constraint) 109 | 110 | return core.sparql_select(party_memberships_query) 111 | 112 | 113 | def fetch_government_roles_raw(house=None): 114 | 115 | """Fetch government roles for Members.""" 116 | 117 | # Initialise house constraint 118 | house_constraint = '' 119 | 120 | # If a house is specified set the house constraint 121 | if house == constants.PDP_ID_HOUSE_OF_COMMONS or \ 122 | house == constants.PDP_ID_HOUSE_OF_LORDS: 123 | house_constraint = 'BIND(d:{0} AS ?house)'.format(house) 124 | 125 | government_roles_query = """ 126 | PREFIX : 127 | PREFIX d: 128 | SELECT DISTINCT 129 | 130 | ?person_id 131 | ?mnis_id 132 | ?given_name 133 | ?family_name 134 | ?display_name 135 | ?position_id 136 | ?position_name 137 | ?government_incumbency_id 138 | ?government_incumbency_start_date 139 | ?government_incumbency_end_date 140 | 141 | WHERE {{ 142 | 143 | # House constraint 144 | {0} 145 | 146 | ?person_id :memberMnisId ?mnis_id; 147 | :personGivenName ?given_name ; 148 | :personFamilyName ?family_name ; 149 | ?display_name ; 150 | :governmentPersonHasGovernmentIncumbency ?government_incumbency_id ; 151 | :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house . 152 | ?government_incumbency_id a :GovernmentIncumbency ; 153 | :governmentIncumbencyHasGovernmentPosition ?position_id ; 154 | :incumbencyStartDate ?government_incumbency_start_date . 155 | OPTIONAL {{ ?government_incumbency_id :incumbencyEndDate ?government_incumbency_end_date . }} 156 | ?position_id :positionName ?position_name . 
157 | }} 158 | """.format(house_constraint) 159 | 160 | return core.sparql_select(government_roles_query) 161 | 162 | 163 | def fetch_opposition_roles_raw(house=None): 164 | 165 | """Fetch opposition roles for Members.""" 166 | 167 | # Initialise house constraint 168 | house_constraint = '' 169 | 170 | # If a house is specified set the house constraint 171 | if house == constants.PDP_ID_HOUSE_OF_COMMONS or \ 172 | house == constants.PDP_ID_HOUSE_OF_LORDS: 173 | house_constraint = 'BIND(d:{0} AS ?house)'.format(house) 174 | 175 | opposition_roles_query = """ 176 | PREFIX : 177 | PREFIX d: 178 | SELECT DISTINCT 179 | 180 | ?person_id 181 | ?mnis_id 182 | ?given_name 183 | ?family_name 184 | ?display_name 185 | ?position_id 186 | ?position_name 187 | ?opposition_incumbency_id 188 | ?opposition_incumbency_start_date 189 | ?opposition_incumbency_end_date 190 | 191 | WHERE {{ 192 | 193 | # House constraint 194 | {0} 195 | 196 | ?person_id :memberMnisId ?mnis_id; 197 | :personGivenName ?given_name ; 198 | :personFamilyName ?family_name ; 199 | ?display_name ; 200 | :oppositionPersonHasOppositionIncumbency ?opposition_incumbency_id ; 201 | :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house . 202 | ?opposition_incumbency_id a :OppositionIncumbency ; 203 | :oppositionIncumbencyHasOppositionPosition ?position_id ; 204 | :incumbencyStartDate ?opposition_incumbency_start_date . 205 | OPTIONAL {{ ?opposition_incumbency_id :incumbencyEndDate ?opposition_incumbency_end_date . }} 206 | ?position_id :positionName ?position_name . 
207 | }} 208 | """.format(house_constraint) 209 | 210 | return core.sparql_select(opposition_roles_query) 211 | 212 | 213 | def fetch_committee_memberships_raw(house=None): 214 | 215 | """Fetch committee memberships for Members.""" 216 | 217 | # Initialise house constraint 218 | house_constraint = '' 219 | 220 | # If a house is specified set the house constraint 221 | if house == constants.PDP_ID_HOUSE_OF_COMMONS or \ 222 | house == constants.PDP_ID_HOUSE_OF_LORDS: 223 | house_constraint = 'BIND(d:{0} AS ?house)'.format(house) 224 | 225 | committee_memberships_query = """ 226 | PREFIX : 227 | PREFIX d: 228 | SELECT DISTINCT 229 | 230 | ?person_id 231 | ?mnis_id 232 | ?given_name 233 | ?family_name 234 | ?display_name 235 | ?committee_id 236 | ?committee_name 237 | ?committee_type_id 238 | ?committee_type_name 239 | ?committee_membership_id 240 | ?committee_membership_start_date 241 | ?committee_membership_end_date 242 | 243 | WHERE {{ 244 | 245 | # House constraint 246 | {0} 247 | 248 | ?person_id :memberMnisId ?mnis_id; 249 | :personGivenName ?given_name ; 250 | :personFamilyName ?family_name ; 251 | ?display_name ; 252 | :personHasFormalBodyMembership ?committee_membership_id ; 253 | :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house . 254 | ?committee_membership_id :formalBodyMembershipHasFormalBody ?committee_id ; 255 | :formalBodyMembershipStartDate ?committee_membership_start_date . 256 | OPTIONAL {{ ?committee_membership_id :formalBodyMembershipEndDate ?committee_membership_end_date . }} 257 | ?committee_id a :FormalBody ; 258 | :formalBodyName ?committee_name . 259 | OPTIONAL {{ 260 | ?committee_id :formalBodyHasFormalBodyType ?committee_type_id ; 261 | :formalBodyHasFormalBodyType/:formalBodyTypeName ?committee_type_name . 
def fetch_mps_raw():
    """Fetch key details for all MPs.

    Delegates to members.fetch_members_raw, passing the House of Commons id
    from the constants module as the house argument.

    Returns
    -------
    out
        Whatever members.fetch_members_raw returns for the House of Commons.

    """
    commons_house_id = constants.PDP_ID_HOUSE_OF_COMMONS
    return members.fetch_members_raw(house=commons_house_id)
def fetch_mps(from_date=np.nan,
              to_date=np.nan,
              on_date=np.nan):

    """Fetch key details for all MPs.

    fetch_mps fetches data from the data platform showing key details about
    each MP, with one row per MP.

    The from_date and to_date arguments can be used to filter the MPs returned
    based on the dates of their Commons memberships. The on_date argument is a
    convenience that sets the from_date and to_date to the same given date. The
    on_date has priority: if the on_date is set, the from_date and to_date are
    ignored.

    The filtering is inclusive: an MP is returned if any part of one of their
    Commons memberships falls within the period specified with the from and to
    dates.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of key details for each MP, with one row per MP,
        sorted by family_name and with a fresh zero-based index.

    """

    # NOTE: the defaults use np.nan rather than np.NaN because the np.NaN
    # alias was removed in NumPy 2.0; the value is the same.

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch key details
    mps = fetch_mps_raw()

    # Filter based on membership dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        commons_memberships = fetch_commons_memberships()
        matching_memberships = filter.filter_dates(
            commons_memberships,
            start_col='seat_incumbency_start_date',
            end_col='seat_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)
        # Keep only the MPs who have at least one matching membership
        mps = mps[mps['person_id'].isin(matching_memberships['person_id'])]

    # Tidy up and return. The sort and reindex build a new dataframe instead
    # of operating in place, because in-place operations on the filtered
    # selection above can trigger pandas' SettingWithCopyWarning.
    return mps.sort_values(by=['family_name']).reset_index(drop=True)
The 194 | default value is numpy.NaN, which means no records are excluded on the 195 | basis of the from_date. 196 | to_date : str or date or NaN, optional 197 | A string or datetime.date representing a date. If a string is used it 198 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 199 | default value is np.NaN, which means no records are excluded on the 200 | basis of the to_date. 201 | on_date : str or date or NaN, optional 202 | A string or datetime.date representing a date. If a string is used it 203 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 204 | default value is np.NaN, which means no records are excluded on the 205 | basis of the on_date. 206 | 207 | Returns 208 | ------- 209 | out : DataFrame 210 | A pandas dataframe of Commons memberships for each MP, with one row 211 | per Commons membership. 212 | 213 | """ 214 | 215 | # Set from_date and to_date to on_date if set 216 | if not pd.isna(on_date): 217 | from_date = on_date 218 | to_date = on_date 219 | 220 | # Fetch the Commons memberships 221 | commons_memberships = fetch_commons_memberships_raw() 222 | 223 | # Get elections and fix the end dates of memberships 224 | end_dates = commons_memberships['seat_incumbency_end_date'].values 225 | 226 | general_elections = elections.get_general_elections().values 227 | general_elections_count = len(general_elections) 228 | 229 | # If the end date for a membership falls after dissolution adjust it 230 | for i in range(len(end_dates)): 231 | 232 | date = end_dates[i] 233 | if pd.isna(date): continue 234 | 235 | for j in range(general_elections_count): 236 | 237 | dissolution = general_elections[j, 1] 238 | election = general_elections[j, 2] 239 | 240 | if date > dissolution and date <= election: 241 | end_dates[i] = dissolution 242 | continue 243 | 244 | commons_memberships['seat_incumbency_end_date'] = end_dates 245 | 246 | # Filter on dates if requested 247 | if not pd.isna(from_date) or not pd.isna(to_date): 
248 | commons_memberships = filter.filter_dates( 249 | commons_memberships, 250 | start_col='seat_incumbency_start_date', 251 | end_col='seat_incumbency_end_date', 252 | from_date=from_date, 253 | to_date=to_date) 254 | 255 | # Tidy up and return 256 | commons_memberships.sort_values( 257 | by=['family_name', 258 | 'seat_incumbency_start_date'], 259 | inplace=True) 260 | commons_memberships.reset_index(drop=True, inplace=True) 261 | return commons_memberships 262 | 263 | 264 | def fetch_mps_party_memberships(from_date=np.NaN, 265 | to_date=np.NaN, 266 | on_date=np.NaN, 267 | while_mp=True, 268 | collapse=False): 269 | 270 | """Fetch party memberships for all MPs. 271 | 272 | fetch_mps_party_memberships fetches data from the data platform showing 273 | party memberships for each MP. 274 | 275 | The from_date and to_date arguments can be used to filter the memberships 276 | returned. The on_date argument is a convenience that sets the from_date and 277 | to_date to the same given date. The on_date has priority: if the on_date is 278 | set, the from_date and to_date are ignored. 279 | 280 | The while_mp argument can be used to filter the memberships to include only 281 | those that occurred during the period when each individual was an MP. 282 | 283 | The filtering is inclusive: a membership is returned if any part 284 | of it falls within the period specified with the from and to dates. 285 | 286 | The collapse argument controls whether memberships are combined so that 287 | there is only one row for each period of continuous membership within the 288 | same party. Combining the memberships in this way means that party 289 | membership ids from the data platform are not included in the dataframe 290 | returned. 291 | 292 | Note that a membership with a NaN end date is still open. 293 | 294 | Parameters 295 | ---------- 296 | 297 | from_date : str or date or NaN, optional 298 | A string or datetime.date representing a date. 
If a string is used it 299 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 300 | default value is numpy.NaN, which means no records are excluded on the 301 | basis of the from_date. 302 | to_date : str or date or NaN, optional 303 | A string or datetime.date representing a date. If a string is used it 304 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 305 | default value is np.NaN, which means no records are excluded on the 306 | basis of the to_date. 307 | on_date : str or date or NaN, optional 308 | A string or datetime.date representing a date. If a string is used it 309 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 310 | default value is np.NaN, which means no records are excluded on the 311 | basis of the on_date. 312 | while_mp : bool, optional 313 | A boolean indicating whether to filter the party memberships to include 314 | only those memberships that were held while each individual was serving 315 | as an MP. The default value is True. 316 | collapse: bool, optional 317 | Determines whether to collapse consecutive memberships within the same 318 | party into a single period of continuous party membership. Setting this 319 | to True means that party membership ids are not returned in the 320 | dataframe. The default value is False. 321 | 322 | Returns 323 | ------- 324 | out : DataFrame 325 | A pandas dataframe of party memberships for each MP, with one row per 326 | party membership. The memberships are processed and merged so that 327 | there is only one party membership for a period of continuous 328 | membership within the same party. A membership with a NaN end date is 329 | still open. 
330 | 331 | """ 332 | 333 | # Set from_date and to_date to on_date if set 334 | if not pd.isna(on_date): 335 | from_date = on_date 336 | to_date = on_date 337 | 338 | # Fetch the party memberships 339 | party_memberships = fetch_mps_party_memberships_raw() 340 | 341 | # Filter on dates if requested 342 | if not pd.isna(from_date) or not pd.isna(to_date): 343 | party_memberships = filter.filter_dates( 344 | party_memberships, 345 | start_col='party_membership_start_date', 346 | end_col='party_membership_end_date', 347 | from_date=from_date, 348 | to_date=to_date) 349 | 350 | # Filter on Commons memberships if requested 351 | if while_mp: 352 | commons_memberships = fetch_commons_memberships() 353 | party_memberships = filter.filter_memberships( 354 | tm=party_memberships, 355 | fm=commons_memberships, 356 | tm_id_col='party_membership_id', 357 | tm_start_col='party_membership_start_date', 358 | tm_end_col='party_membership_end_date', 359 | fm_start_col='seat_incumbency_start_date', 360 | fm_end_col='seat_incumbency_end_date', 361 | join_col='person_id') 362 | 363 | # Collapse consecutive memberships and return if requested 364 | if collapse: 365 | return combine.combine_party_memberships(party_memberships) 366 | 367 | # Otherwise tidy up and return 368 | party_memberships.sort_values( 369 | by=['family_name', 370 | 'party_membership_start_date'], 371 | inplace=True) 372 | party_memberships.reset_index(drop=True, inplace=True) 373 | 374 | return party_memberships 375 | 376 | 377 | def fetch_mps_government_roles(from_date=np.NaN, 378 | to_date=np.NaN, 379 | on_date=np.NaN, 380 | while_mp=True): 381 | 382 | """Fetch government roles for all MPs. 383 | 384 | fetch_mps_government_roles fetches data from the data platform showing 385 | government roles for each MP. 386 | 387 | The from_date and to_date arguments can be used to filter the roles 388 | returned. The on_date argument is a convenience that sets the from_date and 389 | to_date to the same given date. 
The on_date has priority: if the on_date is 390 | set, the from_date and to_date are ignored. 391 | 392 | The while_mp argument can be used to filter the roles to include only those 393 | that occurred during the period when each individual was an MP. 394 | 395 | The filtering is inclusive: a role is returned if any part of it falls 396 | within the period specified with the from and to dates. 397 | 398 | Note that a role with a NaN end date is still open. 399 | 400 | Parameters 401 | ---------- 402 | 403 | from_date : str or date or NaN, optional 404 | A string or datetime.date representing a date. If a string is used it 405 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 406 | default value is numpy.NaN, which means no records are excluded on the 407 | basis of the from_date. 408 | to_date : str or date or NaN, optional 409 | A string or datetime.date representing a date. If a string is used it 410 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 411 | default value is np.NaN, which means no records are excluded on the 412 | basis of the to_date. 413 | on_date : str or date or NaN, optional 414 | A string or datetime.date representing a date. If a string is used it 415 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 416 | default value is np.NaN, which means no records are excluded on the 417 | basis of the on_date. 418 | while_mp : bool, optional 419 | A boolean indicating whether to filter the government roles to include 420 | only those roles that were held while each individual was serving as an 421 | MP. The default value is True. 422 | 423 | Returns 424 | ------- 425 | out : DataFrame 426 | A dataframe of government roles for each MP, with one row per role. 
427 | 428 | """ 429 | 430 | # Set from_date and to_date to on_date if set 431 | if not pd.isna(on_date): 432 | from_date = on_date 433 | to_date = on_date 434 | 435 | # Fetch the government roles 436 | government_roles = fetch_mps_government_roles_raw() 437 | 438 | # Filter on dates if requested 439 | if not pd.isna(from_date) or not pd.isna(to_date): 440 | government_roles = filter.filter_dates( 441 | government_roles, 442 | start_col='government_incumbency_start_date', 443 | end_col='government_incumbency_end_date', 444 | from_date=from_date, 445 | to_date=to_date) 446 | 447 | # Filter on Commons memberships if requested 448 | if while_mp: 449 | commons_memberships = fetch_commons_memberships() 450 | government_roles = filter.filter_memberships( 451 | tm=government_roles, 452 | fm=commons_memberships, 453 | tm_id_col='government_incumbency_id', 454 | tm_start_col='government_incumbency_start_date', 455 | tm_end_col='government_incumbency_end_date', 456 | fm_start_col='seat_incumbency_start_date', 457 | fm_end_col='seat_incumbency_end_date', 458 | join_col='person_id') 459 | 460 | # Tidy up and return 461 | government_roles.sort_values( 462 | by=['family_name', 463 | 'government_incumbency_start_date'], 464 | inplace=True) 465 | government_roles.reset_index(drop=True, inplace=True) 466 | return government_roles 467 | 468 | 469 | def fetch_mps_opposition_roles(from_date=np.NaN, 470 | to_date=np.NaN, 471 | on_date=np.NaN, 472 | while_mp=True): 473 | 474 | """Fetch opposition roles for all MPs. 475 | 476 | fetch_mps_opposition_roles fetches data from the data platform showing 477 | opposition roles for each MP. 478 | 479 | The from_date and to_date arguments can be used to filter the roles 480 | returned. The on_date argument is a convenience that sets the from_date and 481 | to_date to the same given date. The on_date has priority: if the on_date is 482 | set, the from_date and to_date are ignored. 
483 | 484 | The while_mp argument can be used to filter the roles to include only those 485 | that occurred during the period when each individual was an MP. 486 | 487 | The filtering is inclusive: a role is returned if any part of it falls 488 | within the period specified with the from and to dates. 489 | 490 | Note that a role with a NaN end date is still open. 491 | 492 | Parameters 493 | ---------- 494 | 495 | from_date : str or date or NaN, optional 496 | A string or datetime.date representing a date. If a string is used it 497 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 498 | default value is numpy.NaN, which means no records are excluded on the 499 | basis of the from_date. 500 | to_date : str or date or NaN, optional 501 | A string or datetime.date representing a date. If a string is used it 502 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 503 | default value is np.NaN, which means no records are excluded on the 504 | basis of the to_date. 505 | on_date : str or date or NaN, optional 506 | A string or datetime.date representing a date. If a string is used it 507 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 508 | default value is np.NaN, which means no records are excluded on the 509 | basis of the on_date. 510 | while_mp : bool, optional 511 | A boolean indicating whether to filter the opposition roles to include 512 | only those roles that were held while each individual was serving as an 513 | MP. The default value is True. 514 | 515 | Returns 516 | ------- 517 | out : DataFrame 518 | A dataframe of opposition roles for each MP, with one row per role. 
519 | 520 | """ 521 | 522 | # Set from_date and to_date to on_date if set 523 | if not pd.isna(on_date): 524 | from_date = on_date 525 | to_date = on_date 526 | 527 | # Fetch the opposition roles 528 | opposition_roles = fetch_mps_opposition_roles_raw() 529 | 530 | # Filter on dates if requested 531 | if not pd.isna(from_date) or not pd.isna(to_date): 532 | opposition_roles = filter.filter_dates( 533 | opposition_roles, 534 | start_col='opposition_incumbency_start_date', 535 | end_col='opposition_incumbency_end_date', 536 | from_date=from_date, 537 | to_date=to_date) 538 | 539 | # Filter on Commons memberships if requested 540 | if while_mp: 541 | commons_memberships = fetch_commons_memberships() 542 | opposition_roles = filter.filter_memberships( 543 | tm=opposition_roles, 544 | fm=commons_memberships, 545 | tm_id_col='opposition_incumbency_id', 546 | tm_start_col='opposition_incumbency_start_date', 547 | tm_end_col='opposition_incumbency_end_date', 548 | fm_start_col='seat_incumbency_start_date', 549 | fm_end_col='seat_incumbency_end_date', 550 | join_col='person_id') 551 | 552 | # Tidy up and return 553 | opposition_roles.sort_values( 554 | by=['family_name', 555 | 'opposition_incumbency_start_date'], 556 | inplace=True) 557 | opposition_roles.reset_index(drop=True, inplace=True) 558 | return opposition_roles 559 | 560 | 561 | def fetch_mps_committee_memberships(from_date=np.NaN, 562 | to_date=np.NaN, 563 | on_date=np.NaN, 564 | while_mp=True): 565 | 566 | """Fetch committee memberships for all MPs. 567 | 568 | fetch_mps_committee_memberships fetches data from the data platform showing 569 | Parliamentary committee memberships for each MP. 570 | 571 | The from_date and to_date arguments can be used to filter the memberships 572 | returned based on the given dates. The on_date argument is a convenience 573 | that sets the from_date and to_date to the same given date. The on_date has 574 | priority: if the on_date is set, the from_date and to_date are ignored. 
575 | 576 | The while_mp argument can be used to filter the memberships to include only 577 | those that occurred during the period when each individual was an MP. 578 | 579 | The filtering is inclusive: a membership is returned if any part of it 580 | falls within the period specified with the from and to dates. 581 | 582 | Note that a membership with a NaN end date is still open. 583 | 584 | Parameters 585 | ---------- 586 | 587 | from_date : str or date or NaN, optional 588 | A string or datetime.date representing a date. If a string is used it 589 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 590 | default value is numpy.NaN, which means no records are excluded on the 591 | basis of the from_date. 592 | to_date : str or date or NaN, optional 593 | A string or datetime.date representing a date. If a string is used it 594 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 595 | default value is np.NaN, which means no records are excluded on the 596 | basis of the to_date. 597 | on_date : str or date or NaN, optional 598 | A string or datetime.date representing a date. If a string is used it 599 | should specify the date in ISO 8601 date format e.g. '2000-12-31'. The 600 | default value is np.NaN, which means no records are excluded on the 601 | basis of the on_date. 602 | while_mp : bool, optional 603 | A boolean indicating whether to filter the committee memberships to 604 | include only those memberships that were held while each individual was 605 | serving as an MP. The default value is True. 606 | 607 | Returns 608 | ------- 609 | out : DataFrame 610 | A dataframe of committee memberships for each MP, with one row per 611 | membership. 
612 | 613 | """ 614 | 615 | # Set from_date and to_date to on_date if set 616 | if not pd.isna(on_date): 617 | from_date = on_date 618 | to_date = on_date 619 | 620 | # Fetch the committee memberships 621 | committee_memberships = fetch_mps_committee_memberships_raw() 622 | 623 | # Filter on dates if requested 624 | if not pd.isna(from_date) or not pd.isna(to_date): 625 | committee_memberships = filter.filter_dates( 626 | committee_memberships, 627 | start_col='committee_membership_start_date', 628 | end_col='committee_membership_end_date', 629 | from_date=from_date, 630 | to_date=to_date) 631 | 632 | # Filter on Commons memberships if requested 633 | if while_mp: 634 | commons_memberships = fetch_commons_memberships() 635 | committee_memberships = filter.filter_memberships( 636 | tm=committee_memberships, 637 | fm=commons_memberships, 638 | tm_id_col='committee_membership_id', 639 | tm_start_col='committee_membership_start_date', 640 | tm_end_col='committee_membership_end_date', 641 | fm_start_col='seat_incumbency_start_date', 642 | fm_end_col='seat_incumbency_end_date', 643 | join_col='person_id') 644 | 645 | # Tidy up and return 646 | committee_memberships.sort_values( 647 | by=['family_name', 648 | 'committee_membership_start_date'], 649 | inplace=True) 650 | committee_memberships.reset_index(drop=True, inplace=True) 651 | return committee_memberships 652 | -------------------------------------------------------------------------------- /pdpy/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """User configurable package settings.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | from . 
import constants 7 | 8 | # Settings dictionary --------------------------------------------------------- 9 | 10 | settings = {} 11 | 12 | # Settings: api url ----------------------------------------------------------- 13 | 14 | def get_api_url(): 15 | 16 | """Get the api url. 17 | 18 | get_api_url gets the url that the package is currently configured to use 19 | for the SPARQL endpoint to a data platform instance. 20 | 21 | Returns 22 | ------- 23 | out : str 24 | The currently set api url as a string. 25 | 26 | """ 27 | 28 | if constants.SETTINGS_API_URL not in settings: 29 | set_api_url(constants.SETTINGS_API_URL_DEFAULT) 30 | 31 | return settings[constants.SETTINGS_API_URL] 32 | 33 | 34 | def set_api_url(api_url): 35 | 36 | """Set the api url. 37 | 38 | set_api_url sets the url that the package uses for the api endpoint. By 39 | default the package uses the main live endpoint for the data platform's 40 | SPARQL api. If you wish to run a local version of the api you can use this 41 | function to tell the package to use that endpoint instead. 42 | 43 | Parameters 44 | ---------- 45 | api_url : str 46 | The url of an available data platform SPARQL endpoint. 47 | 48 | Returns 49 | ------- 50 | out : None 51 | 52 | """ 53 | 54 | settings[constants.SETTINGS_API_URL] = api_url 55 | 56 | 57 | def reset_api_url(): 58 | 59 | """Reset the api url to the default. 60 | 61 | reset_api_url resets the url that the package uses for the api endpoint to 62 | the live api url. 
63 | 64 | """ 65 | 66 | set_api_url(constants.SETTINGS_API_URL_DEFAULT) 67 | -------------------------------------------------------------------------------- /pdpy/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Package utility functions.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | import datetime 7 | import numpy as np 8 | import pandas as pd 9 | import requests 10 | 11 | # API Functions --------------------------------------------------------------- 12 | 13 | def check_api(): 14 | 15 | """Check if Python can reach the api and return a boolean.""" 16 | 17 | api_url = ( 18 | 'https://api.parliament.uk/sparql' 19 | '?query=SELECT+*+WHERE+%7B+%3Fs+%' 20 | '3Fp+%3Fo+.+%7D+LIMIT+1%0D%0A') 21 | 22 | try: 23 | response = requests.get(api_url) 24 | return response.ok 25 | except: 26 | return False 27 | 28 | # Date handling functions ----------------------------------------------------- 29 | 30 | def convert_date_series(date_str_series): 31 | 32 | """Convert a series of ISO 8601 date strings to datetime.dates.""" 33 | 34 | return [np.NaN if pd.isna(d) \ 35 | else datetime.datetime.strptime(d, '%Y-%m-%d').date() \ 36 | for d in date_str_series] 37 | 38 | 39 | def min_date_nan(dates): 40 | 41 | """Find the earliest date from a series that may contain NaNs. 42 | 43 | Find the earliest date from a pandas series of datetime.dates that may 44 | contain NaNs. NaN dates are considered earlier than all others. 45 | 46 | """ 47 | 48 | if dates.isna().any(): 49 | return np.NaN 50 | else: 51 | return min(dates) 52 | 53 | 54 | def max_date_nan(dates): 55 | 56 | """Find the latest date from a series that may contain NaNs. 57 | 58 | Find the latest date from a pandas series of datetime.dates that may 59 | contain NaNs. NaN dates are considered later than all others. 
60 | 61 | """ 62 | 63 | if dates.isna().any(): 64 | return np.NaN 65 | else: 66 | return max(dates) 67 | 68 | # Data presentation functions ------------------------------------------------- 69 | 70 | def readable(df): 71 | 72 | """Take a dataframe and remove all columns that end in the suffix '_id'. 73 | 74 | The intended purpose of this function is to display a dataframe on the 75 | console showing only the readable columns i.e. not the identifiers. 76 | 77 | Parameters 78 | ---------- 79 | df : DataFrame 80 | A pandas dataframe. 81 | 82 | Returns 83 | ------- 84 | out : DataFrame 85 | A dataframe with the same structure as the input df with any columns 86 | ending in the suffix '_id' removed. 87 | 88 | """ 89 | 90 | readable_cols = list(filter(lambda c: not c.endswith('_id'), df.columns)) 91 | return df[readable_cols] 92 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # pdpy 2 | 3 | pdpy is a Python package for downloading data from the UK Parliament's data platform. An equivalent package is available for R called [pdpr](https://github.com/houseofcommonslibrary/pdpr). 4 | 5 | ## Overview 6 | 7 | The UK Parliament's data platform contains data on Parliamentary activity. It underpins Parliament's new website, which is being developed at [beta.parliament.uk](https://beta.parliament.uk). Data in the platform is stored as RDF and is available through a SPARQL endpoint. You can see the structure of the vocabulary for the data visualised with [WebVOWL](http://visualdataweb.de/webvowl/#iri=https://raw.githubusercontent.com/ukparliament/Ontology/master/Ontology.ttl). 
8 | 9 | This package provides access to data stored in the data platform through two interfaces at different levels: 10 | 11 | * A low level interface that takes a SPARQL SELECT query, sends it to the platform, and returns the result as a [pandas](http://pandas.pydata.org) _DataFrame_, with data types appropriately converted. 12 | 13 | * A high level interface comprising families of functions for downloading specific datasets, whose contents can be customised through function arguments. In some cases these higher level functions can additionally process the data to make it more suitable for analysis. 14 | 15 | The higher level interface currently focuses on providing key data about Members of both Houses of Parliament, but you can use the lower level interface to send custom queries of your own for other data. 16 | 17 | ### Installation 18 | 19 | Install from PyPI using pip: 20 | ```sh 21 | pip install pdpy 22 | ``` 23 | 24 | ## Package conventions 25 | 26 | There are certain conventions that apply across the package. 27 | 28 | Functions that make calls to the data platform (or to other online resources) are prefixed `fetch_*`, while those that retrieve or generate data locally are prefixed `get_*`. 29 | 30 | Column names used in dataframes returned by higher level functions reflect the terms used for those data items in the UK Parliament RDF vocabulary, but modified so that the `camelCase` of RDF is replaced with the `lowercase_and_underscores` used in Python. This means that column names can sometimes be long, but I think maintaining a transparent relationship between the data returned by the package and the data stored in the platform makes both the package and platform more useful. 31 | 32 | Higher level functions always return columns containing the ids of the entities represented in the data to help with grouping, summarising, and linking between datasets. This can make the dataframes harder to browse in an interactive shell. 
To make this easier, the package has a function called `readable` that returns a copy of the dataframe with any id columns removed. 33 | 34 | ``` python 35 | import pdpy 36 | mps = pdpy.fetch_mps() 37 | pdpy.readable(mps) 38 | ``` 39 | 40 | ## Package status 41 | 42 | This package is currently in _beta_. This partly reflects the fact that the data platform is still evolving but mainly reflects the fact that this package is still young. Over time new sets of functions will be added to access other datasets and more established parts of the package API will be declared stable. But right now it's all beta. 43 | 44 | ## Roadmap 45 | 46 | * Further analysis functions for data on MPs and Lords 47 | * Caching 48 | * Written Questions and Answers API 49 | * New APIs for new datasets in future 50 | 51 | ## Contributions 52 | 53 | I welcome any feedback, bug reports, and suggestions for improvement. Please talk to me before submitting a pull request. There are potentially a very large number of features that could be added to the package and I want to make sure it evolves with a consistent set of interfaces that share common design patterns. The package also has an R sibling, and I aim to maintain feature parity across both languages. 54 | 55 | ## Query API 56 | 57 | __sparql_select__(_query_) 58 | 59 | The low level query API consists of a single function which takes a SPARQL SELECT query, sends it to the data platform, and returns the results as a pandas _DataFrame_. 60 | 61 | ```python 62 | query = """ 63 | PREFIX : 64 | SELECT * WHERE { ?p ?s ?o . } LIMIT 1 65 | """ 66 | 67 | result = pdpy.sparql_select(query) 68 | result.iloc[0] 69 | 70 | # Output: 71 | # p http://www.w3.org/1999/02/22-rdf-syntax-ns#type 72 | # s http://www.w3.org/1999/02/22-rdf-syntax-ns#type 73 | # o http://www.w3.org/1999/02/22-rdf-syntax-ns#Property 74 | # Name: 0, dtype: object 75 | ``` 76 | 77 | The function will try to convert data types it recognises to native Python types. 
Currently, it converts XML dates to _datetime.date_ objects and returns all other values as strings. New data types may be added as they are encountered in expanding the higher level api. 78 | 79 | ## Members API 80 | 81 | The Members API provides access to data on Members of both Houses of Parliament. It provides similar functions for downloading data on both MPs and Lords, but the structure of the data returned in each case may differ to reflect differences between Commons and Lords memberships. 82 | 83 | Each of these Member functions can take optional arguments for a `from_date` and a `to_date`, which can be used to filter the rows returned based on a period of activity related to each row. The `on_date` argument is a convenience that sets the `from_date` and `to_date` to the same given date. The `on_date` has priority: if the `on_date` is set, the `from_date` and `to_date` are ignored. The values for these arguments can be either a _datetime.date_ or a string specifying a date in ISO 8601 format ('YYYY-MM-DD'). 84 | 85 | The filtering performed using these arguments is inclusive: a row is returned if any part of the activity in question falls within the period specified with the from and to dates. If the activity in question has not yet ended, the end date will have a value of NumPy.NaN. 86 | 87 | --- 88 | 89 | ### MPs 90 | 91 | Some MP functions have an optional argument called `while_mp`, which filters the data to include only those rows that coincide with the period when the individual was serving in the House of Commons. This is sometimes necessary because someone who serves in the House of Commons may later serve in the House of Lords and may hold government roles or committee memberships while serving in both Houses. When this argument is set to _False_ these functions will return all relevant records for each individual, even if the records themselves relate to periods when the individual was not an MP. 
92 | 93 | --- 94 | 95 | _pdpy_.__fetch_mps__(_from_date=None_, _to_date=None_, _on_date=None_) 96 | 97 | Fetch a dataframe of key details about each MP, with one row per MP. 98 | 99 | This dataframe contains summary details for each MP, such as names, gender, and dates of birth and death. 100 | 101 | The `from_date`, `to_date` and `on_date` arguments can be used to filter the MPs returned based on the dates of their Commons memberships. Note that in this particular case the filtering does not rely on dates shown in the dataframe but uses Commons membership records to calculate whether an MP was serving on the dates in question. While breaks in service are therefore accounted for, this function does not yet have an option to exclude serving Members who were prevented from sitting at a given point in time for some reason. 102 | 103 | --- 104 | 105 | _pdpy_.__fetch_commons_memberships__(_from_date=None_, _to_date=None_, _on_date=None_) 106 | 107 | Fetch a dataframe of Commons memberships for each MP, with one row per Commons membership. 108 | 109 | The membership dates are processed to impose consistent rules on the start and end dates for memberships. Specifically, Commons memberships are taken to end at the dissolution of each Parliament, rather than on the date of the general election at which an MP was defeated. 110 | 111 | --- 112 | 113 | _pdpy_.__fetch_mps_party_memberships__(_from_date=None_, _to_date=None_, _on_date=None_, _while_mp=True_, _collapse=False_) 114 | 115 | Fetch a dataframe of party memberships for each MP, with one row per party membership. 116 | 117 | The `collapse` argument determines whether to collapse consecutive memberships within the same party into a single period of continuous party membership. The default value of this argument is _False_, but collapsing can be useful because some Members' party memberships have been recorded separately for each Parliament, even when they haven't changed party.
Setting this value to _True_ is helpful when you want to identify Members who have changed party allegiance. Note that setting this value to _True_ means that party membership ids are not returned in the dataframe, as individual party memberships are combined. 118 | 119 | Note that party memberships are not necessarily closed when an individual stops being an MP. 120 | 121 | --- 122 | 123 | _pdpy_.__fetch_mps_government_roles__(_from_date=None_, _to_date=None_, _on_date=None_, _while_mp=True_) 124 | 125 | Fetch a dataframe of government roles for each MP, with one row per government role. 126 | 127 | --- 128 | 129 | _pdpy_.__fetch_mps_opposition_roles__(_from_date=None_, _to_date=None_, _on_date=None_, _while_mp=True_) 130 | 131 | Fetch a dataframe of opposition roles for each MP, with one row per opposition role. 132 | 133 | --- 134 | 135 | _pdpy_.__fetch_mps_committee_memberships__(_from_date=None_, _to_date=None_, _on_date=None_, _while_mp=True_) 136 | 137 | Fetch a dataframe of Parliamentary committee memberships for each MP, with one row per committee membership. 138 | 139 | --- 140 | 141 | ### Lords 142 | 143 | Some Lords functions have an optional argument called `while_lord`, which filters the rows to include only those records that coincide with the period when the individual was serving in the House of Lords. This is sometimes necessary because someone who serves in the House of Lords may previously have served in the House of Commons and may have held government roles or committee memberships while serving in both Houses. When this argument is set to _False_ these functions will return all relevant records for each individual, even if the records themselves relate to periods when the individual was not a Lord. 144 | 145 | --- 146 | 147 | _pdpy_.__fetch_lords__(_from_date=None_, _to_date=None_, _on_date=None_) 148 | 149 | Fetch a dataframe of key details about each Lord, with one row per Lord. 
150 | 151 | This dataframe contains summary details for each Lord, such as names, gender, and dates of birth and death. 152 | 153 | The `from_date`, `to_date` and `on_date` arguments can be used to filter the Lords returned based on the dates of their Lords memberships. Note that in this particular case the filtering does not rely on dates shown in the dataframe but uses Lords membership records to calculate whether a Lord was serving on the dates in question. While breaks in service are therefore accounted for, this function does not yet have an option to exclude serving Members who were prevented from sitting at a given point in time for some reason. 154 | 155 | --- 156 | 157 | _pdpy_.__fetch_lords_memberships__(_from_date=None_, _to_date=None_, _on_date=None_) 158 | 159 | Fetch a dataframe of Lords memberships for each Lord, with one row per Lords membership. 160 | 161 | --- 162 | 163 | _pdpy_.__fetch_lords_party_memberships__(_from_date=None_, _to_date=None_, _on_date=None_, _while_lord=True_, _collapse=False_) 164 | 165 | Fetch a dataframe of party memberships for each Lord, with one row per party membership. 166 | 167 | The `collapse` argument determines whether to collapse consecutive memberships within the same party into a single period of continuous party membership. The default value of this argument is _False_, but collapsing can be useful because some Members' party memberships have been recorded separately for each Parliament, even when they haven't changed party. Setting this value to _True_ is helpful when you want to identify Members who have changed party allegiance. Note that setting this value to _True_ means that party membership ids are not returned in the dataframe, as individual party memberships are combined. 168 | 169 | Note that party memberships are not necessarily closed when an individual stops being a Lord.
170 | 171 | --- 172 | 173 | _pdpy_.__fetch_lords_government_roles__(_from_date=None_, _to_date=None_, _on_date=None_, _while_lord=True_) 174 | 175 | Fetch a dataframe of government roles for each Lord, with one row per government role. 176 | 177 | --- 178 | 179 | _pdpy_.__fetch_lords_opposition_roles__(_from_date=None_, _to_date=None_, _on_date=None_, _while_lord=True_) 180 | 181 | Fetch a dataframe of opposition roles for each Lord, with one row per opposition role. 182 | 183 | --- 184 | 185 | _pdpy_.__fetch_lords_committee_memberships__(_from_date=None_, _to_date=None_, _on_date=None_, _while_lord=True_) 186 | 187 | Fetch a dataframe of Parliamentary committee memberships for each Lord, with one row per committee membership. 188 | 189 | --- 190 | 191 | ## Settings 192 | 193 | You can configure the package to use a different data platform API endpoint at runtime. This allows you to run the package against a local version of the data platform. As explained by @matthieubosquet in this [comment](https://github.com/houseofcommonslibrary/pdpr/issues/1#issuecomment-484026350), the data platform team maintain a docker image of the data platform API which is updated daily with the latest data. 194 | 195 | You can run a local version of the data platform API with docker using: 196 | 197 | ```bash 198 | docker run --rm -d -p 7200:7200 ukparliament/graphdb:latest 199 | ``` 200 | 201 | Use `pdpy.set_api_url` to point the package at the local version of the data platform API: 202 | 203 | ```python 204 | pdpy.set_api_url('http://localhost:7200/repositories/parliament') 205 | ``` 206 | 207 | Use `pdpy.reset_api_url` to reset the package to use the live version of the API: 208 | 209 | ```python 210 | pdpy.reset_api_url() 211 | ``` 212 | 213 | You can check the currently set API url with `pdpy.get_api_url()`. 
214 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = readme.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | setup( 3 | name = 'pdpy', 4 | packages = ['pdpy'], 5 | version = '0.1.6', 6 | description = 'A package for downloading data from the Parliamentary Data Platform', 7 | author = 'Oliver Hawkins', 8 | author_email = 'oli@olihawkins.com', 9 | url = 'https://github.com/houseofcommonslibrary/pdpy', 10 | license = 'BSD', 11 | keywords = ['Parliament', 'MP', 'House of Commons', 'House of Lords'], 12 | install_requires = ['numpy', 'pandas', 'requests'], 13 | classifiers = [], 14 | ) 15 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/commons_memberships_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/commons_memberships_raw.pkl -------------------------------------------------------------------------------- /tests/data/fetch_commons_memberships.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_commons_memberships.pkl -------------------------------------------------------------------------------- 
/tests/data/fetch_commons_memberships_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_commons_memberships_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_committee_memberships.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_committee_memberships.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_committee_memberships_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_committee_memberships_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_committee_memberships_while_lord.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_committee_memberships_while_lord.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_from_to.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_government_roles.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_government_roles.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_government_roles_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_government_roles_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_government_roles_while_lord.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_government_roles_while_lord.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_memberships.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_memberships.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_memberships_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_memberships_from_to.pkl 
-------------------------------------------------------------------------------- /tests/data/fetch_lords_opposition_roles.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_opposition_roles.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_opposition_roles_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_opposition_roles_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_opposition_roles_while_lord.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_opposition_roles_while_lord.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_party_memberships.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_party_memberships.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_party_memberships_collapse.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_party_memberships_collapse.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_party_memberships_from_to.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_party_memberships_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_lords_party_memberships_while_lord.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_party_memberships_while_lord.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_committee_memberships.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_committee_memberships.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_committee_memberships_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_committee_memberships_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_committee_memberships_while_mp.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_committee_memberships_while_mp.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_government_roles.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_government_roles.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_government_roles_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_government_roles_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_government_roles_while_mp.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_government_roles_while_mp.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_opposition_roles.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_opposition_roles.pkl 
-------------------------------------------------------------------------------- /tests/data/fetch_mps_opposition_roles_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_opposition_roles_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_opposition_roles_while_mp.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_opposition_roles_while_mp.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_party_memberships.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_party_memberships.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_party_memberships_collapse.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_party_memberships_collapse.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_party_memberships_from_to.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_party_memberships_from_to.pkl -------------------------------------------------------------------------------- /tests/data/fetch_mps_party_memberships_while_mp.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_party_memberships_while_mp.pkl -------------------------------------------------------------------------------- /tests/data/lords_committee_memberships_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_committee_memberships_raw.pkl -------------------------------------------------------------------------------- /tests/data/lords_government_roles_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_government_roles_raw.pkl -------------------------------------------------------------------------------- /tests/data/lords_memberships_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_memberships_raw.pkl -------------------------------------------------------------------------------- /tests/data/lords_opposition_roles_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_opposition_roles_raw.pkl -------------------------------------------------------------------------------- /tests/data/lords_party_memberships_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_party_memberships_raw.pkl 
-------------------------------------------------------------------------------- /tests/data/lords_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_raw.pkl -------------------------------------------------------------------------------- /tests/data/mps_committee_memberships_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_committee_memberships_raw.pkl -------------------------------------------------------------------------------- /tests/data/mps_government_roles_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_government_roles_raw.pkl -------------------------------------------------------------------------------- /tests/data/mps_opposition_roles_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_opposition_roles_raw.pkl -------------------------------------------------------------------------------- /tests/data/mps_party_memberships_raw.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_party_memberships_raw.pkl -------------------------------------------------------------------------------- /tests/data/mps_raw.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_raw.pkl -------------------------------------------------------------------------------- /tests/test_combine.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Test combine functions.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | import datetime 7 | import io 8 | import numpy as np 9 | import pandas as pd 10 | import unittest 11 | 12 | import pdpy.combine as combine 13 | import pdpy.errors as errors 14 | import pdpy.utils as utils 15 | 16 | 17 | # Test data ------------------------------------------------------------------- 18 | 19 | pm_csv = """ 20 | person_id, membership_id, party_id, start_date, end_date 21 | p1, m1, pa1, 2001-01-01, 2001-12-31 22 | p1, m2, pa2, 2002-01-01, 2002-12-31 23 | p1, m3, pa1, 2003-01-01, 2003-12-31 24 | p1, m4, pa1, 2004-01-01, 2004-12-31 25 | p2, m5, pa1, 2002-01-01, 2002-12-31 26 | p2, m6, pa2, 2004-01-01, NA 27 | p2, m7, pa2, 2003-01-01, 2003-12-31 28 | p2, m8, pa1, 2001-01-01, 2001-12-31 29 | """ 30 | 31 | pm = pd.read_csv(io.BytesIO(bytes(pm_csv, encoding='utf-8')), 32 | skipinitialspace = True) 33 | 34 | pm['party_membership_start_date'] = utils.convert_date_series(pm['start_date']) 35 | pm['party_membership_end_date'] = utils.convert_date_series(pm['end_date']) 36 | pm['mnis_id'] = '' 37 | pm['given_name'] = pm['person_id'] 38 | pm['family_name'] = pm['person_id'] 39 | pm['display_name'] = pm['person_id'] 40 | pm['party_membership_id'] = '' 41 | pm['party_mnis_id'] = '' 42 | pm['party_name'] = '' 43 | pm = pm[[ 44 | 'person_id', 45 | 'mnis_id', 46 | 'given_name', 47 | 'family_name', 48 | 'display_name', 49 | 'party_id', 50 | 'party_mnis_id', 51 | 'party_name', 52 | 'party_membership_id', 53 | 'party_membership_start_date', 54 | 'party_membership_end_date']] 55 | 56 | # Test 
# combine_party_memberships ---------------------------------------------------

class CombinePartyMemberships(unittest.TestCase):

    """
    Test that combine_party_memberships returns a DataFrame with the
    expected properties.

    The tests use the module-level ``pm`` fixture: eight party membership
    records for two people (``p1`` and ``p2``), one of which has no end
    date.

    """

    def test_that_combine_party_memberships_raises_value_error(self):
        """A ValueError is raised when a required column is absent."""

        # Build each malformed input outside the assertRaises context so
        # that only the call under test can satisfy the expectation.
        pm_missing_column = pm.drop('person_id', axis=1)
        with self.assertRaises(ValueError):
            combine.combine_party_memberships(pm_missing_column)

        # Same data, but with the person id stored under the wrong name
        pm_wrong_column_names = pm.drop('person_id', axis=1)
        pm_wrong_column_names['pid'] = pm['person_id']
        with self.assertRaises(ValueError):
            combine.combine_party_memberships(pm_wrong_column_names)

    def test_that_filter_memberships_filters_correct_memberships(self):
        """Consecutive memberships of the same party are merged.

        Despite its historical name, this test exercises
        combine_party_memberships.
        """

        cpm = combine.combine_party_memberships(pm)

        # Eight input rows are expected to combine into five, and the
        # membership id column is expected to be dropped.
        self.assertEqual(cpm.shape[0], 5)
        self.assertEqual(cpm.shape[1], pm.shape[1] - 1)

        expected_columns = pm.drop('party_membership_id', axis=1).columns
        self.assertEqual(list(cpm.columns), list(expected_columns))

        self.assertEqual(
            list(cpm['person_id']),
            ['p1', 'p1', 'p1', 'p2', 'p2'])

        self.assertTrue((cpm['party_membership_start_date'] == [
            datetime.date(2001, 1, 1),
            datetime.date(2002, 1, 1),
            datetime.date(2003, 1, 1),
            datetime.date(2001, 1, 1),
            datetime.date(2003, 1, 1)]).all())

        # Only the first four end dates are compared directly; the fifth
        # row is the open-ended membership and is checked with isna below.
        self.assertTrue((cpm['party_membership_end_date'].iloc[0:4] == [
            datetime.date(2001, 12, 31),
            datetime.date(2002, 12, 31),
            datetime.date(2004, 12, 31),
            datetime.date(2002, 12, 31)]).all())

        self.assertTrue(pd.isna(cpm['party_membership_end_date'].iloc[4]))
# Imports ---------------------------------------------------------------------

import datetime
import numpy as np
import pandas as pd
import requests
import time
import unittest
import warnings

import pdpy.constants as constants
import pdpy.core as core
import pdpy.errors as errors
import pdpy.utils as utils

# Setup -----------------------------------------------------------------------

# Check api is available; every test class below skips itself when it is not
api_available = utils.check_api()

# Queries ---------------------------------------------------------------------

# NOTE: each PREFIX declaration must carry its namespace IRI, otherwise the
# query is not valid SPARQL. The "d:" namespace matches the person IRI
# asserted in TestRequestPerson below.

# Smallest possible query: fetch a single arbitrary triple
query_basic = """
PREFIX : <https://id.parliament.uk/schema/>
SELECT *
WHERE {
    ?p ?s ?o .
}
LIMIT 1
"""

# Fetch key details for one known person
query_person = """
PREFIX : <https://id.parliament.uk/schema/>
PREFIX d: <https://id.parliament.uk/>
SELECT DISTINCT

    ?person
    ?given_name
    ?family_name
    ?gender
    ?dob

WHERE {

    # Entity id for Shirley Williams
    BIND(d:URDlhhkg AS ?person)

    ?person :personGivenName ?given_name ;
        :personFamilyName ?family_name ;
        :personHasGenderIdentity/:genderIdentityHasGender/:genderName ?gender .
    OPTIONAL { ?person :personDateOfBirth ?dob . }
}
"""

# Same as query_person, but deliberately invalid: the "d:" prefix is used
# without being declared
query_broken = """
PREFIX : <https://id.parliament.uk/schema/>
# PREFIX d: <https://id.parliament.uk/> Commented out to break query
SELECT DISTINCT

    ?person
    ?given_name
    ?family_name
    ?gender
    ?dob

WHERE {

    # Entity id for Shirley Williams
    BIND(d:URDlhhkg AS ?person)

    ?person :personGivenName ?given_name ;
        :personFamilyName ?family_name ;
        :personHasGenderIdentity/:genderIdentityHasGender/:genderName ?gender .
    OPTIONAL { ?person :personDateOfBirth ?dob . }
}
"""

# Error text associated with query_broken
query_broken_error = "{}{}".format(
    'MALFORMED QUERY: org.eclipse.rdf4j.query.parser.sparql.ast.',
    'VisitorException: QName \'d:URDlhhkg\' uses an undefined prefix')

# Tests -----------------------------------------------------------------------

class TestRequestBasic(unittest.TestCase):

    """Test that request sends and receives the most basic SPARQL query."""

    def setUp(self):
        """Skip the test when the api check at import time failed."""
        if not api_available:
            self.skipTest('api could not be reached')

    def test_request_basic(self):
        """The raw JSON response holds one triple under the p, s, o headers."""

        # Suppress the warning for the broken socket
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)

            response = core.request(query_basic)
            payload = response.json()
            headers = payload['head']['vars']
            records = payload['results']['bindings']

        self.assertTrue(response.ok)
        self.assertEqual(headers, ['p', 's', 'o'])
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0]['p']['value'],
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
        self.assertEqual(records[0]['s']['value'],
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
        self.assertEqual(records[0]['o']['value'],
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property')

        # Pause before the next test contacts the api
        time.sleep(constants.API_PAUSE_TIME)


class TestRequestPerson(unittest.TestCase):

    """Test that request sends and receives a Parliamentary query."""

    def setUp(self):
        """Skip the test when the api check at import time failed."""
        if not api_available:
            self.skipTest('api could not be reached')

    def test_request_person(self):
        """The raw JSON response holds the expected record for one person."""

        # Suppress the warning for the broken socket
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)

            response = core.request(query_person)
            payload = response.json()
            headers = payload['head']['vars']
            records = payload['results']['bindings']

        self.assertTrue(response.ok)
        self.assertEqual(headers,
            ['person', 'given_name', 'family_name', 'gender', 'dob'])
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0]['person']['value'],
            'https://id.parliament.uk/URDlhhkg')
        self.assertEqual(records[0]['given_name']['value'], 'Shirley')
        self.assertEqual(records[0]['family_name']['value'], 'Williams')
        self.assertEqual(records[0]['gender']['value'], 'Female')
        self.assertEqual(records[0]['dob']['value'], '1930-07-27+01:00')
        self.assertEqual(records[0]['dob']['datatype'], constants.XML_DATE)

        # Pause before the next test contacts the api
        time.sleep(constants.API_PAUSE_TIME)


class TestSelectBasic(unittest.TestCase):

    """Test that select returns data for the most basic SPARQL query."""

    def setUp(self):
        """Skip the test when the api check at import time failed."""
        if not api_available:
            self.skipTest('api could not be reached')

    def test_select_basic(self):
        """sparql_select returns a 1 x 3 frame of strings for query_basic."""

        # Suppress the warning for the broken socket
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)

            data = core.sparql_select(query_basic)

        # Iterating the returned frame yields its column names
        self.assertEqual(list(data), ['p', 's', 'o'])
        self.assertEqual(data['p'].dtype, np.dtype('O'))
        self.assertEqual(data['s'].dtype, np.dtype('O'))
        self.assertEqual(data['o'].dtype, np.dtype('O'))
        self.assertEqual(data.shape, (1, 3))
        self.assertIsInstance(data['p'][0], str)
        self.assertIsInstance(data['s'][0], str)
        self.assertIsInstance(data['o'][0], str)
        self.assertEqual(data['p'][0],
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
        self.assertEqual(data['s'][0],
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
        self.assertEqual(data['o'][0],
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property')

        # Pause before the next test contacts the api
        time.sleep(constants.API_PAUSE_TIME)
a Parliamentary query.""" 190 | 191 | def setUp(self): 192 | if not api_available: 193 | self.skipTest('api could not be reached') 194 | 195 | def test_select_person(self): 196 | 197 | # Suppress the warning for the broken socket 198 | with warnings.catch_warnings(): 199 | warnings.simplefilter("ignore", ResourceWarning) 200 | 201 | data = core.sparql_select(query_person) 202 | 203 | self.assertEqual(list(data), 204 | ['person', 'given_name', 'family_name', 'gender', 'dob']) 205 | self.assertEqual(data['person'].dtype, np.dtype('O')) 206 | self.assertEqual(data['given_name'].dtype, np.dtype('O')) 207 | self.assertEqual(data['family_name'].dtype, np.dtype('O')) 208 | self.assertEqual(data['gender'].dtype, np.dtype('O')) 209 | self.assertEqual(data['dob'].dtype, np.dtype('O')) 210 | self.assertEqual(data.shape, (1, 5)) 211 | self.assertIsInstance(data['person'][0], str) 212 | self.assertIsInstance(data['given_name'][0], str) 213 | self.assertIsInstance(data['family_name'][0], str) 214 | self.assertIsInstance(data['gender'][0], str) 215 | self.assertIsInstance(data['dob'][0], datetime.date) 216 | self.assertEqual(data['person'][0], 217 | 'https://id.parliament.uk/URDlhhkg') 218 | self.assertEqual(data['given_name'][0], 'Shirley') 219 | self.assertEqual(data['family_name'][0], 'Williams') 220 | self.assertEqual(data['gender'][0], 'Female') 221 | self.assertEqual(data['dob'][0], datetime.date(1930, 7, 27)) 222 | 223 | time.sleep(constants.API_PAUSE_TIME) 224 | 225 | 226 | class TestSelectBroken(unittest.TestCase): 227 | 228 | """Test that select raises a request error for a broken query.""" 229 | 230 | def setUp(self): 231 | if not api_available: 232 | self.skipTest('api could not be reached') 233 | 234 | def test_select_broken(self): 235 | 236 | # Suppress the warning for the broken socket 237 | with warnings.catch_warnings(): 238 | warnings.simplefilter("ignore", ResourceWarning) 239 | 240 | with self.assertRaises(errors.RequestError) as cm: 241 | data = 
core.sparql_select(query_broken) 242 | 243 | request_exception = cm.exception 244 | self.assertEqual(request_exception.response, query_broken_error) 245 | 246 | time.sleep(constants.API_PAUSE_TIME) 247 | -------------------------------------------------------------------------------- /tests/test_elections.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Test elections data functions.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | import datetime 7 | import numpy as np 8 | import pandas as pd 9 | import unittest 10 | 11 | import pdpy.elections as elections 12 | 13 | # Tests ----------------------------------------------------------------------- 14 | 15 | class TestGetGeneralElections(unittest.TestCase): 16 | 17 | """ 18 | Test that get_general_elections returns the expected elections data. 19 | 20 | """ 21 | 22 | def test_get_general_elections(self): 23 | 24 | ge = elections.get_general_elections() 25 | 26 | self.assertEqual(list(ge), ['name', 'dissolution', 'election']) 27 | self.assertEqual(ge['name'].dtype, np.dtype('O')) 28 | self.assertEqual(ge['dissolution'].dtype, np.dtype('O')) 29 | self.assertEqual(ge['election'].dtype, np.dtype('O')) 30 | self.assertIsInstance(ge['name'][0], str) 31 | self.assertIsInstance(ge['dissolution'][0], datetime.date) 32 | self.assertIsInstance(ge['election'][0], datetime.date) 33 | 34 | # Test that dissolutions always precede elections 35 | self.assertTrue( 36 | (ge['dissolution'] < ge['election']).all()) 37 | 38 | # Test that elections always precede the following dissolution 39 | self.assertTrue(( 40 | ge['election'][:-1].reset_index(drop=True) < 41 | ge['dissolution'][1:].reset_index(drop=True) 42 | ).all()) 43 | 44 | # Test that election names are unique 45 | self.assertTrue( 46 | len(ge['name']) == len(ge['name'].unique())) 47 | 48 | 49 | class TestGetGeneralElectionsDict(unittest.TestCase): 50 | 51 | """ 
52 | Test that get_general_elections_dict returns the expected elections data. 53 | 54 | """ 55 | 56 | def test_get_general_elections_dict(self): 57 | 58 | ge = elections.get_general_elections_dict() 59 | 60 | for e in ge.values(): 61 | self.assertEqual(len(e.keys()), 2) 62 | self.assertIn('election', list(e.keys())) 63 | self.assertIn('dissolution', list(e.keys())) 64 | self.assertIsInstance(e['dissolution'], datetime.date) 65 | self.assertIsInstance(e['election'], datetime.date) 66 | self.assertTrue(e['dissolution'] < e['election']) 67 | -------------------------------------------------------------------------------- /tests/test_filter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Test filter functions.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | import datetime 7 | import io 8 | import numpy as np 9 | import pandas as pd 10 | import unittest 11 | 12 | import pdpy.errors as errors 13 | import pdpy.filter as filter 14 | import pdpy.utils as utils 15 | 16 | 17 | # Test data ------------------------------------------------------------------- 18 | 19 | mem_a_csv = """ 20 | person_id, membership_id, start_date, end_date 21 | p1, a1, 2001-01-01, 2001-12-31 22 | p1, a2, 2005-01-01, 2005-12-31 23 | p1, a3, 2006-01-01, 2006-12-31 24 | p1, a4, 2010-01-01, 2010-12-31 25 | p2, a5, 2005-01-01, 2005-12-31 26 | p2, a6, 2006-01-01, 2006-12-31 27 | p2, a7, 2010-01-01, 2010-12-31 28 | p2, a8, 2015-01-01, 2015-12-31 29 | """ 30 | 31 | mem_b_csv = """ 32 | person_id, membership_id, start_date, end_date 33 | p1, b1, 2001-06-01, 2002-06-30 34 | p1, b2, 2004-01-01, 2004-12-31 35 | p1, b3, 2006-01-01, 2006-12-31 36 | p1, b4, 2011-01-01, 2011-12-31 37 | p2, b5, 2004-01-01, 2004-12-31 38 | p2, b6, 2006-01-01, 2006-12-31 39 | p2, b7, 2011-01-01, 2011-12-31 40 | p2, b8, 2015-06-01, 2016-06-30 41 | """ 42 | 43 | mem_a = pd.read_csv( 44 | io.BytesIO(bytes(mem_a_csv, 
encoding='utf-8')), 45 | skipinitialspace = True) 46 | mem_a['start_date'] = utils.convert_date_series(mem_a['start_date']) 47 | mem_a['end_date'] = utils.convert_date_series(mem_a['end_date']) 48 | 49 | mem_b = pd.read_csv( 50 | io.BytesIO(bytes(mem_b_csv, encoding='utf-8')), 51 | skipinitialspace = True) 52 | mem_b['start_date'] = utils.convert_date_series(mem_b['start_date']) 53 | mem_b['end_date'] = utils.convert_date_series(mem_b['end_date']) 54 | 55 | # Test filter_dates ----------------------------------------------------------- 56 | 57 | class TestFilterDates(unittest.TestCase): 58 | 59 | """ 60 | Test that filter_dates returns a DataFrame with the expected properties. 61 | 62 | """ 63 | 64 | def test_that_filter_dates_raises_missing_column_error(self): 65 | 66 | with self.assertRaises(errors.MissingColumnError): 67 | 68 | f_mem_a = filter.filter_dates( 69 | mem_a, 70 | start_col='no_such_column', 71 | end_col='end_date') 72 | 73 | with self.assertRaises(errors.MissingColumnError): 74 | 75 | f_mem_a = filter.filter_dates( 76 | mem_a, 77 | start_col='start_date', 78 | end_col='no_such_column') 79 | 80 | def test_that_filter_dates_raises_value_error(self): 81 | 82 | with self.assertRaises(ValueError): 83 | 84 | f_mem_a = filter.filter_dates( 85 | mem_a, 86 | start_col='start_date', 87 | end_col='end_date', 88 | from_date='2010-01-01', 89 | to_date='2009-12-31') 90 | 91 | def test_that_filter_dates_raises_date_format_error(self): 92 | 93 | with self.assertRaises(errors.DateFormatError): 94 | 95 | f_mem_a = filter.filter_dates( 96 | mem_a, 97 | start_col='start_date', 98 | end_col='end_date', 99 | from_date='2010-01-XX', 100 | to_date='2010-12-31') 101 | 102 | with self.assertRaises(errors.DateFormatError): 103 | 104 | f_mem_a = filter.filter_dates( 105 | mem_a, 106 | start_col='start_date', 107 | end_col='end_date', 108 | from_date='2010-01-01', 109 | to_date='2010-12-XX') 110 | 111 | def test_filter_dates_does_not_filter_without_dates(self): 112 | 113 | 
f_mem_a = filter.filter_dates( 114 | mem_a, 115 | start_col='start_date', 116 | end_col='end_date') 117 | 118 | self.assertEqual(f_mem_a.shape, mem_a.shape) 119 | self.assertTrue((f_mem_a == mem_a).all().all()) 120 | 121 | def test_filter_dates_excludes_rows_before_from_date(self): 122 | 123 | f_mem_a = filter.filter_dates( 124 | mem_a, 125 | start_col='start_date', 126 | end_col='end_date', 127 | from_date='2004-12-31') 128 | 129 | self.assertEqual(f_mem_a.shape[0], mem_a.shape[0] - 1) 130 | self.assertEqual(f_mem_a.shape[1], mem_a.shape[1]) 131 | 132 | self.assertEqual(f_mem_a.iloc[0]['person_id'], 'p1') 133 | self.assertEqual(f_mem_a.iloc[0]['membership_id'], 'a2') 134 | self.assertEqual(f_mem_a.iloc[0]['start_date'], 135 | datetime.date(2005, 1, 1)) 136 | self.assertEqual(f_mem_a.iloc[0]['end_date'], 137 | datetime.date(2005, 12, 31)) 138 | 139 | def test_filter_dates_excludes_rows_after_to_date(self): 140 | 141 | f_mem_a = filter.filter_dates( 142 | mem_a, 143 | start_col='start_date', 144 | end_col='end_date', 145 | to_date='2011-01-01') 146 | 147 | self.assertEqual(f_mem_a.shape[0], mem_a.shape[0] - 1) 148 | self.assertEqual(f_mem_a.shape[1], mem_a.shape[1]) 149 | 150 | self.assertEqual(f_mem_a.iloc[-1]['person_id'], 'p2') 151 | self.assertEqual(f_mem_a.iloc[-1]['membership_id'], 'a7') 152 | self.assertEqual(f_mem_a.iloc[-1]['start_date'], 153 | datetime.date(2010, 1, 1)) 154 | self.assertEqual(f_mem_a.iloc[-1]['end_date'], 155 | datetime.date(2010, 12, 31)) 156 | 157 | def test_filter_dates_excludes_rows_outside_both_dates(self): 158 | 159 | f_mem_a = filter.filter_dates( 160 | mem_a, 161 | start_col='start_date', 162 | end_col='end_date', 163 | from_date='2004-12-31', 164 | to_date='2011-01-01') 165 | 166 | self.assertEqual(f_mem_a.shape[0], mem_a.shape[0] - 2) 167 | self.assertEqual(f_mem_a.shape[1], mem_a.shape[1]) 168 | 169 | self.assertEqual(f_mem_a.iloc[0]['person_id'], 'p1') 170 | self.assertEqual(f_mem_a.iloc[0]['membership_id'], 'a2') 171 | 
self.assertEqual(f_mem_a.iloc[0]['start_date'], 172 | datetime.date(2005, 1, 1)) 173 | self.assertEqual(f_mem_a.iloc[0]['end_date'], 174 | datetime.date(2005, 12, 31)) 175 | 176 | self.assertEqual(f_mem_a.iloc[-1]['person_id'], 'p2') 177 | self.assertEqual(f_mem_a.iloc[-1]['membership_id'], 'a7') 178 | self.assertEqual(f_mem_a.iloc[-1]['start_date'], 179 | datetime.date(2010, 1, 1)) 180 | self.assertEqual(f_mem_a.iloc[-1]['end_date'], 181 | datetime.date(2010, 12, 31)) 182 | 183 | def test_filter_dates_includes_rows_with_partial_instersection(self): 184 | 185 | f_mem_a = filter.filter_dates( 186 | mem_a, 187 | start_col='start_date', 188 | end_col='end_date', 189 | from_date='2005-06-30', 190 | to_date='2010-06-30') 191 | 192 | self.assertEqual(f_mem_a.shape[0], mem_a.shape[0] - 2) 193 | self.assertEqual(f_mem_a.shape[1], mem_a.shape[1]) 194 | 195 | self.assertEqual(f_mem_a.iloc[0]['person_id'], 'p1') 196 | self.assertEqual(f_mem_a.iloc[0]['membership_id'], 'a2') 197 | self.assertEqual(f_mem_a.iloc[0]['start_date'], 198 | datetime.date(2005, 1, 1)) 199 | self.assertEqual(f_mem_a.iloc[0]['end_date'], 200 | datetime.date(2005, 12, 31)) 201 | 202 | self.assertEqual(f_mem_a.iloc[-1]['person_id'], 'p2') 203 | self.assertEqual(f_mem_a.iloc[-1]['membership_id'], 'a7') 204 | self.assertEqual(f_mem_a.iloc[-1]['start_date'], 205 | datetime.date(2010, 1, 1)) 206 | self.assertEqual(f_mem_a.iloc[-1]['end_date'], 207 | datetime.date(2010, 12, 31)) 208 | 209 | def test_filter_dates_includes_rows_enclosing_dates(self): 210 | 211 | f_mem_a = filter.filter_dates( 212 | mem_a, 213 | start_col='start_date', 214 | end_col='end_date', 215 | from_date='2005-06-30', 216 | to_date='2005-06-30') 217 | 218 | self.assertEqual(f_mem_a.shape[0], 2) 219 | self.assertEqual(f_mem_a.shape[1], mem_a.shape[1]) 220 | 221 | self.assertEqual(f_mem_a.iloc[0]['person_id'], 'p1') 222 | self.assertEqual(f_mem_a.iloc[0]['membership_id'], 'a2') 223 | self.assertEqual(f_mem_a.iloc[0]['start_date'], 224 | 
datetime.date(2005, 1, 1)) 225 | self.assertEqual(f_mem_a.iloc[0]['end_date'], 226 | datetime.date(2005, 12, 31)) 227 | 228 | self.assertEqual(f_mem_a.iloc[1]['person_id'], 'p2') 229 | self.assertEqual(f_mem_a.iloc[1]['membership_id'], 'a5') 230 | self.assertEqual(f_mem_a.iloc[1]['start_date'], 231 | datetime.date(2005, 1, 1)) 232 | self.assertEqual(f_mem_a.iloc[-1]['end_date'], 233 | datetime.date(2005, 12, 31)) 234 | 235 | # Test filter_memberships ----------------------------------------------------- 236 | 237 | class TestFilterMemberships(unittest.TestCase): 238 | 239 | """ 240 | Test that filter_memberships returns a DataFrame with the expected 241 | properties. 242 | 243 | """ 244 | 245 | def test_that_filter_dates_raises_missing_column_error(self): 246 | 247 | with self.assertRaises(errors.MissingColumnError): 248 | 249 | f_mem_a = filter.filter_memberships( 250 | tm=mem_a, 251 | fm=mem_b, 252 | tm_id_col='no_such_column', 253 | tm_start_col='start_date', 254 | tm_end_col='end_date', 255 | fm_start_col='start_date', 256 | fm_end_col='end_date', 257 | join_col='person_id') 258 | 259 | with self.assertRaises(errors.MissingColumnError): 260 | 261 | f_mem_a = filter.filter_memberships( 262 | tm=mem_a, 263 | fm=mem_b, 264 | tm_id_col='membership_id', 265 | tm_start_col='no_such_column', 266 | tm_end_col='end_date', 267 | fm_start_col='start_date', 268 | fm_end_col='end_date', 269 | join_col='person_id') 270 | 271 | with self.assertRaises(errors.MissingColumnError): 272 | 273 | f_mem_a = filter.filter_memberships( 274 | tm=mem_a, 275 | fm=mem_b, 276 | tm_id_col='membership_id', 277 | tm_start_col='start_date', 278 | tm_end_col='no_such_column', 279 | fm_start_col='start_date', 280 | fm_end_col='end_date', 281 | join_col='person_id') 282 | 283 | with self.assertRaises(errors.MissingColumnError): 284 | 285 | f_mem_a = filter.filter_memberships( 286 | tm=mem_a, 287 | fm=mem_b, 288 | tm_id_col='membership_id', 289 | tm_start_col='start_date', 290 | 
tm_end_col='end_date', 291 | fm_start_col='no_such_column', 292 | fm_end_col='end_date', 293 | join_col='person_id') 294 | 295 | with self.assertRaises(errors.MissingColumnError): 296 | 297 | f_mem_a = filter.filter_memberships( 298 | tm=mem_a, 299 | fm=mem_b, 300 | tm_id_col='membership_id', 301 | tm_start_col='start_date', 302 | tm_end_col='end_date', 303 | fm_start_col='start_date', 304 | fm_end_col='no_such_column', 305 | join_col='person_id') 306 | 307 | with self.assertRaises(errors.MissingColumnError): 308 | 309 | f_mem_a = filter.filter_memberships( 310 | tm=mem_a, 311 | fm=mem_b, 312 | tm_id_col='membership_id', 313 | tm_start_col='start_date', 314 | tm_end_col='end_date', 315 | fm_start_col='start_date', 316 | fm_end_col='end_date', 317 | join_col='no_such_column') 318 | 319 | def test_that_filter_memberships_filters_correct_memberships(self): 320 | 321 | f_mem_a = filter.filter_memberships( 322 | tm = mem_a, 323 | fm = mem_b, 324 | tm_id_col='membership_id', 325 | tm_start_col='start_date', 326 | tm_end_col='end_date', 327 | fm_start_col='start_date', 328 | fm_end_col='end_date', 329 | join_col='person_id') 330 | 331 | self.assertEqual(f_mem_a.shape[0], 4) 332 | self.assertEqual(f_mem_a.shape[1], mem_a.shape[1]) 333 | self.assertEqual((f_mem_a.columns == mem_a.columns).all(), True) 334 | 335 | self.assertEqual((f_mem_a['person_id'] == [ 336 | 'p1', 'p1', 'p2', 'p2']).all(), True) 337 | 338 | self.assertEqual((f_mem_a['membership_id'] == [ 339 | 'a1', 'a3', 'a6', 'a8']).all(), True) 340 | 341 | self.assertEqual((f_mem_a['start_date'] == [ 342 | datetime.date(2001, 1, 1), 343 | datetime.date(2006, 1, 1), 344 | datetime.date(2006, 1, 1), 345 | datetime.date(2015, 1, 1)]).all(), True) 346 | 347 | self.assertEqual((f_mem_a['end_date'] == [ 348 | datetime.date(2001, 12, 31), 349 | datetime.date(2006, 12, 31), 350 | datetime.date(2006, 12, 31), 351 | datetime.date(2015, 12, 31)]).all(), True) 352 | 
-------------------------------------------------------------------------------- /tests/test_lords.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Test Lords functions.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | import unittest 7 | from unittest.mock import patch 8 | 9 | import pdpy.lords as lords 10 | import tests.validate as validate 11 | 12 | 13 | # Mocks ----------------------------------------------------------------------- 14 | 15 | def mock_fetch_lords_raw(): 16 | return validate.read('lords_raw') 17 | 18 | def mock_fetch_lords_memberships_raw(): 19 | return validate.read('lords_memberships_raw') 20 | 21 | def mock_fetch_lords_party_memberships_raw(): 22 | return validate.read('lords_party_memberships_raw') 23 | 24 | def mock_fetch_lords_government_roles_raw(): 25 | return validate.read('lords_government_roles_raw') 26 | 27 | def mock_fetch_lords_opposition_roles_raw(): 28 | return validate.read('lords_opposition_roles_raw') 29 | 30 | def mock_fetch_lords_committee_memberships_raw(): 31 | return validate.read('lords_committee_memberships_raw') 32 | 33 | # Tests ----------------------------------------------------------------------- 34 | 35 | class TestFetchLords(unittest.TestCase): 36 | 37 | """Test fetch_lords processes results correctly.""" 38 | 39 | @patch('pdpy.lords.fetch_lords_raw', mock_fetch_lords_raw) 40 | @patch('pdpy.lords.fetch_lords_memberships_raw', 41 | mock_fetch_lords_memberships_raw) 42 | 43 | def test_fetch_lords(self): 44 | 45 | cols = [ 46 | 'person_id', 47 | 'mnis_id', 48 | 'given_name', 49 | 'family_name', 50 | 'display_name', 51 | 'full_title', 52 | 'gender'] 53 | 54 | obs = lords.fetch_lords() 55 | exp = validate.read('fetch_lords') 56 | validate.compare_obs_exp(self, obs, exp, cols) 57 | 58 | obs = lords.fetch_lords(from_date='2017-06-08', to_date='2017-06-08') 59 | exp = 
validate.read('fetch_lords_from_to') 60 | validate.compare_obs_exp(self, obs, exp, cols) 61 | 62 | obs = lords.fetch_lords(on_date='2017-06-08') 63 | exp = validate.read('fetch_lords_from_to') 64 | validate.compare_obs_exp(self, obs, exp, cols) 65 | 66 | 67 | class TestFetchCommonsMemberships(unittest.TestCase): 68 | 69 | """Test fetch_lords_memberships processes results correctly.""" 70 | 71 | @patch('pdpy.lords.fetch_lords_memberships_raw', 72 | mock_fetch_lords_memberships_raw) 73 | 74 | def test_fetch_lords_memberships(self): 75 | 76 | cols = [ 77 | 'person_id', 78 | 'mnis_id', 79 | 'given_name', 80 | 'family_name', 81 | 'display_name', 82 | 'seat_type_id', 83 | 'seat_type_name', 84 | 'seat_incumbency_id', 85 | 'seat_incumbency_start_date'] 86 | 87 | obs = lords.fetch_lords_memberships() 88 | exp = validate.read('fetch_lords_memberships') 89 | validate.compare_obs_exp(self, obs, exp, cols) 90 | 91 | obs = lords.fetch_lords_memberships( 92 | from_date='2017-06-08', to_date='2017-06-08') 93 | exp = validate.read('fetch_lords_memberships_from_to') 94 | validate.compare_obs_exp(self, obs, exp, cols) 95 | 96 | obs = lords.fetch_lords_memberships(on_date='2017-06-08') 97 | exp = validate.read('fetch_lords_memberships_from_to') 98 | validate.compare_obs_exp(self, obs, exp, cols) 99 | 100 | 101 | class TestFetchLordsPartyMemberships(unittest.TestCase): 102 | 103 | """ 104 | Test fetch_lords_party_memberships processes results correctly. 
105 | 106 | """ 107 | 108 | @patch('pdpy.lords.fetch_lords_party_memberships_raw', 109 | mock_fetch_lords_party_memberships_raw) 110 | 111 | def test_fetch_lords_party_memberships(self): 112 | 113 | cols = [ 114 | 'person_id', 115 | 'mnis_id', 116 | 'given_name', 117 | 'family_name', 118 | 'display_name', 119 | 'party_id', 120 | 'party_mnis_id', 121 | 'party_name', 122 | 'party_membership_start_date'] 123 | 124 | obs = lords.fetch_lords_party_memberships() 125 | exp = validate.read('fetch_lords_party_memberships') 126 | validate.compare_obs_exp(self, obs, exp, cols) 127 | 128 | obs = lords.fetch_lords_party_memberships( 129 | from_date='2017-06-08', to_date='2017-06-08') 130 | exp = validate.read('fetch_lords_party_memberships_from_to') 131 | validate.compare_obs_exp(self, obs, exp, cols) 132 | 133 | obs = lords.fetch_lords_party_memberships(on_date='2017-06-08') 134 | exp = validate.read('fetch_lords_party_memberships_from_to') 135 | validate.compare_obs_exp(self, obs, exp, cols) 136 | 137 | obs = lords.fetch_lords_party_memberships(while_lord=False) 138 | exp = validate.read('fetch_lords_party_memberships_while_lord') 139 | validate.compare_obs_exp(self, obs, exp, cols) 140 | 141 | obs = lords.fetch_lords_party_memberships(collapse=True) 142 | exp = validate.read('fetch_lords_party_memberships_collapse') 143 | validate.compare_obs_exp(self, obs, exp, cols) 144 | 145 | 146 | class TestFetchLordsGovernmentRoles(unittest.TestCase): 147 | 148 | """Test fetch_lords_government_roles processes results correctly.""" 149 | 150 | @patch('pdpy.lords.fetch_lords_government_roles_raw', 151 | mock_fetch_lords_government_roles_raw) 152 | 153 | def test_fetch_lords_government_roles(self): 154 | 155 | cols = [ 156 | 'person_id', 157 | 'mnis_id', 158 | 'given_name', 159 | 'family_name', 160 | 'display_name', 161 | 'position_id', 162 | 'position_name', 163 | 'government_incumbency_id', 164 | 'government_incumbency_start_date'] 165 | 166 | obs = lords.fetch_lords_government_roles() 
167 | exp = validate.read('fetch_lords_government_roles') 168 | validate.compare_obs_exp(self, obs, exp, cols) 169 | 170 | obs = lords.fetch_lords_government_roles( 171 | from_date='2017-06-08', to_date='2017-06-08') 172 | exp = validate.read('fetch_lords_government_roles_from_to') 173 | validate.compare_obs_exp(self, obs, exp, cols) 174 | 175 | obs = lords.fetch_lords_government_roles(on_date='2017-06-08') 176 | exp = validate.read('fetch_lords_government_roles_from_to') 177 | validate.compare_obs_exp(self, obs, exp, cols) 178 | 179 | obs = lords.fetch_lords_government_roles(while_lord=False) 180 | exp = validate.read('fetch_lords_government_roles_while_lord') 181 | validate.compare_obs_exp(self, obs, exp, cols) 182 | 183 | 184 | class TestFetchLordsOppositionRoles(unittest.TestCase): 185 | 186 | """Test fetch_lords_opposition_roles processes results correctly.""" 187 | 188 | @patch('pdpy.lords.fetch_lords_opposition_roles_raw', 189 | mock_fetch_lords_opposition_roles_raw) 190 | 191 | def test_fetch_lords_opposition_roles(self): 192 | 193 | cols = [ 194 | 'person_id', 195 | 'mnis_id', 196 | 'given_name', 197 | 'family_name', 198 | 'display_name', 199 | 'position_id', 200 | 'position_name', 201 | 'opposition_incumbency_id', 202 | 'opposition_incumbency_start_date'] 203 | 204 | obs = lords.fetch_lords_opposition_roles() 205 | exp = validate.read('fetch_lords_opposition_roles') 206 | validate.compare_obs_exp(self, obs, exp, cols) 207 | 208 | obs = lords.fetch_lords_opposition_roles( 209 | from_date='2017-06-08', to_date='2017-06-08') 210 | exp = validate.read('fetch_lords_opposition_roles_from_to') 211 | validate.compare_obs_exp(self, obs, exp, cols) 212 | 213 | obs = lords.fetch_lords_opposition_roles(on_date='2017-06-08') 214 | exp = validate.read('fetch_lords_opposition_roles_from_to') 215 | validate.compare_obs_exp(self, obs, exp, cols) 216 | 217 | obs = lords.fetch_lords_opposition_roles(while_lord=False) 218 | exp = 
validate.read('fetch_lords_opposition_roles_while_lord') 219 | validate.compare_obs_exp(self, obs, exp, cols) 220 | 221 | 222 | class TestFetchLordsCommitteeMemberships(unittest.TestCase): 223 | 224 | """Test fetch_lords_committee_memberships processes results correctly.""" 225 | 226 | @patch('pdpy.lords.fetch_lords_committee_memberships_raw', 227 | mock_fetch_lords_committee_memberships_raw) 228 | 229 | def test_fetch_lords_committee_memberships(self): 230 | 231 | cols = [ 232 | 'person_id', 233 | 'mnis_id', 234 | 'given_name', 235 | 'family_name', 236 | 'display_name', 237 | 'committee_id', 238 | 'committee_name', 239 | 'committee_membership_id', 240 | 'committee_membership_start_date'] 241 | 242 | obs = lords.fetch_lords_committee_memberships() 243 | exp = validate.read('fetch_lords_committee_memberships') 244 | validate.compare_obs_exp(self, obs, exp, cols) 245 | 246 | obs = lords.fetch_lords_committee_memberships( 247 | from_date='2017-06-08', to_date='2017-06-08') 248 | exp = validate.read('fetch_lords_committee_memberships_from_to') 249 | validate.compare_obs_exp(self, obs, exp, cols) 250 | 251 | obs = lords.fetch_lords_committee_memberships(on_date='2017-06-08') 252 | exp = validate.read('fetch_lords_committee_memberships_from_to') 253 | validate.compare_obs_exp(self, obs, exp, cols) 254 | 255 | obs = lords.fetch_lords_committee_memberships(while_lord=False) 256 | exp = validate.read('fetch_lords_committee_memberships_while_lord') 257 | validate.compare_obs_exp(self, obs, exp, cols) 258 | -------------------------------------------------------------------------------- /tests/test_mps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Test MPs functions.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | import unittest 7 | from unittest.mock import patch 8 | 9 | import pdpy.mps as mps 10 | import tests.validate as validate 11 | 12 | # Mocks 
----------------------------------------------------------------------- 13 | 14 | def mock_fetch_mps_raw(): 15 | return validate.read('mps_raw') 16 | 17 | def mock_fetch_commons_memberships_raw(): 18 | return validate.read('commons_memberships_raw') 19 | 20 | def mock_fetch_mps_party_memberships_raw(): 21 | return validate.read('mps_party_memberships_raw') 22 | 23 | def mock_fetch_mps_government_roles_raw(): 24 | return validate.read('mps_government_roles_raw') 25 | 26 | def mock_fetch_mps_opposition_roles_raw(): 27 | return validate.read('mps_opposition_roles_raw') 28 | 29 | def mock_fetch_mps_committee_memberships_raw(): 30 | return validate.read('mps_committee_memberships_raw') 31 | 32 | # Tests ----------------------------------------------------------------------- 33 | 34 | class TestFetchMps(unittest.TestCase): 35 | 36 | """Test fetch_mps processes results correctly.""" 37 | 38 | @patch('pdpy.mps.fetch_mps_raw', mock_fetch_mps_raw) 39 | @patch('pdpy.mps.fetch_commons_memberships_raw', 40 | mock_fetch_commons_memberships_raw) 41 | 42 | def test_fetch_mps(self): 43 | 44 | cols = [ 45 | 'person_id', 46 | 'mnis_id', 47 | 'given_name', 48 | 'family_name', 49 | 'display_name', 50 | 'full_title', 51 | 'gender'] 52 | 53 | obs = mps.fetch_mps() 54 | exp = validate.read('fetch_mps') 55 | validate.compare_obs_exp(self, obs, exp, cols) 56 | 57 | obs = mps.fetch_mps(from_date='2017-06-08', to_date='2017-06-08') 58 | exp = validate.read('fetch_mps_from_to') 59 | validate.compare_obs_exp(self, obs, exp, cols) 60 | 61 | obs = mps.fetch_mps(on_date='2017-06-08') 62 | exp = validate.read('fetch_mps_from_to') 63 | validate.compare_obs_exp(self, obs, exp, cols) 64 | 65 | 66 | class TestFetchCommonsMemberships(unittest.TestCase): 67 | 68 | """Test fetch_commons_memberships processes results correctly.""" 69 | 70 | @patch('pdpy.mps.fetch_commons_memberships_raw', 71 | mock_fetch_commons_memberships_raw) 72 | 73 | def test_fetch_commons_memberships(self): 74 | 75 | cols = [ 76 | 
'person_id', 77 | 'mnis_id', 78 | 'given_name', 79 | 'family_name', 80 | 'display_name', 81 | 'constituency_id', 82 | 'constituency_name', 83 | 'seat_incumbency_id', 84 | 'seat_incumbency_start_date'] 85 | 86 | obs = mps.fetch_commons_memberships() 87 | exp = validate.read('fetch_commons_memberships') 88 | validate.compare_obs_exp(self, obs, exp, cols) 89 | 90 | obs = mps.fetch_commons_memberships( 91 | from_date='2017-06-08', to_date='2017-06-08') 92 | exp = validate.read('fetch_commons_memberships_from_to') 93 | validate.compare_obs_exp(self, obs, exp, cols) 94 | 95 | obs = mps.fetch_commons_memberships(on_date='2017-06-08') 96 | exp = validate.read('fetch_commons_memberships_from_to') 97 | validate.compare_obs_exp(self, obs, exp, cols) 98 | 99 | 100 | class TestFetchMpsPartyMemberships(unittest.TestCase): 101 | 102 | """ 103 | Test fetch_mps_party_memberships processes results correctly. 104 | 105 | """ 106 | 107 | @patch('pdpy.mps.fetch_mps_party_memberships_raw', 108 | mock_fetch_mps_party_memberships_raw) 109 | 110 | def test_fetch_mps_party_memberships(self): 111 | 112 | cols = [ 113 | 'person_id', 114 | 'mnis_id', 115 | 'given_name', 116 | 'family_name', 117 | 'display_name', 118 | 'party_id', 119 | 'party_mnis_id', 120 | 'party_name', 121 | 'party_membership_start_date'] 122 | 123 | obs = mps.fetch_mps_party_memberships() 124 | exp = validate.read('fetch_mps_party_memberships') 125 | validate.compare_obs_exp(self, obs, exp, cols) 126 | 127 | obs = mps.fetch_mps_party_memberships( 128 | from_date='2017-06-08', to_date='2017-06-08') 129 | exp = validate.read('fetch_mps_party_memberships_from_to') 130 | validate.compare_obs_exp(self, obs, exp, cols) 131 | 132 | obs = mps.fetch_mps_party_memberships(on_date='2017-06-08') 133 | exp = validate.read('fetch_mps_party_memberships_from_to') 134 | validate.compare_obs_exp(self, obs, exp, cols) 135 | 136 | obs = mps.fetch_mps_party_memberships(while_mp=False) 137 | exp = 
validate.read('fetch_mps_party_memberships_while_mp') 138 | validate.compare_obs_exp(self, obs, exp, cols) 139 | 140 | obs = mps.fetch_mps_party_memberships(collapse=True) 141 | exp = validate.read('fetch_mps_party_memberships_collapse') 142 | validate.compare_obs_exp(self, obs, exp, cols) 143 | 144 | 145 | class TestFetchMpsGovernmentRoles(unittest.TestCase): 146 | 147 | """Test fetch_mps_government_roles processes results correctly.""" 148 | 149 | @patch('pdpy.mps.fetch_mps_government_roles_raw', 150 | mock_fetch_mps_government_roles_raw) 151 | 152 | def test_fetch_mps_government_roles(self): 153 | 154 | cols = [ 155 | 'person_id', 156 | 'mnis_id', 157 | 'given_name', 158 | 'family_name', 159 | 'display_name', 160 | 'position_id', 161 | 'position_name', 162 | 'government_incumbency_id', 163 | 'government_incumbency_start_date'] 164 | 165 | obs = mps.fetch_mps_government_roles() 166 | exp = validate.read('fetch_mps_government_roles') 167 | validate.compare_obs_exp(self, obs, exp, cols) 168 | 169 | obs = mps.fetch_mps_government_roles( 170 | from_date='2017-06-08', to_date='2017-06-08') 171 | exp = validate.read('fetch_mps_government_roles_from_to') 172 | validate.compare_obs_exp(self, obs, exp, cols) 173 | 174 | obs = mps.fetch_mps_government_roles(on_date='2017-06-08') 175 | exp = validate.read('fetch_mps_government_roles_from_to') 176 | validate.compare_obs_exp(self, obs, exp, cols) 177 | 178 | obs = mps.fetch_mps_government_roles(while_mp=False) 179 | exp = validate.read('fetch_mps_government_roles_while_mp') 180 | validate.compare_obs_exp(self, obs, exp, cols) 181 | 182 | 183 | class TestFetchMpsOppositionRoles(unittest.TestCase): 184 | 185 | """Test fetch_mps_opposition_roles processes results correctly.""" 186 | 187 | @patch('pdpy.mps.fetch_mps_opposition_roles_raw', 188 | mock_fetch_mps_opposition_roles_raw) 189 | 190 | def test_fetch_mps_opposition_roles(self): 191 | 192 | cols = [ 193 | 'person_id', 194 | 'mnis_id', 195 | 'given_name', 196 | 
'family_name', 197 | 'display_name', 198 | 'position_id', 199 | 'position_name', 200 | 'opposition_incumbency_id', 201 | 'opposition_incumbency_start_date'] 202 | 203 | obs = mps.fetch_mps_opposition_roles() 204 | exp = validate.read('fetch_mps_opposition_roles') 205 | validate.compare_obs_exp(self, obs, exp, cols) 206 | 207 | obs = mps.fetch_mps_opposition_roles( 208 | from_date='2017-06-08', to_date='2017-06-08') 209 | exp = validate.read('fetch_mps_opposition_roles_from_to') 210 | validate.compare_obs_exp(self, obs, exp, cols) 211 | 212 | obs = mps.fetch_mps_opposition_roles(on_date='2017-06-08') 213 | exp = validate.read('fetch_mps_opposition_roles_from_to') 214 | validate.compare_obs_exp(self, obs, exp, cols) 215 | 216 | obs = mps.fetch_mps_opposition_roles(while_mp=False) 217 | exp = validate.read('fetch_mps_opposition_roles_while_mp') 218 | validate.compare_obs_exp(self, obs, exp, cols) 219 | 220 | 221 | class TestFetchMpsCommitteeMemberships(unittest.TestCase): 222 | 223 | """Test fetch_mps_committee_memberships processes results correctly.""" 224 | 225 | @patch('pdpy.mps.fetch_mps_committee_memberships_raw', 226 | mock_fetch_mps_committee_memberships_raw) 227 | 228 | def test_fetch_mps_committee_memberships(self): 229 | 230 | cols = [ 231 | 'person_id', 232 | 'mnis_id', 233 | 'given_name', 234 | 'family_name', 235 | 'display_name', 236 | 'committee_id', 237 | 'committee_name', 238 | 'committee_membership_id', 239 | 'committee_membership_start_date'] 240 | 241 | obs = mps.fetch_mps_committee_memberships() 242 | exp = validate.read('fetch_mps_committee_memberships') 243 | validate.compare_obs_exp(self, obs, exp, cols) 244 | 245 | obs = mps.fetch_mps_committee_memberships( 246 | from_date='2017-06-08', to_date='2017-06-08') 247 | exp = validate.read('fetch_mps_committee_memberships_from_to') 248 | validate.compare_obs_exp(self, obs, exp, cols) 249 | 250 | obs = mps.fetch_mps_committee_memberships(on_date='2017-06-08') 251 | exp = 
validate.read('fetch_mps_committee_memberships_from_to') 252 | validate.compare_obs_exp(self, obs, exp, cols) 253 | 254 | obs = mps.fetch_mps_committee_memberships(while_mp=False) 255 | exp = validate.read('fetch_mps_committee_memberships_while_mp') 256 | validate.compare_obs_exp(self, obs, exp, cols) 257 | -------------------------------------------------------------------------------- /tests/test_settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Test settings functions.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | import unittest 7 | 8 | import pdpy.constants as constants 9 | import pdpy.settings as settings 10 | 11 | # Test get_api_url ------------------------------------------------------------ 12 | 13 | class GetApiUrl(unittest.TestCase): 14 | 15 | """ 16 | Test that get_api_url returns default url when a url has not been set. 17 | 18 | """ 19 | 20 | def test_that_get_api_url_returns_default_url(self): 21 | 22 | self.assertEqual( 23 | settings.get_api_url(), 24 | constants.SETTINGS_API_URL_DEFAULT) 25 | 26 | # Test set_api_url ------------------------------------------------------------ 27 | 28 | class SetApiUrl(unittest.TestCase): 29 | 30 | """ 31 | Test that set_api_url sets the api url returned by get_api_url. 32 | 33 | """ 34 | 35 | def test_that_set_api_url_sets_api_url(self): 36 | 37 | api_url = 'http://localhost:8000/sparql' 38 | settings.set_api_url(api_url) 39 | self.assertEqual(settings.get_api_url(), api_url) 40 | settings.set_api_url(constants.SETTINGS_API_URL_DEFAULT) 41 | self.assertEqual( 42 | settings.get_api_url(), 43 | constants.SETTINGS_API_URL_DEFAULT) 44 | 45 | # Test reset_api_url ---------------------------------------------------------- 46 | 47 | class ResetApiUrl(unittest.TestCase): 48 | 49 | """ 50 | Test that reset_api_url resets the api url returned by get_api_url. 
51 | 52 | """ 53 | 54 | def test_that_reset_api_url_resets_api_url(self): 55 | 56 | api_url = 'http://localhost:8000/sparql' 57 | settings.set_api_url(api_url) 58 | self.assertEqual(settings.get_api_url(), api_url) 59 | settings.reset_api_url() 60 | self.assertEqual( 61 | settings.get_api_url(), 62 | constants.SETTINGS_API_URL_DEFAULT) 63 | -------------------------------------------------------------------------------- /tests/validate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Manage test data for validation.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | import os 7 | import pandas as pd 8 | 9 | # Constants ------------------------------------------------------------------- 10 | 11 | TEST_DATA_DIR = os.path.join('tests', 'data') 12 | 13 | # Read and write data --------------------------------------------------------- 14 | 15 | def read(filename): 16 | """Read a file from the data directory.""" 17 | return pd.read_pickle( 18 | os.path.join(TEST_DATA_DIR, '{0}.pkl'.format(filename))) 19 | 20 | def write(df, filename): 21 | """Write a dataframe to the data directory.""" 22 | df.to_pickle(os.path.join(TEST_DATA_DIR, '{0}.pkl'.format(filename))) 23 | 24 | # Comparison function --------------------------------------------------------- 25 | 26 | def compare_obs_exp(self, obs, exp, cols): 27 | 28 | """Compare two dataframes on structure and contents of selected columns.""" 29 | 30 | self.assertEqual(obs.shape[0], exp.shape[0]) 31 | self.assertEqual(obs.shape[1], exp.shape[1]) 32 | self.assertTrue((obs.columns == exp.columns).all()) 33 | 34 | for col in cols: 35 | self.assertTrue((obs[col] == exp[col]).all()) 36 | -------------------------------------------------------------------------------- /tests/validate_lords.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Download 
data for unit testing Lords.""" 3 | 4 | # Imports --------------------------------------------------------------------- 5 | 6 | import time 7 | 8 | import pdpy.constants as constants 9 | import pdpy.lords as lords 10 | import tests.validate as validate 11 | 12 | # Mocks data ------------------------------------------------------------------ 13 | 14 | def fetch_lords_mocks_data(): 15 | 16 | """Fetch mocks data for unit tests of Lords.""" 17 | 18 | # Download Lords 19 | l = lords.fetch_lords_raw() 20 | validate.write(l, 'lords_raw') 21 | time.sleep(constants.API_PAUSE_TIME) 22 | 23 | # Download Lords memberships 24 | l_cm = lords.fetch_lords_memberships_raw() 25 | validate.write(l_cm, 'lords_memberships_raw') 26 | time.sleep(constants.API_PAUSE_TIME) 27 | 28 | # Download Lords party memberships 29 | l_pm = lords.fetch_lords_party_memberships_raw() 30 | validate.write(l_pm, 'lords_party_memberships_raw') 31 | time.sleep(constants.API_PAUSE_TIME) 32 | 33 | # Download Lords government roles 34 | l_gor = lords.fetch_lords_government_roles_raw() 35 | validate.write(l_gor, 'lords_government_roles_raw') 36 | time.sleep(constants.API_PAUSE_TIME) 37 | 38 | # Download Lords opposition roles 39 | l_opr = lords.fetch_lords_opposition_roles_raw() 40 | validate.write(l_opr, 'lords_opposition_roles_raw') 41 | time.sleep(constants.API_PAUSE_TIME) 42 | 43 | # Download Lords committee memberships 44 | l_ctm = lords.fetch_lords_committee_memberships_raw() 45 | validate.write(l_ctm, 'lords_committee_memberships_raw') 46 | time.sleep(constants.API_PAUSE_TIME) 47 | 48 | # Validation data ------------------------------------------------------------- 49 | 50 | def fetch_lords_validation_data(): 51 | 52 | """Fetch validation data for unit tests of Lords.""" 53 | 54 | # Fetch Lords 55 | l = lords.fetch_lords() 56 | validate.write(l, 'fetch_lords') 57 | time.sleep(constants.API_PAUSE_TIME) 58 | 59 | # Fetch Lords with from and to dates 60 | l = lords.fetch_lords(from_date='2017-06-08', 
to_date='2017-06-08') 61 | validate.write(l, 'fetch_lords_from_to') 62 | time.sleep(constants.API_PAUSE_TIME) 63 | 64 | # Fetch Lords memberships 65 | lm = lords.fetch_lords_memberships() 66 | validate.write(lm, 'fetch_lords_memberships') 67 | time.sleep(constants.API_PAUSE_TIME) 68 | 69 | # Fetch Lords memberships with from and to dates 70 | lm = lords.fetch_lords_memberships( 71 | from_date='2017-06-08', to_date='2017-06-08') 72 | validate.write(lm, 'fetch_lords_memberships_from_to') 73 | time.sleep(constants.API_PAUSE_TIME) 74 | 75 | # Fetch Lords party memberships 76 | pm = lords.fetch_lords_party_memberships() 77 | validate.write(pm, 'fetch_lords_party_memberships') 78 | time.sleep(constants.API_PAUSE_TIME) 79 | 80 | # Fetch Lords party memberships with from and to dates 81 | pm = lords.fetch_lords_party_memberships( 82 | from_date='2017-06-08', to_date='2017-06-08') 83 | validate.write(pm, 'fetch_lords_party_memberships_from_to') 84 | time.sleep(constants.API_PAUSE_TIME) 85 | 86 | # Fetch Lords party memberships with while_lord 87 | pm = lords.fetch_lords_party_memberships(while_lord=False) 88 | validate.write(pm, 'fetch_lords_party_memberships_while_lord') 89 | time.sleep(constants.API_PAUSE_TIME) 90 | 91 | # Fetch Lords party memberships with collapse 92 | pm = lords.fetch_lords_party_memberships(collapse=True) 93 | validate.write(pm, 'fetch_lords_party_memberships_collapse') 94 | time.sleep(constants.API_PAUSE_TIME) 95 | 96 | # Fetch Lords government roles 97 | gor = lords.fetch_lords_government_roles() 98 | validate.write(gor, 'fetch_lords_government_roles') 99 | time.sleep(constants.API_PAUSE_TIME) 100 | 101 | # Fetch Lords government roles with from and to dates 102 | gor = lords.fetch_lords_government_roles( 103 | from_date='2017-06-08', to_date='2017-06-08') 104 | validate.write(gor, 'fetch_lords_government_roles_from_to') 105 | time.sleep(constants.API_PAUSE_TIME) 106 | 107 | # Fetch Lords government roles with while_lord 108 | gor = 
lords.fetch_lords_government_roles(while_lord=False) 109 | validate.write(gor, 'fetch_lords_government_roles_while_lord') 110 | time.sleep(constants.API_PAUSE_TIME) 111 | 112 | # Fetch Lords opposition roles 113 | opr = lords.fetch_lords_opposition_roles() 114 | validate.write(opr, 'fetch_lords_opposition_roles') 115 | time.sleep(constants.API_PAUSE_TIME) 116 | 117 | # Fetch Lords opposition roles with from and to dates 118 | opr = lords.fetch_lords_opposition_roles( 119 | from_date='2017-06-08', to_date='2017-06-08') 120 | validate.write(opr, 'fetch_lords_opposition_roles_from_to') 121 | time.sleep(constants.API_PAUSE_TIME) 122 | 123 | # Fetch Lords opposition roles with while_lord 124 | opr = lords.fetch_lords_opposition_roles(while_lord=False) 125 | validate.write(opr, 'fetch_lords_opposition_roles_while_lord') 126 | time.sleep(constants.API_PAUSE_TIME) 127 | 128 | # Fetch Lords committee memberships 129 | cmt = lords.fetch_lords_committee_memberships() 130 | validate.write(cmt, 'fetch_lords_committee_memberships') 131 | time.sleep(constants.API_PAUSE_TIME) 132 | 133 | # Fetch Lords committee memberships with from and to dates 134 | cmt = lords.fetch_lords_committee_memberships( 135 | from_date='2017-06-08', to_date='2017-06-08') 136 | validate.write(cmt, 'fetch_lords_committee_memberships_from_to') 137 | time.sleep(constants.API_PAUSE_TIME) 138 | 139 | # Fetch Lords committee memberships with while_lord 140 | cmt = lords.fetch_lords_committee_memberships(while_lord=False) 141 | validate.write(cmt, 'fetch_lords_committee_memberships_while_lord') 142 | time.sleep(constants.API_PAUSE_TIME) 143 | 144 | # Fetch all data -------------------------------------------------------------- 145 | 146 | def fetch_lords_test_data(): 147 | 148 | """Fetch mocks and validation data for unit tests of Lords.""" 149 | fetch_lords_mocks_data() 150 | fetch_lords_validation_data() 151 | -------------------------------------------------------------------------------- 
# /tests/validate_mps.py
# -*- coding: utf-8 -*-
"""Download data for unit testing MPs."""

# Imports ---------------------------------------------------------------------

import time

import pdpy.constants as constants
import pdpy.mps as mps
import tests.validate as validate

# Helpers ---------------------------------------------------------------------

def _fetch_and_store(steps):

    """
    Run each download step in order and pickle its result.

    Each step is a (function name, keyword arguments, filename) tuple. The
    named function is looked up on pdpy.mps at the moment it is called, its
    result is written with validate.write under the given filename, and the
    process then sleeps for constants.API_PAUSE_TIME before the next step.

    """

    for func_name, kwargs, filename in steps:
        data = getattr(mps, func_name)(**kwargs)
        validate.write(data, filename)
        time.sleep(constants.API_PAUSE_TIME)

# Mocks data ------------------------------------------------------------------

def fetch_mps_mocks_data():

    """Fetch mocks data for unit tests of MPs."""

    # One entry per raw download, in the order they are requested
    _fetch_and_store([
        ('fetch_mps_raw', {}, 'mps_raw'),
        ('fetch_commons_memberships_raw', {}, 'commons_memberships_raw'),
        ('fetch_mps_party_memberships_raw', {}, 'mps_party_memberships_raw'),
        ('fetch_mps_government_roles_raw', {}, 'mps_government_roles_raw'),
        ('fetch_mps_opposition_roles_raw', {}, 'mps_opposition_roles_raw'),
        ('fetch_mps_committee_memberships_raw', {},
            'mps_committee_memberships_raw'),
    ])

# Validation data -------------------------------------------------------------

def fetch_mps_validation_data():

    """Fetch validation data for unit tests of MPs."""

    # Keyword arguments shared by the variants of each fetch
    from_to = {'from_date': '2017-06-08', 'to_date': '2017-06-08'}
    not_while_mp = {'while_mp': False}
    collapsed = {'collapse': True}

    _fetch_and_store([

        # MPs
        ('fetch_mps', {}, 'fetch_mps'),
        ('fetch_mps', from_to, 'fetch_mps_from_to'),

        # Commons memberships
        ('fetch_commons_memberships', {}, 'fetch_commons_memberships'),
        ('fetch_commons_memberships', from_to,
            'fetch_commons_memberships_from_to'),

        # MPs party memberships
        ('fetch_mps_party_memberships', {}, 'fetch_mps_party_memberships'),
        ('fetch_mps_party_memberships', from_to,
            'fetch_mps_party_memberships_from_to'),
        ('fetch_mps_party_memberships', not_while_mp,
            'fetch_mps_party_memberships_while_mp'),
        ('fetch_mps_party_memberships', collapsed,
            'fetch_mps_party_memberships_collapse'),

        # MPs government roles
        ('fetch_mps_government_roles', {}, 'fetch_mps_government_roles'),
        ('fetch_mps_government_roles', from_to,
            'fetch_mps_government_roles_from_to'),
        ('fetch_mps_government_roles', not_while_mp,
            'fetch_mps_government_roles_while_mp'),

        # MPs opposition roles
        ('fetch_mps_opposition_roles', {}, 'fetch_mps_opposition_roles'),
        ('fetch_mps_opposition_roles', from_to,
            'fetch_mps_opposition_roles_from_to'),
        ('fetch_mps_opposition_roles', not_while_mp,
            'fetch_mps_opposition_roles_while_mp'),

        # MPs committee memberships
        ('fetch_mps_committee_memberships', {},
            'fetch_mps_committee_memberships'),
        ('fetch_mps_committee_memberships', from_to,
            'fetch_mps_committee_memberships_from_to'),
        ('fetch_mps_committee_memberships', not_while_mp,
            'fetch_mps_committee_memberships_while_mp'),
    ])

# Fetch all data --------------------------------------------------------------

def fetch_mps_test_data():

    """Fetch mocks and validation data for unit tests of MPs."""

    fetch_mps_mocks_data()
    fetch_mps_validation_data()

# -----------------------------------------------------------------------------