├── .gitignore
├── LICENSE
├── pdpy
├── __init__.py
├── combine.py
├── constants.py
├── core.py
├── elections.py
├── errors.py
├── filter.py
├── lords.py
├── members.py
├── mps.py
├── settings.py
└── utils.py
├── readme.md
├── setup.cfg
├── setup.py
└── tests
├── __init__.py
├── data
├── commons_memberships_raw.pkl
├── fetch_commons_memberships.pkl
├── fetch_commons_memberships_from_to.pkl
├── fetch_lords.pkl
├── fetch_lords_committee_memberships.pkl
├── fetch_lords_committee_memberships_from_to.pkl
├── fetch_lords_committee_memberships_while_lord.pkl
├── fetch_lords_from_to.pkl
├── fetch_lords_government_roles.pkl
├── fetch_lords_government_roles_from_to.pkl
├── fetch_lords_government_roles_while_lord.pkl
├── fetch_lords_memberships.pkl
├── fetch_lords_memberships_from_to.pkl
├── fetch_lords_opposition_roles.pkl
├── fetch_lords_opposition_roles_from_to.pkl
├── fetch_lords_opposition_roles_while_lord.pkl
├── fetch_lords_party_memberships.pkl
├── fetch_lords_party_memberships_collapse.pkl
├── fetch_lords_party_memberships_from_to.pkl
├── fetch_lords_party_memberships_while_lord.pkl
├── fetch_mps.pkl
├── fetch_mps_committee_memberships.pkl
├── fetch_mps_committee_memberships_from_to.pkl
├── fetch_mps_committee_memberships_while_mp.pkl
├── fetch_mps_from_to.pkl
├── fetch_mps_government_roles.pkl
├── fetch_mps_government_roles_from_to.pkl
├── fetch_mps_government_roles_while_mp.pkl
├── fetch_mps_opposition_roles.pkl
├── fetch_mps_opposition_roles_from_to.pkl
├── fetch_mps_opposition_roles_while_mp.pkl
├── fetch_mps_party_memberships.pkl
├── fetch_mps_party_memberships_collapse.pkl
├── fetch_mps_party_memberships_from_to.pkl
├── fetch_mps_party_memberships_while_mp.pkl
├── lords_committee_memberships_raw.pkl
├── lords_government_roles_raw.pkl
├── lords_memberships_raw.pkl
├── lords_opposition_roles_raw.pkl
├── lords_party_memberships_raw.pkl
├── lords_raw.pkl
├── mps_committee_memberships_raw.pkl
├── mps_government_roles_raw.pkl
├── mps_opposition_roles_raw.pkl
├── mps_party_memberships_raw.pkl
└── mps_raw.pkl
├── test_combine.py
├── test_core.py
├── test_elections.py
├── test_filter.py
├── test_lords.py
├── test_mps.py
├── test_settings.py
├── validate.py
├── validate_lords.py
└── validate_mps.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Directories
2 | __pycache__
3 | dist
4 | docs
5 | pdpy.egg-info
6 |
7 | # Files
8 | .DS_Store
9 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2018 Oliver Hawkins
2 |
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions are met:
5 |
6 | 1. Redistributions of source code must retain the above copyright notice, this
7 | list of conditions and the following disclaimer.
8 |
9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 | this list of conditions and the following disclaimer in the documentation
11 | and/or other materials provided with the distribution.
12 |
13 | 3. Neither the name of the copyright holder nor the names of its contributors
14 | may be used to endorse or promote products derived from this software without
15 | specific prior written permission.
16 |
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 |
--------------------------------------------------------------------------------
/pdpy/__init__.py:
--------------------------------------------------------------------------------
1 | """pdpy: A package for downloading data from the Parliamentary Data Platform.
2 |
3 | The pdpy package provides a suite of functions for downloading data from
4 | the data platform for the UK Parliament.
5 | """
6 |
7 | from . import core
8 | from .core import sparql_select
9 |
10 | from . import elections
11 | from .elections import get_general_elections
12 | from .elections import get_general_elections_dict
13 |
14 | from . import lords
15 | from .lords import fetch_lords
16 | from .lords import fetch_lords_memberships
17 | from .lords import fetch_lords_party_memberships
18 | from .lords import fetch_lords_government_roles
19 | from .lords import fetch_lords_opposition_roles
20 | from .lords import fetch_lords_committee_memberships
21 |
22 | from . import mps
23 | from .mps import fetch_mps
24 | from .mps import fetch_commons_memberships
25 | from .mps import fetch_mps_party_memberships
26 | from .mps import fetch_mps_government_roles
27 | from .mps import fetch_mps_opposition_roles
28 | from .mps import fetch_mps_committee_memberships
29 |
30 | from . import settings
31 | from .settings import get_api_url
32 | from .settings import set_api_url
33 | from .settings import reset_api_url
34 |
35 | from . import utils
36 | from .utils import readable
37 |
--------------------------------------------------------------------------------
/pdpy/combine.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Functions for combining related records in a dataframe."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import pandas as pd
7 |
8 | from . import utils
9 |
10 | # Functions -------------------------------------------------------------------
11 |
def combine_party_memberships(pm):

    """Merge back-to-back memberships of the same party into single rows.

    A person's party memberships are ordered by start date and every run of
    consecutive memberships of the same party is collapsed into one record
    that starts when the first membership in the run started and ends when
    the last one ended. Because several source records can be folded into
    one, the party membership id is not part of the result.

    Parameters
    ----------
    pm : DataFrame
        A pandas dataframe of party memberships with exactly the columns
        produced by the fetch party membership functions, in that order.

    Returns
    -------
    out : DataFrame
        A pandas dataframe with one row per continuous period of membership
        of a party, sorted by family name and membership start date.

    Raises
    ------
    ValueError
        If pm does not have exactly the expected columns in the expected
        order.

    """

    expected_columns = [
        'person_id',
        'mnis_id',
        'given_name',
        'family_name',
        'display_name',
        'party_id',
        'party_mnis_id',
        'party_name',
        'party_membership_id',
        'party_membership_start_date',
        'party_membership_end_date']

    # Work on a copy so the caller's dataframe is never modified
    memberships = pm.copy()

    # The column names and their order must match exactly
    if list(memberships.columns) != expected_columns:
        raise ValueError('pm does not have the expected columns')

    # Order each person's memberships chronologically
    memberships = memberships.sort_values(
        by=['person_id', 'party_membership_start_date'])

    # Key identifying a person/party pairing
    def person_party_key(row):
        return '{0}-{1}'.format(row['person_id'], row['party_id'])

    memberships['per_par_id'] = memberships.apply(person_party_key, axis=1)

    # Label each run of identical consecutive keys: the counter advances
    # every time the key differs from the one on the previous row
    run_labels = []
    last_key = ""
    run_number = 0
    for key in memberships['per_par_id']:
        if key != last_key:
            last_key = key
            run_number = run_number + 1
        run_labels.append("{0}-{1}".format(key, run_number))
    memberships['per_par_mem_id'] = run_labels

    # One row per run, spanning the earliest start and the latest end
    spans = memberships.groupby(
        ['person_id', 'party_id', 'per_par_mem_id']).agg({
            'party_membership_start_date': utils.min_date_nan,
            'party_membership_end_date': utils.max_date_nan})
    spans = spans.reset_index()

    # Reattach the descriptive person and party columns
    descriptive = memberships[[
        'person_id',
        'party_id',
        'mnis_id',
        'given_name',
        'family_name',
        'display_name',
        'party_mnis_id',
        'party_name']]

    combined = spans.merge(
        descriptive,
        how='left',
        on=['person_id', 'party_id'])

    # The merge repeats each run once per source record, so deduplicate
    combined = combined.drop_duplicates()

    # Output columns are the input columns without the membership id
    output_columns = [
        column for column in expected_columns
        if column != 'party_membership_id']
    combined = combined[output_columns]

    combined = combined.sort_values(
        by=['family_name', 'party_membership_start_date'])

    return combined.reset_index(drop=True)
138 |
--------------------------------------------------------------------------------
/pdpy/constants.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Package constants."""
3 |
4 | # Package settings ------------------------------------------------------------
5 |
6 | SETTINGS_API_URL = 'api_url'
7 | SETTINGS_API_URL_DEFAULT = 'https://api.parliament.uk/sparql'
8 |
9 | # API settings ----------------------------------------------------------------
10 |
11 | API_PAUSE_TIME = 0.5
12 |
13 | # XML ids ---------------------------------------------------------------------
14 |
15 | XML_DATE = 'http://www.w3.org/2001/XMLSchema#date'
16 |
17 | # Parliamentary Data Platform ids ---------------------------------------------
18 |
19 | PDP_ID_HOUSE_OF_COMMONS = '1AFu55Hs'
20 | PDP_ID_HOUSE_OF_LORDS = 'WkUWUBMx'
21 |
--------------------------------------------------------------------------------
/pdpy/core.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Core download functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import datetime
7 | import json
8 | import numpy as np
9 | import pandas as pd
10 | import requests
11 |
12 | from . import constants
13 | from . import errors
14 | from . import settings
15 |
16 | # Functions ------------------------------------------------------------------
17 |
def request(query):

    """Post a SPARQL query to the api endpoint and return the raw response.

    request is a thin wrapper around requests.post. The endpoint url is read
    from the package settings, the query is sent as the request body, and the
    headers declare a SPARQL query body and ask for JSON results. Neither the
    query nor the response is validated or processed here.

    Parameters
    ----------
    query : str
        A SPARQL query as a string.

    Returns
    -------
    out : Response
        The http response object from requests.

    """

    sparql_headers = {
        'content-type': 'application/sparql-query',
        'accept': 'application/sparql-results+json'}

    return requests.post(
        settings.get_api_url(),
        headers=sparql_headers,
        data=query)
45 |
46 |
def sparql_select(query):

    """Send a select query and return the response as a DataFrame.

    sparql_select sends a SPARQL query to the api endpoint and returns the
    response as a DataFrame. The SPARQL should be a SELECT query as the
    response is processed as tabular data. Values typed as XML Schema dates
    are converted to datetime.date objects. All other values are returned as
    strings with surrounding whitespace removed. Variables that are unbound
    in a record are returned as NaN.

    Parameters
    ----------
    query : str
        A SPARQL SELECT query as a string.

    Returns
    -------
    out : DataFrame
        A pandas dataframe containing the results of the query, with one
        column per variable in the SELECT clause.

    Raises
    ------
    RequestError
        If the server does not return a successful response, for example
        because the query syntax is not valid. The error carries the
        response text.

    """

    # Send the query and get the response
    response = request(query)

    # If the server returned an error raise it with the response text
    if not response.ok:
        raise errors.RequestError(response.text)

    # Named payload rather than json so the imported json module is not
    # shadowed inside this function
    payload = response.json()
    headers = payload['head']['vars']
    records = payload['results']['bindings']

    # For each record build a row and assign values based on the data type
    rows = []
    for record in records:
        row = []
        for header in headers:
            if header not in record:
                # Unbound variable: becomes NaN in the returned dataframe
                row.append(None)
                continue
            binding = record[header]
            if binding.get('datatype') == constants.XML_DATE:
                # Dates arrive with a timezone offset e.g. 2017-06-08+01:00
                row.append(
                    datetime.datetime.strptime(
                        binding['value'], '%Y-%m-%d+%H:%M').date())
            else:
                row.append(binding['value'].strip())
        rows.append(row)

    # np.nan is used because the np.NaN alias was removed in NumPy 2.0
    return pd.DataFrame(data=rows, columns=headers).fillna(value=np.nan)
102 |
--------------------------------------------------------------------------------
/pdpy/elections.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Elections data functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import datetime
7 | import io
8 | import pandas as pd
9 |
10 | from . import errors
11 | from . import utils
12 |
13 | # Functions ------------------------------------------------
14 |
def get_general_elections():

    """Return the dates of UK general elections since 1929 as a DataFrame.

    The dates are held in a table embedded in this function. The dataframe
    returned has three columns:

    name -- The name of each general election
    dissolution -- The date of dissolution
    election -- The date of the election

    The two date columns are converted with utils.convert_date_series.

    Returns
    -------
    out : DataFrame
        A pandas dataframe with data on general elections.

    """

    election_csv = """
name, dissolution, election
1929, 1929-05-10, 1929-05-30
1931, 1931-10-07, 1931-10-27
1935, 1935-10-25, 1935-11-14
1945, 1945-06-15, 1945-07-05
1950, 1950-02-03, 1950-02-23
1951, 1951-10-05, 1951-10-25
1955, 1955-05-06, 1955-05-26
1959, 1959-09-18, 1959-10-08
1964, 1964-09-25, 1964-10-15
1966, 1966-03-10, 1966-03-31
1970, 1970-05-29, 1970-06-18
1974 (Feb), 1974-02-08, 1974-02-28
1974 (Oct), 1974-09-20, 1974-10-10
1979, 1979-04-07, 1979-05-03
1983, 1983-05-13, 1983-06-09
1987, 1987-05-18, 1987-06-11
1992, 1992-03-16, 1992-04-09
1997, 1997-04-08, 1997-05-01
2001, 2001-05-14, 2001-06-07
2005, 2005-04-11, 2005-05-05
2010, 2010-04-12, 2010-05-06
2015, 2015-03-30, 2015-05-07
2017, 2017-05-03, 2017-06-08
2019, 2019-11-06, 2019-12-12
"""

    # Parse the embedded table; skipinitialspace drops the padding that
    # follows each comma
    csv_buffer = io.BytesIO(election_csv.encode('utf-8'))
    elections = pd.read_csv(csv_buffer, skipinitialspace=True)

    # Convert both date columns from strings
    for date_column in ('dissolution', 'election'):
        elections[date_column] = utils.convert_date_series(
            elections[date_column])

    return elections
72 |
73 |
def get_general_elections_dict():

    """Return the dates of UK general elections since 1929 as a dict.

    The dict is built from the dataframe returned by get_general_elections.
    Each key is an election name and each value is a dict with two entries:
    "dissolution" holding the dissolution date and "election" holding the
    election date.

    Returns
    -------
    out : dict
        A dictionary containing the dissolution and election dates for each
        general election, keyed by election name.

    """

    elections = get_general_elections()

    # Walk the three columns in step to build one entry per election
    return {
        name: {'dissolution': dissolution, 'election': election}
        for name, dissolution, election in zip(
            elections['name'],
            elections['dissolution'],
            elections['election'])}
102 |
--------------------------------------------------------------------------------
/pdpy/errors.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Package errors."""
3 |
class Error(Exception):

    """Common base class for the exceptions defined in this module."""
8 |
9 |
class RequestError(Error):

    """Exception raised when an http request fails. Typically this is the
    result of a malformed SPARQL query.

    The exception message embeds the server response, which is also kept on
    the instance as the response attribute alongside the full message.

    Parameters
    ----------
    response : str
        The text of the server response.

    """

    def __init__(self, response):
        template = 'The server responded with the following message: {0}'
        message = template.format(response)
        super().__init__(message)
        self.response = response
        self.message = message
28 |
29 |
class DateFormatError(Error):

    """Exception raised when a date string cannot be parsed.

    The exception message quotes the offending string and states the
    expected format. The string is also kept on the instance as the date_str
    attribute alongside the full message.

    Parameters
    ----------
    date_str : str
        The date string that could not be parsed.

    """

    def __init__(self, date_str):
        template = (
            "Could not parse '{0}' as a date: "
            "use format 'YYYY-MM-DD'")
        message = template.format(date_str)
        super().__init__(message)
        self.date_str = date_str
        self.message = message
48 |
49 |
class MissingColumnError(Error):

    """Exception raised when a dataframe lacks a required column.

    The exception message names the missing column. The name is also kept on
    the instance as the colname attribute alongside the full message.

    Parameters
    ----------
    colname : str
        The name of the column that could not be found.

    """

    def __init__(self, colname):
        template = "Could not find a column called '{0}'"
        message = template.format(colname)
        super().__init__(message)
        self.colname = colname
        self.message = message
66 |
--------------------------------------------------------------------------------
/pdpy/filter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Filter functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import datetime
7 | import numpy as np
8 | import pandas as pd
9 |
10 | from . import errors
11 |
12 | # Filter dates ----------------------------------------------------------------
13 |
def filter_dates(df,
                 start_col,
                 end_col,
                 from_date=np.nan,
                 to_date=np.nan):

    """Filter a dataframe of data based on the given from and to dates.

    filter_dates takes a dataframe which contains data on a time bound
    activity and returns the subset of rows where that activity took place
    within a given period. The dataframe must contain two columns of
    datetime.date objects, which record the start and end dates of an
    activity. The from and to dates provided are used to find all rows where
    some part of the period of activity took place within the period of
    filtering. The filtering process is inclusive: as long as at least one
    day of activity falls within the filtering period, the row is returned.
    A missing start or end date in a row is treated as open-ended.

    Parameters
    ----------
    df : DataFrame
        A pandas dataframe containing data on a time bound activity.
    start_col : str
        The name of the column that contains the start date for the activity.
    end_col : str
        The name of the column that contains the end date for the activity.
    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on
        the basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on
        the basis of the to_date.

    Returns
    -------
    out : DataFrame
        A dataframe with the same structure as the input df containing the
        rows that meet the filtering criteria. When df has no rows, or when
        neither date is given, df itself is returned.

    Raises
    ------
    MissingColumnError
        If start_col or end_col is not a column of df.
    DateFormatError
        If a date string cannot be parsed.
    TypeError
        If a date is neither a string, a date, nor a missing value.
    ValueError
        If to_date is before from_date.

    """

    # Check the start and end columns exist
    if start_col not in df.columns:
        raise errors.MissingColumnError(start_col)

    if end_col not in df.columns:
        raise errors.MissingColumnError(end_col)

    # Check the dataframe has rows
    if df.shape[0] == 0:
        return df

    # Check there are dates to filter
    if pd.isna(from_date) and pd.isna(to_date):
        return df

    # Handle from and to dates
    from_date = handle_date(from_date)
    to_date = handle_date(to_date)
    has_from_date = not pd.isna(from_date)
    has_to_date = not pd.isna(to_date)

    # Check from date is before to date
    if has_from_date and has_to_date and from_date > to_date:
        raise ValueError('to_date is before from_date')

    # A plain False leaves the other mask unchanged when combined with |
    from_after_end = False
    to_before_start = False

    # Rows whose activity ended before the filtering period began
    if has_from_date:
        from_after_end = df[end_col].map(
            lambda d: False if pd.isna(d) else from_date > d)

    # Rows whose activity began after the filtering period ended
    if has_to_date:
        to_before_start = df[start_col].map(
            lambda d: False if pd.isna(d) else to_date < d)

    return df[~(from_after_end | to_before_start)]


def handle_date(d):

    """Take a date which may be a string or a date and return a date.

    handle_date takes a date which may be a datetime.date or an ISO 8601 date
    string, checks it is valid, and returns the date as a datetime.date.
    A datetime.datetime (including a pandas Timestamp) is reduced to its
    date. Missing values are returned unmodified.

    Parameters
    ----------
    d : str or date or datetime or NaN
        The value to convert.

    Returns
    -------
    out : date or NaN
        The value as a datetime.date, or the missing value unchanged.

    Raises
    ------
    DateFormatError
        If d is a string that is not in the format 'YYYY-MM-DD'.
    TypeError
        If d is not a string, a date, or a missing value.

    """

    if pd.isna(d):
        return d

    # datetime is a subclass of date, so it must be tested first: a datetime
    # cannot be compared with the plain dates held in the dataframes
    if isinstance(d, datetime.datetime):
        return d.date()

    if isinstance(d, datetime.date):
        return d

    if isinstance(d, str):
        try:
            return datetime.datetime.strptime(d, '%Y-%m-%d').date()
        except ValueError as err:
            raise errors.DateFormatError(d) from err

    raise TypeError(
        '{0} is not a valid datetime.date or date string'.format(d))
120 |
121 | # Filter memberships ----------------------------------------------------------
122 |
def filter_memberships(tm,
                       fm,
                       tm_id_col,
                       tm_start_col,
                       tm_end_col,
                       fm_start_col,
                       fm_end_col,
                       join_col):

    """Filter a dataframe of memberships to include only the rows whose period
    of membership intersects with those in another dataframe of memberships.

    filter_memberships is a function to find all memberships in one dataframe
    that intersect with those in another dataframe for each person, or other
    entity. This function lets you find things like all committee memberships
    for Commons Members during the period they have served as an MP, or all
    government roles held by Members of the House of Lords while they have
    served in the Lords.

    Missing start and end dates are treated as open-ended, so they never
    rule out an intersection. A target membership is kept if it intersects
    with at least one filter membership that shares its join_col value.

    Parameters
    ----------
    tm : DataFrame
        A pandas dataframe containing the target memberships. These are the
        memberships to be filtered.
    fm : DataFrame
        A pandas dataframe containing the filter memberships. These are the
        memberships that are used to filter the target memberships.
    tm_id_col : str
        The name of the column in the target memberships that contains the
        target membership id.
    tm_start_col : str
        The name of the column in target memberships that contains the start
        date for the membership.
    tm_end_col : str
        The name of the column in target memberships that contains the end
        date for the membership.
    fm_start_col : str
        The name of the column in filter memberships that contains the start
        date for the membership.
    fm_end_col : str
        The name of the column in filter memberships that contains the end
        date for the membership.
    join_col : str
        The name of the column in both the target and filter memberships that
        contains the id of the entity that is common to both tables. Where the
        entity is a person this will be the person id.

    Returns
    -------
    out : DataFrame
        A dataframe with the same structure as the input tm containing the rows
        that meet the filtering criteria. When tm has no rows, tm itself is
        returned.

    Raises
    ------
    MissingColumnError
        If any of the named columns is missing from the dataframe it is
        expected in. join_col must be present in both tm and fm.

    """

    # Check the target memberships dataframe has rows
    if tm.shape[0] == 0:
        return tm

    # Check the columns exist in each dataframe. join_col is needed in both
    # frames, so it is checked against tm as well as fm
    required_columns = (
        (tm, tm_id_col),
        (tm, tm_start_col),
        (tm, tm_end_col),
        (fm, fm_start_col),
        (fm, fm_end_col),
        (fm, join_col),
        (tm, join_col))

    for frame, colname in required_columns:
        if colname not in frame.columns:
            raise errors.MissingColumnError(colname)

    # Create abstract copies of tm and fm with fixed column names, so the
    # rest of the function does not depend on the caller's names
    tma = tm[[join_col, tm_id_col, tm_start_col, tm_end_col]].copy()
    tma.columns = ['join_col', 'tm_id_col', 'tm_start_col', 'tm_end_col']

    fma = fm[[join_col, fm_start_col, fm_end_col]].copy()
    fma.columns = ['join_col', 'fm_start_col', 'fm_end_col']

    # Join the target memberships with the filter membership dates on
    # join_col: one row per pairing of target and filter membership
    tm_fm = tma.merge(
        fma,
        how='left',
        on='join_col')

    # Function to test if a target membership and filter membership intersect
    def intersects(tm_start_date, tm_end_date, fm_start_date, fm_end_date):

        # A missing date on either side of a comparison never excludes
        tm_start_after_fm_end = (
            not pd.isna(tm_start_date)
            and not pd.isna(fm_end_date)
            and tm_start_date > fm_end_date)

        tm_end_before_fm_start = (
            not pd.isna(tm_end_date)
            and not pd.isna(fm_start_date)
            and tm_end_date < fm_start_date)

        return not (tm_start_after_fm_end or tm_end_before_fm_start)

    # Test each combination of target and filter membership
    tm_fm['in_membership'] = [
        intersects(*dates) for dates in zip(
            tm_fm['tm_start_col'],
            tm_fm['tm_end_col'],
            tm_fm['fm_start_col'],
            tm_fm['fm_end_col'])]

    # Check if each target membership intersected with any filter memberships
    match_status = tm_fm.groupby('tm_id_col')[['in_membership']].any()

    # Restore the actual target membership id column name for joining
    match_status = match_status.reset_index()
    match_status.columns = [tm_id_col, 'in_membership']

    # Join the match status with the original target memberships data
    tm_fm_status = tm.merge(
        match_status,
        how='left',
        on=tm_id_col)

    # Return the target memberships after filtering
    tmf = tm_fm_status[tm_fm_status['in_membership']]
    return tmf.drop(columns=['in_membership']).reset_index(drop=True)
261 |
--------------------------------------------------------------------------------
/pdpy/lords.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Functions for downloading and analysing data on Lords."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import numpy as np
7 | import pandas as pd
8 |
9 | from . import combine
10 | from . import constants
11 | from . import core
12 | from . import filter
13 | from . import members
14 | from . import utils
15 |
16 | # Raw Lords queries -----------------------------------------------------------
17 |
def fetch_lords_raw():
    """Fetch key details for all Lords.

    Calls members.fetch_members_raw with the House of Lords id and returns
    its result unchanged.
    """
    house_id = constants.PDP_ID_HOUSE_OF_LORDS
    return members.fetch_members_raw(house=house_id)
22 |
23 |
def fetch_lords_memberships_raw():

    """Fetch Lords memberships for all Lords.

    Builds a SPARQL SELECT query restricted to the House of Lords and passes
    it to core.sparql_select. The query selects the person and their names,
    the seat type, and the seat incumbency id with its start date and
    optional end date.

    Returns
    -------
    out
        The value returned by core.sparql_select for the query.

    """

    # Literal braces in the query are doubled ({{ and }}) so that str.format
    # only substitutes {0}, which receives the House of Lords id.
    # NOTE(review): as seen here the two PREFIX declarations have no IRI and
    # the ?display_name triple has no predicate -- this looks like content
    # lost in extraction; confirm against the original file.
    lords_memberships_query = """
PREFIX :
PREFIX d:
SELECT DISTINCT

?person_id
?mnis_id
?given_name
?family_name
?display_name
?seat_type_id
?seat_type_name
?seat_incumbency_id
?seat_incumbency_start_date
?seat_incumbency_end_date

WHERE {{

# House constraint for the House of Lords
BIND(d:{0} AS ?house)

?person_id :memberMnisId ?mnis_id;
:personGivenName ?given_name ;
:personFamilyName ?family_name ;
?display_name ;
:memberHasParliamentaryIncumbency ?seat_incumbency_id .
?seat_incumbency_id a :SeatIncumbency ;
:seatIncumbencyHasHouseSeat ?seat ;
:parliamentaryIncumbencyStartDate ?seat_incumbency_start_date .
OPTIONAL {{ ?seat_incumbency_id :parliamentaryIncumbencyEndDate ?seat_incumbency_end_date . }}
?seat :houseSeatHasHouse ?house ;
:houseSeatHasHouseSeatType ?seat_type_id .
?seat_type_id :houseSeatTypeName ?seat_type_name .
}}
""".format(constants.PDP_ID_HOUSE_OF_LORDS)

    return core.sparql_select(lords_memberships_query)
65 |
66 |
def fetch_lords_party_memberships_raw():
    """Fetch party memberships for all Lords.

    Calls members.fetch_party_memberships_raw with the House of Lords id and
    returns its result unchanged.
    """
    house_id = constants.PDP_ID_HOUSE_OF_LORDS
    return members.fetch_party_memberships_raw(house=house_id)
71 |
72 |
def fetch_lords_government_roles_raw():
    """Fetch government roles for all Lords.

    Calls members.fetch_government_roles_raw with the House of Lords id and
    returns its result unchanged.
    """
    house_id = constants.PDP_ID_HOUSE_OF_LORDS
    return members.fetch_government_roles_raw(house=house_id)
77 |
78 |
def fetch_lords_opposition_roles_raw():
    """Fetch opposition roles for all Lords.

    Calls members.fetch_opposition_roles_raw with the House of Lords id and
    returns its result unchanged.
    """
    house_id = constants.PDP_ID_HOUSE_OF_LORDS
    return members.fetch_opposition_roles_raw(house=house_id)
83 |
84 |
def fetch_lords_committee_memberships_raw():
    """Fetch committee memberships for all Lords.

    Calls members.fetch_committee_memberships_raw with the House of Lords id
    and returns its result unchanged.
    """
    house_id = constants.PDP_ID_HOUSE_OF_LORDS
    return members.fetch_committee_memberships_raw(house=house_id)
89 |
90 | # Main Lords API --------------------------------------------------------------
91 |
def fetch_lords(from_date=np.nan,
                to_date=np.nan,
                on_date=np.nan):

    """Fetch key details for all Lords.

    fetch_lords fetches data from the data platform showing key details about
    each Lord, with one row per Lord.

    The from_date and to_date arguments can be used to filter the Lords
    returned based on the dates of their Lords memberships. The on_date
    argument is a convenience that sets the from_date and to_date to the same
    given date. The on_date has priority: if the on_date is set, the from_date
    and to_date are ignored.

    The filtering is inclusive: a Lord is returned if any part of one of their
    Lords memberships falls within the period specified with the from and to
    dates.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of key details for each Lord, with one row per Lord,
        sorted by family name.

    """

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch key details
    lords = fetch_lords_raw()

    # Filter based on membership dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        lords_memberships = fetch_lords_memberships()
        matching_memberships = filter.filter_dates(
            lords_memberships,
            start_col='seat_incumbency_start_date',
            end_col='seat_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)
        lords = lords[lords['person_id'].isin(
            matching_memberships['person_id'])]

    # Tidy up and return. The sort builds a new frame rather than sorting in
    # place, because an in-place sort on a filtered selection can emit a
    # SettingWithCopyWarning.
    return lords.sort_values(by=['family_name']).reset_index(drop=True)
163 |
164 |
def fetch_lords_memberships(from_date=np.nan,
                            to_date=np.nan,
                            on_date=np.nan):

    """Fetch Lords memberships for all Lords.

    fetch_lords_memberships fetches data from the data platform showing
    Lords memberships for each Lord.

    The from_date and to_date arguments can be used to filter the memberships
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The filtering is inclusive: a membership is returned if any part of it
    falls within the period specified with the from and to dates.

    Note that a membership with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of Lords memberships for each Lord, with one row
        per Lords membership, sorted by family name.

    """

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the Lords memberships
    lords_memberships = fetch_lords_memberships_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        lords_memberships = filter.filter_dates(
            lords_memberships,
            start_col='seat_incumbency_start_date',
            end_col='seat_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)

    # Tidy up and return. The sort builds a new frame rather than sorting in
    # place, because an in-place sort on a filtered selection can emit a
    # SettingWithCopyWarning.
    return lords_memberships.sort_values(
        by=['family_name']).reset_index(drop=True)
234 |
235 |
def fetch_lords_party_memberships(from_date=np.nan,
                                  to_date=np.nan,
                                  on_date=np.nan,
                                  while_lord=True,
                                  collapse=False):

    """Fetch party memberships for all Lords.

    fetch_lords_party_memberships fetches data from the data platform showing
    party memberships for each Lord.

    The from_date and to_date arguments can be used to filter the memberships
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The while_lord argument can be used to filter the memberships to include
    only those that occurred during the period when each individual was a Lord.

    The filtering is inclusive: a membership is returned if any part
    of it falls within the period specified with the from and to dates.

    The collapse argument controls whether memberships are combined so that
    there is only one row for each period of continuous membership within the
    same party. Combining the memberships in this way means that party
    membership ids from the data platform are not included in the dataframe
    returned.

    Note that a membership with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_lord : bool, optional
        A boolean indicating whether to filter the party memberships to include
        only those memberships that were held while each individual was serving
        as a Lord. The default value is True.
    collapse : bool, optional
        Determines whether to collapse consecutive memberships within the same
        party into a single period of continuous party membership. Setting this
        to True means that party membership ids are not returned in the
        dataframe. The default value is False.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of party memberships for each Lord, with one row per
        party membership. If collapse is True, the result is whatever
        combine.combine_party_memberships returns for the filtered
        memberships; otherwise the memberships are returned as recorded,
        sorted by family name and start date. A membership with a NaN end
        date is still open.

    """

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the party memberships
    party_memberships = fetch_lords_party_memberships_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        party_memberships = filter.filter_dates(
            party_memberships,
            start_col='party_membership_start_date',
            end_col='party_membership_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Lords memberships if requested
    if while_lord:
        lords_memberships = fetch_lords_memberships()
        party_memberships = filter.filter_memberships(
            tm=party_memberships,
            fm=lords_memberships,
            tm_id_col='party_membership_id',
            tm_start_col='party_membership_start_date',
            tm_end_col='party_membership_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Collapse consecutive memberships and return if requested
    if collapse:
        return combine.combine_party_memberships(party_memberships)

    # Otherwise tidy up and return. The sort builds a new frame rather than
    # sorting in place, because an in-place sort on a filtered selection can
    # emit a SettingWithCopyWarning.
    return party_memberships.sort_values(
        by=['family_name',
            'party_membership_start_date']).reset_index(drop=True)
347 |
348 |
def fetch_lords_government_roles(from_date=np.nan,
                                 to_date=np.nan,
                                 on_date=np.nan,
                                 while_lord=True):

    """Fetch government roles for all Lords.

    fetch_lords_government_roles fetches data from the data platform showing
    government roles for each Lord.

    The from_date and to_date arguments can be used to filter the roles
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The while_lord argument can be used to filter the roles to include only
    those that occurred during the period when each individual was a Lord.

    The filtering is inclusive: a role is returned if any part of it falls
    within the period specified with the from and to dates.

    Note that a role with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_lord : bool, optional
        A boolean indicating whether to filter the government roles to include
        only those roles that were held while each individual was serving as a
        Lord. The default value is True.

    Returns
    -------
    out : DataFrame
        A dataframe of government roles for each Lord, with one row per role,
        sorted by family name and role start date.

    """

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the government roles
    government_roles = fetch_lords_government_roles_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        government_roles = filter.filter_dates(
            government_roles,
            start_col='government_incumbency_start_date',
            end_col='government_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Lords memberships if requested
    if while_lord:
        lords_memberships = fetch_lords_memberships()
        government_roles = filter.filter_memberships(
            tm=government_roles,
            fm=lords_memberships,
            tm_id_col='government_incumbency_id',
            tm_start_col='government_incumbency_start_date',
            tm_end_col='government_incumbency_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Tidy up and return. The sort builds a new frame rather than sorting in
    # place, because an in-place sort on a filtered selection can emit a
    # SettingWithCopyWarning.
    return government_roles.sort_values(
        by=['family_name',
            'government_incumbency_start_date']).reset_index(drop=True)
439 |
440 |
def fetch_lords_opposition_roles(from_date=np.nan,
                                 to_date=np.nan,
                                 on_date=np.nan,
                                 while_lord=True):

    """Fetch opposition roles for all Lords.

    fetch_lords_opposition_roles fetches data from the data platform showing
    opposition roles for each Lord.

    The from_date and to_date arguments can be used to filter the roles
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The while_lord argument can be used to filter the roles to include only
    those that occurred during the period when each individual was a Lord.

    The filtering is inclusive: a role is returned if any part of it falls
    within the period specified with the from and to dates.

    Note that a role with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_lord : bool, optional
        A boolean indicating whether to filter the opposition roles to include
        only those roles that were held while each individual was serving as a
        Lord. The default value is True.

    Returns
    -------
    out : DataFrame
        A dataframe of opposition roles for each Lord, with one row per role,
        sorted by family name and role start date.

    """

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the opposition roles
    opposition_roles = fetch_lords_opposition_roles_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        opposition_roles = filter.filter_dates(
            opposition_roles,
            start_col='opposition_incumbency_start_date',
            end_col='opposition_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Lords memberships if requested
    if while_lord:
        lords_memberships = fetch_lords_memberships()
        opposition_roles = filter.filter_memberships(
            tm=opposition_roles,
            fm=lords_memberships,
            tm_id_col='opposition_incumbency_id',
            tm_start_col='opposition_incumbency_start_date',
            tm_end_col='opposition_incumbency_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Tidy up and return. The sort builds a new frame rather than sorting in
    # place, because an in-place sort on a filtered selection can emit a
    # SettingWithCopyWarning.
    return opposition_roles.sort_values(
        by=['family_name',
            'opposition_incumbency_start_date']).reset_index(drop=True)
531 |
532 |
def fetch_lords_committee_memberships(from_date=np.nan,
                                      to_date=np.nan,
                                      on_date=np.nan,
                                      while_lord=True):

    """Fetch committee memberships for all Lords.

    fetch_lords_committee_memberships fetches data from the data platform
    showing Parliamentary committee memberships for each Lord.

    The from_date and to_date arguments can be used to filter the memberships
    returned based on the given dates. The on_date argument is a convenience
    that sets the from_date and to_date to the same given date. The on_date has
    priority: if the on_date is set, the from_date and to_date are ignored.

    The while_lord argument can be used to filter the memberships to include
    only those that occurred during the period when each individual was a Lord.

    The filtering is inclusive: a membership is returned if any part of it
    falls within the period specified with the from and to dates.

    Note that a membership with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_lord : bool, optional
        A boolean indicating whether to filter the committee memberships to
        include only those memberships that were held while each individual was
        serving as a Lord. The default value is True.

    Returns
    -------
    out : DataFrame
        A dataframe of committee memberships for each Lord, with one row per
        membership, sorted by family name and membership start date.

    """

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the committee memberships
    committee_memberships = fetch_lords_committee_memberships_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        committee_memberships = filter.filter_dates(
            committee_memberships,
            start_col='committee_membership_start_date',
            end_col='committee_membership_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Lords memberships if requested
    if while_lord:
        lords_memberships = fetch_lords_memberships()
        committee_memberships = filter.filter_memberships(
            tm=committee_memberships,
            fm=lords_memberships,
            tm_id_col='committee_membership_id',
            tm_start_col='committee_membership_start_date',
            tm_end_col='committee_membership_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Tidy up and return. The sort builds a new frame rather than sorting in
    # place, because an in-place sort on a filtered selection can emit a
    # SettingWithCopyWarning.
    return committee_memberships.sort_values(
        by=['family_name',
            'committee_membership_start_date']).reset_index(drop=True)
624 |
--------------------------------------------------------------------------------
/pdpy/members.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Functions for downloading and analysing data on Members of either House."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | from . import constants
7 | from . import core
8 |
9 | # Raw Members queries ---------------------------------------------------------
10 |
def fetch_members_raw(house=None):

    """Fetch key details for Members.

    Builds a SPARQL SELECT DISTINCT query for the name, title, gender and
    birth and death dates of Members and runs it with core.sparql_select.

    Parameters
    ----------

    house : optional
        A data platform id for a House. If it equals
        constants.PDP_ID_HOUSE_OF_COMMONS or constants.PDP_ID_HOUSE_OF_LORDS
        the query binds ?house to that id, so only Members with a seat in
        that House are matched. Any other value, including the default None,
        leaves ?house unbound.

    Returns
    -------
    out
        The result of core.sparql_select for the query (presumably a pandas
        DataFrame with one column per selected variable; confirm in core).

    """

    # Only the two known House ids produce a constraint; anything else
    # leaves the query unconstrained by House
    house_constraint = ''
    if house in (constants.PDP_ID_HOUSE_OF_COMMONS,
                 constants.PDP_ID_HOUSE_OF_LORDS):
        house_constraint = 'BIND(d:{0} AS ?house)'.format(house)

    # Build the query. Doubled braces are literal braces for str.format.
    # NOTE(review): the PREFIX IRIs and the display name / full title
    # predicates were missing, which made the query unparseable. They are
    # restored here to the data platform values; confirm against the
    # platform ontology.
    members_query = """
        PREFIX : <https://id.parliament.uk/schema/>
        PREFIX d: <https://id.parliament.uk/>
        SELECT DISTINCT

            ?person_id
            ?mnis_id
            ?given_name
            ?family_name
            ?other_names
            ?display_name
            ?full_title
            ?gender
            ?date_of_birth
            ?date_of_death

        WHERE {{

            # House constraint
            {0}

            ?person_id :memberMnisId ?mnis_id ;
                :personGivenName ?given_name ;
                :personFamilyName ?family_name ;
                <http://example.com/F31CBD81AD8343898B49DC65743F0BDF> ?display_name ;
                <http://example.com/D79B0BAC513C4A9A87C9D5AFF1FC632F> ?full_title ;
                :personHasGenderIdentity/:genderIdentityHasGender/:genderName ?gender ;
                :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house .
            OPTIONAL {{ ?person_id :personOtherNames ?other_names . }}
            OPTIONAL {{ ?person_id :personDateOfBirth ?date_of_birth . }}
            OPTIONAL {{ ?person_id :personDateOfDeath ?date_of_death . }}
        }}
    """.format(house_constraint)

    return core.sparql_select(members_query)
59 |
60 |
def fetch_party_memberships_raw(house=None):

    """Fetch party memberships for Members.

    Builds a SPARQL SELECT DISTINCT query for each Member's party
    memberships (party, start date and optional end date) and runs it with
    core.sparql_select.

    Parameters
    ----------

    house : optional
        A data platform id for a House. If it equals
        constants.PDP_ID_HOUSE_OF_COMMONS or constants.PDP_ID_HOUSE_OF_LORDS
        the query binds ?house to that id, so only Members with a seat in
        that House are matched. Any other value, including the default None,
        leaves ?house unbound.

    Returns
    -------
    out
        The result of core.sparql_select for the query (presumably a pandas
        DataFrame with one column per selected variable; confirm in core).

    """

    # Only the two known House ids produce a constraint; anything else
    # leaves the query unconstrained by House
    house_constraint = ''
    if house in (constants.PDP_ID_HOUSE_OF_COMMONS,
                 constants.PDP_ID_HOUSE_OF_LORDS):
        house_constraint = 'BIND(d:{0} AS ?house)'.format(house)

    # Build the query. Doubled braces are literal braces for str.format.
    # NOTE(review): the PREFIX IRIs and the display name predicate were
    # missing, which made the query unparseable. They are restored here to
    # the data platform values; confirm against the platform ontology.
    party_memberships_query = """
        PREFIX : <https://id.parliament.uk/schema/>
        PREFIX d: <https://id.parliament.uk/>
        SELECT DISTINCT

            ?person_id
            ?mnis_id
            ?given_name
            ?family_name
            ?display_name
            ?party_id
            ?party_mnis_id
            ?party_name
            ?party_membership_id
            ?party_membership_start_date
            ?party_membership_end_date

        WHERE {{

            # House constraint
            {0}

            ?person_id :memberMnisId ?mnis_id;
                :personGivenName ?given_name ;
                :personFamilyName ?family_name ;
                <http://example.com/F31CBD81AD8343898B49DC65743F0BDF> ?display_name ;
                :partyMemberHasPartyMembership ?party_membership_id ;
                :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house .
            ?party_membership_id a :PartyMembership ;
                :partyMembershipHasParty ?party_id ;
                :partyMembershipStartDate ?party_membership_start_date .
            OPTIONAL {{ ?party_membership_id :partyMembershipEndDate ?party_membership_end_date . }}
            ?party_id :partyMnisId ?party_mnis_id ;
                :partyName ?party_name .
        }}
    """.format(house_constraint)

    return core.sparql_select(party_memberships_query)
111 |
112 |
def fetch_government_roles_raw(house=None):

    """Fetch government roles for Members.

    Builds a SPARQL SELECT DISTINCT query for each Member's government
    incumbencies (position, start date and optional end date) and runs it
    with core.sparql_select.

    Parameters
    ----------

    house : optional
        A data platform id for a House. If it equals
        constants.PDP_ID_HOUSE_OF_COMMONS or constants.PDP_ID_HOUSE_OF_LORDS
        the query binds ?house to that id, so only Members with a seat in
        that House are matched. Any other value, including the default None,
        leaves ?house unbound.

    Returns
    -------
    out
        The result of core.sparql_select for the query (presumably a pandas
        DataFrame with one column per selected variable; confirm in core).

    """

    # Only the two known House ids produce a constraint; anything else
    # leaves the query unconstrained by House
    house_constraint = ''
    if house in (constants.PDP_ID_HOUSE_OF_COMMONS,
                 constants.PDP_ID_HOUSE_OF_LORDS):
        house_constraint = 'BIND(d:{0} AS ?house)'.format(house)

    # Build the query. Doubled braces are literal braces for str.format.
    # NOTE(review): the PREFIX IRIs and the display name predicate were
    # missing, which made the query unparseable. They are restored here to
    # the data platform values; confirm against the platform ontology.
    government_roles_query = """
        PREFIX : <https://id.parliament.uk/schema/>
        PREFIX d: <https://id.parliament.uk/>
        SELECT DISTINCT

            ?person_id
            ?mnis_id
            ?given_name
            ?family_name
            ?display_name
            ?position_id
            ?position_name
            ?government_incumbency_id
            ?government_incumbency_start_date
            ?government_incumbency_end_date

        WHERE {{

            # House constraint
            {0}

            ?person_id :memberMnisId ?mnis_id;
                :personGivenName ?given_name ;
                :personFamilyName ?family_name ;
                <http://example.com/F31CBD81AD8343898B49DC65743F0BDF> ?display_name ;
                :governmentPersonHasGovernmentIncumbency ?government_incumbency_id ;
                :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house .
            ?government_incumbency_id a :GovernmentIncumbency ;
                :governmentIncumbencyHasGovernmentPosition ?position_id ;
                :incumbencyStartDate ?government_incumbency_start_date .
            OPTIONAL {{ ?government_incumbency_id :incumbencyEndDate ?government_incumbency_end_date . }}
            ?position_id :positionName ?position_name .
        }}
    """.format(house_constraint)

    return core.sparql_select(government_roles_query)
161 |
162 |
def fetch_opposition_roles_raw(house=None):

    """Fetch opposition roles for Members.

    Builds a SPARQL SELECT DISTINCT query for each Member's opposition
    incumbencies (position, start date and optional end date) and runs it
    with core.sparql_select.

    Parameters
    ----------

    house : optional
        A data platform id for a House. If it equals
        constants.PDP_ID_HOUSE_OF_COMMONS or constants.PDP_ID_HOUSE_OF_LORDS
        the query binds ?house to that id, so only Members with a seat in
        that House are matched. Any other value, including the default None,
        leaves ?house unbound.

    Returns
    -------
    out
        The result of core.sparql_select for the query (presumably a pandas
        DataFrame with one column per selected variable; confirm in core).

    """

    # Only the two known House ids produce a constraint; anything else
    # leaves the query unconstrained by House
    house_constraint = ''
    if house in (constants.PDP_ID_HOUSE_OF_COMMONS,
                 constants.PDP_ID_HOUSE_OF_LORDS):
        house_constraint = 'BIND(d:{0} AS ?house)'.format(house)

    # Build the query. Doubled braces are literal braces for str.format.
    # NOTE(review): the PREFIX IRIs and the display name predicate were
    # missing, which made the query unparseable. They are restored here to
    # the data platform values; confirm against the platform ontology.
    opposition_roles_query = """
        PREFIX : <https://id.parliament.uk/schema/>
        PREFIX d: <https://id.parliament.uk/>
        SELECT DISTINCT

            ?person_id
            ?mnis_id
            ?given_name
            ?family_name
            ?display_name
            ?position_id
            ?position_name
            ?opposition_incumbency_id
            ?opposition_incumbency_start_date
            ?opposition_incumbency_end_date

        WHERE {{

            # House constraint
            {0}

            ?person_id :memberMnisId ?mnis_id;
                :personGivenName ?given_name ;
                :personFamilyName ?family_name ;
                <http://example.com/F31CBD81AD8343898B49DC65743F0BDF> ?display_name ;
                :oppositionPersonHasOppositionIncumbency ?opposition_incumbency_id ;
                :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house .
            ?opposition_incumbency_id a :OppositionIncumbency ;
                :oppositionIncumbencyHasOppositionPosition ?position_id ;
                :incumbencyStartDate ?opposition_incumbency_start_date .
            OPTIONAL {{ ?opposition_incumbency_id :incumbencyEndDate ?opposition_incumbency_end_date . }}
            ?position_id :positionName ?position_name .
        }}
    """.format(house_constraint)

    return core.sparql_select(opposition_roles_query)
211 |
212 |
def fetch_committee_memberships_raw(house=None):

    """Fetch committee memberships for Members.

    Builds a SPARQL SELECT DISTINCT query for each Member's formal body
    memberships (committee, optional committee type, start date and optional
    end date) and runs it with core.sparql_select.

    Parameters
    ----------

    house : optional
        A data platform id for a House. If it equals
        constants.PDP_ID_HOUSE_OF_COMMONS or constants.PDP_ID_HOUSE_OF_LORDS
        the query binds ?house to that id, so only Members with a seat in
        that House are matched. Any other value, including the default None,
        leaves ?house unbound.

    Returns
    -------
    out
        The result of core.sparql_select for the query (presumably a pandas
        DataFrame with one column per selected variable; confirm in core).

    """

    # Only the two known House ids produce a constraint; anything else
    # leaves the query unconstrained by House
    house_constraint = ''
    if house in (constants.PDP_ID_HOUSE_OF_COMMONS,
                 constants.PDP_ID_HOUSE_OF_LORDS):
        house_constraint = 'BIND(d:{0} AS ?house)'.format(house)

    # Build the query. Doubled braces are literal braces for str.format.
    # NOTE(review): the PREFIX IRIs and the display name predicate were
    # missing, which made the query unparseable. They are restored here to
    # the data platform values; confirm against the platform ontology.
    committee_memberships_query = """
        PREFIX : <https://id.parliament.uk/schema/>
        PREFIX d: <https://id.parliament.uk/>
        SELECT DISTINCT

            ?person_id
            ?mnis_id
            ?given_name
            ?family_name
            ?display_name
            ?committee_id
            ?committee_name
            ?committee_type_id
            ?committee_type_name
            ?committee_membership_id
            ?committee_membership_start_date
            ?committee_membership_end_date

        WHERE {{

            # House constraint
            {0}

            ?person_id :memberMnisId ?mnis_id;
                :personGivenName ?given_name ;
                :personFamilyName ?family_name ;
                <http://example.com/F31CBD81AD8343898B49DC65743F0BDF> ?display_name ;
                :personHasFormalBodyMembership ?committee_membership_id ;
                :memberHasParliamentaryIncumbency/:seatIncumbencyHasHouseSeat/:houseSeatHasHouse ?house .
            ?committee_membership_id :formalBodyMembershipHasFormalBody ?committee_id ;
                :formalBodyMembershipStartDate ?committee_membership_start_date .
            OPTIONAL {{ ?committee_membership_id :formalBodyMembershipEndDate ?committee_membership_end_date . }}
            ?committee_id a :FormalBody ;
                :formalBodyName ?committee_name .
            OPTIONAL {{
                ?committee_id :formalBodyHasFormalBodyType ?committee_type_id ;
                    :formalBodyHasFormalBodyType/:formalBodyTypeName ?committee_type_name .
            }}
        }}
    """.format(house_constraint)

    return core.sparql_select(committee_memberships_query)
267 |
--------------------------------------------------------------------------------
/pdpy/mps.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Functions for downloading and analysing data on MPs."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import numpy as np
7 | import pandas as pd
8 |
9 | from . import combine
10 | from . import constants
11 | from . import core
12 | from . import elections
13 | from . import filter
14 | from . import members
15 | from . import utils
16 |
17 | # Raw MPs queries -------------------------------------------------------------
18 |
def fetch_mps_raw():
    """Fetch the raw key details of every MP.

    Delegates to members.fetch_members_raw, passing the House of Commons id
    from constants as the house.
    """
    commons_house_id = constants.PDP_ID_HOUSE_OF_COMMONS
    return members.fetch_members_raw(house=commons_house_id)
23 |
24 |
def fetch_commons_memberships_raw():

    """Fetch Commons memberships for all MPs.

    Builds a SPARQL SELECT DISTINCT query for each seat incumbency in the
    House of Commons (Member, constituency, start date and optional end
    date) and runs it with core.sparql_select.

    Returns
    -------
    out
        The result of core.sparql_select for the query (presumably a pandas
        DataFrame with one column per selected variable; confirm in core).

    """

    # Build the query. Doubled braces are literal braces for str.format.
    # NOTE(review): the PREFIX IRIs and the display name predicate were
    # missing, which made the query unparseable. They are restored here to
    # the data platform values; confirm against the platform ontology.
    commons_memberships_query = """
        PREFIX : <https://id.parliament.uk/schema/>
        PREFIX d: <https://id.parliament.uk/>
        SELECT DISTINCT

            ?person_id
            ?mnis_id
            ?given_name
            ?family_name
            ?display_name
            ?constituency_id
            ?constituency_name
            ?constituency_ons_id
            ?seat_incumbency_id
            ?seat_incumbency_start_date
            ?seat_incumbency_end_date

        WHERE {{

            # House constraint for the House of Commons
            BIND(d:{0} AS ?house)

            ?person_id :memberMnisId ?mnis_id;
                :personGivenName ?given_name ;
                :personFamilyName ?family_name ;
                <http://example.com/F31CBD81AD8343898B49DC65743F0BDF> ?display_name ;
                :memberHasParliamentaryIncumbency ?seat_incumbency_id .
            ?seat_incumbency_id a :SeatIncumbency ;
                :seatIncumbencyHasHouseSeat ?seat ;
                :parliamentaryIncumbencyStartDate ?seat_incumbency_start_date .
            OPTIONAL {{ ?seat_incumbency_id :parliamentaryIncumbencyEndDate ?seat_incumbency_end_date . }}
            ?seat :houseSeatHasHouse ?house ;
                :houseSeatHasConstituencyGroup ?constituency_id .
            ?constituency_id :constituencyGroupName ?constituency_name ;
                :constituencyGroupStartDate ?constituencyStartDate .
            OPTIONAL {{ ?constituency_id :constituencyGroupOnsCode ?constituency_ons_id . }}
        }}
    """.format(constants.PDP_ID_HOUSE_OF_COMMONS)

    return core.sparql_select(commons_memberships_query)
69 |
70 |
def fetch_mps_party_memberships_raw():
    """Fetch the raw party memberships of every MP.

    Delegates to members.fetch_party_memberships_raw, passing the House of
    Commons id from constants as the house.
    """
    commons_house_id = constants.PDP_ID_HOUSE_OF_COMMONS
    return members.fetch_party_memberships_raw(house=commons_house_id)
75 |
76 |
def fetch_mps_government_roles_raw():
    """Fetch the raw government roles of every MP.

    Delegates to members.fetch_government_roles_raw, passing the House of
    Commons id from constants as the house.
    """
    commons_house_id = constants.PDP_ID_HOUSE_OF_COMMONS
    return members.fetch_government_roles_raw(house=commons_house_id)
81 |
82 |
def fetch_mps_opposition_roles_raw():
    """Fetch the raw opposition roles of every MP.

    Delegates to members.fetch_opposition_roles_raw, passing the House of
    Commons id from constants as the house.
    """
    commons_house_id = constants.PDP_ID_HOUSE_OF_COMMONS
    return members.fetch_opposition_roles_raw(house=commons_house_id)
87 |
88 |
def fetch_mps_committee_memberships_raw():
    """Fetch the raw committee memberships of every MP.

    Delegates to members.fetch_committee_memberships_raw, passing the House
    of Commons id from constants as the house.
    """
    commons_house_id = constants.PDP_ID_HOUSE_OF_COMMONS
    return members.fetch_committee_memberships_raw(house=commons_house_id)
93 |
94 | # Main MPs API ----------------------------------------------------------------
95 |
def fetch_mps(from_date=np.nan,
              to_date=np.nan,
              on_date=np.nan):

    """Fetch key details for all MPs.

    fetch_mps fetches data from the data platform showing key details about
    each MP, with one row per MP.

    The from_date and to_date arguments can be used to filter the MPs returned
    based on the dates of their Commons memberships. The on_date argument is a
    convenience that sets the from_date and to_date to the same given date. The
    on_date has priority: if the on_date is set, the from_date and to_date are
    ignored.

    The filtering is inclusive: an MP is returned if any part of one of their
    Commons memberships falls within the period specified with the from and to
    dates.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of key details for each MP, with one row per MP,
        sorted by family_name with a fresh integer index.

    """

    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch key details
    mps = fetch_mps_raw()

    # Filter based on membership dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        commons_memberships = fetch_commons_memberships()
        matching_memberships = filter.filter_dates(
            commons_memberships,
            start_col='seat_incumbency_start_date',
            end_col='seat_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)
        mps = mps[mps['person_id'].isin(matching_memberships['person_id'])]

    # Tidy up and return. The sort builds a new frame instead of working in
    # place: after the boolean filter above mps is derived from another
    # frame, and in-place operations on it can trigger pandas'
    # SettingWithCopyWarning.
    mps = mps.sort_values(by=['family_name'])
    return mps.reset_index(drop=True)
166 |
167 |
def fetch_commons_memberships(from_date=np.nan,
                              to_date=np.nan,
                              on_date=np.nan):

    """Fetch Commons memberships for all MPs.

    fetch_commons_memberships fetches data from the data platform showing
    Commons memberships for each MP. The memberships are processed to impose
    consistent rules on the start and end dates for memberships: an end date
    that falls after a dissolution, and on or before the following general
    election, is moved back to the dissolution date.

    The from_date and to_date arguments can be used to filter the memberships
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The filtering is inclusive: a membership is returned if any part
    of it falls within the period specified with the from and to dates.

    Note that a membership with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of Commons memberships for each MP, with one row
        per Commons membership, sorted by family_name and
        seat_incumbency_start_date with a fresh integer index.

    """

    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the Commons memberships
    commons_memberships = fetch_commons_memberships_raw()

    # Work on a private copy of the end dates: the array returned by .values
    # may be a view onto the dataframe (and can be read-only when pandas
    # copy-on-write is enabled), so it should not be written to directly
    end_dates = commons_memberships['seat_incumbency_end_date'].values.copy()

    # NOTE(review): positions 1 and 2 of each election row are presumably the
    # dissolution and election dates -- confirm against
    # elections.get_general_elections
    general_elections = elections.get_general_elections().values

    # If the end date for a membership falls after dissolution adjust it
    for i, date in enumerate(end_dates):

        # Open memberships have no end date to adjust
        if pd.isna(date):
            continue

        for election_row in general_elections:

            dissolution = election_row[1]
            election = election_row[2]

            if dissolution < date <= election:
                end_dates[i] = dissolution
                # The end date has been fixed, so stop searching
                break

    commons_memberships['seat_incumbency_end_date'] = end_dates

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        commons_memberships = filter.filter_dates(
            commons_memberships,
            start_col='seat_incumbency_start_date',
            end_col='seat_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)

    # Tidy up and return. The sort builds a new frame instead of working in
    # place because the filtered frame may be derived from another frame, and
    # in-place operations on it can trigger pandas' SettingWithCopyWarning.
    commons_memberships = commons_memberships.sort_values(
        by=['family_name',
            'seat_incumbency_start_date'])
    return commons_memberships.reset_index(drop=True)
262 |
263 |
def fetch_mps_party_memberships(from_date=np.nan,
                                to_date=np.nan,
                                on_date=np.nan,
                                while_mp=True,
                                collapse=False):

    """Fetch party memberships for all MPs.

    fetch_mps_party_memberships fetches data from the data platform showing
    party memberships for each MP.

    The from_date and to_date arguments can be used to filter the memberships
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The while_mp argument can be used to filter the memberships to include only
    those that occurred during the period when each individual was an MP.

    The filtering is inclusive: a membership is returned if any part
    of it falls within the period specified with the from and to dates.

    The collapse argument controls whether memberships are combined so that
    there is only one row for each period of continuous membership within the
    same party. Combining the memberships in this way means that party
    membership ids from the data platform are not included in the dataframe
    returned.

    Note that a membership with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_mp : bool, optional
        A boolean indicating whether to filter the party memberships to include
        only those memberships that were held while each individual was serving
        as an MP. The default value is True.
    collapse: bool, optional
        Determines whether to collapse consecutive memberships within the same
        party into a single period of continuous party membership. Setting this
        to True means that party membership ids are not returned in the
        dataframe. The default value is False.

    Returns
    -------
    out : DataFrame
        A pandas dataframe of party memberships for each MP, with one row per
        party membership. When collapse is True the result is instead the
        output of combine.combine_party_memberships, in which consecutive
        memberships within the same party are combined. A membership with a
        NaN end date is still open.

    """

    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the party memberships
    party_memberships = fetch_mps_party_memberships_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        party_memberships = filter.filter_dates(
            party_memberships,
            start_col='party_membership_start_date',
            end_col='party_membership_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Commons memberships if requested
    if while_mp:
        commons_memberships = fetch_commons_memberships()
        party_memberships = filter.filter_memberships(
            tm=party_memberships,
            fm=commons_memberships,
            tm_id_col='party_membership_id',
            tm_start_col='party_membership_start_date',
            tm_end_col='party_membership_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Collapse consecutive memberships and return if requested
    if collapse:
        return combine.combine_party_memberships(party_memberships)

    # Otherwise tidy up and return. The sort builds a new frame instead of
    # working in place because the filtered frame may be derived from another
    # frame, and in-place operations on it can trigger pandas'
    # SettingWithCopyWarning.
    party_memberships = party_memberships.sort_values(
        by=['family_name',
            'party_membership_start_date'])
    return party_memberships.reset_index(drop=True)
375 |
376 |
def fetch_mps_government_roles(from_date=np.nan,
                               to_date=np.nan,
                               on_date=np.nan,
                               while_mp=True):

    """Fetch government roles for all MPs.

    fetch_mps_government_roles fetches data from the data platform showing
    government roles for each MP.

    The from_date and to_date arguments can be used to filter the roles
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The while_mp argument can be used to filter the roles to include only those
    that occurred during the period when each individual was an MP.

    The filtering is inclusive: a role is returned if any part of it falls
    within the period specified with the from and to dates.

    Note that a role with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_mp : bool, optional
        A boolean indicating whether to filter the government roles to include
        only those roles that were held while each individual was serving as an
        MP. The default value is True.

    Returns
    -------
    out : DataFrame
        A dataframe of government roles for each MP, with one row per role,
        sorted by family_name and government_incumbency_start_date with a
        fresh integer index.

    """

    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the government roles
    government_roles = fetch_mps_government_roles_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        government_roles = filter.filter_dates(
            government_roles,
            start_col='government_incumbency_start_date',
            end_col='government_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Commons memberships if requested
    if while_mp:
        commons_memberships = fetch_commons_memberships()
        government_roles = filter.filter_memberships(
            tm=government_roles,
            fm=commons_memberships,
            tm_id_col='government_incumbency_id',
            tm_start_col='government_incumbency_start_date',
            tm_end_col='government_incumbency_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Tidy up and return. The sort builds a new frame instead of working in
    # place because the filtered frame may be derived from another frame, and
    # in-place operations on it can trigger pandas' SettingWithCopyWarning.
    government_roles = government_roles.sort_values(
        by=['family_name',
            'government_incumbency_start_date'])
    return government_roles.reset_index(drop=True)
467 |
468 |
def fetch_mps_opposition_roles(from_date=np.nan,
                               to_date=np.nan,
                               on_date=np.nan,
                               while_mp=True):

    """Fetch opposition roles for all MPs.

    fetch_mps_opposition_roles fetches data from the data platform showing
    opposition roles for each MP.

    The from_date and to_date arguments can be used to filter the roles
    returned. The on_date argument is a convenience that sets the from_date and
    to_date to the same given date. The on_date has priority: if the on_date is
    set, the from_date and to_date are ignored.

    The while_mp argument can be used to filter the roles to include only those
    that occurred during the period when each individual was an MP.

    The filtering is inclusive: a role is returned if any part of it falls
    within the period specified with the from and to dates.

    Note that a role with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_mp : bool, optional
        A boolean indicating whether to filter the opposition roles to include
        only those roles that were held while each individual was serving as an
        MP. The default value is True.

    Returns
    -------
    out : DataFrame
        A dataframe of opposition roles for each MP, with one row per role,
        sorted by family_name and opposition_incumbency_start_date with a
        fresh integer index.

    """

    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the opposition roles
    opposition_roles = fetch_mps_opposition_roles_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        opposition_roles = filter.filter_dates(
            opposition_roles,
            start_col='opposition_incumbency_start_date',
            end_col='opposition_incumbency_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Commons memberships if requested
    if while_mp:
        commons_memberships = fetch_commons_memberships()
        opposition_roles = filter.filter_memberships(
            tm=opposition_roles,
            fm=commons_memberships,
            tm_id_col='opposition_incumbency_id',
            tm_start_col='opposition_incumbency_start_date',
            tm_end_col='opposition_incumbency_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Tidy up and return. The sort builds a new frame instead of working in
    # place because the filtered frame may be derived from another frame, and
    # in-place operations on it can trigger pandas' SettingWithCopyWarning.
    opposition_roles = opposition_roles.sort_values(
        by=['family_name',
            'opposition_incumbency_start_date'])
    return opposition_roles.reset_index(drop=True)
559 |
560 |
def fetch_mps_committee_memberships(from_date=np.nan,
                                    to_date=np.nan,
                                    on_date=np.nan,
                                    while_mp=True):

    """Fetch committee memberships for all MPs.

    fetch_mps_committee_memberships fetches data from the data platform
    showing Parliamentary committee memberships for each MP.

    The from_date and to_date arguments can be used to filter the memberships
    returned based on the given dates. The on_date argument is a convenience
    that sets the from_date and to_date to the same given date. The on_date has
    priority: if the on_date is set, the from_date and to_date are ignored.

    The while_mp argument can be used to filter the memberships to include only
    those that occurred during the period when each individual was an MP.

    The filtering is inclusive: a membership is returned if any part of it
    falls within the period specified with the from and to dates.

    Note that a membership with a NaN end date is still open.

    Parameters
    ----------

    from_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the from_date.
    to_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the to_date.
    on_date : str or date or NaN, optional
        A string or datetime.date representing a date. If a string is used it
        should specify the date in ISO 8601 date format e.g. '2000-12-31'. The
        default value is numpy.nan, which means no records are excluded on the
        basis of the on_date.
    while_mp : bool, optional
        A boolean indicating whether to filter the committee memberships to
        include only those memberships that were held while each individual was
        serving as an MP. The default value is True.

    Returns
    -------
    out : DataFrame
        A dataframe of committee memberships for each MP, with one row per
        membership, sorted by family_name and committee_membership_start_date
        with a fresh integer index.

    """

    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed

    # Set from_date and to_date to on_date if set
    if not pd.isna(on_date):
        from_date = on_date
        to_date = on_date

    # Fetch the committee memberships
    committee_memberships = fetch_mps_committee_memberships_raw()

    # Filter on dates if requested
    if not pd.isna(from_date) or not pd.isna(to_date):
        committee_memberships = filter.filter_dates(
            committee_memberships,
            start_col='committee_membership_start_date',
            end_col='committee_membership_end_date',
            from_date=from_date,
            to_date=to_date)

    # Filter on Commons memberships if requested
    if while_mp:
        commons_memberships = fetch_commons_memberships()
        committee_memberships = filter.filter_memberships(
            tm=committee_memberships,
            fm=commons_memberships,
            tm_id_col='committee_membership_id',
            tm_start_col='committee_membership_start_date',
            tm_end_col='committee_membership_end_date',
            fm_start_col='seat_incumbency_start_date',
            fm_end_col='seat_incumbency_end_date',
            join_col='person_id')

    # Tidy up and return. The sort builds a new frame instead of working in
    # place because the filtered frame may be derived from another frame, and
    # in-place operations on it can trigger pandas' SettingWithCopyWarning.
    committee_memberships = committee_memberships.sort_values(
        by=['family_name',
            'committee_membership_start_date'])
    return committee_memberships.reset_index(drop=True)
652 |
--------------------------------------------------------------------------------
/pdpy/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """User configurable package settings."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | from . import constants
7 |
8 | # Settings dictionary ---------------------------------------------------------
9 |
10 | settings = {}
11 |
12 | # Settings: api url -----------------------------------------------------------
13 |
def get_api_url():

    """Return the SPARQL endpoint url the package is configured to use.

    If no url has been stored in the settings dictionary yet, the default
    url from the constants module is stored first via set_api_url.

    Returns
    -------
    out : str
        The currently set api url as a string.

    """

    url_key = constants.SETTINGS_API_URL

    # Fast path: a url has already been configured
    if url_key in settings:
        return settings[url_key]

    # Nothing configured yet, so install the default and read it back
    set_api_url(constants.SETTINGS_API_URL_DEFAULT)
    return settings[constants.SETTINGS_API_URL]
32 |
33 |
def set_api_url(api_url):

    """Store the url that the package uses for the api endpoint.

    By default the package uses the main live endpoint for the data
    platform's SPARQL api. Call this function to point the package at a
    different endpoint, such as a locally running version of the api.

    Parameters
    ----------
    api_url : str
        The url of an available data platform SPARQL endpoint.

    Returns
    -------
    out : None

    """

    # The url lives in the module-level settings dictionary
    settings.update({constants.SETTINGS_API_URL: api_url})
55 |
56 |
def reset_api_url():

    """Restore the default api url.

    reset_api_url sets the url that the package uses for the api endpoint
    back to the default url defined in the constants module.

    """

    default_url = constants.SETTINGS_API_URL_DEFAULT
    set_api_url(default_url)
67 |
--------------------------------------------------------------------------------
/pdpy/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Package utility functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import datetime
7 | import numpy as np
8 | import pandas as pd
9 | import requests
10 |
11 | # API Functions ---------------------------------------------------------------
12 |
def check_api(timeout=30):

    """Check if Python can reach the api and return a boolean.

    Sends a minimal SPARQL query (one triple, LIMIT 1) to the live endpoint.

    Parameters
    ----------
    timeout : float or None, optional
        The number of seconds to wait for the api before giving up. The
        default value is 30. Use None to wait indefinitely.

    Returns
    -------
    out : bool
        True if the api responded with a successful status code, False if
        the response was unsuccessful or the request failed.

    """

    api_url = (
        'https://api.parliament.uk/sparql'
        '?query=SELECT+*+WHERE+%7B+%3Fs+%'
        '3Fp+%3Fo+.+%7D+LIMIT+1%0D%0A')

    # Only request failures (connection errors, timeouts, invalid responses)
    # mean the api is unreachable; a bare except would also swallow
    # KeyboardInterrupt and SystemExit
    try:
        response = requests.get(api_url, timeout=timeout)
    except requests.exceptions.RequestException:
        return False

    return response.ok
27 |
28 | # Date handling functions -----------------------------------------------------
29 |
def convert_date_series(date_str_series):

    """Convert a series of ISO 8601 date strings to datetime.dates.

    Parameters
    ----------
    date_str_series : iterable of str
        Date strings in the format 'YYYY-MM-DD'. Missing values (anything
        pandas.isna treats as missing) are allowed.

    Returns
    -------
    out : list
        A list with one item per input value: a datetime.date for each date
        string and numpy.nan for each missing value.

    """

    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed
    return [
        np.nan if pd.isna(d)
        else datetime.datetime.strptime(d, '%Y-%m-%d').date()
        for d in date_str_series]
37 |
38 |
def min_date_nan(dates):

    """Find the earliest date from a series that may contain NaNs.

    Find the earliest date from a pandas series of datetime.dates that may
    contain NaNs. NaN dates are considered earlier than all others.

    Parameters
    ----------
    dates : Series
        A pandas series of datetime.dates, possibly containing NaNs.

    Returns
    -------
    out : date or NaN
        numpy.nan if any value in the series is missing, otherwise the
        earliest date in the series.

    """

    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed
    if dates.isna().any():
        return np.nan

    return min(dates)
52 |
53 |
def max_date_nan(dates):

    """Find the latest date from a series that may contain NaNs.

    Find the latest date from a pandas series of datetime.dates that may
    contain NaNs. NaN dates are considered later than all others.

    Parameters
    ----------
    dates : Series
        A pandas series of datetime.dates, possibly containing NaNs.

    Returns
    -------
    out : date or NaN
        numpy.nan if any value in the series is missing, otherwise the
        latest date in the series.

    """

    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed
    if dates.isna().any():
        return np.nan

    return max(dates)
67 |
68 | # Data presentation functions -------------------------------------------------
69 |
def readable(df):

    """Return a dataframe without the columns whose names end in '_id'.

    This is intended for displaying a dataframe on the console with only
    the readable columns shown i.e. without the identifiers.

    Parameters
    ----------
    df : DataFrame
        A pandas dataframe.

    Returns
    -------
    out : DataFrame
        A dataframe with the same structure as the input df with any columns
        ending in the suffix '_id' removed.

    """

    # Keep every column that is not an identifier, in its original order
    kept = [name for name in df.columns if not name.endswith('_id')]
    return df[kept]
92 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # pdpy
2 |
3 | pdpy is a Python package for downloading data from the UK Parliament's data platform. An equivalent package is available for R called [pdpr](https://github.com/houseofcommonslibrary/pdpr).
4 |
5 | ## Overview
6 |
7 | The UK Parliament's data platform contains data on Parliamentary activity. It underpins Parliament's new website, which is being developed at [beta.parliament.uk](https://beta.parliament.uk). Data in the platform is stored as RDF and is available through a SPARQL endpoint. You can see the structure of the vocabulary for the data visualised with [WebVOWL](http://visualdataweb.de/webvowl/#iri=https://raw.githubusercontent.com/ukparliament/Ontology/master/Ontology.ttl).
8 |
9 | This package provides access to data stored in the data platform through two interfaces at different levels:
10 |
11 | * A low level interface that takes a SPARQL SELECT query, sends it to the platform, and returns the result as a [pandas](http://pandas.pydata.org) _DataFrame_, with data types appropriately converted.
12 |
13 | * A high level interface comprising families of functions for downloading specific datasets, whose contents can be customised through function arguments. In some cases these higher level functions can additionally process the data to make it more suitable for analysis.
14 |
15 | The higher level interface currently focuses on providing key data about Members of both Houses of Parliament, but you can use the lower level interface to send custom queries of your own for other data.
16 |
17 | ### Installation
18 |
19 | Install from PyPI using pip:
20 | ```sh
21 | pip install pdpy
22 | ```
23 |
24 | ## Package conventions
25 |
26 | There are certain conventions that apply across the package.
27 |
28 | Functions that make calls to the data platform (or to other online resources) are prefixed `fetch_*`, while those that retrieve or generate data locally are prefixed `get_*`.
29 |
30 | Column names used in dataframes returned by higher level functions reflect the terms used for those data items in the UK Parliament RDF vocabulary, but modified so that the `camelCase` of RDF is replaced with the `lowercase_and_underscores` used in Python. This means that column names can sometimes be long, but I think maintaining a transparent relationship between the data returned by the package and the data stored in the platform makes both the package and platform more useful.
31 |
32 | Higher level functions always return columns containing the ids of the entities represented in the data to help with grouping, summarising, and linking between datasets. This can make the dataframes harder to browse in an interactive shell. To make this easier, the package has a function called `readable` that returns a copy of the dataframe with any id columns removed.
33 |
34 | ``` python
35 | import pdpy
36 | mps = pdpy.fetch_mps()
37 | pdpy.readable(mps)
38 | ```
39 |
40 | ## Package status
41 |
42 | This package is currently in _beta_. This partly reflects the fact that the data platform is still evolving but mainly reflects the fact that this package is still young. Over time new sets of functions will be added to access other datasets and more established parts of the package API will be declared stable. But right now it's all beta.
43 |
44 | ## Roadmap
45 |
46 | * Further analysis functions for data on MPs and Lords
47 | * Caching
48 | * Written Questions and Answers API
49 | * New APIs for new datasets in future
50 |
51 | ## Contributions
52 |
53 | I welcome any feedback, bug reports, and suggestions for improvement. Please talk to me before submitting a pull request. There are potentially a very large number of features that could be added to the package and I want to make sure it evolves with a consistent set of interfaces that share common design patterns. The package also has an R sibling, and I aim to maintain feature parity across both languages.
54 |
55 | ## Query API
56 |
57 | __sparql_select__(_query_)
58 |
59 | The low level query API consists of a single function which takes a SPARQL SELECT query, sends it to the data platform, and returns the results as a pandas _DataFrame_.
60 |
61 | ```python
62 | query = """
63 | PREFIX : <https://id.parliament.uk/schema/>
64 | SELECT * WHERE { ?p ?s ?o . } LIMIT 1
65 | """
66 |
67 | result = pdpy.sparql_select(query)
68 | result.iloc[0]
69 |
70 | # Output:
71 | # p http://www.w3.org/1999/02/22-rdf-syntax-ns#type
72 | # s http://www.w3.org/1999/02/22-rdf-syntax-ns#type
73 | # o http://www.w3.org/1999/02/22-rdf-syntax-ns#Property
74 | # Name: 0, dtype: object
75 | ```
76 |
77 | The function will try to convert data types it recognises to native Python types. Currently, it converts XML dates to _datetime.date_ objects and returns all other values as strings. New data types may be added as they are encountered in expanding the higher level api.
78 |
79 | ## Members API
80 |
81 | The Members API provides access to data on Members of both Houses of Parliament. It provides similar functions for downloading data on both MPs and Lords, but the structure of the data returned in each case may differ to reflect differences between Commons and Lords memberships.
82 |
83 | Each of these Member functions can take optional arguments for a `from_date` and a `to_date`, which can be used to filter the rows returned based on a period of activity related to each row. The `on_date` argument is a convenience that sets the `from_date` and `to_date` to the same given date. The `on_date` has priority: if the `on_date` is set, the `from_date` and `to_date` are ignored. The values for these arguments can be either a _datetime.date_ or a string specifying a date in ISO 8601 format ('YYYY-MM-DD').
84 |
85 | The filtering performed using these arguments is inclusive: a row is returned if any part of the activity in question falls within the period specified with the from and to dates. If the activity in question has not yet ended, the end date will have a value of `numpy.nan`.
86 |
87 | ---
88 |
89 | ### MPs
90 |
91 | Some MP functions have an optional argument called `while_mp`, which filters the data to include only those rows that coincide with the period when the individual was serving in the House of Commons. This is sometimes necessary because someone who serves in the House of Commons may later serve in the House of Lords and may hold government roles or committee memberships while serving in both Houses. When this argument is set to _False_ these functions will return all relevant records for each individual, even if the records themselves relate to periods when the individual was not an MP.
92 |
93 | ---
94 |
95 | _pdpy_.__fetch_mps__(_from_date=None_, _to_date=None_, _on_date=None_)
96 |
97 | Fetch a dataframe of key details about each MP, with one row per MP.
98 |
99 | This dataframe contains summary details for each MP, such as names, gender, and dates of birth and death.
100 |
101 | The `from_date`, `to_date` and `on_date` arguments can be used to filter the MPs returned based on the dates of their Commons memberships. Note that in this particular case the filtering does not rely on dates shown in the dataframe but uses Commons membership records to calculate whether an MP was serving on the dates in question. While breaks in service are therefore accounted for, this function does not yet have an option to exclude serving Members who were prevented from sitting at a given point in time for some reason.
102 |
103 | ---
104 |
105 | _pdpy_.__fetch_commons_memberships__(_from_date=None_, _to_date=None_, _on_date=None_)
106 |
107 | Fetch a dataframe of Commons memberships for each MP, with one row per Commons membership.
108 |
109 | The membership dates are processed to impose consistent rules on the start and end dates for memberships. Specifically, Commons memberships are taken to end at the dissolution of each Parliament, rather than on the date of the general election at which an MP was defeated.
110 |
111 | ---
112 |
113 | _pdpy_.__fetch_mps_party_memberships__(_from_date=None_, _to_date=None_, _on_date=None_, _while_mp=True_, _collapse=False_)
114 |
115 | Fetch a dataframe of party memberships for each MP, with one row per party membership.
116 |
117 | The `collapse` argument determines whether to collapse consecutive memberships within the same party into a single period of continuous party membership. The default value of this argument is _False_, but setting it to _True_ can be useful because some Members' party memberships have been recorded separately for each Parliament, even when they haven't changed party. Collapsing is helpful when you want to identify Members who have changed party allegiance. Note that setting this value to _True_ means that party membership ids are not returned in the dataframe, as individual party memberships are combined.
118 |
119 | Note that party memberships are not necessarily closed when an individual stops being an MP.
120 |
121 | ---
122 |
123 | _pdpy_.__fetch_mps_government_roles__(_from_date=None_, _to_date=None_, _on_date=None_, _while_mp=True_)
124 |
125 | Fetch a dataframe of government roles for each MP, with one row per government role.
126 |
127 | ---
128 |
129 | _pdpy_.__fetch_mps_opposition_roles__(_from_date=None_, _to_date=None_, _on_date=None_, _while_mp=True_)
130 |
131 | Fetch a dataframe of opposition roles for each MP, with one row per opposition role.
132 |
133 | ---
134 |
135 | _pdpy_.__fetch_mps_committee_memberships__(_from_date=None_, _to_date=None_, _on_date=None_, _while_mp=True_)
136 |
137 | Fetch a dataframe of Parliamentary committee memberships for each MP, with one row per committee membership.
138 |
139 | ---
140 |
141 | ### Lords
142 |
143 | Some Lords functions have an optional argument called `while_lord`, which filters the rows to include only those records that coincide with the period when the individual was serving in the House of Lords. This is sometimes necessary because someone who serves in the House of Lords may previously have served in the House of Commons and may have held government roles or committee memberships while serving in both Houses. When this argument is set to _False_ these functions will return all relevant records for each individual, even if the records themselves relate to periods when the individual was not a Lord.
144 |
145 | ---
146 |
147 | _pdpy_.__fetch_lords__(_from_date=None_, _to_date=None_, _on_date=None_)
148 |
149 | Fetch a dataframe of key details about each Lord, with one row per Lord.
150 |
151 | This dataframe contains summary details for each Lord, such as names, gender, and dates of birth and death.
152 |
153 | The `from_date`, `to_date` and `on_date` arguments can be used to filter the Lords returned based on the dates of their Lords memberships. Note that in this particular case the filtering does not rely on dates shown in the dataframe but uses Lords membership records to calculate whether a Lord was serving on the dates in question. While breaks in service are therefore accounted for, this function does not yet have an option to exclude serving Members who were prevented from sitting at a given point in time for some reason.
154 |
155 | ---
156 |
157 | _pdpy_.__fetch_lords_memberships__(_from_date=None_, _to_date=None_, _on_date=None_)
158 |
159 | Fetch a dataframe of Lords memberships for each Lord, with one row per Lords membership.
160 |
161 | ---
162 |
163 | _pdpy_.__fetch_lords_party_memberships__(_from_date=None_, _to_date=None_, _on_date=None_, _while_lord=True_, _collapse=False_)
164 |
165 | Fetch a dataframe of party memberships for each Lord, with one row per party membership.
166 |
167 | The `collapse` argument determines whether to collapse consecutive memberships within the same party into a single period of continuous party membership. The default value of this argument is _False_, but setting it to _True_ can be useful because some Members' party memberships have been recorded separately for each Parliament, even when they haven't changed party. Collapsing is helpful when you want to identify Members who have changed party allegiance. Note that setting this value to _True_ means that party membership ids are not returned in the dataframe, as individual party memberships are combined.
168 |
169 | Note that party memberships are not necessarily closed when an individual stops being a Lord.
170 |
171 | ---
172 |
173 | _pdpy_.__fetch_lords_government_roles__(_from_date=None_, _to_date=None_, _on_date=None_, _while_lord=True_)
174 |
175 | Fetch a dataframe of government roles for each Lord, with one row per government role.
176 |
177 | ---
178 |
179 | _pdpy_.__fetch_lords_opposition_roles__(_from_date=None_, _to_date=None_, _on_date=None_, _while_lord=True_)
180 |
181 | Fetch a dataframe of opposition roles for each Lord, with one row per opposition role.
182 |
183 | ---
184 |
185 | _pdpy_.__fetch_lords_committee_memberships__(_from_date=None_, _to_date=None_, _on_date=None_, _while_lord=True_)
186 |
187 | Fetch a dataframe of Parliamentary committee memberships for each Lord, with one row per committee membership.
188 |
189 | ---
190 |
191 | ## Settings
192 |
193 | You can configure the package to use a different data platform API endpoint at runtime. This allows you to run the package against a local version of the data platform. As explained by @matthieubosquet in this [comment](https://github.com/houseofcommonslibrary/pdpr/issues/1#issuecomment-484026350), the data platform team maintains a Docker image of the data platform API which is updated daily with the latest data.
194 |
195 | You can run a local version of the data platform API with docker using:
196 |
197 | ```bash
198 | docker run --rm -d -p 7200:7200 ukparliament/graphdb:latest
199 | ```
200 |
201 | Use `pdpy.set_api_url` to point the package at the local version of the data platform API:
202 |
203 | ```python
204 | pdpy.set_api_url('http://localhost:7200/repositories/parliament')
205 | ```
206 |
207 | Use `pdpy.reset_api_url` to reset the package to use the live version of the API:
208 |
209 | ```python
210 | pdpy.reset_api_url()
211 | ```
212 |
213 | You can check the currently set API URL with `pdpy.get_api_url()`.
214 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = readme.md
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
# Distribution metadata for the pdpy package, handed to setuptools.
setup(
    name='pdpy',
    packages=['pdpy'],
    version='0.1.6',
    description=(
        'A package for downloading data from the Parliamentary Data Platform'
    ),
    author='Oliver Hawkins',
    author_email='oli@olihawkins.com',
    url='https://github.com/houseofcommonslibrary/pdpy',
    license='BSD',
    keywords=['Parliament', 'MP', 'House of Commons', 'House of Lords'],
    # Packages that must be installed alongside pdpy.
    install_requires=['numpy', 'pandas', 'requests'],
    classifiers=[],
)
15 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/__init__.py
--------------------------------------------------------------------------------
/tests/data/commons_memberships_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/commons_memberships_raw.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_commons_memberships.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_commons_memberships.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_commons_memberships_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_commons_memberships_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_committee_memberships.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_committee_memberships.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_committee_memberships_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_committee_memberships_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_committee_memberships_while_lord.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_committee_memberships_while_lord.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_government_roles.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_government_roles.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_government_roles_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_government_roles_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_government_roles_while_lord.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_government_roles_while_lord.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_memberships.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_memberships.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_memberships_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_memberships_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_opposition_roles.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_opposition_roles.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_opposition_roles_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_opposition_roles_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_opposition_roles_while_lord.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_opposition_roles_while_lord.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_party_memberships.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_party_memberships.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_party_memberships_collapse.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_party_memberships_collapse.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_party_memberships_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_party_memberships_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_lords_party_memberships_while_lord.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_lords_party_memberships_while_lord.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_committee_memberships.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_committee_memberships.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_committee_memberships_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_committee_memberships_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_committee_memberships_while_mp.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_committee_memberships_while_mp.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_government_roles.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_government_roles.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_government_roles_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_government_roles_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_government_roles_while_mp.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_government_roles_while_mp.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_opposition_roles.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_opposition_roles.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_opposition_roles_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_opposition_roles_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_opposition_roles_while_mp.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_opposition_roles_while_mp.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_party_memberships.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_party_memberships.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_party_memberships_collapse.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_party_memberships_collapse.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_party_memberships_from_to.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_party_memberships_from_to.pkl
--------------------------------------------------------------------------------
/tests/data/fetch_mps_party_memberships_while_mp.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/fetch_mps_party_memberships_while_mp.pkl
--------------------------------------------------------------------------------
/tests/data/lords_committee_memberships_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_committee_memberships_raw.pkl
--------------------------------------------------------------------------------
/tests/data/lords_government_roles_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_government_roles_raw.pkl
--------------------------------------------------------------------------------
/tests/data/lords_memberships_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_memberships_raw.pkl
--------------------------------------------------------------------------------
/tests/data/lords_opposition_roles_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_opposition_roles_raw.pkl
--------------------------------------------------------------------------------
/tests/data/lords_party_memberships_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_party_memberships_raw.pkl
--------------------------------------------------------------------------------
/tests/data/lords_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/lords_raw.pkl
--------------------------------------------------------------------------------
/tests/data/mps_committee_memberships_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_committee_memberships_raw.pkl
--------------------------------------------------------------------------------
/tests/data/mps_government_roles_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_government_roles_raw.pkl
--------------------------------------------------------------------------------
/tests/data/mps_opposition_roles_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_opposition_roles_raw.pkl
--------------------------------------------------------------------------------
/tests/data/mps_party_memberships_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_party_memberships_raw.pkl
--------------------------------------------------------------------------------
/tests/data/mps_raw.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/houseofcommonslibrary/pdpy/9f0058e4de432f09b69d556fa04d0889afac3fab/tests/data/mps_raw.pkl
--------------------------------------------------------------------------------
/tests/test_combine.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test combine functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import datetime
7 | import io
8 | import numpy as np
9 | import pandas as pd
10 | import unittest
11 |
12 | import pdpy.combine as combine
13 | import pdpy.errors as errors
14 | import pdpy.utils as utils
15 |
16 |
17 | # Test data -------------------------------------------------------------------
18 |
# Raw party membership records used as the fixture for the tests below.
# p1's rows are listed in date order; p2's rows are listed out of date order
# and one of them has no end date (NA).
pm_csv = """
person_id, membership_id, party_id, start_date, end_date
p1, m1, pa1, 2001-01-01, 2001-12-31
p1, m2, pa2, 2002-01-01, 2002-12-31
p1, m3, pa1, 2003-01-01, 2003-12-31
p1, m4, pa1, 2004-01-01, 2004-12-31
p2, m5, pa1, 2002-01-01, 2002-12-31
p2, m6, pa2, 2004-01-01, NA
p2, m7, pa2, 2003-01-01, 2003-12-31
p2, m8, pa1, 2001-01-01, 2001-12-31
"""

# Parse the CSV text; skipinitialspace drops the blank after each comma.
pm = pd.read_csv(io.StringIO(pm_csv), skipinitialspace=True)

# Add the remaining columns in one step: converted dates, name columns that
# simply reuse the person id, and identifier/label columns left empty.
pm = pm.assign(
    party_membership_start_date=utils.convert_date_series(pm['start_date']),
    party_membership_end_date=utils.convert_date_series(pm['end_date']),
    mnis_id='',
    given_name=pm['person_id'],
    family_name=pm['person_id'],
    display_name=pm['person_id'],
    party_membership_id='',
    party_mnis_id='',
    party_name='',
)

# Keep only the columns selected here, in this order; the raw CSV-only
# columns (membership_id, start_date, end_date) are dropped.
pm = pm[[
    'person_id',
    'mnis_id',
    'given_name',
    'family_name',
    'display_name',
    'party_id',
    'party_mnis_id',
    'party_name',
    'party_membership_id',
    'party_membership_start_date',
    'party_membership_end_date',
]]
55 |
56 | # Test combine_party_memberships ----------------------------------------------
57 |
class CombinePartyMemberships(unittest.TestCase):

    """
    Check the DataFrame produced by combine_party_memberships against the
    module-level pm fixture.

    """

    def test_that_combine_party_memberships_raises_value_error(self):

        # person_id column removed outright
        without_person_id = pm.drop('person_id', axis=1)
        with self.assertRaises(ValueError):
            combine.combine_party_memberships(without_person_id)

        # person_id column present, but under a different name
        renamed_person_id = pm.drop('person_id', axis=1)
        renamed_person_id['pid'] = pm['person_id']
        with self.assertRaises(ValueError):
            combine.combine_party_memberships(renamed_person_id)

    def test_that_filter_memberships_filters_correct_memberships(self):

        # NOTE(review): the method name mentions filter_memberships, but the
        # body exercises combine_party_memberships -- consider renaming.
        combined = combine.combine_party_memberships(pm)

        # Eight input rows combine into five; one column is dropped
        row_count, column_count = combined.shape
        self.assertEqual(row_count, 5)
        self.assertEqual(column_count, pm.shape[1] - 1)

        # Same columns as the input, minus party_membership_id
        expected_columns = pm.drop('party_membership_id', axis=1).columns
        self.assertTrue((combined.columns == expected_columns).all())

        expected_people = ['p1', 'p1', 'p1', 'p2', 'p2']
        self.assertTrue((combined['person_id'] == expected_people).all())

        expected_starts = [
            datetime.date(2001, 1, 1),
            datetime.date(2002, 1, 1),
            datetime.date(2003, 1, 1),
            datetime.date(2001, 1, 1),
            datetime.date(2003, 1, 1)]
        starts = combined['party_membership_start_date']
        self.assertTrue((starts == expected_starts).all())

        # Only the first four end dates are compared as dates; the fifth
        # is checked for a missing value below
        expected_ends = [
            datetime.date(2001, 12, 31),
            datetime.date(2002, 12, 31),
            datetime.date(2004, 12, 31),
            datetime.date(2002, 12, 31)]
        ends = combined['party_membership_end_date']
        self.assertTrue((ends[0:4] == expected_ends).all())

        self.assertTrue(pd.isna(ends.iloc[4]))
104 |
--------------------------------------------------------------------------------
/tests/test_core.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test core download functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import datetime
7 | import numpy as np
8 | import pandas as pd
9 | import requests
10 | import time
11 | import unittest
12 | import warnings
13 |
14 | import pdpy.constants as constants
15 | import pdpy.core as core
16 | import pdpy.errors as errors
17 | import pdpy.utils as utils
18 |
19 | # Setup -----------------------------------------------------------------------
20 |
21 | # Check api is available
22 | api_available = utils.check_api()
23 |
24 | # Queries ---------------------------------------------------------------------
25 |
# Minimal query: ask for a single arbitrary triple.
# A SPARQL PREFIX declaration must name an IRI; without it the query is
# malformed, so the default prefix is bound to the Parliament schema IRI.
query_basic = """
PREFIX : <https://id.parliament.uk/schema/>
SELECT *
WHERE {
?p ?s ?o .
}
LIMIT 1
"""
34 |
# Query for the basic details of one person, identified by entity id.
# Both PREFIX declarations must name an IRI: the default prefix is the
# Parliament schema and d: is the namespace the entity id is resolved in.
query_person = """
PREFIX : <https://id.parliament.uk/schema/>
PREFIX d: <https://id.parliament.uk/>
SELECT DISTINCT

?person
?given_name
?family_name
?gender
?dob

WHERE {

# Entity id for Shirley Williams
BIND(d:URDlhhkg AS ?person)

?person :personGivenName ?given_name ;
:personFamilyName ?family_name ;
:personHasGenderIdentity/:genderIdentityHasGender/:genderName ?gender .
OPTIONAL { ?person :personDateOfBirth ?dob . }
}
"""
57 |
# Deliberately invalid variant of the person query: the d: prefix declaration
# is commented out, so d:URDlhhkg refers to an undefined prefix. The default
# prefix must still name its IRI, otherwise the query would be malformed for
# a different reason than the one the expected error below describes.
query_broken = """
PREFIX : <https://id.parliament.uk/schema/>
# PREFIX d: <https://id.parliament.uk/> Commented out to break query
SELECT DISTINCT

?person
?given_name
?family_name
?gender
?dob

WHERE {

# Entity id for Shirley Williams
BIND(d:URDlhhkg AS ?person)

?person :personGivenName ?given_name ;
:personFamilyName ?family_name ;
:personHasGenderIdentity/:genderIdentityHasGender/:genderName ?gender .
OPTIONAL { ?person :personDateOfBirth ?dob . }
}
"""

# Error text expected for query_broken (undefined d: prefix).
query_broken_error = (
    'MALFORMED QUERY: org.eclipse.rdf4j.query.parser.sparql.ast.'
    "VisitorException: QName 'd:URDlhhkg' uses an undefined prefix")
84 |
85 | # Tests -----------------------------------------------------------------------
86 |
class TestRequestBasic(unittest.TestCase):

    """Test that request sends and receives the most basic SPARQL query."""

    def setUp(self):
        # Nothing here can succeed without the remote endpoint.
        if not api_available:
            self.skipTest('api could not be reached')

    def tearDown(self):
        # Pause after every test that ran, including ones whose assertions
        # failed, so the next request is never sent straight away.
        time.sleep(constants.API_PAUSE_TIME)

    def test_request_basic(self):

        rdf_ns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

        # Suppress the warning for the broken socket
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)

            response = core.request(query_basic)
            payload = response.json()
            headers = payload['head']['vars']
            records = payload['results']['bindings']

            self.assertTrue(response.ok)
            self.assertEqual(headers, ['p', 's', 'o'])
            self.assertEqual(len(records), 1)
            self.assertEqual(records[0]['p']['value'], rdf_ns + 'type')
            self.assertEqual(records[0]['s']['value'], rdf_ns + 'type')
            self.assertEqual(records[0]['o']['value'], rdf_ns + 'Property')
117 |
118 |
class TestRequestPerson(unittest.TestCase):

    """Test that request sends and receives a Parliamentary query."""

    def setUp(self):
        # Nothing here can succeed without the remote endpoint.
        if not api_available:
            self.skipTest('api could not be reached')

    def tearDown(self):
        # Pause after every test that ran, including ones whose assertions
        # failed, so the next request is never sent straight away.
        time.sleep(constants.API_PAUSE_TIME)

    def test_request_person(self):

        # Suppress the warning for the broken socket
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)

            response = core.request(query_person)
            payload = response.json()
            headers = payload['head']['vars']
            records = payload['results']['bindings']

            self.assertTrue(response.ok)
            self.assertEqual(
                headers,
                ['person', 'given_name', 'family_name', 'gender', 'dob'])
            self.assertEqual(len(records), 1)

            person = records[0]
            self.assertEqual(
                person['person']['value'],
                'https://id.parliament.uk/URDlhhkg')
            self.assertEqual(person['given_name']['value'], 'Shirley')
            self.assertEqual(person['family_name']['value'], 'Williams')
            self.assertEqual(person['gender']['value'], 'Female')

            # The raw response keeps the date as a typed XML literal
            self.assertEqual(person['dob']['value'], '1930-07-27+01:00')
            self.assertEqual(person['dob']['datatype'], constants.XML_DATE)
151 |
152 |
class TestSelectBasic(unittest.TestCase):

    """Test that select returns data for the most basic SPARQL query."""

    def setUp(self):
        # Nothing here can succeed without the remote endpoint.
        if not api_available:
            self.skipTest('api could not be reached')

    def tearDown(self):
        # Pause after every test that ran, including ones whose assertions
        # failed, so the next request is never sent straight away.
        time.sleep(constants.API_PAUSE_TIME)

    def test_select_basic(self):

        rdf_ns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        expected = {
            'p': rdf_ns + 'type',
            's': rdf_ns + 'type',
            'o': rdf_ns + 'Property'}

        # Suppress the warning for the broken socket
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)

            data = core.sparql_select(query_basic)

            self.assertEqual(list(data), ['p', 's', 'o'])
            self.assertEqual(data.shape, (1, 3))

            # Each column holds plain strings in an object-dtype column
            for column, value in expected.items():
                self.assertEqual(data[column].dtype, np.dtype('O'))
                self.assertIsInstance(data[column][0], str)
                self.assertEqual(data[column][0], value)
185 |
186 |
class TestSelectPerson(unittest.TestCase):

    """Test that select returns data for a Parliamentary query."""

    def setUp(self):
        # Nothing here can succeed without the remote endpoint.
        if not api_available:
            self.skipTest('api could not be reached')

    def tearDown(self):
        # Pause after every test that ran, including ones whose assertions
        # failed, so the next request is never sent straight away.
        time.sleep(constants.API_PAUSE_TIME)

    def test_select_person(self):

        columns = ['person', 'given_name', 'family_name', 'gender', 'dob']
        expected_text = {
            'person': 'https://id.parliament.uk/URDlhhkg',
            'given_name': 'Shirley',
            'family_name': 'Williams',
            'gender': 'Female'}

        # Suppress the warning for the broken socket
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)

            data = core.sparql_select(query_person)

            self.assertEqual(list(data), columns)
            self.assertEqual(data.shape, (1, 5))
            for column in columns:
                self.assertEqual(data[column].dtype, np.dtype('O'))

            for column, value in expected_text.items():
                self.assertIsInstance(data[column][0], str)
                self.assertEqual(data[column][0], value)

            # The date of birth is expected as a date object, not a string
            self.assertIsInstance(data['dob'][0], datetime.date)
            self.assertEqual(data['dob'][0], datetime.date(1930, 7, 27))
224 |
225 |
class TestSelectBroken(unittest.TestCase):

    """Test that select raises a request error for a broken query."""

    def setUp(self):
        # Nothing here can succeed without the remote endpoint.
        if not api_available:
            self.skipTest('api could not be reached')

    def tearDown(self):
        # Pause after every test that ran, including ones whose assertions
        # failed, so the next request is never sent straight away.
        time.sleep(constants.API_PAUSE_TIME)

    def test_select_broken(self):

        # Suppress the warning for the broken socket
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", ResourceWarning)

            # The return value is irrelevant: the call itself must raise
            with self.assertRaises(errors.RequestError) as cm:
                core.sparql_select(query_broken)

            self.assertEqual(cm.exception.response, query_broken_error)
247 |
--------------------------------------------------------------------------------
/tests/test_elections.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test elections data functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import datetime
7 | import numpy as np
8 | import pandas as pd
9 | import unittest
10 |
11 | import pdpy.elections as elections
12 |
13 | # Tests -----------------------------------------------------------------------
14 |
class TestGetGeneralElections(unittest.TestCase):

    """
    Check the layout and internal consistency of the table returned by
    get_general_elections.

    """

    def test_get_general_elections(self):

        ge = elections.get_general_elections()
        object_dtype = np.dtype('O')

        # Column names, column dtypes and the type of the first value
        self.assertEqual(list(ge), ['name', 'dissolution', 'election'])
        for column in ('name', 'dissolution', 'election'):
            self.assertEqual(ge[column].dtype, object_dtype)
        self.assertIsInstance(ge['name'][0], str)
        self.assertIsInstance(ge['dissolution'][0], datetime.date)
        self.assertIsInstance(ge['election'][0], datetime.date)

        # Each dissolution comes before its own election
        dissolved_first = ge['dissolution'] < ge['election']
        self.assertTrue(dissolved_first.all())

        # Each election comes before the next dissolution: pair row i of
        # the elections with row i + 1 of the dissolutions
        earlier_elections = ge['election'][:-1].reset_index(drop=True)
        later_dissolutions = ge['dissolution'][1:].reset_index(drop=True)
        self.assertTrue((earlier_elections < later_dissolutions).all())

        # No election name appears twice
        total_names = len(ge['name'])
        distinct_names = len(ge['name'].unique())
        self.assertTrue(total_names == distinct_names)
47 |
48 |
class TestGetGeneralElectionsDict(unittest.TestCase):

    """
    Check the shape and ordering of every entry returned by
    get_general_elections_dict.

    """

    def test_get_general_elections_dict(self):

        all_elections = elections.get_general_elections_dict()

        for entry in all_elections.values():

            # Exactly the two date fields, and nothing else
            keys = list(entry.keys())
            self.assertEqual(len(keys), 2)
            self.assertIn('election', keys)
            self.assertIn('dissolution', keys)

            dissolution = entry['dissolution']
            election = entry['election']
            self.assertIsInstance(dissolution, datetime.date)
            self.assertIsInstance(election, datetime.date)

            # Parliament is dissolved before the election takes place
            self.assertTrue(dissolution < election)
67 |
--------------------------------------------------------------------------------
/tests/test_filter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test filter functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import datetime
7 | import io
8 | import numpy as np
9 | import pandas as pd
10 | import unittest
11 |
12 | import pdpy.errors as errors
13 | import pdpy.filter as filter
14 | import pdpy.utils as utils
15 |
16 |
17 | # Test data -------------------------------------------------------------------
18 |
# Memberships to be filtered: four per person.
mem_a_csv = """
person_id, membership_id, start_date, end_date
p1, a1, 2001-01-01, 2001-12-31
p1, a2, 2005-01-01, 2005-12-31
p1, a3, 2006-01-01, 2006-12-31
p1, a4, 2010-01-01, 2010-12-31
p2, a5, 2005-01-01, 2005-12-31
p2, a6, 2006-01-01, 2006-12-31
p2, a7, 2010-01-01, 2010-12-31
p2, a8, 2015-01-01, 2015-12-31
"""

# Memberships used as the filter in the filter_memberships tests.
mem_b_csv = """
person_id, membership_id, start_date, end_date
p1, b1, 2001-06-01, 2002-06-30
p1, b2, 2004-01-01, 2004-12-31
p1, b3, 2006-01-01, 2006-12-31
p1, b4, 2011-01-01, 2011-12-31
p2, b5, 2004-01-01, 2004-12-31
p2, b6, 2006-01-01, 2006-12-31
p2, b7, 2011-01-01, 2011-12-31
p2, b8, 2015-06-01, 2016-06-30
"""


def _load_memberships(csv_text):
    # Parse one of the inline tables above and run both of its date columns
    # through utils.convert_date_series.
    frame = pd.read_csv(
        io.BytesIO(csv_text.encode('utf-8')),
        skipinitialspace=True)
    for column in ('start_date', 'end_date'):
        frame[column] = utils.convert_date_series(frame[column])
    return frame


mem_a = _load_memberships(mem_a_csv)
mem_b = _load_memberships(mem_b_csv)
54 |
55 | # Test filter_dates -----------------------------------------------------------
56 |
class TestFilterDates(unittest.TestCase):

    """
    Test that filter_dates returns a DataFrame with the expected properties.

    """

    # Rows of mem_a that the tests below expect at the edges of the result:
    # (person_id, membership_id, start_date, end_date)
    ROW_A2 = (
        'p1', 'a2', datetime.date(2005, 1, 1), datetime.date(2005, 12, 31))
    ROW_A5 = (
        'p2', 'a5', datetime.date(2005, 1, 1), datetime.date(2005, 12, 31))
    ROW_A7 = (
        'p2', 'a7', datetime.date(2010, 1, 1), datetime.date(2010, 12, 31))

    def _assert_row(
            self, row, person_id, membership_id, start_date, end_date):
        # Compare all four columns of one result row with expected values.
        self.assertEqual(row['person_id'], person_id)
        self.assertEqual(row['membership_id'], membership_id)
        self.assertEqual(row['start_date'], start_date)
        self.assertEqual(row['end_date'], end_date)

    def test_that_filter_dates_raises_missing_column_error(self):

        with self.assertRaises(errors.MissingColumnError):
            filter.filter_dates(
                mem_a,
                start_col='no_such_column',
                end_col='end_date')

        with self.assertRaises(errors.MissingColumnError):
            filter.filter_dates(
                mem_a,
                start_col='start_date',
                end_col='no_such_column')

    def test_that_filter_dates_raises_value_error(self):

        # from_date later than to_date
        with self.assertRaises(ValueError):
            filter.filter_dates(
                mem_a,
                start_col='start_date',
                end_col='end_date',
                from_date='2010-01-01',
                to_date='2009-12-31')

    def test_that_filter_dates_raises_date_format_error(self):

        with self.assertRaises(errors.DateFormatError):
            filter.filter_dates(
                mem_a,
                start_col='start_date',
                end_col='end_date',
                from_date='2010-01-XX',
                to_date='2010-12-31')

        with self.assertRaises(errors.DateFormatError):
            filter.filter_dates(
                mem_a,
                start_col='start_date',
                end_col='end_date',
                from_date='2010-01-01',
                to_date='2010-12-XX')

    def test_filter_dates_does_not_filter_without_dates(self):

        f_mem_a = filter.filter_dates(
            mem_a,
            start_col='start_date',
            end_col='end_date')

        self.assertEqual(f_mem_a.shape, mem_a.shape)
        self.assertTrue((f_mem_a == mem_a).all().all())

    def test_filter_dates_excludes_rows_before_from_date(self):

        f_mem_a = filter.filter_dates(
            mem_a,
            start_col='start_date',
            end_col='end_date',
            from_date='2004-12-31')

        # Only a1 (2001) lies wholly before the from_date
        self.assertEqual(f_mem_a.shape[0], mem_a.shape[0] - 1)
        self.assertEqual(f_mem_a.shape[1], mem_a.shape[1])

        self._assert_row(f_mem_a.iloc[0], *self.ROW_A2)

    def test_filter_dates_excludes_rows_after_to_date(self):

        f_mem_a = filter.filter_dates(
            mem_a,
            start_col='start_date',
            end_col='end_date',
            to_date='2011-01-01')

        # Only a8 (2015) lies wholly after the to_date
        self.assertEqual(f_mem_a.shape[0], mem_a.shape[0] - 1)
        self.assertEqual(f_mem_a.shape[1], mem_a.shape[1])

        self._assert_row(f_mem_a.iloc[-1], *self.ROW_A7)

    def test_filter_dates_excludes_rows_outside_both_dates(self):

        f_mem_a = filter.filter_dates(
            mem_a,
            start_col='start_date',
            end_col='end_date',
            from_date='2004-12-31',
            to_date='2011-01-01')

        self.assertEqual(f_mem_a.shape[0], mem_a.shape[0] - 2)
        self.assertEqual(f_mem_a.shape[1], mem_a.shape[1])

        self._assert_row(f_mem_a.iloc[0], *self.ROW_A2)
        self._assert_row(f_mem_a.iloc[-1], *self.ROW_A7)

    def test_filter_dates_includes_rows_with_partial_instersection(self):

        # Both dates fall inside a membership, so a2 and a7 only partly
        # overlap the requested period
        f_mem_a = filter.filter_dates(
            mem_a,
            start_col='start_date',
            end_col='end_date',
            from_date='2005-06-30',
            to_date='2010-06-30')

        self.assertEqual(f_mem_a.shape[0], mem_a.shape[0] - 2)
        self.assertEqual(f_mem_a.shape[1], mem_a.shape[1])

        self._assert_row(f_mem_a.iloc[0], *self.ROW_A2)
        self._assert_row(f_mem_a.iloc[-1], *self.ROW_A7)

    def test_filter_dates_includes_rows_enclosing_dates(self):

        # A single-day period that sits inside a2 and a5
        f_mem_a = filter.filter_dates(
            mem_a,
            start_col='start_date',
            end_col='end_date',
            from_date='2005-06-30',
            to_date='2005-06-30')

        self.assertEqual(f_mem_a.shape[0], 2)
        self.assertEqual(f_mem_a.shape[1], mem_a.shape[1])

        self._assert_row(f_mem_a.iloc[0], *self.ROW_A2)
        self._assert_row(f_mem_a.iloc[1], *self.ROW_A5)
234 |
235 | # Test filter_memberships -----------------------------------------------------
236 |
class TestFilterMemberships(unittest.TestCase):

    """
    Test that filter_memberships returns a DataFrame with the expected
    properties.

    """

    # Column arguments that match the test data
    VALID_COLUMNS = {
        'tm_id_col': 'membership_id',
        'tm_start_col': 'start_date',
        'tm_end_col': 'end_date',
        'fm_start_col': 'start_date',
        'fm_end_col': 'end_date',
        'join_col': 'person_id'}

    def test_that_filter_dates_raises_missing_column_error(self):

        # NOTE(review): despite its name, this test exercises
        # filter_memberships; the name is kept so existing test selectors
        # keep working.

        # Replace each column argument in turn with a name that does not
        # exist; each case is reported separately
        for argument in self.VALID_COLUMNS:
            with self.subTest(missing=argument):
                columns = dict(self.VALID_COLUMNS)
                columns[argument] = 'no_such_column'
                with self.assertRaises(errors.MissingColumnError):
                    filter.filter_memberships(
                        tm=mem_a,
                        fm=mem_b,
                        **columns)

    def test_that_filter_memberships_filters_correct_memberships(self):

        f_mem_a = filter.filter_memberships(
            tm=mem_a,
            fm=mem_b,
            **self.VALID_COLUMNS)

        self.assertEqual(f_mem_a.shape[0], 4)
        self.assertEqual(f_mem_a.shape[1], mem_a.shape[1])
        self.assertEqual(list(f_mem_a.columns), list(mem_a.columns))

        # Comparing plain lists shows the differing values on failure
        self.assertEqual(
            list(f_mem_a['person_id']),
            ['p1', 'p1', 'p2', 'p2'])

        self.assertEqual(
            list(f_mem_a['membership_id']),
            ['a1', 'a3', 'a6', 'a8'])

        self.assertEqual(list(f_mem_a['start_date']), [
            datetime.date(2001, 1, 1),
            datetime.date(2006, 1, 1),
            datetime.date(2006, 1, 1),
            datetime.date(2015, 1, 1)])

        self.assertEqual(list(f_mem_a['end_date']), [
            datetime.date(2001, 12, 31),
            datetime.date(2006, 12, 31),
            datetime.date(2006, 12, 31),
            datetime.date(2015, 12, 31)])
352 |
--------------------------------------------------------------------------------
/tests/test_lords.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test Lords functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import unittest
7 | from unittest.mock import patch
8 |
9 | import pdpy.lords as lords
10 | import tests.validate as validate
11 |
12 |
13 | # Mocks -----------------------------------------------------------------------
14 |
def mock_fetch_lords_raw():
    # Patched in for pdpy.lords.fetch_lords_raw by the tests below.
    raw = validate.read('lords_raw')
    return raw
17 |
def mock_fetch_lords_memberships_raw():
    # Patched in for pdpy.lords.fetch_lords_memberships_raw by the tests
    # below.
    raw = validate.read('lords_memberships_raw')
    return raw
20 |
def mock_fetch_lords_party_memberships_raw():
    # Patched in for pdpy.lords.fetch_lords_party_memberships_raw by the
    # tests below.
    raw = validate.read('lords_party_memberships_raw')
    return raw
23 |
def mock_fetch_lords_government_roles_raw():
    # Patched in for pdpy.lords.fetch_lords_government_roles_raw by the
    # tests below.
    raw = validate.read('lords_government_roles_raw')
    return raw
26 |
def mock_fetch_lords_opposition_roles_raw():
    # Patched in for pdpy.lords.fetch_lords_opposition_roles_raw by the
    # tests below.
    raw = validate.read('lords_opposition_roles_raw')
    return raw
29 |
def mock_fetch_lords_committee_memberships_raw():
    # Patched in for pdpy.lords.fetch_lords_committee_memberships_raw by the
    # tests below.
    raw = validate.read('lords_committee_memberships_raw')
    return raw
32 |
33 | # Tests -----------------------------------------------------------------------
34 |
class TestFetchLords(unittest.TestCase):

    """Check fetch_lords output against the stored expected results."""

    @patch('pdpy.lords.fetch_lords_raw', mock_fetch_lords_raw)
    @patch('pdpy.lords.fetch_lords_memberships_raw',
           mock_fetch_lords_memberships_raw)
    def test_fetch_lords(self):

        cols = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'full_title',
            'gender']

        # (keyword arguments, name of the expected result) in call order
        cases = [
            ({}, 'fetch_lords'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
                'fetch_lords_from_to'),
            ({'on_date': '2017-06-08'}, 'fetch_lords_from_to')]

        for kwargs, expected_name in cases:
            obs = lords.fetch_lords(**kwargs)
            exp = validate.read(expected_name)
            validate.compare_obs_exp(self, obs, exp, cols)
65 |
66 |
# NOTE(review): the class name looks copy-pasted from the MPs tests; it
# exercises fetch_lords_memberships. Name kept so test selectors still work.
class TestFetchCommonsMemberships(unittest.TestCase):

    """Check fetch_lords_memberships output against stored results."""

    @patch('pdpy.lords.fetch_lords_memberships_raw',
           mock_fetch_lords_memberships_raw)
    def test_fetch_lords_memberships(self):

        cols = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'seat_type_id',
            'seat_type_name',
            'seat_incumbency_id',
            'seat_incumbency_start_date']

        # (keyword arguments, name of the expected result) in call order
        cases = [
            ({}, 'fetch_lords_memberships'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
                'fetch_lords_memberships_from_to'),
            ({'on_date': '2017-06-08'}, 'fetch_lords_memberships_from_to')]

        for kwargs, expected_name in cases:
            obs = lords.fetch_lords_memberships(**kwargs)
            exp = validate.read(expected_name)
            validate.compare_obs_exp(self, obs, exp, cols)
99 |
100 |
class TestFetchLordsPartyMemberships(unittest.TestCase):

    """
    Check fetch_lords_party_memberships output against stored results.

    """

    @patch('pdpy.lords.fetch_lords_party_memberships_raw',
           mock_fetch_lords_party_memberships_raw)
    def test_fetch_lords_party_memberships(self):

        cols = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'party_id',
            'party_mnis_id',
            'party_name',
            'party_membership_start_date']

        # (keyword arguments, name of the expected result) in call order
        cases = [
            ({}, 'fetch_lords_party_memberships'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
                'fetch_lords_party_memberships_from_to'),
            ({'on_date': '2017-06-08'},
                'fetch_lords_party_memberships_from_to'),
            ({'while_lord': False},
                'fetch_lords_party_memberships_while_lord'),
            ({'collapse': True},
                'fetch_lords_party_memberships_collapse')]

        for kwargs, expected_name in cases:
            obs = lords.fetch_lords_party_memberships(**kwargs)
            exp = validate.read(expected_name)
            validate.compare_obs_exp(self, obs, exp, cols)
144 |
145 |
class TestFetchLordsGovernmentRoles(unittest.TestCase):

    """Check fetch_lords_government_roles output against stored results."""

    @patch('pdpy.lords.fetch_lords_government_roles_raw',
           mock_fetch_lords_government_roles_raw)
    def test_fetch_lords_government_roles(self):

        cols = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'position_id',
            'position_name',
            'government_incumbency_id',
            'government_incumbency_start_date']

        # (keyword arguments, name of the expected result) in call order
        cases = [
            ({}, 'fetch_lords_government_roles'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
                'fetch_lords_government_roles_from_to'),
            ({'on_date': '2017-06-08'},
                'fetch_lords_government_roles_from_to'),
            ({'while_lord': False},
                'fetch_lords_government_roles_while_lord')]

        for kwargs, expected_name in cases:
            obs = lords.fetch_lords_government_roles(**kwargs)
            exp = validate.read(expected_name)
            validate.compare_obs_exp(self, obs, exp, cols)
182 |
183 |
class TestFetchLordsOppositionRoles(unittest.TestCase):

    """Check fetch_lords_opposition_roles output against stored results."""

    @patch('pdpy.lords.fetch_lords_opposition_roles_raw',
           mock_fetch_lords_opposition_roles_raw)
    def test_fetch_lords_opposition_roles(self):

        cols = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'position_id',
            'position_name',
            'opposition_incumbency_id',
            'opposition_incumbency_start_date']

        # (keyword arguments, name of the expected result) in call order
        cases = [
            ({}, 'fetch_lords_opposition_roles'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
                'fetch_lords_opposition_roles_from_to'),
            ({'on_date': '2017-06-08'},
                'fetch_lords_opposition_roles_from_to'),
            ({'while_lord': False},
                'fetch_lords_opposition_roles_while_lord')]

        for kwargs, expected_name in cases:
            obs = lords.fetch_lords_opposition_roles(**kwargs)
            exp = validate.read(expected_name)
            validate.compare_obs_exp(self, obs, exp, cols)
220 |
221 |
class TestFetchLordsCommitteeMemberships(unittest.TestCase):

    """
    Check fetch_lords_committee_memberships output against stored results.

    """

    @patch('pdpy.lords.fetch_lords_committee_memberships_raw',
           mock_fetch_lords_committee_memberships_raw)
    def test_fetch_lords_committee_memberships(self):

        cols = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'committee_id',
            'committee_name',
            'committee_membership_id',
            'committee_membership_start_date']

        # (keyword arguments, name of the expected result) in call order
        cases = [
            ({}, 'fetch_lords_committee_memberships'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
                'fetch_lords_committee_memberships_from_to'),
            ({'on_date': '2017-06-08'},
                'fetch_lords_committee_memberships_from_to'),
            ({'while_lord': False},
                'fetch_lords_committee_memberships_while_lord')]

        for kwargs, expected_name in cases:
            obs = lords.fetch_lords_committee_memberships(**kwargs)
            exp = validate.read(expected_name)
            validate.compare_obs_exp(self, obs, exp, cols)
258 |
--------------------------------------------------------------------------------
/tests/test_mps.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test MPs functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import unittest
7 | from unittest.mock import patch
8 |
9 | import pdpy.mps as mps
10 | import tests.validate as validate
11 |
12 | # Mocks -----------------------------------------------------------------------
13 |
def mock_fetch_mps_raw():
    # Patched in for pdpy.mps.fetch_mps_raw by the tests below.
    raw = validate.read('mps_raw')
    return raw
16 |
def mock_fetch_commons_memberships_raw():
    # Patched in for pdpy.mps.fetch_commons_memberships_raw by the tests
    # below.
    raw = validate.read('commons_memberships_raw')
    return raw
19 |
def mock_fetch_mps_party_memberships_raw():
    # Patched in for pdpy.mps.fetch_mps_party_memberships_raw by the tests
    # below.
    raw = validate.read('mps_party_memberships_raw')
    return raw
22 |
def mock_fetch_mps_government_roles_raw():
    # Test double for the raw government roles download; it is passed to
    # @patch as the replacement for pdpy.mps.fetch_mps_government_roles_raw.
    raw = validate.read('mps_government_roles_raw')
    return raw
25 |
def mock_fetch_mps_opposition_roles_raw():
    # Test double named after fetch_mps_opposition_roles_raw; it returns
    # whatever validate.read yields for 'mps_opposition_roles_raw'.
    raw = validate.read('mps_opposition_roles_raw')
    return raw
28 |
def mock_fetch_mps_committee_memberships_raw():
    # Test double named after fetch_mps_committee_memberships_raw; it returns
    # whatever validate.read yields for 'mps_committee_memberships_raw'.
    raw = validate.read('mps_committee_memberships_raw')
    return raw
31 |
32 | # Tests -----------------------------------------------------------------------
33 |
class TestFetchMps(unittest.TestCase):

    """Check fetch_mps output against the stored expected results."""

    @patch('pdpy.mps.fetch_mps_raw', mock_fetch_mps_raw)
    @patch('pdpy.mps.fetch_commons_memberships_raw',
           mock_fetch_commons_memberships_raw)
    def test_fetch_mps(self):

        cols = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'full_title',
            'gender']

        # (keyword arguments, name of the expected result) in call order
        cases = [
            ({}, 'fetch_mps'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
                'fetch_mps_from_to'),
            ({'on_date': '2017-06-08'}, 'fetch_mps_from_to')]

        for kwargs, expected_name in cases:
            obs = mps.fetch_mps(**kwargs)
            exp = validate.read(expected_name)
            validate.compare_obs_exp(self, obs, exp, cols)
64 |
65 |
class TestFetchCommonsMemberships(unittest.TestCase):

    """Check fetch_commons_memberships output against stored results."""

    @patch('pdpy.mps.fetch_commons_memberships_raw',
           mock_fetch_commons_memberships_raw)
    def test_fetch_commons_memberships(self):

        cols = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'constituency_id',
            'constituency_name',
            'seat_incumbency_id',
            'seat_incumbency_start_date']

        # (keyword arguments, name of the expected result) in call order
        cases = [
            ({}, 'fetch_commons_memberships'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
                'fetch_commons_memberships_from_to'),
            ({'on_date': '2017-06-08'},
                'fetch_commons_memberships_from_to')]

        for kwargs, expected_name in cases:
            obs = mps.fetch_commons_memberships(**kwargs)
            exp = validate.read(expected_name)
            validate.compare_obs_exp(self, obs, exp, cols)
98 |
99 |
class TestFetchMpsPartyMemberships(unittest.TestCase):

    """Check that fetch_mps_party_memberships produces the expected output."""

    @patch('pdpy.mps.fetch_mps_party_memberships_raw',
           mock_fetch_mps_party_memberships_raw)
    def test_fetch_mps_party_memberships(self):

        # Columns whose contents are compared value by value
        checked_columns = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'party_id',
            'party_mnis_id',
            'party_name',
            'party_membership_start_date']

        # Fetch arguments paired with the name of the stored expected data;
        # on_date is checked against the same data as the from/to range
        cases = [
            ({}, 'fetch_mps_party_memberships'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
             'fetch_mps_party_memberships_from_to'),
            ({'on_date': '2017-06-08'},
             'fetch_mps_party_memberships_from_to'),
            ({'while_mp': False}, 'fetch_mps_party_memberships_while_mp'),
            ({'collapse': True}, 'fetch_mps_party_memberships_collapse')]

        for fetch_args, expected_name in cases:
            observed = mps.fetch_mps_party_memberships(**fetch_args)
            expected = validate.read(expected_name)
            validate.compare_obs_exp(
                self, observed, expected, checked_columns)
143 |
144 |
class TestFetchMpsGovernmentRoles(unittest.TestCase):

    """Check that fetch_mps_government_roles produces the expected output."""

    @patch('pdpy.mps.fetch_mps_government_roles_raw',
           mock_fetch_mps_government_roles_raw)
    def test_fetch_mps_government_roles(self):

        # Columns whose contents are compared value by value
        checked_columns = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'position_id',
            'position_name',
            'government_incumbency_id',
            'government_incumbency_start_date']

        # Fetch arguments paired with the name of the stored expected data;
        # on_date is checked against the same data as the from/to range
        cases = [
            ({}, 'fetch_mps_government_roles'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
             'fetch_mps_government_roles_from_to'),
            ({'on_date': '2017-06-08'},
             'fetch_mps_government_roles_from_to'),
            ({'while_mp': False}, 'fetch_mps_government_roles_while_mp')]

        for fetch_args, expected_name in cases:
            observed = mps.fetch_mps_government_roles(**fetch_args)
            expected = validate.read(expected_name)
            validate.compare_obs_exp(
                self, observed, expected, checked_columns)
181 |
182 |
class TestFetchMpsOppositionRoles(unittest.TestCase):

    """Check that fetch_mps_opposition_roles produces the expected output."""

    @patch('pdpy.mps.fetch_mps_opposition_roles_raw',
           mock_fetch_mps_opposition_roles_raw)
    def test_fetch_mps_opposition_roles(self):

        # Columns whose contents are compared value by value
        checked_columns = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'position_id',
            'position_name',
            'opposition_incumbency_id',
            'opposition_incumbency_start_date']

        # Fetch arguments paired with the name of the stored expected data;
        # on_date is checked against the same data as the from/to range
        cases = [
            ({}, 'fetch_mps_opposition_roles'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
             'fetch_mps_opposition_roles_from_to'),
            ({'on_date': '2017-06-08'},
             'fetch_mps_opposition_roles_from_to'),
            ({'while_mp': False}, 'fetch_mps_opposition_roles_while_mp')]

        for fetch_args, expected_name in cases:
            observed = mps.fetch_mps_opposition_roles(**fetch_args)
            expected = validate.read(expected_name)
            validate.compare_obs_exp(
                self, observed, expected, checked_columns)
219 |
220 |
class TestFetchMpsCommitteeMemberships(unittest.TestCase):

    """Check that fetch_mps_committee_memberships gives expected output."""

    @patch('pdpy.mps.fetch_mps_committee_memberships_raw',
           mock_fetch_mps_committee_memberships_raw)
    def test_fetch_mps_committee_memberships(self):

        # Columns whose contents are compared value by value
        checked_columns = [
            'person_id',
            'mnis_id',
            'given_name',
            'family_name',
            'display_name',
            'committee_id',
            'committee_name',
            'committee_membership_id',
            'committee_membership_start_date']

        # Fetch arguments paired with the name of the stored expected data;
        # on_date is checked against the same data as the from/to range
        cases = [
            ({}, 'fetch_mps_committee_memberships'),
            ({'from_date': '2017-06-08', 'to_date': '2017-06-08'},
             'fetch_mps_committee_memberships_from_to'),
            ({'on_date': '2017-06-08'},
             'fetch_mps_committee_memberships_from_to'),
            ({'while_mp': False},
             'fetch_mps_committee_memberships_while_mp')]

        for fetch_args, expected_name in cases:
            observed = mps.fetch_mps_committee_memberships(**fetch_args)
            expected = validate.read(expected_name)
            validate.compare_obs_exp(
                self, observed, expected, checked_columns)
257 |
--------------------------------------------------------------------------------
/tests/test_settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test settings functions."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import unittest
7 |
8 | import pdpy.constants as constants
9 | import pdpy.settings as settings
10 |
11 | # Test get_api_url ------------------------------------------------------------
12 |
class GetApiUrl(unittest.TestCase):

    """Check that get_api_url gives the default url when none has been set."""

    def test_that_get_api_url_returns_default_url(self):

        current_url = settings.get_api_url()
        default_url = constants.SETTINGS_API_URL_DEFAULT
        self.assertEqual(current_url, default_url)
25 |
26 | # Test set_api_url ------------------------------------------------------------
27 |
class SetApiUrl(unittest.TestCase):

    """
    Test that set_api_url sets the api url returned by get_api_url.

    """

    def test_that_set_api_url_sets_api_url(self):

        # Restore the default url even if an assertion below fails, so that
        # a failure here cannot leak the custom url into other tests
        self.addCleanup(
            settings.set_api_url, constants.SETTINGS_API_URL_DEFAULT)

        api_url = 'http://localhost:8000/sparql'
        settings.set_api_url(api_url)
        self.assertEqual(settings.get_api_url(), api_url)

        # Setting the default url explicitly must also be reflected
        settings.set_api_url(constants.SETTINGS_API_URL_DEFAULT)
        self.assertEqual(
            settings.get_api_url(),
            constants.SETTINGS_API_URL_DEFAULT)
44 |
45 | # Test reset_api_url ----------------------------------------------------------
46 |
class ResetApiUrl(unittest.TestCase):

    """
    Test that reset_api_url resets the api url returned by get_api_url.

    """

    def test_that_reset_api_url_resets_api_url(self):

        # Restore the default url without relying on the function under
        # test, so a failing assertion or a broken reset_api_url cannot
        # leak the custom url into other tests
        self.addCleanup(
            settings.set_api_url, constants.SETTINGS_API_URL_DEFAULT)

        api_url = 'http://localhost:8000/sparql'
        settings.set_api_url(api_url)
        self.assertEqual(settings.get_api_url(), api_url)

        settings.reset_api_url()
        self.assertEqual(
            settings.get_api_url(),
            constants.SETTINGS_API_URL_DEFAULT)
63 |
--------------------------------------------------------------------------------
/tests/validate.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Manage test data for validation."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import os
7 | import pandas as pd
8 |
9 | # Constants -------------------------------------------------------------------
10 |
# Anchor the data directory to the location of this module so that reading
# and writing test data does not depend on the current working directory
TEST_DATA_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'data')
12 |
13 | # Read and write data ---------------------------------------------------------
14 |
def read(filename):
    """Load the pickle named filename (without extension) from the data dir."""
    pickle_name = '{0}.pkl'.format(filename)
    pickle_path = os.path.join(TEST_DATA_DIR, pickle_name)
    return pd.read_pickle(pickle_path)
19 |
def write(df, filename):
    """Pickle df into the data directory as filename plus a .pkl extension."""
    pickle_name = '{0}.pkl'.format(filename)
    pickle_path = os.path.join(TEST_DATA_DIR, pickle_name)
    df.to_pickle(pickle_path)
23 |
24 | # Comparison function ---------------------------------------------------------
25 |
def compare_obs_exp(self, obs, exp, cols):

    """
    Compare two dataframes on structure and contents of selected columns.

    self is the running test case (any object providing assertEqual and
    assertTrue), obs and exp are the observed and expected dataframes, and
    cols lists the columns whose values are compared element by element.
    Differences are reported through the test case's assertions, each with
    a message saying which check failed.

    """

    # Structure: same number of rows and columns, same column labels
    self.assertEqual(
        obs.shape[0], exp.shape[0],
        msg='Observed and expected row counts differ')
    self.assertEqual(
        obs.shape[1], exp.shape[1],
        msg='Observed and expected column counts differ')
    self.assertTrue(
        (obs.columns == exp.columns).all(),
        msg='Observed columns {0} do not match expected columns {1}'.format(
            list(obs.columns), list(exp.columns)))

    # Contents: only the selected columns are compared value by value
    for col in cols:
        self.assertTrue(
            (obs[col] == exp[col]).all(),
            msg='Values in column {0!r} do not match'.format(col))
36 |
--------------------------------------------------------------------------------
/tests/validate_lords.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Download data for unit testing Lords."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import time
7 |
8 | import pdpy.constants as constants
9 | import pdpy.lords as lords
10 | import tests.validate as validate
11 |
12 | # Mocks data ------------------------------------------------------------------
13 |
def fetch_lords_mocks_data():

    """Download the raw Lords datasets and save them as mocks for tests."""

    # Raw download functions paired with the name each result is saved under
    downloads = [
        (lords.fetch_lords_raw, 'lords_raw'),
        (lords.fetch_lords_memberships_raw, 'lords_memberships_raw'),
        (lords.fetch_lords_party_memberships_raw,
         'lords_party_memberships_raw'),
        (lords.fetch_lords_government_roles_raw,
         'lords_government_roles_raw'),
        (lords.fetch_lords_opposition_roles_raw,
         'lords_opposition_roles_raw'),
        (lords.fetch_lords_committee_memberships_raw,
         'lords_committee_memberships_raw')]

    for download, filename in downloads:
        validate.write(download(), filename)
        # Wait for the configured pause time after every download
        time.sleep(constants.API_PAUSE_TIME)
47 |
48 | # Validation data -------------------------------------------------------------
49 |
def fetch_lords_validation_data():

    """Fetch the processed Lords datasets and save them as validation data."""

    # Argument sets shared by several of the fetches below
    no_args = {}
    date_range = {'from_date': '2017-06-08', 'to_date': '2017-06-08'}
    while_lord_false = {'while_lord': False}

    # Fetch function, its keyword arguments, and the name the result is
    # saved under, in the order the data is fetched
    fetches = [
        (lords.fetch_lords, no_args, 'fetch_lords'),
        (lords.fetch_lords, date_range, 'fetch_lords_from_to'),
        (lords.fetch_lords_memberships, no_args,
         'fetch_lords_memberships'),
        (lords.fetch_lords_memberships, date_range,
         'fetch_lords_memberships_from_to'),
        (lords.fetch_lords_party_memberships, no_args,
         'fetch_lords_party_memberships'),
        (lords.fetch_lords_party_memberships, date_range,
         'fetch_lords_party_memberships_from_to'),
        (lords.fetch_lords_party_memberships, while_lord_false,
         'fetch_lords_party_memberships_while_lord'),
        (lords.fetch_lords_party_memberships, {'collapse': True},
         'fetch_lords_party_memberships_collapse'),
        (lords.fetch_lords_government_roles, no_args,
         'fetch_lords_government_roles'),
        (lords.fetch_lords_government_roles, date_range,
         'fetch_lords_government_roles_from_to'),
        (lords.fetch_lords_government_roles, while_lord_false,
         'fetch_lords_government_roles_while_lord'),
        (lords.fetch_lords_opposition_roles, no_args,
         'fetch_lords_opposition_roles'),
        (lords.fetch_lords_opposition_roles, date_range,
         'fetch_lords_opposition_roles_from_to'),
        (lords.fetch_lords_opposition_roles, while_lord_false,
         'fetch_lords_opposition_roles_while_lord'),
        (lords.fetch_lords_committee_memberships, no_args,
         'fetch_lords_committee_memberships'),
        (lords.fetch_lords_committee_memberships, date_range,
         'fetch_lords_committee_memberships_from_to'),
        (lords.fetch_lords_committee_memberships, while_lord_false,
         'fetch_lords_committee_memberships_while_lord')]

    for fetch, fetch_args, filename in fetches:
        validate.write(fetch(**fetch_args), filename)
        # Wait for the configured pause time after every fetch
        time.sleep(constants.API_PAUSE_TIME)
143 |
144 | # Fetch all data --------------------------------------------------------------
145 |
def fetch_lords_test_data():

    """Refresh both the mocks and the validation data for the Lords tests."""

    # Mocks are fetched first, then the validation data
    for fetch_step in (fetch_lords_mocks_data, fetch_lords_validation_data):
        fetch_step()
151 |
--------------------------------------------------------------------------------
/tests/validate_mps.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Download data for unit testing MPs."""
3 |
4 | # Imports ---------------------------------------------------------------------
5 |
6 | import time
7 |
8 | import pdpy.constants as constants
9 | import pdpy.mps as mps
10 | import tests.validate as validate
11 |
12 | # Mocks data ------------------------------------------------------------------
13 |
def fetch_mps_mocks_data():

    """Download the raw MPs datasets and save them as mocks for tests."""

    # Raw download functions paired with the name each result is saved under
    downloads = [
        (mps.fetch_mps_raw, 'mps_raw'),
        (mps.fetch_commons_memberships_raw, 'commons_memberships_raw'),
        (mps.fetch_mps_party_memberships_raw, 'mps_party_memberships_raw'),
        (mps.fetch_mps_government_roles_raw, 'mps_government_roles_raw'),
        (mps.fetch_mps_opposition_roles_raw, 'mps_opposition_roles_raw'),
        (mps.fetch_mps_committee_memberships_raw,
         'mps_committee_memberships_raw')]

    for download, filename in downloads:
        validate.write(download(), filename)
        # Wait for the configured pause time after every download
        time.sleep(constants.API_PAUSE_TIME)
47 |
48 | # Validation data -------------------------------------------------------------
49 |
def fetch_mps_validation_data():

    """Fetch the processed MPs datasets and save them as validation data."""

    # Argument sets shared by several of the fetches below
    no_args = {}
    date_range = {'from_date': '2017-06-08', 'to_date': '2017-06-08'}
    while_mp_false = {'while_mp': False}

    # Fetch function, its keyword arguments, and the name the result is
    # saved under, in the order the data is fetched
    fetches = [
        (mps.fetch_mps, no_args, 'fetch_mps'),
        (mps.fetch_mps, date_range, 'fetch_mps_from_to'),
        (mps.fetch_commons_memberships, no_args,
         'fetch_commons_memberships'),
        (mps.fetch_commons_memberships, date_range,
         'fetch_commons_memberships_from_to'),
        (mps.fetch_mps_party_memberships, no_args,
         'fetch_mps_party_memberships'),
        (mps.fetch_mps_party_memberships, date_range,
         'fetch_mps_party_memberships_from_to'),
        (mps.fetch_mps_party_memberships, while_mp_false,
         'fetch_mps_party_memberships_while_mp'),
        (mps.fetch_mps_party_memberships, {'collapse': True},
         'fetch_mps_party_memberships_collapse'),
        (mps.fetch_mps_government_roles, no_args,
         'fetch_mps_government_roles'),
        (mps.fetch_mps_government_roles, date_range,
         'fetch_mps_government_roles_from_to'),
        (mps.fetch_mps_government_roles, while_mp_false,
         'fetch_mps_government_roles_while_mp'),
        (mps.fetch_mps_opposition_roles, no_args,
         'fetch_mps_opposition_roles'),
        (mps.fetch_mps_opposition_roles, date_range,
         'fetch_mps_opposition_roles_from_to'),
        (mps.fetch_mps_opposition_roles, while_mp_false,
         'fetch_mps_opposition_roles_while_mp'),
        (mps.fetch_mps_committee_memberships, no_args,
         'fetch_mps_committee_memberships'),
        (mps.fetch_mps_committee_memberships, date_range,
         'fetch_mps_committee_memberships_from_to'),
        (mps.fetch_mps_committee_memberships, while_mp_false,
         'fetch_mps_committee_memberships_while_mp')]

    for fetch, fetch_args, filename in fetches:
        validate.write(fetch(**fetch_args), filename)
        # Wait for the configured pause time after every fetch
        time.sleep(constants.API_PAUSE_TIME)
143 |
144 | # Fetch all data --------------------------------------------------------------
145 |
def fetch_mps_test_data():

    """Refresh both the mocks and the validation data for the MPs tests."""

    # Mocks are fetched first, then the validation data
    for fetch_step in (fetch_mps_mocks_data, fetch_mps_validation_data):
        fetch_step()
151 |
--------------------------------------------------------------------------------