├── result.png
├── ballot21.csv
├── ballot22.csv
├── ballot23final.csv
├── ballot24final.csv
├── README.md
├── .gitignore
└── analyze_linear_transfer.py


/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harelc/elections-vote-transfer/HEAD/result.png


--------------------------------------------------------------------------------
/ballot21.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harelc/elections-vote-transfer/HEAD/ballot21.csv


--------------------------------------------------------------------------------
/ballot22.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harelc/elections-vote-transfer/HEAD/ballot22.csv


--------------------------------------------------------------------------------
/ballot23final.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harelc/elections-vote-transfer/HEAD/ballot23final.csv


--------------------------------------------------------------------------------
/ballot24final.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/harelc/elections-vote-transfer/HEAD/ballot24final.csv


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # elections-vote-transfer
 2 | ## Analysis of vote transfer between two elections
 3 | ### Harel Cain, September 2019
 4 | 
 5 | The code analyzes the ballot results of ~10,000 polling stations that have
 6 | the same identity (locale and polling station number) in the elections for the 21st and the 22nd Knesset.
 7 | 
 8 | It assumes there is a linear transfer matrix $M$ such that $V_{21} M \approx V_{22}$.
 9 | 
10 | It solves for the matrix $M$ ($\arg\min_M \lVert V_{21} M - V_{22} \rVert$) in one of three ways:
11 | 
12 | 1. Closed-form linear least squares. This way M is not constrained: it may contain negative numbers as well as numbers > 1, and its rows need not sum up to 1.
13 | 2. Non-negative least squares. This handles the non-negativity constraint but still doesn't guarantee stochasticity.
14 | 3. Convex optimization with the SCS solver, with the constraints 0 <= M <= 1 and M.sum(axis=1) = 1.
15 | 
16 | There is an option of treating the non-participating voters as an additional party.
17 | 
18 | The motivation and idea came from Itamar Mushkin. See https://www.themarker.com/techblogs/ormigoldstein/BLOG-1.6567019.
19 | 
20 | The election results are taken from https://votes21.bechirot.gov.il/ and https://votes22.bechirot.gov.il/ (20/09/2019 02:37 version).
21 | ![](result.png)
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/analyze_linear_transfer.py:
--------------------------------------------------------------------------------
  1 | #### Written by Harel Cain, September 2019
  2 | #### Thanks to Itamar Mushkin for inspiration and a code piece
  3 | 
  4 | import cvxpy as cvx
  5 | import numpy as np
  6 | import plotly.graph_objects as go
  7 | 
  8 | import pandas as pd
  9 | 
 10 | pd.options.mode.chained_assignment = None  # default='warn'
 11 | from scipy.optimize import nnls
 12 | 
# Semi-transparent RGBA colours (alpha 0.4) for the destination side of the
# Sankey diagram. sankey() uses this list for the destination node colours and
# for the link colours, looked up by the position of the destination label.
DESTINATION_PARTY_COLORS = [
    'rgba(31, 119, 180, 0.4)',
    'rgba(255, 127, 14, 0.4)',
    'rgba(44, 160, 44, 0.4)',
    'rgba(214, 39, 40, 0.4)',
    'rgba(148, 103, 189, 0.4)',
    'rgba(140, 86, 75, 0.4)',
    'rgba(227, 119, 194, 0.4)',
    'rgba(205, 155, 105, 0.4)',
    'rgba(188, 189, 34, 0.4)',
    'rgba(23, 190, 207, 0.4)',
    'rgba(201, 201, 255, 0.4)',
    'rgba(255, 189, 189, 0.4)',
    'rgba(181, 234, 215, 0.4)',
    'rgba(177, 117, 189, 0.4)',
    'rgba(50, 50 ,50 , 0.4)']
 29 | 
 30 | 
 31 | def adapt_df(df, parties_symbols, parties_full_names, include_no_vote=True, ballot_number_field_name='קלפי'):
 32 |     assert (len(parties_symbols) == len(parties_full_names))
 33 |     print(f'{len(df)} precincts analyzed')
 34 |     df['ballot_id'] = df['סמל ישוב'].astype(str) + '__' + \
 35 |                       df[ballot_number_field_name].astype(str).copy()
 36 |     df = df.set_index('ballot_id')
 37 |     eligible_voters = df['בזב']
 38 |     total_voters = df['מצביעים']
 39 |     df = df.reindex(sorted(df.columns), axis=1)
 40 |     df_trimmed = df[df['סמל ישוב'] != 9999]
 41 |     print(f'{len(df_trimmed)} precincts after discarding city symbol 9999')
 42 | 
 43 |     df = df[parties_symbols]
 44 |     df_trimmed = df_trimmed[parties_symbols]
 45 |     df.rename(columns={x: y for x, y in zip(parties_symbols, parties_full_names)}, inplace=True)
 46 |     df_trimmed.rename(columns={x: y for x, y in zip(parties_symbols, parties_full_names)}, inplace=True)
 47 |     if include_no_vote:
 48 |         df['לא הצביע'] = eligible_voters - total_voters
 49 |         df_trimmed['לא הצביע'] = eligible_voters - total_voters
 50 |     return df_trimmed, df
 51 | 
 52 | 
 53 | def solve_transfer_coefficients(x_data, y_data, verbose):
 54 |     M = cvx.Variable((x_data.shape[1], y_data.shape[1]))
 55 |     constraints = [0 <= M, M <= 1, cvx.sum(M, axis=1) == 1]
 56 |     objective = cvx.Minimize(cvx.norm((x_data @ M) - y_data, 'fro'))
 57 |     prob = cvx.Problem(objective, constraints)
 58 |     prob.solve(solver='SCS', verbose=True, max_iters=20000)
 59 |     M = M.value
 60 | 
 61 |     if verbose:
 62 |         print(M.min())  # should be close to 0
 63 |         print(M.max())  # should be close to 1
 64 |         print(M.sum(axis=1).min())  # should be close to 1
 65 |         print(M.sum(axis=1).max())  # should be close to 1
 66 |     return M
 67 | 
 68 | 
 69 | def sankey(vote_movements, before_labels, after_labels, n_ballots):
 70 |     import time
 71 |     source, target = np.meshgrid(np.arange(0, len(before_labels)),
 72 |                                  np.arange(len(before_labels), len(before_labels) + len(after_labels)))
 73 |     before_labels = [x + '_24' for x in before_labels]
 74 |     after_labels = [x + '_25' for x in after_labels]
 75 |     source = source.flatten()
 76 |     target = target.flatten()
 77 | 
 78 |     fig = go.Figure(data=[go.Sankey(
 79 |         node=dict(
 80 |             pad=12,
 81 |             thickness=8,
 82 |             label=list(before_labels) + list(after_labels),
 83 |             color=['gray'] * len(before_labels) + DESTINATION_PARTY_COLORS
 84 |         ),
 85 |         link=dict(
 86 |             source=source,  # indices correspond to labels, eg A1, A2, A2, B1, ...
 87 |             target=target,
 88 |             value=vote_movements.flatten(),
 89 |             color=[DESTINATION_PARTY_COLORS[x - len(before_labels)] for x in target],
 90 |         ))])
 91 | 
 92 |     fig.update_layout(title_text=f"""
 93 | Analysis of vote transfer between the elections for the 24th and the 25th Knessets.
 94 | <br>Based on analysis of {n_ballots} precincts whose serial number appeared in both. 
 95 | <br>Created by Harel Cain on {time.strftime('%d.%m.%Y %H%:%M')}. All rights reserved.
 96 | <br>Source code: https://github.com/harelc/elections-vote-transfer/
 97 | <br>
 98 | <br>""", title_font_size=16, font_size=14)
 99 |     fig.write_html("index.html")
100 |     fig.show()
101 | 
102 | 
if __name__ == '__main__':
    # Estimator used for the transfer matrix: one of
    # "convex solver", "nnls" or "closed form".
    method = "convex solver"  # "nnls", "closed form"
    df_previous = pd.read_csv('ballot24final.csv', encoding='iso8859_8')
    df_current = pd.read_csv('ballot25.csv')

    # 23rd knesset
    # parties_previous_full = 'יש_עתיד ליכוד המשותפת ש״ס ישראל_ביתנו יהדות_התורה ימינה העבודה'.split()
    # parties_previous = 'פה מחל ודעם שס ל ג טב אמת'.split()

    # 24th knesset
    parties_previous_full = 'יש_עתיד הליכוד המשותפת ש״ס ישראל_ביתנו יהדות_התורה הציונות_הדתית העבודה ימינה מרצ רע״ם כחול_לבן תקווה_חדשה '.split()
    parties_previous = 'פה מחל ודעם שס ל ג ט אמת ב מרצ עם כן ת'.split()

    # 25th knesset
    parties_current_full = 'העבודה אביר_קארה הבית_היהודי יהדות_התורה בל״ד חד״ש_תע״ל הציונות_הדתית המחנה_הממלכתי ישראל_ביתנו הליכוד מרצ רע״ם יש_עתיד ש״ס'.split()
    parties_current = 'אמת אצ ב ג ד ום ט כן ל מחל מרצ עם פה שס'.split()

    df_previous, df_previous_full = adapt_df(df_previous, parties_previous, parties_previous_full, include_no_vote=True)
    df_current, df_current_full = adapt_df(df_current, parties_current, parties_current_full, include_no_vote=True)

    # Keep only the precincts (ballot ids) present in both elections.
    merged_df = pd.merge(df_previous, df_current, how='inner', left_index=True, right_index=True)

    # NOTE(review): each merged row holds the columns of both elections, so
    # this maximum is taken over the combined counts - confirm that is the
    # figure intended by "Largest ballot".
    print('Analyzing {} precincts common to both elections. Largest ballot has {} votes.'.format(
        len(merged_df),
        merged_df.sum(axis=1).max()
    ))
    values_previous = df_previous.loc[merged_df.index].values
    values_current = df_current.loc[merged_df.index].values

    # In every branch transfer_matrix has one row per current column and one
    # column per previous column, i.e.
    # values_current ~ values_previous @ transfer_matrix.T
    if method == "closed form":
        #### method 1: closed-form solution with no non-negative constraint
        transfer_matrix = values_current.T @ values_previous @ np.linalg.pinv(values_previous.T @ values_previous)

    elif method == "nnls":
        ### method 2: non-negative least square solution
        transfer_matrix = np.zeros((values_current.shape[1], values_previous.shape[1]))
        for i in range(values_current.shape[1]):
            # nnls returns (solution, residual norm); the norm is not used.
            transfer_matrix[i, :], _ = nnls(values_previous, values_current[:, i])

    elif method == "convex solver":
        ## method 3: use convex solver with constraints
        transfer_matrix = solve_transfer_coefficients(values_previous, values_current, verbose=True).T

    else:
        # Fail here instead of hitting a NameError on transfer_matrix below.
        raise ValueError(f'Unknown method: {method!r}')

    # Coefficient of determination of the fit over all precincts and parties.
    y_bar = values_current.mean(axis=0)
    ss_tot = ((values_current - y_bar) ** 2).sum()
    ss_res = ((values_current - values_previous @ transfer_matrix.T) ** 2).sum()
    print('R^2 is {:3.3f}'.format(1. - ss_res / ss_tot))
    print(transfer_matrix.sum(axis=0))
    print(transfer_matrix.sum(axis=1))

    # Scale each source column by that source's total over the full
    # (untrimmed) previous table to turn fractions into absolute flows.
    vote_movements = transfer_matrix * df_previous_full.sum(axis=0).values
    print('Removing vote movements smaller than 5000')
    vote_movements[vote_movements < 5000] = 0.

    sankey(vote_movements, df_previous.columns.values, df_current.columns.values, n_ballots=len(merged_df))
163 | 


--------------------------------------------------------------------------------