├── LICENSE ├── functions.py ├── game_result_by_evaluation.ipynb ├── position_sharpness.ipynb └── queries.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Chris Goldammer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# libpq-style connection strings for the local Postgres instance (port 5434).
CONNSTRING_PROD = "host=localhost port=5434 user=postgres dbname=chess_prod"
CONNSTRING_DEV = "host=localhost port=5434 user=postgres dbname=chess_dev"

# Maps the integer code of a position statistic to its column-style name.
# The codes follow the order of the Haskell `StatType` enum quoted below,
# with "in_check" appended as code 19.
names = dict(enumerate((
    "number_moves_own",
    "number_moves_opp",
    "number_checks_own",
    "number_checks_opp",
    "number_takes_own",
    "number_takes_opp",
    "number_takes_pawn_own",
    "number_takes_pawn_opp",
    "opp_kings",
    "piece_values_own",
    "piece_values_opp",
    "kings_pawn_own",
    "kings_pawn_opp",
    "king_row_own",
    "king_row_opp",
    "queens_own",
    "queens_opp",
    "rooks_own",
    "rooks_opp",
    "in_check",
)))

# data StatType = NumberMovesOwn
#               | NumberMovesOpp
#               | NumberChecksOwn
#               | NumberChecksOpp
#               | NumberTakesOwn
#               | NumberTakesOpp
#               | NumberTakesPawnOwn
#               | NumberTakesPawnOpp
#               | OppKings
#               | PieceValuesOwn
#               | PieceValuesOpp
#               | KingPawnsOwn
#               | KingPawnsOpp
#               | KingRowOwn
#               | KingRowOpp
#               | QueensOwn
#               | QueensOpp
#               | RooksOwn
#               | RooksOpp deriving (Eq, Enum)


def top_code(x, val):
    """Cap every entry of *x* that exceeds *val* at *val*.

    *x* must support element-wise comparison and boolean-mask assignment
    (e.g. a NumPy array or pandas Series). The capping happens IN PLACE:
    the object passed in is modified and that same object is returned.
    """
    too_high = x > val
    x[too_high] = val
    return x


def bottom_code(x, val):
    """Raise every entry of *x* that is below *val* up to *val*.

    Same contract as ``top_code``: *x* is modified in place and returned.
    """
    too_low = x < val
    x[too_low] = val
    return x


def top_and_bottom(x, bottom, top):
    """Clamp *x* into the range [*bottom*, *top*], in place.

    The lower bound is applied first, then the upper bound; the (mutated)
    input object is returned.
    """
    floored = bottom_code(x, bottom)
    return top_code(floored, top)
advantage, as measured by the computer evaluation.\n", 10 | "\n", 11 | "For instance, we would expect a player to obtain roughly 50% of all available points if they have an equal position against a similarly-rated player. But this percentage will increase if they are higher-rated than their opponent.\n", 12 | "\n", 13 | "I find that, if you are playing against someone who is 300 points lower-rated than you, and you are in a losing position (-2), then you're still expected to get more than 50% of points!\n", 14 | "\n", 15 | "The impact of the rating on the win percentage is not strongly affected by player quality, the effects are similarly strong for a sample of strong players (Elo > 2000) and super tournaments." 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# Setup" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 5, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import pandas as pd\n", 32 | "import numpy as np\n", 33 | "import psycopg2\n", 34 | "import pandas.io.sql as sqlio\n", 35 | "import statsmodels.formula.api as smf\n", 36 | "import imp" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 6, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "" 48 | ] 49 | }, 50 | "execution_count": 6, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "import queries\n", 57 | "import functions as f\n", 58 | "imp.reload(f); imp.reload(queries)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 7, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "pd.set_option('precision', 2) \n", 68 | "pd.set_option('display.width', 200)\n", 69 | "\n", 70 | "%matplotlib inline\n", 71 | "import matplotlib\n", 72 | "import numpy as np\n", 73 | "import matplotlib.pyplot as plt" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 13, 79 | "metadata": {}, 
80 | "outputs": [], 81 | "source": [ 82 | "use_dev = False\n", 83 | "connstring = f.CONNSTRING_DEV if use_dev else f.CONNSTRING_PROD\n", 84 | "conn = psycopg2.connect(connstring)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# Data cleaning" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "We look only at moves between move 10 and 40 of a game. Thus we exclude moves in which most players are still in book and also long endgames which provide a lot of observations, but little change in the status of the game." 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 18, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "params = {\n", 108 | " 'move_number_start': 10,\n", 109 | " 'move_number_end': 40\n", 110 | "}" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 24, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "(572665, 6)" 122 | ] 123 | }, 124 | "execution_count": 24, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "df = sqlio.read_sql_query(queries.q_rating, conn, params=params) \n", 131 | "df.shape" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "## Summary statistics" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "### Number of observations " 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 25, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "kingbase_random 532836\n", 157 | "Rejkjavik Open 2018 23716\n", 158 | "Candidates 2011-2018 5916\n", 159 | "Wijk An Zee (Tata Steel) 2012-2018 5193\n", 160 | "Supertournaments 2017 4461\n", 161 | "World Championships 1886-2014 543\n", 162 | "Name: db_name, dtype: int64" 163 | ] 164 | 
}, 165 | "execution_count": 25, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "df.db_name.value_counts()" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 26, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "df['difference'] = df.rating_own - df.rating_opponent \n", 181 | "df['result'] = df.win + 0.5 * df.draw" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "### Standard deviation of rating difference\n", 189 | "The standard deviation is much smaller in elite tournaments than in opens:" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 27, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "db_name\n", 201 | "Candidates 2011-2018 35.01\n", 202 | "Rejkjavik Open 2018 321.70\n", 203 | "Supertournaments 2017 61.70\n", 204 | "Wijk An Zee (Tata Steel) 2012-2018 87.20\n", 205 | "World Championships 1886-2014 82.67\n", 206 | "kingbase_random 173.47\n", 207 | "Name: difference, dtype: float64" 208 | ] 209 | }, 210 | "execution_count": 27, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "df.groupby('db_name').difference.std()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "## Data cleaning" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "Creating a grouped variable for the rating difference" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 28, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "df['difference_group'] = round(df.difference / 200)\n", 240 | "diff_max = 3\n", 241 | "df.loc[df.difference_group < -diff_max, 'difference_group'] = -diff_max\n", 242 | "df.loc[df.difference_group > diff_max, 'difference_group'] = diff_max\n", 243 | "\n", 
244 | "df['eval_group'] = round(df.ev / 100)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "# Summarizing the data" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 35, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "df_king = df.query('db_name == \"kingbase_random\"')" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 36, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "means = pd.DataFrame(\n", 270 | " df_king\n", 271 | " .query('-3 <= eval_group <= 3')\n", 272 | " .groupby(['difference_group', 'eval_group'])\n", 273 | " .result\n", 274 | " .mean()\n", 275 | ")\n", 276 | "counts = (df_king\n", 277 | " .query('-3 <= eval_group <= 3')\n", 278 | " .groupby(['difference_group', 'eval_group'])\n", 279 | " .result\n", 280 | " .count())\n", 281 | "\n", 282 | "means['counts'] = counts \n", 283 | "means = means.query('counts >= 10 and difference_group >= 0')\n", 284 | "del means['counts']" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 42, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "table = means.unstack(0) * 100 \n", 294 | "table.index = [int(v) for v in table.index]\n", 295 | "def group_name(v):\n", 296 | " return '%s to %s' % (v*200-100, v*200+100)\n", 297 | "table.columns = [group_name(v) for v in table.columns.get_level_values(1)]" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 43, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "" 309 | ] 310 | }, 311 | "execution_count": 43, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | }, 315 | { 316 | "data": { 317 | "image/png": "\n", 318 | "text/plain": [ 319 | "
" 320 | ] 321 | }, 322 | "metadata": { 323 | "needs_background": "light" 324 | }, 325 | "output_type": "display_data" 326 | } 327 | ], 328 | "source": [ 329 | "fig, ax = plt.subplots()\n", 330 | "ax.set_xlabel(\"Evaluation\", fontsize=20)\n", 331 | "ax.set_ylabel(\"Result percentage\", fontsize=20)\n", 332 | "settings = {\n", 333 | " 'title': \"Result percentage as function of evaluation and rating difference\",\n", 334 | " 'fontsize': 20,\n", 335 | " 'legend': True,\n", 336 | " 'figsize': (10, 10),\n", 337 | " 'lw': 4,\n", 338 | " 'ax': ax\n", 339 | "}\n", 340 | "table.plot(**settings)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "The graph shows the expected result percentage as a function of the position evaluation and the rating difference to the opponent.\n", 348 | "\n", 349 | "For instance, we find that, if you play against someone who is 400 points lower-rated than you and your evaluation is -3, then you are still obtaining more than 40% of the points. This is among players with Elo above 2000.\n", 350 | "\n", 351 | "Note, however, that this is just one point on this graph. To quantify this by summarizing all the data, we work with a linear regression, below. " 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "The same data in table format" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 44, 364 | "metadata": {}, 365 | "outputs": [ 366 | { 367 | "data": { 368 | "text/html": [ 369 | "
\n", 370 | "\n", 383 | "\n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | "
-100.0 to 100.0100.0 to 300.0300.0 to 500.0500.0 to 700.0
-37.3415.9834.40NaN
-215.3829.4139.17NaN
-130.7850.3756.4179.41
052.5871.1581.5095.54
166.2782.8290.7998.02
287.7294.1195.96100.00
393.5497.1297.35100.00
\n", 445 | "
" 446 | ], 447 | "text/plain": [ 448 | " -100.0 to 100.0 100.0 to 300.0 300.0 to 500.0 500.0 to 700.0\n", 449 | "-3 7.34 15.98 34.40 NaN\n", 450 | "-2 15.38 29.41 39.17 NaN\n", 451 | "-1 30.78 50.37 56.41 79.41\n", 452 | " 0 52.58 71.15 81.50 95.54\n", 453 | " 1 66.27 82.82 90.79 98.02\n", 454 | " 2 87.72 94.11 95.96 100.00\n", 455 | " 3 93.54 97.12 97.35 100.00" 456 | ] 457 | }, 458 | "execution_count": 44, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "table" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "## Quantifying the rating-evaluation tradeoff\n", 472 | "We now try to quantify these effects and calculate the tradeoff between player rating and the position evaluation. We do this using a linear regression." 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": {}, 478 | "source": [ 479 | "Creating helper variables that are scaled, which gives us nicer regression coefficients." 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 45, 485 | "metadata": {}, 486 | "outputs": [], 487 | "source": [ 488 | "df['rating_own_small'] = df.rating_own / 100\n", 489 | "df['rating_opponent_small'] = df.rating_opponent / 100\n", 490 | "df['diff_small'] = df.difference / 100\n", 491 | "df['eval_small'] = df.ev / 100" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 47, 497 | "metadata": {}, 498 | "outputs": [ 499 | { 500 | "data": { 501 | "text/html": [ 502 | "\n", 503 | "\n", 504 | "\n", 505 | " \n", 506 | "\n", 507 | "\n", 508 | " \n", 509 | "\n", 510 | "\n", 511 | " \n", 512 | "\n", 513 | "\n", 514 | " \n", 515 | "\n", 516 | "\n", 517 | " \n", 518 | "\n", 519 | "\n", 520 | " \n", 521 | "\n", 522 | "\n", 523 | " \n", 524 | "\n", 525 | "\n", 526 | " \n", 527 | "\n", 528 | "\n", 529 | " \n", 530 | "\n", 531 | "
OLS Regression Results
Dep. Variable: result R-squared: 0.362
Model: OLS Adj. R-squared: 0.362
Method: Least Squares F-statistic: 1.453e+05
Date: Sat, 01 Jan 2022 Prob (F-statistic): 0.00
Time: 12:55:14 Log-Likelihood: -1.4711e+05
No. Observations: 511532 AIC: 2.942e+05
Df Residuals: 511529 BIC: 2.943e+05
Df Model: 2
Covariance Type: nonrobust
\n", 532 | "\n", 533 | "\n", 534 | " \n", 535 | "\n", 536 | "\n", 537 | " \n", 538 | "\n", 539 | "\n", 540 | " \n", 541 | "\n", 542 | "\n", 543 | " \n", 544 | "\n", 545 | "
coef std err t P>|t| [0.025 0.975]
Intercept 0.5119 0.000 1122.939 0.000 0.511 0.513
eval_small 0.1280 0.000 336.778 0.000 0.127 0.129
diff_small 0.0875 0.000 325.704 0.000 0.087 0.088
\n", 546 | "\n", 547 | "\n", 548 | " \n", 549 | "\n", 550 | "\n", 551 | " \n", 552 | "\n", 553 | "\n", 554 | " \n", 555 | "\n", 556 | "\n", 557 | " \n", 558 | "\n", 559 | "
Omnibus: 7838.850 Durbin-Watson: 0.076
Prob(Omnibus): 0.000 Jarque-Bera (JB): 5068.977
Skew: -0.108 Prob(JB): 0.00
Kurtosis: 2.562 Cond. No. 1.84


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." 560 | ], 561 | "text/plain": [ 562 | "\n", 563 | "\"\"\"\n", 564 | " OLS Regression Results \n", 565 | "==============================================================================\n", 566 | "Dep. Variable: result R-squared: 0.362\n", 567 | "Model: OLS Adj. R-squared: 0.362\n", 568 | "Method: Least Squares F-statistic: 1.453e+05\n", 569 | "Date: Sat, 01 Jan 2022 Prob (F-statistic): 0.00\n", 570 | "Time: 12:55:14 Log-Likelihood: -1.4711e+05\n", 571 | "No. Observations: 511532 AIC: 2.942e+05\n", 572 | "Df Residuals: 511529 BIC: 2.943e+05\n", 573 | "Df Model: 2 \n", 574 | "Covariance Type: nonrobust \n", 575 | "==============================================================================\n", 576 | " coef std err t P>|t| [0.025 0.975]\n", 577 | "------------------------------------------------------------------------------\n", 578 | "Intercept 0.5119 0.000 1122.939 0.000 0.511 0.513\n", 579 | "eval_small 0.1280 0.000 336.778 0.000 0.127 0.129\n", 580 | "diff_small 0.0875 0.000 325.704 0.000 0.087 0.088\n", 581 | "==============================================================================\n", 582 | "Omnibus: 7838.850 Durbin-Watson: 0.076\n", 583 | "Prob(Omnibus): 0.000 Jarque-Bera (JB): 5068.977\n", 584 | "Skew: -0.108 Prob(JB): 0.00\n", 585 | "Kurtosis: 2.562 Cond. No. 
1.84\n", 586 | "==============================================================================\n", 587 | "\n", 588 | "Notes:\n", 589 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", 590 | "\"\"\"" 591 | ] 592 | }, 593 | "execution_count": 47, 594 | "metadata": {}, 595 | "output_type": "execute_result" 596 | } 597 | ], 598 | "source": [ 599 | "df_king = df.query('db_name == \"kingbase_random\"')\n", 600 | "reg = smf.ols('result ~ eval_small + diff_small', data=df_king.query('-5 <= eval_small <= 5')).fit()\n", 601 | "params = reg.params\n", 602 | "reg.summary()" 603 | ] 604 | }, 605 | { 606 | "cell_type": "markdown", 607 | "metadata": {}, 608 | "source": [ 609 | "To check for a non-linear effect, I include a squared term in the regression. The coefficient is small." 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": 50, 615 | "metadata": {}, 616 | "outputs": [ 617 | { 618 | "data": { 619 | "text/html": [ 620 | "\n", 621 | "\n", 622 | "\n", 623 | " \n", 624 | "\n", 625 | "\n", 626 | " \n", 627 | "\n", 628 | "\n", 629 | " \n", 630 | "\n", 631 | "\n", 632 | " \n", 633 | "\n", 634 | "\n", 635 | " \n", 636 | "\n", 637 | "\n", 638 | " \n", 639 | "\n", 640 | "\n", 641 | " \n", 642 | "\n", 643 | "\n", 644 | " \n", 645 | "\n", 646 | "\n", 647 | " \n", 648 | "\n", 649 | "
OLS Regression Results
Dep. Variable: result R-squared: 0.362
Model: OLS Adj. R-squared: 0.362
Method: Least Squares F-statistic: 9.687e+04
Date: Sat, 01 Jan 2022 Prob (F-statistic): 0.00
Time: 12:55:38 Log-Likelihood: -1.4709e+05
No. Observations: 511532 AIC: 2.942e+05
Df Residuals: 511528 BIC: 2.942e+05
Df Model: 3
Covariance Type: nonrobust
\n", 650 | "\n", 651 | "\n", 652 | " \n", 653 | "\n", 654 | "\n", 655 | " \n", 656 | "\n", 657 | "\n", 658 | " \n", 659 | "\n", 660 | "\n", 661 | " \n", 662 | "\n", 663 | "\n", 664 | " \n", 665 | "\n", 666 | "
coef std err t P>|t| [0.025 0.975]
Intercept 0.5129 0.000 1038.282 0.000 0.512 0.514
eval_small 0.1281 0.000 336.586 0.000 0.127 0.129
I(eval_small * eval_small) -0.0007 0.000 -5.364 0.000 -0.001 -0.000
diff_small 0.0875 0.000 325.726 0.000 0.087 0.088
\n", 667 | "\n", 668 | "\n", 669 | " \n", 670 | "\n", 671 | "\n", 672 | " \n", 673 | "\n", 674 | "\n", 675 | " \n", 676 | "\n", 677 | "\n", 678 | " \n", 679 | "\n", 680 | "
Omnibus: 7840.481 Durbin-Watson: 0.076
Prob(Omnibus): 0.000 Jarque-Bera (JB): 5098.351
Skew: -0.110 Prob(JB): 0.00
Kurtosis: 2.564 Cond. No. 4.33


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." 681 | ], 682 | "text/plain": [ 683 | "\n", 684 | "\"\"\"\n", 685 | " OLS Regression Results \n", 686 | "==============================================================================\n", 687 | "Dep. Variable: result R-squared: 0.362\n", 688 | "Model: OLS Adj. R-squared: 0.362\n", 689 | "Method: Least Squares F-statistic: 9.687e+04\n", 690 | "Date: Sat, 01 Jan 2022 Prob (F-statistic): 0.00\n", 691 | "Time: 12:55:38 Log-Likelihood: -1.4709e+05\n", 692 | "No. Observations: 511532 AIC: 2.942e+05\n", 693 | "Df Residuals: 511528 BIC: 2.942e+05\n", 694 | "Df Model: 3 \n", 695 | "Covariance Type: nonrobust \n", 696 | "==============================================================================================\n", 697 | " coef std err t P>|t| [0.025 0.975]\n", 698 | "----------------------------------------------------------------------------------------------\n", 699 | "Intercept 0.5129 0.000 1038.282 0.000 0.512 0.514\n", 700 | "eval_small 0.1281 0.000 336.586 0.000 0.127 0.129\n", 701 | "I(eval_small * eval_small) -0.0007 0.000 -5.364 0.000 -0.001 -0.000\n", 702 | "diff_small 0.0875 0.000 325.726 0.000 0.087 0.088\n", 703 | "==============================================================================\n", 704 | "Omnibus: 7840.481 Durbin-Watson: 0.076\n", 705 | "Prob(Omnibus): 0.000 Jarque-Bera (JB): 5098.351\n", 706 | "Skew: -0.110 Prob(JB): 0.00\n", 707 | "Kurtosis: 2.564 Cond. No. 
4.33\n", 708 | "==============================================================================\n", 709 | "\n", 710 | "Notes:\n", 711 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", 712 | "\"\"\"" 713 | ] 714 | }, 715 | "execution_count": 50, 716 | "metadata": {}, 717 | "output_type": "execute_result" 718 | } 719 | ], 720 | "source": [ 721 | "reg = smf.ols('result ~ eval_small + I(eval_small*eval_small) + diff_small', data=df_king.query('-5 <= eval_small <= 5')).fit()\n", 722 | "params = reg.params\n", 723 | "reg.summary()" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": 51, 729 | "metadata": {}, 730 | "outputs": [ 731 | { 732 | "name": "stdout", 733 | "output_type": "stream", 734 | "text": [ 735 | "Value of 100 rating points: 0.68\n" 736 | ] 737 | } 738 | ], 739 | "source": [ 740 | "value_of_100_rating = params['diff_small'] / params['eval_small']\n", 741 | "print(\"Value of 100 rating points: %0.2f\" % value_of_100_rating)" 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": 52, 747 | "metadata": {}, 748 | "outputs": [], 749 | "source": [ 750 | "df_value = pd.DataFrame()\n", 751 | "df_value['rating_difference'] = np.arange(0, 400, 100)\n", 752 | "df_value['evaluation'] = -(df_value.rating_difference * value_of_100_rating)/100" 753 | ] 754 | }, 755 | { 756 | "cell_type": "markdown", 757 | "metadata": {}, 758 | "source": [ 759 | "### Evaluation to get result of 50% against a player\n", 760 | "Note that this is a linear interpolation from the linear regression." 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 55, 766 | "metadata": {}, 767 | "outputs": [ 768 | { 769 | "data": { 770 | "text/html": [ 771 | "
\n", 772 | "\n", 785 | "\n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | "
evaluation
rating_difference
0-0.00
100-0.68
200-1.37
300-2.05
\n", 815 | "
" 816 | ], 817 | "text/plain": [ 818 | " evaluation\n", 819 | "rating_difference \n", 820 | "0 -0.00\n", 821 | "100 -0.68\n", 822 | "200 -1.37\n", 823 | "300 -2.05" 824 | ] 825 | }, 826 | "execution_count": 55, 827 | "metadata": {}, 828 | "output_type": "execute_result" 829 | } 830 | ], 831 | "source": [ 832 | "df_value.set_index('rating_difference')" 833 | ] 834 | }, 835 | { 836 | "cell_type": "markdown", 837 | "metadata": {}, 838 | "source": [ 839 | "# Comparison to super-tournaments" 840 | ] 841 | }, 842 | { 843 | "cell_type": "code", 844 | "execution_count": 57, 845 | "metadata": {}, 846 | "outputs": [ 847 | { 848 | "data": { 849 | "text/html": [ 850 | "\n", 851 | "\n", 852 | "\n", 853 | " \n", 854 | "\n", 855 | "\n", 856 | " \n", 857 | "\n", 858 | "\n", 859 | " \n", 860 | "\n", 861 | "\n", 862 | " \n", 863 | "\n", 864 | "\n", 865 | " \n", 866 | "\n", 867 | "\n", 868 | " \n", 869 | "\n", 870 | "\n", 871 | " \n", 872 | "\n", 873 | "\n", 874 | " \n", 875 | "\n", 876 | "\n", 877 | " \n", 878 | "\n", 879 | "
OLS Regression Results
Dep. Variable: result R-squared: 0.362
Model: OLS Adj. R-squared: 0.362
Method: Least Squares F-statistic: 9.687e+04
Date: Sat, 01 Jan 2022 Prob (F-statistic): 0.00
Time: 12:56:34 Log-Likelihood: -1.4709e+05
No. Observations: 511532 AIC: 2.942e+05
Df Residuals: 511528 BIC: 2.942e+05
Df Model: 3
Covariance Type: nonrobust
\n", 880 | "\n", 881 | "\n", 882 | " \n", 883 | "\n", 884 | "\n", 885 | " \n", 886 | "\n", 887 | "\n", 888 | " \n", 889 | "\n", 890 | "\n", 891 | " \n", 892 | "\n", 893 | "\n", 894 | " \n", 895 | "\n", 896 | "
coef std err t P>|t| [0.025 0.975]
Intercept 0.5129 0.000 1038.282 0.000 0.512 0.514
eval_small 0.1281 0.000 336.586 0.000 0.127 0.129
I(eval_small * eval_small) -0.0007 0.000 -5.364 0.000 -0.001 -0.000
diff_small 0.0875 0.000 325.726 0.000 0.087 0.088
\n", 897 | "\n", 898 | "\n", 899 | " \n", 900 | "\n", 901 | "\n", 902 | " \n", 903 | "\n", 904 | "\n", 905 | " \n", 906 | "\n", 907 | "\n", 908 | " \n", 909 | "\n", 910 | "
Omnibus: 7840.481 Durbin-Watson: 0.076
Prob(Omnibus): 0.000 Jarque-Bera (JB): 5098.351
Skew: -0.110 Prob(JB): 0.00
Kurtosis: 2.564 Cond. No. 4.33


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." 911 | ], 912 | "text/plain": [ 913 | "\n", 914 | "\"\"\"\n", 915 | " OLS Regression Results \n", 916 | "==============================================================================\n", 917 | "Dep. Variable: result R-squared: 0.362\n", 918 | "Model: OLS Adj. R-squared: 0.362\n", 919 | "Method: Least Squares F-statistic: 9.687e+04\n", 920 | "Date: Sat, 01 Jan 2022 Prob (F-statistic): 0.00\n", 921 | "Time: 12:56:34 Log-Likelihood: -1.4709e+05\n", 922 | "No. Observations: 511532 AIC: 2.942e+05\n", 923 | "Df Residuals: 511528 BIC: 2.942e+05\n", 924 | "Df Model: 3 \n", 925 | "Covariance Type: nonrobust \n", 926 | "==============================================================================================\n", 927 | " coef std err t P>|t| [0.025 0.975]\n", 928 | "----------------------------------------------------------------------------------------------\n", 929 | "Intercept 0.5129 0.000 1038.282 0.000 0.512 0.514\n", 930 | "eval_small 0.1281 0.000 336.586 0.000 0.127 0.129\n", 931 | "I(eval_small * eval_small) -0.0007 0.000 -5.364 0.000 -0.001 -0.000\n", 932 | "diff_small 0.0875 0.000 325.726 0.000 0.087 0.088\n", 933 | "==============================================================================\n", 934 | "Omnibus: 7840.481 Durbin-Watson: 0.076\n", 935 | "Prob(Omnibus): 0.000 Jarque-Bera (JB): 5098.351\n", 936 | "Skew: -0.110 Prob(JB): 0.00\n", 937 | "Kurtosis: 2.564 Cond. No. 
4.33\n", 938 | "==============================================================================\n", 939 | "\n", 940 | "Notes:\n", 941 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", 942 | "\"\"\"" 943 | ] 944 | }, 945 | "execution_count": 57, 946 | "metadata": {}, 947 | "output_type": "execute_result" 948 | } 949 | ], 950 | "source": [ 951 | "df_super = df.query('db_name == \"Supertournaments 2017\"')\n", 952 | "reg_super = smf.ols('result ~ eval_small + diff_small', data=df_super.query('-5 <= eval_small <= 5')).fit()\n", 953 | "params_super = reg_super.params\n", 954 | "reg.summary()" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": 60, 960 | "metadata": {}, 961 | "outputs": [ 962 | { 963 | "name": "stdout", 964 | "output_type": "stream", 965 | "text": [ 966 | "Value of 100 rating points (Kingbase): 0.68\n" 967 | ] 968 | } 969 | ], 970 | "source": [ 971 | "value_of_100_rating = params['diff_small'] / params['eval_small']\n", 972 | "print(\"Value of 100 rating points (Kingbase): %0.2f\" % value_of_100_rating)" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 61, 978 | "metadata": {}, 979 | "outputs": [ 980 | { 981 | "name": "stdout", 982 | "output_type": "stream", 983 | "text": [ 984 | "Value of 100 rating points (Supertournaments): 0.59\n" 985 | ] 986 | } 987 | ], 988 | "source": [ 989 | "value_of_100_rating = params_super['diff_small'] / params_super['eval_small']\n", 990 | "print(\"Value of 100 rating points (Supertournaments): %0.2f\" % value_of_100_rating)" 991 | ] 992 | }, 993 | { 994 | "cell_type": "markdown", 995 | "metadata": {}, 996 | "source": [ 997 | "For elite players, the impact of rating on the result percentage is only a little smaller (0.6 vs 0.7) compared to a sample of games from players with rating above 2000 (Kingbase)." 
# SQL query strings consumed by the analysis notebooks.

# One row per evaluated move, restricted to rows where `is_white` is true.
# `rating_own` is the White player's rating (rating1 joins on
# player_white_id) and `rating_opponent` is Black's, both looked up for the
# year/month of the game date.  `win` is 1 when game_result = 1 and `draw`
# is 1 when game_result = 0.
#
# Named parameters (psycopg2 "pyformat" style, passed via `params=`):
#   move_number_start, move_number_end -- inclusive move-number window.
#
# NOTE: the database filter is parenthesised on purpose.  AND binds tighter
# than OR in SQL, so without the parentheses every 'kingbase_random' row
# would be returned regardless of the move-number window and the
# `eval is not null` condition.
q_rating = """
SELECT
    db_name
  , rating_own
  , rating_opponent
  , eval as ev
  , (result=1) :: Int as win
  , (result=0) :: Int as draw
FROM (
  SELECT
      game_result as result
    , is_white
    , rating1.rating as rating_own
    , rating2.rating as rating_opponent
    , eval
    , move_number
    , database.name as db_name
  FROM game
  JOIN player_rating as rating1 ON
        game.player_white_id=rating1.player_id
    AND extract(year from game.date)=rating1.year
    AND extract(month from game.date)=rating1.month
  JOIN player_rating as rating2 ON
        game.player_black_id=rating2.player_id
    AND extract(year from game.date)=rating2.year
    AND extract(month from game.date)=rating2.month
  JOIN move_eval on game.id=move_eval.game_id
  JOIN database on game.database_id=database.id
  WHERE
        move_number>=%(move_number_start)s and move_number<=%(move_number_end)s
    AND eval is not null
    AND (database.is_public OR name='kingbase_random')
) values
WHERE is_white
"""

# Plain full-table reads.
q_games = """SELECT * from game"""
q_db = """SELECT * from database WHERE is_public OR name='kingbase_random'"""
q_players = """SELECT * from player"""
q_tournaments = """SELECT * from tournament"""

# Position attributes joined to the moves/games they occur in, for moves
# that have a best-move evaluation.  `{limit}` is a str.format placeholder
# (presumably filled with an integer row cap by the caller -- confirm
# against the notebook that uses it).
q_positions = """
SELECT
    g.id as game_id
  , me.move_number, me.is_white
  , me.fen, me.eval as eval_played, me.eval_best
  , g.game_result as result
  , pa.typ, pa.value
FROM move_eval me
JOIN game g on me.game_id = g.id
JOIN position p on me.fen = p.fen
JOIN position_attribute pa on pa.position_id = p.id
WHERE eval_best is not null
LIMIT {limit}
"""