├── requirements.txt
├── .gitignore
├── README.md
├── contributor
    ├── manager.py
    ├── evaluation.py
    ├── __main__.py
    └── stockfish.py
├── scripts
    ├── gen_tactic_fens.py
    └── gen_fens.py
└── .github
    └── workflows
        └── compiler.yml


/requirements.txt:
--------------------------------------------------------------------------------
1 | Gooey==1.0.6
2 | requests==2.23.0
3 | python-chess==0.30.1
4 | py-cpuinfo==5.0.0
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | zips/*
 2 | pgns/*
 3 | *.pickle
 4 | *~
 5 | stockfish/
 6 | stockfish.zip
 7 | __MAC*
 8 | *cpython*
 9 | data/
10 | dest/
11 | cache/
12 | passw.py
13 | dist/
14 | build/
15 | auto.spec
16 | test*.txt
17 | *.swp
18 | test_*.progress
19 | test_*.csv
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Chess Data Contributor
 2 | The data contribution application for the [ChessData](https://github.com/r2dev2bb8/ChessData) dataset.
 3 | 
 4 | # Download
 5 | Navigate to the [releases page](https://github.com/r2dev2bb8/ChessDataContributor/releases) and download the appropriate executable. Supported platforms are Windows, Linux, and Mac.
 6 | 
 7 | # Additional Information
 8 | * This application generates chess evaluations for the [ChessData](https://github.com/r2dev2bb8/ChessData) dataset using [Stockfish](https://github.com/mcostalba/Stockfish).
 9 | * The application and dataset are maintained by [Ronak Badhe](https://github.com/r2dev2bb8).
10 | * The GitHub Actions for compilation are developed by [Kento Nishi](https://github.com/kentonishi).
11 | * **Known issue:** the current version of the contributor does not work properly.
12 | 


--------------------------------------------------------------------------------
/contributor/manager.py:
--------------------------------------------------------------------------------
 1 | import itertools as it
 2 | from threading import Thread, Lock
 3 | from queue import Queue
 4 | 
 5 | 
 6 | class Manager(it.count):
 7 |     """
 8 |     Manages which fens to evaluate and is thread safe.
 9 | 
10 |     Usage:
11 |         >>> m = Manager()
12 |         >>> m.init("path to fen file", "path to progress file")
13 |         >>> for fen in m:
14 |         >>>     evaluate(fen)
15 |         >>>     m.mark_done(fen)
16 |     """
17 |     def init(self, fen_path, output_path):
18 |         with open(fen_path, 'r') as fin:
19 |             self._fens = [*map(str.strip, fin)]
20 |         self._progress =  Progress(output_path)
21 |         self.last = 0
22 | 
23 |     def mark_done(self, fen):
24 |         self._progress.add(fen)
25 | 
26 |     def __len__(self):
27 |         return len(self._fens)
28 | 
29 |     def __next__(self):
30 |         try:
31 |             res = super().__next__()
32 |             while (self._fens[res] in self._progress):
33 |                 res = super().__next__()
34 |             self.last = res
35 |         except IndexError:
36 |             raise StopIteration
37 | 
38 |         return self._fens[res]
39 | 
40 | 
41 | class Progress(set):
42 |     def __init__(self, output_file):
43 |         super().__init__()
44 |         try:
45 |             with open(output_file, 'r') as fin:
46 |                 for line in fin:
47 |                     super().add(line.split(",")[0].strip())
48 |         except FileNotFoundError:
49 |             pass
50 | 
51 | 
if __name__ == "__main__":
    # Interactive smoke test: show each pending FEN and mark it as done
    # only when the user answers 'y'.
    demo = Manager()
    demo.init("test_file.txt", "progress.txt")
    for pending in demo:
        print(pending)
        answer = input("done? ")
        if answer == 'y':
            demo.mark_done(pending)
59 | 


--------------------------------------------------------------------------------
/scripts/gen_tactic_fens.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Generate unique fens from a lichess puzzle database.
 3 | 
 4 | Usage:
 5 | 
 6 | >>> cat evals1.csv evals2.csv ... | python gen_tactic_fens.py lichess_puzzle_db.csv
 7 | """
 8 | 
 9 | import itertools as it
10 | import random as rng
11 | import sys
12 | from typing import NamedTuple, Iterable
13 | 
14 | import chess
15 | 
16 | from gen_fens import pos_from_fen, fens_from_stream
17 | 
rng.seed(420)  # fixed seed: every run shuffles the puzzles the same way
# Position keys (as produced by pos_from_fen) that must not be emitted again.
blacklist = set()


class Puzzle(NamedTuple):
    """One puzzle row: starting FEN, space-separated UCI moves, rating."""
    fen: str
    moves: str
    rating: int

    @staticmethod
    def from_line(line: str) -> "Puzzle":
        """
        Build a puzzle from one CSV line.

        Columns 2-4 of the line are used as FEN, moves and rating.

        :raises ValueError: if the line has too few columns or the rating
            column is not an integer
        """
        f, m, r = line.split(",")[1:4]
        return Puzzle(f, m, int(r))

    def get_positions(self) -> Iterable[str]:
        """
        Play the solution and yield the FEN after each move.

        Positions whose key is already in the module-level ``blacklist``
        are skipped; every yielded position is added to it.
        """
        board = chess.Board(self.fen)
        for move in self.moves.split(" "):
            board.push_uci(move)
            current = board.fen()
            key = pos_from_fen(current)
            if key in blacklist:
                continue
            blacklist.add(key)
            yield current
40 | 
41 | 
def get_tactics(stream):
    """
    Parse the puzzle lines in ``stream`` and return a rating-balanced sample.

    The puzzles are shuffled, split into three rating bands (600-1300,
    1301-1900, 1901-3000) and the same number is taken from each band,
    limited by the smallest band. Puzzles outside all bands are dropped.
    """
    puzzles = list(map(Puzzle.from_line, stream))
    rng.shuffle(puzzles)

    easy, medium, hard = [], [], []
    for puzzle in puzzles:
        if 600 <= puzzle.rating <= 1300:
            easy.append(puzzle)
        elif 1300 < puzzle.rating <= 1900:
            medium.append(puzzle)
        elif 1900 < puzzle.rating <= 3000:
            hard.append(puzzle)

    per_band = min(len(easy), len(medium), len(hard))
    return easy[:per_band] + medium[:per_band] + hard[:per_band]
50 | 
51 | 
def main(tactics_file):
    """
    Write the unseen positions of a balanced puzzle sample to
    tactic_positions.txt, one FEN per line.

    Positions listed on standard input are added to the blacklist first.
    """
    blacklist.update(fens_from_stream(sys.stdin))

    with open(tactics_file, "r") as fin:
        tactics = get_tactics(fin)

    positions = []
    for tactic in tactics:
        positions += tactic.get_positions()
        print("Positions: ", len(positions), end="\r", flush=True)
    print()

    with open("tactic_positions.txt", "w+") as fout:
        fout.write("\n".join(positions) + "\n")


if __name__ == "__main__":
    puzzle_db = sys.argv[1]
    main(puzzle_db)
70 | 


--------------------------------------------------------------------------------
/contributor/evaluation.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | import chess
 4 | import chess.engine
 5 | 
 6 | 
 7 | class Evaluator:
 8 |     """
 9 |     Serializeable stockfish wrapper
10 | 
11 |     Example usage:
12 |     >>> engine = Evaluator("stockfish")
13 |     >>> engine("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")
14 |     >>> del engine
15 |     """
16 |     depth = 20
17 | 
18 |     number_pattern = re.compile(r"\d+\.")
19 |     def __init__(self, enginepath = "stockfish"):
20 |         self._engine = chess.engine.SimpleEngine.popen_uci(enginepath)
21 |         self._engine.configure({"Use NNUE": False})
22 |         self._enginepath = enginepath
23 | 
24 |     def close(self):
25 |         self._engine.close()
26 | 
27 |     def __call__(self, fen):
28 |         """
29 |         Evaluates the position.
30 | 
31 |         :param fen: the fen of the position
32 |         :return: a tuple of (fen, evaluation, best move)
33 |         """
34 |         try:
35 |             board = chess.Board(fen)
36 |             info = self._engine.analyse(board, chess.engine.Limit(depth=Evaluator.depth))
37 |             return self.__info_to_tuple(fen, info)
38 |         except KeyboardInterrupt:
39 |             pass
40 | 
41 |     @staticmethod
42 |     def __info_to_tuple(fen, info):
43 |         ev = str(info["score"].white())
44 |         mov = str(info.get("pv", " ")[0]).strip()
45 |         return fen, ev, mov
46 | 
47 |         # board = chess.Board(fen)
48 |         # for move in info["pv"]:
49 |         #     board.push(move)
50 |         # san = chess.Board(fen).variation_san(board.move_stack)
51 |         # filtered_san = ' '.join((
52 |         #     m for m in san.split(' ')
53 |         #     if re.match(Evaluator.number_pattern, m) is None
54 |         # ))
55 | 
56 |         # return fen, ev, filtered_san
57 | 
58 |     def __getstate__(self):
59 |         return (self._enginepath,)
60 | 
61 |     def __setstate__(self, state):
62 |         self.__init__(*state)
63 | 
64 |     def __del__(self):
65 |         self.close()
66 | 
67 | 
68 | if __name__ == "__main__":
69 |     engine = Evaluator("stockfish")
70 |     print("Eval:", engine("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"))
71 | 
72 | 


--------------------------------------------------------------------------------
/scripts/gen_fens.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Generate unique fens from lichess games database.
 3 | 
 4 | Usage:
 5 | >>> cat evals1.csv evals2.csv ... | python gen_fens.py lichess_games_db.pgn
 6 | """
 7 | 
 8 | import itertools as it
 9 | import sys
10 | 
11 | import chess
12 | import chess.pgn
13 | 
def get_games(path):
    """
    Lazily yield every game of the PGN file at ``path``.

    An empty line is printed when the generator finishes, whether it ran
    to the end, was closed early or failed.
    """
    try:
        with open(path, "r") as pgn:
            while True:
                game = chess.pgn.read_game(pgn)
                if game is None:
                    # read_game signals the end of the file with None
                    break
                yield game
    finally:
        print()
24 | 
25 | 
def extract_positions(game, blacklist, after_position):
    """
    Yield the positions one legal move away from each mainline position.

    :param game: game whose mainline is replayed
    :param blacklist: predicate; positions for which it is true are skipped
    :param after_position: callback invoked with each position just before
        it is yielded
    """
    board = game.board()
    for move in game.mainline_moves():
        board.push(move)
        for candidate in possible_positions(board.fen()):
            if blacklist(candidate):
                continue
            after_position(candidate)
            yield candidate
33 | 
34 | 
def possible_positions(fen):
    """Yield the FEN reached by each legal move from the position ``fen``."""
    root = chess.Board(fen)
    for candidate in root.legal_moves:
        root.push(candidate)
        reached = root.fen()
        # Restore the root position before handing the result out.
        root.pop()
        yield reached
41 | 
42 | 
def pos_from_fen(fen):
    """
    Return the first four space-separated fields of ``fen`` glued together
    without a separator; the remaining fields are ignored.
    """
    leading_fields = fen.split(" ", 4)[:4]
    return "".join(leading_fields)
45 | 
46 | 
def fen_set_blacklist(set_):
    """Return a predicate telling whether a FEN's position key is in ``set_``."""
    def is_known(fen):
        return pos_from_fen(fen) in set_

    return is_known
49 | 
50 | 
def fen_set_after_pos(set_):
    """Return a callback that adds a FEN's position key to ``set_``."""
    def remember(fen):
        return set_.add(pos_from_fen(fen))

    return remember
53 | 
54 | 
def fens_from_stream(stream):
    """
    Yield the position key of each line in ``stream``.

    The FEN is whatever precedes the first comma of the line.
    """
    for row in stream:
        first_column = row.partition(",")[0]
        yield pos_from_fen(first_column)
59 | 
60 | 
def main(pgnpath, fens=()):
    """
    Collect unseen positions from the games in ``pgnpath`` and write them
    to random_moves.txt, one FEN per line.

    :param pgnpath: path of the PGN file to read games from
    :param fens: iterable of position keys (as produced by
        fens_from_stream) that are treated as already known
    """
    positions = []
    already_exist = set(fens)
    # Both helpers share the same set, so a position emitted once is
    # filtered out for the rest of the run.
    is_known = fen_set_blacklist(already_exist)
    remember = fen_set_after_pos(already_exist)
    for game in get_games(pgnpath):
        # Checked once per game, so the total may end slightly above 1e6.
        if len(positions) > 1e6:
            break
        positions.extend(extract_positions(game, is_known, remember))
        print("Positions: ", len(positions), end="\r", flush=True)
    print(len(positions))
    with open("random_moves.txt", "w+") as fout:
        print(*positions, file=fout, sep="\n")


if __name__ == "__main__":
    main(sys.argv[1], fens_from_stream(sys.stdin))
80 | 


--------------------------------------------------------------------------------
/.github/workflows/compiler.yml:
--------------------------------------------------------------------------------
 1 | name: Compile and Release
 2 | 
 3 | on: 
 4 |   release:
 5 |     types:
 6 |       - published
 7 | 
 8 | jobs:
 9 |   deploy:
10 |     runs-on: ${{ matrix.os }}
11 |     strategy:
12 |       matrix:
13 |         os: [macos-latest, ubuntu-latest, windows-latest]
14 |         include: 
15 |         - os: macos-latest
16 |           executable: chesscontributor-mac.zip
17 |           filename: contributor.zip
18 |         - os: windows-latest
19 |           executable: chesscontributor-win.exe
20 |           filename: dist/contributor.exe
21 |         - os: ubuntu-latest
22 |           executable: chesscontributor-linux.zip
23 |           filename: contributor.zip
24 |     env:
25 |       chessPass: ${{ secrets.chessPass }}
26 |       chessUser: ${{ secrets.chessUser }}
27 |       chessToEmail: ${{ secrets.chessToEmail }}
28 |     steps:
29 |     - uses: actions/checkout@v2
30 |     - name: Set up Python
31 |       uses: actions/setup-python@v1
32 |       with:
33 |         python-version: '3.x'
34 |     - name : Print GitHub context
35 |       env: 
36 |         GITHUB_CONTEXT: ${{ toJson(github) }} 
37 |       run: echo "$GITHUB_CONTEXT" 
38 |     - name: Install dependencies
39 |       run: |
40 |         python -m pip install --upgrade pip
41 |         pip install pyinstaller
42 |         pip install -r requirements.txt
43 |     - name: Set password
44 |       if: matrix.os != 'windows-latest'
45 |       run: |
46 |         printf "USERNAME='"$chessUser"'\nPASSWORD='"$chessPass"'\nTO_EMAIL='"$chessToEmail"'\n" > passw.py
47 |     - name: Set password
48 |       if: matrix.os == 'windows-latest'
49 |       shell: cmd
50 |       run: |
51 |         python -c "import os;open(\"passw.py^\",'w').write('PASSWORD=\"'+os.environ['chessPass']+'\"\n"'+'USERNAME=\"'+os.environ['chessUser']+'\"\n'+'TO_EMAIL=\"'+os.environ['chessToEmail']+'\"')"
52 |     - name: Compile
53 |       run: |
54 |         pyinstaller --onefile --noupx contributor/__main__.py
55 |     - name: Rename file for Unix
56 |       if: matrix.os != 'windows-latest'
57 |       run: |
58 |         mv dist/__main__ dist/contributor
59 |     - name: Rename file for Windows
60 |       if: matrix.os == 'windows-latest'
61 |       run: |
62 |         mv dist/__main__.exe dist/contributor.exe
63 |     - name: Zip for Unix
64 |       if: matrix.os != 'windows-latest'
65 |       run: |
66 |         zip -9 -r contributor.zip dist
67 |     - name: Upload binaries to release
68 |       uses: svenstaro/upload-release-action@v1-release
69 |       with:
70 |         repo_token: ${{ secrets.GITHUB_TOKEN }}
71 |         file: ${{ matrix.filename }}
72 |         asset_name: ${{ matrix.executable }}
73 |         tag: ${{ github.ref }}
74 |         overwrite: true
75 | 


--------------------------------------------------------------------------------
/contributor/__main__.py:
--------------------------------------------------------------------------------
  1 | import itertools as it
  2 | import os
  3 | import sys
  4 | import time
  5 | from multiprocessing import freeze_support
  6 | from pathlib import Path
  7 | from queue import Queue
  8 | from threading import Event, Lock, Thread
  9 | 
 10 | import chess
 11 | 
 12 | import stockfish as sf
 13 | from manager import Manager
 14 | from evaluation import Evaluator
 15 | 
 16 | 
 17 | pwd = Path.cwd() / ".ChessContrib"
 18 | cli = "--cli" in sys.argv
 19 | 
 20 | 
 21 | def main():
 22 |     parser = ArgParser(description="Chess data contributor")
 23 |     arguments = {
 24 |             "--threads": {
 25 |                 "type": int,
 26 |                 "help": "Number of threads to use",
 27 |                 "default": os.cpu_count(),
 28 |             },
 29 |             "--input": {
 30 |                 "help": "Name of input file",
 31 |                 "widget": "FileChooser",
 32 |                 "required": True,
 33 |             },
 34 |             "--output": {
 35 |                 "help": "Name of output file",
 36 |                 "widget": "FileSaver",
 37 |                 "required": True,
 38 |             }
 39 |     }
 40 |     for name, kwargs in arguments.items():
 41 |         if cli and "widget" in kwargs:
 42 |             del kwargs["widget"]
 43 |         parser.add_argument(name, **kwargs)
 44 |     args = parser.parse_args()
 45 | 
 46 | 
 47 |     threads = int(args.threads)
 48 | 
 49 |     print("Using %d threads" % threads)
 50 | 
 51 |     sf.init()
 52 |     sfpath = sf.get_engine_path()
 53 | 
 54 |     print("Initialized stockfish")
 55 | 
 56 |     needs_q = Queue()
 57 |     evaluators = [Evaluator(sfpath) for _ in range(threads)]
 58 |     busy = [Event() for _ in range(threads)]
 59 |     for t in range(threads):
 60 |         busy[t].set()
 61 |         needs_q.put(t)
 62 |     manager = Manager()
 63 |     manager.init(args.input, args.output)
 64 |     output_mutex = Lock()
 65 | 
 66 |     counterr = it.count()
 67 |     beg_time = time.time()
 68 | 
 69 |     fense = set()
 70 |     for i, fen in enumerate(manager, 1):
 71 |         evrid = needs_q.get()
 72 |         def task(i, evrid, fen):
 73 |             try:
 74 |                 assert busy[evrid].is_set()
 75 |                 busy[evrid].clear()
 76 |                 info = evaluators[evrid](fen)
 77 |                 busy[evrid].set()
 78 |                 needs_q.put(evrid)
 79 |                 with output_mutex:
 80 |                     with open(args.output, 'a+') as fout:
 81 |                         print(*info, sep=',', file=fout)
 82 |                 manager.mark_done(fen)
 83 |                 next(counterr)
 84 |             except Exception:
 85 |                 print("Error analysing %d with evaluators[%d]" % (i, evrid))
 86 | 
 87 |         assert fen not in fense
 88 |         fense.add(fen)
 89 |         Thread(target=task, args=(i, evrid, fen)).start()
 90 |         print(manager.last + 1, "/", len(manager), end='\r')
 91 | 
 92 |     for sem in busy:
 93 |         sem.wait()
 94 | 
 95 |     print()
 96 |     print(len(fense), next(counterr))
 97 |     print("Took", time.time() - beg_time, "seconds")
 98 | 
 99 |     del evaluators
100 | 
101 | 
102 | if __name__ == "__main__":
103 |     freeze_support()
104 |     if "--cli" in sys.argv:
105 |         sys.argv.remove("--cli")
106 |         from argparse import ArgumentParser as ArgParser
107 |     else:
108 |         from gooey import Gooey, GooeyParser
109 |         ArgParser = GooeyParser
110 |         main = Gooey(main)
111 |     main()
112 | 


--------------------------------------------------------------------------------
/contributor/stockfish.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import shutil
  3 | import stat
  4 | import sys
  5 | import zipfile
  6 | from multiprocessing import freeze_support
  7 | from pathlib import Path
  8 | from subprocess import call
  9 | 
 10 | import cpuinfo
 11 | 
 12 | 
 13 | pwd = Path.cwd() / ".ChessContrib"
 14 | 
 15 | 
 16 | def get_engine_path():
 17 |     platform = sys.platform.replace("32", '')
 18 |     version = __get_engine_version()
 19 |     location = __DATA[platform][version]["file"]
 20 |     return (
 21 |         pwd / "engines" / "stockfish" / location
 22 |     )
 23 | 
 24 | 
 25 | def init():
 26 |     try:
 27 |         with open(get_engine_path(), 'rb') as fin:
 28 |             pass
 29 |     except FileNotFoundError:
 30 |         print("Downloading stockfish...")
 31 |         __create_dir(str(pwd))
 32 |         __download_stockfish()
 33 |         print("Downloaded stockfish")
 34 |     except PermissionError:
 35 |         pass
 36 | 
 37 | 
 38 | def __download_stockfish():
 39 |     version = __get_engine_version()
 40 |     platform = sys.platform.replace("32", '')
 41 |     link = __DATA[platform][version]["link"]
 42 |     call(["curl", "-o", "stockfish.zip", link])
 43 |     __unzip("stockfish.zip", str(pwd / "engines" / "stockfish"))
 44 |     os.remove("stockfish.zip")
 45 |     stockfish_executable = str(get_engine_path())
 46 |     if sys.platform != "win32":
 47 |         os.chmod(stockfish_executable, stat.S_IEXEC)
 48 | 
 49 | 
 50 | def __unzip(filepath: str, resultpath: str) -> None:
 51 |     with zipfile.ZipFile(filepath, 'r') as zip_ref:
 52 |         zip_ref.extractall(resultpath)
 53 | 
 54 | 
 55 | 
 56 | def __create_dir(path):
 57 |     try:
 58 |         os.mkdir(path)
 59 |     except FileExistsError:
 60 |         shutil.rmtree(path)
 61 |         os.mkdir(path)
 62 | 
 63 | 
 64 | # uses fancy multiprocessing, beware when compiling
 65 | def __get_engine_version():
 66 |     flags = cpuinfo.get_cpu_info()["flags"]
 67 |     if "bmi2" in flags:
 68 |         version = "bmi2"
 69 |     elif "popcnt" in flags:
 70 |         version = "popcnt"
 71 |     else:
 72 |         version = "64bit"
 73 |     return version
 74 | 
 75 | 
 76 | # Not in json file to package easier
 77 | __DATA = {
 78 |   "win": {
 79 |     "bmi2": {
 80 |       "link": "https://stockfishchess.org/files/stockfish_12_win_x64_bmi2.zip",
 81 |       "file": "stockfish_20090216_x64_bmi2.exe"
 82 |     },
 83 |     "popcnt": {
 84 |       "link": "https://stockfishchess.org/files/stockfish_12_win_x64_modern.zip",
 85 |       "file": "stockfish_20090216_x64_modern.exe"
 86 |     },
 87 |     "64bit": {
 88 |       "link": "https://stockfishchess.org/files/stockfish_12_win_x64.zip",
 89 |       "file": "stockfish_20090216_x64.exe"
 90 |     }
 91 |   },
 92 |   "linux": {
 93 |     "bmi2": {
 94 |       "link": "https://stockfishchess.org/files/stockfish_12_linux_x64_bmi2.zip",
 95 |       "file": "stockfish_20090216_x64_bmi2"
 96 |     },
 97 |     "popcnt": {
 98 |       "link": "https://stockfishchess.org/files/stockfish_12_linux_x64_modern.zip",
 99 |       "file": "stockfish_20090216_x64_modern"
100 |     },
101 |     "64bit": {
102 |       "link": "https://stockfishchess.org/files/stockfish_12_linux_x64.zip",
103 |       "file": "stockfish_20090216_x64"
104 |     }
105 |   },
106 |   "darwin": {
107 |     "bmi2": {
108 |       "link": "https://stockfishchess.org/files/stockfish-11-mac.zip",
109 |       "file": "stockfish-11-mac/Mac/stockfish-11-bmi2"
110 |     },
111 |     "popcnt": {
112 |       "link": "https://stockfishchess.org/files/stockfish-11-mac.zip",
113 |       "file": "stockfish-11-mac/Mac/stockfish-11-modern"
114 |     },
115 |     "64bit": {
116 |       "link": "https://stockfishchess.org/files/stockfish-11-mac.zip",
117 |       "file": "stockfish-11-mac/Mac/stockfish-11-64"
118 |     }
119 |   }
120 | }
121 | 


--------------------------------------------------------------------------------