├── tests ├── eval │ ├── __init__.py │ └── test_evaluation.py ├── test_version.py ├── data │ └── prompts_debug.jsonl ├── training │ ├── integration │ │ └── test_train.py │ └── units │ │ ├── test_dataset.py │ │ └── test_model.py └── dataset_gen │ ├── test_integration.py │ └── test_units.py ├── human-eval ├── human_eval │ ├── __init__.py │ ├── evaluate_functional_correctness.py │ ├── data.py │ ├── evaluation.py │ └── execution.py ├── requirements.txt ├── data │ ├── HumanEval.jsonl.gz │ ├── example_problem.jsonl │ └── example_samples.jsonl ├── setup.py ├── LICENSE └── README.md ├── textbook ├── dataset_gen │ ├── __init__.py │ ├── .gitignore │ ├── tree │ │ ├── professions.json │ │ ├── topics.csv │ │ └── subsubtopics.json │ ├── filtering.py │ ├── dataset_gen_cli.py │ ├── create_prompts.py │ └── dataset_gen.py ├── __init__.py ├── api.py ├── model.py ├── dataset.py ├── evaluate.py └── train.py ├── .pre-commit-config.yaml ├── setup_vm.sh ├── ds_config.json ├── pyproject.toml ├── .gitignore ├── .github └── workflows │ └── ci.yml └── README.md /tests/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /human-eval/human_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /textbook/dataset_gen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /textbook/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.0" 2 | -------------------------------------------------------------------------------- /textbook/dataset_gen/.gitignore: -------------------------------------------------------------------------------- 1 | exercises/* 2 | -------------------------------------------------------------------------------- /human-eval/requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | fire 3 | numpy 4 | -------------------------------------------------------------------------------- /human-eval/data/HumanEval.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/textbook/HEAD/human-eval/data/HumanEval.jsonl.gz -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | from textbook import __version__ 2 | 3 | 4 | def test_version(): 5 | assert __version__ == "0.1.0" 6 | -------------------------------------------------------------------------------- /human-eval/data/example_problem.jsonl: -------------------------------------------------------------------------------- 1 | {"task_id": "test/0", "prompt": "def return1():\n", "canonical_solution": " return 1", "test": "def check(candidate):\n assert candidate() == 1", "entry_point": "return1"} 2 | -------------------------------------------------------------------------------- /tests/data/prompts_debug.jsonl: -------------------------------------------------------------------------------- 1 | {"prompt": "What is the weather today?"} 2 | {"prompt": "Tell me a joke."} 3 | {"prompt": "What is the capital of France?"} 4 | {"prompt": "Who won the world series last 
year?"} 5 | {"prompt": "Translate 'Hello' to Spanish."} 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: 22.3.0 4 | hooks: 5 | - id: black 6 | types: [python] 7 | 8 | - repo: https://github.com/charliermarsh/ruff-pre-commit 9 | rev: v0.0.243 10 | hooks: 11 | - id: ruff 12 | -------------------------------------------------------------------------------- /setup_vm.sh: -------------------------------------------------------------------------------- 1 | apt update -y 2 | apt install gh screen neovim nvtop -y 3 | pip install -U poetry 4 | gh auth login 5 | gh repo clone jina-ai/textbook 6 | cd textbook 7 | poetry config virtualenvs.create false \ 8 | && poetry install --no-interaction --no-ansi 9 | 10 | poetry run pip install torch 11 | -------------------------------------------------------------------------------- /textbook/api.py: -------------------------------------------------------------------------------- 1 | from typer import Typer 2 | import typer 3 | from typing import Annotated 4 | 5 | app = Typer(pretty_exceptions_enable=False) 6 | 7 | 8 | @app.command() 9 | def train( 10 | local_rank: Annotated[int, typer.Option("--local_rank")] = 0, 11 | ): 12 | print(local_rank) 13 | 14 | 15 | if __name__ == "__main__": 16 | app() 17 | -------------------------------------------------------------------------------- /human-eval/data/example_samples.jsonl: -------------------------------------------------------------------------------- 1 | {"task_id": "test/0", "completion": " import subprocess\n subprocess.check_output('rm -rf tmp')"} 2 | {"task_id": "test/0", "completion": " import time\n time.sleep(10)\n return 1"} 3 | {"task_id": "test/0", "completion": " return input('enter a number')"} 4 | {"task_id": "test/0", "completion": " return 1"} 5 | {"task_id": "test/0", "completion": " return 1"} 6 | {"task_id": "test/0", "completion": "\treturn 1"} 7 | -------------------------------------------------------------------------------- /tests/training/integration/test_train.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from textbook.train import train 3 | 4 | 5 | @pytest.mark.parametrize("module", ["Replit", "StarCoder"]) 6 | @pytest.mark.parametrize("dataset", ["DummyDataset", "ExerciseDatast"]) 7 | def test_train(module, dataset): 8 | train( 9 | module=module, 10 | dataset=dataset, 11 | debug=True, 12 | epochs=1, 13 | micro_batch_size=1, 14 | batch_size=1, 15 | use_wandb=False, 16 | ) 17 | -------------------------------------------------------------------------------- /human-eval/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pkg_resources 4 | from setuptools import setup, find_packages 5 | 6 | 7 | setup( 8 | name="human-eval", 9 | py_modules=["human-eval"], 10 | version="1.0", 11 | description="", 12 | author="OpenAI", 13 | packages=find_packages(), 14 | install_requires=[ 15 | str(r) 16 | for r in pkg_resources.parse_requirements( 17 | open(os.path.join(os.path.dirname(__file__), "requirements.txt")) 18 | ) 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /tests/training/units/test_dataset.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from 
textbook.dataset import DummyDataset, ExerciseDatast 4 | from textbook.model import Replit 5 | 6 | from transformers import PreTrainedTokenizer 7 | 8 | 9 | @pytest.fixture 10 | def tokenizer() -> PreTrainedTokenizer: 11 | return Replit().tokenizer 12 | 13 | 14 | def test_tiny_stories(tokenizer): 15 | DummyDataset(debug=True, tokenizer=tokenizer) 16 | 17 | 18 | def test_exercises_dataet(tokenizer): 19 | ExerciseDatast(debug=True, tokenizer=tokenizer) 20 | -------------------------------------------------------------------------------- /tests/training/units/test_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from textbook.model import Replit, StarCoder 3 | import torch 4 | 5 | 6 | @pytest.mark.slow 7 | def test_replit_base(): 8 | Replit() 9 | 10 | 11 | def test_replit_debug(): 12 | model = Replit(debug=True) 13 | assert model.model.dtype == torch.float32 14 | 15 | 16 | @pytest.mark.slow 17 | def test_starcoer_base(): 18 | StarCoder() 19 | 20 | 21 | def test_starcoder_debug(): 22 | model = StarCoder(debug=True) 23 | assert model.model.dtype == torch.float32 24 | -------------------------------------------------------------------------------- /tests/dataset_gen/test_integration.py: -------------------------------------------------------------------------------- 1 | from textbook.dataset_gen.dataset_gen_cli import generate, filter 2 | import os 3 | 4 | 5 | def test_cli_dataset_gen(tmp_path): 6 | generate( 7 | tree_path="textbook/dataset_gen/tree/professions.json", 8 | leaves_path="textbook/dataset_gen/tree/subsubtopics.json", 9 | debug=True, 10 | debug_speed=-1, 11 | retries=10, 12 | pool_size=10, 13 | output_path=tmp_path, 14 | ) 15 | 16 | filter(exo_path=tmp_path, dataset_file=os.path.join(tmp_path, "dataset.jsonl")) 17 | 18 | assert os.path.exists(os.path.join(tmp_path, "dataset.jsonl")) 19 | -------------------------------------------------------------------------------- /textbook/dataset_gen/tree/professions.json: -------------------------------------------------------------------------------- 1 | ["Biologist", "Fashion Designer", "Zoologist", "Carpenter", "Jina AI Machine Learning Engineer", "Archaeologist", "Physical Therapist", "Artist", "Pilot", "Economist", "Aerospace Engineer", "Journalist", "Police Officer", "Actor/Actress", "Musician", "Historian", "Anthropologist", "Physician", "Teacher", "Software Developer", "Electrician", "Psychologist", "Geologist", "Engineer", "Social Worker", "Accountant", "Architect", "Astronomer", "Firefighter", "Civil Engineer", "Librarian", "Athlete", "Interior Designer", "Environmental Scientist", "Marketing Manager", "Mathematician", "Game Developer", "Photographer", "Veterinarian", "Chef", "Farmer", "Geographer", "Lawyer", "Linguist", "Nurse", "Dancer", "Biomedical Engineer", "Graphic Designer"] -------------------------------------------------------------------------------- /human-eval/human_eval/evaluate_functional_correctness.py: -------------------------------------------------------------------------------- 1 | import fire 2 | import sys 3 | 4 | from human_eval.data import HUMAN_EVAL 5 | from human_eval.evaluation import evaluate_functional_correctness 6 | 7 | 8 | def entry_point( 9 | sample_file: str, 10 | k: str = "1,10,100", 11 | n_workers: int = 4, 12 | timeout: float = 3.0, 13 | problem_file: str = HUMAN_EVAL, 14 | ): 15 | """ 16 | Evaluates the functional correctness of generated samples, and writes 17 | results to f"{sample_file}_results.jsonl.gz" 18 | """ 19 | k = 
list(map(int, k.split(",")))
20 |     results = evaluate_functional_correctness(
21 |         sample_file, k, n_workers, timeout, problem_file
22 |     )
23 |     print(results)
24 | 
25 | 
26 | def main():
27 |     fire.Fire(entry_point)
28 | 
29 | 
30 | sys.exit(main())
31 | 
--------------------------------------------------------------------------------
/ds_config.json:
--------------------------------------------------------------------------------
1 | {
2 |     "optimizer": {
3 |         "type": "AdamW",
4 |         "params": {
5 |             "lr": "auto",
6 |             "betas": "auto",
7 |             "eps":"auto",
8 |             "weight_decay":"auto"
9 |         }
10 |     },
11 | 
12 |     "scheduler": {
13 |         "type": "WarmupDecayLR",
14 |         "params": {
15 |             "total_num_steps": "auto",
16 |             "warmup_max_lr": "auto",
17 |             "warmup_num_steps": "auto"
18 |         }
19 |     },
20 | 
21 |     "zero_optimization": {
22 |         "stage": 2,
23 |         "allgather_partitions": true,
24 |         "allgather_bucket_size": 2e8,
25 |         "reduce_scatter": true,
26 |         "reduce_bucket_size": 2e8,
27 |         "overlap_comm": true,
28 |         "contiguous_gradients": true,
29 |         "cpu_offload": false
30 |     },
31 | 
32 |     "train_batch_size": "auto",
33 |     "train_micro_batch_size_per_gpu": "auto"
34 | }
--------------------------------------------------------------------------------
/tests/eval/test_evaluation.py:
--------------------------------------------------------------------------------
1 | from textbook import evaluate
2 | from textbook.model import Replit
3 | 
4 | 
5 | def test_evaluate(monkeypatch):
6 |     # Define a replacement function to be used in the test
7 |     def mock_generate_one_completion(
8 |         model, tokenizer, prompt, max_new_tokens: int = 512
9 |     ):
10 |         return "\n return 1"
11 | 
12 |     # Monkey patch 'evaluate.generate_one_completion' with the mock defined above
13 |     monkeypatch.setattr(
14 |         evaluate, "generate_one_completion", mock_generate_one_completion
15 |     )
16 | 
17 |     replit = Replit(debug=True)
18 |     accuracy_results, results = evaluate.evaluate(
19 |         model=replit.model,
20 |         tokenizer=replit.tokenizer,
21 |         eval_file="human-eval/data/example_problem.jsonl",
22 |     )
23 | 
24 |     assert accuracy_results["pass@1"] == 1
25 |     assert results["test/0"]["passed"]
26 |     assert results["test/0"]["result"] == "passed"
27 | 
--------------------------------------------------------------------------------
/human-eval/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 | 
3 | Copyright (c) OpenAI (https://openai.com)
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "textbook" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Jina ai"] 6 | license = "apache 2.0" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.9" 11 | torch = ">=2.0.1" 12 | transformers = {version = ">=4.30.2", extras = ["deepspeed"]} 13 | typer = {version = "^0.9.0", extras = ["all"]} 14 | wandb = "^0.15.3" 15 | datasets = "^2.13.1" 16 | accelerate = "^0.21.0" 17 | sentencepiece = "^0.1.99" 18 | einops = "^0.6.1" 19 | openai = "^0.27.8" 20 | pydantic = "<2.0.0" 21 | human-eval = {path = "human-eval", develop = true} 22 | 23 | 24 | [tool.poetry.group.dev.dependencies] 25 | pytest = ">=7.3.1" 26 | black = ">=23.3.0" 27 | isort = ">=5.12.0" 28 | ruff = ">=0.0.269" 29 | pre-commit = ">=3.3.2" 30 | jupyterlab = ">=4.0.0" 31 | jupyterlab-code-formatter = ">=2.2.1" 32 | mypy = ">=1" 33 | pytest-mock = ">=3.11.1" 34 | 35 | [build-system] 36 | requires = ["poetry-core"] 37 | build-backend = "poetry.core.masonry.api" 38 | 39 | [tool.ruff] 40 | ignore = ["F722"] 41 | line-length = 120 42 | 43 | [tool.mypy] 44 | ignore_missing_imports = true 45 | 46 | [tool.pytest.ini_options] 47 | markers = [ 48 | "slow: marks tests as slow (deselect with '-m \"not slow\"')", 49 | "openai: need openai key ", 50 | ] -------------------------------------------------------------------------------- /textbook/dataset_gen/filtering.py: -------------------------------------------------------------------------------- 1 | from textbook.dataset_gen.dataset_gen import Exercise 2 | from typing import List, Union 3 | import os 4 | from pathlib import Path 5 | 6 | 7 | def load_one_file(path: Union[Path, str]) -> List[Exercise]: 8 | with open(path, "r") as f: 9 | lines = f.readlines() 10 | return [Exercise.parse_raw(line) for line in lines] 11 | 12 | 13 | def load_all_exo(path: Union[Path, str]) -> List[Exercise]: 14 | if isinstance(path, str): 15 | path = Path(path) 16 | exos: List[Exercise] = [] 17 | for sub_dir in os.listdir(path): 18 | for fn in os.listdir(path / sub_dir): 19 | exos += load_one_file(path / sub_dir / fn) 20 | return exos 21 | 22 | 23 | def filter_bad_exos( 24 | exos: List[Exercise], carac_to_remove=["??", "___"] 25 | ) -> List[Exercise]: 26 | clean_exos: List[Exercise] = [] 27 | for exo in exos: 28 | keep = True 29 | for carac in carac_to_remove: 30 | if carac in exo.solution: 31 | keep = False 32 | break 33 | 34 | if keep: 35 | clean_exos.append(exo) 36 | 37 | return clean_exos 38 | 39 | 40 | def remove_extra(exos: List[Exercise], carac_to_split=["# Test", "```"]): 41 | for exo in exos: 42 | for carac in carac_to_split: 43 | exo.solution = exo.solution.split(carac)[0] 44 | 45 | 46 | def load_and_filter_exos(path: Union[Path, str]) -> List[Exercise]: 47 | exos = load_all_exo(path) 48 | print(len(exos)) 49 | clean_exos = filter_bad_exos(exos) 50 | print(len(clean_exos)) 51 | 52 | remove_extra(clean_exos) 53 | return clean_exos 54 | -------------------------------------------------------------------------------- /human-eval/human_eval/data.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, Dict 2 | import gzip 3 | import json 4 | import os 5 | 6 | 7 | ROOT = os.path.dirname(os.path.abspath(__file__)) 8 | HUMAN_EVAL = os.path.join(ROOT, "..", "data", "HumanEval.jsonl.gz") 9 | 10 | 11 | def 
read_problems(evalset_file: str = HUMAN_EVAL) -> Dict[str, Dict]: 12 | return {task["task_id"]: task for task in stream_jsonl(evalset_file)} 13 | 14 | 15 | def stream_jsonl(filename: str) -> Iterable[Dict]: 16 | """ 17 | Parses each jsonl line and yields it as a dictionary 18 | """ 19 | if filename.endswith(".gz"): 20 | with open(filename, "rb") as gzfp: 21 | with gzip.open(gzfp, "rt") as fp: 22 | for line in fp: 23 | if any(not x.isspace() for x in line): 24 | yield json.loads(line) 25 | else: 26 | with open(filename, "r") as fp: 27 | for line in fp: 28 | if any(not x.isspace() for x in line): 29 | yield json.loads(line) 30 | 31 | 32 | def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False): 33 | """ 34 | Writes an iterable of dictionaries to jsonl 35 | """ 36 | if append: 37 | mode = "ab" 38 | else: 39 | mode = "wb" 40 | filename = os.path.expanduser(filename) 41 | if filename.endswith(".gz"): 42 | with open(filename, mode) as fp: 43 | with gzip.GzipFile(fileobj=fp, mode="wb") as gzfp: 44 | for x in data: 45 | gzfp.write((json.dumps(x) + "\n").encode("utf-8")) 46 | else: 47 | with open(filename, mode) as fp: 48 | for x in data: 49 | fp.write((json.dumps(x) + "\n").encode("utf-8")) 50 | -------------------------------------------------------------------------------- /textbook/model.py: -------------------------------------------------------------------------------- 1 | from typing import Protocol 2 | from transformers import ( 3 | AutoTokenizer, 4 | PreTrainedTokenizer, 5 | AutoConfig, 6 | PreTrainedModel, 7 | AutoModelForCausalLM, 8 | GPTBigCodeConfig, 9 | ) 10 | 11 | 12 | class BaseModule(Protocol): 13 | tokenizer: PreTrainedTokenizer 14 | model: PreTrainedModel 15 | 16 | def __init__(self, debug: bool = False): 17 | ... 18 | 19 | 20 | class Replit: 21 | tokenizer: PreTrainedTokenizer 22 | model: PreTrainedModel 23 | base_model = "replit/replit-code-v1-3b" 24 | 25 | config = AutoConfig.from_pretrained( 26 | "replit/replit-code-v1-3b", 27 | trust_remote_code=True, 28 | init_device="cuda", 29 | ) 30 | 31 | debug_config = AutoConfig.from_pretrained( 32 | "replit/replit-code-v1-3b", 33 | trust_remote_code=True, 34 | init_device="cuda", 35 | n_layers=1, 36 | ) 37 | 38 | def __init__(self, debug: bool = False): 39 | self._init_tokenizer() 40 | self.model = AutoModelForCausalLM.from_pretrained( 41 | self.base_model, 42 | config=self.config if not debug else self.debug_config, 43 | trust_remote_code=True, 44 | ) 45 | 46 | def _init_tokenizer(self): 47 | self.tokenizer = AutoTokenizer.from_pretrained( 48 | self.base_model, trust_remote_code=True 49 | ) 50 | self.tokenizer.pad_token = self.tokenizer.eos_token 51 | 52 | 53 | class StarCoder: 54 | tokenizer: PreTrainedTokenizer 55 | model: PreTrainedModel 56 | base_model = "bigcode/starcoderbase-1b" 57 | config = GPTBigCodeConfig.from_pretrained( 58 | "bigcode/starcoderbase-1b", 59 | init_device="cuda", 60 | ) 61 | 62 | debug_config = GPTBigCodeConfig.from_pretrained( 63 | "bigcode/starcoderbase-1b", 64 | init_device="cuda", 65 | n_layer=1, 66 | ) 67 | 68 | def __init__(self, debug: bool = False): 69 | self._init_tokenizer() 70 | if debug: 71 | self.model = AutoModelForCausalLM.from_pretrained( 72 | self.base_model, 73 | config=self.debug_config, 74 | ) 75 | else: 76 | self.model = AutoModelForCausalLM.from_pretrained(self.base_model).to( 77 | "cuda" 78 | ) 79 | 80 | def _init_tokenizer(self): 81 | self.tokenizer = AutoTokenizer.from_pretrained( 82 | self.base_model, 83 | ) 84 | self.tokenizer.pad_token = self.tokenizer.eos_token 
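# ------------------------------------------------------------------------------
# Editor's note: the guarded block below is an illustrative usage sketch, not
# part of the original textbook/model.py. It mirrors how these modules are
# consumed in textbook/train.py and textbook/evaluate.py: instantiate a module,
# then use its `tokenizer` and `model` together. It assumes the Hugging Face
# Hub checkpoints are reachable; `debug=True` loads a single-layer config so
# the sketch stays cheap to run. The prompt string and `max_new_tokens` value
# are arbitrary examples.
if __name__ == "__main__":
    module = StarCoder(debug=True)  # or Replit(debug=True)
    inputs = module.tokenizer("def add(a, b):", return_tensors="pt").to(
        module.model.device
    )
    output_ids = module.model.generate(**inputs, max_new_tokens=16)
    print(module.tokenizer.decode(output_ids[0], skip_special_tokens=True))
# ------------------------------------------------------------------------------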
85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | wandb/* 2 | # Initially taken from Github's Python gitignore file 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | tests/unit/array/jina*.db* 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | docs/.python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .venv 95 | env/ 96 | venv/ 97 | ENV/ 98 | env.bak/ 99 | venv.bak/ 100 | 101 | # Spyder project settings 102 | .spyderproject 103 | .spyproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ 113 | .dmypy.json 114 | dmypy.json 115 | 116 | # Pyre type checker 117 | .pyre/ 118 | .idea/ 119 | toy*.py 120 | .DS_Store 121 | post/ 122 | toy*.ipynb 123 | *.c 124 | .nes_cache 125 | toy*.yml 126 | *.tmp 127 | 128 | shell/jina-wizard.sh 129 | /junit/ 130 | /tests/junit/ 131 | /docs/chapters/proto/docs.md 132 | /tests/.pytest-kind 133 | 134 | # IntelliJ IDEA 135 | *.iml 136 | .idea 137 | 138 | # VSCode 139 | .vscode 140 | 141 | # test with config in resources 142 | tests/integration/crud/simple/simple_indexer/ 143 | 144 | # latency tracking 145 | latency 146 | MyIndexer/ 147 | MyMemMap/ 148 | original/ 149 | output/ 150 | 151 | # kubernetes testing 152 | .pytest-kind 153 | .kube 154 | 155 | *.ipynb -------------------------------------------------------------------------------- /textbook/dataset_gen/tree/topics.csv: -------------------------------------------------------------------------------- 1 | Topic,Use,Mixing, 2 | 1. Introduction to Python Programming,0,, 3 | 2. Python Data Types and Variables,1,1, 4 | 3. Python Basic Operators,1,1, 5 | 4. Control Structures in Python,1,1, 6 | 5. Python Loops,1,1, 7 | 6. Python Data Structures: Lists and Tuples,1,1, 8 | 7. Python Data Structures: Sets and Frozensets,1,1, 9 | 8. Python Data Structures: Dictionaries,1,1, 10 | 9. Functions in Python,1,1, 11 | 10. Python Built-In Functions,1,1, 12 | 11. Understanding Python Scope (Global vs Local),0,, 13 | 12. Python Generators and Iterators,1,1, 14 | 13. 
Python List Comprehensions,1,1, 15 | 14. Python Lambda Functions,1,1, 16 | 15. Python Classes and Objects,1,1, 17 | 16. Inheritance and Polymorphism in Python,1,1, 18 | 17. Python Decorators,1,1, 19 | 18. Python Exception Handling,1,1, 20 | 19. File I/O in Python,0,, 21 | 20. Recursion in Python,1,1, 22 | 21. Introduction to Algorithm Complexity and Big O Notation,1,1, 23 | "22. Basic Sorting Algorithms: Bubble, Selection, Insertion Sort",1,1, 24 | "23. Advanced Sorting Algorithms: Quick Sort, Merge Sort, Heap Sort",1,1, 25 | 24. Searching Algorithms: Linear and Binary Search,1,1, 26 | 25. Hashing and Hash Tables,1,1, 27 | 26. Data Structures: Stacks and Queues,1,1, 28 | 27. Data Structures: Linked Lists,1,1, 29 | "28. Data Structures: Trees (Binary Trees, BSTs, Heaps)",1,1, 30 | 29. Data Structures: Graphs,1,0, 31 | "30. Tree Traversal Algorithms: Preorder, Inorder, Postorder",1,0, 32 | "31. Graph Algorithms: Breadth-First Search, Depth-First Search",1,0, 33 | "32. Graph Algorithms: Dijkstra’s Algorithm, Bellman-Ford Algorithm",1,0, 34 | "33. Graph Algorithms: Kruskal’s, Prim's Algorithm",1,0, 35 | 34. Dynamic Programming in Python,1,0, 36 | 35. Greedy Algorithms in Python,1,1, 37 | 36. Backtracking Algorithms in Python,1,0, 38 | "37. String Algorithms: Pattern Searching, Palindromes, Anagrams",1,1, 39 | "38. Number Theory Algorithms: GCD, Prime Numbers",1,0, 40 | 39. Python and Databases,0,, 41 | 40. Understanding Python's json and csv Modules,0,, 42 | 41. Python's datetime Module,0,, 43 | 42. Python's math and random Modules,1,1, 44 | "43. Python Testing: Unit Tests, DocTests",0,, 45 | 44. Python Debugging Techniques,0,, 46 | 45. Profiling and Optimizing Python,0,, 47 | "46. Concurrency in Python: Multithreading, Multiprocessing, asyncio",1,1, 48 | 47. Networking with Python's socket Module,0,, 49 | 48. Python Coding Standards (PEP8) and Code Linting,0,, 50 | 49. Refactoring Python Code,0,, 51 | 50. Understanding the Python GIL,0,, 52 | 51. Immutable Data Structures in Python,1,1, 53 | 52. Understanding Python Metaclasses,0,, 54 | 53. Python's Enumerations (Enum),1,1, 55 | "54. Understanding Python's ""with"" statement",1,1, 56 | 55. Packaging and Distributing Python Applications,0,0, 57 | 56. Python's garbage collection and memory management,1,1, 58 | 57. Understanding the Python Standard Library,1,1, 59 | "58. Understanding __name__ and ""__main__"" in Python",0,, 60 | 59. A Look into Python's Future: What's new in Python 4?,0,, 61 | 60. Final Project: Implementing a complex algorithm or data structure from scratch,0,, 62 | ,,,42 -------------------------------------------------------------------------------- /human-eval/human_eval/evaluation.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict, Counter 2 | from concurrent.futures import ThreadPoolExecutor, as_completed 3 | from typing import List, Union, Dict, Optional 4 | import itertools 5 | 6 | import numpy as np 7 | import tqdm 8 | 9 | from human_eval.data import HUMAN_EVAL, read_problems, stream_jsonl, write_jsonl 10 | from human_eval.execution import check_correctness 11 | 12 | 13 | def estimate_pass_at_k( 14 | num_samples: Union[int, List[int], np.ndarray], 15 | num_correct: Union[List[int], np.ndarray], 16 | k: int, 17 | ) -> np.ndarray: 18 | """ 19 | Estimates pass@k of each problem and returns them in an array. 20 | """ 21 | 22 | def estimator(n: int, c: int, k: int) -> float: 23 | """ 24 | Calculates 1 - comb(n - c, k) / comb(n, k). 
25 | """ 26 | if n - c < k: 27 | return 1.0 28 | return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)) 29 | 30 | if isinstance(num_samples, int): 31 | num_samples_it = itertools.repeat(num_samples, len(num_correct)) 32 | else: 33 | assert len(num_samples) == len(num_correct) 34 | num_samples_it = iter(num_samples) 35 | 36 | return np.array( 37 | [estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)] 38 | ) 39 | 40 | 41 | def evaluate_functional_correctness( 42 | sample_file: str, 43 | k: List[int] = [1, 10, 100], 44 | n_workers: int = 4, 45 | timeout: float = 3.0, 46 | problem_file: str = HUMAN_EVAL, 47 | problems: Optional[Dict] = None, 48 | ): 49 | """ 50 | Evaluates the functional correctness of generated samples, and writes 51 | results to f"{sample_file}_results.jsonl.gz" 52 | """ 53 | 54 | problems = problems or read_problems(problem_file) 55 | 56 | # Check the generated samples against test suites. 57 | with ThreadPoolExecutor(max_workers=n_workers) as executor: 58 | futures = [] 59 | completion_id = Counter() 60 | n_samples = 0 61 | results = defaultdict(list) 62 | 63 | print("Reading samples...") 64 | for sample in tqdm.tqdm(stream_jsonl(sample_file)): 65 | task_id = sample["task_id"] 66 | completion = sample["completion"] 67 | args = (problems[task_id], completion, timeout, completion_id[task_id]) 68 | future = executor.submit(check_correctness, *args) 69 | futures.append(future) 70 | completion_id[task_id] += 1 71 | n_samples += 1 72 | 73 | assert len(completion_id) == len(problems), "Some problems are not attempted." 74 | 75 | print("Running test suites...") 76 | for future in tqdm.tqdm(as_completed(futures), total=len(futures)): 77 | result = future.result() 78 | results[result["task_id"]].append((result["completion_id"], result)) 79 | 80 | # Calculate pass@k. 81 | total, correct = [], [] 82 | for result in results.values(): 83 | result.sort() 84 | passed = [r[1]["passed"] for r in result] 85 | total.append(len(passed)) 86 | correct.append(sum(passed)) 87 | total = np.array(total) 88 | correct = np.array(correct) 89 | 90 | ks = k 91 | pass_at_k = { 92 | f"pass@{k}": estimate_pass_at_k(total, correct, k).mean() 93 | for k in ks 94 | if (total >= k).all() 95 | } 96 | 97 | # Finally, save the results in one file: 98 | def combine_results(): 99 | for sample in stream_jsonl(sample_file): 100 | task_id = sample["task_id"] 101 | result = results[task_id].pop(0) 102 | sample["result"] = result[1]["result"] 103 | sample["passed"] = result[1]["passed"] 104 | yield sample 105 | 106 | out_file = sample_file + "_results.jsonl" 107 | print(f"Writing results to {out_file}...") 108 | write_jsonl(out_file, tqdm.tqdm(combine_results(), total=n_samples)) 109 | 110 | return pass_at_k 111 | -------------------------------------------------------------------------------- /textbook/dataset.py: -------------------------------------------------------------------------------- 1 | from typing import Protocol, Optional 2 | import random 3 | 4 | from datasets import Dataset, load_dataset 5 | from transformers import ( 6 | PreTrainedTokenizer, 7 | DataCollatorForLanguageModeling, 8 | DataCollatorForSeq2Seq, 9 | ) 10 | from transformers.data.data_collator import DataCollatorMixin 11 | 12 | 13 | class CustomDataset(Protocol): 14 | train_dataset: Dataset 15 | test_dataset: Dataset 16 | data_collator: DataCollatorMixin 17 | 18 | def __init__( 19 | self, 20 | tokenizer: PreTrainedTokenizer, 21 | debug: bool = False, 22 | dataset_name: Optional[str] = None, 23 | ): 24 | ... 
25 | 26 | 27 | class DummyDataset: 28 | @staticmethod 29 | def gen(n: int = 100_000, upper_bound: int = 512): 30 | for _ in range(n): 31 | random_integer = random.randint(1, upper_bound) 32 | yield {"text": "hello world" * random_integer} 33 | 34 | def __init__(self, tokenizer: PreTrainedTokenizer, debug: bool = False, **kwargs): 35 | self.debug = debug 36 | 37 | dataset = Dataset.from_generator(self.gen) 38 | 39 | if debug: 40 | dataset = dataset.select(range(10)) 41 | 42 | split_dataset = dataset.train_test_split(test_size=0.1) 43 | 44 | self.train_dataset = split_dataset["train"] 45 | self.test_dataset = split_dataset["test"] 46 | 47 | self.train_dataset = self.train_dataset.map( 48 | self._get_preprocess_fn(tokenizer), 49 | batched=True, 50 | num_proc=4, 51 | remove_columns=self.train_dataset.column_names, 52 | ) 53 | 54 | self.test_dataset = self.test_dataset.map( 55 | self._get_preprocess_fn(tokenizer), 56 | batched=True, 57 | num_proc=4, 58 | remove_columns=self.test_dataset.column_names, 59 | ) 60 | 61 | self.data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False) 62 | 63 | @staticmethod 64 | def _get_preprocess_fn(tokenizer: PreTrainedTokenizer): 65 | def tokenize_fn(input): 66 | return tokenizer( 67 | input["text"], 68 | ) 69 | 70 | return tokenize_fn 71 | 72 | 73 | class ExerciseDatast: 74 | def __init__( 75 | self, 76 | tokenizer: PreTrainedTokenizer, 77 | dataset_name: str = "jinaai/code_exercises_40k", 78 | debug: bool = False, 79 | ): 80 | self.debug = debug 81 | 82 | dataset = load_dataset(dataset_name)["train"] 83 | 84 | if debug: 85 | dataset = dataset.select(range(10)) 86 | 87 | split_dataset = dataset.train_test_split(test_size=0.1) 88 | 89 | self.train_dataset = split_dataset["train"] 90 | self.test_dataset = split_dataset["test"] 91 | 92 | self.train_dataset = self.train_dataset.map( 93 | self._get_preprocess_fn(tokenizer), 94 | batched=False, 95 | num_proc=4, 96 | remove_columns=self.train_dataset.column_names, 97 | ) 98 | 99 | self.test_dataset = self.test_dataset.map( 100 | self._get_preprocess_fn(tokenizer), 101 | batched=False, 102 | num_proc=4, 103 | remove_columns=self.test_dataset.column_names, 104 | ) 105 | 106 | self.data_collator = DataCollatorForSeq2Seq( 107 | tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True 108 | ) 109 | 110 | @staticmethod 111 | def _get_preprocess_fn(tokenizer: PreTrainedTokenizer): 112 | def tokenize_fn(input): 113 | input_problem = input["problem"] 114 | input_solution = input["solution"] 115 | 116 | inputs = tokenizer(input_problem) 117 | targets = tokenizer(input_solution) 118 | inputs["labels"] = [-100] * len(inputs["input_ids"]) + targets[ 119 | "input_ids" 120 | ] # we don't train on the problem tokens 121 | inputs["input_ids"] = inputs["input_ids"] + targets["input_ids"] 122 | inputs["attention_mask"] = ( 123 | inputs["attention_mask"] + targets["attention_mask"] 124 | ) 125 | 126 | return inputs 127 | 128 | return tokenize_fn 129 | -------------------------------------------------------------------------------- /textbook/dataset_gen/dataset_gen_cli.py: -------------------------------------------------------------------------------- 1 | import random 2 | import itertools 3 | import json 4 | from typer import Typer 5 | from typing import List 6 | from textbook.dataset_gen.dataset_gen import ( 7 | load_leaves, 8 | mass_generation, 9 | OpenAIGenerator, 10 | MonkeyGenerator, 11 | write_results_to_jsonl, 12 | ) 13 | import openai 14 | import os 15 | from pathlib import Path 16 | 17 | from 
textbook.dataset_gen.create_prompts import Topic, Query 18 | from textbook.dataset_gen.filtering import load_and_filter_exos 19 | from datasets import Dataset 20 | 21 | app = Typer() 22 | 23 | 24 | def create_prompt_query(topic_1: Topic, topic_2: Topic, profession: str) -> str: 25 | query = f''' 26 | Create a code completion exercise on the intersection of “{topic_1.topic}” and “{topic_2.topic}”. 27 | Write it for a {profession}. 28 | 29 | The exercise must be of the style: 30 | 31 | ``` 32 | def name(args): 33 | 34 | """Docstring explaining the exercise""" 35 | 36 | python code to solve the exercise 37 | ``` 38 | 39 | NO CLASSES 40 | 41 | MAKE IT VERY DIFFICULT 42 | ''' 43 | query = "\n".join([m.lstrip() for m in query.strip().split("\n")]) 44 | return query 45 | 46 | 47 | def create_prompts( 48 | topic: Topic, 49 | combination_options: List[Topic], 50 | professions: List[str], 51 | ) -> List[Query]: 52 | random.shuffle(combination_options) 53 | 54 | prompts: List[Query] = [] 55 | 56 | for loc_topic in combination_options: 57 | if ( 58 | loc_topic.mixing 59 | and loc_topic.parent != topic.parent 60 | and loc_topic.topic != topic.topic 61 | ): 62 | for profession in professions: 63 | query = create_prompt_query(topic, loc_topic, profession) 64 | prompts.append(Query(query=query, topic_1=topic, topic_2=loc_topic)) 65 | 66 | return prompts 67 | 68 | 69 | @app.command() 70 | def generate( 71 | tree_path: str, 72 | leaves_path: str, 73 | output_path: str, 74 | retries: int = 10, 75 | pool_size: int = 10, 76 | debug: bool = False, 77 | debug_speed: int = 2, 78 | n_prompts: int = 100, 79 | ): 80 | with open(tree_path, "r") as openfile: 81 | # Reading from json file 82 | professions = list(json.load(openfile)) 83 | 84 | if not os.path.exists(output_path): 85 | os.makedirs(output_path) 86 | 87 | if not debug: 88 | openai.api_key = os.environ["OPENAI_API_KEY"] 89 | 90 | def get_generator(): 91 | return OpenAIGenerator() 92 | 93 | else: 94 | 95 | def get_generator(): 96 | return MonkeyGenerator(speed=debug_speed) 97 | 98 | leaves = load_leaves(leaves_path) 99 | prompts: List[List[Query]] = [ 100 | create_prompts( 101 | i, 102 | combination_options=leaves, 103 | professions=professions, 104 | ) 105 | for i in leaves 106 | ] 107 | 108 | prompts_flat = list(itertools.chain(*prompts)) 109 | if n_prompts > len(prompts_flat): 110 | raise ValueError( 111 | f"Cannot generate({n_prompts}) prompts because it is larger than the number of" 112 | f" available prompts ({len(prompts_flat)})" 113 | ) 114 | prompts_selection = [i.query for i in prompts_flat] 115 | 116 | print(f"prompts: {len(prompts_selection)}") 117 | 118 | solo_prompts = list(set(prompts_selection)) 119 | 120 | print(f"solo prompts: {len(solo_prompts)}") 121 | prompts_selection = solo_prompts[:n_prompts] 122 | print(f"total prompts: {len(prompts_selection)}") 123 | 124 | mass_generation( 125 | prompts_selection, 126 | get_generator, 127 | save_dir=output_path, 128 | pool_size=pool_size, 129 | retries=retries, 130 | ) 131 | 132 | 133 | @app.command() 134 | def filter(exo_path: Path, dataset_file: str): 135 | print(exo_path) 136 | exos = load_and_filter_exos(exo_path) 137 | print(len(exos)) 138 | write_results_to_jsonl(dataset_file, exos) 139 | 140 | 141 | @app.command() 142 | def push(repo_name: str, dataset_file: Path): 143 | with open(dataset_file, "r") as file: 144 | lines = file.readlines() 145 | exercises = [json.loads(line) for line in lines] 146 | 147 | def gen(): 148 | for exo in exercises: 149 | yield exo 150 | 151 | dataset = 
Dataset.from_generator(gen) 152 | dataset.push_to_hub(repo_name) 153 | 154 | 155 | if __name__ == "__main__": 156 | app() 157 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize, reopened] 6 | push: 7 | branches: 8 | - main 9 | 10 | jobs: 11 | lint-ruff: 12 | runs-on: ubuntu-20.04 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python 3.9 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: 3.9 19 | - name: Lint with ruff 20 | run: | 21 | python -m pip install --upgrade pip 22 | python -m pip install poetry 23 | poetry install 24 | 25 | # stop the build if there are Python syntax errors or undefined names 26 | poetry run ruff . 27 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 28 | poetry run ruff . 29 | 30 | check-black: 31 | runs-on: ubuntu-20.04 32 | steps: 33 | - uses: actions/checkout@v3 34 | - name: Set up Python 3.9 35 | uses: actions/setup-python@v4 36 | with: 37 | python-version: 3.9 38 | - name: check black 39 | run: | 40 | python -m pip install --upgrade pip 41 | python -m pip install poetry 42 | poetry install --only dev 43 | poetry run black --check . 44 | 45 | env: 46 | CHANGED_FILES: ${{ steps.file_changes.outputs.added_modified }} 47 | 48 | check-mypy: 49 | runs-on: ubuntu-20.04 50 | steps: 51 | - uses: actions/checkout@v2.5.0 52 | - name: Set up Python 3.9 53 | uses: actions/setup-python@v4 54 | with: 55 | python-version: 3.9 56 | - name: check mypy 57 | run: | 58 | python -m pip install --upgrade pip 59 | python -m pip install poetry 60 | poetry install --all-extras 61 | poetry run mypy textbook 62 | 63 | # run all tests 64 | run-training-test: 65 | needs: [check-black, lint-ruff] 66 | runs-on: [self-hosted, x64, gpu, linux] 67 | strategy: 68 | fail-fast: false 69 | steps: 70 | - uses: actions/checkout@v3 71 | - name: Set up Python 3.9 72 | uses: actions/setup-python@v4 73 | with: 74 | python-version: 3.9 75 | - name: Prepare environment 76 | run: | 77 | python -m pip install --upgrade pip 78 | python -m pip install -U poetry 79 | poetry install 80 | poetry run pip install torch 81 | poetry run huggingface-cli login --token $HF_AUTH_TOKEN 82 | 83 | 84 | - name: Test 85 | id: test 86 | run: | 87 | poetry run pytest tests/training -m "not slow" 88 | timeout-minutes: 30 89 | env: 90 | HF_AUTH_TOKEN: ${{ secrets.HF_AUTH_TOKEN }} 91 | 92 | run-dataset-gen-test: 93 | needs: [check-black, lint-ruff] 94 | runs-on: ubuntu-20.04 95 | strategy: 96 | fail-fast: false 97 | steps: 98 | - uses: actions/checkout@v3 99 | - name: Set up Python 3.9 100 | uses: actions/setup-python@v4 101 | with: 102 | python-version: 3.9 103 | - name: Prepare environment 104 | run: | 105 | python -m pip install --upgrade pip 106 | python -m pip install -U poetry 107 | poetry install 108 | poetry run pip install torch 109 | 110 | - name: Test 111 | id: test 112 | run: | 113 | poetry run pytest tests/dataset_gen -m "not slow and not openai" 114 | timeout-minutes: 30 115 | 116 | run-evaluation-test: 117 | needs: [check-black, lint-ruff] 118 | runs-on: [self-hosted, x64, gpu, linux] 119 | strategy: 120 | fail-fast: false 121 | steps: 122 | - uses: actions/checkout@v3 123 | - name: Set up Python 3.9 124 | uses: actions/setup-python@v4 125 | with: 126 | python-version: 3.9 127 | - name: Prepare environment 128 | run: | 129 | python -m pip 
install --upgrade pip 130 | python -m pip install -U poetry 131 | poetry install 132 | poetry run pip install torch 133 | poetry run huggingface-cli login --token $HF_AUTH_TOKEN 134 | 135 | 136 | - name: Test 137 | id: test 138 | run: | 139 | poetry run pytest tests/eval 140 | timeout-minutes: 10 141 | env: 142 | HF_AUTH_TOKEN: ${{ secrets.HF_AUTH_TOKEN }} 143 | 144 | # just for blocking the merge until all parallel core-test are successful 145 | success-all-test: 146 | needs: [check-mypy, run-training-test, run-dataset-gen-test, check-black, lint-ruff] 147 | if: always() 148 | runs-on: ubuntu-20.04 149 | steps: 150 | - uses: technote-space/workflow-conclusion-action@v2 151 | - name: Check Failure 152 | if: env.WORKFLOW_CONCLUSION == 'failure' 153 | run: exit 1 154 | - name: Success 155 | if: ${{ success() }} 156 | run: echo "All Done" 157 | -------------------------------------------------------------------------------- /textbook/evaluate.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | from typing import Optional, Union, List 4 | 5 | import torch 6 | from transformers import ( 7 | PreTrainedTokenizer, 8 | PreTrainedModel, 9 | StoppingCriteria, 10 | StoppingCriteriaList, 11 | ) 12 | from human_eval.data import write_jsonl, read_problems, HUMAN_EVAL 13 | from human_eval.evaluation import evaluate_functional_correctness 14 | 15 | if torch.cuda.is_available(): 16 | device = "cuda" 17 | else: 18 | device = "cpu" 19 | 20 | STOP_WORDS = ["\nclass", "\ndef", "\n@", "\nprint", "\nif", "\n#"] 21 | 22 | 23 | class EndOfFunctionCriteria(StoppingCriteria): 24 | """Custom `StoppingCriteria` which checks if all generated functions in the batch are completed.""" 25 | 26 | def __init__(self, tokenizer, start_length=0): 27 | self.start_length = start_length 28 | self.tokenizer = tokenizer 29 | 30 | def __call__(self, input_ids, scores, **kwargs): 31 | """Returns true if all generated sequences contain any of the end-of-function strings.""" 32 | decoded_generations = self.tokenizer.batch_decode( 33 | input_ids[:, self.start_length :] 34 | ) 35 | done = [] 36 | for decoded_generation in decoded_generations: 37 | done.append( 38 | any([stop_string in decoded_generation for stop_string in STOP_WORDS]) 39 | ) 40 | return all(done) 41 | 42 | 43 | def _stop_at_stop_token(decoded_string, stop_tokens): 44 | """ 45 | Produces the prefix of decoded_string that ends at the first occurrence of 46 | a stop_token. 47 | WARNING: the decoded_string *must not* include the prompt, which may have stop tokens 48 | itself. 
49 | """ 50 | min_stop_index = len(decoded_string) 51 | for stop_token in stop_tokens: 52 | stop_index = decoded_string.find(stop_token) 53 | if stop_index != -1 and stop_index < min_stop_index: 54 | min_stop_index = stop_index 55 | return decoded_string[:min_stop_index] 56 | 57 | 58 | def read_jsonl_file(file_path): 59 | data = [] 60 | with open(file_path, "r") as f: 61 | for line in f: 62 | json_data = json.loads(line) 63 | data.append(json_data) 64 | return data 65 | 66 | 67 | def generate_one_completion( 68 | model: PreTrainedModel, 69 | tokenizer: PreTrainedTokenizer, 70 | prompt: str, 71 | max_new_tokens: int = 512, 72 | ) -> List[str]: 73 | inputs = tokenizer(prompt.rstrip(), return_tensors="pt").to("cuda") 74 | stopping_criteria = StoppingCriteriaList( 75 | [EndOfFunctionCriteria(tokenizer, start_length=len(inputs["input_ids"][0]))] 76 | ) 77 | generation_output = model.generate( 78 | **inputs, 79 | max_new_tokens=max_new_tokens, 80 | eos_token_id=tokenizer.eos_token_id, 81 | return_dict_in_generate=True, 82 | stopping_criteria=stopping_criteria, 83 | # do_sample=True, 84 | # temperature=0.2, 85 | # top_k=0, 86 | # top_p=0.95 87 | ) 88 | 89 | s = generation_output.sequences[0] 90 | output = tokenizer.decode(s, skip_special_tokens=True) 91 | generation = output[len(prompt) :] 92 | generation = prompt + _stop_at_stop_token(generation, STOP_WORDS) 93 | return generation 94 | 95 | 96 | def evaluate( 97 | model: Union[torch.nn.Module, PreTrainedModel], 98 | tokenizer: PreTrainedTokenizer, 99 | prompt_template: str = "{prompt}", 100 | eval_file: str = HUMAN_EVAL, 101 | eval_size: Optional[int] = None, 102 | max_new_tokens: int = 512, 103 | ): 104 | model.eval() 105 | problems = read_problems(evalset_file=eval_file) 106 | eval_size = eval_size or len(list(problems.items())) 107 | problems = dict(list(problems.items())[:eval_size]) 108 | 109 | # since k=1, no need for more samples 110 | num_samples_per_task = 1 111 | samples = [ 112 | dict( 113 | task_id=task_id, 114 | completion=generate_one_completion( 115 | model, 116 | tokenizer, 117 | prompt_template.format(prompt=problems[task_id]["prompt"]), 118 | max_new_tokens=max_new_tokens, 119 | ), 120 | ) 121 | for task_id in problems 122 | for _ in range(num_samples_per_task) 123 | ] 124 | with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file: 125 | write_jsonl(temp_file.name, samples) 126 | 127 | accuracy_results = evaluate_functional_correctness( 128 | temp_file.name, k=[1], problem_file=eval_file, problems=problems 129 | ) 130 | sample_results = read_jsonl_file(f"{temp_file.name}_results.jsonl") 131 | 132 | # merge results and problems 133 | results = { 134 | item["task_id"]: {**item, **problems[item["task_id"]]} 135 | for item in sample_results 136 | } 137 | 138 | return accuracy_results, results 139 | -------------------------------------------------------------------------------- /human-eval/README.md: -------------------------------------------------------------------------------- 1 | # HumanEval: Hand-Written Evaluation Set 2 | 3 | This is an evaluation harness for the HumanEval problem solving dataset 4 | described in the paper "[Evaluating Large Language Models Trained on 5 | Code](https://arxiv.org/abs/2107.03374)". 
6 | 7 | ## Installation 8 | 9 | Make sure to use python 3.7 or later: 10 | ``` 11 | $ conda create -n codex python=3.7 12 | $ conda activate codex 13 | ``` 14 | 15 | Check out and install this repository: 16 | ``` 17 | $ git clone https://github.com/openai/human-eval 18 | $ pip install -e human-eval 19 | ``` 20 | 21 | ## Usage 22 | 23 | **This program exists to run untrusted model-generated code. Users are strongly 24 | encouraged not to do so outside of a robust security sandbox. The [execution 25 | call](https://github.com/openai/human-eval/blob/master/human_eval/execution.py#L48-L58) 26 | in `execution.py` is deliberately commented out to ensure users read this 27 | disclaimer before running code in a potentially unsafe manner. See the comment in 28 | `execution.py` for more information and instructions.** 29 | 30 | After following the above instructions to enable execution, generate samples 31 | and save them in the following JSON Lines (jsonl) format, where each sample is 32 | formatted into a single line like so: 33 | ``` 34 | {"task_id": "Corresponding HumanEval task ID", "completion": "Completion only without the prompt"} 35 | ``` 36 | We provide `example_problem.jsonl` and `example_solutions.jsonl` under `data` 37 | to illustrate the format and help with debugging. 38 | 39 | Here is nearly functional example code (you just have to provide 40 | `generate_one_completion` to make it work) that saves generated completions to 41 | `samples.jsonl`. 42 | ``` 43 | from human_eval.data import write_jsonl, read_problems 44 | 45 | problems = read_problems() 46 | 47 | num_samples_per_task = 200 48 | samples = [ 49 | dict(task_id=task_id, completion=generate_one_completion(problems[task_id]["prompt"])) 50 | for task_id in problems 51 | for _ in range(num_samples_per_task) 52 | ] 53 | write_jsonl("samples.jsonl", samples) 54 | ``` 55 | 56 | To evaluate the samples, run 57 | ``` 58 | $ evaluate_functional_correctness samples.jsonl 59 | Reading samples... 60 | 32800it [00:01, 23787.50it/s] 61 | Running test suites... 62 | 100%|...| 32800/32800 [16:11<00:00, 33.76it/s] 63 | Writing results to samples.jsonl_results.jsonl... 64 | 100%|...| 32800/32800 [00:00<00:00, 42876.84it/s] 65 | {'pass@1': ..., 'pass@10': ..., 'pass@100': ...} 66 | ``` 67 | This script provides more fine-grained information in a new file ending in 68 | `_results.jsonl`. Each row now contains whether the completion 69 | `passed` along with the execution `result` which is one of "passed", "timed 70 | out", or "failed". 71 | 72 | As a quick sanity-check, the example samples should yield 0.5 pass@1. 73 | ``` 74 | $ evaluate_functional_correctness data/example_samples.jsonl --problem_file=data/example_problem.jsonl 75 | Reading samples... 76 | 6it [00:00, 3397.11it/s] 77 | Running example suites... 78 | 100%|...| 6/6 [00:03<00:00, 1.96it/s] 79 | Writing results to data/example_samples.jsonl_results.jsonl... 80 | 100%|...| 6/6 [00:00<00:00, 6148.50it/s] 81 | {'pass@1': 0.4999999999999999} 82 | ``` 83 | 84 | Because there is no unbiased way of estimating pass@k when there are fewer 85 | samples than k, the script does not evaluate pass@k for these cases. To 86 | evaluate with other k values, pass `--k=`. For 87 | other options, see 88 | ``` 89 | $ evaluate_functional_correctness --help 90 | ``` 91 | However, we recommend that you use the default values for the rest. 92 | 93 | ## Known Issues 94 | 95 | While evaluation uses very little memory, you might see the following error 96 | message when the system is running out of RAM. 
Since this may cause some 97 | correct programs to fail, we recommend that you free some memory and try again. 98 | ``` 99 | malloc: can't allocate region 100 | ``` 101 | 102 | ## Citation 103 | 104 | Please cite using the following bibtex entry: 105 | 106 | ``` 107 | @article{chen2021codex, 108 | title={Evaluating Large Language Models Trained on Code}, 109 | author={Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de Oliveira Pinto and Jared Kaplan and Harri Edwards and Yuri Burda and Nicholas Joseph and Greg Brockman and Alex Ray and Raul Puri and Gretchen Krueger and Michael Petrov and Heidy Khlaaf and Girish Sastry and Pamela Mishkin and Brooke Chan and Scott Gray and Nick Ryder and Mikhail Pavlov and Alethea Power and Lukasz Kaiser and Mohammad Bavarian and Clemens Winter and Philippe Tillet and Felipe Petroski Such and Dave Cummings and Matthias Plappert and Fotios Chantzis and Elizabeth Barnes and Ariel Herbert-Voss and William Hebgen Guss and Alex Nichol and Alex Paino and Nikolas Tezak and Jie Tang and Igor Babuschkin and Suchir Balaji and Shantanu Jain and William Saunders and Christopher Hesse and Andrew N. Carr and Jan Leike and Josh Achiam and Vedant Misra and Evan Morikawa and Alec Radford and Matthew Knight and Miles Brundage and Mira Murati and Katie Mayer and Peter Welinder and Bob McGrew and Dario Amodei and Sam McCandlish and Ilya Sutskever and Wojciech Zaremba}, 110 | year={2021}, 111 | eprint={2107.03374}, 112 | archivePrefix={arXiv}, 113 | primaryClass={cs.LG} 114 | } 115 | ``` 116 | -------------------------------------------------------------------------------- /textbook/train.py: -------------------------------------------------------------------------------- 1 | import functools 2 | from importlib import import_module 3 | from typing import Optional, Dict, Type, Annotated 4 | 5 | 6 | import torch 7 | 8 | from textbook.dataset import CustomDataset 9 | from textbook.evaluate import evaluate 10 | from textbook.model import BaseModule 11 | 12 | import transformers 13 | import tempfile 14 | 15 | from typer import Typer 16 | import typer 17 | import wandb 18 | 19 | app = Typer(pretty_exceptions_enable=False) 20 | 21 | 22 | config_to_log: Dict = {} 23 | 24 | 25 | def log_args(func): 26 | @functools.wraps(func) 27 | def wrapper(*args, **kwargs): 28 | global config_to_log 29 | config_to_log = kwargs 30 | return func(*args, **kwargs) 31 | 32 | return wrapper 33 | 34 | 35 | @app.command() 36 | @log_args 37 | def train( 38 | *, 39 | module: str = "StarCoder", 40 | dataset: str = "ExerciseDatast", 41 | epochs: int = 1, 42 | micro_batch_size: int = 1, 43 | batch_size: int = 1, 44 | learning_rate: float = 3e-5, 45 | output_dir: Optional[str] = None, 46 | wandb_run_name: str = "", 47 | use_wandb: bool = False, 48 | wandb_project: str = "textbook", 49 | wandb_log_model: Optional[ 50 | bool 51 | ] = None, # will be true by default if use_wandb is true 52 | push_model_to_hf: bool = False, # if set, will push the model to hf 53 | local_rank: Annotated[int, typer.Option("--local_rank")] = 0, 54 | deepspeed: Optional[str] = None, 55 | debug: bool = False, 56 | eval_size: Optional[int] = None, 57 | eval_max_new_tokens: int = 512, 58 | n_samples: Optional[int] = None, 59 | dataset_name: Optional[str] = "jinaai/code_exercises_40k", 60 | ): 61 | module_cls: Type[BaseModule] = getattr(import_module("textbook.model"), module) 62 | module_instance = module_cls(debug=debug) 63 | model = torch.compile(module_instance.model) 64 | model = module_instance.model 65 | 
tokenizer = module_instance.tokenizer 66 | 67 | dataset_cls: Type[CustomDataset] = getattr( 68 | import_module("textbook.dataset"), dataset 69 | ) 70 | if dataset_name: 71 | dataset_instance = dataset_cls( 72 | tokenizer=tokenizer, debug=debug, dataset_name=dataset_name 73 | ) 74 | else: 75 | dataset_instance = dataset_cls(tokenizer=tokenizer, debug=debug) 76 | 77 | if n_samples: 78 | dataset_instance.train_dataset = dataset_instance.train_dataset.select( 79 | range(n_samples) 80 | ) 81 | 82 | if debug: 83 | wandb_run_name = "debug" 84 | 85 | if batch_size % micro_batch_size: 86 | raise ValueError( 87 | f"batch_size {batch_size} and micro_batch_size {micro_batch_size} are not compatible" 88 | ) 89 | 90 | if wandb_log_model is None: 91 | wandb_log_model = use_wandb 92 | 93 | if output_dir is None: 94 | output_dir = tempfile.mkdtemp() 95 | print(f"temp folder : {output_dir}") 96 | 97 | use_wandb = local_rank == 0 and use_wandb 98 | if use_wandb: 99 | run = wandb.init(project=wandb_project, **dict(config=config_to_log)) # type: ignore 100 | else: 101 | run = None # type: ignore 102 | 103 | trainer = transformers.Trainer( 104 | model=model, 105 | train_dataset=dataset_instance.train_dataset, 106 | eval_dataset=dataset_instance.test_dataset, 107 | args=transformers.TrainingArguments( 108 | per_device_train_batch_size=micro_batch_size, 109 | gradient_accumulation_steps=batch_size // micro_batch_size, 110 | optim="adamw_torch", 111 | # gradient_checkpointing=True, 112 | warmup_steps=100, 113 | num_train_epochs=epochs, 114 | learning_rate=learning_rate, 115 | fp16=True, 116 | logging_steps=10 if debug else 1, 117 | save_strategy="epoch" if debug else "no", 118 | eval_steps=20 if debug else 1, 119 | output_dir=output_dir, 120 | save_total_limit=1, 121 | load_best_model_at_end=False, 122 | report_to="wandb" if use_wandb else "none", 123 | run_name=wandb_run_name if use_wandb else None, 124 | remove_unused_columns=False, 125 | ), 126 | data_collator=dataset_instance.data_collator, 127 | ) 128 | 129 | trainer.train() 130 | 131 | if push_model_to_hf: 132 | # Save the pretrained model locally 133 | model.save_pretrained(output_dir) # type: ignore 134 | tokenizer.save_pretrained(output_dir) # type: ignore 135 | 136 | # Push to the hub 137 | model.push_to_hub("jinaai/starcoder-1b-textbook") # type: ignore 138 | tokenizer.push_to_hub("jinaai/starcoder-1b-textbook") # type: ignore 139 | 140 | accuracy_results, sample_results = evaluate( 141 | model, tokenizer, eval_size=eval_size, max_new_tokens=eval_max_new_tokens 142 | ) 143 | 144 | if use_wandb and run: 145 | # log accuracy@k results 146 | run.log(accuracy_results) 147 | 148 | # log sample values 149 | results = list(sample_results.values()) 150 | columns = list(results[0].keys()) 151 | results_data = [[result[key] for key in columns] for result in results] 152 | eval_table = wandb.Table(columns=columns, data=results_data) 153 | run.log({"Evaluation": eval_table}) 154 | 155 | if wandb_log_model: 156 | # upload model weights 157 | artifact = wandb.Artifact(name="model_weight", type="model") 158 | artifact.add_dir(output_dir) 159 | run.log_artifact(artifact) # type: ignore 160 | 161 | 162 | if __name__ == "__main__": 163 | app() 164 | -------------------------------------------------------------------------------- /textbook/dataset_gen/create_prompts.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import itertools 3 | from typing import List, Optional 4 | from pydantic import 
BaseModel 5 | import random 6 | import pandas as pd 7 | import numpy as np 8 | import openai 9 | import os 10 | import json 11 | from rich.progress import track 12 | 13 | 14 | class Topic(BaseModel): 15 | topic: str 16 | mixing: int 17 | parent: Optional[Topic] = None 18 | 19 | 20 | class Exercise(BaseModel): 21 | exercise: str 22 | topic: Topic 23 | 24 | 25 | class Query(BaseModel): 26 | query: str 27 | topic_1: Topic 28 | topic_2: Topic 29 | 30 | 31 | def create_subtopic_query(topic: str, n: int) -> str: 32 | return f"""For a Python textbook give me {n} subtopics of {topic}, formatted as a Python list. 33 | Just provide the titles and give no explanation. 34 | Format the result as Python list. 35 | """ 36 | 37 | 38 | def create_prompt_query(topic_1: Topic, topic_2: Topic, profession: str) -> str: 39 | query = f''' 40 | Create a code completion exercise on the intersection of “{topic_1.topic}” and “{topic_2.topic}”. 41 | Write it for a {profession}. 42 | 43 | The exercise must be of the style: 44 | 45 | ``` 46 | def name(args): 47 | 48 | """Docstring explaining the exercise""" 49 | 50 | python code to solve the exercise 51 | ``` 52 | 53 | NO CLASSES 54 | 55 | MAKE IT VERY DIFFICULT 56 | ''' 57 | query = "\n".join([m.lstrip() for m in query.strip().split("\n")]) 58 | return query 59 | 60 | 61 | def create_subtopics(topic: Topic, n: int, retries: int = 10) -> List[Topic]: 62 | success = False 63 | query = create_subtopic_query(topic.topic, n) 64 | print(query) 65 | for i in range(retries): 66 | try: 67 | completion = openai.ChatCompletion.create( 68 | model="gpt-4", 69 | messages=[ 70 | {"role": "system", "content": "You are a helpful assistant."}, 71 | {"role": "user", "content": query}, 72 | ], 73 | temperature=1.5, 74 | ) 75 | 76 | result = [ 77 | Topic(topic=i, mixing=topic.mixing, parent=topic) 78 | for i in eval(completion.choices[0].message["content"]) 79 | ] 80 | success = True 81 | except Exception: 82 | print(f"Generation failed for prompt, retrying {i + 1}/{retries}") 83 | else: 84 | break 85 | 86 | if success: 87 | return result 88 | else: 89 | return [] 90 | 91 | 92 | def create_prompts( 93 | topic: Topic, 94 | combination_options: List[Topic], 95 | professions: List[str], 96 | n: int, 97 | ) -> List[Query]: 98 | random.shuffle(combination_options) 99 | prompts: List[Query] = [] 100 | 101 | for loc_topic in combination_options: 102 | if len(prompts) == n: 103 | break 104 | 105 | if loc_topic.mixing and loc_topic.parent != topic.parent: 106 | profession = professions[np.random.randint(0, len(professions))] 107 | query = create_prompt_query(topic, loc_topic, profession) 108 | prompts.append(Query(query=query, topic_1=topic, topic_2=loc_topic)) 109 | 110 | return prompts 111 | 112 | 113 | if __name__ == "__main__": 114 | # Load list of topics 115 | API_KEY = os.environ["API_PASSWORD"] 116 | TOPICS_PATH = "tree/topics.csv" 117 | openai.api_key = API_KEY 118 | 119 | topics = pd.read_csv(TOPICS_PATH) 120 | topics = topics.fillna(0) 121 | topics = topics.iloc[:, :3] 122 | topics.Topic = topics.Topic.str.split(".").str[1] 123 | topics.Use = topics.Use.astype(int) 124 | topics.Mixing = topics.Mixing.astype(int) 125 | topics_df = topics[topics.Use == 1].reset_index(drop=True) 126 | topics_df = topics_df.drop("Use", axis=1) 127 | topics_list = list(zip(topics_df.Topic, topics_df.Mixing)) 128 | 129 | # Debug mode to create few prompts 130 | DEBUG = False 131 | if DEBUG: 132 | n_base_topics = 5 133 | n_combinations = 2 134 | else: 135 | n_base_topics = len(topics_df) 136 | n_combinations = 
200 137 | 138 | root = Topic(topic="Python", mixing=1) 139 | base_topics = [ 140 | Topic(topic=top, mixing=mix, parent=root) 141 | for (top, mix) in zip(topics_df.Topic, topics_df.Mixing) 142 | ] 143 | subtopics = [create_subtopics(t, 10) for t in base_topics[:n_base_topics]] 144 | subtopics_list = list(itertools.chain(*subtopics)) 145 | subtopics_json = json.dumps([x.dict() for x in subtopics_list]) 146 | 147 | with open("tree/subtopics.json", "w") as outfile: 148 | outfile.write(subtopics_json) 149 | 150 | subsubtopics: List[List[Topic]] = [ 151 | create_subtopics(t, 5) 152 | for t in track(itertools.chain(*subtopics), description="Processing...") 153 | ] 154 | subsubtopics_list = list(itertools.chain(*subsubtopics)) 155 | subsubtopics_json: str = json.dumps([x.dict() for x in subsubtopics_list]) 156 | 157 | with open("tree/subsubtopics.json", "w") as outfile: 158 | outfile.write(subsubtopics_json) 159 | 160 | with open("tree/professions.json", "r") as openfile: 161 | # Reading from json file 162 | professions = list(json.load(openfile)) 163 | 164 | prompts: List[List[Query]] = [ 165 | create_prompts( 166 | i, 167 | combination_options=subsubtopics_list, 168 | professions=professions, 169 | n=n_combinations, 170 | ) 171 | for i in track(itertools.chain(*subsubtopics), description="Processing...") 172 | ] 173 | 174 | prompts_list = list(itertools.chain(*prompts)) 175 | prompts_json = json.dumps([p.dict() for p in prompts_list]) 176 | with open("tree/prompts.json", "w") as outfile: 177 | outfile.write(prompts_json) 178 | -------------------------------------------------------------------------------- /tests/dataset_gen/test_units.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | from textbook.dataset_gen.dataset_gen import ( 5 | OpenAIGenerator, 6 | load_prompts, 7 | mass_generation, 8 | generation, 9 | MonkeyGenerator, 10 | write_results_to_jsonl, 11 | Result, 12 | generator_to_exercises, 13 | split_exercises, 14 | check_exercise, 15 | ) 16 | import numpy as np 17 | import pytest 18 | 19 | 20 | def mock_openai(mocker): 21 | mocker.patch( 22 | "textbook.dataset_gen.dataset_gen.OpenAIGenerator.generate", 23 | return_value=Result( 24 | prompt="Cheesecake with strawberries", 25 | output='def gruyere(): """No way jose""" return 0' * 2, 26 | ), 27 | ) 28 | 29 | 30 | def update_progress(): 31 | ... 
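# No-op progress callback used by these tests in place of the rich progress updater that mass_generation builds internally.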
32 | 33 | 34 | @pytest.mark.openai 35 | def test_generation(): 36 | generator = OpenAIGenerator() 37 | gen = generator.generate("Hello world") 38 | assert isinstance(gen, Result) 39 | 40 | 41 | def test_generation_mock(mocker): 42 | mock_openai(mocker) 43 | generator = OpenAIGenerator() 44 | gen = generator.generate("Hello world") 45 | prompts = "Hello World" 46 | generation(prompts, generator, update_progress, 10) 47 | assert isinstance(gen, Result) 48 | assert gen.prompt == "Cheesecake with strawberries" 49 | assert gen.output == 'def gruyere(): """No way jose""" return 0' * 2 50 | 51 | 52 | def test_mass_generation(mocker, tmp_path): 53 | mock_openai(mocker) 54 | 55 | def get_generator(): 56 | return OpenAIGenerator() 57 | 58 | prompts = ["Hello world", "Goodbye world"] 59 | mass_generation(prompts, get_generator, save_dir=str(tmp_path)) 60 | 61 | ls = os.listdir(tmp_path) 62 | assert len(ls) > 0 63 | 64 | file_path = os.listdir(os.path.join(tmp_path, ls[0])) 65 | assert len(file_path) > 0 66 | 67 | 68 | def test_generation_monkey_generator(): 69 | n_functions = np.random.randint(0, 100) 70 | generator = MonkeyGenerator(speed=-1, n_functions=n_functions) 71 | prompts = "Hello world" 72 | result = generation(prompts, generator, update_progress, 10) 73 | assert len(result) == n_functions 74 | 75 | 76 | def test_mass_generation_monkey_generator(mocker, tmp_path): 77 | n_functions = np.random.randint(1, 100) 78 | 79 | def get_generator(): 80 | return MonkeyGenerator(speed=-1, n_functions=n_functions) 81 | 82 | prompts = ["Hello world", "Goodbye world"] * 20 83 | mass_generation(prompts, get_generator, save_dir=str(tmp_path)) 84 | ls = os.listdir(tmp_path) 85 | assert len(ls) > 0 86 | 87 | file_path = os.listdir(os.path.join(tmp_path, ls[0])) 88 | assert len(file_path) > 0 89 | 90 | 91 | def test_load_prompts(): 92 | prompts = load_prompts("tests/data/prompts_debug.jsonl", "prompt") 93 | assert len(prompts) == 5 94 | assert isinstance(prompts[0], str) 95 | 96 | 97 | def test_save_results(tmp_path): 98 | results = [ 99 | Result( 100 | prompt="Hello world", 101 | output='def gruyere(): """No way jose""" return 0', 102 | ), 103 | Result( 104 | prompt="Goodbye world", 105 | output='def emmentaler(): """No way jose""" return 1', 106 | ), 107 | ] 108 | file = f"{tmp_path}/results.jsonl" 109 | write_results_to_jsonl(file, results) 110 | 111 | with open(file, "r") as f: 112 | lines = f.readlines() 113 | 114 | prompts = [Result.parse_obj(json.loads(line)) for line in lines] 115 | 116 | assert len(prompts) == 2 117 | assert prompts[0].prompt == "Hello world" 118 | assert prompts[0].output == 'def gruyere(): """No way jose""" return 0' 119 | assert prompts[1].prompt == "Goodbye world" 120 | assert prompts[1].output == 'def emmentaler(): """No way jose""" return 1' 121 | 122 | 123 | def test_split_exercises(): 124 | input = ''' 125 | ```python 126 | def reverse_name(name: str) -> str: 127 | """Reverses the letters of a name and returns it. 128 | 129 | >>> reverse_name("LeBron") 130 | 'norBeL' 131 | >>> reverse_name("Curry") 132 | 'yrruC' 133 | """ 134 | return name[::-1] 135 | 136 | def reverse_words(sentence: str) -> str: 137 | """Reverses the order of words in a sentence and returns it. 138 | 139 | >>> reverse_words("I love playing basketball") 140 | 'basketball playing love I' 141 | >>> reverse_words("Hello World!") 142 | 'World! 
Hello' 143 | """ 144 | words = sentence.split() 145 | return " ".join(words[::-1]) 146 | 147 | ''' 148 | assert len(split_exercises(input)) == 2 149 | 150 | 151 | def test_check_exercise(): 152 | good_exercise = ''' 153 | def cheesecake(): 154 | """Cheesecake is delicious."""" 155 | return 0 156 | ''' 157 | another_good_exercise = ''' 158 | def marmelade(): 159 | """Marmelade is delicious."""" 160 | print("Hello world") 161 | ''' 162 | bad_exercise = ''' 163 | def blubberfish(): 164 | """Blubberfish is delicious."""" 165 | ''' 166 | assert check_exercise(good_exercise) 167 | assert check_exercise(another_good_exercise) 168 | assert not check_exercise(bad_exercise) 169 | 170 | 171 | def test_generator_to_functions(): 172 | input = ''' 173 | ```python 174 | def reverse_name(name: str) -> str: 175 | """Reverses the letters of a name and returns it. 176 | 177 | >>> reverse_name("LeBron") 178 | 'norBeL' 179 | >>> reverse_name("Curry") 180 | 'yrruC' 181 | """ 182 | return name[::-1] 183 | 184 | def reverse_words(sentence: str) -> str: 185 | """Reverses the order of words in a sentence and returns it. 186 | 187 | >>> reverse_words("I love playing basketball") 188 | 'basketball playing love I' 189 | >>> reverse_words("Hello World!") 190 | 'World! Hello' 191 | """ 192 | words = sentence.split() 193 | return " ".join(words[::-1]) 194 | 195 | def reverse_alphabetical_order(names: list) -> list: 196 | """Reverses the order of names in a list and returns it. 197 | 198 | >>> reverse_alphabetical_order(['LeBron', 'Curry', 'Kobe']) 199 | ['Kobe', 'Curry', 'LeBron'] 200 | >>> reverse_alphabetical_order(['Jordan', 'Magic', 'Bird']) 201 | ['Bird', 'Magic', 'Jordan'] 202 | """ 203 | return names[::-1] 204 | 205 | def reverse_phone_number(number: str) -> str: 206 | """Reverses the order of digits in a phone number and returns it. 207 | 208 | >>> reverse_phone_number("123-456-7890") 209 | '0987-654-321' 210 | >>> reverse_phone_number("555-123-4567") 211 | '7654-321-555' 212 | """ 213 | area_code, first_half, second_half = number.split("-") 214 | return second_half + "-" + first_half + "-" + area_code 215 | 216 | def intersection_names_to_frozen_sets(names1: list, names2: list) -> set: 217 | """Finds the intersection of two lists of names and returns it as a frozen set. 218 | 219 | >>> intersection_names_to_frozen_sets(['LeBron', 'Curry', 'Kobe'], ['Kobe', 'Jordan']) 220 | {'Kobe'} 221 | >>> intersection_names_to_frozen_sets(['Bird', 'Magic', 'Jordan'], ['LeBron', 'Kobe', 'Bird']) 222 | {'Bird'} 223 | """ 224 | set1 = set(names1) 225 | set2 = set(names2) 226 | return frozenset(set1.intersection(set2)) 227 | ``` 228 | ''' 229 | assert len(generator_to_exercises(input)) == 5 230 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Textbook 2 | The goal of this project is to distill ChatGPT's Python coding ability into a smaller model with only 1 billion parameters. Our focus is on training the smaller model to solve coding tasks with natural language descriptions, and we use the [HumanEval](https://github.com/openai/human-eval) benchmark to evaluate our model. While we are aware that that benchmark is far from ideal, we believe that it is a good starting point to demonstrate the success of our approach to model distillation. We have drawn some inspiration from efforts to the results reported in the paper _Textbooks Are All You Need_ [(Gunasekar et al. 
2023)](https://doi.org/10.48550/arXiv.2306.11644). 3 | 4 | This repository consists of two parts: 5 | 6 | * Dataset Generation: The code that we used to generate a \~120 million token dataset of Python programming exercises from ChatGPT 3.5. 7 | * Model Fine-tuning: The code that we used to fine-tune the [Starcoder 1b model](https://github.com/bigcode-project/starcoder) using the generated dataset. 8 | 9 | The generated exercises dataset is composed of a diverse set of \~120k Python code exercises (~120m total tokens) generated by ChatGPT 3.5. It follows the format of the [Human Eval benchmark](https://github.com/openai/human-eval): Each training sample is split into a Python function signature with a descriptive docstring, and a solution to the exercise. 10 | 11 | 12 | ## Usage 13 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1T4IfGfDJ8uxgU8XBPpMZivw_JThzdQim?usp=sharing) 14 | 15 | You can download and use the model like so: 16 | ```python 17 | from transformers import AutoModelForCausalLM, AutoTokenizer 18 | 19 | model = AutoModelForCausalLM.from_pretrained( 20 | "jinaai/starcoder-1b-textbook", device_map='auto' 21 | ) 22 | 23 | tokenizer = AutoTokenizer.from_pretrained("jinaai/starcoder-1b-textbook") 24 | 25 | prompt = ''' 26 | def unique(l: list): 27 | """Return sorted unique elements in a list 28 | >>> unique([5, 3, 5, 2, 3, 3, 9, 0, 123]) 29 | [0, 2, 3, 5, 9, 123] 30 | """ 31 | ''' 32 | 33 | inputs = tokenizer(prompt.rstrip(), return_tensors="pt").to("cuda") 34 | 35 | generation_output = model.generate( 36 | **inputs, 37 | max_new_tokens=128, 38 | eos_token_id=tokenizer.eos_token_id, 39 | return_dict_in_generate=True, 40 | ) 41 | 42 | s = generation_output.sequences[0] 43 | output = tokenizer.decode(s, skip_special_tokens=True) 44 | 45 | print(output) 46 | ``` 47 | 48 | ```text 49 | def unique(l: list): 50 | """Return sorted unique elements in a list 51 | >>> unique([5, 3, 5, 2, 3, 3, 9, 0, 123]) 52 | [0, 2, 3, 5, 9, 123] 53 | """ 54 | return sorted(set(l)) 55 | ``` 56 | 57 | ## Synthetic exercise creation 58 | 59 | Model distillation is the process of transferring some of the skilled performance of large models on specific classes of tasks to significantly smaller models. The purpose is to get performance comparable to the larger model, but at a fraction of the cost and at a vastly quicker speed. The general outline of this strategy is described (without technical implementation details) in [Textbooks Are All You Need](https://doi.org/10.48550/arXiv.2306.11644). 60 | 61 | Key to the distillation process is the creation of synthetic data, generated by the larger AI model, to train the smaller model. We have applied this approach to Python programming tasks and are publishing a summary of our methods here along with the synthetic dataset. 62 | 63 | For fuller details and implementation code, see the [related GitHub repository](https://github.com/jina-ai/textbook). 64 | 65 | ### Diversity 66 | 67 | The main problem with model-generated synthetic data is its diversity. If we had constructed this dataset by giving ChatGPT 3.5 the same prompt several hundred thousand times, we would get many very similar, if not functionally identical, results. This would reduce the usefulness of the dataset for training. 
In principle, one might solve the problem by filtering the results for near duplicates, but this is a non-trivial problem, and even if it could be solved, it would be a wasteful and potentially expensive use of the larger model. 68 | 69 | And even then, we could not be sure the examples adequately covered the topic. To solve this problem, we introduced a novel scheme for systematically prompting large language models to produce diverse examples. 70 | 71 | ### Using a topic tree to build diverse prompts 72 | 73 | We constructed a hierarchical model of subjects in Python programming, i.e. a topic tree. First, we manually identified 42 general topic areas in Python knowledge, for example, _data structures_ and _sorting algorithms_. We asked an LLM to propose 10 subtopics for each, and then for each of those 420 fine-grained topics, we asked the LLM to generate 5 even more fine-grained sub-subtopics. This resulted in roughly 2000 very fine-grained topics. 74 | 75 | We generated prompts by randomly selecting two of those roughly two thousand topics and combining them: 76 | 77 | ``` 78 | Create a code completion exercise on the intersection of {topic 1} and {topic 2}. 79 | ``` 80 | 81 | To increase randomness and diversity in the results, we also constructed a list of 40 professions, like _economist_, _engineer_, and _social worker_, and added them to the prompt: 82 | 83 | ``` 84 | Create a code completion exercise on the intersection of {topic 1} and {topic 2}. 85 | Write it for a {profession}. 86 | ``` 87 | 88 | In principle, there are approximately two million possible pairs of topics, and with 40 possible professions, this yields 80 million unique prompts. If the response to each prompt averages 100 tokens, this means our method can generate an 8 billion token synthetic dataset while maintaining a high degree of diversity. The dataset used here is only a small sample of the possible total. 89 | 90 | 91 | ## Install dependencies 92 | 93 | 94 | ```cmd 95 | poetry install 96 | poetry shell 97 | pip install torch 98 | ``` 99 | 100 | 101 | ## Generating the Dataset 102 | 103 | 104 | Follow these steps to reproduce the dataset generation. 105 | 106 | 107 | First, export your OpenAI key: 108 | ```shell 109 | export OPENAI_API_KEY=sk-XXX 110 | ``` 111 | then start the parallel calls to OpenAI: 112 | ```shell 113 | cd textbook/dataset_gen 114 | python dataset_gen_cli.py generate ./tree/professions.json ./tree/subsubtopics.json ./exercises --n-prompts 2_000_000 --pool-size 40 115 | ``` 116 | 117 | This should take around 6 hours. The process might be killed before the end, but the data will still be saved progressively.
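Each prompt's result is written to its own small JSONL file under `./exercises`, sharded into subdirectories by an MD5 hash of the prompt, and prompts that already have an output file are skipped on subsequent runs, so re-running the same command after an interruption simply resumes the generation (see `textbook/dataset_gen/dataset_gen.py`). Below is a minimal sketch for checking progress before the filtering step; it assumes it is run from `textbook/dataset_gen` so that `exercises/` is the save directory used above, and that each saved line follows the `Exercise` model with `problem` and `solution` fields — the script itself is illustrative and not part of the repository.

```python
import json
from pathlib import Path

# Exercises are saved as one JSONL file per prompt, sharded by MD5 prefix:
# exercises/<first 4 hex chars>/<remaining 28 hex chars>.jsonl
save_dir = Path("exercises")

n_prompts_done = 0
n_exercises = 0
for path in save_dir.glob("*/*.jsonl"):
    n_prompts_done += 1
    with path.open() as f:
        for line in f:
            record = json.loads(line)  # {"problem": "...", "solution": "..."}
            if record.get("solution"):  # failed generations are stored with an empty solution
                n_exercises += 1

print(f"{n_prompts_done} prompts completed, {n_exercises} exercises collected so far")
```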
118 | 119 | 120 | Once the files are generated, you can postprocess them and save everything into a single JSONL file: 121 | 122 | ```shell 123 | python dataset_gen_cli.py filter ./exercises dataset.jsonl 124 | ``` 125 | 126 | Then push it to the Hugging Face dataset hub: 127 | 128 | ```shell 129 | python dataset_gen_cli.py push "jinaai/code_exercises_40k" dataset.jsonl 130 | ``` 131 | 132 | ## Training 133 | 134 | 135 | Single-GPU run: 136 | 137 | ```cmd 138 | python textbook/train.py --epochs 2 --micro-batch-size 4 --batch-size 128 --learning-rate 1e-4 139 | ``` 140 | 141 | A100 run: 142 | 143 | 144 | ```cmd 145 | python textbook/train.py --module StarCoder --dataset ExerciseDatast --epochs 1 --micro-batch-size 8 --batch-size 128 --wandb-project textbook_debug --use-wandb --no-wandb-log-model 146 | ``` 147 | 148 | Multi-GPU run with DeepSpeed: 149 | ```cmd 150 | deepspeed --num_gpus=2 textbook/train.py --deepspeed ds_config.json --epochs 2 --micro-batch-size 4 --batch-size 128 --learning-rate 1e-4 151 | ``` 152 | 153 | 154 | Note: 155 | 156 | To use the StarCoder base model, you first need to log in to Hugging Face and accept the terms of service of the base model (https://huggingface.co/bigcode/starcoderbase-1b): 157 | ```cmd 158 | huggingface-cli login 159 | ``` 160 | 161 | 162 | ## Setup on RunPod 163 | ```shell 164 | bash <(curl -Ls https://raw.githubusercontent.com/jina-ai/textbook/main/setup_vm.sh) 165 | ``` 166 | -------------------------------------------------------------------------------- /human-eval/human_eval/execution.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Dict 2 | import contextlib 3 | import faulthandler 4 | import io 5 | import os 6 | import multiprocessing 7 | import platform 8 | import signal 9 | import tempfile 10 | 11 | 12 | def check_correctness( 13 | problem: Dict, completion: str, timeout: float, completion_id: Optional[int] = None 14 | ) -> Dict: 15 | """ 16 | Evaluates the functional correctness of a completion by running the test 17 | suite provided in the problem. 18 | 19 | :param completion_id: an optional completion ID so we can match 20 | the results later even if execution finishes asynchronously. 21 | """ 22 | 23 | def unsafe_execute(): 24 | with create_tempdir(): 25 | # These system calls are needed when cleaning up tempdir. 26 | import os 27 | import shutil 28 | 29 | rmtree = shutil.rmtree 30 | rmdir = os.rmdir 31 | chdir = os.chdir 32 | 33 | # Disable functionalities that can make destructive changes to the test. 34 | reliability_guard() 35 | 36 | # Construct the check program and run it. 37 | check_program = ( 38 | problem["prompt"] 39 | + completion 40 | + "\n" 41 | + problem["test"] 42 | + "\n" 43 | + f"check({problem['entry_point']})" 44 | ) 45 | 46 | try: 47 | exec_globals = {} 48 | with swallow_io(): 49 | with time_limit(timeout): 50 | # WARNING 51 | # This program exists to execute untrusted model-generated code. Although 52 | # it is highly unlikely that model-generated code will do something overtly 53 | # malicious in response to this test suite, model-generated code may act 54 | # destructively due to a lack of model capability or alignment. 55 | # Users are strongly encouraged to sandbox this evaluation suite so that it 56 | # does not perform destructive actions on their host or network. For more 57 | # information on how OpenAI sandboxes its code, see the accompanying paper.
58 | # Once you have read this disclaimer and taken appropriate precautions, 59 | # uncomment the following line and proceed at your own risk: 60 | exec(check_program, exec_globals) 61 | result.append("passed") 62 | except TimeoutException: 63 | result.append("timed out") 64 | except BaseException as e: 65 | result.append(f"failed: {e}") 66 | 67 | # Needed for cleaning up. 68 | shutil.rmtree = rmtree 69 | os.rmdir = rmdir 70 | os.chdir = chdir 71 | 72 | manager = multiprocessing.Manager() 73 | result = manager.list() 74 | 75 | p = multiprocessing.Process(target=unsafe_execute) 76 | p.start() 77 | p.join(timeout=timeout + 1) 78 | if p.is_alive(): 79 | p.kill() 80 | 81 | if not result: 82 | result.append("timed out") 83 | 84 | return dict( 85 | task_id=problem["task_id"], 86 | passed=result[0] == "passed", 87 | result=result[0], 88 | completion_id=completion_id, 89 | ) 90 | 91 | 92 | @contextlib.contextmanager 93 | def time_limit(seconds: float): 94 | def signal_handler(signum, frame): 95 | raise TimeoutException("Timed out!") 96 | 97 | signal.setitimer(signal.ITIMER_REAL, seconds) 98 | signal.signal(signal.SIGALRM, signal_handler) 99 | try: 100 | yield 101 | finally: 102 | signal.setitimer(signal.ITIMER_REAL, 0) 103 | 104 | 105 | @contextlib.contextmanager 106 | def swallow_io(): 107 | stream = WriteOnlyStringIO() 108 | with contextlib.redirect_stdout(stream): 109 | with contextlib.redirect_stderr(stream): 110 | with redirect_stdin(stream): 111 | yield 112 | 113 | 114 | @contextlib.contextmanager 115 | def create_tempdir(): 116 | with tempfile.TemporaryDirectory() as dirname: 117 | with chdir(dirname): 118 | yield dirname 119 | 120 | 121 | class TimeoutException(Exception): 122 | pass 123 | 124 | 125 | class WriteOnlyStringIO(io.StringIO): 126 | """StringIO that throws an exception when it's read from""" 127 | 128 | def read(self, *args, **kwargs): 129 | raise IOError 130 | 131 | def readline(self, *args, **kwargs): 132 | raise IOError 133 | 134 | def readlines(self, *args, **kwargs): 135 | raise IOError 136 | 137 | def readable(self, *args, **kwargs): 138 | """Returns True if the IO object can be read.""" 139 | return False 140 | 141 | 142 | class redirect_stdin(contextlib._RedirectStream): # type: ignore 143 | _stream = "stdin" 144 | 145 | 146 | @contextlib.contextmanager 147 | def chdir(root): 148 | if root == ".": 149 | yield 150 | return 151 | cwd = os.getcwd() 152 | os.chdir(root) 153 | try: 154 | yield 155 | except BaseException as exc: 156 | raise exc 157 | finally: 158 | os.chdir(cwd) 159 | 160 | 161 | def reliability_guard(maximum_memory_bytes: Optional[int] = None): 162 | """ 163 | This disables various destructive functions and prevents the generated code 164 | from interfering with the test (e.g. fork bomb, killing other processes, 165 | removing filesystem files, etc.) 166 | 167 | WARNING 168 | This function is NOT a security sandbox. Untrusted code, including, model- 169 | generated code, should not be blindly executed outside of one. See the 170 | Codex paper for more information about OpenAI's code sandbox, and proceed 171 | with caution. 
172 | """ 173 | 174 | if maximum_memory_bytes is not None: 175 | import resource 176 | 177 | resource.setrlimit( 178 | resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes) 179 | ) 180 | resource.setrlimit( 181 | resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes) 182 | ) 183 | if not platform.uname().system == "Darwin": 184 | resource.setrlimit( 185 | resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes) 186 | ) 187 | 188 | faulthandler.disable() 189 | 190 | import builtins 191 | 192 | builtins.exit = None 193 | builtins.quit = None 194 | 195 | import os 196 | 197 | os.environ["OMP_NUM_THREADS"] = "1" 198 | 199 | os.kill = None 200 | os.system = None 201 | os.putenv = None 202 | os.remove = None 203 | os.removedirs = None 204 | os.rmdir = None 205 | os.fchdir = None 206 | os.setuid = None 207 | os.fork = None 208 | os.forkpty = None 209 | os.killpg = None 210 | os.rename = None 211 | os.renames = None 212 | os.truncate = None 213 | os.replace = None 214 | os.unlink = None 215 | os.fchmod = None 216 | os.fchown = None 217 | os.chmod = None 218 | os.chown = None 219 | os.chroot = None 220 | os.fchdir = None 221 | os.lchflags = None 222 | os.lchmod = None 223 | os.lchown = None 224 | os.getcwd = None 225 | os.chdir = None 226 | 227 | import shutil 228 | 229 | shutil.rmtree = None 230 | shutil.move = None 231 | shutil.chown = None 232 | 233 | import subprocess 234 | 235 | subprocess.Popen = None # type: ignore 236 | 237 | __builtins__["help"] = None 238 | 239 | import sys 240 | 241 | sys.modules["ipdb"] = None 242 | sys.modules["joblib"] = None 243 | sys.modules["resource"] = None 244 | sys.modules["psutil"] = None 245 | sys.modules["tkinter"] = None 246 | -------------------------------------------------------------------------------- /textbook/dataset_gen/dataset_gen.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from concurrent.futures import ThreadPoolExecutor 3 | import json 4 | import os 5 | import random 6 | import time 7 | 8 | from typing import Callable, List, Protocol 9 | 10 | import openai 11 | from openai import OpenAIError 12 | 13 | from pydantic import BaseModel 14 | from textbook.dataset_gen.create_prompts import Topic 15 | from rich.progress import ( 16 | Progress, 17 | TimeElapsedColumn, 18 | TextColumn, 19 | ) 20 | import hashlib 21 | 22 | THREAD_LOCK = threading.Lock() 23 | PROMPT_TOKENS_CNT = 0 24 | COMPLETION_TOKENS_CNT = 0 25 | 26 | 27 | class Exercise(BaseModel): 28 | problem: str 29 | solution: str 30 | 31 | 32 | class Result(BaseModel): 33 | prompt: str 34 | output: str 35 | 36 | 37 | def split_exercises(output: str) -> List[str]: 38 | """Split the result of the generation into separate functions""" 39 | return ["def" + i for i in output.split("def")[1:]] 40 | 41 | 42 | def check_exercise(exercise: str) -> bool: 43 | try: 44 | if ( 45 | "return" not in exercise.split('"""')[2] 46 | and "print" not in exercise.split('"""')[2] 47 | ): 48 | return False 49 | else: 50 | return True 51 | except IndexError: 52 | return False 53 | 54 | 55 | def generator_to_exercises(output: str) -> List[Exercise]: 56 | exercises = split_exercises(output) 57 | exercises = [i for i in exercises if check_exercise(i)] 58 | results = [] 59 | for j in exercises: 60 | try: 61 | splitted_exercise = j.split('"""') 62 | question = '"""'.join(splitted_exercise[:2]) + '"""' 63 | answer = splitted_exercise[2] 64 | results.append(Exercise(problem=question, solution=answer)) 65 | except IndexError: 66 | 
splitted_exercise = j.split("'''") 67 | question = "'''".join(splitted_exercise[:2]) + "'''" 68 | answer = splitted_exercise[2] 69 | results.append(Exercise(problem=question, solution=answer)) 70 | 71 | return results 72 | 73 | 74 | class Generator(Protocol): 75 | def generate(self, prompt: str) -> Result: 76 | ... 77 | 78 | 79 | class OpenAIGenerator: 80 | def __init__( 81 | self, 82 | model: str = "gpt-3.5-turbo", 83 | ): 84 | self.model = model 85 | 86 | def generate(self, prompt: str) -> Result: 87 | global PROMPT_TOKENS_CNT 88 | global COMPLETION_TOKENS_CNT 89 | chat_completion = openai.ChatCompletion.create( 90 | model=self.model, 91 | messages=[{"role": "user", "content": prompt}], 92 | max_tokens=250, 93 | timeout=60, 94 | ) 95 | with THREAD_LOCK: 96 | PROMPT_TOKENS_CNT += chat_completion.usage.prompt_tokens 97 | COMPLETION_TOKENS_CNT += chat_completion.usage.completion_tokens 98 | result = Result( 99 | prompt=prompt, output=chat_completion.choices[0].message.content 100 | ) 101 | 102 | return result 103 | 104 | 105 | class GenerationError(OpenAIError): 106 | ... 107 | 108 | 109 | class MonkeyGenerator: 110 | """ 111 | A generator with a random response time and a random failure rate 112 | """ 113 | 114 | def __init__(self, speed: int = 2, n_functions: int = 10): 115 | self.speed = speed 116 | self.n_functions = n_functions 117 | 118 | def generate(self, prompt: str) -> Result: 119 | seed = random.randint(0, 100) 120 | 121 | if self.speed > 0: 122 | time.sleep(seed / 100 * self.speed) 123 | # if not (seed % 50): 124 | # raise GenerationError("Monkey failed") 125 | 126 | return Result( 127 | prompt=prompt, 128 | output='def gorilla(): """Empty function for a gorilla""" return 0' 129 | * self.n_functions, 130 | ) 131 | 132 | 133 | def generation( 134 | prompt: str, 135 | generator: Generator, 136 | update_progress: Callable, 137 | retries: int, 138 | ) -> List[Exercise]: 139 | success = False 140 | time.sleep(random.random()) 141 | for i in range(retries): 142 | try: 143 | result = generator.generate(prompt) 144 | success = True 145 | except GenerationError: 146 | print(f"Generation failed for prompt {prompt}, retrying {i + 1}/{retries}") 147 | time.sleep(1) 148 | else: 149 | break 150 | 151 | if success: 152 | exercises = generator_to_exercises(result.output) 153 | update_progress() 154 | return exercises 155 | 156 | else: 157 | print(f"Generation failed for prompt {prompt}, skipping") 158 | return [Exercise(problem=prompt, solution="")] 159 | 160 | 161 | def _generation_wrapper( 162 | prompt: str, 163 | get_generator: Callable[[], Generator], 164 | update_progress: Callable, 165 | save_dir: str, 166 | retries: int, 167 | ): 168 | file_path_sum = hashlib.md5(prompt.encode("utf-8")).hexdigest() 169 | 170 | dir_path, file_path = file_path_sum[:4], file_path_sum[4:] 171 | dir_path = os.path.join(save_dir, dir_path) 172 | file_path = os.path.join(dir_path, file_path + ".jsonl") 173 | 174 | if not os.path.exists(dir_path): 175 | os.makedirs(dir_path) 176 | 177 | if os.path.exists(file_path): # we don't regenerate each query 178 | print(f"skip {file_path} generation because it already exist ") 179 | return 180 | 181 | generator = get_generator() 182 | 183 | results = generation(prompt, generator, update_progress, retries) 184 | 185 | write_results_to_jsonl(file_path, results) 186 | 187 | 188 | def mass_generation( 189 | prompts: List[str], 190 | get_generator: Callable[[], Generator], 191 | save_dir: str, 192 | pool_size: int = 10, 193 | retries: int = 10, 194 | ): 195 | """ 196 | 
Generate from a list of prompts. Use a thread pool to parallelize the generation with catch and retry mechanism 197 | """ 198 | with Progress( 199 | *Progress.get_default_columns(), 200 | "•", 201 | TimeElapsedColumn(), 202 | TextColumn("completion: [bold green]{task.fields[completion_tokens]}"), 203 | TextColumn("prompt: [bold green]{task.fields[prompt_tokens]}"), 204 | ) as progress: 205 | with ThreadPoolExecutor(max_workers=pool_size) as executor: 206 | progress_task = progress.add_task( 207 | "[red]Generating...", 208 | total=len(prompts), 209 | completion_tokens=0, 210 | prompt_tokens=0, 211 | ) 212 | 213 | def update_progress(): 214 | progress.update( 215 | progress_task, 216 | advance=1, 217 | completion_tokens=COMPLETION_TOKENS_CNT, 218 | prompt_tokens=PROMPT_TOKENS_CNT, 219 | ) 220 | 221 | tasks = [] 222 | 223 | for prompt in prompts: 224 | tasks.append( 225 | executor.submit( 226 | _generation_wrapper, 227 | prompt, 228 | get_generator, 229 | update_progress, 230 | save_dir, 231 | retries, 232 | ) 233 | ) 234 | 235 | for task in tasks: 236 | try: 237 | task.result() 238 | except Exception as e: 239 | print(e) 240 | 241 | 242 | def load_prompts(file: str, key_prompt: str = "prompt") -> List[str]: 243 | with open(file, "r") as f: 244 | lines = f.readlines() 245 | 246 | prompts = [json.loads(line)[key_prompt] for line in lines] 247 | return prompts 248 | 249 | 250 | def load_leaves(file: str) -> List[Topic]: 251 | with open(file, "r") as f: 252 | lines = json.load(f) 253 | topics = [Topic.parse_obj(line) for line in lines] 254 | return topics 255 | 256 | 257 | def write_results_to_jsonl(file_path: str, results: List[Exercise]): 258 | with open(file_path, "w") as file: 259 | for item in results: 260 | json.dump(item.dict(), file) 261 | file.write("\n") 262 | -------------------------------------------------------------------------------- /textbook/dataset_gen/tree/subsubtopics.json: -------------------------------------------------------------------------------- 1 | [{"topic": "Positive Integers", "mixing": 1, "parent": {"topic": "integers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Negative Integers", "mixing": 1, "parent": {"topic": "integers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Operations on Integers", "mixing": 1, "parent": {"topic": "integers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Number line representation", "mixing": 1, "parent": {"topic": "integers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Absolute value", "mixing": 1, "parent": {"topic": "integers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Introduction to floating-point numbers", "mixing": 1, "parent": {"topic": "floating-point numbers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Floating-point arithmetic operations", "mixing": 1, "parent": {"topic": "floating-point numbers", "mixing": 1, "parent": {"topic": " Python Data Types and 
Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Representing floating-point numbers", "mixing": 1, "parent": {"topic": "floating-point numbers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Rounding and precision of floating-point numbers", "mixing": 1, "parent": {"topic": "floating-point numbers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Floating-point error and understanding epsilon", "mixing": 1, "parent": {"topic": "floating-point numbers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "String indexing", "mixing": 1, "parent": {"topic": "strings", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "String slicing", "mixing": 1, "parent": {"topic": "strings", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "String concatenation", "mixing": 1, "parent": {"topic": "strings", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "String methods", "mixing": 1, "parent": {"topic": "strings", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "String formatting", "mixing": 1, "parent": {"topic": "strings", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Introduction to booleans", "mixing": 1, "parent": {"topic": "booleans", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Boolean operations", "mixing": 1, "parent": {"topic": "booleans", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Conditional statements", "mixing": 1, "parent": {"topic": "booleans", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Boolean expressions in loops", "mixing": 1, "parent": {"topic": "booleans", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Boolean functions", "mixing": 1, "parent": {"topic": "booleans", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "indexing", "mixing": 1, "parent": {"topic": "lists", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "slicing", "mixing": 1, "parent": {"topic": "lists", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "updating", "mixing": 1, 
"parent": {"topic": "lists", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "built-in functions", "mixing": 1, "parent": {"topic": "lists", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "list comprehension", "mixing": 1, "parent": {"topic": "lists", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating a tuple", "mixing": 1, "parent": {"topic": "tuples", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Indexing and slicing tuples", "mixing": 1, "parent": {"topic": "tuples", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Tuple concatenation and repeating", "mixing": 1, "parent": {"topic": "tuples", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Tuple unpacking", "mixing": 1, "parent": {"topic": "tuples", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Comparing tuples", "mixing": 1, "parent": {"topic": "tuples", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating a dictionary", "mixing": 1, "parent": {"topic": "dictionaries", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Accessing dictionary items", "mixing": 1, "parent": {"topic": "dictionaries", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modifying dictionary items", "mixing": 1, "parent": {"topic": "dictionaries", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Looping through a dictionary", "mixing": 1, "parent": {"topic": "dictionaries", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Dictionary methods", "mixing": 1, "parent": {"topic": "dictionaries", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating a set", "mixing": 1, "parent": {"topic": "sets", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Adding elements to a set", "mixing": 1, "parent": {"topic": "sets", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Removing elements from a set", "mixing": 1, "parent": {"topic": "sets", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, 
{"topic": "Operations on sets", "mixing": 1, "parent": {"topic": "sets", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Conversion between sets and other data structures", "mixing": 1, "parent": {"topic": "sets", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Introduction to complex numbers", "mixing": 1, "parent": {"topic": "complex numbers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Representation of complex numbers in Python", "mixing": 1, "parent": {"topic": "complex numbers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Arithmetic operations with complex numbers", "mixing": 1, "parent": {"topic": "complex numbers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Complex conjugate and absolute value", "mixing": 1, "parent": {"topic": "complex numbers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Polar representation of complex numbers", "mixing": 1, "parent": {"topic": "complex numbers", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Introduction to None", "mixing": 1, "parent": {"topic": "None", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Comparison with other values", "mixing": 1, "parent": {"topic": "None", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "None in conditional statements", "mixing": 1, "parent": {"topic": "None", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Default values of variables", "mixing": 1, "parent": {"topic": "None", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "None as a placeholder", "mixing": 1, "parent": {"topic": "None", "mixing": 1, "parent": {"topic": " Python Data Types and Variables", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Addition", "mixing": 1, "parent": {"topic": "Arithmetic Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Subtraction", "mixing": 1, "parent": {"topic": "Arithmetic Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Multiplication", "mixing": 1, "parent": {"topic": "Arithmetic Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Division", "mixing": 1, "parent": {"topic": 
"Arithmetic Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modulus", "mixing": 1, "parent": {"topic": "Arithmetic Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Simple Assignment Operator", "mixing": 1, "parent": {"topic": "Assignment Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Addition Assignment Operator", "mixing": 1, "parent": {"topic": "Assignment Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Subtraction Assignment Operator", "mixing": 1, "parent": {"topic": "Assignment Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Multiplication Assignment Operator", "mixing": 1, "parent": {"topic": "Assignment Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Division Assignment Operator", "mixing": 1, "parent": {"topic": "Assignment Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Equal", "mixing": 1, "parent": {"topic": "Comparison Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Not Equal", "mixing": 1, "parent": {"topic": "Comparison Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Greater Than", "mixing": 1, "parent": {"topic": "Comparison Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Less Than", "mixing": 1, "parent": {"topic": "Comparison Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Greater Than or Equal", "mixing": 1, "parent": {"topic": "Comparison Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "AND operator", "mixing": 1, "parent": {"topic": "Logical Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "OR operator", "mixing": 1, "parent": {"topic": "Logical Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "NOT operator", "mixing": 1, "parent": {"topic": "Logical Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested logical operators", "mixing": 1, "parent": {"topic": "Logical Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Precedence of logical 
operators", "mixing": 1, "parent": {"topic": "Logical Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "AND Operator", "mixing": 1, "parent": {"topic": "Bitwise Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "OR Operator", "mixing": 1, "parent": {"topic": "Bitwise Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "XOR Operator", "mixing": 1, "parent": {"topic": "Bitwise Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Complement Operator", "mixing": 1, "parent": {"topic": "Bitwise Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Left and Right Shift Operators", "mixing": 1, "parent": {"topic": "Bitwise Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "is Operator", "mixing": 1, "parent": {"topic": "Identity Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "is not Operator", "mixing": 1, "parent": {"topic": "Identity Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "in Operator", "mixing": 1, "parent": {"topic": "Identity Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "not in Operator", "mixing": 1, "parent": {"topic": "Identity Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Identity Comparison", "mixing": 1, "parent": {"topic": "Identity Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "in Operator", "mixing": 1, "parent": {"topic": "Membership Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "not in Operator", "mixing": 1, "parent": {"topic": "Membership Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Selection", "mixing": 1, "parent": {"topic": "Control Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Iteration", "mixing": 1, "parent": {"topic": "Control Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Conditional Statements", "mixing": 1, "parent": {"topic": "Control Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Looping Structures", "mixing": 1, "parent": 
{"topic": "Control Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Error Handling", "mixing": 1, "parent": {"topic": "Control Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Concatenation", "mixing": 1, "parent": {"topic": "String Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Repetition", "mixing": 1, "parent": {"topic": "String Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Membership", "mixing": 1, "parent": {"topic": "String Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Indexing", "mixing": 1, "parent": {"topic": "String Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Slicing", "mixing": 1, "parent": {"topic": "String Operators", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Syntax of the Ternary Operator", "mixing": 1, "parent": {"topic": "Ternary Operator", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Example: Unconditioned Ternary Operator", "mixing": 1, "parent": {"topic": "Ternary Operator", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested Ternary Operators", "mixing": 1, "parent": {"topic": "Ternary Operator", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Using Ternary Operator with Multiple Conditions", "mixing": 1, "parent": {"topic": "Ternary Operator", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Alternative to Ternary Operator", "mixing": 1, "parent": {"topic": "Ternary Operator", "mixing": 1, "parent": {"topic": " Python Basic Operators", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Basic Syntax", "mixing": 1, "parent": {"topic": "Sequential Execution", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Sequential Statements", "mixing": 1, "parent": {"topic": "Sequential Execution", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Branching Statements", "mixing": 1, "parent": {"topic": "Sequential Execution", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Looping Statements", "mixing": 1, "parent": {"topic": "Sequential Execution", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": 
null}}}}, {"topic": "Function Calls", "mixing": 1, "parent": {"topic": "Sequential Execution", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "if statement", "mixing": 1, "parent": {"topic": "Conditionals", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "else statement", "mixing": 1, "parent": {"topic": "Conditionals", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "elif statement", "mixing": 1, "parent": {"topic": "Conditionals", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "nested if", "mixing": 1, "parent": {"topic": "Conditionals", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "short-circuiting", "mixing": 1, "parent": {"topic": "Conditionals", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "While Loops", "mixing": 1, "parent": {"topic": "Loops", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "For Loops", "mixing": 1, "parent": {"topic": "Loops", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested Loops", "mixing": 1, "parent": {"topic": "Loops", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Continue Statement", "mixing": 1, "parent": {"topic": "Loops", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Break Statement", "mixing": 1, "parent": {"topic": "Loops", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "if statements", "mixing": 1, "parent": {"topic": "If-else Statements", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "if-else statements", "mixing": 1, "parent": {"topic": "If-else Statements", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "if-elif-else statements", "mixing": 1, "parent": {"topic": "If-else Statements", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "nested if-else statements", "mixing": 1, "parent": {"topic": "If-else Statements", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "short-circuiting with if-else", "mixing": 1, "parent": {"topic": "If-else Statements", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": 
"Basic While Loop Syntax", "mixing": 1, "parent": {"topic": "While loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Loop Control Statements", "mixing": 1, "parent": {"topic": "While loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Infinite Loops", "mixing": 1, "parent": {"topic": "While loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested While Loops", "mixing": 1, "parent": {"topic": "While loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Common Use Cases for While Loop", "mixing": 1, "parent": {"topic": "While loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Iterating over a sequence", "mixing": 1, "parent": {"topic": "For loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Executing a block of code multiple times", "mixing": 1, "parent": {"topic": "For loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested loop", "mixing": 1, "parent": {"topic": "For loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Using break and continue statements", "mixing": 1, "parent": {"topic": "For loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Combining for loop with other functions", "mixing": 1, "parent": {"topic": "For loop", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Break Statement", "mixing": 1, "parent": {"topic": "Break and Continue", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested Loops", "mixing": 1, "parent": {"topic": "Break and Continue", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Loop Control", "mixing": 1, "parent": {"topic": "Break and Continue", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Continue Statement", "mixing": 1, "parent": {"topic": "Break and Continue", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Infinite Loops", "mixing": 1, "parent": {"topic": "Break and Continue", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested if statements", "mixing": 1, "parent": {"topic": "Nested Control Structures", "mixing": 1, "parent": {"topic": " Control Structures in 
Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested for loops", "mixing": 1, "parent": {"topic": "Nested Control Structures", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested while loops", "mixing": 1, "parent": {"topic": "Nested Control Structures", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested conditional statements", "mixing": 1, "parent": {"topic": "Nested Control Structures", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested try-except statements", "mixing": 1, "parent": {"topic": "Nested Control Structures", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Iterating over a list", "mixing": 1, "parent": {"topic": "Control Statements with Lists", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Conditional statements with lists", "mixing": 1, "parent": {"topic": "Control Statements with Lists", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "List comprehension", "mixing": 1, "parent": {"topic": "Control Statements with Lists", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Using range function with lists", "mixing": 1, "parent": {"topic": "Control Statements with Lists", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested lists and control statements", "mixing": 1, "parent": {"topic": "Control Statements with Lists", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "if statement", "mixing": 1, "parent": {"topic": "Control Statements with Dictionaries", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "for loop", "mixing": 1, "parent": {"topic": "Control Statements with Dictionaries", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "while loop", "mixing": 1, "parent": {"topic": "Control Statements with Dictionaries", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "enumerate", "mixing": 1, "parent": {"topic": "Control Statements with Dictionaries", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "in statement", "mixing": 1, "parent": {"topic": "Control Statements with Dictionaries", "mixing": 1, "parent": {"topic": " Control Structures in Python", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Basic for loop", "mixing": 1, "parent": {"topic": 
"For loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested for loop", "mixing": 1, "parent": {"topic": "For loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Breaking out of a loop", "mixing": 1, "parent": {"topic": "For loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Using continue", "mixing": 1, "parent": {"topic": "For loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Iterating over a specific range", "mixing": 1, "parent": {"topic": "For loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Introduction to while loops", "mixing": 1, "parent": {"topic": "While loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested while loops", "mixing": 1, "parent": {"topic": "While loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Exiting while loops", "mixing": 1, "parent": {"topic": "While loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Infinite while loops", "mixing": 1, "parent": {"topic": "While loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": " Application to real-life scenarios", "mixing": 1, "parent": {"topic": "While loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Syntax of nested loops", "mixing": 1, "parent": {"topic": "Nested loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Executing nested loops", "mixing": 1, "parent": {"topic": "Nested loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Control flow within nested loops", "mixing": 1, "parent": {"topic": "Nested loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested loops with break and continue statements", "mixing": 1, "parent": {"topic": "Nested loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Applications and examples of nested loops", "mixing": 1, "parent": {"topic": "Nested loops", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "While Loop", "mixing": 1, "parent": {"topic": "Loop control statements", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "For Loop", "mixing": 1, "parent": {"topic": "Loop control statements", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, 
{"topic": "Range Function", "mixing": 1, "parent": {"topic": "Loop control statements", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Continue statement", "mixing": 1, "parent": {"topic": "Loop control statements", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Break statement", "mixing": 1, "parent": {"topic": "Loop control statements", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Introduction to the range function", "mixing": 1, "parent": {"topic": "Range function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating a range with start and stop parameters", "mixing": 1, "parent": {"topic": "Range function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Harnessing the power of step parameter", "mixing": 1, "parent": {"topic": "Range function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Understanding inclusive and exclusive Range", "mixing": 1, "parent": {"topic": "Range function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Range with negative values", "mixing": 1, "parent": {"topic": "Range function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Introduction", "mixing": 1, "parent": {"topic": "Enumerate function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Syntax", "mixing": 1, "parent": {"topic": "Enumerate function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Usage", "mixing": 1, "parent": {"topic": "Enumerate function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Advantages", "mixing": 1, "parent": {"topic": "Enumerate function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Examples", "mixing": 1, "parent": {"topic": "Enumerate function", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Introduction to loops", "mixing": 1, "parent": {"topic": "Using loops with lists", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Basic loop syntax", "mixing": 1, "parent": {"topic": "Using loops with lists", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Looping through lists", "mixing": 1, "parent": {"topic": "Using loops with lists", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modifying lists with loops", "mixing": 1, "parent": {"topic": "Using loops with 
lists", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested loops", "mixing": 1, "parent": {"topic": "Using loops with lists", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Using for loop with strings", "mixing": 1, "parent": {"topic": "Using loops with strings", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Using while loop with strings", "mixing": 1, "parent": {"topic": "Using loops with strings", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Iterating over each character of a string", "mixing": 1, "parent": {"topic": "Using loops with strings", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Finding characters in a string using loops", "mixing": 1, "parent": {"topic": "Using loops with strings", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "/manipulating a string using loops", "mixing": 1, "parent": {"topic": "Using loops with strings", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Iterating over dictionary", "mixing": 1, "parent": {"topic": "Using loops with dictionaries", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Accessing values in dictionary", "mixing": 1, "parent": {"topic": "Using loops with dictionaries", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modifying values in dictionary", "mixing": 1, "parent": {"topic": "Using loops with dictionaries", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Using nested dictionaries with loops", "mixing": 1, "parent": {"topic": "Using loops with dictionaries", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Applying conditional statements with loops and dictionaries", "mixing": 1, "parent": {"topic": "Using loops with dictionaries", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Iterating through sets", "mixing": 1, "parent": {"topic": "Using loops with sets", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Looping through sets with for loop", "mixing": 1, "parent": {"topic": "Using loops with sets", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Using set comprehension", "mixing": 1, "parent": {"topic": "Using loops with sets", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Practical application of loops with sets", "mixing": 1, "parent": {"topic": "Using loops with sets", "mixing": 1, "parent": {"topic": " 
Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested loops with sets", "mixing": 1, "parent": {"topic": "Using loops with sets", "mixing": 1, "parent": {"topic": " Python Loops", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Lists", "mixing": 1, "parent": {"topic": "Introduction to Python Data Structures", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Tuples", "mixing": 1, "parent": {"topic": "Introduction to Python Data Structures", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Sets", "mixing": 1, "parent": {"topic": "Introduction to Python Data Structures", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Dictionaries", "mixing": 1, "parent": {"topic": "Introduction to Python Data Structures", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Arrays", "mixing": 1, "parent": {"topic": "Introduction to Python Data Structures", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating a list", "mixing": 1, "parent": {"topic": "Lists", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Accessing elements in a list", "mixing": 1, "parent": {"topic": "Lists", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modifying elements in a list", "mixing": 1, "parent": {"topic": "Lists", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "List methods", "mixing": 1, "parent": {"topic": "Lists", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "List slicing", "mixing": 1, "parent": {"topic": "Lists", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating empty lists", "mixing": 1, "parent": {"topic": "Creating Lists", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating lists with initial elements", "mixing": 1, "parent": {"topic": "Creating Lists", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Appending elements to a list", "mixing": 1, "parent": {"topic": "Creating Lists", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "List comprehension", "mixing": 1, "parent": {"topic": "Creating Lists", "mixing": 1, "parent": 
{"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Copying lists", "mixing": 1, "parent": {"topic": "Creating Lists", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Indexing", "mixing": 1, "parent": {"topic": "Accessing List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Slicing", "mixing": 1, "parent": {"topic": "Accessing List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Negative indexing", "mixing": 1, "parent": {"topic": "Accessing List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Working with nested lists", "mixing": 1, "parent": {"topic": "Accessing List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Accessing elements with nested index", "mixing": 1, "parent": {"topic": "Accessing List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modifying Elements using Indexing", "mixing": 1, "parent": {"topic": "Modifying List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Appending and Removing Elements", "mixing": 1, "parent": {"topic": "Modifying List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modifying Elements using List Methods", "mixing": 1, "parent": {"topic": "Modifying List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modifying Elements using Slicing", "mixing": 1, "parent": {"topic": "Modifying List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modifying Elements using List Comprehensions", "mixing": 1, "parent": {"topic": "Modifying List Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Accessing elements of a list", "mixing": 1, "parent": {"topic": "List Operations", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Modifying elements of a list", "mixing": 1, "parent": {"topic": "List Operations", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Appending elements to a list", "mixing": 1, "parent": {"topic": "List Operations", "mixing": 1, "parent": {"topic": " Python Data 
Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Removing elements from a list", "mixing": 1, "parent": {"topic": "List Operations", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Slicing a list", "mixing": 1, "parent": {"topic": "List Operations", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "1. Creating a Tuple", "mixing": 1, "parent": {"topic": "Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "2. Accessing Tuple Elements", "mixing": 1, "parent": {"topic": "Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "3. Modifying Tuples", "mixing": 1, "parent": {"topic": "Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "4. Tuple Methods", "mixing": 1, "parent": {"topic": "Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "5. Looping Through Tuples", "mixing": 1, "parent": {"topic": "Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "What is a tuple?", "mixing": 1, "parent": {"topic": "Creating Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating a tuple using parentheses", "mixing": 1, "parent": {"topic": "Creating Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating a tuple using the tuple() function", "mixing": 1, "parent": {"topic": "Creating Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating a tuple from a range of values", "mixing": 1, "parent": {"topic": "Creating Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Creating a tuple from a string", "mixing": 1, "parent": {"topic": "Creating Tuples", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Indexing Tuples", "mixing": 1, "parent": {"topic": "Accessing Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Slicing Tuples", "mixing": 1, "parent": {"topic": "Accessing Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Method: index()", "mixing": 1, "parent": 
{"topic": "Accessing Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Method: count()", "mixing": 1, "parent": {"topic": "Accessing Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Nested Tuples", "mixing": 1, "parent": {"topic": "Accessing Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Changing individual elements", "mixing": 1, "parent": {"topic": "Modifying Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Appending new elements", "mixing": 1, "parent": {"topic": "Modifying Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Deleting elements", "mixing": 1, "parent": {"topic": "Modifying Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Slicing and replacing elements", "mixing": 1, "parent": {"topic": "Modifying Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}, {"topic": "Iterating and modifying elements", "mixing": 1, "parent": {"topic": "Modifying Tuple Elements", "mixing": 1, "parent": {"topic": " Python Data Structures: Lists and Tuples", "mixing": 1, "parent": {"topic": "Python", "mixing": 1, "parent": null}}}}] --------------------------------------------------------------------------------