├── DEAP.pdf ├── application ├── img │ ├── CV.png │ ├── DT.png │ ├── Fox.png │ ├── NN.png │ ├── Fox2.png │ ├── Parallel.png │ └── greywolfga.jpg ├── theoretical_analysis.ipynb ├── multiobjective-sr.ipynb ├── alpha_dominance_mogp.py ├── symbolic-regression.ipynb ├── automatically-design-de-operators.ipynb └── TSP.ipynb ├── tricks ├── img │ └── async_parallel_graph.png ├── multiprocess_speedup.py ├── multiprocess_speedup.md ├── numpy-speedup.ipynb ├── compiler-speedup.ipynb ├── numba-lexicase-selection.ipynb └── numpy_speedup_sr.py ├── README.md ├── .gitignore └── operator ├── lexicase-selection.ipynb ├── crossover.ipynb └── varor-varand.ipynb /DEAP.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hengzhe-zhang/DEAP-GP-Tutorial/HEAD/DEAP.pdf -------------------------------------------------------------------------------- /application/img/CV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hengzhe-zhang/DEAP-GP-Tutorial/HEAD/application/img/CV.png -------------------------------------------------------------------------------- /application/img/DT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hengzhe-zhang/DEAP-GP-Tutorial/HEAD/application/img/DT.png -------------------------------------------------------------------------------- /application/img/Fox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hengzhe-zhang/DEAP-GP-Tutorial/HEAD/application/img/Fox.png -------------------------------------------------------------------------------- /application/img/NN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hengzhe-zhang/DEAP-GP-Tutorial/HEAD/application/img/NN.png 
-------------------------------------------------------------------------------- /application/img/Fox2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hengzhe-zhang/DEAP-GP-Tutorial/HEAD/application/img/Fox2.png -------------------------------------------------------------------------------- /application/img/Parallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hengzhe-zhang/DEAP-GP-Tutorial/HEAD/application/img/Parallel.png -------------------------------------------------------------------------------- /application/img/greywolfga.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hengzhe-zhang/DEAP-GP-Tutorial/HEAD/application/img/greywolfga.jpg -------------------------------------------------------------------------------- /tricks/img/async_parallel_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hengzhe-zhang/DEAP-GP-Tutorial/HEAD/tricks/img/async_parallel_graph.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 基于DEAP的遗传编程系列教程 2 | 3 | 本系列教程主要介绍如何基于DEAP实现一些流行的遗传编程概念,包括: 4 | 5 | * 单树GP 6 | * 多树GP 7 | * 多目标GP 8 | * 集成学习 9 | * 启发式算法生成 10 | 11 | 上述概念通过以下示例实现: 12 | 13 | 1. [基于单树GP的符号回归(Symbolic Regression)](application/symbolic-regression.ipynb) 14 | 2. [基于多树GP的特征工程(Feature Construction)](application/feature-construction.ipynb) 15 | 3. [基于多目标GP的符号回归 (Multi-Objective Symbolic Regression)](application/multiobjective-sr.ipynb) 16 | 4. [基于GP的集成学习(Ensemble Learning)](application/ensemble-learning.ipynb) 17 | 5. [基于GP的旅行商问题规则生成(TSP)](application/TSP.ipynb) 18 | 6. [为什么使用GP而不是神经网络?(Feature Construction)](application/cross-validation-score.ipynb) 19 | 7. 
[基于GP自动设计优化算法](application/automatically-design-de-operators.ipynb) 20 | 8. [基于不同算子集的多树GP](application/multisets_gp.ipynb) 21 | 22 | 同时,本教程包含了一些工程技巧: 23 | 24 | 1. [基于Numpy实现向量化加速](tricks/numpy-speedup.ipynb) 25 | 2. [基于PyTorch实现GPU加速](tricks/pytorch-speedup.ipynb) 26 | 3. [基于手动编写编译器实现加速](tricks/compiler-speedup.ipynb) 27 | 4. [基于Numba实现Lexicase Selection加速](tricks/numba-lexicase-selection.ipynb) 28 | 5. [基于多进程实现异步并行评估](tricks/multiprocess_speedup.md) 29 | 6. [基于sklearn接口的Numpy加速符号回归](tricks/numpy_speedup_sr.py) 30 | 31 | 此外,DEAP还有一些注意事项: 32 | 33 | 1. [VarAnd和VarOr算子的使用注意事项](operator/varor-varand.ipynb) 34 | 2. [Crossover算子的注意事项](operator/crossover.ipynb) 35 | 3. [Lexicase Selection算子的注意事项](operator/lexicase-selection.ipynb) 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | .idea 3 | __pycache__/ 4 | application/catboost_info 5 | application/kaggle.ipynb 6 | application/data 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # dotenv 88 | .env 89 | 90 | # virtualenv 91 | .venv 92 | venv/ 93 | ENV/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # IDE settings 109 | .vscode/ 110 | -------------------------------------------------------------------------------- /application/theoretical_analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "### 北极狐算法是什么?\n", 7 | "在文章《基于遗传编程自动设计优化算法(自动发现北极狐优化算法)》中,我们利用遗传编程算法在五分钟内发现了一个新的优化算法,$X_{\\text{new}} = X + (F \\cdot X - F)$, 命名为北极狐算法。\n", 8 | "众所周知,在设计算法的时候,我们不仅仅希望取得良好的实验效果,还希望设计出的算法有理论保证。那么,我们是否可以证明北极狐算法一定收敛到全局最优呢?\n", 9 | "\n", 10 | "### 北极狐算法的收敛性\n", 11 | "**假设:**\n", 12 | "- $ \\mathcal{S} \\subset \\mathbb{R}^n $ 是一个有界搜索空间,其中包含全局最优解 $ X^* $。\n", 13 | "- $ F $ 是从某个分布中抽取的随机扰动向量,$ F $ 的取值范围是整个搜索空间 $\\mathcal{S}$。\n", 14 | "\n", 15 | "**证明:**\n", 16 | "定义 $ A_\\epsilon $ 为 $ X^* $ 的 $\\epsilon$-邻域,即 $ A_\\epsilon = \\{ X \\in \\mathcal{S} : \\|X - X^*\\| < 
\\epsilon \\} $。设 $ P_\\epsilon $ 为一次操作中 $X_{\\text{new}} = X + (F \\cdot X - F)$ 落入 $ A_\\epsilon $ 的概率。因为 $ F $ 能覆盖整个搜索空间,所以即便对于足够小的 $ \\epsilon $,依然可以得到$ P_\\epsilon > 0 $。\n", 17 | "\n", 18 | "在 $ N $ 次迭代中,$X_{\\text{new}}$ 至少一次落入 $ A_\\epsilon $ 的概率是 $ 1 - (1 - P_\\epsilon)^N $。使用极限,我们可以表示这个概率在 $ N $ 趋向无穷大时的行为:\n", 19 | "\n", 20 | "$$ \\lim_{N \\to \\infty} \\left[ 1 - (1 - P_\\epsilon)^N \\right] = 1 $$\n", 21 | "\n", 22 | "这个极限表达了随着迭代次数 $ N $ 的增加,$X_{\\text{new}}$ 至少一次落入 $ X^* $ 的 $\\epsilon$-邻域的概率趋近于 1。\n", 23 | "\n", 24 | "### 结论:\n", 25 | "现在,我们证明了北极狐算法 $X_{\\text{new}} = X + (F \\cdot X - F)$ 在无限次迭代的情况下,能够以概率 1 接近全局最优解 $X^*$。" 26 | ], 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "id": "b6c87f3572f1a7aa" 31 | }, 32 | { 33 | "cell_type": "code", 34 | "outputs": [], 35 | "source": [], 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "id": "e64e0268ee5ee8f7" 40 | } 41 | ], 42 | "metadata": { 43 | "kernelspec": { 44 | "display_name": "Python 3", 45 | "language": "python", 46 | "name": "python3" 47 | }, 48 | "language_info": { 49 | "codemirror_mode": { 50 | "name": "ipython", 51 | "version": 2 52 | }, 53 | "file_extension": ".py", 54 | "mimetype": "text/x-python", 55 | "name": "python", 56 | "nbconvert_exporter": "python", 57 | "pygments_lexer": "ipython2", 58 | "version": "2.7.6" 59 | } 60 | }, 61 | "nbformat": 4, 62 | "nbformat_minor": 5 63 | } 64 | -------------------------------------------------------------------------------- /operator/lexicase-selection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "## Lexicase Selection注意事项\n", 7 | "\n", 8 | "对于Lexicase Selection,适应度评估需要更改为返回多个误差组成的向量,而不是均方误差(MSE)。这样,Lexicase Selection才能独立考虑每个个体在每个测试样本上的表现,从而提高选择的多样性。" 9 | ], 10 | "metadata": { 11 | "collapsed": false 12 | }, 13 | "id": "ff6050dfa4dc1b6" 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 7, 18 | "source": 
# Number of sample points used for evaluation. Lexicase selection treats every
# sample as a separate objective, so the fitness must carry exactly one weight
# per sample; both places below read this single constant so they cannot drift.
N_SAMPLES = 100


# Symbolic regression
def evalSymbReg(individual, pset):
    """Return the per-sample squared errors of a GP tree against ``x**2``.

    Unlike the usual MSE fitness, the errors are NOT averaged: one value per
    sample point is returned so that lexicase selection can look at each
    sample independently.

    Args:
        individual: GP tree to evaluate.
        pset: Primitive set passed to ``gp.compile`` to compile the tree.

    Returns:
        A tuple with ``N_SAMPLES`` squared errors, one per sample point.
    """
    # Compile the GP tree into a callable
    func = gp.compile(expr=individual, pset=pset)

    # Build the input vector with numpy
    x = np.linspace(-10, 10, N_SAMPLES)

    return tuple((func(x) - x**2)**2)


# Create the fitness and individual classes; the fitness has one minimised
# objective per data point (N_SAMPLES in total).
creator.create("FitnessMin", base.Fitness, weights=(-1.0,) * N_SAMPLES)
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)
import numpy
from deap import algorithms


def mean_error(ind):
    """Collapse an individual's per-sample squared errors into a single MSE."""
    return numpy.mean(ind.fitness.values)


# Statistics. Each fitness holds one squared error per sample, so every
# individual is summarised by its mean error first; min/max/std then describe
# individuals rather than isolated sample points.
stats_fit = tools.Statistics(mean_error)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("avg", numpy.mean)
mstats.register("std", numpy.std)
mstats.register("min", numpy.min)
mstats.register("max", numpy.max)

# Use the default algorithm
population = toolbox.population(n=20)
hof = tools.HallOfFame(1)
pop, log = algorithms.eaSimple(population=population,
                               toolbox=toolbox, cxpb=0.5, mutpb=0.2, ngen=20,
                               stats=mstats, halloffame=hof, verbose=True)

# HallOfFame ranks fitnesses lexicographically, i.e. mainly by the error on
# the first sample, which is not a meaningful "best" for a per-sample fitness.
# Report the individual with the lowest mean error among the final population
# and the hall of fame instead.
best = min(list(pop) + list(hof), key=mean_error)
print(str(best))
# Symbolic regression
def evalSymbReg(ind):
    """Score one GP individual by its mean squared error against ``(x + 1)**2``.

    Reads the module-level ``toolbox`` (to compile the tree) and the
    module-level sample vector ``x``.

    Args:
        ind: GP tree to evaluate.

    Returns:
        A one-element tuple ``(mse,)``.
    """
    predict = toolbox.compile(ind)
    # Residual between the evolved function and the target on every sample
    residual = predict(x) - (x + 1) ** 2
    return (np.mean(residual ** 2),)
def steady_state_gp(
    population,
    toolbox,
    cxpb,
    mutpb,
    max_evaluations,
    stats=None,
    halloffame=None,
    verbose=__debug__,
    max_workers=4,
    log_interval=100,
):
    """Run a steady-state GP whose fitness evaluations execute asynchronously.

    Individuals are evaluated in a process pool. As soon as evaluations
    finish, their results are merged into the population and new offspring
    are bred, so fast individuals never wait for slow ones.

    Args:
        population: Initial list of individuals. The list itself is not
            modified; individuals without a valid fitness are always
            submitted for evaluation, even if that exceeds the budget.
        toolbox: Toolbox providing ``evaluate``, ``select``, ``mate`` and
            ``mutate`` (the latter two are used by ``varAnd``).
            ``toolbox.evaluate`` is submitted to the process pool.
        cxpb: Crossover probability passed to ``varAnd``.
        mutpb: Mutation probability passed to ``varAnd``.
        max_evaluations: Evaluation budget. Offspring are only submitted
            while the finished plus in-flight evaluations stay below it.
        stats: Optional statistics object compiled at every log record.
        halloffame: Optional hall of fame updated with evaluated individuals.
        verbose: Whether to print the logbook stream after each record.
        max_workers: Number of worker processes.
        log_interval: Record statistics every time this many further
            evaluations have completed.

    Returns:
        A ``(population, logbook)`` tuple.

    Raises:
        ValueError: If ``log_interval`` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError("log_interval must be a positive integer")

    logbook = tools.Logbook()
    logbook.header = ["evals", "nevals"] + (stats.fields if stats else [])

    pending = {}  # future -> individual whose evaluation is in flight
    evaluations = 0
    logged_evaluations = 0
    next_log = log_interval

    # The context manager shuts the pool down even if an evaluation raises.
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Evaluate the initial population
        for ind in population:
            if not ind.fitness.valid:
                pending[executor.submit(toolbox.evaluate, ind)] = ind

        # Individuals evaluated since the last breeding step. Starting from
        # the already-evaluated ones (instead of the whole population) avoids
        # breeding from individuals that have no fitness yet.
        recently_done = [ind for ind in population if ind.fitness.valid]

        while evaluations < max_evaluations:
            # Generate new individuals
            budget = max_evaluations - evaluations - len(pending)
            parents = [ind for ind in population if ind.fitness.valid]
            # Normally wait for two finished evaluations; if nothing is in
            # flight, breed from a single one so the loop cannot stall.
            ready = len(recently_done) >= 2 or (recently_done and not pending)
            if budget > 0 and ready and parents:
                selected = toolbox.select(parents, len(recently_done))
                offspring = varAnd(selected, toolbox, cxpb, mutpb)
                recently_done = []

                # Submit evaluation tasks without exceeding the budget
                for child in offspring[:budget]:
                    pending[executor.submit(toolbox.evaluate, child)] = child

            if not pending:
                # Nothing is running and nothing could be bred; waiting on an
                # empty set would return immediately and spin forever.
                break

            # Wait until at least one individual has been evaluated
            finished, _ = concurrent.futures.wait(
                list(pending),
                return_when=concurrent.futures.FIRST_COMPLETED,
            )

            # Process the evaluated individuals
            done_inds = []
            for future in finished:
                ind = pending.pop(future)
                ind.fitness.values = future.result()
                done_inds.append(ind)
            evaluations += len(done_inds)
            recently_done.extend(done_inds)

            if halloffame is not None:
                halloffame.update(done_inds)

            # Replace low-fitness individuals with high-fitness ones. Initial
            # individuals are already members of the population, so only
            # genuinely new objects are added (no duplicates).
            members = {id(ind) for ind in population}
            newcomers = [ind for ind in done_inds if id(ind) not in members]
            population = selBest(population + newcomers, len(population))

            # Several futures can finish at once, so test for crossing the
            # threshold rather than for an exact multiple.
            if evaluations >= next_log:
                evaluated = [ind for ind in population if ind.fitness.valid]
                record = stats.compile(evaluated) if stats else {}
                logbook.record(
                    evals=evaluations,
                    nevals=evaluations - logged_evaluations,
                    **record,
                )
                logged_evaluations = evaluations
                next_log = (evaluations // log_interval + 1) * log_interval
                if verbose:
                    print(logbook.stream)

    return population, logbook
np.linspace(-10, 10, 1000000) 27 | 28 | 29 | # 符号回归 30 | def evalSymbReg(ind): 31 | func = toolbox.compile(ind) 32 | # 评估生成的函数并计算MSE 33 | mse = np.mean((func(x) - (x + 1) ** 2) ** 2) 34 | return (mse,) 35 | 36 | 37 | # 创建个体和适应度函数 38 | creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) 39 | creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin) 40 | 41 | # 定义函数和终端变量 42 | pset = gp.PrimitiveSet("MAIN", arity=1) 43 | pset.addPrimitive(np.add, 2) 44 | pset.addPrimitive(np.subtract, 2) 45 | pset.addPrimitive(np.multiply, 2) 46 | pset.addPrimitive(np.negative, 1) 47 | 48 | 49 | def random_int(): 50 | return random.randint(-1, 1) 51 | 52 | 53 | pset.addEphemeralConstant("rand101", random_int) 54 | pset.renameArguments(ARG0="x") 55 | 56 | # 定义遗传编程操作 57 | toolbox = base.Toolbox() 58 | toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=2, max_=6) 59 | toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr) 60 | toolbox.register("population", tools.initRepeat, list, toolbox.individual) 61 | toolbox.register("compile", gp.compile, pset=pset) 62 | toolbox.register("evaluate", evalSymbReg) 63 | toolbox.register("select", tools.selTournament, tournsize=3) 64 | toolbox.register("mate", gp.cxOnePoint) 65 | toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr, pset=pset) 66 | 67 | # 定义统计指标 68 | stats_fit = tools.Statistics(lambda ind: ind.fitness.values) 69 | stats_size = tools.Statistics(len) 70 | mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size) 71 | mstats.register("avg", numpy.mean) 72 | mstats.register("std", numpy.std) 73 | mstats.register("min", numpy.min) 74 | mstats.register("max", numpy.max) 75 | ``` 76 | 77 | ### 异步并行 78 | 在本教程中,异步并行处理是通过ProcessPoolExecutor实现的。 79 | 简单来说,原理是创建一个进程池,然后将评估任务提交给进程池,进程池会自动分配任务给空闲的进程,如下图所示。 80 | 81 | ![异步评估](img/async_parallel_graph.png) 82 | 83 | 当任何一个进程完成评估任务时,我们可以获取其结果。如果新个体的适应度好于种群中的最差个体,我们可以将其加入种群,替换掉最差个体。 84 | 
def steady_state_gp(
    population,
    toolbox,
    cxpb,
    mutpb,
    max_evaluations,
    stats=None,
    halloffame=None,
    verbose=__debug__,
    max_workers=4,
    log_interval=100,
):
    """Run a steady-state GP whose fitness evaluations execute asynchronously.

    Individuals are evaluated in a process pool. As soon as evaluations
    finish, their results are merged into the population and new offspring
    are bred, so fast individuals never wait for slow ones.

    Args:
        population: Initial list of individuals. The list itself is not
            modified; individuals without a valid fitness are always
            submitted for evaluation, even if that exceeds the budget.
        toolbox: Toolbox providing ``evaluate``, ``select``, ``mate`` and
            ``mutate`` (the latter two are used by ``varAnd``).
            ``toolbox.evaluate`` is submitted to the process pool.
        cxpb: Crossover probability passed to ``varAnd``.
        mutpb: Mutation probability passed to ``varAnd``.
        max_evaluations: Evaluation budget. Offspring are only submitted
            while the finished plus in-flight evaluations stay below it.
        stats: Optional statistics object compiled at every log record.
        halloffame: Optional hall of fame updated with evaluated individuals.
        verbose: Whether to print the logbook stream after each record.
        max_workers: Number of worker processes.
        log_interval: Record statistics every time this many further
            evaluations have completed.

    Returns:
        A ``(population, logbook)`` tuple.

    Raises:
        ValueError: If ``log_interval`` is smaller than 1.
    """
    if log_interval < 1:
        raise ValueError("log_interval must be a positive integer")

    logbook = tools.Logbook()
    logbook.header = ["evals", "nevals"] + (stats.fields if stats else [])

    pending = {}  # future -> individual whose evaluation is in flight
    evaluations = 0
    logged_evaluations = 0
    next_log = log_interval

    # The context manager shuts the pool down even if an evaluation raises.
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Evaluate the initial population
        for ind in population:
            if not ind.fitness.valid:
                pending[executor.submit(toolbox.evaluate, ind)] = ind

        # Individuals evaluated since the last breeding step. Starting from
        # the already-evaluated ones (instead of the whole population) avoids
        # breeding from individuals that have no fitness yet.
        recently_done = [ind for ind in population if ind.fitness.valid]

        while evaluations < max_evaluations:
            # Generate new individuals
            budget = max_evaluations - evaluations - len(pending)
            parents = [ind for ind in population if ind.fitness.valid]
            # Normally wait for two finished evaluations; if nothing is in
            # flight, breed from a single one so the loop cannot stall.
            ready = len(recently_done) >= 2 or (recently_done and not pending)
            if budget > 0 and ready and parents:
                selected = toolbox.select(parents, len(recently_done))
                offspring = varAnd(selected, toolbox, cxpb, mutpb)
                recently_done = []

                # Submit evaluation tasks without exceeding the budget
                for child in offspring[:budget]:
                    pending[executor.submit(toolbox.evaluate, child)] = child

            if not pending:
                # Nothing is running and nothing could be bred; waiting on an
                # empty set would return immediately and spin forever.
                break

            # Wait until at least one individual has been evaluated
            finished, _ = concurrent.futures.wait(
                list(pending),
                return_when=concurrent.futures.FIRST_COMPLETED,
            )

            # Process the evaluated individuals
            done_inds = []
            for future in finished:
                ind = pending.pop(future)
                ind.fitness.values = future.result()
                done_inds.append(ind)
            evaluations += len(done_inds)
            recently_done.extend(done_inds)

            if halloffame is not None:
                halloffame.update(done_inds)

            # Replace low-fitness individuals with high-fitness ones. Initial
            # individuals are already members of the population, so only
            # genuinely new objects are added (no duplicates).
            members = {id(ind) for ind in population}
            newcomers = [ind for ind in done_inds if id(ind) not in members]
            population = selBest(population + newcomers, len(population))

            # Several futures can finish at once, so test for crossing the
            # threshold rather than for an exact multiple.
            if evaluations >= next_log:
                evaluated = [ind for ind in population if ind.fitness.valid]
                record = stats.compile(evaluated) if stats else {}
                logbook.record(
                    evals=evaluations,
                    nevals=evaluations - logged_evaluations,
                    **record,
                )
                logged_evaluations = evaluations
                next_log = (evaluations // log_interval + 1) * log_interval
                if verbose:
                    print(logbook.stream)

    return population, logbook
import random
from collections import defaultdict

# Return type that marks a loosely typed tree: the check below treats a root
# whose ``ret`` equals this value as "all nodes are interchangeable".
__type__ = object


def cxOnePoint(ind1, ind2):
    """One-point subtree crossover in which the root is a legal crossover point.

    A return type shared by both trees is chosen at random, then one node of
    that type is picked in each tree and the two subtrees rooted there are
    swapped in place. Candidate positions start at index 0, so whole trees
    may be exchanged.

    Args:
        ind1: First tree; must provide ``root``, ``searchSubtree`` and
            slice assignment. Modified in place.
        ind2: Second tree, same requirements. Modified in place.

    Returns:
        The tuple ``(ind1, ind2)``.
    """
    # List all available primitive types in each individual
    types1 = defaultdict(list)
    types2 = defaultdict(list)
    if ind1.root.ret == __type__:
        # Not STGP optimization: every position, root included, is a candidate
        types1[__type__] = list(range(len(ind1)))
        types2[__type__] = list(range(len(ind2)))
        common_types = [__type__]
    else:
        # Group positions by return type. Enumeration must start at 0 so each
        # index refers to its own node; starting at 1 while iterating from
        # the root shifted every index and produced an out-of-range position.
        for idx, node in enumerate(ind1):
            types1[node.ret].append(idx)
        for idx, node in enumerate(ind2):
            types2[node.ret].append(idx)
        common_types = set(types1).intersection(types2)

    if len(common_types) > 0:
        type_ = random.choice(list(common_types))

        index1 = random.choice(types1[type_])
        index2 = random.choice(types2[type_])

        slice1 = ind1.searchSubtree(index1)
        slice2 = ind2.searchSubtree(index2)
        ind1[slice1], ind2[slice2] = ind2[slice2], ind1[slice1]

    return ind1, ind2
"2023-11-07T08:49:09.678933300Z", 103 | "start_time": "2023-11-07T08:49:09.675377100Z" 104 | } 105 | }, 106 | "id": "5dde655dc691a423", 107 | "outputs": [] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "source": [ 113 | "import random\n", 114 | "\n", 115 | "# 定义函数集合和终端集合\n", 116 | "pset = gp.PrimitiveSet(\"MAIN\", arity=1)\n", 117 | "pset.addPrimitive(np.add, 2)\n", 118 | "pset.addPrimitive(np.subtract, 2)\n", 119 | "pset.addPrimitive(np.multiply, 2)\n", 120 | "pset.addPrimitive(np.negative, 1)\n", 121 | "pset.addEphemeralConstant(\"rand101\", lambda: random.randint(-1, 1))\n", 122 | "pset.renameArguments(ARG0='x')\n", 123 | "\n", 124 | "# 定义遗传编程操作\n", 125 | "toolbox = base.Toolbox()\n", 126 | "toolbox.register(\"expr\", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)\n", 127 | "toolbox.register(\"individual\", tools.initIterate, creator.Individual, toolbox.expr)\n", 128 | "toolbox.register(\"population\", tools.initRepeat, list, toolbox.individual)\n", 129 | "toolbox.register(\"compile\", gp.compile, pset=pset)\n", 130 | "toolbox.register(\"evaluate\", evalSymbReg, pset=pset)\n", 131 | "toolbox.register(\"select\", tools.selTournament, tournsize=3)\n", 132 | "toolbox.register(\"mate\", cxOnePoint)\n", 133 | "toolbox.register(\"mutate\", gp.mutUniform, expr=toolbox.expr, pset=pset)" 134 | ], 135 | "metadata": { 136 | "collapsed": false, 137 | "ExecuteTime": { 138 | "end_time": "2023-11-07T08:49:09.694753600Z", 139 | "start_time": "2023-11-07T08:49:09.680991300Z" 140 | } 141 | }, 142 | "id": "cb6cf38094256262", 143 | "outputs": [] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 4, 148 | "source": [ 149 | "import numpy\n", 150 | "from deap import algorithms\n", 151 | "\n", 152 | "# 定义统计指标\n", 153 | "stats_fit = tools.Statistics(lambda ind: ind.fitness.values)\n", 154 | "stats_size = tools.Statistics(len)\n", 155 | "mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)\n", 156 | "mstats.register(\"avg\", 
numpy.mean)\n", 157 | "mstats.register(\"std\", numpy.std)\n", 158 | "mstats.register(\"min\", numpy.min)\n", 159 | "mstats.register(\"max\", numpy.max)\n", 160 | "\n", 161 | "# 使用默认算法\n", 162 | "start=time.time()\n", 163 | "population = toolbox.population(n=300)\n", 164 | "hof = tools.HallOfFame(1)\n", 165 | "pop, log = algorithms.eaSimple(population=population,\n", 166 | " toolbox=toolbox, cxpb=0.5, mutpb=0.2, ngen=50, stats=mstats, halloffame=hof, verbose=True)\n", 167 | "end=time.time()\n", 168 | "print('time:',end-start)\n", 169 | "print(str(hof[0]))" 170 | ], 171 | "metadata": { 172 | "collapsed": false, 173 | "ExecuteTime": { 174 | "end_time": "2023-11-07T08:49:12.030799500Z", 175 | "start_time": "2023-11-07T08:49:09.694753600Z" 176 | } 177 | }, 178 | "id": "88c62bc071d56191", 179 | "outputs": [] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 3", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 2 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython2", 198 | "version": "2.7.6" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 5 203 | } 204 | -------------------------------------------------------------------------------- /application/multiobjective-sr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ff6050dfa4dc1b6", 6 | "metadata": {}, 7 | "source": [ 8 | "## 基于多目标GP的符号回归\n", 9 | "\n", 10 | "多目标GP是指使用多个目标函数来评估GP树的适应度。在符号回归问题中,通常使用均方误差(MSE)作为目标函数。然而,MSE并不能很好地反映模型的复杂度,因此,我们还可以使用树的大小作为目标函数。这样,就可以得到更为精简的模型。" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 5, 16 | "id": "59cfefc0467c74ad", 17 | "metadata": { 18 | "ExecuteTime": { 19 | "end_time": 
# Evaluation function with two objectives: mean squared error and tree size
def evalSymbReg(individual, pset):
    """Evaluate a GP tree on accuracy and complexity.

    The tree is compiled and compared with the target ``x**2`` on the integer
    points ``-10 .. 9``.

    Args:
        individual: GP tree to evaluate.
        pset: Primitive set passed to ``gp.compile`` to compile the tree.

    Returns:
        A tuple ``(mse, size)``: the mean squared error over the sample
        points and the number of nodes in the tree.
    """
    # Compile the GP tree into a callable
    func = gp.compile(expr=individual, pset=pset)
    # Mean squared error (MSE): the squared errors are averaged, not merely
    # summed, so the value matches what the statistics and plot call MSE.
    squared_errors = [(func(x) - x**2)**2 for x in range(-10, 10)]
    mse = math.fsum(squared_errors) / len(squared_errors)
    # Size of the GP tree
    size = len(individual)
    return mse, size
gp.genHalfAndHalf, pset=pset, min_=1, max_=2)\n", 80 | "toolbox.register(\"individual\", tools.initIterate, creator.Individual, toolbox.expr)\n", 81 | "toolbox.register(\"population\", tools.initRepeat, list, toolbox.individual)\n", 82 | "toolbox.register(\"compile\", gp.compile, pset=pset)\n", 83 | "toolbox.register(\"evaluate\", evalSymbReg, pset=pset)\n", 84 | "toolbox.register(\"select\", tools.selNSGA2)\n", 85 | "toolbox.register(\"mate\", gp.cxOnePoint)\n", 86 | "toolbox.register(\"mutate\", gp.mutUniform, expr=toolbox.expr, pset=pset)" 87 | ], 88 | "outputs": [] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "id": "62f30d17704db709", 93 | "metadata": {}, 94 | "source": [ 95 | "### 算法模块\n", 96 | "DEAP算法包提供了eaMuPlusLambda函数,可以比较方便地使用NSGA2的环境选择算子。 \n", 97 | "理想情况下,最好还是自行实现演化函数,这样才能完整地使用NSGA-II算法中的锦标赛选择算子。 \n", 98 | "NSGA-II算法中的锦标赛选择算子是指,首先从种群中随机选择两个个体,然后根据非支配排序和拥挤度距离两个指标对两个个体进行排序,最后选择排名较高的个体作为父代。简单起见,我们忽略了锦标赛选择算子。" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 7, 104 | "id": "515b587d4f8876ea", 105 | "metadata": { 106 | "ExecuteTime": { 107 | "end_time": "2023-11-10T08:50:31.364942900Z", 108 | "start_time": "2023-11-10T08:50:31.284352200Z" 109 | } 110 | }, 111 | "source": [ 112 | "import numpy\n", 113 | "from deap import algorithms\n", 114 | "\n", 115 | "# 统计指标\n", 116 | "stats_fit = tools.Statistics(lambda ind: ind.fitness.values[0])\n", 117 | "stats_size = tools.Statistics(lambda ind: ind.fitness.values[1])\n", 118 | "mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)\n", 119 | "mstats.register(\"avg\", numpy.mean)\n", 120 | "mstats.register(\"std\", numpy.std)\n", 121 | "mstats.register(\"min\", numpy.min)\n", 122 | "mstats.register(\"max\", numpy.max)\n", 123 | "\n", 124 | "population = toolbox.population(n=50)\n", 125 | "pop, log = algorithms.eaMuPlusLambda(population=population,\n", 126 | " toolbox=toolbox, mu=len(population),lambda_=len(population),\n", 127 | " cxpb=0.9, mutpb=0.1, ngen=10, stats=mstats, 
halloffame=None, verbose=True)\n", 128 | "\n", 129 | "# 最佳个体\n", 130 | "best_ind = tools.selBest(pop, 1)[0]\n", 131 | "print('Best individual is:\\n', best_ind)\n", 132 | "print('\\nWith fitness:', best_ind.fitness.values)\n" 133 | ], 134 | "outputs": [] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "id": "7aa57e0f8b6151ad", 139 | "metadata": {}, 140 | "source": [ 141 | "基于优化结果,我们还可以绘制Pareto前沿,以便于选择最终的模型。" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 8, 147 | "id": "28284e0a0047fcfc", 148 | "metadata": { 149 | "ExecuteTime": { 150 | "end_time": "2023-11-10T08:50:31.483100600Z", 151 | "start_time": "2023-11-10T08:50:31.314335800Z" 152 | } 153 | }, 154 | "source": [ 155 | "from matplotlib import pyplot as plt\n", 156 | "import seaborn as sns\n", 157 | "\n", 158 | "# 非支配排序\n", 159 | "fronts = tools.sortNondominated(pop, len(pop), first_front_only=True)\n", 160 | "\n", 161 | "# Pareto前沿\n", 162 | "pareto_front = fronts[0]\n", 163 | "fitnesses = [ind.fitness.values for ind in pareto_front]\n", 164 | "\n", 165 | "# 分离均方误差和树的大小\n", 166 | "mse = [fit[0] for fit in fitnesses]\n", 167 | "sizes = [fit[1] for fit in fitnesses]\n", 168 | "\n", 169 | "# 使用seaborn绘制散点图\n", 170 | "sns.set(style=\"whitegrid\")\n", 171 | "plt.figure(figsize=(10, 6))\n", 172 | "sns.scatterplot(x=mse, y=sizes, palette=\"viridis\", s=60, edgecolor=\"w\", alpha=0.7)\n", 173 | "plt.xlabel('Mean Square Error')\n", 174 | "plt.ylabel('Size of the GP Tree')\n", 175 | "plt.title('Pareto Front')\n", 176 | "plt.show()" 177 | ], 178 | "outputs": [] 179 | } 180 | ], 181 | "metadata": { 182 | "kernelspec": { 183 | "display_name": "Python 3 (ipykernel)", 184 | "language": "python", 185 | "name": "python3" 186 | }, 187 | "language_info": { 188 | "codemirror_mode": { 189 | "name": "ipython", 190 | "version": 3 191 | }, 192 | "file_extension": ".py", 193 | "mimetype": "text/x-python", 194 | "name": "python", 195 | "nbconvert_exporter": "python", 196 | "pygments_lexer": 
"ipython3", 197 | "version": "3.11.4" 198 | } 199 | }, 200 | "nbformat": 4, 201 | "nbformat_minor": 5 202 | } 203 | -------------------------------------------------------------------------------- /operator/varor-varand.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "### VarOr/VarAnd\n", 7 | "VarOr和VarAnd是演化算法中的两种范式。在VarOr中,每个子代只由交叉、变异、复制三种操作中的一种产生(概率分别为cxpb、mutpb和1-cxpb-mutpb),因此交叉和变异不会同时作用于同一个个体。VarAnd则相对自由,同一个个体可以先交叉再变异,也可以两者都不执行。GP的原始论文使用的是VarOr。" 8 | ], 9 | "metadata": { 10 | "collapsed": false 11 | }, 12 | "id": "8db4ada5ce6ebf73" 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "source": [ 18 | "import time\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "from deap import base, creator, tools, gp\n", 22 | "\n", 23 | "\n", 24 | "# 符号回归\n", 25 | "def evalSymbReg(individual, pset):\n", 26 | " # 编译GP树为函数\n", 27 | " func = gp.compile(expr=individual, pset=pset)\n", 28 | "\n", 29 | " # 使用numpy创建一个向量\n", 30 | " x = np.linspace(-10, 10, 100)\n", 31 | "\n", 32 | " # 评估生成的函数并计算MSE\n", 33 | " mse = np.mean((func(x) - x ** 2) ** 2)\n", 34 | "\n", 35 | " return (mse,)\n", 36 | "\n", 37 | "\n", 38 | "# 创建个体和适应度函数\n", 39 | "creator.create(\"FitnessMin\", base.Fitness, weights=(-1.0,))\n", 40 | "creator.create(\"Individual\", gp.PrimitiveTree, fitness=creator.FitnessMin)" 41 | ], 42 | "metadata": { 43 | "collapsed": false, 44 | "ExecuteTime": { 45 | "end_time": "2023-11-07T08:45:34.512379200Z", 46 | "start_time": "2023-11-07T08:45:34.394805500Z" 47 | } 48 | }, 49 | "id": "initial_id", 50 | "outputs": [] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "source": [ 56 | "import random\n", 57 | "\n", 58 | "# 定义函数集合和终端集合\n", 59 | "pset = gp.PrimitiveSet(\"MAIN\", arity=1)\n", 60 | "pset.addPrimitive(np.add, 2)\n", 61 | "pset.addPrimitive(np.subtract, 2)\n", 62 | "pset.addPrimitive(np.multiply, 2)\n", 63 | "pset.addPrimitive(np.negative, 1)\n", 64 |
"pset.addEphemeralConstant(\"rand101\", lambda: random.randint(-1, 1))\n", 65 | "pset.renameArguments(ARG0='x')\n", 66 | "\n", 67 | "# 定义遗传编程操作\n", 68 | "toolbox = base.Toolbox()\n", 69 | "toolbox.register(\"expr\", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)\n", 70 | "toolbox.register(\"individual\", tools.initIterate, creator.Individual, toolbox.expr)\n", 71 | "toolbox.register(\"population\", tools.initRepeat, list, toolbox.individual)\n", 72 | "toolbox.register(\"compile\", gp.compile, pset=pset)\n", 73 | "toolbox.register(\"evaluate\", evalSymbReg, pset=pset)\n", 74 | "toolbox.register(\"select\", tools.selTournament, tournsize=3)\n", 75 | "toolbox.register(\"mate\", gp.cxOnePoint)\n", 76 | "toolbox.register(\"mutate\", gp.mutUniform, expr=toolbox.expr, pset=pset)" 77 | ], 78 | "metadata": { 79 | "collapsed": false, 80 | "ExecuteTime": { 81 | "end_time": "2023-11-07T08:45:34.526223500Z", 82 | "start_time": "2023-11-07T08:45:34.514378Z" 83 | } 84 | }, 85 | "id": "cb6cf38094256262", 86 | "outputs": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "source": [ 91 | "DEAP默认使用VarAnd范式,如果我们想要实现VarOr,就需要自己修改eaSimple函数。当然,具体选择VarAnd还是VarOr要根据具体问题而定。目前尚无统一的结论表明哪种方式一定更好,需要根据问题的特性来决定。" 92 | ], 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "id": "e09fa8e7890d583b" 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 3, 101 | "source": [ 102 | "from deap.algorithms import varOr\n", 103 | "import numpy\n", 104 | "from deap import algorithms\n", 105 | "\n", 106 | "\n", 107 | "def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,\n", 108 | " halloffame=None, verbose=__debug__):\n", 109 | " logbook = tools.Logbook()\n", 110 | " logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])\n", 111 | "\n", 112 | " # Evaluate the individuals with an invalid fitness\n", 113 | " invalid_ind = [ind for ind in population if not ind.fitness.valid]\n", 114 | " fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)\n", 115 | " for ind, fit 
in zip(invalid_ind, fitnesses):\n", 116 | " ind.fitness.values = fit\n", 117 | "\n", 118 | " if halloffame is not None:\n", 119 | " halloffame.update(population)\n", 120 | "\n", 121 | " record = stats.compile(population) if stats else {}\n", 122 | " logbook.record(gen=0, nevals=len(invalid_ind), **record)\n", 123 | " if verbose:\n", 124 | " print(logbook.stream)\n", 125 | "\n", 126 | " # Begin the generational process\n", 127 | " for gen in range(1, ngen + 1):\n", 128 | " # Select the next generation individuals\n", 129 | " offspring = toolbox.select(population, len(population))\n", 130 | "\n", 131 | " # Vary the pool of individuals\n", 132 | " offspring = varOr(offspring, toolbox, len(offspring),cxpb, mutpb)\n", 133 | "\n", 134 | " # Evaluate the individuals with an invalid fitness\n", 135 | " invalid_ind = [ind for ind in offspring if not ind.fitness.valid]\n", 136 | " fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)\n", 137 | " for ind, fit in zip(invalid_ind, fitnesses):\n", 138 | " ind.fitness.values = fit\n", 139 | "\n", 140 | " # Update the hall of fame with the generated individuals\n", 141 | " if halloffame is not None:\n", 142 | " halloffame.update(offspring)\n", 143 | "\n", 144 | " # Replace the current population by the offspring\n", 145 | " population[:] = offspring\n", 146 | "\n", 147 | " # Append the current generation statistics to the logbook\n", 148 | " record = stats.compile(population) if stats else {}\n", 149 | " logbook.record(gen=gen, nevals=len(invalid_ind), **record)\n", 150 | " if verbose:\n", 151 | " print(logbook.stream)\n", 152 | "\n", 153 | " return population, logbook\n", 154 | "\n", 155 | "\n", 156 | "# 定义统计指标\n", 157 | "stats_fit = tools.Statistics(lambda ind: ind.fitness.values)\n", 158 | "stats_size = tools.Statistics(len)\n", 159 | "mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)\n", 160 | "mstats.register(\"avg\", numpy.mean)\n", 161 | "mstats.register(\"std\", numpy.std)\n", 162 | 
"mstats.register(\"min\", numpy.min)\n", 163 | "mstats.register(\"max\", numpy.max)\n", 164 | "\n", 165 | "# 使用上面定义的基于VarOr的eaSimple(而不是DEAP默认的algorithms.eaSimple)\n", 166 | "start = time.time()\n", 167 | "population = toolbox.population(n=300)\n", 168 | "hof = tools.HallOfFame(1)\n", 169 | "pop, log = eaSimple(population=population,\n", 170 | " toolbox=toolbox, cxpb=0.5, mutpb=0.2, ngen=50, stats=mstats, halloffame=hof,\n", 171 | " verbose=True)\n", 172 | "end = time.time()\n", 173 | "print('time:', end - start)\n", 174 | "print(str(hof[0]))" 175 | ], 176 | "metadata": { 177 | "collapsed": false, 178 | "ExecuteTime": { 179 | "end_time": "2023-11-07T08:45:35.379380400Z", 180 | "start_time": "2023-11-07T08:45:34.528223800Z" 181 | } 182 | }, 183 | "id": "88c62bc071d56191", 184 | "outputs": [] 185 | } 186 | ], 187 | "metadata": { 188 | "kernelspec": { 189 | "display_name": "Python 3", 190 | "language": "python", 191 | "name": "python3" 192 | }, 193 | "language_info": { 194 | "codemirror_mode": { 195 | "name": "ipython", 196 | "version": 2 197 | }, 198 | "file_extension": ".py", 199 | "mimetype": "text/x-python", 200 | "name": "python", 201 | "nbconvert_exporter": "python", 202 | "pygments_lexer": "ipython2", 203 | "version": "2.7.6" 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 5 208 | } 209 | -------------------------------------------------------------------------------- /tricks/numpy-speedup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8db4ada5ce6ebf73", 6 | "metadata": {}, 7 | "source": [ 8 | "### Numpy 加速\n", 9 | "Python 是一种相对较慢的编程语言,但是我们可以通过使用Numpy来加速Python的运算。Numpy是一个基于C语言的库,提供了许多高效的运算函数,例如矩阵运算和线性代数运算等。这些运算都基于C语言实现,因此速度非常快。\n", 10 | "\n", 11 | "GP的性能瓶颈通常在于模型评估。因此,在这里,我们重点关注如何加速评估函数。其实很简单,将数据集转换为Numpy数组,然后使用Numpy函数来计算MSE即可。下面是一个例子。" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "id": "initial_id", 18 | "metadata": { 19 | "ExecuteTime": { 20
| "end_time": "2023-11-14T09:14:24.923043100Z", 21 | "start_time": "2023-11-14T09:14:24.908046400Z" 22 | } 23 | }, 24 | "source": [ 25 | "import time\n", 26 | "\n", 27 | "import numpy as np\n", 28 | "from deap import base, creator, tools, gp\n", 29 | "\n", 30 | "\n", 31 | "# 符号回归\n", 32 | "def evalSymbReg(individual, pset):\n", 33 | " # 编译GP树为函数\n", 34 | " func = gp.compile(expr=individual, pset=pset)\n", 35 | " \n", 36 | " # 使用numpy创建一个向量\n", 37 | " x = np.linspace(-10, 10, 100) \n", 38 | " \n", 39 | " # 评估生成的函数并计算MSE\n", 40 | " mse = np.mean((func(x) - x**2)**2)\n", 41 | " \n", 42 | " return (mse,)\n", 43 | "\n", 44 | "# 创建个体和适应度函数\n", 45 | "creator.create(\"FitnessMin\", base.Fitness, weights=(-1.0,))\n", 46 | "creator.create(\"Individual\", gp.PrimitiveTree, fitness=creator.FitnessMin)\n" 47 | ], 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "id": "e3d94e424b58af5a", 53 | "metadata": {}, 54 | "source": [ 55 | "同时,我们还可以考虑将一些算子替换为Numpy函数。尽管这并不是非常重要,因为Numpy已经重载了许多运算符。" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 2, 61 | "id": "cb6cf38094256262", 62 | "metadata": { 63 | "ExecuteTime": { 64 | "end_time": "2023-11-14T09:14:24.933166800Z", 65 | "start_time": "2023-11-14T09:14:24.927568400Z" 66 | } 67 | }, 68 | "source": [ 69 | "import random\n", 70 | "\n", 71 | "# 定义函数集合和终端集合\n", 72 | "pset = gp.PrimitiveSet(\"MAIN\", arity=1)\n", 73 | "pset.addPrimitive(np.add, 2)\n", 74 | "pset.addPrimitive(np.subtract, 2)\n", 75 | "pset.addPrimitive(np.multiply, 2)\n", 76 | "pset.addPrimitive(np.negative, 1)\n", 77 | "def random_int(): return random.randint(-1, 1)\n", 78 | "pset.addEphemeralConstant(\"rand101\", random_int)\n", 79 | "pset.renameArguments(ARG0='x')\n", 80 | "\n", 81 | "# 定义遗传编程操作\n", 82 | "toolbox = base.Toolbox()\n", 83 | "toolbox.register(\"expr\", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)\n", 84 | "toolbox.register(\"individual\", tools.initIterate, creator.Individual, toolbox.expr)\n", 85 | 
"toolbox.register(\"population\", tools.initRepeat, list, toolbox.individual)\n", 86 | "toolbox.register(\"compile\", gp.compile, pset=pset)\n", 87 | "toolbox.register(\"evaluate\", evalSymbReg, pset=pset)\n", 88 | "toolbox.register(\"select\", tools.selTournament, tournsize=3)\n", 89 | "toolbox.register(\"mate\", gp.cxOnePoint)\n", 90 | "toolbox.register(\"mutate\", gp.mutUniform, expr=toolbox.expr, pset=pset)" 91 | ], 92 | "outputs": [] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "id": "e09fa8e7890d583b", 97 | "metadata": {}, 98 | "source": [ 99 | "现在,让我们来测试一下加速效果。" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 3, 105 | "id": "88c62bc071d56191", 106 | "metadata": { 107 | "ExecuteTime": { 108 | "end_time": "2023-11-14T09:14:25.525098600Z", 109 | "start_time": "2023-11-14T09:14:24.935256200Z" 110 | } 111 | }, 112 | "source": [ 113 | "import numpy\n", 114 | "from deap import algorithms\n", 115 | "\n", 116 | "# 定义统计指标\n", 117 | "stats_fit = tools.Statistics(lambda ind: ind.fitness.values)\n", 118 | "stats_size = tools.Statistics(len)\n", 119 | "mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)\n", 120 | "mstats.register(\"avg\", numpy.mean)\n", 121 | "mstats.register(\"std\", numpy.std)\n", 122 | "mstats.register(\"min\", numpy.min)\n", 123 | "mstats.register(\"max\", numpy.max)\n", 124 | "\n", 125 | "# 使用默认算法\n", 126 | "np_time=[]\n", 127 | "for i in range(3):\n", 128 | " start=time.time()\n", 129 | " population = toolbox.population(n=300)\n", 130 | " hof = tools.HallOfFame(1)\n", 131 | " pop, log = algorithms.eaSimple(population=population,\n", 132 | " toolbox=toolbox, cxpb=0.9, mutpb=0.1, ngen=10, stats=mstats, halloffame=hof, verbose=True)\n", 133 | " end=time.time()\n", 134 | " print('time:',end-start)\n", 135 | " np_time.append(end-start)\n", 136 | " print(str(hof[0]))" 137 | ], 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "id": "be250c9740bc2817", 143 | "metadata": {}, 144 | 
"source": [ 145 | "对比下面的原始评估函数,使用Numpy的加速效果还是非常明显的。" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 4, 151 | "id": "f2ddb57d24051753", 152 | "metadata": { 153 | "ExecuteTime": { 154 | "end_time": "2023-11-14T09:14:27.572677800Z", 155 | "start_time": "2023-11-14T09:14:25.521044600Z" 156 | } 157 | }, 158 | "source": [ 159 | "# 慢速评估\n", 160 | "def evalSymbRegSlow(individual, pset):\n", 161 | " # 编译GP树为函数\n", 162 | " func = gp.compile(expr=individual, pset=pset)\n", 163 | " \n", 164 | " # 创建评估数据\n", 165 | " xs = [x/5.0 for x in range(-50, 51)]\n", 166 | " \n", 167 | " # 评估生成的函数并计算MSE\n", 168 | " mse = sum((func(x) - x**2)**2 for x in xs) / len(xs)\n", 169 | " \n", 170 | " return (mse,)\n", 171 | "\n", 172 | "toolbox.register(\"evaluate\", evalSymbRegSlow, pset=pset)\n", 173 | "\n", 174 | "py_time=[]\n", 175 | "for i in range(3):\n", 176 | " start=time.time()\n", 177 | " population = toolbox.population(n=300)\n", 178 | " hof = tools.HallOfFame(1)\n", 179 | " pop, log = algorithms.eaSimple(population=population,\n", 180 | " toolbox=toolbox, cxpb=0.9, mutpb=0.1, ngen=10, stats=mstats, halloffame=hof, verbose=True)\n", 181 | " end=time.time()\n", 182 | " print('time:',end-start)\n", 183 | " py_time.append(end-start)" 184 | ], 185 | "outputs": [] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "id": "75ed499f209894ae", 190 | "metadata": {}, 191 | "source": [ 192 | "最后,我们可以使用seaborn绘制一个图来比较Numpy和Python的性能。可以看出,Numpy显著提高了速度。" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 7, 198 | "id": "f09f85635ed36092", 199 | "metadata": { 200 | "ExecuteTime": { 201 | "end_time": "2023-11-14T09:24:29.905469100Z", 202 | "start_time": "2023-11-14T09:24:29.810538800Z" 203 | } 204 | }, 205 | "source": [ 206 | "import seaborn as sns\n", 207 | "import matplotlib.pyplot as plt\n", 208 | "import pandas as pd\n", 209 | "data = pd.DataFrame({'Category': ['Numpy'] * len(np_time) + ['Python'] * len(py_time),\n", 210 | " 'Time': 
np.concatenate([np_time, py_time])})\n", 211 | "\n", 212 | "\n", 213 | "plt.figure(figsize=(4, 3))\n", 214 | "sns.set_style(\"whitegrid\")\n", 215 | "sns.boxplot(data=data, x='Category', y='Time',palette=\"Set3\", width=0.4)\n", 216 | "plt.title('Comparison of Numpy and Python')\n", 217 | "plt.xlabel('')\n", 218 | "plt.ylabel('Time')\n", 219 | "plt.show()" 220 | ], 221 | "outputs": [] 222 | } 223 | ], 224 | "metadata": { 225 | "kernelspec": { 226 | "display_name": "Python 3 (ipykernel)", 227 | "language": "python", 228 | "name": "python3" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.11.4" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 5 245 | } 246 | -------------------------------------------------------------------------------- /application/alpha_dominance_mogp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import operator 3 | import random 4 | 5 | import numpy as np 6 | from deap import base, creator, tools, gp, algorithms 7 | from deap.tools import sortNondominated, selNSGA2 8 | 9 | 10 | # 定义评估函数,包含两个目标:均方误差和树的大小 11 | def evalSymbReg(individual, pset): 12 | # 编译GP树为函数 13 | func = gp.compile(expr=individual, pset=pset) 14 | # 计算均方误差(Mean Square Error,MSE) 15 | mse = ((func(x) - (x**2 + x)) ** 2 for x in range(-10, 10)) 16 | # 计算GP树的大小 17 | size = len(individual) 18 | return math.fsum(mse), size 19 | 20 | 21 | # 定义Alpha支配 22 | class AlphaDominance: 23 | def __init__(self, algorithm=None, initial_alpha=0.1, step_size=0.1): 24 | self.historical_largest = 0 25 | self.historical_smallest = math.inf 26 | self.algorithm = algorithm 27 | self.step_size = step_size 28 | self.initial_alpha = initial_alpha 29 | 30 | def update_best(self, population): 31 
| self.historical_smallest = min( 32 | self.historical_smallest, min([len(p) for p in population]) 33 | ) 34 | self.historical_largest = max( 35 | self.historical_largest, max([len(p) for p in population]) 36 | ) 37 | 38 | def selection(self, population, offspring, alpha): 39 | # 调整适应度以考虑大小 40 | self.set_fitness_with_size(population, offspring, alpha) 41 | 42 | # 应用NSGA-II选择 43 | first_pareto_front = sortNondominated(offspring + population, len(population))[ 44 | 0 45 | ] 46 | selected_pop = selNSGA2(offspring + population, len(population)) 47 | 48 | if hasattr(self.algorithm, "hof") and self.algorithm.hof is not None: 49 | self.algorithm.hof.update(selected_pop) 50 | 51 | # 恢复原始适应度值 52 | self.restore_original_fitness(selected_pop) 53 | 54 | # 根据大小调整alpha 55 | theta = np.rad2deg(np.arctan(alpha)) 56 | avg_size = np.mean([len(p) for p in first_pareto_front]) 57 | 58 | # 更新历史最大和最小值 59 | self.update_best(first_pareto_front) 60 | 61 | # 计算新的alpha值 62 | new_alpha = self.adjust_alpha(theta, avg_size) 63 | 64 | return selected_pop, new_alpha 65 | 66 | def adjust_alpha(self, theta, avg_size): 67 | historical_largest = self.historical_largest 68 | historical_smallest = self.historical_smallest 69 | 70 | # 防止除以零 71 | if historical_largest == historical_smallest: 72 | return np.tan(np.deg2rad(theta)) 73 | 74 | theta = theta + ( 75 | historical_largest + historical_smallest - 2 * avg_size 76 | ) * self.step_size / (historical_largest - historical_smallest) 77 | theta = np.clip(theta, 0, 90) 78 | return np.tan(np.deg2rad(theta)) 79 | 80 | def restore_original_fitness(self, population): 81 | for ind in population: 82 | ind.fitness.weights = (-1, -1) 83 | ind.fitness.values = getattr(ind, "original_fitness") 84 | 85 | def set_fitness_with_size(self, population, offspring, alpha): 86 | max_size = max([len(x) for x in offspring + population]) 87 | for ind in offspring + population: 88 | assert alpha >= 0, f"Alpha Value {alpha}" 89 | setattr(ind, "original_fitness", 
ind.fitness.values) 90 | ind.fitness.weights = (-1, -1) 91 | # 修改第二个目标为模型大小和准确率的加权 92 | ind.fitness.values = ( 93 | ind.fitness.values[0], 94 | len(ind) / max_size + alpha * ind.fitness.values[0], 95 | ) 96 | 97 | 98 | # 修改适应度函数,包含两个权重:MSE和树的大小。MSE是最小化,树的大小也是最小化 99 | creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0)) 100 | creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMulti) 101 | 102 | # 定义函数集合和终端集合 103 | pset = gp.PrimitiveSet("MAIN", arity=1) 104 | pset.addPrimitive(operator.add, 2) 105 | pset.addPrimitive(operator.sub, 2) 106 | pset.addPrimitive(operator.mul, 2) 107 | pset.addPrimitive(operator.neg, 1) 108 | pset.addEphemeralConstant("rand101", lambda: random.randint(-1, 1)) 109 | pset.renameArguments(ARG0="x") 110 | 111 | # 定义遗传编程操作 112 | toolbox = base.Toolbox() 113 | toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2) 114 | toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr) 115 | toolbox.register("population", tools.initRepeat, list, toolbox.individual) 116 | toolbox.register("compile", gp.compile, pset=pset) 117 | toolbox.register("evaluate", evalSymbReg, pset=pset) 118 | toolbox.register("mate", gp.cxOnePoint) 119 | toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr, pset=pset) 120 | 121 | 122 | # 实现基于alpha支配的演化算法 123 | def eaMuPlusLambdaWithAlphaDominance( 124 | population, 125 | toolbox, 126 | mu, 127 | lambda_, 128 | cxpb, 129 | mutpb, 130 | ngen, 131 | stats=None, 132 | halloffame=None, 133 | verbose=__debug__, 134 | initial_alpha=0.1, 135 | step_size=0.1, 136 | ): 137 | """ 138 | 基于alpha支配的(mu + lambda)演化策略 139 | """ 140 | logbook = tools.Logbook() 141 | logbook.header = ["gen", "nevals"] + (stats.fields if stats else []) 142 | 143 | # 评估初始种群 144 | invalid_ind = [ind for ind in population if not ind.fitness.valid] 145 | fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) 146 | for ind, fit in zip(invalid_ind, fitnesses): 147 | ind.fitness.values = 
fit 148 | 149 | if halloffame is not None: 150 | halloffame.update(population) 151 | 152 | record = stats.compile(population) if stats else {} 153 | logbook.record(gen=0, nevals=len(invalid_ind), **record) 154 | if verbose: 155 | print(logbook.stream) 156 | 157 | # 初始化alpha支配 158 | selector = AlphaDominance( 159 | algorithm=toolbox, initial_alpha=initial_alpha, step_size=step_size 160 | ) 161 | alpha = initial_alpha 162 | 163 | # 开始演化 164 | for gen in range(1, ngen + 1): 165 | # 变异操作 166 | offspring = algorithms.varOr(population, toolbox, lambda_, cxpb, mutpb) 167 | 168 | # 评估后代 169 | invalid_ind = [ind for ind in offspring if not ind.fitness.valid] 170 | fitnesses = toolbox.map(toolbox.evaluate, invalid_ind) 171 | for ind, fit in zip(invalid_ind, fitnesses): 172 | ind.fitness.values = fit 173 | 174 | # 使用alpha支配选择下一代 175 | population[:], alpha = selector.selection(population, offspring, alpha) 176 | 177 | if halloffame is not None: 178 | halloffame.update(population) 179 | 180 | # 记录统计信息 181 | record = stats.compile(population) if stats else {} 182 | logbook.record(gen=gen, nevals=len(invalid_ind), alpha=alpha, **record) 183 | if verbose: 184 | print(logbook.stream) 185 | 186 | return population, logbook 187 | 188 | 189 | # 统计指标 190 | stats_fit = tools.Statistics(lambda ind: ind.fitness.values[0]) 191 | stats_size = tools.Statistics(lambda ind: ind.fitness.values[1]) 192 | mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size) 193 | mstats.register("avg", np.mean) 194 | mstats.register("std", np.std) 195 | mstats.register("min", np.min) 196 | mstats.register("max", np.max) 197 | 198 | # 初始化种群 199 | population = toolbox.population(n=50) 200 | hof = tools.HallOfFame(1) 201 | 202 | # 运行演化算法 203 | pop, log = eaMuPlusLambdaWithAlphaDominance( 204 | population=population, 205 | toolbox=toolbox, 206 | mu=len(population), 207 | lambda_=len(population), 208 | cxpb=0.9, 209 | mutpb=0.1, 210 | ngen=10, 211 | stats=mstats, 212 | halloffame=hof, 213 | verbose=True, 
214 | initial_alpha=0.1, # 初始alpha值 215 | step_size=0.1, # 自适应步长 216 | ) 217 | 218 | # 输出最佳个体 219 | best_ind = hof[0] if len(hof) > 0 else tools.selBest(pop, 1)[0] 220 | print("Best individual is:\n", best_ind) 221 | print("\nWith fitness:", best_ind.fitness.values) 222 | 223 | # 绘制Pareto前沿 224 | from matplotlib import pyplot as plt 225 | import seaborn as sns 226 | 227 | # 非支配排序 228 | fronts = tools.sortNondominated(pop, len(pop), first_front_only=True) 229 | 230 | # Pareto前沿 231 | pareto_front = fronts[0] 232 | fitnesses = [ind.fitness.values for ind in pareto_front] 233 | 234 | # 分离均方误差和树的大小 235 | mse = [fit[0] for fit in fitnesses] 236 | sizes = [fit[1] for fit in fitnesses] 237 | 238 | # 使用seaborn绘制散点图 239 | sns.set(style="whitegrid") 240 | plt.figure(figsize=(10, 6)) 241 | sns.scatterplot(x=mse, y=sizes, palette="viridis", s=60, edgecolor="w", alpha=0.7) 242 | plt.xlabel("Mean Square Error") 243 | plt.ylabel("Size of the GP Tree") 244 | plt.title("Pareto Front with Alpha Dominance") 245 | plt.show() 246 | -------------------------------------------------------------------------------- /tricks/compiler-speedup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8db4ada5ce6ebf73", 6 | "metadata": {}, 7 | "source": [ 8 | "### 低开销编译器\n", 9 | "DEAP在编译GP时使用了Python的默认编译器,但是Python默认编译器在编译GP时实际上速度较慢,因此我们可以考虑自行实现一个编译器来加速GP运算。更严格来说,应该是自行实现一个GP树的解析函数,从而降低编译的时间开销。" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "id": "initial_id", 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2023-12-25T02:37:08.047926400Z", 19 | "start_time": "2023-12-25T02:37:08.020758200Z" 20 | } 21 | }, 22 | "source": [ 23 | "import time\n", 24 | "import warnings\n", 25 | "\n", 26 | "import numpy as np\n", 27 | "from deap import base, creator, tools, gp\n", 28 | "from deap.gp import PrimitiveTree, Primitive, Terminal\n", 29 | "\n", 30 | 
"warnings.filterwarnings(\"ignore\")\n", 31 | "\n", 32 | "\n", 33 | "def quick_evaluate(expr: PrimitiveTree, pset, data, prefix='ARG'):\n", 34 | " result = None\n", 35 | " stack = []\n", 36 | " for node in expr:\n", 37 | " stack.append((node, []))\n", 38 | " while len(stack[-1][1]) == stack[-1][0].arity:\n", 39 | " prim, args = stack.pop()\n", 40 | " if isinstance(prim, Primitive):\n", 41 | " result = pset.context[prim.name](*args)\n", 42 | " elif isinstance(prim, Terminal):\n", 43 | " if prefix in prim.name:\n", 44 | " result = data[:, int(prim.name.replace(prefix, ''))]\n", 45 | " else:\n", 46 | " result = prim.value\n", 47 | " else:\n", 48 | " raise Exception\n", 49 | " if len(stack) == 0:\n", 50 | " break # 栈为空代表所有节点都已经被访问\n", 51 | " stack[-1][1].append(result)\n", 52 | " return result\n", 53 | "\n", 54 | "\n", 55 | "# 符号回归\n", 56 | "def evalSymbReg(individual, pset):\n", 57 | " # 使用numpy创建一个向量\n", 58 | " x = np.linspace(-10, 10, 100).reshape(-1, 1)\n", 59 | "\n", 60 | " # 评估生成的函数并计算MSE;目标值取x[:, 0],避免形状(100,)与(100, 1)相减时被广播成(100, 100)\n", 61 | " mse = np.mean((quick_evaluate(individual, pset, x) - x[:, 0] ** 2) ** 2)\n", 62 | "\n", 63 | " return (mse,)\n", 64 | "\n", 65 | "\n", 66 | "# 创建个体和适应度函数\n", 67 | "creator.create(\"FitnessMin\", base.Fitness, weights=(-1.0,))\n", 68 | "creator.create(\"Individual\", gp.PrimitiveTree, fitness=creator.FitnessMin)" 69 | ], 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "id": "e3d94e424b58af5a", 75 | "metadata": {}, 76 | "source": [] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "id": "cb6cf38094256262", 82 | "metadata": { 83 | "ExecuteTime": { 84 | "end_time": "2023-12-25T02:37:08.054440500Z", 85 | "start_time": "2023-12-25T02:37:08.049951400Z" 86 | } 87 | }, 88 | "source": [ 89 | "import random\n", 90 | "\n", 91 | "# 定义函数集合和终端集合\n", 92 | "pset = gp.PrimitiveSet(\"MAIN\", arity=1)\n", 93 | "pset.addPrimitive(np.add, 2)\n", 94 | "pset.addPrimitive(np.subtract, 2)\n", 95 | "pset.addPrimitive(np.multiply, 2)\n", 96 |
"pset.addPrimitive(np.negative, 1)\n", 97 | "pset.addEphemeralConstant(\"rand101\", lambda: random.randint(-1, 1))\n", 98 | "\n", 99 | "# 定义遗传编程操作\n", 100 | "toolbox = base.Toolbox()\n", 101 | "toolbox.register(\"expr\", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)\n", 102 | "toolbox.register(\"individual\", tools.initIterate, creator.Individual, toolbox.expr)\n", 103 | "toolbox.register(\"population\", tools.initRepeat, list, toolbox.individual)\n", 104 | "toolbox.register(\"compile\", gp.compile, pset=pset)\n", 105 | "toolbox.register(\"evaluate\", evalSymbReg, pset=pset)\n", 106 | "toolbox.register(\"select\", tools.selTournament, tournsize=3)\n", 107 | "toolbox.register(\"mate\", gp.cxOnePoint)\n", 108 | "toolbox.register(\"mutate\", gp.mutUniform, expr=toolbox.expr, pset=pset)" 109 | ], 110 | "outputs": [] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "id": "e09fa8e7890d583b", 115 | "metadata": {}, 116 | "source": [] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 3, 121 | "id": "88c62bc071d56191", 122 | "metadata": { 123 | "ExecuteTime": { 124 | "end_time": "2023-12-25T02:37:08.226956400Z", 125 | "start_time": "2023-12-25T02:37:08.054649800Z" 126 | } 127 | }, 128 | "source": [ 129 | "import numpy\n", 130 | "from deap import algorithms\n", 131 | "\n", 132 | "# 定义统计指标\n", 133 | "stats_fit = tools.Statistics(lambda ind: ind.fitness.values)\n", 134 | "stats_size = tools.Statistics(len)\n", 135 | "mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)\n", 136 | "mstats.register(\"avg\", numpy.mean)\n", 137 | "mstats.register(\"std\", numpy.std)\n", 138 | "mstats.register(\"min\", numpy.min)\n", 139 | "mstats.register(\"max\", numpy.max)\n", 140 | "\n", 141 | "# 使用默认算法\n", 142 | "custom_compiler_time = []\n", 143 | "for i in range(3):\n", 144 | " start = time.time()\n", 145 | " population = toolbox.population(n=100)\n", 146 | " hof = tools.HallOfFame(1)\n", 147 | " pop, log = algorithms.eaSimple(population=population,\n", 
148 | " toolbox=toolbox, cxpb=0.9, mutpb=0.1, ngen=10, stats=mstats, halloffame=hof,\n", 149 | " verbose=True)\n", 150 | " end = time.time()\n", 151 | " print('time:', end - start)\n", 152 | " print(str(hof[0]))\n", 153 | " custom_compiler_time.append(end - start)" 154 | ], 155 | "outputs": [] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 4, 160 | "id": "93e26be78cb63ef", 161 | "metadata": { 162 | "ExecuteTime": { 163 | "end_time": "2023-12-25T02:37:08.449160200Z", 164 | "start_time": "2023-12-25T02:37:08.228958300Z" 165 | } 166 | }, 167 | "source": [ 168 | "# 慢速评估\n", 169 | "def evalSymbReg(individual, pset):\n", 170 | " # 编译GP树为函数\n", 171 | " func = gp.compile(expr=individual, pset=pset)\n", 172 | "\n", 173 | " # 使用numpy创建一个向量\n", 174 | " x = np.linspace(-10, 10, 100)\n", 175 | "\n", 176 | " # 评估生成的函数并计算MSE\n", 177 | " mse = np.mean((func(x) - x ** 2) ** 2)\n", 178 | "\n", 179 | " return (mse,)\n", 180 | "\n", 181 | "\n", 182 | "toolbox.register(\"evaluate\", evalSymbReg, pset=pset)\n", 183 | "\n", 184 | "py_time = []\n", 185 | "for i in range(3):\n", 186 | " start = time.time()\n", 187 | " population = toolbox.population(n=100)\n", 188 | " hof = tools.HallOfFame(1)\n", 189 | " pop, log = algorithms.eaSimple(population=population,\n", 190 | " toolbox=toolbox, cxpb=0.9, mutpb=0.1, ngen=10, stats=mstats, halloffame=hof,\n", 191 | " verbose=True)\n", 192 | " end = time.time()\n", 193 | " print('time:', end - start)\n", 194 | " py_time.append(end - start)" 195 | ], 196 | "outputs": [] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "id": "8c746d5032e852bb", 201 | "metadata": {}, 202 | "source": [ 203 | "下图展示了实验结果,从实验结果可以看出,自行实现的编译器在编译GP树时的速度要快于Python默认编译器。主要是因为自行实现的编译器基本没有额外开销,而Python默认编译器在编译时会进行一些额外的操作,因此速度较慢。" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 5, 209 | "id": "c6e9fb08c172a7f0", 210 | "metadata": { 211 | "ExecuteTime": { 212 | "end_time": "2023-12-25T02:37:08.749483500Z", 213 | "start_time": 
"2023-12-25T02:37:08.450148200Z" 214 | } 215 | }, 216 | "source": [ 217 | "import seaborn as sns\n", 218 | "import matplotlib.pyplot as plt\n", 219 | "import pandas as pd\n", 220 | "\n", 221 | "data = pd.DataFrame(\n", 222 | " {'Category': ['Efficient Compiler'] * len(custom_compiler_time) + ['Python Compiler'] * len(py_time),\n", 223 | " 'Time': np.concatenate([custom_compiler_time, py_time])})\n", 224 | "\n", 225 | "plt.figure(figsize=(4, 3))\n", 226 | "sns.set_style(\"whitegrid\")\n", 227 | "sns.boxplot(data=data, x='Category', y='Time', palette=\"Set3\", width=0.4)\n", 228 | "plt.title('Comparison of Efficient Compiler and Python Compiler')\n", 229 | "plt.xlabel('')\n", 230 | "plt.ylabel('Time')\n", 231 | "plt.show()" 232 | ], 233 | "outputs": [] 234 | } 235 | ], 236 | "metadata": { 237 | "kernelspec": { 238 | "display_name": "Python 3 (ipykernel)", 239 | "language": "python", 240 | "name": "python3" 241 | }, 242 | "language_info": { 243 | "codemirror_mode": { 244 | "name": "ipython", 245 | "version": 3 246 | }, 247 | "file_extension": ".py", 248 | "mimetype": "text/x-python", 249 | "name": "python", 250 | "nbconvert_exporter": "python", 251 | "pygments_lexer": "ipython3", 252 | "version": "3.11.4" 253 | } 254 | }, 255 | "nbformat": 4, 256 | "nbformat_minor": 5 257 | } 258 | -------------------------------------------------------------------------------- /application/symbolic-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "cbf709014ce0316e", 6 | "metadata": {}, 7 | "source": [ 8 | "## 基于单树GP的符号回归(Symbolic Regression)\n", 9 | "\n", 10 | "基于单树GP的符号回归是指使用遗传编程(GP)生成数学公式来逼近一组数据的关系,通过组合DEAP的Creator,Toolbox和Algorithms这三个模块即可实现。\n", 11 | "\n", 12 | "\n", 13 | "### Creator类\n", 14 | "Creator是一个工具类,其主要作用是创建新的类。在遗传编程中,通常需要自定义个体(Individual)和适应度(Fitness)类,因为不同的问题可能需要不同的适应度类型和个体结构。在DEAP中,我们可以使用creator来动态地创建这些类。\n", 15 | "\n", 16 | 
def evalSymbReg(individual, pset):
    """Score a GP tree by its summed squared error against ``x ** 2``.

    The compiled tree is called once per integer ``x`` in ``range(-10, 10)``
    (20 points).  Note that the result is the *sum* of the squared errors,
    accumulated with ``math.fsum``; it is not divided by the sample count.

    Parameters
    ----------
    individual : the GP tree to evaluate.
    pset : the primitive set the tree was built from.

    Returns
    -------
    tuple
        One-element tuple holding the summed squared error.
    """
    # Compile the tree into a Python callable.
    model = gp.compile(expr=individual, pset=pset)
    squared_errors = [(model(x) - x**2)**2 for x in range(-10, 10)]
    return (math.fsum(squared_errors),)
82 | "toolbox = base.Toolbox()\n", 83 | "toolbox.register(\"expr\", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)\n", 84 | "toolbox.register(\"individual\", tools.initIterate, creator.Individual, toolbox.expr)\n", 85 | "toolbox.register(\"population\", tools.initRepeat, list, toolbox.individual)\n", 86 | "toolbox.register(\"compile\", gp.compile, pset=pset)\n", 87 | "toolbox.register(\"evaluate\", evalSymbReg, pset=pset)\n", 88 | "toolbox.register(\"select\", tools.selTournament, tournsize=3)\n", 89 | "toolbox.register(\"mate\", gp.cxOnePoint)\n", 90 | "toolbox.register(\"mutate\", gp.mutUniform, expr=toolbox.expr, pset=pset)" 91 | ], 92 | "outputs": [] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "id": "62f30d17704db709", 97 | "metadata": {}, 98 | "source": [ 99 | "### Algorithms类\n", 100 | "Algorithms模块提供了一些现成的遗传算法和遗传编程的实现。例如,eaSimple是一个简单的遗传算法,它可以处理基本的选择、交叉、变异和演化迭代。" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 13, 106 | "id": "515b587d4f8876ea", 107 | "metadata": { 108 | "ExecuteTime": { 109 | "end_time": "2023-11-08T02:39:00.216839200Z", 110 | "start_time": "2023-11-08T02:39:00.068850700Z" 111 | } 112 | }, 113 | "source": [ 114 | "import numpy\n", 115 | "from deap import algorithms\n", 116 | "\n", 117 | "# 定义统计指标\n", 118 | "stats_fit = tools.Statistics(lambda ind: ind.fitness.values)\n", 119 | "stats_size = tools.Statistics(len)\n", 120 | "mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)\n", 121 | "mstats.register(\"avg\", numpy.mean)\n", 122 | "mstats.register(\"std\", numpy.std)\n", 123 | "mstats.register(\"min\", numpy.min)\n", 124 | "mstats.register(\"max\", numpy.max)\n", 125 | "\n", 126 | "# 使用默认算法\n", 127 | "population = toolbox.population(n=100)\n", 128 | "hof = tools.HallOfFame(1)\n", 129 | "pop, log = algorithms.eaSimple(population=population,\n", 130 | " toolbox=toolbox, cxpb=0.9, mutpb=0.1, ngen=10, stats=mstats, halloffame=hof, verbose=True)\n" 131 | ], 132 | "outputs": [] 133 | }, 134 | { 135 
# Display names used for the primitives when labelling the plot.
function_name = {
    'add': 'Add',
    'sub': 'Sub',
    'mul': 'Mul',
    'neg': 'Neg',
}

# Label prefix for constructed features.  Raw string: "\p" is not a valid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
_PHI = r"$\phi$"


def is_number(string):
    """Return True when ``float(string)`` succeeds, False otherwise.

    ``TypeError`` is handled as well as ``ValueError`` so that values which
    are neither strings nor numbers (``None``, arrays, ...) are reported as
    "not a number" instead of aborting the caller.
    """
    try:
        float(string)
    except (TypeError, ValueError):
        return False
    return True


def _argument_index(text):
    """Return ``i`` for a name of the form ``X<i>``, or None for anything else."""
    if text.startswith("X") and text[1:].isdecimal():
        return int(text[1:])
    return None


def _classify_nodes(tree, n_features=None):
    """Group the node indices of *tree* by colour and compute display labels.

    Nothing on the tree is modified: the display text is returned in a
    separate mapping instead of being written back onto the nodes.

    Returns ``(red, purple, blue, relabel)`` where

    * ``red`` holds constants and original input features,
    * ``purple`` holds constructed features (argument index >= n_features),
    * ``blue`` holds every other node (functions, other named terminals),
    * ``relabel`` maps node index -> replacement label.
    """
    # Without a feature count every X<i> argument is an original input.
    limit = float("inf") if n_features is None else n_features
    red, purple, blue, relabel = [], [], [], {}

    for gid, node in enumerate(tree):
        if not hasattr(node, "value"):
            # Function node: only its display name may change.
            if node.name in function_name:
                relabel[gid] = function_name[node.name]
            blue.append(gid)
            continue

        value = node.value
        if is_number(value):
            red.append(gid)
            continue

        text = value if isinstance(value, str) else ""
        if text.startswith("ARG"):
            text = text.replace("ARG", "X")
            relabel[gid] = text

        index = _argument_index(text)
        if index is not None and index < limit:
            red.append(gid)
        elif index is not None:
            # Constructed features are numbered from 1 after the inputs.
            relabel[gid] = _PHI + str(index - n_features + 1)
            purple.append(gid)
        elif text.startswith(_PHI):
            purple.append(gid)
        else:
            blue.append(gid)

    return red, purple, blue, relabel


def plot_a_tree(tree=None, n_features=None):
    """Draw a GP tree with NetworkX, colouring nodes by their role.

    Parameters
    ----------
    tree : GP tree to draw.  When omitted, ``hof[0]`` is looked up at call
        time (the original bound it once, when the function was defined).
    n_features : number of original input features.  Arguments ``X<i>`` with
        ``i >= n_features`` are drawn as constructed features.  The default
        ``None`` treats every argument as an original input; the original
        code read this from a global ``X`` that this notebook never defines.

    Constants and original inputs are dark red, constructed features indigo
    and all remaining nodes dark blue.  The tree itself is left unchanged.
    """
    if tree is None:
        tree = hof[0]

    red_nodes, purple_nodes, blue_nodes, relabel = _classify_nodes(tree, n_features)

    nodes, edges, labels = graph(tree)
    labels.update(relabel)

    g = nx.Graph()
    g.add_nodes_from(nodes)
    g.add_edges_from(edges)
    pos = graphviz_layout(g, prog="dot")

    # Graph node ids are the tree positions, so the colour groups can be
    # passed straight through as node lists.
    nx.draw_networkx_nodes(
        g, pos, nodelist=red_nodes, node_color="darkred", node_size=500
    )
    nx.draw_networkx_nodes(
        g, pos, nodelist=purple_nodes, node_color="indigo", node_size=500
    )
    nx.draw_networkx_nodes(
        g, pos, nodelist=blue_nodes, node_color="darkblue", node_size=500
    )
    nx.draw_networkx_edges(g, pos)
    nx.draw_networkx_labels(g, pos, labels, font_color="white")
def evalSymbReg(individual, pset):
    """Return the per-sample squared errors of a GP tree against ``x ** 2``.

    Unlike an aggregated fitness, every sample keeps its own error so that a
    case-based selection operator can look at the cases individually.  The
    grid has 100 points on ``[-10, 10]``, so the returned tuple has 100
    entries; the fitness class created next to this function declares the
    same number of weights.

    Parameters
    ----------
    individual : the GP tree to evaluate.
    pset : the primitive set the tree was built from.

    Returns
    -------
    tuple
        Squared error at each of the 100 sample points.
    """
    # Compile the tree into a Python callable.
    model = gp.compile(expr=individual, pset=pset)

    # Evaluate the whole grid in one vectorised call.
    samples = np.linspace(-10, 10, 100)
    squared_errors = (model(samples) - samples**2)**2

    return tuple(squared_errors)
@njit(cache=True)
def selAutomaticEpsilonLexicaseNumba(case_values, fit_weights, k):
    """Numba-compiled kernel for automatic epsilon-lexicase selection.

    Parameters
    ----------
    case_values : indexable as ``case_values[individual][case]``; the wrapper
        below passes a 2-D NumPy array with one row per individual.
    fit_weights : a single fitness weight; a positive value means larger case
        values are better, anything else means smaller is better.
    k : number of selections to perform.

    Returns
    -------
    (selected_individuals, avg_cases)
        ``selected_individuals`` is a list of ``k`` row indices into
        ``case_values``; ``avg_cases`` is the running mean of how many cases
        were consumed per selection.
    """
    selected_individuals = []
    avg_cases = 0

    for i in range(k):
        # Every selection starts from the full pool and a fresh random case order.
        candidates = list(range(len(case_values)))
        cases = np.arange(len(case_values[0]))
        np.random.shuffle(cases)

        # Filter on one case at a time until a single candidate is left or
        # the cases run out.
        while len(cases) > 0 and len(candidates) > 1:
            errors_for_this_case = np.array(
                [case_values[x][cases[0]] for x in candidates]
            )
            # Epsilon is the median absolute deviation of the surviving
            # candidates on the current case.
            median_val = np.median(errors_for_this_case)
            median_absolute_deviation = np.median(
                np.array([abs(x - median_val) for x in errors_for_this_case])
            )
            if fit_weights > 0:
                # Maximisation: keep candidates within epsilon of the largest value.
                best_val_for_case = np.max(errors_for_this_case)
                min_val_to_survive = best_val_for_case - median_absolute_deviation
                candidates = list(
                    [
                        x
                        for x in candidates
                        if case_values[x][cases[0]] >= min_val_to_survive
                    ]
                )
            else:
                # Minimisation: keep candidates within epsilon of the smallest value.
                best_val_for_case = np.min(errors_for_this_case)
                max_val_to_survive = best_val_for_case + median_absolute_deviation
                candidates = list(
                    [
                        x
                        for x in candidates
                        if case_values[x][cases[0]] <= max_val_to_survive
                    ]
                )
            # Drop the case that was just used.
            cases = np.delete(cases, 0)
        # Incremental mean of the number of cases used over the selections so far.
        avg_cases = (avg_cases * i + (len(case_values[0]) - len(cases))) / (i + 1)
        # Remaining ties are broken uniformly at random.
        selected_individuals.append(np.random.choice(np.array(candidates)))
    return selected_individuals, avg_cases

def selAutomaticEpsilonLexicaseFast(individuals, k):
    """Select ``k`` individuals using the Numba epsilon-lexicase kernel.

    Gathers every individual's ``fitness.values`` into one array, takes the
    optimisation direction from the first weight of the first individual,
    runs the compiled kernel and maps the returned indices back to the
    individuals.  The kernel's ``avg_cases`` statistic is not used.

    Parameters
    ----------
    individuals : sequence of individuals exposing ``fitness.values`` and
        ``fitness.weights``.
    k : number of individuals to select.

    Returns
    -------
    list
        The ``k`` selected individuals (the same objects, not copies).
    """
    fit_weights = individuals[0].fitness.weights[0]
    case_values = np.array([ind.fitness.values for ind in individuals])
    index, avg_cases = selAutomaticEpsilonLexicaseNumba(case_values, fit_weights, k)
    selected_individuals = [individuals[i] for i in index]
    return selected_individuals
| "id": "62f30d17704db709", 173 | "metadata": {}, 174 | "source": [ 175 | "### 演化流程\n", 176 | "演化流程与传统符号回归相同。" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 6, 182 | "id": "515b587d4f8876ea", 183 | "metadata": { 184 | "ExecuteTime": { 185 | "end_time": "2023-12-25T03:39:39.571928900Z", 186 | "start_time": "2023-12-25T03:39:37.971234500Z" 187 | } 188 | }, 189 | "source": [ 190 | "import numpy\n", 191 | "from deap import algorithms\n", 192 | "\n", 193 | "# 定义统计指标\n", 194 | "stats_fit = tools.Statistics(lambda ind: ind.fitness.values)\n", 195 | "stats_size = tools.Statistics(len)\n", 196 | "mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)\n", 197 | "mstats.register(\"avg\", numpy.mean)\n", 198 | "mstats.register(\"std\", numpy.std)\n", 199 | "mstats.register(\"min\", numpy.min)\n", 200 | "mstats.register(\"max\", numpy.max)\n", 201 | "\n", 202 | "# 使用Numba加速\n", 203 | "numba_lexicase_time = []\n", 204 | "for i in range(3):\n", 205 | " start = time.time()\n", 206 | " population = toolbox.population(n=100)\n", 207 | " hof = tools.HallOfFame(1)\n", 208 | " pop, log = algorithms.eaSimple(population=population,\n", 209 | " toolbox=toolbox, cxpb=0.9, mutpb=0.1, ngen=10, stats=mstats, halloffame=hof, verbose=True)\n", 210 | " end = time.time()\n", 211 | " print(str(hof[0]))\n", 212 | " numba_lexicase_time.append(end - start)\n" 213 | ], 214 | "outputs": [] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "id": "98efab3037e032ea", 219 | "metadata": {}, 220 | "source": [ 221 | "为了展示Numba加速的效果,我们将使用纯Python实现的Lexicase Selection进行对比。" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 7, 227 | "id": "e1efd33d5f96a536", 228 | "metadata": { 229 | "ExecuteTime": { 230 | "end_time": "2023-12-25T03:45:33.479324Z", 231 | "start_time": "2023-12-25T03:39:39.565415500Z" 232 | } 233 | }, 234 | "source": [ 235 | "# 使用纯Python实现的Lexicase Selection\n", 236 | "toolbox.register(\"select\", 
tools.selAutomaticEpsilonLexicase)\n", 237 | "python_lexicase_time = []\n", 238 | "for i in range(3):\n", 239 | " start = time.time()\n", 240 | " population = toolbox.population(n=100)\n", 241 | " hof = tools.HallOfFame(1)\n", 242 | " pop, log = algorithms.eaSimple(population=population,\n", 243 | " toolbox=toolbox, cxpb=0.9, mutpb=0.1, ngen=10, stats=mstats, halloffame=hof, verbose=True)\n", 244 | " end = time.time()\n", 245 | " print(str(hof[0]))\n", 246 | " python_lexicase_time.append(end - start)" 247 | ], 248 | "outputs": [] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "id": "adae32e725d21dcf", 253 | "metadata": {}, 254 | "source": [ 255 | "下面是Numba加速和纯Python实现的Lexicase Selection的运行时间对比。从结果可以看出,Numba加速后的Lexicase Selection的运行速度远优于纯Python实现的Lexicase Selection。" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 8, 261 | "id": "3caf7686fef519a", 262 | "metadata": { 263 | "ExecuteTime": { 264 | "end_time": "2023-12-25T03:45:33.641210400Z", 265 | "start_time": "2023-12-25T03:45:33.479324Z" 266 | } 267 | }, 268 | "source": [ 269 | "import seaborn as sns\n", 270 | "import matplotlib.pyplot as plt\n", 271 | "import pandas as pd\n", 272 | "\n", 273 | "data = pd.DataFrame(\n", 274 | " {'Category': ['Numba Lexicase'] * len(numba_lexicase_time) + ['Python Lexicase'] * len(python_lexicase_time),\n", 275 | " 'Time': np.concatenate([numba_lexicase_time, python_lexicase_time])})\n", 276 | "\n", 277 | "plt.figure(figsize=(4, 3))\n", 278 | "sns.set_style(\"whitegrid\")\n", 279 | "sns.boxplot(data=data, x='Category', y='Time', palette=\"Set3\", width=0.4)\n", 280 | "plt.title('Comparison of Numba and Pure Python')\n", 281 | "plt.xlabel('')\n", 282 | "plt.ylabel('Time')\n", 283 | "plt.show()" 284 | ], 285 | "outputs": [] 286 | } 287 | ], 288 | "metadata": { 289 | "kernelspec": { 290 | "display_name": "Python 3 (ipykernel)", 291 | "language": "python", 292 | "name": "python3" 293 | }, 294 | "language_info": { 295 | "codemirror_mode": { 296 | 
def sphere(x, c=(1, 1, 1)):
    """
    Shifted Sphere function.

    Parameters:
    - x: Input vector.
    - c: Shift vector indicating the new optimal location. The default is an
      immutable tuple so that no mutable object is shared between calls.

    Returns:
    - The value of the shifted Sphere function at x. Only as many components
      as the shorter of ``x`` and ``c`` contribute, because the two are
      paired with ``zip``.
    """
    return sum((xi - ci) ** 2 for xi, ci in zip(x, c))


def differential_evolution(
    crossover_func, bounds, population_size=10, max_generations=50
):
    """Minimise ``sphere`` with a simple DE loop around a pluggable operator.

    Parameters:
    - crossover_func: Callable ``(a, b, c, F) -> vector``. ``a``, ``b`` and
      ``c`` are three distinct population members and ``F`` is a fresh
      standard-normal vector with one entry per dimension.
    - bounds: Array of shape ``(n_dims, 2)`` holding ``[lower, upper]`` for
      every dimension. Trial vectors are clipped to this box.
    - population_size: Number of individuals; at least 3, because three
      distinct members are drawn for every trial.
    - max_generations: Number of sweeps over the population.

    Returns:
    - The best objective value found.

    The dimensionality comes from ``bounds`` rather than from a module-level
    ``dim``, and each vector is evaluated exactly once: fitness values are
    cached next to the population instead of being recomputed for every
    comparison. The order of random draws is unchanged.
    """
    lower, upper = bounds[:, 0], bounds[:, 1]
    n_dims = len(bounds)

    population = np.array(
        [
            np.random.rand(n_dims) * (upper - lower) + lower
            for _ in range(population_size)
        ]
    )
    fitness = [sphere(ind) for ind in population]
    best_fitness = min(fitness)

    for _ in range(max_generations):
        for i in range(population_size):
            a, b, c = population[np.random.choice(population_size, 3, replace=False)]
            mutant = np.clip(
                crossover_func(a, b, c, np.random.randn(n_dims)), lower, upper
            )
            mutant_fitness = sphere(mutant)
            # Greedy replacement: a trial only replaces a worse parent.
            if mutant_fitness < fitness[i]:
                population[i] = mutant
                fitness[i] = mutant_fitness
                if mutant_fitness < best_fitness:
                    best_fitness = mutant_fitness
    return best_fitness
"markdown", 125 | "id": "e46b8aec8871cdd9", 126 | "metadata": {}, 127 | "source": [ 128 | "可以看到,传统DE算法得到的优化结果是不错的。但是,我们是否可以自动设计出一个更好的算法呢?" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "712f8d2a7147ff03", 134 | "metadata": {}, 135 | "source": [ 136 | "### 基于遗传编程的自动设计优化算法\n", 137 | "其实DE的交叉算子本质上就是输入三个向量和一个随机向量,然后输出一个向量的函数。因此,我们可以使用遗传编程来自动设计这个交叉算子。" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 135, 143 | "id": "3b598a4e994266e8", 144 | "metadata": { 145 | "ExecuteTime": { 146 | "end_time": "2024-02-07T23:56:46.285724800Z", 147 | "start_time": "2024-02-07T23:56:31.818414300Z" 148 | } 149 | }, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "gen\tnevals\tavg \tmin \tmax \n", 156 | "0 \t50 \t2.6796\t0.0112234\t15.2248\n", 157 | "1 \t50 \t2.41407\t0.00253387\t17.9657\n", 158 | "2 \t45 \t1.41727\t0.0205569 \t18.5921\n", 159 | "3 \t47 \t0.99445\t0.00658522\t14.4601\n", 160 | "4 \t47 \t0.929668\t0.005623 \t13.84 \n", 161 | "5 \t48 \t1.61888 \t0.00913134\t13.9251\n", 162 | "6 \t50 \t1.18172 \t0.000383948\t14.9727\n", 163 | "7 \t48 \t0.624159\t0.000705421\t12.3018\n", 164 | "8 \t50 \t0.765903\t0.00214913 \t8.71667\n", 165 | "9 \t43 \t0.3652 \t0.0110385 \t3.56652\n", 166 | "10 \t47 \t1.39889 \t0.00685267 \t22.123 \n", 167 | "11 \t43 \t1.27877 \t0.00685267 \t20.31 \n", 168 | "12 \t48 \t1.82377 \t0.0027862 \t11.4693\n", 169 | "13 \t49 \t0.736725\t0.0108848 \t12.7022\n", 170 | "14 \t50 \t1.39344 \t0.0102804 \t12.8329\n", 171 | "15 \t47 \t0.847688\t0.00398283 \t11.3424\n", 172 | "16 \t44 \t0.9867 \t0.0067096 \t15.8511\n", 173 | "17 \t48 \t0.971622\t0.0180985 \t9.05041\n", 174 | "18 \t42 \t0.843393\t0.00948021 \t11.9563\n", 175 | "19 \t47 \t0.849741\t0.00759852 \t10.9686\n", 176 | "20 \t47 \t0.999861\t0.00425035 \t14.4111\n", 177 | "21 \t42 \t1.18842 \t0.00665311 \t13.5106\n", 178 | "22 \t46 \t1.41895 \t0.00320289 \t15.9007\n", 179 | "23 \t47 \t1.19332 \t0.00406941 \t9.579 
def evalCrossover(individual):
    """Fitness of a candidate crossover operator.

    The GP tree is compiled into a callable and plugged into
    ``differential_evolution`` as its crossover function; the value that run
    returns is used as the individual's fitness.

    Returns a one-element tuple, the shape DEAP expects for a fitness.
    """
    # Turn the GP tree into a callable operator.
    crossover = toolbox.compile(expr=individual)
    score = differential_evolution(crossover, bounds)
    return (score,)
"toolbox.register(\"select\", tools.selTournament, tournsize=3)\n", 222 | "toolbox.register(\"mate\", gp.cxOnePoint)\n", 223 | "toolbox.register(\"mutate\", gp.mutUniform, expr=toolbox.expr, pset=pset)\n", 224 | "\n", 225 | "# Evolve crossover strategies\n", 226 | "population = toolbox.population(n=50)\n", 227 | "hof = tools.HallOfFame(1)\n", 228 | "stats = tools.Statistics(lambda ind: ind.fitness.values)\n", 229 | "stats.register(\"avg\", np.mean)\n", 230 | "stats.register(\"min\", np.min)\n", 231 | "stats.register(\"max\", np.max)\n", 232 | "\n", 233 | "algorithms.eaSimple(population, toolbox, 0.9, 0.1, 30, stats, halloffame=hof)\n", 234 | "\n", 235 | "# Best crossover operator\n", 236 | "best_crossover = hof[0]\n", 237 | "print(f\"Best Crossover Operator:\\n{best_crossover}\")\n", 238 | "print(f\"Fitness: {best_crossover.fitness.values}\")" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "id": "cf2c8fb3e5d148c6", 244 | "metadata": {}, 245 | "source": [ 246 | "### 分析新算法\n", 247 | "现在,我们得到了一个新的交叉算子。我们可以看一下这个交叉算子的公式。\n", 248 | "$X_{new}=X+(F*X-F)$, F是一个随机变量。" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 137, 254 | "id": "71c1e9de586767b0", 255 | "metadata": { 256 | "ExecuteTime": { 257 | "end_time": "2024-02-07T23:58:03.859051200Z", 258 | "start_time": "2024-02-07T23:58:03.730618Z" 259 | } 260 | }, 261 | "outputs": [ 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "新优化算法得到的优化结果 1.0213225557390857e-19\n" 267 | ] 268 | } 269 | ], 270 | "source": [ 271 | "add = np.add\n", 272 | "subtract = np.subtract\n", 273 | "multiply = np.multiply\n", 274 | "square = np.square\n", 275 | "array = np.array\n", 276 | "\n", 277 | "crossover_operator = lambda ARG0, ARG1, ARG2, ARG3: add(ARG0, subtract(multiply(ARG0, ARG3), ARG3))\n", 278 | "print(\"新优化算法得到的优化结果\", np.mean([differential_evolution(crossover_operator, bounds) for _ in range(10)]))\n" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 
class SymbolicRegressor(BaseEstimator, RegressorMixin):
    """
    Symbolic Regression estimator using DEAP with numpy acceleration.

    ``fit`` standardises both ``X`` and ``y`` with ``StandardScaler`` before
    evolving, so the evolved tree operates on standardised values;
    ``predict`` applies the same scaling and maps the output back.

    Parameters
    ----------
    population_size : int, default=300
        Number of individuals in the population.

    n_generations : int, default=10
        Number of generations to evolve.

    cxpb : float, default=0.9
        Crossover probability.

    mutpb : float, default=0.1
        Mutation probability.

    tournsize : int, default=3
        Tournament size for selection.

    min_depth : int, default=1
        Minimum tree depth for initialization.

    max_depth : int, default=2
        Maximum tree depth for initialization.

    mut_min_depth : int, default=0
        Minimum tree depth for mutation.

    mut_max_depth : int, default=2
        Maximum tree depth for mutation.

    verbose : bool, default=False
        Whether to print progress during evolution.

    random_state : int, RandomState instance or None, default=None
        Random seed for reproducibility.

    Attributes
    ----------
    best_individual_ : PrimitiveTree
        The best individual found during evolution.

    pset_ : PrimitiveSet
        The primitive set used for evolution.

    toolbox_ : Toolbox
        The DEAP toolbox with registered operations.

    feature_names_ : list of str
        Names of the input features.
    """

    # Names registered in the primitive set. A feature must not reuse one of
    # them, because the compiled lambda's argument would shadow the primitive.
    _RESERVED_NAMES = frozenset(
        {
            "add",
            "subtract",
            "multiply",
            "negative",
            "protected_div",
            "sin",
            "cos",
            "exp",
            "log",
            "rand",
        }
    )

    def __init__(
        self,
        population_size=300,
        n_generations=10,
        cxpb=0.9,
        mutpb=0.1,
        tournsize=3,
        min_depth=1,
        max_depth=2,
        mut_min_depth=0,
        mut_max_depth=2,
        verbose=False,
        random_state=None,
    ):
        self.population_size = population_size
        self.n_generations = n_generations
        self.cxpb = cxpb
        self.mutpb = mutpb
        self.tournsize = tournsize
        self.min_depth = min_depth
        self.max_depth = max_depth
        self.mut_min_depth = mut_min_depth
        self.mut_max_depth = mut_max_depth
        self.verbose = verbose
        self.random_state = random_state

        # predict() and model() use ``best_individual_ is None`` as the
        # "not fitted" test, so these must exist before fit() is called.
        self.best_individual_ = None
        self.pset_ = None
        self.toolbox_ = None
        self.feature_names_ = None
        self.n_features_ = None
        self.scaler_X_ = StandardScaler()
        self.scaler_y_ = StandardScaler()

    @staticmethod
    def _random_constant():
        """Draw one ephemeral constant with ``random.uniform(-1.0, 1.0)``."""
        return random.uniform(-1.0, 1.0)

    @staticmethod
    def _protected_div(x1, x2):
        """Divide element-wise, returning 1.0 where ``|x2| <= 0.001``."""
        # np.where evaluates both branches, so the unsafe quotients are still
        # computed; silence the warnings they would otherwise emit.
        with np.errstate(divide="ignore", invalid="ignore"):
            return np.where(np.abs(x2) > 0.001, np.divide(x1, x2), 1.0)

    @classmethod
    def _resolve_feature_names(cls, X, n_features):
        """Return the column names of ``X`` if usable, else ``x0, x1, ...``.

        Column names are used only when every one of them can serve as an
        argument name of the compiled expression: a unique, non-keyword
        identifier that collides neither with a primitive name nor with
        DEAP's ``ARG<i>`` placeholders.
        """
        import keyword  # stdlib; local so the file-level imports stay untouched

        fallback = [f"x{i}" for i in range(n_features)]
        columns = getattr(X, "columns", None)
        if columns is None:
            return fallback

        names = [str(column) for column in columns]
        usable = (
            len(names) == n_features
            and len(set(names)) == n_features
            and all(
                name.isidentifier()
                and not keyword.iskeyword(name)
                and name not in cls._RESERVED_NAMES
                and not name.startswith("ARG")
                for name in names
            )
        )
        return names if usable else fallback

    def _create_primitive_set(self, n_features):
        """Create a primitive set for the given number of features."""
        pset = gp.PrimitiveSet("MAIN", arity=n_features)

        pset.addPrimitive(np.add, 2, name="add")
        pset.addPrimitive(np.subtract, 2, name="subtract")
        pset.addPrimitive(np.multiply, 2, name="multiply")
        pset.addPrimitive(np.negative, 1, name="negative")
        pset.addPrimitive(self._protected_div, 2, name="protected_div")

        pset.addPrimitive(np.sin, 1, name="sin")
        pset.addPrimitive(np.cos, 1, name="cos")
        pset.addPrimitive(np.exp, 1, name="exp")
        pset.addPrimitive(np.log, 1, name="log")

        # The generator must be the *same* function object on every call:
        # DEAP releases that keep ephemerals in a module-wide registry refuse
        # to re-register "rand" with a different function, which broke a
        # second fit() when the generator was a fresh closure each time.
        pset.addEphemeralConstant("rand", self._random_constant)

        if self.feature_names_ is not None:
            arg_names = {f"ARG{i}": name for i, name in enumerate(self.feature_names_)}
            pset.renameArguments(**arg_names)

        return pset

    def _evaluate_func(self, func, X):
        """Call a compiled tree with one positional argument per column of X."""
        return (
            func(X.ravel())
            if self.n_features_ == 1
            else func(*[X[:, i] for i in range(self.n_features_)])
        )

    def _eval_symb_reg(self, individual, pset, X, y):
        """Evaluate a symbolic regression individual using numpy.

        Returns a one-element tuple holding the mean squared error, or
        ``1e10`` when evaluation fails or the error is not finite.
        """
        func = gp.compile(expr=individual, pset=pset)
        try:
            # log of non-positive values and exp overflow are expected during
            # evolution; they surface as non-finite errors handled below.
            with np.errstate(all="ignore"):
                y_pred = self._evaluate_func(func, X)
                mse = np.mean((y_pred - y) ** 2)
        except (ValueError, TypeError, ZeroDivisionError, OverflowError):
            return (1e10,)
        return (mse if np.isfinite(mse) else 1e10,)

    def _create_toolbox(self, pset):
        """Create and configure the DEAP toolbox."""
        toolbox = base.Toolbox()

        toolbox.register(
            "expr",
            gp.genHalfAndHalf,
            pset=pset,
            min_=self.min_depth,
            max_=self.max_depth,
        )

        toolbox.register(
            "individual", tools.initIterate, creator.Individual, toolbox.expr
        )
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        toolbox.register("compile", gp.compile, pset=pset)

        def evaluate(individual):
            return self._eval_symb_reg(individual, pset, self.X_train_, self.y_train_)

        toolbox.register("evaluate", evaluate)
        toolbox.register("select", tools.selTournament, tournsize=self.tournsize)
        toolbox.register("mate", gp.cxOnePoint)
        toolbox.register(
            "expr_mut", gp.genFull, min_=self.mut_min_depth, max_=self.mut_max_depth
        )
        toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

        return toolbox

    def fit(self, X, y):
        """
        Fit the symbolic regression model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data. When ``X`` exposes ``columns`` (e.g. a DataFrame)
            whose names are valid, unique identifiers, they become the
            feature names; otherwise ``x0, x1, ...`` is used.

        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        # check_array returns a plain ndarray, so keep a handle on the
        # caller's object to read its column names afterwards.
        raw_X = X
        X = check_array(X, ensure_2d=True, ensure_all_finite=True)
        y = np.asarray(y).ravel()

        # Without this check a length mismatch only shows up as a broadcast
        # error inside every fitness evaluation, i.e. a silent 1e10 fitness.
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: "
                f"{X.shape[0]} != {y.shape[0]}"
            )

        self.n_features_ = X.shape[1]
        self.feature_names_ = self._resolve_feature_names(raw_X, self.n_features_)

        X_normalized = self.scaler_X_.fit_transform(X)
        y_normalized = self.scaler_y_.fit_transform(y.reshape(-1, 1)).ravel()

        self.X_train_ = X_normalized
        self.y_train_ = y_normalized

        if self.random_state is not None:
            rng = check_random_state(self.random_state)
            # Both seeds are drawn from ``rng`` so that a RandomState instance
            # is accepted too; np.random.seed() cannot take one directly.
            random.seed(int(rng.randint(0, 2**31)))
            np.random.seed(int(rng.randint(0, 2**31)))

        self.pset_ = self._create_primitive_set(self.n_features_)
        self.toolbox_ = self._create_toolbox(self.pset_)

        stats = tools.Statistics(lambda ind: ind.fitness.values)
        for stat_name, stat_func in [
            ("avg", np.mean),
            ("std", np.std),
            ("min", np.min),
            ("max", np.max),
        ]:
            stats.register(stat_name, stat_func)

        population = self.toolbox_.population(n=self.population_size)
        for ind in population:
            ind.fitness.values = self.toolbox_.evaluate(ind)

        hof = tools.HallOfFame(1)
        hof.update(population)

        algorithms.eaSimple(
            population=population,
            toolbox=self.toolbox_,
            cxpb=self.cxpb,
            mutpb=self.mutpb,
            ngen=self.n_generations,
            stats=stats,
            halloffame=hof,
            verbose=self.verbose,
        )

        self.best_individual_ = hof[0]

        return self

    def predict(self, X):
        """
        Predict using the symbolic regression model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted values.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        if self.best_individual_ is None:
            raise ValueError("Model has not been fitted yet. Call fit() first.")

        X = check_array(X, ensure_2d=True, ensure_all_finite=True)
        X_normalized = self.scaler_X_.transform(X)
        n_samples = X.shape[0]

        func = gp.compile(expr=self.best_individual_, pset=self.pset_)

        try:
            with np.errstate(all="ignore"):
                y_pred_normalized = self._evaluate_func(func, X_normalized)
        except (ValueError, TypeError, ZeroDivisionError, OverflowError):
            # Best effort: fall back to the standardised mean (0).
            y_pred_normalized = np.zeros(n_samples)

        y_pred_normalized = np.asarray(y_pred_normalized).ravel()
        # A tree made only of constants evaluates to a single value; repeat
        # it so that every sample receives a prediction.
        if len(y_pred_normalized) != n_samples:
            y_pred_normalized = np.resize(y_pred_normalized, n_samples)

        y_pred_normalized = np.nan_to_num(
            y_pred_normalized, nan=0.0, posinf=0.0, neginf=0.0
        )
        y_pred = self.scaler_y_.inverse_transform(
            y_pred_normalized.reshape(-1, 1)
        ).ravel()

        return y_pred

    @staticmethod
    def _terminal_to_sympy(node, symbols):
        """Translate one non-primitive tree node into a SymPy object."""
        if not isinstance(node, gp.Terminal):
            try:
                return sp.Float(float(node))
            except (ValueError, TypeError):
                return sp.Symbol(str(node))

        node_name = node.name
        if node_name in symbols:
            return symbols[node_name]
        if node_name.startswith("ARG"):
            # An argument placeholder without a known feature name.
            return sp.Symbol(node_name)
        try:
            value = node.value if hasattr(node, "value") else float(node_name)
            return sp.Float(value)
        except (ValueError, TypeError):
            return sp.Symbol(node_name)

    def model(self, *, original_scale=False):
        """
        Return the symbolic expression of the model as a SymPy expression.

        This method implements the SRbench algorithm interface.

        Parameters
        ----------
        original_scale : bool, default=False
            If False, return the evolved tree as is; its symbols then stand
            for the *standardised* features and its value is the
            standardised target. If True, fold the fitted scalers into the
            expression so that it maps raw features to raw targets.

        Returns
        -------
        sympy_expr : sympy.Expr
            The symbolic expression representing the model.
            ``protected_div`` is rendered as a plain division, so the
            expression differs from ``predict`` where a divisor has an
            absolute value of at most 0.001.

        Raises
        ------
        ValueError
            If the model has not been fitted or the tree is malformed.
        """
        if self.best_individual_ is None:
            raise ValueError("Model has not been fitted yet. Call fit() first.")

        feature_names = self.feature_names_ or [
            f"x{i}" for i in range(self.n_features_)
        ]

        symbols = {}
        for i in range(self.n_features_):
            name = feature_names[i]
            symbol = sp.Symbol(name)
            if original_scale:
                symbol = (symbol - float(self.scaler_X_.mean_[i])) / float(
                    self.scaler_X_.scale_[i]
                )
            symbols[name] = symbol
            symbols[f"ARG{i}"] = symbol

        func_map = {
            "add": lambda a, b: a + b,
            "subtract": lambda a, b: a - b,
            "multiply": lambda a, b: a * b,
            "negative": lambda a: -a,
            "protected_div": lambda a, b: a / b,
            "sin": sp.sin,
            "cos": sp.cos,
            "exp": sp.exp,
            "log": sp.log,
        }

        stack = []

        # The tree is stored in prefix order; walking it backwards lets each
        # primitive find its already-built operands on the stack.
        for node in reversed(self.best_individual_):
            if isinstance(node, gp.Primitive):
                # The first operand was pushed last, so popping yields the
                # operands in call order already. Reversing them here would
                # swap the operands of subtract and protected_div.
                args = [stack.pop() for _ in range(node.arity)]
                if node.name in func_map:
                    result = func_map[node.name](*args)
                else:
                    result = sp.Function(node.name)(*args)
                stack.append(result)
            else:
                stack.append(self._terminal_to_sympy(node, symbols))

        if len(stack) != 1:
            raise ValueError(
                f"Invalid expression tree. Stack size: {len(stack)}, expected 1."
            )

        expression = stack[0]
        if original_scale:
            expression = expression * float(self.scaler_y_.scale_[0]) + float(
                self.scaler_y_.mean_[0]
            )
        return expression

    def __str__(self):
        """String representation of the model."""
        if self.best_individual_ is None:
            return "SymbolicRegressor(not fitted)"
        return f"SymbolicRegressor: {self.best_individual_}"

    def __repr__(self):
        """Detailed string representation."""
        return (
            f"SymbolicRegressor(population_size={self.population_size}, "
            f"n_generations={self.n_generations}, "
            f"cxpb={self.cxpb}, mutpb={self.mutpb})"
        )
def select_next_city(current_city, unvisited_cities, heuristic, distance_matrix):
    """Return the unvisited city with the highest ranking score.

    The score of a candidate is the inverse of its distance from
    ``current_city`` plus the value of ``heuristic`` at that distance.
    On ties the candidate listed first wins.
    """

    def ranking_score(candidate):
        # Distance from the current city to this candidate.
        leg = distance_matrix[current_city][candidate]
        # The evolved heuristic sees only the distance.
        bonus = heuristic(leg)
        # The heuristic value could be used on its own; here it is combined
        # with the inverse distance.
        return 1 / leg + bonus

    # The best-scoring candidate becomes the next stop.
    return max(unvisited_cities, key=ranking_score)


def decode(individual, distance_matrix):
    """Build a closed tour by applying the GP heuristic greedily.

    The tour starts at city 0, visits every other city once, and ends
    back at city 0 (so the first city appears twice in the result).
    """
    # Every city is unvisited at first; the tour starts from the first one.
    remaining = list(range(len(distance_matrix)))
    tour = [remaining.pop(0)]

    # Turn the GP tree into a callable.
    heuristic = gp.compile(expr=individual, pset=pset)

    # Keep extending the tour from its last city until nothing is left.
    while remaining:
        chosen = select_next_city(tour[-1], remaining, heuristic, distance_matrix)
        tour.append(chosen)
        remaining.remove(chosen)

    # Close the loop by returning to the start.
    tour.append(tour[0])
    return tour


# City coordinates are drawn at random inside the unit square.
coordinates = np.random.rand(10, 2)  # x, y coordinates of 10 cities


def calculate_distance_matrix(coords):
    """Return the pairwise Euclidean distances between ``coords``.

    The diagonal is ``inf`` because a city is not connected to itself.
    """
    num_cities = len(coords)
    # Start from "not connected" everywhere, then fill in the real legs.
    matrix = np.full((num_cities, num_cities), np.inf)
    for src in range(num_cities):
        for dst in range(num_cities):
            if src == dst:
                continue
            offset = coords[src] - coords[dst]
            matrix[src][dst] = np.sqrt(np.sum(offset ** 2))
    return matrix


distance_matrix = calculate_distance_matrix(coordinates)


def evalTSP(individual):
    """Fitness function: total length of the tour decoded from ``individual``."""
    path = decode(individual, distance_matrix)
    distance = sum(distance_matrix[stop][prev] for prev, stop in zip(path, path[1:]))
    return (distance,)
pset.addPrimitive(np.negative, 1)

# Toolbox: how individuals are built, evaluated, selected and varied.
toolbox = base.Toolbox()
toolbox.register("expr", gp.genGrow, pset=pset, min_=1, max_=3)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)
toolbox.register("evaluate", evalTSP)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", gp.cxOnePoint)
# The subtree generator for mutation has to exist before "mutate" refers to
# it. Previously it was registered last and never used, so mutation silently
# reused the initialisation generator. No pset is bound here because
# gp.mutUniform passes its own pset when it calls the generator.
toolbox.register("expr_mut", gp.genGrow, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
# Statistics: follow both the fitness values and the tree sizes.
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
for label, reducer in (
    ("avg", np.mean),
    ("std", np.std),
    ("min", np.min),
    ("max", np.max),
):
    mstats.register(label, reducer)

population = toolbox.population(n=100)
hof = tools.HallOfFame(1)

# Run the evolutionary loop: crossover 0.9, mutation 0.1, 10 generations.
algorithms.eaSimple(
    population,
    toolbox,
    cxpb=0.9,
    mutpb=0.1,
    ngen=10,
    stats=mstats,
    halloffame=hof,
)

# Report the best individual found.
best_ind = hof[0]
print('Best individual:', best_ind, best_ind.fitness)
sns.set_theme(context='notebook', style='whitegrid')


def plot_route(route, coords):
    """Draw the tour ``route`` over the city coordinates ``coords``.

    ``route`` is a sequence of city indices whose last entry repeats the
    first; ``coords`` is indexed as ``coords[city]`` -> (x, y).
    """
    plt.figure(figsize=(12, 8))

    # Draw the tour in visiting order (sort=False keeps that order).
    xs = coords[route, 0]
    ys = coords[route, 1]
    ax = sns.lineplot(x=xs, y=ys, marker='o', sort=False)

    # Highlight the closing leg back to the start in a different colour.
    last_stop = coords[route[-2]]
    start = coords[route[0]]
    ax.plot([last_stop[0], start[0]], [last_stop[1], start[1]], 'r-o')

    # Label every city with its index.
    for city, (x, y) in enumerate(coords):
        plt.text(x, y, f'{city}')

    plt.title('TSP Route')
    plt.xlabel('X coordinate')
    plt.ylabel('Y coordinate')
    plt.show()


best_route = decode(best_ind, distance_matrix)
plot_route(best_route, coordinates)