├── MANIFEST.in ├── LICENSE.txt ├── README.md ├── setup.py ├── .gitignore └── src ├── analysis.py └── design.py /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | recursive-include tests *.py 3 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # design-of-experiments 2 | A Python Package for Design of Experiments 3 | 4 | # Motivation 5 | I worked with a research team investigating how Design of Experiments could be applied to Synthetic Biology over the summer of 2020, as part of this I made a general software review and was a little dissapointed at the sparsity of python packages for designing experiments, and what packages were available I found to be considerably inferior to the comercial DOE software like JMP, MODDE, or Minitab. So, I have created this python package to offer an open-source package for the Design of Experiments! 6 | 7 | # Example 8 | ```python 9 | >>>import design 10 | >>>Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]} 11 | >>>design.Factorial.frac_fact_2level(Factors,10) 12 | 13 | Height Width Depth Temp Pressure 14 | 0 1.6 0.2 0.2 20 200 15 | 1 1.6 0.2 0.3 20 100 16 | 2 1.6 0.4 0.2 10 200 17 | 3 1.6 0.4 0.3 10 100 18 | 4 2.0 0.2 0.2 10 100 19 | 5 2.0 0.2 0.3 10 200 20 | 6 2.0 0.4 0.2 20 100 21 | 7 2.0 0.4 0.3 20 200 22 | ``` 23 | # Installation 24 | ``` 25 | pip install designofexperiment 26 | ``` 27 | 28 | # 29 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name='designofexperiment', 8 | version='1.6.2', 9 | description='A Python Package for intuitive design of experiments with user-friendly analysis of results', 10 | py_modules=["design", 11 | "analysis"], 12 | package_dir={'': 'src'}, 13 | classifiers = [ 14 | "Programming Language :: Python :: 3", 15 | "Programming Language :: Python :: 3.6", 16 | "Programming Language :: Python :: 3.7", 17 | "Programming Language :: Python :: 3.8", 18 | "License :: OSI Approved :: MIT License", 19 | "Natural Language :: English", 20 | "Operating System :: OS Independent" 21 | ], 22 | long_description=long_description, 23 | long_description_content_type="text/markdown", 24 | install_requires = [ 25 | "numpy == 1.19.1", 26 | "pandas == 1.1.1" 27 | ], 28 | extras_require={ 29 | "dev": [ 30 | "pytest>=6.0.1", 31 | "check-manifest>=0.42", 32 | "twine>=3.2.0" 33 | ], 34 | }, 35 | url="https://github.com/JamesMarshall31/design-of-experiments", 36 | author="James Marshall, Benedict Carling", 37 | author_email="jm7618@ic.ac.uk, bencarling1@gmail.com" 38 | ) 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /src/analysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import design 3 | import pandas as pd 4 | import itertools 5 | 6 | 7 | def fit_two_level_screening(df): 8 | """ 9 | Returns p-values for unreplicated two level factorial designs 10 | 11 | Parameters: 12 | df: The dataframe containing the experimental design 13 | 14 | Returns: 15 | Dataframe of P Values 16 | 17 | Example: 18 | >>>import analysis 19 | >>>import design 20 | >>>factors = {'Temp':[50,25],'Concentration':[0.4,0.6],'Enzyme':[-1,1]} 21 | >>>df = design.full_factorial_2level(factors) 22 | >>>df['Yield'] = [60,52,54,45,72,83,68,80] 23 | >>>print(fit_two_level_screening(df)) 24 | Individual p-Value 25 | Temp 0.0017 26 | Concentration 0.0610 27 | Enzyme 0.4551 28 | Temp*Concentration 0.5014 29 | Temp*Enzyme 0.0142 30 | Concentration*Enzyme 1.0000 31 | Temp*Concentration*Enzyme 0.8516 32 | """ 33 | n = len(df.index) 34 | # p_columns_list stores all the actual variable names, by stopping before the last column it doesnt include results 35 | p_columns_list = list(df.columns)[:-1] 36 | # Creating the T matrix 37 | # ------------------------ 38 | t_matrix = np.ones((n, n), dtype=int) 39 | # count variable keeps track of what row of the t-matrix is currently being added to 40 | count = 1 41 | # for loop to set all numbers to one or minus one 42 | for i in df.columns[0:-1]: 43 | high = max(df[i]) 44 | low = min(df[i]) 45 | df[i] = (df[i].map({high: 1, low: -1})) 46 | t_matrix[count] = df[i] 47 | count += 1 48 | 49 | rows_remaining = n - (len(df.columns)) 50 | t_rows = [] # we are technically making the transpose of the T matrix as we are adding to the rows 51 | r = 2 52 | combination_check = 0 53 | for x in range(1, count): 54 | t_rows.append(x) 55 | while rows_remaining > 0: 56 | for combination in itertools.combinations(t_rows, r): 57 | string ='' 58 | for i in combination: 59 | string = string + p_columns_list[i-1] + '*' 60 | t_matrix[count] = 1 61 | string = string[:-1] # Get rid of asterisk at end 62 | p_columns_list.append(string) 63 | for i in range(r): 64 | t_matrix[count] = t_matrix[count] * t_matrix[combination[i]] 65 | rows_remaining -= 1 66 | count += 1 67 | combination_check += 1 68 | if combination_check == len(list(itertools.combinations(t_rows, r))): 69 | r += 1 70 | combination_check = 0 71 | break 72 | if rows_remaining == 0: 73 | break 74 | t_matrix = t_matrix/np.sqrt(n) # to normalize 75 | 76 | # Using the T matrix to get the contrasts 77 | # --------------------------------------- 78 | results = np.array(df.iloc[:,-1:]).reshape(n, 1) 79 | Contrasts = np.matmul(t_matrix, results) # The first index is the intercept contrast 80 | Contrasts = Contrasts[1:n] 81 | 82 | # Calculating Lenth's Pseudo-Standard Error 83 | # --------------------------------------- 84 | v = 1.5 * np.median(abs(Contrasts)) 85 | PSE = 1.5 * np.median(abs(Contrasts[abs(Contrasts) < (2.5 * v)])) 86 | 87 | # Calculate Lenth t-ratios for each contrast 88 | # ----------------------------------------- 89 | t_ratios = abs(Contrasts/PSE) 90 | 91 | # Run Monte Carlo simulations to generate contrasts 92 | # ----------------------------------------- 93 | sim_t_ratio = [] 94 | for i in range(10000): 95 | simulation = np.array(np.random.normal(0, PSE, n-1)) 96 | v_sim = 1.5 * np.median(abs(simulation)) 97 | PSE_sim = 1.5 * np.median(abs(simulation[abs(simulation) < (2.5 * v_sim)])) 98 | sim_t_ratio.append(abs(simulation / PSE_sim)) 99 | # Reorder these t-values so our t-values can be found in relation to it 100 | simulated_t_reordered = np.zeros((n-1, 10000)) 101 | for j in range(n-1): 102 | for i in range(10000): 103 | simulated_t_reordered[j][i] = sim_t_ratio[i][j] 104 | simulated_t_reordered[j] = np.sort(simulated_t_reordered[j]) 105 | p_value = [] 106 | for j in range(n-1): 107 | for i in range(10000): 108 | if t_ratios[j][0] < simulated_t_reordered[j][i]: 109 | p_value.append(1 - (i / 10000)) 110 | break 111 | p_values = pd.DataFrame(p_value,index=p_columns_list,columns=['Individual p-Value']) 112 | return p_values 113 | 114 | 115 | factors = {'Temp':[50,25],'Concentration':[0.4,0.6],'Enzyme':[-1,1]} 116 | df = design.full_factorial_2level(factors) 117 | df['Yield'] = [60,52,54,45,72,83,68,80] 118 | print(fit_two_level_screening(df)) -------------------------------------------------------------------------------- /src/design.py: -------------------------------------------------------------------------------- 1 | # Our Design of experiment class 2 | 3 | import urllib.request 4 | import pandas as pd 5 | import itertools 6 | import math 7 | import lhsmdu 8 | import numpy as np 9 | 10 | 11 | def full_factorial_2level(dic_factors): 12 | """ 13 | Creates a Two-level full factorial design from the dictionary of factors entered, 14 | if more than two levels are given for each factor the maximum and minimum values will be selected 15 | 16 | Parameters: 17 | dic_factors: The dictionary of factors to be included in the full factorial's design 18 | 19 | Returns: 20 | df: A dataframe of the two-level full factorial resulting from the factors entered 21 | 22 | Example: 23 | >>> import design 24 | >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3]} 25 | >>> design.Factorial.full_factorial_2level(Factors) 26 | Height Width Depth 27 | 0 1.6 0.2 0.2 28 | 1 1.6 0.2 0.3 29 | 2 1.6 0.4 0.2 30 | 3 1.6 0.4 0.3 31 | 4 2.0 0.2 0.2 32 | 5 2.0 0.2 0.3 33 | 6 2.0 0.4 0.2 34 | 7 2.0 0.4 0.3 35 | """ 36 | # df is the dataframe that will be returned. 37 | df = pd.DataFrame() 38 | # factor_levels will be filled with the levels of each factor and 39 | # used when iterating through the runs of the design. 40 | factor_levels = [] 41 | # factor_names is filled at the same time as factor_levels and 42 | # is used at the end to correctly name the columns of the dataframe. 43 | factor_names = [] 44 | 45 | # This for loop fills up factor_levels with the maximum and minimum of each factor, 46 | # as well as filling up factor_names. 47 | for name in dic_factors: 48 | factor_names.append(name) 49 | factor_levels.append([min(dic_factors[name]), max(dic_factors[name])]) 50 | 51 | # This for loop will run through each combination(technically product) and build up 52 | # the dataframe df with each loop. 53 | for run in itertools.product(*factor_levels, repeat=1): 54 | run = list(run) 55 | s_add = pd.Series(run) 56 | df = pd.concat([df, s_add], axis=1, ignore_index=True) 57 | # The dataframe is made with the runs being the columns, we want them to be the rows 58 | # hence the need to transpose. 59 | df = df.transpose() 60 | # The column headers are initially labelled '0','1','2' etc.., the line below 61 | # renames them by relating them to the factor_names list made earlier 62 | df = df.rename(columns=lambda x: factor_names[x]) 63 | return df 64 | 65 | 66 | def full_factorial(dic_factors): 67 | """ 68 | Creates a full factorial design from the dictionary of factors, but does not choose 69 | highest and lowest levels of each factor. 70 | 71 | Parameters: 72 | dic_factors: The dictionary of factors to be included in the full factorial's design 73 | 74 | Returns: 75 | df: A dataframe of the full factorial resulting from the factors entered 76 | 77 | Example: 78 | >>> import design 79 | >>> Factors = {'Height':[1.6,1.8,2],'Width':[0.2,0.3,0.4]} 80 | >>> design.Factorial.full_factorial(Factors) 81 | Height Width 82 | 0 1.6 0.2 83 | 1 1.6 0.3 84 | 2 1.6 0.4 85 | 3 1.8 0.2 86 | 4 1.8 0.3 87 | 5 1.8 0.4 88 | 6 2.0 0.2 89 | 7 2.0 0.3 90 | 8 2.0 0.4 91 | """ 92 | # The variables initialised below play the same role here as in the two level 93 | # full factorial above. 94 | df = pd.DataFrame() 95 | factor_levels = [] 96 | factor_names = [] 97 | # This for loop plays the same role as the for loop in the two level 98 | # but does not take the maximum and minimum factor levels, so does not reduce 99 | # the design to a two level design automatically. 100 | for name in dic_factors: 101 | factor_names.append(name) 102 | factor_levels.append(dic_factors[name]) 103 | 104 | # This for loop functions the same as its two level counterpart. 105 | for run in itertools.product(*factor_levels, repeat=1): 106 | run = list(run) 107 | s_add = pd.Series(run) 108 | df = pd.concat([df, s_add], axis=1, ignore_index=True) 109 | # As in the two level, the dataframe must be transposed and renamed. 110 | df = df.transpose() 111 | df = df.rename(columns=lambda x: factor_names[x]) 112 | return df 113 | 114 | 115 | def frac_fact_2level(dic_factors, runs): 116 | """ 117 | Returns a fractional factorial based on the dictionary of factors entered and the runs entered, 118 | the number of runs of the design will be the next lowest power of 2 from the runs entered 119 | i.e 9->8, 8->8 120 | 121 | Parameters: 122 | dic_factors: The dictionary of factors to be included in the fractional factorial's design. 123 | 124 | runs: The number of runs the design can use - if the number of runs causes the design's resolution 125 | to be less than three then it will not work. 126 | 127 | returns: 128 | df: A dataframe of the runs for the fractional factorial resulting from the factors and runs entered. 129 | 130 | Example: 131 | >>> import design 132 | >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]} 133 | >>> design.Factorial.frac_fact_2level(Factors,10) 134 | Height Width Depth Temp Pressure 135 | 0 1.6 0.2 0.2 20 200 136 | 1 1.6 0.2 0.3 20 100 137 | 2 1.6 0.4 0.2 10 200 138 | 3 1.6 0.4 0.3 10 100 139 | 4 2.0 0.2 0.2 10 100 140 | 5 2.0 0.2 0.3 10 200 141 | 6 2.0 0.4 0.2 20 100 142 | 7 2.0 0.4 0.3 20 200 143 | """ 144 | df = pd.DataFrame() 145 | factor_levels = [] 146 | factor_names = [] 147 | # this for loop fills up factor_levels and factor_names arrays 148 | for name in dic_factors: 149 | factor_names.append(name) 150 | factor_levels.append([min(dic_factors[name]), max(dic_factors[name])]) 151 | 152 | # If runs entered isn't a power of 2 this will set it to the 153 | # next lowest power of 2. 154 | runs = int((1 << (runs).bit_length()) / 2) 155 | # The fractional factorial is generated from a base full factorial 156 | # see https://www.itl.nist.gov/div898/handbook/pri/section3/pri3342.htm 157 | # the line below determines the level of this full fact level 158 | full_fact_level = int(math.log(runs, 2)) 159 | 160 | # This for loop creates the base Full Factorial from which the 161 | # fractional factorial will be generated 162 | for run in itertools.product([-1, 1], repeat=full_fact_level): 163 | run = list(run) 164 | s_add = pd.Series(run) 165 | df = pd.concat([df, s_add], axis=1, ignore_index=True) 166 | df = df.transpose() 167 | # factors_remaining will be used in the coming loops - it measures how many columns 168 | # are left to be added to the fractional factorial. 169 | factors_remaining = len(factor_names) - full_fact_level 170 | # count will be used for the creation of new columns in the dataframe. 171 | count = 0 172 | # df_cols is quite literally a list of the column headers in the dataframe 173 | # the columns are automatically name as if in an array 0,1,2 etc.. 174 | df_cols = [] 175 | # r is used in the itertools combination function, it is set to two as 176 | # the first columns of the fractional factorial are the two combinations, 177 | # it is incremented as higher combinations are required. 178 | r = 2 179 | # r will need to be incremented when all combinations at its current level have been 180 | # added, combination check is used in a conditional that will increase r and 181 | # reinitialise the for loop that r is used in. 182 | combination_check = 0 183 | 184 | # This for loop fills up df_cols with the columns already made in the base full factorial 185 | for x in range((len(dic_factors) - factors_remaining)): 186 | df_cols.append(x) 187 | 188 | # The while loop is here to reinitialise the for loop once r has changed, and 189 | # to stop the for loop creating too many columns once factors_remaining is 0. 190 | while factors_remaining > 0: 191 | # As each new column is the combination of columns from the base full factorial 192 | # this for loop goes through each combination and creates these columns. 193 | for combination in itertools.combinations(df_cols, r): 194 | # We initialise the values as 1 so that we can multiply them by however much 195 | # is necessary in the coming for loop. 196 | df[full_fact_level + count] = 1 197 | 198 | # The for loop here goes up to r because if r is two the result will be the multiplication 199 | # of two columns, if r is three, three columns etc.. 200 | for i in range(r): 201 | df[full_fact_level + count] = df[full_fact_level + count] * df[combination[i]] 202 | count += 1 203 | factors_remaining -= 1 204 | combination_check += 1 205 | 206 | # If we have run through all combinations with this 'r' value 207 | # we should increase r. 208 | if combination_check == len(list(itertools.combinations(df_cols, r))): 209 | r += 1 210 | combination_check = 0 211 | break 212 | # If there are no factors left then we should stop adding columns. 213 | if factors_remaining == 0: 214 | break 215 | 216 | # The dataframe is currently -1 and 1, this for loop assigns the right levels to 217 | # each factor using the factor_levels list 218 | for i in range(len(dic_factors)): 219 | df[i] = df[i].apply(lambda y: factor_levels[i][0] if y == -1 else factor_levels[i][1]) 220 | df = df.rename(columns=lambda y: factor_names[y]) 221 | return df 222 | 223 | 224 | def plackett_burman(dic_factors, runs): 225 | """ 226 | Returns a Plackett-Burman design where the number of runs is the next multiple of four 227 | higher than the number of runs entered if the runs given isn't a multiple of four. 228 | 229 | """ 230 | # Plackett-Burman designs are made using hadamard matrices 231 | # the hadamard matrices are taken in via an online library 232 | factor_names = [] 233 | factor_levels = [] 234 | # this for loop fills up factor_levels and factor_names arrays 235 | for name in dic_factors: 236 | factor_names.append(name) 237 | factor_levels.append([min(dic_factors[name]), max(dic_factors[name])]) 238 | # The links to the various URLs of the hadamard matrices are stored in this dictionary 239 | url_dictionary = {8: "http://neilsloane.com/hadamard/had.8.txt", 240 | 12: "http://neilsloane.com/hadamard/had.12.txt", 241 | 16: "http://neilsloane.com/hadamard/had.16.0.txt", 242 | 20: "http://neilsloane.com/hadamard/had.20.hall.n.txt", 243 | 24: "http://neilsloane.com/hadamard/had.24.pal.txt", 244 | 28: "http://neilsloane.com/hadamard/had.28.pal2.txt", 245 | 32: "http://neilsloane.com/hadamard/had.32.pal.txt"} 246 | # Conditional changes run number to be a multiple of four 247 | if runs % 4 != 0: 248 | runs = runs + (4 - (runs % 4)) 249 | 250 | file = urllib.request.urlopen(url_dictionary.get(runs)) 251 | array = [] 252 | # This for loop takes the lines of the hadamard matrices and places them into the array variable 253 | for line in file: 254 | # decoded_line stores each line in a way that can be interacted with 255 | decoded_line = line.decode("utf-8") 256 | # Conditional breaks the for loop when the table has been read completely 257 | if decoded_line[0] == 'H': 258 | break 259 | # The array is appended with the current row of the table, excluding the new line 260 | array.append(list(decoded_line.split('\n')[0])) 261 | # Array is currently a square, so only the columns are taken that are needed for the number of factors entered 262 | df = pd.DataFrame(array[(runs - len(dic_factors)):]) 263 | df = df.transpose() 264 | # The dataframe is currently '+' and '-' so this for loop converts to the factor levels entered in the dictionary 265 | for i in range(len(dic_factors)): 266 | df[i] = df[i].apply(lambda y: factor_levels[i][0] if y == '-' else factor_levels[i][1]) 267 | df = df.rename(columns=lambda y: factor_names[y]) 268 | return df 269 | 270 | 271 | def box_behnken(dic_factors): 272 | """ 273 | Creates a dataframe for a Box-Behken experimental design based on the factors given. 274 | 275 | Parameters: 276 | dic_factors: The dictionary of factors to be included in the Box-Behnken design. 277 | 278 | Returns: 279 | df: The dataframe containing the Box-Behnken design. 280 | 281 | Examples: 282 | >>> import design 283 | >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]} 284 | >>> design.box_behnken(Factors) 285 | Height Width Depth Temp Pressure 286 | 0 1.6 0.2 0.25 15.0 150.0 287 | 1 1.6 0.4 0.25 15.0 150.0 288 | 2 1.6 0.3 0.20 15.0 150.0 289 | 3 1.6 0.3 0.30 15.0 150.0 290 | 4 1.6 0.3 0.25 10.0 150.0 291 | 5 1.6 0.3 0.25 20.0 150.0 292 | 6 1.6 0.3 0.25 15.0 100.0 293 | 7 1.6 0.3 0.25 15.0 200.0 294 | 8 2.0 0.2 0.25 15.0 150.0 295 | 9 2.0 0.4 0.25 15.0 150.0 296 | 10 2.0 0.3 0.20 15.0 150.0 297 | 11 2.0 0.3 0.30 15.0 150.0 298 | 12 2.0 0.3 0.25 10.0 150.0 299 | 13 2.0 0.3 0.25 20.0 150.0 300 | 14 2.0 0.3 0.25 15.0 100.0 301 | 15 2.0 0.3 0.25 15.0 200.0 302 | 16 1.8 0.2 0.20 15.0 150.0 303 | 17 1.8 0.2 0.30 15.0 150.0 304 | 18 1.8 0.2 0.25 10.0 150.0 305 | 19 1.8 0.2 0.25 20.0 150.0 306 | 20 1.8 0.2 0.25 15.0 100.0 307 | 21 1.8 0.2 0.25 15.0 200.0 308 | 22 1.8 0.4 0.20 15.0 150.0 309 | 23 1.8 0.4 0.30 15.0 150.0 310 | 24 1.8 0.4 0.25 10.0 150.0 311 | 25 1.8 0.4 0.25 20.0 150.0 312 | 26 1.8 0.4 0.25 15.0 100.0 313 | 27 1.8 0.4 0.25 15.0 200.0 314 | 28 1.8 0.3 0.20 10.0 150.0 315 | 29 1.8 0.3 0.20 20.0 150.0 316 | 30 1.8 0.3 0.20 15.0 100.0 317 | 31 1.8 0.3 0.20 15.0 200.0 318 | 32 1.8 0.3 0.30 10.0 150.0 319 | 33 1.8 0.3 0.30 20.0 150.0 320 | 34 1.8 0.3 0.30 15.0 100.0 321 | 35 1.8 0.3 0.30 15.0 200.0 322 | 36 1.8 0.3 0.25 10.0 100.0 323 | 37 1.8 0.3 0.25 10.0 200.0 324 | 38 1.8 0.3 0.25 20.0 100.0 325 | 39 1.8 0.3 0.25 20.0 200.0 326 | 40 1.8 0.3 0.25 15.0 150.0 327 | 41 1.8 0.3 0.25 15.0 150.0 328 | 42 1.8 0.3 0.25 15.0 150.0 329 | 43 1.8 0.3 0.25 15.0 150.0 330 | 44 1.8 0.3 0.25 15.0 150.0 331 | """ 332 | df = pd.DataFrame() 333 | factor_levels = [] 334 | factor_names = [] 335 | # this for loop fills up factor_levels and factor_names arrays 336 | for name in dic_factors: 337 | factor_names.append(name) 338 | # This conditional creates a middle factor by averaging the two highest and lowest when too many 339 | # or too few levels are given, else sorts the three given and sets the levels that way 340 | if len(dic_factors[name]) != 3: 341 | factor_levels.append( 342 | [min(dic_factors[name]), (min(dic_factors[name]) + max(dic_factors[name])) / 2, max(dic_factors[name])]) 343 | else: 344 | factor_levels.append( 345 | [sorted(dic_factors[name])[0], sorted(dic_factors[name])[1], sorted(dic_factors[name])[2]]) 346 | # This for loop will go through too many iterations, generating +1,+1,+1 designs which aren't needed, 347 | # so a conditional is added to cut it down 348 | for run in itertools.product([-1, 1, 0], repeat=len(dic_factors)): 349 | run = list(run) 350 | if run.count(1) < 3 and run.count(-1) < 3 and run.count(0) == len(dic_factors) - 2: 351 | s_add = pd.Series(run) 352 | df = pd.concat([df, s_add], axis=1, ignore_index=True) 353 | # for loop adds default centre runs 354 | for i in range(len(dic_factors)): 355 | centre_points = [0,0,0,0,0] 356 | df = pd.concat([df, pd.Series(centre_points[:len(dic_factors)])], axis=1, ignore_index=True) 357 | df = df.transpose() 358 | # for loop takes the -1, 0, +1 to the corresponding three factor levels for each factor 359 | for i in range(len(dic_factors)): 360 | df[i] = df[i].apply( 361 | lambda y: factor_levels[i][0] if y == -1 else (factor_levels[i][1] if y == 0 else factor_levels[i][2])) 362 | df = df.rename(columns=lambda y: factor_names[y]) 363 | return df 364 | 365 | 366 | def central_composite(dic_factors): 367 | """ 368 | Creates a Central Composite design for the factors given 369 | 370 | Parameters: 371 | dic_factors: The dictionary of factors to be included in the Central Composite design. 372 | 373 | Returns: 374 | df: The dataframe containing the Central Composite design. 375 | 376 | Examples: 377 | >>> import design 378 | >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]} 379 | >>> design.central_composite(Factors) 380 | Height Width Depth Temp Pressure 381 | 0 1.600000 0.200000 0.200000 10.000000 100.000000 382 | 1 1.600000 0.200000 0.200000 10.000000 200.000000 383 | 2 1.600000 0.200000 0.200000 20.000000 100.000000 384 | 3 1.600000 0.200000 0.200000 20.000000 200.000000 385 | 4 1.600000 0.200000 0.300000 10.000000 100.000000 386 | 5 1.600000 0.200000 0.300000 10.000000 200.000000 387 | 6 1.600000 0.200000 0.300000 20.000000 100.000000 388 | 7 1.600000 0.200000 0.300000 20.000000 200.000000 389 | 8 1.600000 0.400000 0.200000 10.000000 100.000000 390 | 9 1.600000 0.400000 0.200000 10.000000 200.000000 391 | 10 1.600000 0.400000 0.200000 20.000000 100.000000 392 | 11 1.600000 0.400000 0.200000 20.000000 200.000000 393 | 12 1.600000 0.400000 0.300000 10.000000 100.000000 394 | 13 1.600000 0.400000 0.300000 10.000000 200.000000 395 | 14 1.600000 0.400000 0.300000 20.000000 100.000000 396 | 15 1.600000 0.400000 0.300000 20.000000 200.000000 397 | 16 2.000000 0.200000 0.200000 10.000000 100.000000 398 | 17 2.000000 0.200000 0.200000 10.000000 200.000000 399 | 18 2.000000 0.200000 0.200000 20.000000 100.000000 400 | 19 2.000000 0.200000 0.200000 20.000000 200.000000 401 | 20 2.000000 0.200000 0.300000 10.000000 100.000000 402 | 21 2.000000 0.200000 0.300000 10.000000 200.000000 403 | 22 2.000000 0.200000 0.300000 20.000000 100.000000 404 | 23 2.000000 0.200000 0.300000 20.000000 200.000000 405 | 24 2.000000 0.400000 0.200000 10.000000 100.000000 406 | 25 2.000000 0.400000 0.200000 10.000000 200.000000 407 | 26 2.000000 0.400000 0.200000 20.000000 100.000000 408 | 27 2.000000 0.400000 0.200000 20.000000 200.000000 409 | 28 2.000000 0.400000 0.300000 10.000000 100.000000 410 | 29 2.000000 0.400000 0.300000 10.000000 200.000000 411 | 30 2.000000 0.400000 0.300000 20.000000 100.000000 412 | 31 2.000000 0.400000 0.300000 20.000000 200.000000 413 | 32 2.275683 0.300000 0.250000 15.000000 150.000000 414 | 33 1.324317 0.300000 0.250000 15.000000 150.000000 415 | 34 1.800000 0.537841 0.250000 15.000000 150.000000 416 | 35 1.800000 0.062159 0.250000 15.000000 150.000000 417 | 36 1.800000 0.300000 0.368921 15.000000 150.000000 418 | 37 1.800000 0.300000 0.131079 15.000000 150.000000 419 | 38 1.800000 0.300000 0.250000 26.892071 150.000000 420 | 39 1.800000 0.300000 0.250000 3.107929 150.000000 421 | 40 1.800000 0.300000 0.250000 15.000000 268.920712 422 | 41 1.800000 0.300000 0.250000 15.000000 31.079288 423 | 42 1.800000 0.300000 0.250000 15.000000 150.000000 424 | 43 1.800000 0.300000 0.250000 15.000000 150.000000 425 | 44 1.800000 0.300000 0.250000 15.000000 150.000000 426 | 45 1.800000 0.300000 0.250000 15.000000 150.000000 427 | 46 1.800000 0.300000 0.250000 15.000000 150.000000 428 | """ 429 | df2 = pd.DataFrame() # df2 will contain the axial points of the design 430 | factor_levels = [] 431 | factor_names = [] 432 | alpha = 2 ** (len(dic_factors) / 4) # this is alpha for rotatability, alpha should eventually be optional 433 | # this for loop fills up factor_levels and factor_names arrays 434 | for name in dic_factors: 435 | factor_names.append(name) 436 | # This conditional creates a middle factor by averaging the two highest and lowest when too many 437 | # or too few levels are given, else sorts the three given and sets the levels that way 438 | if len(dic_factors[name]) != 3: 439 | factor_levels.append( 440 | [min(dic_factors[name]), (min(dic_factors[name]) + max(dic_factors[name])) / 2, max(dic_factors[name])]) 441 | else: 442 | factor_levels.append( 443 | [sorted(dic_factors[name])[0], sorted(dic_factors[name])[1], sorted(dic_factors[name])[2]]) 444 | # The full factorial design points are made using the full factorial function 445 | df1 = full_factorial_2level(dic_factors) 446 | # This for loop creates the dataframe of the axial points 447 | for i in range(len(dic_factors)): 448 | run1 = [] 449 | run2 = [] 450 | # extremeplus and extrememinus contain the values for the extreme points of the design 451 | extremeplus = factor_levels[i][1] + ((factor_levels[i][2] - factor_levels[i][1]) * alpha) 452 | extrememinus = factor_levels[i][1] - ((factor_levels[i][2] - factor_levels[i][1]) * alpha) 453 | # This for loop fills up the runs with the centre points for all factors 454 | for j in range(len(dic_factors)): 455 | run1.append(factor_levels[j][1]) 456 | run2.append(factor_levels[j][1]) 457 | # run1 and run2 have the extreme points entered in, replacing the centre point they will currently have 458 | run1[i] = extremeplus 459 | run2[i] = extrememinus 460 | # The runs are stored as series to be concatenated to the dataframe 461 | s_add1 = pd.Series(run1) 462 | s_add2 = pd.Series(run2) 463 | df2 = pd.concat([df2, s_add1, s_add2], axis=1, ignore_index=True) 464 | # The axial dataframe is then transposed and renamed so it can be concated with the full factorial dataframe 465 | df2 = df2.transpose() 466 | df2 = df2.rename(columns=lambda y: factor_names[y]) 467 | df = pd.concat([df1, df2], axis=0, ignore_index=True) 468 | # This for loop adds as many centre points as there are factors entered 469 | centre_points = [] 470 | for i in range(len(dic_factors)): 471 | centre_points.append(factor_levels[i][1]) 472 | for j in range(len(dic_factors)): 473 | df3 = pd.DataFrame([centre_points],columns=list(dic_factors)) 474 | df = df.append(df3,ignore_index=True) 475 | return df 476 | 477 | 478 | def latin_hypercube(dic_factors, runs): 479 | """ 480 | Parameters: 481 | dic_factors: The dictionary of factors to be included in the Latin Hypercube design. 482 | 483 | runs: The number of runs to be used in the design. 484 | 485 | Returns: 486 | df: The dataframe containing the Latin Hypercube design. 487 | 488 | Example: 489 | >>> import design 490 | >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]} 491 | >>> design.latin_hypercube(Factors,50) 492 | Height Width Depth Temp Pressure 493 | 0 1.814372 0.316126 0.203734 12.633408 150.994350 494 | 1 1.683852 0.327745 0.221157 10.833524 149.235694 495 | 2 1.952938 0.220208 0.212877 14.207334 177.737810 496 | 3 1.921001 0.306165 0.249451 13.747280 195.141219 497 | 4 1.709485 0.286836 0.214973 12.132761 144.060774 498 | 5 1.795442 0.339484 0.263747 16.494926 105.861897 499 | 6 1.849604 0.390856 0.229801 17.768834 157.379054 500 | 7 1.635933 0.295207 0.244843 15.561134 119.353027 501 | 8 1.800514 0.257358 0.232554 19.117071 114.431350 502 | 9 1.748656 0.311259 0.209185 19.573654 147.317771 503 | 10 1.610152 0.200320 0.269825 14.041168 192.787729 504 | 11 1.670380 0.283579 0.270421 11.422384 161.302466 505 | 12 1.914483 0.374190 0.273246 15.253950 110.213186 506 | 13 1.731642 0.363269 0.211263 15.011417 175.315691 507 | 14 1.864093 0.245809 0.235466 10.506234 123.998827 508 | 15 1.856580 0.314574 0.260263 11.787321 152.096424 509 | 16 1.651140 0.262106 0.289432 14.407869 121.954348 510 | 17 1.827840 0.278926 0.223818 12.824422 168.813816 511 | 18 1.780800 0.380327 0.252359 12.290440 171.741507 512 | 19 1.762333 0.224241 0.216475 18.386775 165.564771 513 | 20 1.949560 0.300988 0.285943 10.063231 155.134033 514 | 21 1.646881 0.248638 0.250362 16.701447 163.476898 515 | 22 1.974239 0.379487 0.279709 17.208315 181.757031 516 | 23 1.904317 0.216877 0.292985 18.829669 136.808281 517 | 24 1.899844 0.343903 0.230494 13.197326 198.654066 518 | 25 1.696839 0.329348 0.283741 18.193024 135.335187 519 | 26 1.689936 0.272728 0.218891 19.800988 131.615692 520 | 27 1.823893 0.299159 0.247030 10.790362 191.524570 521 | 28 1.841140 0.210635 0.286718 10.327824 167.595627 522 | 29 1.883991 0.385993 0.277186 18.773584 178.871167 523 | 30 1.932945 0.358221 0.294327 16.890948 125.635668 524 | 31 1.837620 0.370877 0.242782 17.103119 142.240418 525 | 32 1.740477 0.352914 0.265939 14.697769 129.088978 526 | 33 1.624078 0.347985 0.298516 13.933373 132.011517 527 | 34 1.786612 0.351899 0.225313 15.827930 188.649172 528 | 35 1.892142 0.206601 0.254650 14.805995 138.732923 529 | 36 1.656703 0.252798 0.205547 18.461586 184.792345 530 | 37 1.770805 0.270721 0.226262 11.940936 113.390934 531 | 38 1.672266 0.288289 0.275940 15.640371 186.777116 532 | 39 1.600629 0.240123 0.280908 17.934686 126.897387 533 | 40 1.995175 0.237031 0.240472 16.393982 116.475088 534 | 41 1.713062 0.265850 0.256147 17.418780 172.746504 535 | 42 1.964540 0.235473 0.266340 11.334520 196.454539 536 | 43 1.757516 0.366909 0.207040 13.488750 102.146392 537 | 44 1.942405 0.214971 0.290674 13.373628 109.206897 538 | 45 1.985601 0.229702 0.297658 12.435430 101.336426 539 | 46 1.617340 0.321384 0.200862 19.338525 159.238981 540 | 47 1.976837 0.393484 0.258497 16.167623 140.926988 541 | 48 1.877091 0.399951 0.239234 19.788923 182.759572 542 | 49 1.725652 0.332160 0.237414 11.136650 107.726667 543 | """ 544 | df = pd.DataFrame() 545 | factor_names = [] 546 | count = 0 547 | # Creates an array filled with a latin hypercube form 0 to 1 548 | array = lhsmdu.sample(len(dic_factors), runs) 549 | # This for loop converts the latin hypercube to have the levels entered into the dictionary of factors 550 | for name in dic_factors: 551 | factor_names.append(name) 552 | low = min(dic_factors[name]) 553 | high = max(dic_factors[name]) 554 | # non_coded stored the array being mapped to fit the levels of the factors entered 555 | non_coded = np.array(list(map(lambda x: low + ((high - low) * x), array[count]))) 556 | # Converts non_coded (which is currently one column of the final dataframe) to a series 557 | s_add = pd.Series(non_coded[0][0]) 558 | count += 1 559 | # Adds the series to the dataframe 560 | df = pd.concat([df, s_add], ignore_index=True, axis=1) 561 | df = df.rename(columns=lambda y: factor_names[y]) 562 | return df --------------------------------------------------------------------------------