├── MANIFEST.in
├── LICENSE.txt
├── README.md
├── setup.py
├── .gitignore
└── src
    ├── analysis.py
    └── design.py


/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.txt
2 | recursive-include tests *.py
3 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) [year] [fullname]
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # design-of-experiments
 2 | A Python Package for Design of Experiments
 3 | 
 4 | # Motivation
 5 | I worked with a research team investigating how Design of Experiments could be applied to Synthetic Biology over the summer of 2020, as part of this I made a general software review and was a little dissapointed at the sparsity of python packages for designing experiments, and what packages were available I found to be considerably inferior to the comercial DOE software like JMP, MODDE, or Minitab. So, I have created this python package to offer an open-source package for the Design of Experiments!
 6 | 
 7 | # Example
 8 | ```python
 9 | >>>import design
10 | >>>Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]}
11 | >>>design.Factorial.frac_fact_2level(Factors,10)
12 | 
13 |    Height  Width  Depth  Temp  Pressure
14 | 0     1.6    0.2    0.2    20       200
15 | 1     1.6    0.2    0.3    20       100
16 | 2     1.6    0.4    0.2    10       200
17 | 3     1.6    0.4    0.3    10       100
18 | 4     2.0    0.2    0.2    10       100
19 | 5     2.0    0.2    0.3    10       200
20 | 6     2.0    0.4    0.2    20       100
21 | 7     2.0    0.4    0.3    20       200
22 | ```
23 | # Installation
24 | ```
25 | pip install designofexperiment
26 | ```
27 | 
28 | #
29 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | with open("README.md", "r") as fh:
 4 |     long_description = fh.read()
 5 | 
 6 | setup(
 7 |     name='designofexperiment',
 8 |     version='1.6.2',
 9 |     description='A Python Package for intuitive design of experiments with user-friendly analysis of results',
10 |     py_modules=["design",
11 |                 "analysis"],
12 |     package_dir={'': 'src'},
13 |     classifiers = [
14 |         "Programming Language :: Python :: 3",
15 |         "Programming Language :: Python :: 3.6",
16 |         "Programming Language :: Python :: 3.7",
17 |         "Programming Language :: Python :: 3.8",
18 |         "License :: OSI Approved :: MIT License",
19 |         "Natural Language :: English",
20 |         "Operating System :: OS Independent"
21 |     ],
22 |     long_description=long_description,
23 |     long_description_content_type="text/markdown",
24 |     install_requires = [
25 |         "numpy == 1.19.1",
26 |         "pandas == 1.1.1"
27 |     ],
28 |     extras_require={
29 |         "dev": [
30 |             "pytest>=6.0.1",
31 |             "check-manifest>=0.42",
32 |             "twine>=3.2.0"
33 |         ],
34 |     },
35 |     url="https://github.com/JamesMarshall31/design-of-experiments",
36 |     author="James Marshall, Benedict Carling",
37 |     author_email="jm7618@ic.ac.uk, bencarling1@gmail.com"
38 | )
39 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/src/analysis.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import design
  3 | import pandas as pd
  4 | import itertools
  5 | 
  6 | 
  7 | def fit_two_level_screening(df):
  8 |     """
  9 |     Returns p-values for unreplicated two level  factorial designs
 10 | 
 11 |     Parameters:
 12 |         df: The dataframe containing the experimental design
 13 | 
 14 |     Returns:
 15 |          Dataframe of P Values
 16 | 
 17 |     Example:
 18 |         >>>import analysis
 19 |         >>>import design
 20 |         >>>factors = {'Temp':[50,25],'Concentration':[0.4,0.6],'Enzyme':[-1,1]}
 21 |         >>>df = design.full_factorial_2level(factors)
 22 |         >>>df['Yield'] = [60,52,54,45,72,83,68,80]
 23 |         >>>print(fit_two_level_screening(df))
 24 |                                     Individual p-Value
 25 |         Temp                                   0.0017
 26 |         Concentration                          0.0610
 27 |         Enzyme                                 0.4551
 28 |         Temp*Concentration                     0.5014
 29 |         Temp*Enzyme                            0.0142
 30 |         Concentration*Enzyme                   1.0000
 31 |         Temp*Concentration*Enzyme              0.8516
 32 |     """
 33 |     n = len(df.index)
 34 |     # p_columns_list stores all the actual variable names, by stopping before the last column it doesnt include results
 35 |     p_columns_list = list(df.columns)[:-1]
 36 |     # Creating the T matrix
 37 |     # ------------------------
 38 |     t_matrix = np.ones((n, n), dtype=int)
 39 |     # count variable keeps track of what row of the t-matrix is currently being added to
 40 |     count = 1
 41 |     # for loop to set all numbers to one or minus one
 42 |     for i in df.columns[0:-1]:
 43 |         high = max(df[i])
 44 |         low = min(df[i])
 45 |         df[i] = (df[i].map({high: 1, low: -1}))
 46 |         t_matrix[count] = df[i]
 47 |         count += 1
 48 | 
 49 |     rows_remaining = n - (len(df.columns))
 50 |     t_rows = []  # we are technically making the transpose of the T matrix as we are adding to the rows
 51 |     r = 2
 52 |     combination_check = 0
 53 |     for x in range(1, count):
 54 |         t_rows.append(x)
 55 |     while rows_remaining > 0:
 56 |         for combination in itertools.combinations(t_rows, r):
 57 |             string =''
 58 |             for i in combination:
 59 |                 string = string + p_columns_list[i-1] + '*'
 60 |             t_matrix[count] = 1
 61 |             string = string[:-1]        # Get rid of asterisk at end
 62 |             p_columns_list.append(string)
 63 |             for i in range(r):
 64 |                 t_matrix[count] = t_matrix[count] * t_matrix[combination[i]]
 65 |             rows_remaining -= 1
 66 |             count += 1
 67 |             combination_check += 1
 68 |             if combination_check == len(list(itertools.combinations(t_rows, r))):
 69 |                 r += 1
 70 |                 combination_check = 0
 71 |                 break
 72 |             if rows_remaining == 0:
 73 |                 break
 74 |     t_matrix = t_matrix/np.sqrt(n)      # to normalize
 75 | 
 76 |     # Using the T matrix to get the contrasts
 77 |     # ---------------------------------------
 78 |     results = np.array(df.iloc[:,-1:]).reshape(n, 1)
 79 |     Contrasts = np.matmul(t_matrix, results)        # The first index is the intercept contrast
 80 |     Contrasts = Contrasts[1:n]
 81 | 
 82 |     # Calculating Lenth's Pseudo-Standard Error
 83 |     # ---------------------------------------
 84 |     v = 1.5 * np.median(abs(Contrasts))
 85 |     PSE = 1.5 * np.median(abs(Contrasts[abs(Contrasts) < (2.5 * v)]))
 86 | 
 87 |     # Calculate Lenth t-ratios for each contrast
 88 |     # -----------------------------------------
 89 |     t_ratios = abs(Contrasts/PSE)
 90 | 
 91 |     # Run Monte Carlo simulations to generate contrasts
 92 |     # -----------------------------------------
 93 |     sim_t_ratio = []
 94 |     for i in range(10000):
 95 |         simulation = np.array(np.random.normal(0, PSE, n-1))
 96 |         v_sim = 1.5 * np.median(abs(simulation))
 97 |         PSE_sim = 1.5 * np.median(abs(simulation[abs(simulation) < (2.5 * v_sim)]))
 98 |         sim_t_ratio.append(abs(simulation / PSE_sim))
 99 |     # Reorder these t-values so our t-values can be found in relation to it
100 |     simulated_t_reordered = np.zeros((n-1, 10000))
101 |     for j in range(n-1):
102 |         for i in range(10000):
103 |             simulated_t_reordered[j][i] = sim_t_ratio[i][j]
104 |         simulated_t_reordered[j] = np.sort(simulated_t_reordered[j])
105 |     p_value = []
106 |     for j in range(n-1):
107 |         for i in range(10000):
108 |             if t_ratios[j][0] < simulated_t_reordered[j][i]:
109 |                 p_value.append(1 - (i / 10000))
110 |                 break
111 |     p_values = pd.DataFrame(p_value,index=p_columns_list,columns=['Individual p-Value'])
112 |     return p_values
113 | 
114 | 
115 | factors = {'Temp':[50,25],'Concentration':[0.4,0.6],'Enzyme':[-1,1]}
116 | df = design.full_factorial_2level(factors)
117 | df['Yield'] = [60,52,54,45,72,83,68,80]
118 | print(fit_two_level_screening(df))


--------------------------------------------------------------------------------
/src/design.py:
--------------------------------------------------------------------------------
  1 | # Our Design of experiment class
  2 | 
  3 | import urllib.request
  4 | import pandas as pd
  5 | import itertools
  6 | import math
  7 | import lhsmdu
  8 | import numpy as np
  9 | 
 10 | 
 11 | def full_factorial_2level(dic_factors):
 12 |     """
 13 |     Creates a Two-level full factorial design from the dictionary of factors entered,
 14 |     if more than two levels are given for each factor the maximum and minimum values will be selected
 15 | 
 16 |     Parameters:
 17 |         dic_factors: The dictionary of factors to be included in the full factorial's design
 18 | 
 19 |     Returns:
 20 |         df: A dataframe of the two-level full factorial resulting from the factors entered
 21 | 
 22 |     Example:
 23 |         >>> import design
 24 |         >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3]}
 25 |         >>> design.Factorial.full_factorial_2level(Factors)
 26 |            Height  Width  Depth
 27 |         0     1.6    0.2    0.2
 28 |         1     1.6    0.2    0.3
 29 |         2     1.6    0.4    0.2
 30 |         3     1.6    0.4    0.3
 31 |         4     2.0    0.2    0.2
 32 |         5     2.0    0.2    0.3
 33 |         6     2.0    0.4    0.2
 34 |         7     2.0    0.4    0.3
 35 |     """
 36 |     # df is the dataframe that will be returned.
 37 |     df = pd.DataFrame()
 38 |     # factor_levels will be filled with the levels of each factor and
 39 |     # used when iterating through the runs of the design.
 40 |     factor_levels = []
 41 |     # factor_names is filled at the same time as factor_levels and
 42 |     # is used at the end to correctly name the columns of the dataframe.
 43 |     factor_names = []
 44 | 
 45 |     # This for loop fills up factor_levels with the maximum and minimum of each factor,
 46 |     # as well as filling up factor_names.
 47 |     for name in dic_factors:
 48 |         factor_names.append(name)
 49 |         factor_levels.append([min(dic_factors[name]), max(dic_factors[name])])
 50 | 
 51 |     # This for loop will run through each combination(technically product) and build up
 52 |     # the dataframe df with each loop.
 53 |     for run in itertools.product(*factor_levels, repeat=1):
 54 |         run = list(run)
 55 |         s_add = pd.Series(run)
 56 |         df = pd.concat([df, s_add], axis=1, ignore_index=True)
 57 |     # The dataframe is made with the runs being the columns, we want them to be the rows
 58 |     # hence the need to transpose.
 59 |     df = df.transpose()
 60 |     # The column headers are initially labelled '0','1','2' etc.., the line below
 61 |     # renames them by relating them to the factor_names list made earlier
 62 |     df = df.rename(columns=lambda x: factor_names[x])
 63 |     return df
 64 | 
 65 | 
 66 | def full_factorial(dic_factors):
 67 |     """
 68 |     Creates a full factorial design from the dictionary of factors, but does not choose
 69 |     highest and lowest levels of each factor.
 70 | 
 71 |     Parameters:
 72 |         dic_factors: The dictionary of factors to be included in the full factorial's design
 73 | 
 74 |     Returns:
 75 |         df: A dataframe of the full factorial resulting from the factors entered
 76 | 
 77 |     Example:
 78 |         >>> import design
 79 |         >>> Factors = {'Height':[1.6,1.8,2],'Width':[0.2,0.3,0.4]}
 80 |         >>> design.Factorial.full_factorial(Factors)
 81 |             Height  Width
 82 |         0     1.6    0.2
 83 |         1     1.6    0.3
 84 |         2     1.6    0.4
 85 |         3     1.8    0.2
 86 |         4     1.8    0.3
 87 |         5     1.8    0.4
 88 |         6     2.0    0.2
 89 |         7     2.0    0.3
 90 |         8     2.0    0.4
 91 |     """
 92 |     # The variables initialised below play the same role here as in the two level
 93 |     # full factorial above.
 94 |     df = pd.DataFrame()
 95 |     factor_levels = []
 96 |     factor_names = []
 97 |     # This for loop plays the same role as the for loop in the two level
 98 |     # but does not take the maximum and minimum factor levels, so does not reduce
 99 |     # the design to a two level design automatically.
100 |     for name in dic_factors:
101 |         factor_names.append(name)
102 |         factor_levels.append(dic_factors[name])
103 | 
104 |     # This for loop functions the same as its two level counterpart.
105 |     for run in itertools.product(*factor_levels, repeat=1):
106 |         run = list(run)
107 |         s_add = pd.Series(run)
108 |         df = pd.concat([df, s_add], axis=1, ignore_index=True)
109 |     # As in the two level, the dataframe must be transposed and renamed.
110 |     df = df.transpose()
111 |     df = df.rename(columns=lambda x: factor_names[x])
112 |     return df
113 | 
114 | 
115 | def frac_fact_2level(dic_factors, runs):
116 |     """
117 |     Returns a fractional factorial based on the dictionary of factors entered and the runs entered,
118 |      the number of runs of the design will be the next lowest power of 2 from the runs entered
119 |      i.e 9->8, 8->8
120 | 
121 |     Parameters:
122 |         dic_factors: The dictionary of factors to be included in the fractional factorial's design.
123 | 
124 |         runs: The number of runs the design can use - if the number of runs causes the design's resolution
125 |         to be less than three then it will not work.
126 | 
127 |     returns:
128 |         df: A dataframe of the runs for the fractional factorial resulting from the factors and runs entered.
129 | 
130 |     Example:
131 |         >>> import design
132 |         >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]}
133 |         >>> design.Factorial.frac_fact_2level(Factors,10)
134 |             Height  Width  Depth  Temp  Pressure
135 |         0     1.6    0.2    0.2    20       200
136 |         1     1.6    0.2    0.3    20       100
137 |         2     1.6    0.4    0.2    10       200
138 |         3     1.6    0.4    0.3    10       100
139 |         4     2.0    0.2    0.2    10       100
140 |         5     2.0    0.2    0.3    10       200
141 |         6     2.0    0.4    0.2    20       100
142 |         7     2.0    0.4    0.3    20       200
143 |     """
144 |     df = pd.DataFrame()
145 |     factor_levels = []
146 |     factor_names = []
147 |     # this for loop fills up factor_levels and factor_names arrays
148 |     for name in dic_factors:
149 |         factor_names.append(name)
150 |         factor_levels.append([min(dic_factors[name]), max(dic_factors[name])])
151 | 
152 |     # If runs entered isn't a power of 2 this will set it to the
153 |     # next lowest power of 2.
154 |     runs = int((1 << (runs).bit_length()) / 2)
155 |     # The fractional factorial is generated from a base full factorial
156 |     # see https://www.itl.nist.gov/div898/handbook/pri/section3/pri3342.htm
157 |     # the line below determines the level of this full fact level
158 |     full_fact_level = int(math.log(runs, 2))
159 | 
160 |     # This for loop creates the base Full Factorial from which the
161 |     # fractional factorial will be generated
162 |     for run in itertools.product([-1, 1], repeat=full_fact_level):
163 |         run = list(run)
164 |         s_add = pd.Series(run)
165 |         df = pd.concat([df, s_add], axis=1, ignore_index=True)
166 |     df = df.transpose()
167 |     # factors_remaining will be used in the coming loops - it measures how many columns
168 |     # are left to be added to the fractional factorial.
169 |     factors_remaining = len(factor_names) - full_fact_level
170 |     # count will be used for the creation of new columns in the dataframe.
171 |     count = 0
172 |     # df_cols is quite literally a list of the column headers in the dataframe
173 |     # the columns are automatically name as if in an array 0,1,2 etc..
174 |     df_cols = []
175 |     # r is used in the itertools combination function, it is set to two as
176 |     # the first columns of the fractional factorial are the two combinations,
177 |     # it is incremented as higher combinations are required.
178 |     r = 2
179 |     # r will need to be incremented when all combinations at its current level have been
180 |     # added, combination check is used in a conditional that will increase r and
181 |     # reinitialise the for loop that r is used in.
182 |     combination_check = 0
183 | 
184 |     # This for loop fills up df_cols with the columns already made in the base full factorial
185 |     for x in range((len(dic_factors) - factors_remaining)):
186 |         df_cols.append(x)
187 | 
188 |     # The while loop is here to reinitialise the for loop once r has changed, and
189 |     # to stop the for loop creating too many columns once factors_remaining is 0.
190 |     while factors_remaining > 0:
191 |         # As each new column is the combination of columns from the base full factorial
192 |         # this for loop goes through each combination and creates these columns.
193 |         for combination in itertools.combinations(df_cols, r):
194 |             # We initialise the values as 1 so that we can multiply them by however much
195 |             # is necessary in the coming for loop.
196 |             df[full_fact_level + count] = 1
197 | 
198 |             # The for loop here goes up to r because if r is two the result will be the multiplication
199 |             # of two columns, if r is three, three columns etc..
200 |             for i in range(r):
201 |                 df[full_fact_level + count] = df[full_fact_level + count] * df[combination[i]]
202 |             count += 1
203 |             factors_remaining -= 1
204 |             combination_check += 1
205 | 
206 |             # If we have run through all combinations with this 'r' value
207 |             # we should increase r.
208 |             if combination_check == len(list(itertools.combinations(df_cols, r))):
209 |                 r += 1
210 |                 combination_check = 0
211 |                 break
212 |             # If there are no factors left then we should stop adding columns.
213 |             if factors_remaining == 0:
214 |                 break
215 | 
216 |     # The dataframe is currently -1 and 1, this for loop assigns the right levels to
217 |     # each factor using the factor_levels list
218 |     for i in range(len(dic_factors)):
219 |         df[i] = df[i].apply(lambda y: factor_levels[i][0] if y == -1 else factor_levels[i][1])
220 |     df = df.rename(columns=lambda y: factor_names[y])
221 |     return df
222 | 
223 | 
224 | def plackett_burman(dic_factors, runs):
225 |     """
226 |     Returns a Plackett-Burman design where the number of runs is the next multiple of four
227 |     higher than the number of runs entered if the runs given isn't a multiple of four.
228 | 
229 |     """
230 |     # Plackett-Burman designs are made using hadamard matrices
231 |     # the hadamard matrices are taken in via an online library
232 |     factor_names = []
233 |     factor_levels = []
234 |     # this for loop fills up factor_levels and factor_names arrays
235 |     for name in dic_factors:
236 |         factor_names.append(name)
237 |         factor_levels.append([min(dic_factors[name]), max(dic_factors[name])])
238 |     # The links to the various URLs of the hadamard matrices are stored in this dictionary
239 |     url_dictionary = {8: "http://neilsloane.com/hadamard/had.8.txt",
240 |                       12: "http://neilsloane.com/hadamard/had.12.txt",
241 |                       16: "http://neilsloane.com/hadamard/had.16.0.txt",
242 |                       20: "http://neilsloane.com/hadamard/had.20.hall.n.txt",
243 |                       24: "http://neilsloane.com/hadamard/had.24.pal.txt",
244 |                       28: "http://neilsloane.com/hadamard/had.28.pal2.txt",
245 |                       32: "http://neilsloane.com/hadamard/had.32.pal.txt"}
246 |     # Conditional changes run number to be a multiple of four
247 |     if runs % 4 != 0:
248 |         runs = runs + (4 - (runs % 4))
249 | 
250 |     file = urllib.request.urlopen(url_dictionary.get(runs))
251 |     array = []
252 |     # This for loop takes the lines of the hadamard matrices and places them into the array variable
253 |     for line in file:
254 |         # decoded_line stores each line in a way that can be interacted with
255 |         decoded_line = line.decode("utf-8")
256 |         # Conditional breaks the for loop when the table has been read completely
257 |         if decoded_line[0] == 'H':
258 |             break
259 |         # The array is appended with the current row of the table, excluding the new line
260 |         array.append(list(decoded_line.split('\n')[0]))
261 |     # Array is currently a square, so only the columns are taken that are needed for the number of factors entered
262 |     df = pd.DataFrame(array[(runs - len(dic_factors)):])
263 |     df = df.transpose()
264 |     # The dataframe is currently '+' and '-' so this for loop converts to the factor levels entered in the dictionary
265 |     for i in range(len(dic_factors)):
266 |         df[i] = df[i].apply(lambda y: factor_levels[i][0] if y == '-' else factor_levels[i][1])
267 |     df = df.rename(columns=lambda y: factor_names[y])
268 |     return df
269 | 
270 | 
271 | def box_behnken(dic_factors):
272 |     """
273 |     Creates a dataframe for a Box-Behken experimental design based on the factors given.
274 | 
275 |     Parameters:
276 |         dic_factors: The dictionary of factors to be included in the Box-Behnken design.
277 | 
278 |     Returns:
279 |         df: The dataframe containing the Box-Behnken design.
280 | 
281 |     Examples:
282 |         >>> import design
283 |         >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]}
284 |         >>> design.box_behnken(Factors)
285 |             Height  Width  Depth  Temp  Pressure
286 |         0      1.6    0.2   0.25  15.0     150.0
287 |         1      1.6    0.4   0.25  15.0     150.0
288 |         2      1.6    0.3   0.20  15.0     150.0
289 |         3      1.6    0.3   0.30  15.0     150.0
290 |         4      1.6    0.3   0.25  10.0     150.0
291 |         5      1.6    0.3   0.25  20.0     150.0
292 |         6      1.6    0.3   0.25  15.0     100.0
293 |         7      1.6    0.3   0.25  15.0     200.0
294 |         8      2.0    0.2   0.25  15.0     150.0
295 |         9      2.0    0.4   0.25  15.0     150.0
296 |         10     2.0    0.3   0.20  15.0     150.0
297 |         11     2.0    0.3   0.30  15.0     150.0
298 |         12     2.0    0.3   0.25  10.0     150.0
299 |         13     2.0    0.3   0.25  20.0     150.0
300 |         14     2.0    0.3   0.25  15.0     100.0
301 |         15     2.0    0.3   0.25  15.0     200.0
302 |         16     1.8    0.2   0.20  15.0     150.0
303 |         17     1.8    0.2   0.30  15.0     150.0
304 |         18     1.8    0.2   0.25  10.0     150.0
305 |         19     1.8    0.2   0.25  20.0     150.0
306 |         20     1.8    0.2   0.25  15.0     100.0
307 |         21     1.8    0.2   0.25  15.0     200.0
308 |         22     1.8    0.4   0.20  15.0     150.0
309 |         23     1.8    0.4   0.30  15.0     150.0
310 |         24     1.8    0.4   0.25  10.0     150.0
311 |         25     1.8    0.4   0.25  20.0     150.0
312 |         26     1.8    0.4   0.25  15.0     100.0
313 |         27     1.8    0.4   0.25  15.0     200.0
314 |         28     1.8    0.3   0.20  10.0     150.0
315 |         29     1.8    0.3   0.20  20.0     150.0
316 |         30     1.8    0.3   0.20  15.0     100.0
317 |         31     1.8    0.3   0.20  15.0     200.0
318 |         32     1.8    0.3   0.30  10.0     150.0
319 |         33     1.8    0.3   0.30  20.0     150.0
320 |         34     1.8    0.3   0.30  15.0     100.0
321 |         35     1.8    0.3   0.30  15.0     200.0
322 |         36     1.8    0.3   0.25  10.0     100.0
323 |         37     1.8    0.3   0.25  10.0     200.0
324 |         38     1.8    0.3   0.25  20.0     100.0
325 |         39     1.8    0.3   0.25  20.0     200.0
326 |         40     1.8    0.3   0.25  15.0     150.0
327 |         41     1.8    0.3   0.25  15.0     150.0
328 |         42     1.8    0.3   0.25  15.0     150.0
329 |         43     1.8    0.3   0.25  15.0     150.0
330 |         44     1.8    0.3   0.25  15.0     150.0
331 |     """
332 |     df = pd.DataFrame()
333 |     factor_levels = []
334 |     factor_names = []
335 |     # this for loop fills up factor_levels and factor_names arrays
336 |     for name in dic_factors:
337 |         factor_names.append(name)
338 |         # This conditional creates a middle factor by averaging the two highest and lowest when too many
339 |         # or too few levels are given, else sorts the three given and sets the levels that way
340 |         if len(dic_factors[name]) != 3:
341 |             factor_levels.append(
342 |                 [min(dic_factors[name]), (min(dic_factors[name]) + max(dic_factors[name])) / 2, max(dic_factors[name])])
343 |         else:
344 |             factor_levels.append(
345 |                 [sorted(dic_factors[name])[0], sorted(dic_factors[name])[1], sorted(dic_factors[name])[2]])
346 |     # This for loop will go through too many iterations, generating +1,+1,+1 designs which aren't needed,
347 |     # so a conditional is added to cut it down
348 |     for run in itertools.product([-1, 1, 0], repeat=len(dic_factors)):
349 |         run = list(run)
350 |         if run.count(1) < 3 and run.count(-1) < 3 and run.count(0) == len(dic_factors) - 2:
351 |             s_add = pd.Series(run)
352 |             df = pd.concat([df, s_add], axis=1, ignore_index=True)
353 |     # for loop adds default centre runs
354 |     for i in range(len(dic_factors)):
355 |         centre_points = [0,0,0,0,0]
356 |         df = pd.concat([df, pd.Series(centre_points[:len(dic_factors)])], axis=1, ignore_index=True)
357 |     df = df.transpose()
358 |     # for loop takes the -1, 0, +1 to the corresponding three factor levels for each factor
359 |     for i in range(len(dic_factors)):
360 |         df[i] = df[i].apply(
361 |             lambda y: factor_levels[i][0] if y == -1 else (factor_levels[i][1] if y == 0 else factor_levels[i][2]))
362 |     df = df.rename(columns=lambda y: factor_names[y])
363 |     return df
364 | 
365 | 
366 | def central_composite(dic_factors):
367 |     """
368 |     Creates a Central Composite design for the factors given
369 | 
370 |     Parameters:
371 |         dic_factors: The dictionary of factors to be included in the Central Composite design.
372 | 
373 |     Returns:
374 |         df: The dataframe containing the Central Composite design.
375 | 
376 |     Examples:
377 |         >>> import design
378 |         >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]}
379 |         >>> design.central_composite(Factors)
380 |             Height     Width     Depth       Temp    Pressure
381 |         0   1.600000  0.200000  0.200000  10.000000  100.000000
382 |         1   1.600000  0.200000  0.200000  10.000000  200.000000
383 |         2   1.600000  0.200000  0.200000  20.000000  100.000000
384 |         3   1.600000  0.200000  0.200000  20.000000  200.000000
385 |         4   1.600000  0.200000  0.300000  10.000000  100.000000
386 |         5   1.600000  0.200000  0.300000  10.000000  200.000000
387 |         6   1.600000  0.200000  0.300000  20.000000  100.000000
388 |         7   1.600000  0.200000  0.300000  20.000000  200.000000
389 |         8   1.600000  0.400000  0.200000  10.000000  100.000000
390 |         9   1.600000  0.400000  0.200000  10.000000  200.000000
391 |         10  1.600000  0.400000  0.200000  20.000000  100.000000
392 |         11  1.600000  0.400000  0.200000  20.000000  200.000000
393 |         12  1.600000  0.400000  0.300000  10.000000  100.000000
394 |         13  1.600000  0.400000  0.300000  10.000000  200.000000
395 |         14  1.600000  0.400000  0.300000  20.000000  100.000000
396 |         15  1.600000  0.400000  0.300000  20.000000  200.000000
397 |         16  2.000000  0.200000  0.200000  10.000000  100.000000
398 |         17  2.000000  0.200000  0.200000  10.000000  200.000000
399 |         18  2.000000  0.200000  0.200000  20.000000  100.000000
400 |         19  2.000000  0.200000  0.200000  20.000000  200.000000
401 |         20  2.000000  0.200000  0.300000  10.000000  100.000000
402 |         21  2.000000  0.200000  0.300000  10.000000  200.000000
403 |         22  2.000000  0.200000  0.300000  20.000000  100.000000
404 |         23  2.000000  0.200000  0.300000  20.000000  200.000000
405 |         24  2.000000  0.400000  0.200000  10.000000  100.000000
406 |         25  2.000000  0.400000  0.200000  10.000000  200.000000
407 |         26  2.000000  0.400000  0.200000  20.000000  100.000000
408 |         27  2.000000  0.400000  0.200000  20.000000  200.000000
409 |         28  2.000000  0.400000  0.300000  10.000000  100.000000
410 |         29  2.000000  0.400000  0.300000  10.000000  200.000000
411 |         30  2.000000  0.400000  0.300000  20.000000  100.000000
412 |         31  2.000000  0.400000  0.300000  20.000000  200.000000
413 |         32  2.275683  0.300000  0.250000  15.000000  150.000000
414 |         33  1.324317  0.300000  0.250000  15.000000  150.000000
415 |         34  1.800000  0.537841  0.250000  15.000000  150.000000
416 |         35  1.800000  0.062159  0.250000  15.000000  150.000000
417 |         36  1.800000  0.300000  0.368921  15.000000  150.000000
418 |         37  1.800000  0.300000  0.131079  15.000000  150.000000
419 |         38  1.800000  0.300000  0.250000  26.892071  150.000000
420 |         39  1.800000  0.300000  0.250000   3.107929  150.000000
421 |         40  1.800000  0.300000  0.250000  15.000000  268.920712
422 |         41  1.800000  0.300000  0.250000  15.000000   31.079288
423 |         42  1.800000  0.300000  0.250000  15.000000  150.000000
424 |         43  1.800000  0.300000  0.250000  15.000000  150.000000
425 |         44  1.800000  0.300000  0.250000  15.000000  150.000000
426 |         45  1.800000  0.300000  0.250000  15.000000  150.000000
427 |         46  1.800000  0.300000  0.250000  15.000000  150.000000
428 |     """
429 |     df2 = pd.DataFrame()  # df2 will contain the axial points of the design
430 |     factor_levels = []
431 |     factor_names = []
432 |     alpha = 2 ** (len(dic_factors) / 4)  # this is alpha for rotatability, alpha should eventually be optional
433 |     # this for loop fills up factor_levels and factor_names arrays
434 |     for name in dic_factors:
435 |         factor_names.append(name)
436 |         # This conditional creates a middle factor by averaging the two highest and lowest when too many
437 |         # or too few levels are given, else sorts the three given and sets the levels that way
438 |         if len(dic_factors[name]) != 3:
439 |             factor_levels.append(
440 |                 [min(dic_factors[name]), (min(dic_factors[name]) + max(dic_factors[name])) / 2, max(dic_factors[name])])
441 |         else:
442 |             factor_levels.append(
443 |                 [sorted(dic_factors[name])[0], sorted(dic_factors[name])[1], sorted(dic_factors[name])[2]])
444 |     # The full factorial design points are made using the full factorial function
445 |     df1 = full_factorial_2level(dic_factors)
446 |     # This for loop creates the dataframe of the axial points
447 |     for i in range(len(dic_factors)):
448 |         run1 = []
449 |         run2 = []
450 |         # extremeplus and extrememinus contain the values for the extreme points of the design
451 |         extremeplus = factor_levels[i][1] + ((factor_levels[i][2] - factor_levels[i][1]) * alpha)
452 |         extrememinus = factor_levels[i][1] - ((factor_levels[i][2] - factor_levels[i][1]) * alpha)
453 |         # This for loop fills up the runs with the centre points for all factors
454 |         for j in range(len(dic_factors)):
455 |             run1.append(factor_levels[j][1])
456 |             run2.append(factor_levels[j][1])
457 |         # run1 and run2 have the extreme points entered in, replacing the centre point they will currently have
458 |         run1[i] = extremeplus
459 |         run2[i] = extrememinus
460 |         # The runs are stored as series to be concatenated to the dataframe
461 |         s_add1 = pd.Series(run1)
462 |         s_add2 = pd.Series(run2)
463 |         df2 = pd.concat([df2, s_add1, s_add2], axis=1, ignore_index=True)
464 |     # The axial dataframe is then transposed and renamed so it can be concated with the full factorial dataframe
465 |     df2 = df2.transpose()
466 |     df2 = df2.rename(columns=lambda y: factor_names[y])
467 |     df = pd.concat([df1, df2], axis=0, ignore_index=True)
468 |     # This for loop adds as many centre points as there are factors entered
469 |     centre_points = []
470 |     for i in range(len(dic_factors)):
471 |         centre_points.append(factor_levels[i][1])
472 |     for j in range(len(dic_factors)):
473 |         df3 = pd.DataFrame([centre_points],columns=list(dic_factors))
474 |         df = df.append(df3,ignore_index=True)
475 |     return df
476 | 
477 | 
478 | def latin_hypercube(dic_factors, runs):
479 |     """
480 |     Parameters:
481 |         dic_factors: The dictionary of factors to be included in the Latin Hypercube design.
482 | 
483 |         runs: The number of runs to be used in the design.
484 | 
485 |     Returns:
486 |         df: The dataframe containing the Latin Hypercube design.
487 | 
488 |     Example:
489 |         >>> import design
490 |         >>> Factors = {'Height':[1.6,2],'Width':[0.2,0.4],'Depth':[0.2,0.3],'Temp':[10,20],'Pressure':[100,200]}
491 |         >>> design.latin_hypercube(Factors,50)
492 |               Height     Width     Depth       Temp    Pressure
493 |         0   1.814372  0.316126  0.203734  12.633408  150.994350
494 |         1   1.683852  0.327745  0.221157  10.833524  149.235694
495 |         2   1.952938  0.220208  0.212877  14.207334  177.737810
496 |         3   1.921001  0.306165  0.249451  13.747280  195.141219
497 |         4   1.709485  0.286836  0.214973  12.132761  144.060774
498 |         5   1.795442  0.339484  0.263747  16.494926  105.861897
499 |         6   1.849604  0.390856  0.229801  17.768834  157.379054
500 |         7   1.635933  0.295207  0.244843  15.561134  119.353027
501 |         8   1.800514  0.257358  0.232554  19.117071  114.431350
502 |         9   1.748656  0.311259  0.209185  19.573654  147.317771
503 |         10  1.610152  0.200320  0.269825  14.041168  192.787729
504 |         11  1.670380  0.283579  0.270421  11.422384  161.302466
505 |         12  1.914483  0.374190  0.273246  15.253950  110.213186
506 |         13  1.731642  0.363269  0.211263  15.011417  175.315691
507 |         14  1.864093  0.245809  0.235466  10.506234  123.998827
508 |         15  1.856580  0.314574  0.260263  11.787321  152.096424
509 |         16  1.651140  0.262106  0.289432  14.407869  121.954348
510 |         17  1.827840  0.278926  0.223818  12.824422  168.813816
511 |         18  1.780800  0.380327  0.252359  12.290440  171.741507
512 |         19  1.762333  0.224241  0.216475  18.386775  165.564771
513 |         20  1.949560  0.300988  0.285943  10.063231  155.134033
514 |         21  1.646881  0.248638  0.250362  16.701447  163.476898
515 |         22  1.974239  0.379487  0.279709  17.208315  181.757031
516 |         23  1.904317  0.216877  0.292985  18.829669  136.808281
517 |         24  1.899844  0.343903  0.230494  13.197326  198.654066
518 |         25  1.696839  0.329348  0.283741  18.193024  135.335187
519 |         26  1.689936  0.272728  0.218891  19.800988  131.615692
520 |         27  1.823893  0.299159  0.247030  10.790362  191.524570
521 |         28  1.841140  0.210635  0.286718  10.327824  167.595627
522 |         29  1.883991  0.385993  0.277186  18.773584  178.871167
523 |         30  1.932945  0.358221  0.294327  16.890948  125.635668
524 |         31  1.837620  0.370877  0.242782  17.103119  142.240418
525 |         32  1.740477  0.352914  0.265939  14.697769  129.088978
526 |         33  1.624078  0.347985  0.298516  13.933373  132.011517
527 |         34  1.786612  0.351899  0.225313  15.827930  188.649172
528 |         35  1.892142  0.206601  0.254650  14.805995  138.732923
529 |         36  1.656703  0.252798  0.205547  18.461586  184.792345
530 |         37  1.770805  0.270721  0.226262  11.940936  113.390934
531 |         38  1.672266  0.288289  0.275940  15.640371  186.777116
532 |         39  1.600629  0.240123  0.280908  17.934686  126.897387
533 |         40  1.995175  0.237031  0.240472  16.393982  116.475088
534 |         41  1.713062  0.265850  0.256147  17.418780  172.746504
535 |         42  1.964540  0.235473  0.266340  11.334520  196.454539
536 |         43  1.757516  0.366909  0.207040  13.488750  102.146392
537 |         44  1.942405  0.214971  0.290674  13.373628  109.206897
538 |         45  1.985601  0.229702  0.297658  12.435430  101.336426
539 |         46  1.617340  0.321384  0.200862  19.338525  159.238981
540 |         47  1.976837  0.393484  0.258497  16.167623  140.926988
541 |         48  1.877091  0.399951  0.239234  19.788923  182.759572
542 |         49  1.725652  0.332160  0.237414  11.136650  107.726667
543 |     """
544 |     df = pd.DataFrame()
545 |     factor_names = []
546 |     count = 0
547 |     # Creates an array filled with a latin hypercube form 0 to 1
548 |     array = lhsmdu.sample(len(dic_factors), runs)
549 |     # This for loop converts the latin hypercube to have the levels entered into the dictionary of factors
550 |     for name in dic_factors:
551 |         factor_names.append(name)
552 |         low = min(dic_factors[name])
553 |         high = max(dic_factors[name])
554 |         # non_coded stored the array being mapped to fit the levels of the factors entered
555 |         non_coded = np.array(list(map(lambda x: low + ((high - low) * x), array[count])))
556 |         # Converts non_coded (which is currently one column of the final dataframe) to a series
557 |         s_add = pd.Series(non_coded[0][0])
558 |         count += 1
559 |         # Adds the series to the dataframe
560 |         df = pd.concat([df, s_add], ignore_index=True, axis=1)
561 |     df = df.rename(columns=lambda y: factor_names[y])
562 |     return df


--------------------------------------------------------------------------------