├── src └── hommmer │ ├── __init__.py │ ├── features │ ├── interaction.py │ ├── bfe.py │ ├── bif.py │ ├── ffe.py │ ├── power_saturation.py │ ├── lag.py │ ├── geometric_adstock.py │ ├── hill_saturation.py │ ├── scaled_saturation.py │ ├── normalize.py │ ├── denormalize.py │ ├── s_curve_saturation.py │ ├── loss_function.py │ ├── optimizer.py │ ├── rfe.py │ ├── __init__.py │ ├── vif.py │ ├── seasonal_decomp.py │ ├── prophet_seasonality.py │ ├── delayed_adstock.py │ ├── weibull_adstock.py │ └── weibull_adstock_delayed.py │ ├── cleaners │ ├── count_na.py │ ├── describe_data.py │ ├── drop_cols.py │ ├── get_cols_containing.py │ ├── cat_to_dummies.py │ ├── rename_column.py │ ├── count_na_cols.py │ ├── count_dup_cols.py │ ├── guess_numerical_variables.py │ ├── add_X_labels.py │ ├── del_X_labels.py │ ├── make_column_index.py │ ├── end_of_month.py │ ├── payday_dummies.py │ ├── get_all_X_labels.py │ ├── drop_n_rows.py │ ├── start_of_month.py │ ├── str_to_dummy.py │ ├── guess_date_column.py │ ├── make_date_index.py │ ├── convert_date.py │ ├── guess_y_column.py │ ├── standard_scaler.py │ ├── modify_labels.py │ ├── guess_categorical_variables.py │ ├── train_test_split.py │ ├── days_in_month.py │ ├── guess_media_columns.py │ ├── date_dummies.py │ ├── make_geodate_index.py │ ├── remove_outliers.py │ ├── date_range_dummies.py │ ├── group_weekly.py │ ├── group_monthly.py │ ├── transpose_data.py │ ├── clean_numeric.py │ ├── merge_data.py │ ├── categorize_campaigns.py │ ├── shift_dummies.py │ ├── interpolate_dates.py │ ├── week_commencing.py │ ├── holiday_dummies.py │ ├── unstack_data.py │ ├── interpolate_weekly.py │ ├── interpolate_monthly.py │ └── __init__.py │ ├── helpers │ ├── its_working.py │ ├── __init__.py │ ├── logging.py │ ├── exp_ex_zeros.py │ ├── log_ex_zeros.py │ └── check_metric.py │ ├── metrics │ ├── spend_share.py │ ├── effect_share.py │ ├── max_error.py │ ├── dummy_median.py │ ├── dummy_constant.py │ ├── dummy_mean.py │ ├── mse.py │ ├── mae.py │ ├── mdape.py │ ├── degrees_of_freedom.py │ ├── rmse.py │ ├── smape.py │ ├── log_accuracy_ratio.py │ ├── mfe.py │ ├── rsquared.py │ ├── durbin_watson.py │ ├── vars_obs.py │ ├── condition_number.py │ ├── breuschpagan.py │ ├── mda.py │ ├── nrmse.py │ ├── harvey_collier.py │ ├── ljungbox.py │ ├── rainbox.py │ ├── mape.py │ ├── jarque_bera.py │ ├── mase.py │ ├── __init__.py │ └── decomp_rssd.py │ ├── charts │ ├── lineplot.py │ ├── pairplot.py │ ├── __init__.py │ ├── y_corr.py │ ├── heatmap.py │ ├── partial_dependence.py │ ├── accuracy.py │ └── response.py │ ├── connectors │ ├── __init__.py │ ├── covid_mobility.py │ ├── nasa_weather.py │ ├── search_trends.py │ └── colab_helpers.py │ ├── models │ ├── __init__.py │ ├── Ridge.py │ ├── Linear.py │ ├── LogLinear.py │ ├── LogLog.py │ ├── DeepLearning.py │ └── Model.py │ ├── datasets │ ├── __init__.py │ ├── add_noise.py │ ├── load_duff.py │ ├── load_holidays.py │ ├── scale_feature.py │ ├── make_dates.py │ ├── make_data.py │ └── duff.csv │ └── main.py ├── website ├── public │ └── favicon.ico ├── postcss.config.js ├── pages │ ├── _app.js │ └── index.js ├── package.json ├── .gitignore └── tailwind.config.js ├── setup.py ├── LICENSE ├── TODO.md ├── .gitignore ├── README.md ├── CONTRIBUTING.md └── SOURCES.md /src/hommmer/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import build 2 | from .datasets import load_duff -------------------------------------------------------------------------------- /src/hommmer/features/interaction.py: 
-------------------------------------------------------------------------------- 1 | def interaction(x1, x2): 2 | return x1 * x2 -------------------------------------------------------------------------------- /src/hommmer/cleaners/count_na.py: -------------------------------------------------------------------------------- 1 | def count_na(df): 2 | return df.isna().sum().sum() -------------------------------------------------------------------------------- /src/hommmer/helpers/its_working.py: -------------------------------------------------------------------------------- 1 | def its_working(): 2 | print("It's working! 4") -------------------------------------------------------------------------------- /src/hommmer/cleaners/describe_data.py: -------------------------------------------------------------------------------- 1 | def describe_data(df): 2 | return df.describe().T -------------------------------------------------------------------------------- /src/hommmer/features/bfe.py: -------------------------------------------------------------------------------- 1 | def bfe(y, X): 2 | # backward feature elimination 3 | pass -------------------------------------------------------------------------------- /src/hommmer/features/bif.py: -------------------------------------------------------------------------------- 1 | def bif(): 2 | # find best incremental feature 3 | pass -------------------------------------------------------------------------------- /src/hommmer/features/ffe.py: -------------------------------------------------------------------------------- 1 | def ffe(y, X): 2 | # forward feature enhancement 3 | pass -------------------------------------------------------------------------------- /src/hommmer/features/power_saturation.py: -------------------------------------------------------------------------------- 1 | def power_saturation(x, beta): 2 | return x ** beta -------------------------------------------------------------------------------- /src/hommmer/features/lag.py: -------------------------------------------------------------------------------- 1 | def lag(series, periods): 2 | return series.shift(periods).fillna(0) -------------------------------------------------------------------------------- /src/hommmer/cleaners/drop_cols.py: -------------------------------------------------------------------------------- 1 | def drop_cols(df, columns): 2 | df.drop(columns, axis=1, inplace=True) -------------------------------------------------------------------------------- /src/hommmer/metrics/spend_share.py: -------------------------------------------------------------------------------- 1 | def spend_share(X_df): 2 | return (X_df.sum()/X_df.sum().sum()).values -------------------------------------------------------------------------------- /website/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hammer-mt/hommmer/HEAD/website/public/favicon.ico -------------------------------------------------------------------------------- /src/hommmer/cleaners/get_cols_containing.py: -------------------------------------------------------------------------------- 1 | def get_cols_containing(columns, containing): 2 | return [x for x in columns if containing in x] -------------------------------------------------------------------------------- /src/hommmer/metrics/effect_share.py: -------------------------------------------------------------------------------- 1 | def effect_share(contribution_df): 2 | 
return (contribution_df.sum()/contribution_df.sum().sum()).values -------------------------------------------------------------------------------- /src/hommmer/charts/lineplot.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | 3 | def lineplot(df, x_label, y_label): 4 | sns.lineplot(data=df, x=x_label, y=y_label) -------------------------------------------------------------------------------- /src/hommmer/cleaners/cat_to_dummies.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def cat_to_dummies(df, columns): 4 | return pd.get_dummies(df, columns=columns) -------------------------------------------------------------------------------- /src/hommmer/cleaners/rename_column.py: -------------------------------------------------------------------------------- 1 | def rename_column(df, column_label, new_name): 2 | df.rename(columns={column_label: new_name}, inplace=True) -------------------------------------------------------------------------------- /src/hommmer/metrics/max_error.py: -------------------------------------------------------------------------------- 1 | from sklearn import metrics 2 | 3 | def max_error(y_actual, y_pred): 4 | # import the module (not the function) so this wrapper doesn't shadow and recursively call itself 5 | return metrics.max_error(y_actual, y_pred) -------------------------------------------------------------------------------- /src/hommmer/charts/pairplot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | 4 | def pairplot(df, y_label): 5 | sns.pairplot(df) 6 | plt.show() -------------------------------------------------------------------------------- /src/hommmer/connectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .covid_mobility import covid_mobility 2 | from .nasa_weather import nasa_weather 3 | from .search_trends import search_trends -------------------------------------------------------------------------------- /src/hommmer/features/geometric_adstock.py: -------------------------------------------------------------------------------- 1 | import statsmodels.tsa.api as tsa 2 | 3 | def geometric_adstock(x, theta): 4 | return tsa.filters.recursive_filter(x, theta) -------------------------------------------------------------------------------- /src/hommmer/cleaners/count_na_cols.py: -------------------------------------------------------------------------------- 1 | def count_na_cols(df): 2 | missing = df.isna().sum() * 100 / len(df) 3 | return missing[missing > 0].sort_values(ascending=False) -------------------------------------------------------------------------------- /src/hommmer/features/hill_saturation.py: -------------------------------------------------------------------------------- 1 | # https://github.com/sibylhe/mmm_stan#13-diminishing-return 2 | def hill_saturation(x, ec, slope): 3 | return 1 / (1 + (x / ec)**(-slope)) -------------------------------------------------------------------------------- /src/hommmer/metrics/dummy_median.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def dummy_median(y_actual): 4 | # dummy median predictor 5 | return np.full(y_actual.shape, np.median(y_actual))
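6 | 7 | # Example (hypothetical values) - a naive baseline to sanity-check real models against: 8 | # dummy_median(np.array([1, 2, 9])) -> array([2., 2., 2.])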
-------------------------------------------------------------------------------- /src/hommmer/cleaners/count_dup_cols.py: -------------------------------------------------------------------------------- 1 | def count_dup_cols(df): 2 | # share of duplicated values per column (df.duplicated().sum() is a scalar and can't be filtered) 3 | duplicate = df.apply(lambda col: col.duplicated().sum()) * 100 / len(df) 4 | return duplicate[duplicate > 0].sort_values(ascending=False) -------------------------------------------------------------------------------- /src/hommmer/cleaners/guess_numerical_variables.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def guess_numerical_variables(df): 4 | return list(df.select_dtypes(include=[np.number]).columns.values) -------------------------------------------------------------------------------- /src/hommmer/metrics/dummy_constant.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def dummy_constant(y_actual, constant): 4 | # dummy constant predictor 5 | return np.full(y_actual.shape, constant) -------------------------------------------------------------------------------- /src/hommmer/metrics/dummy_mean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def dummy_mean(y_actual): 4 | # dummy mean predictor 5 | return np.full(y_actual.shape, np.mean(y_actual)) 6 | -------------------------------------------------------------------------------- /src/hommmer/metrics/mse.py: -------------------------------------------------------------------------------- 1 | from sklearn import metrics 2 | 3 | def mse(y_actual, y_pred): 4 | # mean square error 5 | return round(metrics.mean_squared_error(y_actual, y_pred), 3) -------------------------------------------------------------------------------- /src/hommmer/metrics/mae.py: -------------------------------------------------------------------------------- 1 | from sklearn import metrics 2 | 3 | def mae(y_actual, y_pred): 4 | # mean absolute error 5 | return round(metrics.mean_absolute_error(y_actual, y_pred),3) -------------------------------------------------------------------------------- /src/hommmer/charts/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import accuracy 2 | from .heatmap import heatmap 3 | from .pairplot import pairplot 4 | from .response import response 5 | from .y_corr import y_corr -------------------------------------------------------------------------------- /src/hommmer/cleaners/add_X_labels.py: -------------------------------------------------------------------------------- 1 | def add_X_labels(X_labels, add_cols): 2 | for x in add_cols: 3 | if x not in X_labels: 4 | X_labels.append(x) 5 | 6 | return X_labels -------------------------------------------------------------------------------- /src/hommmer/cleaners/del_X_labels.py: -------------------------------------------------------------------------------- 1 | def del_X_labels(X_labels, del_cols): 2 | for x in del_cols: 3 | if x in X_labels: 4 | X_labels.remove(x) 5 | 6 | return X_labels -------------------------------------------------------------------------------- /src/hommmer/cleaners/make_column_index.py: -------------------------------------------------------------------------------- 1 | def make_column_index(df, column_label): 2 | df.index = df[column_label] 3 | df.drop(column_label, axis=1, inplace=True) 4 | df.index.name = None -------------------------------------------------------------------------------- /src/hommmer/metrics/mdape.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def mdape(y_actual, y_pred): 4 | # median absolute
percentage error 5 | return np.median(np.abs((y_actual - y_pred) / y_actual)) * 100 -------------------------------------------------------------------------------- /src/hommmer/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .Linear import Linear 2 | from .LogLinear import LogLinear 3 | from .LogLog import LogLog 4 | from .Ridge import Ridge 5 | from .DeepLearning import DeepLearning -------------------------------------------------------------------------------- /src/hommmer/features/scaled_saturation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def scaled_saturation(x, alpha=None): 4 | if alpha is None: 5 | alpha = x.max() 6 | return alpha * (1 - np.exp(-x/alpha)) -------------------------------------------------------------------------------- /src/hommmer/metrics/degrees_of_freedom.py: -------------------------------------------------------------------------------- 1 | def degrees_of_freedom(num_obs, num_params): 2 | # https://machinelearningmastery.com/degrees-of-freedom-in-machine-learning/ 3 | return num_obs - num_params -------------------------------------------------------------------------------- /src/hommmer/cleaners/end_of_month.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def end_of_month(df, date_col): 4 | end_of_month = pd.to_datetime(df[date_col]) + pd.offsets.MonthEnd(1) 5 | df['end_of_month'] = end_of_month 6 | -------------------------------------------------------------------------------- /src/hommmer/cleaners/payday_dummies.py: -------------------------------------------------------------------------------- 1 | def add_payday_dummies(df, date_label): 2 | # use .day (an int) rather than strftime('%d'), which zero-pads and would never match '1' or '2' 3 | df['payday'] = df[date_label].apply(lambda x: 1 if x.day in (1, 2, 14, 15, 16, 30, 31) else 0) 4 | 5 | return df -------------------------------------------------------------------------------- /src/hommmer/charts/y_corr.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | def y_corr(df, y_label): 4 | plt.figure(figsize=(15,6)) 5 | bars = df.corr()[y_label].sort_values(ascending=False).plot(kind='bar') 6 | plt.show() -------------------------------------------------------------------------------- /src/hommmer/cleaners/get_all_X_labels.py: -------------------------------------------------------------------------------- 1 | def get_all_X_labels(columns, y_label, date_label): 2 | X_labels = columns.copy() 3 | X_labels.remove(y_label) 4 | X_labels.remove(date_label) 5 | 6 | return X_labels -------------------------------------------------------------------------------- /src/hommmer/metrics/rmse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import metrics 3 | 4 | def rmse(y_actual, y_pred): 5 | # root mean square error 6 | return round(np.sqrt(metrics.mean_squared_error(y_actual, y_pred)), 3) -------------------------------------------------------------------------------- /src/hommmer/charts/heatmap.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | 4 | def heatmap(df): 5 | plt.figure(figsize=(15,6)) 6 | heatmap = sns.heatmap(df.corr(), annot=True, cmap="Blues") 7 | plt.show()
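8 | 9 | # Example usage (hypothetical analytical base table of spend and sales columns): 10 | # import pandas as pd 11 | # abt = pd.DataFrame({'facebook': [10, 20, 30], 'tv': [5, 0, 10], 'sales': [15, 31, 44]}) 12 | # heatmap(abt)  # plots the correlation matrix, handy for spotting collinear channels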
-------------------------------------------------------------------------------- /src/hommmer/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .make_data import make_data 2 | from .scale_feature import scale_feature 3 | from .make_dates import make_dates 4 | from .load_duff import load_duff 5 | from .load_holidays import load_holidays -------------------------------------------------------------------------------- /src/hommmer/features/normalize.py: -------------------------------------------------------------------------------- 1 | # standardize variable 2 | def normalize(x, method="mean"): 3 | if method == "minmax": 4 | return (x-x.min())/(x.max()-x.min()) 5 | else: 6 | return (x - x.mean())/x.std() 7 | -------------------------------------------------------------------------------- /src/hommmer/datasets/add_noise.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def add_noise(series): 4 | series = np.array(series, dtype=float) # float cast so integer inputs can accept gaussian noise 5 | series += np.random.normal(scale=0.1, size=series.shape) 6 | series = np.squeeze(series) 7 | return series -------------------------------------------------------------------------------- /src/hommmer/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | from .logging import init_logging, log 2 | from .its_working import its_working 3 | from .check_metric import check_metric 4 | from .exp_ex_zeros import exp_ex_zeros 5 | from .log_ex_zeros import log_ex_zeros -------------------------------------------------------------------------------- /src/hommmer/cleaners/drop_n_rows.py: -------------------------------------------------------------------------------- 1 | def drop_n_rows(df, n=1, top=False): 2 | if top: 3 | df.drop(df.head(n).index, inplace=True) # drop first n rows 4 | else: 5 | df.drop(df.tail(n).index, inplace=True) # drop last n rows -------------------------------------------------------------------------------- /src/hommmer/metrics/smape.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def smape(y_actual, y_pred): 4 | # symmetric mean absolute percentage error 5 | return 100/len(y_actual) * np.sum(2 * np.abs(y_pred - y_actual) / (np.abs(y_actual) + np.abs(y_pred))) -------------------------------------------------------------------------------- /src/hommmer/cleaners/start_of_month.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def start_of_month(df, date_col): 4 | start_of_month = (df[date_col].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1)) 5 | df['start_of_month'] = start_of_month -------------------------------------------------------------------------------- /src/hommmer/metrics/log_accuracy_ratio.py: -------------------------------------------------------------------------------- 1 | # the logarithm of the accuracy ratio (the ratio of the forecasted to the actual value) 2 | # https://agupubs.onlinelibrary.wiley.com/doi/full/10.1002/2017SW001669 3 | 4 | def log_accuracy_ratio(): 5 | pass -------------------------------------------------------------------------------- /src/hommmer/metrics/mfe.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # https://datasciencestunt.com/mean-directional-accuracy-of-time-series-forecast/ 3 | 4 | def mfe(y_actual, y_pred): 5 | # mean
forecast error or forecast bias 6 | return np.mean(y_actual - y_pred) -------------------------------------------------------------------------------- /src/hommmer/metrics/rsquared.py: -------------------------------------------------------------------------------- 1 | from sklearn import metrics 2 | 3 | def rsquared(y_actual, y_pred): 4 | # r squared 5 | value = round(metrics.r2_score(y_actual, y_pred), 3) 6 | passed = "✔️" if value > 0.8 else "❌" 7 | return value, passed -------------------------------------------------------------------------------- /src/hommmer/cleaners/str_to_dummy.py: -------------------------------------------------------------------------------- 1 | d = { 2 | "y": 1, "yes": 1, "t": 1, "true": 1, "on": 1, "1": 1, 3 | "n": 0, "no": 0, "f": 0, "false": 0, "off": 0, "0": 0 4 | } 5 | 6 | def str_to_dummy(series): 7 | # use the .str accessor; a pandas Series has no .lower() method 8 | return series.str.lower().map(d).astype(int) -------------------------------------------------------------------------------- /website/postcss.config.js: -------------------------------------------------------------------------------- 1 | // If you want to use other PostCSS plugins, see the following: 2 | // https://tailwindcss.com/docs/using-with-preprocessors 3 | module.exports = { 4 | plugins: { 5 | tailwindcss: {}, 6 | autoprefixer: {}, 7 | }, 8 | } 9 | -------------------------------------------------------------------------------- /src/hommmer/cleaners/guess_date_column.py: -------------------------------------------------------------------------------- 1 | def guess_date_column(columns): 2 | columns = [x.lower() for x in columns] 3 | guesses = ['date', 'day', 'week', 'month'] 4 | for x in guesses: 5 | if x in columns: 6 | return x 7 | return None -------------------------------------------------------------------------------- /src/hommmer/cleaners/make_date_index.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def make_date_index(df, date_label): 4 | df[date_label] = pd.to_datetime(df[date_label]) 5 | df.index = df[date_label] 6 | df.drop(date_label, axis=1, inplace=True) 7 | df.index.name = None -------------------------------------------------------------------------------- /src/hommmer/charts/partial_dependence.py: -------------------------------------------------------------------------------- 1 | from sklearn.inspection import plot_partial_dependence 2 | # https://scikit-learn.org/stable/modules/partial_dependence.html 3 | 4 | def partial_dependence(model, X_test, features): 5 | plot_partial_dependence(model, X_test, features) -------------------------------------------------------------------------------- /src/hommmer/metrics/durbin_watson.py: -------------------------------------------------------------------------------- 1 | import statsmodels.stats.api as sms 2 | 3 | def durbin_watson(residuals): 4 | # tests for autocorrelation 5 | # durbin watson should be between 1.5 and 2.5 6 | test = sms.durbin_watson(residuals) 7 | return ('Durbin Watson', test) -------------------------------------------------------------------------------- /src/hommmer/cleaners/convert_date.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | 3 | def convert_date(date, from_format="%m/%d/%Y", to_format="%Y-%m-%d"): 4 | date_str = str(date) 5 | date_obj = dt.datetime.strptime(date_str, from_format) 6 | return dt.datetime.strftime(date_obj, to_format) --------------------------------------------------------------------------------
/src/hommmer/cleaners/guess_y_column.py: -------------------------------------------------------------------------------- 1 | def guess_y_column(columns): 2 | guesses = ['revenue', 'sales', 'conversions', 'purchases'] 3 | columns = [x.lower() for x in columns] 4 | for x in guesses: 5 | if x in columns: 6 | return x 7 | return None 8 | -------------------------------------------------------------------------------- /src/hommmer/metrics/vars_obs.py: -------------------------------------------------------------------------------- 1 | def vars_obs(df): 2 | # 7 - 10 observations per variable 3 | # https://storage.googleapis.com/pub-tools-public-publication-data/pdf/2d0395bc7d4d13ddedef54d744ba7748e8ba8dd1.pdf 4 | ratio = df.shape[0] / df.shape[1] # observations per variable (rows over columns, not the inverse) 5 | return ratio >= 7, ratio -------------------------------------------------------------------------------- /src/hommmer/cleaners/standard_scaler.py: -------------------------------------------------------------------------------- 1 | from sklearn.preprocessing import StandardScaler 2 | 3 | def standard_scaler(X_train, X_test): 4 | scaler = StandardScaler() 5 | X_train = scaler.fit_transform(X_train) 6 | X_test = scaler.transform(X_test) 7 | return X_train, X_test -------------------------------------------------------------------------------- /src/hommmer/metrics/condition_number.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | def condition_number(X): 3 | # tests for multicollinearity 4 | # condition number should be less than 30 5 | value = round(np.linalg.cond(X)) 6 | passed = "✔️" if value < 30 else "❌" 7 | return value, passed -------------------------------------------------------------------------------- /src/hommmer/cleaners/modify_labels.py: -------------------------------------------------------------------------------- 1 | def modify_labels(text, labels, prefix=False, sep=" | "): 2 | modified_labels = [] 3 | for x in labels: 4 | if prefix: 5 | modified_labels.append(f"{text}{sep}{x}") 6 | else: 7 | modified_labels.append(f"{x}{sep}{text}") 8 | 9 | return modified_labels # the original fell off the end of the function and returned None -------------------------------------------------------------------------------- /src/hommmer/features/denormalize.py: -------------------------------------------------------------------------------- 1 | # https://stackoverflow.com/questions/51471672/reverse-z-score-pandas-dataframe 2 | def denormalize(x_trans, x, method="mean"): 3 | if method == "minmax": 4 | return (x.max()-x.min())*x_trans+x.min() 5 | else: 6 | return x_trans*x.std()+x.mean() -------------------------------------------------------------------------------- /src/hommmer/features/s_curve_saturation.py: -------------------------------------------------------------------------------- 1 | # https://facebookexperimental.github.io/Robyn/docs/variable-transformations/ 2 | def s_curve_saturation(x, alpha, gamma): 3 | """ 4 | x = array 5 | alpha = shape 6 | gamma = inflection 7 | """ 8 | return x**alpha / (x ** alpha + gamma ** alpha) -------------------------------------------------------------------------------- /src/hommmer/metrics/breuschpagan.py: -------------------------------------------------------------------------------- 1 | import statsmodels.stats.api as sms 2 | from statsmodels.compat import lzip 3 | 4 | def breuschpagan(residuals, exog): 5 | # tests for heteroskedasticity: a p-value above 0.05 means homoskedasticity can't be rejected (desirable) 6 | name = ['Lagrange', 'p-value','f-value', 'f p-value'] 7 | test = sms.het_breuschpagan(residuals, exog) 8 | return lzip(name, test) # the original computed the test but never returned it
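9 | 10 | # Hypothetical usage with a fitted statsmodels OLS results object `res`: 11 | # breuschpagan(res.resid, res.model.exog) -> [('Lagrange', ...), ('p-value', ...), ...]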
-------------------------------------------------------------------------------- /src/hommmer/helpers/logging.py: -------------------------------------------------------------------------------- 1 | # global flag for logging, defaulting to quiet so log() works before init_logging() is called 2 | VERBOSE = False 3 | 4 | # print logs if verbose 5 | def log(string): 6 | if VERBOSE: 7 | print(string) 8 | 9 | def init_logging(verbose): 10 | global VERBOSE 11 | VERBOSE = bool(verbose) -------------------------------------------------------------------------------- /src/hommmer/metrics/mda.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # https://datasciencestunt.com/mean-directional-accuracy-of-time-series-forecast/ 3 | 4 | def mda(y_actual, y_pred): 5 | # mean directional accuracy 6 | return np.mean((np.sign(y_actual[1:] - y_actual[:-1]) == np.sign(y_pred[1:] - y_pred[:-1])).astype(int)) -------------------------------------------------------------------------------- /src/hommmer/cleaners/guess_categorical_variables.py: -------------------------------------------------------------------------------- 1 | def guess_categorical_variables(df): 2 | cat_vars = [] 3 | for x in df.columns: 4 | values = list(df[x].value_counts().index) 5 | # compare as sets: value_counts orders by frequency, so [1, 0] must match [0, 1]; note {True} == {1} 6 | if set(values) in [{0, 1}, {1}]: 7 | cat_vars.append(x) 8 | 9 | return cat_vars -------------------------------------------------------------------------------- /src/hommmer/metrics/nrmse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import metrics 3 | 4 | def nrmse(y_actual, y_pred): 5 | # normalized root mean square error 6 | value = round(np.sqrt(metrics.mean_squared_error(y_actual, y_pred)) / np.mean(y_actual), 3) 7 | passed = "✔️" if value < 0.15 else "❌" 8 | return value, passed -------------------------------------------------------------------------------- /src/hommmer/cleaners/train_test_split.py: -------------------------------------------------------------------------------- 1 | # alias the sklearn import so this wrapper doesn't shadow and recursively call itself 2 | from sklearn.model_selection import train_test_split as sk_train_test_split 3 | 4 | def train_test_split(df, y_label, X_labels): 5 | X = df[X_labels] 6 | y = df[y_label] 7 | 8 | X_train, X_test, y_train, y_test = sk_train_test_split(X, y, test_size=0.3, random_state=0) 9 | return X_train, X_test, y_train, y_test -------------------------------------------------------------------------------- /src/hommmer/cleaners/days_in_month.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def days_in_month(df, date_col): 4 | start_of_month = (df[date_col].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1)) 5 | end_of_month = pd.to_datetime(df[date_col]) + pd.offsets.MonthEnd(1) 6 | df['days_in_month'] = (end_of_month - start_of_month).dt.days + 1 -------------------------------------------------------------------------------- /src/hommmer/datasets/load_duff.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | def load_duff(download=True): 5 | path = os.path.join(os.path.dirname(__file__), "duff.csv") 6 | df = pd.read_csv(path) 7 | if download: 8 | df.to_csv('duff.csv', index=None) 9 | print("saving duff.csv") 10 | # return the dataframe in both branches (previously download=True returned None) 11 | return df
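12 | 13 | # Example: load the bundled demo dataset as a DataFrame without writing a local csv: 14 | # df = load_duff(download=False)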
-------------------------------------------------------------------------------- /src/hommmer/connectors/covid_mobility.py: -------------------------------------------------------------------------------- 1 | def covid_mobility(df, sub_region_1=None): 2 | if sub_region_1 is None: 3 | data = df[df['sub_region_1'].isnull()] 4 | else: 5 | data = df[df['sub_region_1'] == sub_region_1] 6 | data = data[data['sub_region_2'].isnull()] # filter the regional subset, not the original df 7 | 8 | data.reset_index(inplace=True) 9 | return data[data.columns[9:]] -------------------------------------------------------------------------------- /src/hommmer/datasets/load_holidays.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | def load_holidays(download=True): 5 | path = os.path.join(os.path.dirname(__file__), "holidays.csv") 6 | df = pd.read_csv(path) 7 | if download: 8 | df.to_csv('holidays.csv', index=None) 9 | print("saving holidays.csv") 10 | # return the dataframe in both branches (previously download=True returned None) 11 | return df -------------------------------------------------------------------------------- /src/hommmer/helpers/exp_ex_zeros.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def exp_ex_zeros(series): 5 | np_array = np.array(series.values, dtype=float) # np.float was removed from recent numpy releases 6 | out = np.zeros_like(np_array) 7 | exponent = np.exp(np_array, where=np_array!=0, out=out) 8 | exp_series = pd.Series(exponent, name=series.name, index=series.index) 9 | return exp_series -------------------------------------------------------------------------------- /src/hommmer/helpers/log_ex_zeros.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def log_ex_zeros(series): 5 | np_array = np.array(series.values, dtype=float) # np.float was removed from recent numpy releases 6 | out = np.zeros_like(np_array) 7 | logged = np.log(np_array, where=np_array!=0, out=out) 8 | log_series = pd.Series(logged, name=series.name, index=series.index) 9 | return log_series -------------------------------------------------------------------------------- /src/hommmer/cleaners/guess_media_columns.py: -------------------------------------------------------------------------------- 1 | def guess_media_columns(columns): 2 | guesses = ['cost', 'spend', 'impression', 'spent', 'clicks'] 3 | columns = [x.lower() for x in columns] 4 | media_columns = [] 5 | for x in guesses: 6 | for y in columns: 7 | if x in y: 8 | media_columns.append(y) 9 | 10 | return media_columns -------------------------------------------------------------------------------- /src/hommmer/metrics/harvey_collier.py: -------------------------------------------------------------------------------- 1 | import statsmodels.stats.api as sms 2 | from statsmodels.compat import lzip 3 | 4 | # need a way to run without passing results object 5 | def harvey_collier(residuals, results, exog): 6 | # tests for linearity: a p-value above 0.05 means linearity can't be rejected (desirable) 7 | name = ['t value', 'p value'] 8 | test = sms.linear_harvey_collier(results) 9 | 10 | return lzip(name, test) -------------------------------------------------------------------------------- /src/hommmer/cleaners/date_dummies.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def date_dummies(df): 4 | dr = pd.date_range(start=df['date'].min(), end=df['date'].max()) 5 | date_df = pd.DataFrame({'date': dr}) 6 | for _, row in df.iterrows(): 7 | date_df[row[1]] = (date_df['date'] == row[0]) 8 | 9 | date_df.iloc[:, 1:] = date_df.iloc[:, 1:].astype(int) 10 | return date_df
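11 | 12 | # Hypothetical usage - one row per event, date column first, event-name column second: 13 | # events = pd.DataFrame({'date': ['2021-06-01', '2021-06-15'], 'event': ['sale', 'launch']}) 14 | # date_dummies(events) -> daily frame with 0/1 'sale' and 'launch' columns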
-------------------------------------------------------------------------------- /src/hommmer/datasets/scale_feature.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def scale_feature(series, min_value=None, max_value=None): 4 | # if no min or max values supplied 5 | if min_value is None: 6 | min_value = 0 7 | if max_value is None: 8 | max_value = series.max() * 100 9 | 10 | return np.interp(series, (series.min(), series.max()), (min_value, max_value)) -------------------------------------------------------------------------------- /src/hommmer/metrics/ljungbox.py: -------------------------------------------------------------------------------- 1 | import statsmodels.stats.api as sms 2 | from statsmodels.compat import lzip 3 | 4 | def ljungbox(residuals, X_labels): 5 | # tests for autocorrelation: a p-value above 0.05 means no significant autocorrelation (desirable) 6 | name = ['Ljung-Box stat', 'p-value'] 7 | lags = int(min(len(X_labels)/2-2, 40)) # lags must be an integer 8 | test = sms.acorr_ljungbox(residuals, lags=[lags]) 9 | return lzip(name, test) -------------------------------------------------------------------------------- /src/hommmer/cleaners/make_geodate_index.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def make_geodate_index(df, date_label, geo_label): 4 | key_label = f"{date_label}${geo_label}" 5 | # df[key_label] = pd.to_datetime(df[date_label]).astype(str) + "$" + df[geo_label] 6 | df[key_label] = df[date_label] + "$" + df[geo_label] 7 | df.index = df[key_label] 8 | df.drop(key_label, axis=1, inplace=True) 9 | df.index.name = None -------------------------------------------------------------------------------- /src/hommmer/metrics/rainbox.py: -------------------------------------------------------------------------------- 1 | import statsmodels.stats.api as sms 2 | from statsmodels.compat import lzip 3 | 4 | # need a way to run without passing results object 5 | def rainbox(residuals, results, X_labels): 6 | # rainbow test for linearity: a p-value above 0.05 means linearity can't be rejected (desirable) 7 | name = ['rainbow F stat', 'rainbow F stat p-value'] 8 | test = sms.linear_rainbow(results) 9 | return lzip(name, test) -------------------------------------------------------------------------------- /src/hommmer/metrics/mape.py: -------------------------------------------------------------------------------- 1 | from sklearn import metrics 2 | import numpy as np 3 | 4 | #https://www.researchgate.net/post/Is-there-a-cut-off-point-for-the-mean-absolute-percentage-error-MAPE 5 | 6 | def mape(y_actual, y_pred): 7 | # mean absolute error scaled by the mean of actuals (a MAPE-style accuracy metric) 8 | value = round(metrics.mean_absolute_error(y_actual, y_pred)/np.mean(y_actual),3) 9 | passed = "✔️" if value < 0.15 else "❌" 10 | return value, passed -------------------------------------------------------------------------------- /src/hommmer/metrics/jarque_bera.py: -------------------------------------------------------------------------------- 1 | import statsmodels.stats.api as sms 2 | from statsmodels.compat import lzip 3 | 4 | def jarque_bera(residuals): 5 | # Tests for normality of the residuals 6 | # skewness should be between -2 and 2 7 | # kurtosis should be between -7 and 7 8 | name = ['Jarque-Bera', 'Chi^2 prob', 'Skewness', 'Kurtosis'] 9 | test = sms.jarque_bera(residuals) 10 | 11 | return lzip(name, test) -------------------------------------------------------------------------------- /src/hommmer/cleaners/remove_outliers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def remove_outliers(series, num_std_devs=3): 4 | mean = np.mean(series) 5 | std_dev = np.std(series) 6 | outliers_cutoff = std_dev * num_std_devs 7 | lower_limit = mean - outliers_cutoff 8 | upper_limit = mean + outliers_cutoff 9 | 10 | no_outliers = series.apply(lambda x: mean if x > upper_limit or x < lower_limit else x) 11 | 12 | return no_outliers
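13 | 14 | # Example (hypothetical series): with num_std_devs=1, the 100 sits beyond one standard 15 | # deviation of the mean and is replaced by the mean; the other values pass through. 16 | # remove_outliers(pd.Series([1, 2, 1, 2, 100]), num_std_devs=1)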
-------------------------------------------------------------------------------- /website/pages/_app.js: -------------------------------------------------------------------------------- 1 | import "tailwindcss/tailwind.css"; 2 | 3 | import TagManager from "react-gtm-module"; 4 | import { useEffect } from "react"; 5 | 6 | function MyApp({ Component, pageProps }) { 7 | const tagManagerArgs = { 8 | gtmId: "GTM-P24ZPZM", 9 | }; 10 | useEffect(() => { 11 | TagManager.initialize(tagManagerArgs); 12 | }, []); 13 | 14 | return <Component {...pageProps} />; 15 | } 16 | 17 | export default MyApp; 18 | -------------------------------------------------------------------------------- /src/hommmer/cleaners/date_range_dummies.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def date_range_dummies(df): 4 | dr = pd.date_range(start=df['start'].min(), end=df['end'].max()) 5 | 6 | date_df = pd.DataFrame({'date': dr}) 7 | 8 | for _, row in df.iterrows(): 9 | date_df[row[2]] = (date_df['date'] >= row[0]) & (date_df['date'] <= row[1]) 10 | 11 | date_df.iloc[:, 1:] = date_df.iloc[:, 1:].astype(int) 12 | return date_df -------------------------------------------------------------------------------- /src/hommmer/cleaners/group_weekly.py: -------------------------------------------------------------------------------- 1 | def group_weekly(df, date_col): 2 | weekly = df.copy() 3 | weekly['week'] = weekly[date_col].dt.isocalendar().week 4 | weekly['year'] = weekly[date_col].dt.isocalendar().year 5 | weekly['year_week'] = weekly['year'].astype(str) + "-" + weekly['week'].astype(str) 6 | weekly = weekly.groupby('year_week').sum() 7 | weekly.drop(['week', 'year'], axis=1, inplace=True) 8 | weekly.reset_index(inplace=True) 9 | return weekly -------------------------------------------------------------------------------- /src/hommmer/datasets/make_dates.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import pandas as pd 3 | 4 | def make_dates(days=180, end_date=None): 5 | if end_date: 6 | end_date = dt.datetime.strptime(end_date, "%Y-%m-%d") 7 | else: 8 | end_date = dt.datetime.today() 9 | 10 | start_date = end_date - dt.timedelta(days-1) 11 | dates = pd.date_range(start_date, periods=days, freq='D') 12 | dates = pd.Series(dates.strftime("%Y-%m-%d")) 13 | return dates -------------------------------------------------------------------------------- /src/hommmer/cleaners/group_monthly.py: -------------------------------------------------------------------------------- 1 | def group_monthly(df, date_col): 2 | monthly = df.copy() 3 | monthly['month'] = monthly[date_col].dt.month 4 | monthly['year'] = monthly[date_col].dt.year # calendar year, not the ISO week-year, to match the calendar month 5 | monthly['year_month'] = monthly['year'].astype(str) + "-" + monthly['month'].astype(str) 6 | monthly = monthly.groupby('year_month').sum() 7 | monthly.drop(['month', 'year'], axis=1, inplace=True) 8 | monthly.reset_index(inplace=True) 9 | return monthly -------------------------------------------------------------------------------- /src/hommmer/cleaners/transpose_data.py: -------------------------------------------------------------------------------- 1 | def transpose_data(df, date_col=None): 2 | if date_col is None: 3 | date_col = df.columns[0] 4 | 5 | transposed = df.T.copy() 6 |
transposed.columns = transposed.iloc[0] 7 | transposed.drop(transposed.index[0], inplace=True) 8 | transposed.reset_index(inplace=True) 9 | transposed.rename(columns={"index": date_col}, inplace=True) 10 | transposed = transposed.rename_axis(None, axis = 1) 11 | return transposed -------------------------------------------------------------------------------- /src/hommmer/connectors/nasa_weather.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def nasa_weather(df): 4 | year = df['YEAR'].astype(str) 5 | month = df['MO'].astype(str) 6 | day = df['DY'].astype(str) 7 | 8 | month = month.apply(lambda x: '0'+x if len(x) == 1 else x) 9 | day = day.apply(lambda x: '0'+x if len(x) == 1 else x) 10 | 11 | df['date'] = pd.to_datetime(year + "-" + month + "-" + day) 12 | df = df[['date', 'T2M_RANGE', 'T2M_MAX', 'T2M_MIN', 'T2M']] 13 | 14 | return df -------------------------------------------------------------------------------- /website/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "private": true, 3 | "scripts": { 4 | "dev": "next dev", 5 | "build": "next build", 6 | "start": "next start" 7 | }, 8 | "dependencies": { 9 | "@heroicons/react": "^1.0.5", 10 | "next": "^12.0.4", 11 | "react": "^17.0.2", 12 | "react-dom": "^17.0.2", 13 | "react-gtm-module": "^2.0.11" 14 | }, 15 | "devDependencies": { 16 | "autoprefixer": "^10.2.6", 17 | "postcss": "^8.3.5", 18 | "tailwindcss": "^2.2.4" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/hommmer/cleaners/clean_numeric.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def clean_numeric(series): 4 | series = series.fillna(0) 5 | series = series.astype(str) 6 | series = series.apply(lambda x: x.replace(',','')) 7 | series = series.apply(lambda x: x.replace('$','')) 8 | series = series.apply(lambda x: x.replace('£','')) 9 | series = series.apply(lambda x: x.replace('€','')) 10 | series = series.apply(lambda x: x.replace('%','')) 11 | series = pd.to_numeric(series) 12 | 13 | return series -------------------------------------------------------------------------------- /src/hommmer/metrics/mase.py: -------------------------------------------------------------------------------- 1 | from sklearn import metrics 2 | import numpy as np 3 | # https://github.com/CamDavidsonPilon/Python-Numerics/blob/master/TimeSeries/MASE.py 4 | # https://medium.com/@ashishdce/mean-absolute-scaled-error-mase-in-forecasting-8f3aecc21968 5 | 6 | def mase(y_train, y_test, y_pred): 7 | # mean absolute scaled error 8 | n = y_train.shape[0] 9 | naive = np.abs(np.diff(y_train).sum()/(n-1)) 10 | mae = metrics.mean_absolute_error(y_test, y_pred) 11 | 12 | return round(mae/naive,3) -------------------------------------------------------------------------------- /src/hommmer/cleaners/merge_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def merge_data(left_df, right_df, date_col="date"): 4 | 5 | # get clean copies of data with date format 6 | left_df = left_df.copy() 7 | left_df[date_col] = pd.to_datetime(left_df[date_col]) 8 | 9 | right_df = right_df.copy() 10 | right_df[date_col] = pd.to_datetime(right_df[date_col]) 11 | 12 | # join data together 13 | merged_df = left_df.merge(right_df, on=date_col, how='left') 14 | merged_df.fillna(0, inplace=True) 15 | return merged_df 
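16 | 17 | # Hypothetical usage - left-join daily sales onto daily spend by date, zero-filling gaps: 18 | # abt = merge_data(spend_df, sales_df, date_col='date')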
-------------------------------------------------------------------------------- /website/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | 27 | # local env files 28 | .env.local 29 | .env.development.local 30 | .env.test.local 31 | .env.production.local 32 | 33 | # vercel 34 | .vercel 35 | -------------------------------------------------------------------------------- /website/tailwind.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | mode: "jit", 3 | purge: ["./pages/**/*.{js,ts,jsx,tsx}", "./components/**/*.{js,ts,jsx,tsx}"], 4 | darkMode: false, // or 'media' or 'class' 5 | theme: { 6 | extend: { 7 | visibility: ["hover"], 8 | colors: { 9 | "pants-blue": "#70d1fe", 10 | "skin-yellow": "#fed90f", 11 | "stubble-brown": "#d1b271", 12 | "shoe-black": "#424f46", 13 | "donut-pink": "#ff66ff", 14 | }, 15 | }, 16 | }, 17 | variants: { 18 | extend: {}, 19 | }, 20 | plugins: [], 21 | }; -------------------------------------------------------------------------------- /src/hommmer/cleaners/categorize_campaigns.py: -------------------------------------------------------------------------------- 1 | def categorize_campaigns(campaign_name, categories=None): 2 | if categories is None: 3 | categories = { 4 | "prospecting": ['prosp'], 5 | "remarketing": ['remar', 'retar'] 6 | } 7 | 8 | campaign_name = campaign_name.lower() 9 | 10 | campaign_category = "uncategorized" 11 | 12 | for category, containing in categories.items(): 13 | for text in containing: 14 | if text in campaign_name: 15 | campaign_category = category 16 | 17 | return campaign_category -------------------------------------------------------------------------------- /src/hommmer/cleaners/shift_dummies.py: -------------------------------------------------------------------------------- 1 | def shift_dummies(df, col, shift): 2 | # build lead (positive shift) or lag (negative shift) dummy columns; 3 | # the original looped over range(shift), which is empty for negative shifts, and called list.push 4 | shift_cols = [] 5 | shift_sign = "+" if shift >= 0 else "-" 6 | 7 | for t in range(abs(shift)): 8 | col_name = f"{col} t{shift_sign}{t}" 9 | df[col_name] = df[col].shift(t if shift >= 0 else -t) 10 | shift_cols.append(col_name) 11 | 12 | prefix = "post" if shift_sign == "+" else "pre" 13 | 14 | col_name = f"{prefix}-{col} {shift_sign}{abs(shift)}" 15 | df[col_name] = (df[shift_cols].sum(axis=1) > 0).astype(int) 16 | shift_cols.append(col_name) 17 | return shift_cols -------------------------------------------------------------------------------- /src/hommmer/features/loss_function.py: -------------------------------------------------------------------------------- 1 | def loss_function(X_values, X_media, X_org): 2 | # X_media = { 3 | # "labels": ["facebook", "tiktok"], 4 | # "coefs": [6.454, 1.545], 5 | # "drs": [0.6, 0.7] 6 | # } 7 | # X_org = { 8 | # "labels": ["const"], 9 | # "coefs": [-27.5], 10 | # "values": [1] 11 | # } 12 | y = 0 13 | for i in range(len(X_values)): 14 | transform = X_values[i] ** X_media["drs"][i] 15 | contrib = X_media["coefs"][i] * transform 16 | y += contrib 17 | 18 | for i in range(len(X_org["labels"])): # iterate the labels list, not the dict's keys 19 | contrib = X_org["coefs"][i] * X_org["values"][i] 20 | y += contrib 21 | 22 | return -y
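23 | 24 | # Hypothetical call, using the structures sketched in the comments above: 25 | # X_media = {"labels": ["facebook", "tiktok"], "coefs": [6.454, 1.545], "drs": [0.6, 0.7]} 26 | # X_org = {"labels": ["const"], "coefs": [-27.5], "values": [1]} 27 | # loss_function([100, 50], X_media, X_org)  # negated predicted y, for scipy's minimizer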
-------------------------------------------------------------------------------- /src/hommmer/helpers/check_metric.py: -------------------------------------------------------------------------------- 1 | from hommmer.metrics import * 2 | 3 | def check_metric(metric_label, model): 4 | X_test = model.X_test 5 | y_test = model.y_test 6 | y_pred = model.predict(X=X_test) 7 | 8 | if metric_label == 'nrmse': 9 | return nrmse(y_test, y_pred) 10 | elif metric_label == 'rsquared': 11 | return rsquared(y_test, y_pred) 12 | elif metric_label == 'decomp-rssd': 13 | contrib_df = model.contribution()[model.media_labels] 14 | media_X_df = model.X_actual[model.media_labels] 15 | return decomp_rssd(effect_share(contrib_df), spend_share(media_X_df)) 16 | elif metric_label == 'cond-no': 17 | return condition_number(model.X_train) 18 | elif metric_label == 'mape': 19 | return mape(y_test, y_pred) -------------------------------------------------------------------------------- /src/hommmer/connectors/search_trends.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import datetime as dt 3 | 4 | def search_trends(df): 5 | # delete any '<' signs for low volume days 6 | for c in df.select_dtypes(include=['object']).columns[1:]: 7 | df[c] = df[c].str.replace('<', '') 8 | df[c] = pd.to_numeric(df[c]) 9 | 10 | date_col = df.columns[0] 11 | df[date_col] = pd.to_datetime(df[date_col]) 12 | df.set_index(date_col, inplace=True) 13 | df_reindexed = df.reindex(pd.date_range(start=df.index.min(), 14 | end=df.index.max() + dt.timedelta(days=6), freq='1D')) 15 | df = df_reindexed.interpolate(method='linear') 16 | df = df.round(1) 17 | df.reset_index(inplace=True) 18 | df.rename({'index': 'date'}, axis=1, inplace=True) 19 | return df -------------------------------------------------------------------------------- /src/hommmer/features/optimizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.optimize as sco 3 | 4 | from .loss_function import loss_function 5 | 6 | def optimizer(X_media, X_org, budget): 7 | args = (X_media, X_org) # pass non-optimized values into loss_function 8 | len_X_media = len(X_media['labels']) 9 | guesses = len_X_media*[budget/len_X_media,] # starting guesses: divide budget evenly 10 | con_1 = {'type': 'eq', 'fun': lambda X: np.sum(X) - budget} # so we can't go over budget 11 | constraints = (con_1) 12 | bound = (0, budget) # spend for a channel can't be negative or higher than budget 13 | bounds = tuple(bound for x in range(len_X_media)) 14 | solution = sco.minimize(loss_function, x0=guesses, args=args, method='SLSQP', constraints=constraints, bounds=bounds) 15 | return (-1 * solution.fun), solution.x -------------------------------------------------------------------------------- /src/hommmer/cleaners/interpolate_dates.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def interpolate_dates(df, date_col=None): 5 | data = df.copy() 6 | if date_col is None: 7 | date_col = data.columns[0] 8 | 9 | data[date_col] = pd.to_datetime(data[date_col]) 10 | 11 | dr = pd.date_range(start=data[date_col].min(), end=data[date_col].max(), freq='1D') 12 | 13 | date_df = pd.DataFrame({f'{date_col}': dr}) 14 | 15 | merged = date_df.merge(data, how='left', on=date_col) 16 | reindexed = merged.set_index(date_col) 17 | 18 | reindexed.replace({0: np.nan}, inplace=True) 19 | resampled =
reindexed.interpolate(method='linear') 20 | resampled = resampled.reset_index() 21 | resampled.rename({'index': date_col}, axis=1, inplace=True) 22 | resampled.fillna(0, inplace=True) 23 | return resampled -------------------------------------------------------------------------------- /src/hommmer/cleaners/week_commencing.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | 3 | def week_commencing(date_str=None, date_format="%Y-%m-%d"): 4 | # https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes 5 | 6 | # if no date supplied, default to today 7 | if date_str is None: 8 | today = dt.datetime.today() 9 | date_str = today.strftime(date_format) 10 | 11 | # parse the date string into a datetime object 12 | date = dt.datetime.strptime(date_str, date_format) 13 | 14 | # get the year and week number from the datetime 15 | year_week = dt.datetime.strftime(date, "%Y-%W") 16 | 17 | # hack to get the monday of the week 18 | monday = dt.datetime.strptime(f"{year_week}-1", "%Y-%W-%w") 19 | 20 | # return the monday date in the same format 21 | return dt.datetime.strftime(monday, date_format) -------------------------------------------------------------------------------- /src/hommmer/features/rfe.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LinearRegression 2 | from sklearn.feature_selection import RFE 3 | import pandas as pd 4 | 5 | def rfe(df, y_label, X_labels, max_features=None): 6 | if max_features is None: 7 | # A rule-of-thumb for a minimum number of data points for a stable linear regression 8 | # are 7-10 data points per parameter. 9 | # https://storage.googleapis.com/pub-tools-public-publication-data/pdf/2d0395bc7d4d13ddedef54d744ba7748e8ba8dd1.pdf 10 | max_features = max(round(df.shape[0]/7),1) 11 | 12 | rfe = RFE(LinearRegression(), n_features_to_select=max_features).fit(df[X_labels], df[y_label]) 13 | rfe_keep = pd.Series(rfe.support_) 14 | rfe_keep.index = X_labels 15 | 16 | rfe_df = pd.DataFrame({'rfe_keep': rfe_keep}) 17 | rfe_df['rfe_ranking'] = rfe.ranking_ 18 | return rfe_keep, rfe_df -------------------------------------------------------------------------------- /src/hommmer/features/__init__.py: -------------------------------------------------------------------------------- 1 | from .bfe import bfe 2 | from .loss_function import loss_function 3 | from .optimizer import optimizer 4 | from .delayed_adstock import delayed_adstock 5 | from .geometric_adstock import geometric_adstock 6 | from .hill_saturation import hill_saturation 7 | from .power_saturation import power_saturation 8 | from .s_curve_saturation import s_curve_saturation 9 | from .scaled_saturation import scaled_saturation 10 | from .weibull_adstock import weibull_adstock 11 | from .weibull_adstock_delayed import weibull_adstock_delayed 12 | from .interaction import interaction 13 | from .lag import lag 14 | from .vif import vif 15 | from .ffe import ffe 16 | from .bif import bif 17 | from .rfe import rfe 18 | # from .prophet_seasonality import prophet_seasonality 19 | from .seasonal_decomp import seasonal_decomp 20 | from .normalize import normalize 21 | from .denormalize import denormalize -------------------------------------------------------------------------------- /src/hommmer/cleaners/holiday_dummies.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pandas as pd 3 | 4 | from 
hommmer.datasets import load_holidays 5 | 6 | def holiday_dummies(start, end, country="US", brackets=False): 7 | all_holidays = load_holidays(download=False) 8 | country_holidays = all_holidays[all_holidays['country'] == country].copy() # copy to avoid SettingWithCopyWarning below 9 | if brackets == False: 10 | country_holidays['holiday'] = country_holidays['holiday'].apply( 11 | lambda x: re.sub(r' [\[\(].*[\]\)]', '', x)) # raw string so the backslashes are regex escapes 12 | 13 | dr = pd.date_range(start=start, end=end) 14 | date_df = pd.DataFrame({'ds': dr}) 15 | for _, row in country_holidays.iterrows(): 16 | if row[1] in date_df.columns: 17 | date_df[row[1]] = date_df[row[1]] | (date_df['ds'] == row[0]) 18 | else: 19 | date_df[row[1]] = (date_df['ds'] == row[0]) 20 | 21 | date_df.iloc[:, 1:] = date_df.iloc[:, 1:].astype(int) 22 | return date_df 23 | 24 | -------------------------------------------------------------------------------- /src/hommmer/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .nrmse import nrmse 2 | from .rsquared import rsquared 3 | from .breuschpagan import breuschpagan 4 | from .condition_number import condition_number 5 | from .durbin_watson import durbin_watson 6 | from .jarque_bera import jarque_bera 7 | from .ljungbox import ljungbox 8 | from .mae import mae 9 | from .mape import mape 10 | from .mse import mse 11 | from .rmse import rmse 12 | from .degrees_of_freedom import degrees_of_freedom 13 | from .decomp_rssd import decomp_rssd 14 | from .harvey_collier import harvey_collier 15 | from .rainbox import rainbox 16 | from .vars_obs import vars_obs 17 | from .mdape import mdape 18 | from .smape import smape 19 | from .mda import mda 20 | from .mase import mase 21 | from .mfe import mfe 22 | from .log_accuracy_ratio import log_accuracy_ratio 23 | from .max_error import max_error 24 | from .dummy_constant import dummy_constant 25 | from .dummy_mean import dummy_mean 26 | from .dummy_median import dummy_median 27 | from .effect_share import effect_share 28 | from .spend_share import spend_share -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """The setup script.""" 2 | import pathlib 3 | from setuptools import setup, find_packages 4 | HERE = pathlib.Path(__file__).parent 5 | VERSION = "0.0.0.4" 6 | PACKAGE_NAME = "hommmer" 7 | AUTHOR = "mike taylor" 8 | AUTHOR_EMAIL = "mike@saxifrage.xyz" 9 | URL = "https://github.com/hammer-mt/hommmer" 10 | LICENSE = "MIT" 11 | DESCRIPTION = "A simple Marketing Mix Modeling library in Python" 12 | LONG_DESCRIPTION = (HERE / "README.md").read_text(encoding='utf8') 13 | LONG_DESC_TYPE = "text/markdown" 14 | INSTALL_REQUIRES = [ 15 | "numpy", 16 | "pandas", 17 | "matplotlib", 18 | "statsmodels", 19 | "typing", 20 | "scikit-learn", # 'sklearn' is a deprecated PyPI alias; scikit-learn is the real package 21 | "seaborn", 22 | # "prophet" 23 | ] 24 | setup(name=PACKAGE_NAME, 25 | version=VERSION, 26 | description=DESCRIPTION, 27 | long_description=LONG_DESCRIPTION, 28 | long_description_content_type=LONG_DESC_TYPE, 29 | author=AUTHOR, 30 | license=LICENSE, 31 | author_email=AUTHOR_EMAIL, 32 | url=URL, 33 | install_requires=INSTALL_REQUIRES, 34 | package_dir={"": "src"}, 35 | packages=find_packages(where="src") 36 | ) 37 | -------------------------------------------------------------------------------- /src/hommmer/features/vif.py: -------------------------------------------------------------------------------- 1 | from statsmodels.stats.outliers_influence import variance_inflation_factor 2 | import numpy as np 3 | import
pandas as pd 4 | np.seterr(divide='ignore', invalid='ignore') # hide error warning for vif 5 | 6 | def vif(df, X_labels, max_vif=5): 7 | # Variance Inflation Factor (VIF) 8 | # tests for colinearity: A VIF of over 10 for some feature indicates that over 90% 9 | # of the variance in that feature is explained by the remaining features. Over 100 10 | # indicates over 99%. Best practice is to keep variables with a VIF less than 5. 11 | 12 | X = df[X_labels] 13 | X_np = np.array(X) 14 | 15 | vif_results = [(X.columns[i], variance_inflation_factor(X_np, i)) for i in range(X_np.shape[1])] 16 | vif_df = pd.DataFrame(vif_results) 17 | vif_df.columns = ['idx', 'vif'] 18 | vif_df.index = vif_df['idx'] 19 | vif_df.drop(['idx'], axis=1, inplace=True) 20 | vif_df.index.name = None 21 | vif_df['vif_keep'] = vif_df['vif'] < max_vif 22 | 23 | vif_keep = list(vif_df[vif_df['vif_keep']==True].index.values) 24 | 25 | return vif_keep, vif_df -------------------------------------------------------------------------------- /src/hommmer/connectors/colab_helpers.py: -------------------------------------------------------------------------------- 1 | ### NOTE: these functions only work in Google Colab 2 | 3 | def save_local(df, file_name='abt'): 4 | from google.colab import files 5 | file_name = file_name + '.csv' 6 | df.to_csv(file_name, index=False) 7 | files.download(file_name) 8 | 9 | 10 | def upload_local(): 11 | from google.colab import files 12 | uploaded = files.upload() 13 | return uploaded 14 | 15 | 16 | def load_gsheet(url, offset=None): 17 | from google.colab import auth 18 | import gspread 19 | from oauth2client.client import GoogleCredentials 20 | import pandas as pd 21 | # authorize google sheets 22 | auth.authenticate_user() 23 | 24 | gc = gspread.authorize(GoogleCredentials.get_application_default()) 25 | 26 | spreadsheet = gc.open_by_url(url) 27 | sheet = spreadsheet.get_worksheet(0) 28 | 29 | if offset is None: 30 | df = pd.DataFrame(sheet.get_all_records()) 31 | else: 32 | df = pd.DataFrame(sheet.get_all_values()[offset:]) 33 | df.columns = df.iloc[0] 34 | df.drop([0], inplace=True) 35 | 36 | return df -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Michael Taylor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/hommmer/datasets/make_data.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import make_regression 2 | import pandas as pd 3 | 4 | from .make_dates import make_dates 5 | 6 | # generate regression dataset 7 | def make_data(target_name="y", num_variables=5, num_significant=4, num_observations=180, noise=30): 8 | # Make sure not more significant than variables 9 | if num_significant > num_variables: 10 | num_significant = num_variables 11 | 12 | # Generate the regression data 13 | features, target = make_regression(n_samples=num_observations, 14 | n_features=num_variables, 15 | n_informative=num_significant, 16 | n_targets=1, 17 | noise=noise) 18 | 19 | variable_names = ['x'+str(i) for i in range(num_variables)] # range over the count itself; len() of an int raises a TypeError 20 | 21 | # Create dataframe 22 | df = pd.DataFrame(features, columns=variable_names) 23 | 24 | # Add target data 25 | df[target_name] = target 26 | 27 | # Add dates 28 | df['date'] = make_dates(days=num_observations) 29 | 30 | return df 31 | -------------------------------------------------------------------------------- /src/hommmer/cleaners/unstack_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def unstack_data(df, metric_column, unstack_column, date_column='date'): 5 | 6 | # make a copy of the data with just the columns we need 7 | data = df[[date_column, metric_column, unstack_column]].copy() 8 | 9 | # convert the metric column to numeric 10 | data[metric_column] = pd.to_numeric(data[metric_column]) 11 | 12 | # pivot the data set 13 | pivoted = pd.pivot_table(data, index=[date_column], values=[metric_column], columns=[unstack_column], aggfunc=[np.sum]) 14 | 15 | # drop level and reset index 16 | pivoted.columns = pivoted.columns.droplevel(0) 17 | pivoted.columns.name = None 18 | pivoted = pivoted.reset_index() 19 | pivoted.columns = [col[1] for col in pivoted.columns] 20 | 21 | # rename unstacked metric columns 22 | metric_columns = list(pivoted.columns[1:]) 23 | metric_columns = [f"{c} | {metric_column}" for c in metric_columns] 24 | pivoted.columns = [date_column] + metric_columns 25 | 26 | # replace errors with zeros 27 | pivoted.fillna(0, inplace=True) 28 | 29 | # return the pivoted data 30 | return pivoted -------------------------------------------------------------------------------- /src/hommmer/models/Ridge.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import statsmodels.api as sm 3 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python 4 | from sklearn.linear_model import Ridge as SKRidge # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html 5 | 6 | from .Model import Model 7 | 8 | class Ridge(Model): 9 | def __init__(self, y, X, media_labels, settings): 10 | # inheritance and start timer 11 | super().__init__(y, X, media_labels, settings, "Ridge") 12 | start = timer() 13 | 14 | # fit the model 15 | self._model = self._fit() 16 | 17 | # init required properties 18 | self.coefficients = self._coefficients() 19 | 20 | # finish running 21 | end = timer() 22 | self.runtime = end - start # Time in seconds, e.g. 5.38091952400282 23 | 24 | # log model locally 25 | self._save() 26 | 27 | ### EDIT BELOW HERE ### 28 | 29 | # fit the model 30 | def _fit(self): 31 | return SKRidge(alpha=0.01, fit_intercept=False).fit(self.X_train, self.y_train) 32 | 33 | # get the coefficients 34 | def _coefficients(self): 35 | return self._model.coef_ 36 | -------------------------------------------------------------------------------- /src/hommmer/metrics/decomp_rssd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # https://github.com/facebookexperimental/Robyn/issues/82#issuecomment-845846447 3 | # https://github.com/facebookexperimental/Robyn/issues/110 4 | # https://github.com/facebookexperimental/Robyn/blob/dbd8d1f0e640265d5c0a1c3750e51ccf5e3e117d/source/fb_robyn.func.R#L1177 5 | # https://github.com/facebookexperimental/Robyn/issues/95 6 | 7 | # Decomposition distance (DECOMP.RSSD, decomposition root-sum-square distance), a major innovation of Facebook Robyn. 8 | # The intuition is this: say you're spending 90% on TV and 10% on FB. If you get 10% effect for TV and 90% for FB, 9 | # you'd probably not believe this result, no matter how low the model error (NRMSE) is. If you get 80% TV and 20% FB as 10 | # effect share, it'll be more "realistic". That is where the logic comes from: minimising the distance between share of spend 11 | # and share of effect. It's really about getting rid of the very extreme cases and having a set of results that are more realistic. 12 | 13 | # decomposition root sum of squared distance 14 | def decomp_rssd(effect_share, spend_share): 15 | value = round(np.sqrt(sum((np.array(effect_share)-np.array(spend_share))**2)),3) 16 | passed = "✔️" if value < 0.5 else "❌" 17 | return value, passed 18 | 19 |
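# A minimal usage sketch (illustrative, not part of the library): with spend
# shares [0.5, 0.3, 0.2] and effect shares [0.6, 0.3, 0.1] across three channels,
# the distance is sqrt(0.1**2 + 0**2 + (-0.1)**2) ≈ 0.141, comfortably under the
# 0.5 cutoff used above:
#
#   >>> decomp_rssd([0.6, 0.3, 0.1], [0.5, 0.3, 0.2])
#   (0.141, '✔️')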
-------------------------------------------------------------------------------- /src/hommmer/cleaners/interpolate_weekly.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import datetime as dt 4 | 5 | def interpolate_weekly(df, date_col=None, resample_col=None): 6 | 7 | if date_col is None: 8 | date_col = df.columns[0] 9 | 10 | if resample_col is None: 11 | resample_col = df.columns[1] 12 | 13 | data = df[[date_col, resample_col]].copy() 14 | 15 | data[date_col] = data[date_col].apply(lambda x: dt.datetime.strptime(f"{x}-1", "%Y-%W-%w")) # mondays 16 | data[date_col] = pd.to_datetime(data[date_col]) # datetime 17 | data.set_index(date_col, inplace=True) 18 | data_reindexed = data.reindex(pd.date_range(start=data.index.min(), 19 | end=data.index.max() + dt.timedelta(days=6), 20 | freq='1D')) 21 | 22 | col_to_resample = data_reindexed.columns[0] 23 | data_reindexed[col_to_resample] = pd.to_numeric(data_reindexed[col_to_resample]) 24 | data_reindexed[col_to_resample].replace({0:np.nan}, inplace=True) 25 | interpolated = data_reindexed.interpolate(method='linear') 26 | interpolated = interpolated / 7 27 | interpolated.reset_index(inplace=True) 28 | interpolated.rename({'index': 'date'}, axis=1, inplace=True) 29 | 30 | return interpolated -------------------------------------------------------------------------------- /src/hommmer/features/seasonal_decomp.py: -------------------------------------------------------------------------------- 1 | from statsmodels.tsa.seasonal import seasonal_decompose 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | # https://juanitorduz.github.io/fb_prophet/ 5 | 6 | def seasonal_decomp(df, target_col,
date_col, freq="D"): 7 | # freq is a pandas frequency alias: "W-MON", "W-SUN" or "D" 8 | pred_df = df[[date_col, target_col]].copy() 9 | pred_df.rename(columns={date_col:'ds', target_col:'y'}, inplace=True) 10 | pred_df = pred_df.set_index('ds').asfreq(freq) 11 | decomp_obj = seasonal_decompose( 12 | x=pred_df['y'], 13 | model='additive' 14 | ) 15 | fig, ax = plt.subplots(4, 1, figsize=(12, 12)) 16 | 17 | # Observed time series. 18 | decomp_obj.observed.plot(ax=ax[0]) 19 | ax[0].set(title='observed') 20 | # Trend component. 21 | decomp_obj.trend.plot(label='fit', ax=ax[1]) 22 | ax[1].set(title='trend') 23 | # Seasonal component. 24 | decomp_obj.seasonal.plot(label='fit', ax=ax[2]) 25 | ax[2].set(title='seasonal') 26 | # Residual. 27 | decomp_obj.resid.plot(label='fit', ax=ax[3]) 28 | ax[3].set(title='resid') 29 | 30 | fig.suptitle('Time Series Decomposition', y=1.01) 31 | plt.tight_layout() 32 | decomp_df = pd.DataFrame([decomp_obj.observed, decomp_obj.trend, decomp_obj.seasonal, decomp_obj.resid]) 33 | decomp_df.fillna(0, inplace=True) 34 | return decomp_df -------------------------------------------------------------------------------- /src/hommmer/features/prophet_seasonality.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from prophet import Prophet # use the same package as the diagnostics import below (fbprophet is the deprecated name) 4 | from prophet.diagnostics import performance_metrics, cross_validation 5 | 6 | def prophet_seasonality(df, target_col, date_col, country="US", p=30, freq="D"): 7 | pred_df = df[[date_col, target_col]].copy() 8 | pred_df.rename(columns={date_col:'ds', target_col:'y'}, inplace=True) 9 | daily = True if freq == "D" else False 10 | m = Prophet(yearly_seasonality=True, weekly_seasonality=daily, seasonality_mode='multiplicative') # instantiate Prophet 11 | m.add_country_holidays(country_name=country) 12 | 13 | # fit the model 14 | m.fit(pred_df) 15 | 16 | # predict the future 17 | future = m.make_future_dataframe(periods=p, freq=freq) # honour the freq argument rather than hardcoding daily 18 | 19 | # use the data in the future dataframe to predict y and insert it into a new dataframe 20 | forecast = m.predict(future) 21 | 22 | # let's see how the prediction worked 23 | m.plot(forecast, figsize=(40,10)) 24 | 25 | # let's see how the seasonality worked to predict the y 26 | m.plot_components(forecast) 27 | 28 | # Cross validate your performances 29 | df_cv = cross_validation(m, initial='400 days', period='200 days', horizon = '60 days') 30 | # define how many days of prediction have the lowest mape 31 | 32 | df_p = performance_metrics(df_cv) 33 | 34 | return forecast, df_p -------------------------------------------------------------------------------- /src/hommmer/charts/accuracy.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | import matplotlib.ticker as mtick 4 | 5 | def accuracy(y_actual, y_pred, accuracy=None): 6 | # set up figure and subplots 7 | fig, ax = plt.subplots(figsize=(14,8), nrows=2, ncols=1, gridspec_kw={'height_ratios': [3, 1]}) 8 | 9 | # create plot df 10 | plot_df = pd.DataFrame() 11 | plot_df['Actual'] = y_actual 12 | plot_df['Predicted'] = y_pred 13 | plot_df['Error'] = (y_pred - y_actual) / y_actual * 100 14 | 15 | # plot actual vs predicted on grid 16 | plot_df[['Actual', 'Predicted']].plot(ax=ax[0], ylabel=y_actual.name) 17 | 18 | if accuracy: 19 | ax[0].annotate(f'{accuracy[0]} = {accuracy[1]}', xy=(0.05, 0.92), xycoords='axes fraction') 20 | 21 | ax[0].legend(loc="upper center", bbox_to_anchor=(0.5, 1.12),
ncol=2) 22 | ax[0].grid(True, which='both') 23 | 24 | # plot error on grid 25 | plot_df[['Error']].plot(ax=ax[1], color='red') 26 | ax[1].grid(True, which='both') 27 | ax[1].legend(loc="upper center", bbox_to_anchor=(0.5, 1.35), ncol=2) 28 | fmt = '%.0f%%' # Format you want the ticks, e.g. '40%' 29 | yticks = mtick.FormatStrFormatter(fmt) 30 | ax[1].yaxis.set_major_formatter(yticks) 31 | 32 | # show plots 33 | fig.autofmt_xdate(rotation=45) 34 | plt.gcf().suptitle("Actual vs Predicted", fontsize=20) 35 | 36 | plt.show() -------------------------------------------------------------------------------- /src/hommmer/features/delayed_adstock.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def delayed_adstock(x, L, P, D): 4 | ''' 5 | params: 6 | x: original media variable, array 7 | L: length 8 | P: peak, delay in effect 9 | D: decay, retain rate 10 | returns: 11 | array, adstocked media variable 12 | ''' 13 | # https://github.com/sibylhe/mmm_stan#12-adstock 14 | # prepend x with zeros equal to the length -1 15 | x = np.append(np.zeros(L-1), x) 16 | 17 | # create an array of zeros equal to the length 18 | weights = np.zeros(L) 19 | 20 | # loop through each day in length 21 | for l in range(L): 22 | # weight is decay to the power of index - peak squared 23 | weight = D**((l-P)**2) 24 | # add weight to weights in the right place (from back to front) 25 | weights[L-1-l] = weight 26 | 27 | # create an empty list 28 | adstocked_x = [] 29 | # loop through length - 1 up to len(x) 30 | for i in range(L-1, len(x)): 31 | # get array of x from index - length + 1 to index + 1 32 | x_array = x[i-L+1:i+1] 33 | # sum the x_array * weights / sum(weights) to get adstock value 34 | xi = sum(x_array * weights)/sum(weights) 35 | # append adstocked value to adstocked_x 36 | adstocked_x.append(xi) 37 | 38 | # convert adstocked_x into an np.array 39 | adstocked_x = np.array(adstocked_x) 40 | 41 | # return adstocked_x 42 | return adstocked_x -------------------------------------------------------------------------------- /src/hommmer/cleaners/interpolate_monthly.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def interpolate_monthly(df, date_col=None, resample_col=None): 5 | 6 | if date_col is None: 7 | date_col = df.columns[0] 8 | 9 | if resample_col is None: 10 | resample_col = df.columns[1] 11 | 12 | data = df[[date_col, resample_col]].copy() 13 | 14 | data[date_col] = pd.to_datetime(data[date_col], format="%Y-%m") 15 | data['start_of_month'] = (data[date_col].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1)) 16 | data['end_of_month'] = pd.to_datetime(data['start_of_month']) + pd.offsets.MonthEnd(1) 17 | data['days_in_month'] = (data['end_of_month'] - data['start_of_month']).dt.days + 1 18 | data[resample_col] = data[resample_col] / data['days_in_month'] 19 | data['date'] = data['start_of_month'] 20 | 21 | dr = pd.date_range(start=data.start_of_month.min(), 22 | end=data.end_of_month.max(), 23 | freq='1D') 24 | date_df = pd.DataFrame({'date': dr}) 25 | merged = date_df.merge(data, how='left', on='date') 26 | reindexed = merged.set_index('date') 27 | 28 | resampled = reindexed[resample_col] 29 | resampled.replace({0:np.nan}, inplace=True) 30 | resampled = resampled.interpolate(method='linear') 31 | resampled = resampled.reset_index() 32 | resampled.rename({'index': 'date'}, axis=1, inplace=True) 33 | resampled.fillna(0, inplace=True) 34 |
return resampled -------------------------------------------------------------------------------- /src/hommmer/models/Linear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import statsmodels.api as sm 3 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python 4 | 5 | from .Model import Model 6 | 7 | class Linear(Model): 8 | def __init__(self, y, X, media_labels, settings): 9 | # inheritance and start timer 10 | super().__init__(y, X, media_labels, settings, "Linear") 11 | start = timer() 12 | 13 | # fit the model 14 | self._model = self._fit() 15 | 16 | # init required properties 17 | self.coefficients = self._coefficients() 18 | 19 | # finish running 20 | end = timer() 21 | self.runtime = end - start # Time in seconds, e.g. 5.38091952400282 22 | 23 | # log model locally 24 | self._save() 25 | 26 | ### EDIT BELOW HERE ### 27 | 28 | # fit the model 29 | def _fit(self): 30 | return sm.OLS(self.y_train, self.X_train).fit() 31 | 32 | # get the coefficients 33 | def _coefficients(self): 34 | return self._model.params.values 35 | 36 | # get the pvalues 37 | def _pvalues(self): 38 | return self._model.pvalues 39 | 40 | # calculate the confidence intervals 41 | def _confidence_intervals(self): 42 | conf_int_df = self._model.conf_int() 43 | conf_int_df.columns = ["lower", "upper"] 44 | conf_int_df['uncertainty'] = (conf_int_df["upper"] - conf_int_df["lower"]) / np.mean(self.y_train) * 100 45 | return conf_int_df 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /src/hommmer/features/weibull_adstock.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # https://github.com/annalectnl/weibull-adstock/blob/master/adstock_weibull_annalect.pdf 4 | # https://towardsdatascience.com/python-stan-implementation-of-multiplicative-marketing-mix-model-with-deep-dive-into-adstock-a7320865b334 5 | def weibull_adstock(x, window, k): 6 | ''' 7 | params: 8 | x: original media variable, array 9 | window: length 10 | k: shape 11 | returns: 12 | array, adstocked media variable 13 | ''' 14 | # prepend x with zeros equal to the window - 1 15 | x = np.append(np.zeros(window-1), x) 16 | 17 | # create an array of zeros equal to the window 18 | weights = np.zeros(window) 19 | 20 | # lambda is window / (-ln(0.001)) to the power of 1/k 21 | lam = window / (-np.log(0.001))**(1/k) 22 | 23 | # loop through each day in window 24 | for l in range(window): 25 | # weight is minus lag/lambda to the power of k exponentiated 26 | weight = np.exp(-(l/lam)**k) 27 | # add weight to weights in the right place (from front to back) 28 | weights[window-1-l] = weight 29 | 30 | # create an empty list 31 | adstocked_x = [] 32 | # loop through window - 1 up to len(x) 33 | for i in range(window-1, len(x)): 34 | # get array of x from index - length + 1 to index + 1 35 | x_array = x[i-window+1:i+1] 36 | # sum the x_array * weights / sum(weights) to get adstock value 37 | xi = sum(x_array * weights)/sum(weights) 38 | # append adstocked value to adstocked_x 39 | adstocked_x.append(xi) 40 | 41 | # convert adstocked_x into an np.array 42 | adstocked_x = np.array(adstocked_x) 43 | 44 | # return adstocked_x 45 | return adstocked_x -------------------------------------------------------------------------------- /src/hommmer/charts/response.py: 
-------------------------------------------------------------------------------- 1 | from sklearn import linear_model 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | def response(df, interval, transformer, **kwargs): 5 | df['spend_transformed'] = transformer(df['spend'], **kwargs) 6 | predict_y = 'conversions' # variable you're predicting 7 | dependent_X = ['spend_transformed'] # variables you're using to predict 8 | 9 | y = df[predict_y] 10 | X = df[dependent_X] 11 | 12 | model = linear_model.LinearRegression() 13 | model.fit(X, y) 14 | 15 | xmax = df['spend'].max() 16 | xmax_round = xmax if xmax % interval == 0 else xmax + interval - xmax % interval 17 | 18 | resp_df = pd.DataFrame({ 19 | "spend": range(interval, int(xmax_round), interval) 20 | }) 21 | 22 | resp_df['spend_transformed'] = transformer(resp_df['spend'], **kwargs) 23 | X_resp = resp_df[['spend_transformed']] 24 | resp_df['forecast'] = model.predict(X_resp) 25 | resp_df['forecast'] = resp_df['forecast'].round().astype(int) 26 | resp_df['CPA'] = round(resp_df['spend'] / resp_df['forecast'],2) 27 | 28 | resp_df.plot(x='spend', y='forecast', kind='line', figsize=(10,5), style='.-') 29 | plt.title('Response Curve', y=1.12) 30 | plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.12), ncol=2) 31 | r_sq = round(model.score(X, y), 2) 32 | plt.annotate(f'R-squared = {r_sq}', xy=(0.05, 0.90), xycoords='axes fraction') 33 | plt.annotate(f'Transform = {transformer.__name__}', xy=(0.05, 0.80), xycoords='axes fraction') 34 | plt.annotate(f'{"".join([f"{k}={v} " for k,v in kwargs.items()])}', xy=(0.05, 0.70), xycoords='axes fraction') 35 | 36 | plt.xticks(df.index) # force all x values to show 37 | plt.ylabel('Conversions') 38 | 39 | plt.show() 40 | return resp_df -------------------------------------------------------------------------------- /src/hommmer/features/weibull_adstock_delayed.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # https://github.com/annalectnl/weibull-adstock/blob/master/adstock_weibull_annalect.pdf 4 | # https://towardsdatascience.com/python-stan-implementation-of-multiplicative-marketing-mix-model-with-deep-dive-into-adstock-a7320865b334 5 | def weibull_adstock_delayed(x, window, k, p): 6 | ''' 7 | params: 8 | x: original media variable, array 9 | window: length 10 | k: shape 11 | p: peak 12 | returns: 13 | array, adstocked media variable 14 | ''' 15 | # prepend x with zeros equal to the window - 1 16 | x = np.append(np.zeros(window-1), x) 17 | 18 | # create an array of zeros equal to the window 19 | weights = np.zeros(window) 20 | 21 | # lambda is window / (-ln(0.001)) to the power of 1/k 22 | lam = window / (-np.log(0.001))**(1/k) 23 | 24 | # loop through each day in window 25 | for l in range(window): 26 | # weight is minus lag/lambda to the power of k exponentiated, with a delay of p 27 | delayed_weight = np.exp(-((l-p)/lam)**k) 28 | # add weight to weights in the right place (from front to back) 29 | weights[window-1-l] = delayed_weight 30 | 31 | # create an empty list 32 | adstocked_x = [] 33 | # loop through window - 1 up to len(x) 34 | for i in range(window-1, len(x)): 35 | # get array of x from index - length + 1 to index + 1 36 | x_array = x[i-window+1:i+1] 37 | # sum the x_array * weights / sum(weights) to get adstock value 38 | xi = sum(x_array * weights)/sum(weights) 39 | # append adstocked value to adstocked_x 40 | adstocked_x.append(xi) 41 | 42 | # convert adstocked_x into an np.array 43 | adstocked_x = np.array(adstocked_x) 44 | 45 | # return adstocked_x 46 | return adstocked_x
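# An illustrative usage sketch (not from the library itself): spread a one-off
# spend pulse over a two-week window, peaking around day 3:
#
#   >>> spend = np.array([100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
#   >>> weibull_adstock_delayed(spend, window=14, k=2, p=3)  # 14-element array
#
# Caveat (an assumption worth verifying): for lags l < p the base (l-p)/lam is
# negative, so a non-integer shape k makes ((l-p)/lam)**k evaluate to NaN under
# numpy floats; integer k values keep the weights finite.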
-------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # Todo List 2 | 3 | - multiplier rules by channel 4 | - bayesian MCMC 5 | - embedded / hierarchical modeling 6 | - rainbox test 7 | - harvey collier test 8 | - goldfeld quandt test 9 | - margin of error / ci 10 | - principal components 11 | - shuffle dataset sklearn.utils 12 | - gradientboostingregressor 13 | - plot partial dependence 14 | - cascading benchmark priors 15 | - model calibration with lift / geo tests 16 | - rollback update windows 17 | - creative scorecard grading 18 | - nevergrad hyperparameter optimization 19 | - train test split 20 | - cross validation 21 | - simulated annealing feature selection with random hill jumping 22 | - forward feature selection 23 | - automated anomaly dummies 24 | - model pipelines 25 | - etl connectors (facebook, google, etc) 26 | - expected value of information gain 27 | - geo interpolation 28 | - automated time lags 29 | - simulator model training 30 | - naive baseline validation 31 | - dummy baseline validation 32 | - nvar incremental shading feature selection (1 var models, 2 var models, etc) 33 | - kpi tree rollup models 34 | - gaap accounting for MMM 35 | - cohort time to value 36 | - scaling values 37 | - post-selection inference 38 | - principal component analysis 39 | - original correlated variables 40 | - AIC / BIC selection criteria 41 | - channel deprivation chart 42 | - pareto front chart 43 | - decomposition waterfall chart 44 | - share of spend vs share of effect chart 45 | - response curves chart 46 | - variable histogram chart 47 | - export to google slides / powerpoint 48 | - export to excel / csv 49 | - export to png / jpeg 50 | - last x periods train test split 51 | - seasonality with facebook prophet 52 | - national holidays by country 53 | - fix mapes div/0 with + np.finfo(float).eps 54 | - walk forward validation 55 | - facebook prophet variable input trend forecasting 56 | - forecast on accuracy chart with train test split 57 | -------------------------------------------------------------------------------- /src/hommmer/cleaners/__init__.py: -------------------------------------------------------------------------------- 1 | from .week_commencing import week_commencing 2 | from .unstack_data import unstack_data 3 | from .clean_numeric import clean_numeric 4 | from .date_range_dummies import date_range_dummies 5 | from .days_in_month import days_in_month 6 | from .end_of_month import end_of_month 7 | from .group_monthly import group_monthly 8 | from .group_weekly import group_weekly 9 | from .date_dummies import date_dummies 10 | from .interpolate_monthly import interpolate_monthly 11 | from .interpolate_weekly import interpolate_weekly 12 | from .make_column_index import make_column_index 13 | from .remove_outliers import remove_outliers 14 | from .rename_column import rename_column 15 | from .start_of_month import start_of_month 16 | from .transpose_data import transpose_data 17 | from .unstack_data import unstack_data # (duplicate of line 2, kept for compatibility) 18 | from .week_commencing import week_commencing # (duplicate of line 1, kept for compatibility) 19 | from .categorize_campaigns import categorize_campaigns 20 | from .merge_data import merge_data 21 | from .guess_date_column import guess_date_column 22 | from .guess_y_column import guess_y_column 23 | from .guess_media_columns import guess_media_columns 24 | from .add_X_labels import add_X_labels 25 |
from .del_X_labels import del_X_labels 26 | from .get_all_X_labels import get_all_X_labels 27 | from .get_cols_containing import get_cols_containing 28 | from .str_to_dummy import str_to_dummy 29 | from .cat_to_dummies import cat_to_dummies 30 | from .drop_cols import drop_cols 31 | from .convert_date import convert_date 32 | from .drop_n_rows import drop_n_rows 33 | from .count_na import count_na 34 | from .count_na_cols import count_na_cols 35 | from .interpolate_dates import interpolate_dates 36 | from .count_dup_cols import count_dup_cols 37 | from .modify_labels import modify_labels 38 | from .describe_data import describe_data 39 | from .standard_scaler import standard_scaler 40 | from .train_test_split import train_test_split 41 | from .make_date_index import make_date_index 42 | from .holiday_dummies import holiday_dummies 43 | from .guess_categorical_variables import guess_categorical_variables 44 | from .guess_numerical_variables import guess_numerical_variables 45 | from .make_geodate_index import make_geodate_index 46 | 47 | -------------------------------------------------------------------------------- /src/hommmer/datasets/duff.csv: -------------------------------------------------------------------------------- 1 | date,sales,facebook,tiktok,google,emails,blog,search,sold_out 2 | 2029-12-31,1976,0,0,1580.74,184,322,100,0 3 | 2030-01-07,722,0,0,196.73,0,447,65,0 4 | 2030-01-14,800,0,0,498.89,0,469,57,0 5 | 2030-01-21,812,0,0,346.76,0,461,70,0 6 | 2030-01-28,887,0,0,646.23,0,454,71,0 7 | 2030-02-04,916,0,0,633.05,67,503,72,0 8 | 2030-02-11,921,0,0,378.24,0,510,82,0 9 | 2030-02-18,956,0,0,522.66,0,529,84,0 10 | 2030-02-25,883,0,0,144.75,0,556,82,0 11 | 2030-03-04,875,0,0,299.05,0,554,68,0 12 | 2030-03-11,921,0,0,284.25,0,558,75,0 13 | 2030-03-18,920,0,0,254.57,0,580,71,0 14 | 2030-03-25,969,0,0,253.71,0,580,67,0 15 | 2030-04-01,891,0,0,164.9,764,597,57,0 16 | 2030-04-08,993,0,0,223.16,997,604,62,0 17 | 2030-04-15,942,0,0,122.97,0,552,65,0 18 | 2030-04-22,1060,0,0,271.75,833,631,70,0 19 | 2030-04-29,1056,0,0,333.63,0,677,62,0 20 | 2030-05-06,1043,0,0,437.34,0,711,58,0 21 | 2030-05-13,1104,0,0,465.92,0,784,68,0 22 | 2030-05-20,1109,0,0,427.07,1220,723,57,0 23 | 2030-05-27,1108,0,0,213.95,23,677,74,0 24 | 2030-06-03,1303,268.83,0,556.79,0,729,65,0 25 | 2030-06-10,1361,259.54,0,443.61,270,729,73,0 26 | 2030-06-17,1482,470.94,0,1056.08,150,741,77,0 27 | 2030-06-24,1592,691.37,0,1035.98,0,773,90,0 28 | 2030-07-01,2097,820.81,0,824.07,0,721,100,0 29 | 2030-07-08,1575,875.65,0,675,0,833,84,0 30 | 2030-07-15,1551,856.12,0,517.92,218,1060,74,0 31 | 2030-07-22,1511,818.55,0,214.05,0,1080,90,0 32 | 2030-07-29,1524,863.49,0,632.07,0,1036,74,0 33 | 2030-08-05,1529,844.6,0,680.1,1083,1091,80,0 34 | 2030-08-12,778,385.1,0,373.61,0,895,74,0.5 35 | 2030-08-19,68,0,0,0,0,925,74,1 36 | 2030-08-26,1469,788.88,0,1548.51,0,1009,66,0 37 | 2030-09-02,1497,783.7,0,598.4,1159,901,80,0 38 | 2030-09-09,1510,1494.76,0,198.91,0,1093,64,0 39 | 2030-09-16,1737,1957.76,0,296.15,1151,1359,75,0 40 | 2030-09-23,1527,1982.06,0,463.13,0,1688,71,0 41 | 2030-09-30,1440,1416.94,0,497.73,0,1490,73,0 42 | 2030-10-07,2101,3791.06,145,622.07,0,1739,71,0 43 | 2030-10-14,1946,3943.91,72.53,700.78,0,1954,70,0 44 | 2030-10-21,1825,3499.61,646.74,605.02,0,1945,69,0 45 | 2030-10-28,1916,3479.21,885.01,658.08,0,1728,73,0 46 | 2030-11-04,1825,3367.33,1221.43,1552.87,0,1968,69,0 47 | 2030-11-11,1941,3413.13,2798.73,735.96,0,2261,76,0 48 | 2030-11-18,1753,3503.93,1948.8,546.8,0,3016,77,0 49 | 
2030-11-25,1676,3492.87,2697.06,638.9,0,1879,66,0 50 | 2030-12-02,1969,3743.92,2653.56,1597.17,238,2334,77,0 51 | 2030-12-09,1849,3348.18,1895.71,1033.51,0,2674,76,0 52 | 2030-12-16,1880,3410.33,2386.76,928.93,0,2700,82,0 53 | 2030-12-23,2216,3740.37,2012.53,625.26,0,1663,100,0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Local notebooks for testing 132 | notebooks/ 133 | .ipynb -------------------------------------------------------------------------------- /src/hommmer/models/LogLinear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import statsmodels.api as sm 3 | import pandas as pd 4 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python 5 | 6 | from .Model import Model 7 | from hommmer.helpers import log_ex_zeros 8 | 9 | # https://www.spencertom.com/2020/08/29/marketing-mix-modeling-mmm-part-3-of-3/ 10 | # https://stats.stackexchange.com/questions/140713/making-predictions-with-log-log-regression-model 11 | # https://davegiles.blogspot.com/2014/12/s.html 12 | class LogLinear(Model): 13 | def __init__(self, y, X, media_labels, settings): 14 | # inheritance and start timer 15 | super().__init__(y, X, media_labels, settings, "LogLinear") 16 | start = timer() 17 | 18 | # fit the model 19 | self._model = self._fit() 20 | 21 | # init required properties 22 | self.coefficients = self._coefficients() 23 | 24 | # finish running 25 | end = timer() 26 | self.runtime = end - start # Time in seconds, e.g. 
5.38091952400282 27 | 28 | # log model locally 29 | self._save() 30 | 31 | ### EDIT BELOW HERE ### 32 | 33 | # fit the model 34 | def _fit(self): 35 | logged_y = np.log(self.y_train + 1) 36 | return sm.OLS(logged_y, self.X_train).fit() # log y 37 | 38 | # get the coefficients 39 | def _coefficients(self): 40 | return self._model.params.values 41 | 42 | # get the pvalues 43 | def _pvalues(self): 44 | return self._model.pvalues 45 | 46 | # calculate the confidence intervals 47 | def _confidence_intervals(self): 48 | conf_int_df = self._model.conf_int() 49 | conf_int_df.columns = ["lower", "upper"] 50 | return (conf_int_df["upper"] - conf_int_df["lower"]) / np.mean(np.log(self.y_train)) * 100 51 | 52 | ### OVERRIDE BASE FUNCS ### 53 | def contribution(self, X=None): 54 | if X is None: 55 | X = self.X_actual 56 | 57 | coef_df = pd.DataFrame({'coefficient': self.coefficients}, index=X.columns) 58 | 59 | y_pred_log = self._model.predict(X) 60 | y_pred = np.exp(y_pred_log) - 1 # transform log y back into y 61 | data = [] 62 | for x in list(X.columns): 63 | contrib = coef_df['coefficient'].loc[x] * X[x] 64 | data.append(contrib) 65 | 66 | log_contrib_df = pd.DataFrame(data).T 67 | contrib_df = log_contrib_df.copy() 68 | # transform log contribs by using share 69 | for x in contrib_df.columns: 70 | contrib_share = log_contrib_df[x] / y_pred_log 71 | contrib_df[x] = y_pred * contrib_share 72 | 73 | return contrib_df 74 | 75 | 76 | 77 | 78 | 79 | 80 |
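# A worked sketch of the share transform above (illustrative numbers, not library
# output): if one week's prediction decomposes in log space as y_pred_log = 2.0
# with log contributions [1.2, 0.8], the shares are [0.6, 0.4]; then
# y_pred = exp(2.0) - 1 ≈ 6.389, and the unlogged contributions become
# 0.6 * 6.389 ≈ 3.833 and 0.4 * 6.389 ≈ 2.556, which sum back to y_pred.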
-------------------------------------------------------------------------------- /src/hommmer/models/LogLog.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import statsmodels.api as sm 3 | import pandas as pd 4 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python 5 | 6 | from .Model import Model 7 | from hommmer.helpers import log_ex_zeros 8 | 9 | # https://www.spencertom.com/2020/08/29/marketing-mix-modeling-mmm-part-3-of-3/ 10 | # https://stats.stackexchange.com/questions/140713/making-predictions-with-log-log-regression-model 11 | # https://davegiles.blogspot.com/2014/12/s.html 12 | class LogLog(Model): 13 | def __init__(self, y, X, media_labels, settings): 14 | # inheritance and start timer 15 | super().__init__(y, X, media_labels, settings, "LogLog") 16 | start = timer() 17 | 18 | # fit the model 19 | self._model = self._fit() 20 | 21 | # init required properties 22 | self.coefficients = self._coefficients() 23 | 24 | # finish running 25 | end = timer() 26 | self.runtime = end - start # Time in seconds, e.g. 5.38091952400282 27 | 28 | # log model locally 29 | self._save() 30 | 31 | ### EDIT BELOW HERE ### 32 | 33 | # fit the model 34 | def _fit(self): 35 | logged_y = np.log(self.y_train + 1) 36 | logged_X = self.X_train.copy() 37 | for x in list(self.X_train.columns): 38 | logged_X[x] = np.log(self.X_train[x] + 1) 39 | 40 | return sm.OLS(logged_y, logged_X).fit() # log both y and X 41 | 42 | # get the coefficients 43 | def _coefficients(self): 44 | return self._model.params.values 45 | 46 | # get the pvalues 47 | def _pvalues(self): 48 | return self._model.pvalues 49 | 50 | # calculate the confidence intervals 51 | def _confidence_intervals(self): 52 | conf_int_df = self._model.conf_int() 53 | conf_int_df.columns = ["lower", "upper"] 54 | return (conf_int_df["upper"] - conf_int_df["lower"]) / np.mean(np.log(self.y_train)) * 100 55 | 56 | ### OVERRIDE BASE FUNCS ### 57 | def contribution(self, X=None): 58 | if X is None: 59 | X = self.X_actual 60 | 61 | coef_df = pd.DataFrame({'coefficient': self.coefficients}, index=X.columns) 62 | 63 | X_log = np.log(X+1) 64 | y_pred_log = self._model.predict(X_log) 65 | y_pred = np.exp(y_pred_log) - 1 # transform log y back into y 66 | 67 | data = [] 68 | for x in list(X.columns): 69 | contrib = coef_df['coefficient'].loc[x] * np.log(X[x] + 1) 70 | data.append(contrib) 71 | 72 | log_contrib_df = pd.DataFrame(data).T 73 | contrib_df = log_contrib_df.copy() 74 | # transform log contribs by using share 75 | for x in contrib_df.columns: 76 | contrib_share = log_contrib_df[x] / y_pred_log 77 | contrib_df[x] = y_pred * contrib_share 78 | 79 | return contrib_df -------------------------------------------------------------------------------- /src/hommmer/models/DeepLearning.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | from sklearn.compose import ColumnTransformer 5 | from sklearn.model_selection import GridSearchCV 6 | from sklearn.neural_network import MLPRegressor 7 | from sklearn.preprocessing import QuantileTransformer, OneHotEncoder, Normalizer 8 | from sklearn import set_config 9 | 10 | from hommmer.cleaners import guess_categorical_variables, guess_numerical_variables 11 | set_config(display='diagram') 12 | from sklearn.pipeline import Pipeline 13 | from sklearn.model_selection import train_test_split,cross_validate 14 | from joblib import load, dump 15 | from sklearn.inspection import permutation_importance, plot_partial_dependence 16 | from sklearn.metrics import mean_absolute_error as mae 17 | from sklearn.utils import shuffle 18 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python 19 | 20 | from .Model import Model 21 | 22 | class DeepLearning(Model): 23 | def __init__(self, y, X, media_labels, settings): 24 | # inheritance and start timer 25 | super().__init__(y, X, media_labels, settings, "DeepLearning") 26 | start = timer() 27 | 28 | # fit the model 29 | self._model = self._fit() 30 | 31 | # finish running 32 | end = timer() 33 | self.runtime = end - start # Time in seconds, e.g.
5.38091952400282 34 | 35 | # log model locally 36 | self._save() 37 | 38 | ### EDIT BELOW HERE ### 39 | 40 | # fit the model 41 | def _fit(self): 42 | all_features = list(self.X_train.columns) 43 | categorical = guess_categorical_variables(self.X_train) 44 | numerical = guess_numerical_variables(self.X_train.drop(categorical, axis=1)) 45 | transformers = [ 46 | ('one hot', OneHotEncoder(handle_unknown='ignore'), categorical), 47 | ('scaler', QuantileTransformer(), numerical), 48 | ('normalizer', Normalizer(), all_features) 49 | ] 50 | ct = ColumnTransformer(transformers) 51 | steps = [ 52 | ('column_transformer', ct), 53 | ('model', MLPRegressor(solver='lbfgs')) 54 | # solver 'lbfgs' suits datasets with fewer than 1000 rows; above 1000 rows use solver 'adam' 55 | ] 56 | pipeline = Pipeline(steps) 57 | param_space = { 58 | 'column_transformer__scaler__n_quantiles':[80,100,120], 59 | 'column_transformer__normalizer':[ Normalizer(), 'passthrough' ], 60 | 'model__hidden_layer_sizes':[(35,35),(50,50),(75,75)], 61 | 'model__alpha':[0.005, 0.001] 62 | } 63 | 64 | # pass the param space as "param_grid", point it at the pipeline, then choose how many cross-validation folds to run ("cv=") and the verbosity. 65 | grid = GridSearchCV(pipeline, param_grid=param_space, cv=3, verbose=2) 66 | grid.fit(self.X_train, self.y_train) 67 | return grid.best_estimator_ 68 | 69 | ### OVERRIDE ### 70 | def contribution(self, X=None): 71 | if X is None: 72 | X = self.X_actual 73 | 74 | res = permutation_importance(self._model, X, self.y_actual, n_repeats=10) 75 | 76 | # create dataframe to collect results 77 | imp = pd.DataFrame(res['importances'].T, columns=X.columns) 78 | # NOTE: permutation importance measures error inflation, not a true decomposition; returned here as a stand-in 79 | return imp -------------------------------------------------------------------------------- /src/hommmer/main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from .helpers import log, init_logging 4 | from .cleaners import make_date_index, make_geodate_index 5 | from .features import geometric_adstock, power_saturation 6 | from .models import Linear, LogLinear, LogLog, Ridge, DeepLearning 7 | 8 | def build(path, target, media, organic=None, date=None, geo=None, adstock=None, saturation=None, override={}): 9 | # default settings 10 | settings = { 11 | "file": path, 12 | "model": 'linear', 13 | "geo": geo, 14 | "split": 0.15, 15 | "metric": 'nrmse', 16 | "verbose": False 17 | } 18 | 19 | # override settings 20 | settings.update(override) 21 | 22 | init_logging(settings['verbose']) 23 | 24 | # load the dataframe and get the X_labels 25 | df = pd.read_csv(path) 26 | df.fillna(0, inplace=True) 27 | X_labels = list(df.columns) 28 | 29 | # guess date if not set 30 | if date is None: 31 | if 'date' in df.columns: 32 | date = 'date' 33 | elif 'Date' in df.columns: 34 | date = 'Date' 35 | else: 36 | date = df.columns[0] 37 | 38 | # if organic is not set, default it to everything that isn't media, the target or the date 39 | if organic is None: 40 | organic = [x for x in X_labels if x not in media and x not in (target, date)] 41 | # keep only media + organic as regressors, so the target never ends up in X 42 | X_labels = media + organic 43 | # if organic is set, remove anything not in media or organic 44 | else: 45 | for x in df.columns: 46 | if x not in media and x not in organic: 47 | X_labels.remove(x) 48 | 49 | # log model info 50 | log("building a model") 51 | log(f"file: {path}") 52 | log(f"y = {target}") 53 | log(f"X = {', '.join(X_labels)}") 54 | log(f"vars: {len(X_labels)}") 55 | log(f"obs: {df.shape[0]}") 56 | log(f"settings: {settings}") 57 | 58 | # make date the index 59 | if geo is None: 60 |
make_date_index(df, date) 61 | else: 62 | make_geodate_index(df, date, geo) 63 | 64 | # adstock transform 65 | if adstock: 66 | for i in range(len(media)): 67 | x_label = media[i] 68 | theta = adstock[i] 69 | if theta > 0: 70 | trans_label = x_label+" θ="+str(theta) 71 | df[trans_label] = geometric_adstock(df[x_label], theta) 72 | X_labels.append(trans_label) 73 | X_labels.remove(x_label) 74 | media[i] = trans_label 75 | 76 | # saturation transform 77 | if saturation: 78 | for i in range(len(media)): 79 | x_label = media[i] 80 | alpha = saturation[i] 81 | if alpha > 0: 82 | trans_label = x_label+" α="+str(alpha) 83 | df[trans_label] = power_saturation(df[x_label], 1-alpha) 84 | X_labels.append(trans_label) 85 | X_labels.remove(x_label) 86 | media[i] = trans_label 87 | 88 | # assign the y and X frames 89 | y = df[target] 90 | X = df[X_labels] 91 | 92 | # run model 93 | if settings['model'] == 'linear': 94 | return Linear(y, X, media, settings) 95 | elif settings['model'] == 'log-linear': 96 | return LogLinear(y, X, media, settings) 97 | elif settings['model'] == 'log-log': 98 | return LogLog(y, X, media, settings) 99 | elif settings['model'] == 'ridge': 100 | return Ridge(y, X, media, settings) 101 | elif settings['model'] == 'deep-learning': 102 | return DeepLearning(y, X, media, settings) 103 | else: 104 | all_models = { 105 | 'linear': Linear(y, X, media, settings), 106 | 'log-linear': LogLinear(y, X, media, settings), 107 | 'log-log': LogLog(y, X, media, settings), 108 | # 'ridge': Ridge(y, X, media, settings), 109 | # 'deep-learning': DeepLearning(y, X, media, settings) 110 | } 111 | accuracies = [{"model": x, f"{settings['metric']}": all_models[x].metric(settings['metric'])} for x in all_models.keys()] 112 | # min_error = min(all_models.keys(), key=lambda x: all_models[x].metric(settings['metric'])) 113 | min_error = min(accuracies, key=lambda x: x[settings['metric']]) 114 | return all_models[min_error['model']] 115 | 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /src/hommmer/models/Model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime as dt 3 | import pandas as pd 4 | import numpy as np 5 | from IPython.display import display 6 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python 7 | from sklearn.model_selection import train_test_split 8 | 9 | from hommmer.charts import accuracy 10 | from hommmer.helpers import check_metric 11 | 12 | class Model(): 13 | def __init__(self, y, X, media_labels, settings, model): 14 | # set timestamp 15 | self.timestamp = dt.datetime.today().strftime('%Y-%m-%d %H:%M') 16 | 17 | # train-test split 18 | if settings['split']: 19 | X_train, X_test, y_train, y_test = train_test_split(X, y, 20 | test_size=settings['split'], random_state=0) 21 | else: 22 | X_train, X_test, y_train, y_test = X, X, y, y 23 | 24 | self.settings = settings 25 | self.model = model 26 | self.runtime = None 27 | 28 | # assign X and y 29 | self.X_actual = X 30 | self.y_actual = y 31 | self.X_train = X_train 32 | self.y_train = y_train 33 | self.X_test = X_test 34 | self.y_test = y_test 35 | self.media_labels = media_labels 36 | 37 | # placeholders 38 | self.coefficients = [] 39 | 40 | def _fit(self, y, X): 41 | return None 42 | 43 | def results(self): 44 | results_df = pd.DataFrame(self.contribution().sum(), columns=['contribution']) 45 | results_df['share'] = 
results_df['contribution'] / results_df['contribution'].sum() * 100 46 | results_df['coefficient'] = self.coefficients 47 | results_df['pvalue'] = self._pvalues() 48 | results_df = pd.concat([results_df, self._confidence_intervals()], axis=1) 49 | 50 | return np.around(results_df, 3) 51 | 52 | def contribution(self, X=None): 53 | if X is None: 54 | X = self.X_actual 55 | 56 | coef_df = pd.DataFrame({'coefficient': self.coefficients}, index=X.columns) 57 | 58 | data = [] 59 | for x in list(X.columns): 60 | contrib = coef_df['coefficient'].loc[x] * X[x] 61 | data.append(contrib) 62 | 63 | contrib_df = pd.DataFrame(data).T 64 | 65 | return contrib_df 66 | 67 | def predict(self, X=None): 68 | contribution = self.contribution(X) 69 | y_pred = contribution.sum(axis=1) 70 | return y_pred 71 | 72 | def metrics(self, metric_labels): 73 | metrics = [] 74 | for metric in metric_labels: 75 | value = check_metric(metric, self) 76 | metrics.append((metric, value)) 77 | for label, output in metrics: 78 | print(f"{output[1]} {label}: {output[0]}") 79 | 80 | def metric(self, metric_label): 81 | value = check_metric(metric_label, self) 82 | return value[0] 83 | 84 | def _save(self): 85 | file = self.settings['file'] 86 | file_paths = file.split('/') 87 | filename = file_paths.pop() 88 | file_paths.append("models-"+filename) 89 | file_loc = '/'.join(file_paths) 90 | models_output = pd.DataFrame.from_dict([{ 91 | 'file': self.settings['file'], 92 | 'model': self.model, 93 | 'metric': self.settings['metric'], 94 | 'error': self.metric(self.settings['metric']), 95 | 'timestamp': dt.datetime.today().strftime('%Y-%m-%d %H:%M'), 96 | 'runtime': self.runtime, 97 | 'y_label': self.y_train.name, 98 | 'X_labels': ', '.join(list(self.X_train.columns)), 99 | }]) 100 | if os.path.isfile(file_loc): 101 | # save to existing file 102 | loaded_models = pd.read_csv(file_loc) 103 | all_models = pd.concat([loaded_models, models_output]) # DataFrame.append was removed in pandas 2.0 104 | all_models.to_csv(file_loc, index=False) 105 | print("added model to existing file") 106 | else: 107 | # save new model file 108 | models_output.to_csv(file_loc, index=False) 109 | print("added new model file locally") 110 | 111 | def show(self, charts=True, metrics=True, results=True): 112 | accuracy(self.y_actual, self.predict()) if charts else False 113 | self.metrics(["rsquared", "nrmse", "mape", "decomp-rssd", "cond-no"]) if metrics else False 114 | display(self.results()) if results else False -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hommmer 2 | 3 | A simple Marketing Mix Modeling library in Python. 4 | 5 | \*\*\* **_NOTE: this library is in alpha and not yet working._** \*\*\* 6 | 7 | ## Quick start 8 | 9 | ### 1. Install the library 10 | 11 | > `pip install hommmer` 12 | 13 | ### 2. Build the model 14 | 15 | ``` 16 | # import the library 17 | import hommmer as mmm 18 | 19 | # download example data 20 | mmm.load_duff() 21 | 22 | # list media columns 23 | media = ['facebook', 'google', 'tiktok'] 24 | 25 | # build the model 26 | model = mmm.build('duff.csv', 'sales', media) 27 | ``` 28 | 29 | #### Required 30 | 31 | - **path**: the location of the file with your data 32 | - **target**: the column with your conversions or conversion value 33 | - **media**: a list of the columns with media spend 34 | 35 | #### Optional 36 | 37 | - **organic**: a list of the organic columns. default: everything not listed in `media`. 38 | - **date**: the column with your date labels (YYYY-MM-DD). default: `date` 39 | - **verbose**: see what the model is doing by printing logs (set via `override`). default: `False` 40 | - **override**: use custom settings for aspects of the model (see the example below). default: `{}` 41 |
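For example (illustrative values, any key from the default settings can be swapped in):

```python
model = mmm.build('duff.csv', 'sales', media,
                  override={"model": "log-log", "split": 0.2, "verbose": True})
```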
42 | Provide at least 1 year of weekly data where the `date` column is the start of the week (Monday). 43 | 44 | ### 3. Use the results 45 | 46 | ``` 47 | # show the charts and metrics 48 | model.show() 49 | 50 | # save locally to png and csv 51 | model.save() 52 | ``` 53 | 54 | ### Other features 55 | 56 | Our solution is fully automated, but if you want to build a model manually, or use our helper functions for cleaning data, you can import from our sublibraries. 57 | 58 | ```python 59 | from hommmer.cleaners import transpose_data 60 | from hommmer.features import geometric_adstock 61 | from hommmer.charts import accuracy 62 | from hommmer.metrics import nrmse 63 | from hommmer.models import Linear 64 | ``` 65 | 66 | ## About Marketing Mix Modeling 67 | 68 | Marketing Mix Modeling (MMM) was introduced in the 1960s to match spikes and dips in sales to actions taken in marketing. No user data required - it's privacy-friendly, adblocker-proof and works across all channels (even offline). 69 | 70 | What used to be a 3-6 month, $50k+ job for the Fortune 500, is now an always-on, automated source of truth for startups like [Harry's](https://ladder.io/blog/attribution-technique), [HelloFresh](https://engineering.hellofresh.com/bayesian-media-mix-modeling-using-pymc3-for-fun-and-profit-2bd4667504e6) and [Monday․com](https://www.youtube.com/watch?v=p-YbHMCUycw). Even Facebook and Google are getting in on the game with [research papers](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46001.pdf) and [open-source projects](https://facebookexperimental.github.io/Robyn/). 71 | 72 | ## About hommmer 73 | 74 | Most modeling libraries, like [Statsmodels](https://www.statsmodels.org/stable/index.html), [SciKitLearn](https://scikit-learn.org/stable/) and [Facebook's Robyn](https://facebookexperimental.github.io/Robyn/), cater to statisticians and data scientists. They offer complex configuration options and advanced algorithms only accessible to the biggest companies spending millions on marketing, who can afford to spend 3-6 months on a solution. 75 | 76 | So most Marketing Mix Modeling by small businesses and startups is [done in Excel](https://www.saxifrage.xyz/post/econometrics-gsheets). But there are things you can't do in Excel, like automatically building 1,130 models to see which one works best. We'd like MMM to be in the hands of more people, but that can't happen if you need to be a nuclear physicist to use it. 77 | 78 | `hommmer` is built for the rest of us: the 'everyman' (of any gender) modeling hobbyist, for whom MMM is just one of many jobs on the todo list. It's designed to be simple to use, but powerful underneath, without getting you into trouble. Over-simplifying things will annoy the statisticians (Doh!), but it'll make allocating budget quick and easy. 79 | 80 | ## Design Principles: 81 | 82 | ### 1. Excel is the operating system 83 | 84 | Full compatibility with Excel / GSheets / CSV for importing and exporting. 85 | 86 | ### 2. Don't make me think 87 | 88 | All user input should be treated as error. Everything needs a good default. 89 | 90 | ### 3. Good is better than great 91 | 92 | Where there's a choice between optimization and usefulness, take the latter. 93 | 94 | ### 4.
Better data beats fancier algorithms 95 | 96 | We focus on helper functions to clean data, and treat algorithms as commodities. 97 | 98 | ### 5. We know less than the client 99 | 100 | Assume the client knows what they're doing, then try to prove otherwise. 101 | 102 | ## Contributors 103 | 104 | These people are building `hommmer` for fun in their spare time. Cheers! 🍻 105 | 106 | - [hammer-mt](https://github.com/hammer-mt) 💻 107 |
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Thank you for investing your time in contributing to our project! Any contribution you make will be reflected on [https://www.hommmer.org/](https://www.hommmer.org/). 4 | 5 | ## New contributor guide 6 | 7 | See the [README](README.md) to get an overview of the project. Ideas for improvement are in [TODO](TODO.md). We're operating under the MIT Open Source [license](LICENSE) so you can still use anything you contribute (but so can the rest of us). 8 | 9 | ## Getting started 10 | 11 | ### Issues 12 | 13 | #### Create a new issue 14 | 15 | If you spot a problem with the docs, [search if an issue already exists](https://docs.github.com/en/github/searching-for-information-on-github/searching-on-github/searching-issues-and-pull-requests#search-by-the-title-body-or-comments). If a related issue doesn't exist, you can open a [new issue](https://github.com/hammer-mt/hommmer/issues/new). 16 | 17 | #### Solve an issue 18 | 19 | Scan through our [existing issues](https://github.com/hammer-mt/hommmer/issues) to find one that interests you. Leave a comment asking to pick up the issue so maintainers know you want to work on it. 20 | 21 | ### Make Changes 22 | 23 | #### Prerequisites 24 | 25 | Make sure you have the following installed in your development environment: 26 | 27 | - [Python](https://www.python.org/downloads/) 28 | 29 | #### Development Workflow 30 | 31 | Follow the steps below to get the package working locally: 32 | 33 | 1. Create a personal fork of the project on GitHub and clone it locally 34 | 35 | ```shell 36 | # Using HTTPS 37 | git clone https://github.com/your-username/hommmer.git 38 | 39 | # Or using SSH 40 | git clone git@github.com:your-username/hommmer.git 41 | ``` 42 | 43 | 2. Add the original repository as a remote called `upstream` 44 | 45 | ```shell 46 | git remote add upstream https://github.com/hammer-mt/hommmer.git 47 | ``` 48 | 49 | 3. Make sure to pull upstream changes into your local repository 50 | 51 | ```shell 52 | git fetch upstream 53 | ``` 54 | 55 | 4. Create a new branch to work from 56 | 57 | ```shell 58 | git checkout -b branchname 59 | ``` 60 | 61 | 5. Activate a virtual environment 62 | 63 | ```shell 64 | python -m venv venv 65 | 66 | # Using Windows 67 | venv\Scripts\activate 68 | 69 | # Using Mac 70 | source ./venv/bin/activate 71 | ``` 72 | 73 | 6. Install the package as editable 74 | 75 | ```shell 76 | # Install from the cloned repo: 77 | pip install -e your/local/path 78 | ``` 79 | 80 | I like working from Jupyter Notebook (Anaconda) because if you run `%load_ext autoreload` then `%autoreload 2` the module will auto-reload on every saved change to your local package! Note: restart the kernel if you run into an error with classes. 81 | 82 | 7. Make your changes / contributions 83 | 84 | Make sure to follow the code style of the project, run any tests (if available) and add / update the documentation as needed. 85 | 86 | Squash your commits with git's [interactive rebase](http://git-scm.com/docs/git-rebase) (create a new branch if necessary). Write your commit messages in the present tense (what does it do to the code?). Push your changes to your fork on GitHub, the remote `origin`. 87 | 88 | ```shell 89 | # Squash commits, fix up commit messages etc.
7. Make your changes / contributions

Make sure to follow the code style of the project, run any tests (if available), and add or update the documentation as needed.

Squash your commits with git's [interactive rebase](http://git-scm.com/docs/git-rebase) (create a new branch if necessary). Write your commit messages in the present tense (what does it do to the code?). Push your changes to your fork on GitHub, the remote `origin`.

```shell
# Squash commits, fix up commit messages etc.
git rebase -i origin/main

# Push your branch to your fork on GitHub
git push origin branchname
```

### Pull Request

When you're done making the changes, open a pull request, often referred to as a PR. You do this in GitHub from your fork of the project. Target the `develop` branch if there is one, else go for `main`.

- Fill out the PR description summarizing your changes so we can review your PR. This template helps reviewers understand your changes and the purpose of your pull request.
- Don't forget to [link the PR to an issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) if you are solving one.
- Enable the checkbox to [allow maintainer edits](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/allowing-changes-to-a-pull-request-branch-created-from-a-fork) so the branch can be updated for a merge. Once you submit your PR, a maintainer will review your proposal. We may ask questions or request additional information.
- We may ask for changes to be made before a PR can be merged, either using [suggested changes](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/incorporating-feedback-in-your-pull-request) or pull request comments. You can apply suggested changes directly through the UI. You can make any other changes in your fork, then commit them to your branch.
- As you update your PR and apply changes, mark each conversation as [resolved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/commenting-on-a-pull-request#resolving-conversations).
- If you run into any merge issues, check out this [git tutorial](https://lab.github.com/githubtraining/managing-merge-conflicts) to help you resolve merge conflicts and other issues.
- Once the pull request is approved and merged, you can pull the changes from upstream to your local repo and delete your extra branch(es).

### Your PR is merged!

Congratulations :tada::tada: The hommmer team thanks you!

Once your PR is merged, we will add you to the All Contributors table in the [`README.md`](./README.md#all-contributors).

### Publishing to PyPI

This is more a note to self, because I keep forgetting.

1. `pip install twine`
2. `cd Documents\Projects\hommmer`
3. Update the version number in `setup.py`
4. `python setup.py sdist bdist_wheel`
5. Delete old versions in the `dist` folder
6. `twine check dist/*`
7. `twine upload --repository-url https://test.pypi.org/legacy/ dist/*` (optional, to dry-run the release on TestPyPI)
8. `twine upload dist/*`
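For step 3, the version string lives in `setup.py`. A sketch of the relevant field (assuming a standard setuptools src-layout; the real file may declare more metadata):

```python
# setup.py (illustrative sketch, not the actual file)
from setuptools import setup, find_packages

setup(
    name="hommmer",
    version="0.1.2",  # step 3: bump this before building a release
    package_dir={"": "src"},
    packages=find_packages(where="src"),
)
```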
Resources:

- [Using TestPyPI](https://packaging.python.org/guides/using-testpypi/)
- [Building a Python Package and Publishing on PyPi (The Python Package Index)](https://www.section.io/engineering-education/building-a-python-package-and-publishing-on-pypi/)
- [Packaging Python Projects](https://packaging.python.org/tutorials/packaging-projects/)

--------------------------------------------------------------------------------
/website/pages/index.js:
--------------------------------------------------------------------------------
import Head from "next/head";
import { useState, useRef } from "react";
import { DuplicateIcon, CheckIcon } from "@heroicons/react/solid";

export default function Home() {
  const copyAreaRef = useRef(null);
  const [copiedText, setCopiedText] = useState("");

  const quickStartInstructions = [
    { title: "Install the package", code: "pip install hommmer" },
    { title: "Import the library", code: "import hommmer as mmm" },
    { title: "Download example data", code: "mmm.load_duff()" },
    {
      title: "Build your model",
      code: "media = ['facebook', 'google', 'tiktok']\nmodel = mmm.build('duff.csv', 'sales', media)",
    },
    { title: "Display the results", code: "model.show()" },
  ];

  // Copy the clicked snippet via the hidden textarea, then remember what was
  // copied so the matching snippet can swap its icon to a check mark.
  // (document.execCommand is deprecated but still widely supported.)
  const handleClickToCopy = (text) => {
    copyAreaRef.current.value = text;
    copyAreaRef.current.select();
    document.execCommand("copy");
    copyAreaRef.current.value = "";
    copyAreaRef.current.blur();
    setCopiedText(text);
  };
  return (
    {/* The page markup was stripped from this dump. Recoverable structure: a
        <Head> with the title "hommmer: A simple Marketing Mix Modeling library
        in Python." and the favicon; a hero heading ("hommmer") with that same
        tagline; a "Quick Start" section that maps over quickStartInstructions,
        rendering each title plus a click-to-copy snippet (DuplicateIcon,
        swapped for CheckIcon once copiedText matches); the hidden textarea
        bound to copyAreaRef that handleClickToCopy writes through; four link
        cards: "Documentation →" (Learn how to use hommmer for marketing mix
        modeling.), "Tutorials →" (Learn about marketing mix modeling in
        simulator-based courses.), "GitHub →" (Take a look at the code in our
        GitHub repository and contribute.), "Discord →" (Join users and
        contributors in our Discord community.); and a footer. */}
  );
}

--------------------------------------------------------------------------------
/SOURCES.md:
--------------------------------------------------------------------------------
## Sources

Wherever we use code directly we reference it in the code comments, but this content gave us inspiration for the project more generally.

- [Facebook Robyn](https://facebookexperimental.github.io/Robyn/) – variable transformations, mission
- [Bayesian Media Mix Modeling using PyMC3, for Fun and Profit](https://engineering.hellofresh.com/bayesian-media-mix-modeling-using-pymc3-for-fun-and-profit-2bd4667504e6) – bayesian methods, process
- [Bayesian Methods for Media Mix Modeling with Carryover and Shape Effects](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46001.pdf) – bayesian methods, variable transformations
- [Bayesian Methods for Hackers](https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers) – how to build bayesian models, PyMC3
- [Statistical Rethinking: A Bayesian Course Using R and Stan](https://github.com/rmcelreath/statrethinking_winter2019) – how to interpret bayesian stats, STAN
- [A Hierarchical Bayesian Approach to Improve Media Mix Models Using Category Data](https://research.google/pubs/pub45999/) – hierarchical models
- [Introduction to Bayesian Methods for MMM](https://getrecast.com/bayesian-methods-for-mmm/) – running Bayesian MMM, variable distributions
- [Feature Selection For Machine Learning in Python](https://machinelearningmastery.com/feature-selection-machine-learning-python/) – feature selection
- [The Five Linear Regression Assumptions: Testing on the Kaggle Housing Price Dataset](https://boostedml.com/2018/08/testing-linear-regression-assumptions-the-kaggle-housing-price-dataset.html) – statistical tests
- [A Complete Guide To Linear Regression In Python](https://www.listendata.com/2018/01/linear-regression-in-python.html) – standard linear regression
- [Advertising Adstock – Concept & Formula](https://analyticsartist.wordpress.com/2013/11/02/calculating-adstock-effect/) – adstocks
- [Advertising Diminishing Returns & Saturation](https://analyticsartist.wordpress.com/2015/03/08/advertising-diminishing-returns-saturation/) – diminishing returns
- [Building and Validating Media Mix Models](https://github.com/mecommerce/ThirdLove-Tech-Blog/blob/master/Media_Mix_Model/ThirdLove_MMM_Whitepaper.pdf) – process, history of MMM
- [Market Mix Modeling using Sales data](https://www.kaggle.com/imdineshgrewal/market-mix-modeling-using-sales-data) – canonical example of MMM on Kaggle
- [DT MART: Market Mix Modeling](https://www.kaggle.com/datatattle/dt-mart-market-mix-modeling) – data cleaning, process
- [Multiple Regression with Google Sheets XL Miner](https://www.youtube.com/watch?v=YhBU92eyNRo) – inspired my first post on MMM, "[Econometrics in GSheets](https://www.saxifrage.xyz/post/econometrics-gsheets)"
- [How to create a basic Marketing Mix Model in scikit-learn](https://practicaldatascience.co.uk/machine-learning/how-to-create-a-basic-marketing-mix-model-in-scikit-learn) – using multiple algos, scikit learn
- [Python/STAN Implementation of Multiplicative Marketing Mix Model](https://towardsdatascience.com/python-stan-implementation-of-multiplicative-marketing-mix-model-with-deep-dive-into-adstock-a7320865b334) – multiplicative models, seasonality
- [Carryover and Shape Effects in Media Mix Modeling: Paper Review](https://towardsdatascience.com/carryover-and-shape-effects-in-media-mix-modeling-paper-review-fd699b509e2d) – Bayesian, PyMC3, adstocks & diminishing returns
- [Modeling adstock using Weibull transformations](https://github.com/annalectnl/weibull-adstock/blob/master/adstock_weibull_annalect.pdf) – adstocks
- [Media mix models are the future of mobile advertising](https://mobiledevmemo.com/media-mix-models-are-the-future-of-mobile-advertising/) – trends, history, context, strategy
- [Geo-level Bayesian Hierarchical Media Mix Modeling](https://research.google/pubs/pub46000/) – geo models
- [Challenges and Opportunities in Media Mix Modeling](https://research.google/pubs/pub45998/) – trends, history, context, strategy
- [Meet the geniuses behind our BI tool BigBrain](https://engineering.monday.com/meet-the-geniuses-behind-our-bi-tool-bigbrain/) – real world usage
- [Market Mix Model - ElecKart | Kaggle](https://www.kaggle.com/goyalshalini93/market-mix-model-eleckart) – process, data cleaning
- [Data Science Primer | Elite Data Science](https://elitedatascience.com/primer) – data cleaning, process, algorithms = commodities, better data > fancier algorithms
- [How to Calculate Feature Importance With Python](https://machinelearningmastery.com/calculate-feature-importance-with-python/) – feature importances, feature selection
- [Marketing Mix Modeling MMM (Part 3 of 3)](https://www.spencertom.com/2020/08/29/marketing-mix-modeling-mmm-part-3-of-3/) – Log-Linear Models, Log-Log Models

## Reading List

Here's everything we have collected on our list to read, but haven't had a chance to yet. Feel free to add to the list or let us know if something isn't worth our time.
- https://github.com/dps/montesheet
- https://www.facebook.com/fbgaminghome/blog/marketers/the-future-is-modeled
- https://www.microprediction.com/blog/prophet
- https://www.youtube.com/watch?v=B7ZWehBHVw0
- https://www.latticeworkinsights.com/press/we-evaluated-3-media-mix-models-so-you-dont-have-to
- https://ekimetrics.com/wp-content/uploads/2020/05/Ekimetrics_Facebook_White-paper.pdf?fbclid=IwAR1mvLJ8zcVO567q-3nv21c2DF57kA_eAQWRp1KI4a56eDYGMIPIQ1ieduI
- https://www.marketingevolution.com/marketing-essentials/media-mix-modeling
- https://stat.ethz.ch/R-manual/R-devel/library/stats/html/Weibull.html?fbclid=IwAR0Fygnw1TtxYckg9IYDPPhWwAYrdY666l0Tw0RAJvBhsxanm91uSB3SZP4
- https://blog.brandops.io/tracking-brand-and-demand-the-4-methods-placeholder-title
- https://research.google/pubs/pub41854/
- https://www.adroll.com/blog/marketing-analytics/first-last-touch-attribution-why-its-out-of-style
- https://blackwoodseven.com/the-next-generation-of-marketing-mix-modeling-is-bayesian/
- https://www.youtube.com/watch?v=UznM_-_760Y
- https://www.facebook.com/business/news/insights/5-ways-to-adjust-marketing-mix-models-for-unexpected-events
- https://www.forbes.com/sites/forbesagencycouncil/2020/05/18/digital-marketing-in-a-cookie-less-internet/?sh=7fa7bc3121e2
- https://motamem.org/wp-content/uploads/2019/07/Borden-1984_The-concept-of-marketing-mix.pdf
- https://www.youtube.com/watch?v=UznM_-_760Y&t=908s
- https://www.youtube.com/watch?v=p-YbHMCUycw
- https://www.warc.com/newsandopinion/opinion/the-econometric-hero-and-five-questions-every-cmo-should-ask-about-mmm/4214?es_id=13fa7619cd
- https://www2.deloitte.com/content/dam/Deloitte/es/Documents/estrategia/Deloitte-es-estrategia-y-operaciones-combinacion-mmm-cle.pdf
- https://www.thinkwithgoogle.com/intl/en-gb/marketing-strategies/data-and-measurement/roi-marketing-mix-models/
- https://www.nielsen.com/us/en/insights/article/2017/when-it-comes-to-advertising-effectiveness-what-is-key/
- https://stackoverflow.com/questions/21765794/python-constrained-non-linear-optimization
- https://towardsdatascience.com/efficient-frontier-portfolio-optimisation-in-python-e7844051e7f
- https://medium.com/analytics-vidhya/marketing-mix-model-guide-with-dataset-using-python-r-and-excel-4e319be47b4
- https://www.ashokcharan.com/Marketing-Analytics/~mx-mmm-sales-response-function.php
- https://justrthings.com/2017/12/30/a-multivariate-approach-to-adstock-rate-modeling-in-r/
- http://www.17bigdata.com/robyn-mmm-step-by-step-guide-a-beta-project-from-facebook-marketing-science/
- https://www.themarketingtechnologist.co/the-gam-approach-to-spend-your-money-more-efficiently/
- https://multithreaded.stitchfix.com/blog/2015/07/30/gam/
- https://rstudio-pubs-static.s3.amazonaws.com/294627_5f7e9a449b6c442e806a4743f1b4f8a7.html
- https://www.facebook.com/business/news/insights/considerations-for-creating-modern-marketing-mix-models
- https://www.listendata.com/2019/09/marketing-mix-modeling.html
- https://towardsdatascience.com/building-a-simple-marketing-mix-model-with-ols-571ac3d5b64f
- https://www.facebook.com/business/news/insights/a-full-funnel-approach-how-brand-marketing-drive-short-term-sales
- https://quantmar.com/8/What-is-media-mix-modeling
- https://aaltodoc.aalto.fi/handle/123456789/26743
- https://www.forbes.com/sites/scottmcdonald1/2018/01/23/measuring-the-roi-of-marketing-ab-tests-vs-market-mix-models-vs-multi-touch-attribution/
- https://www.iab.com/insights/the-essential-guide-to-marketing-mix-modeling-and-multi-touch-attribution/
- https://link.springer.com/article/10.1057/jma.2014.3
- https://danaleeling.blogspot.com/2019/09/graphical-confidence-intervals-for.html
- https://www.certificationanswers.com/en/a-marketing-manager-wants-to-use-an-attribution-model-that-includes-both-converting-and-non-converting-paths-in-order-to-evaluate-individual-customer-paths-which-attribution-model-will-fulfill-this-r/
- https://towardsdatascience.com/market-mix-modeling-mmm-101-3d094df976f9
- https://wiki.q-researchsoftware.com/wiki/Driver_(Importance)_Analysis
- https://www.real-statistics.com/multiple-regression/multiple-regression-analysis/multiple-regression-analysis-excel/
- https://www.forbes.com/sites/onmarketing/2012/11/28/the-downside-of-marketing-mix-models-is-theres-no-upside-for-cmos/
- https://sd-group.com.au/en/blog/market-mix-vs-multi-touch-attribution-model
- https://www.slideshare.net/wolfeman02/shows-approach-which-expands-the-breadth-of-what-marketingmix-models-c
- https://bottomlineanalytics.com/brand-content-drivers-modeling-optimizing-content-marketing/
- https://www.slideshare.net/PeterCain1/dynamic-marketing-mix-modelling
- https://blog.hurree.co/blog/marketing-mix-modeling
- https://www.analytic-edge.com/is-marketing-mix-modeling-only-for-fortune-500-companies-think-again/
- https://towardsdatascience.com/causal-vs-statistical-inference-3f2c3e617220
- https://www.futuremarketinsights.com/reports/marketing-mix-optimisation-market
- https://uk.news.yahoo.com/success-story-marketing-mix-modeling-130500042.html
- https://uk.sganalytics.com/case-study/analytics/market-mix-modeling-what-if-simulator-insurance/
- https://www.slideshare.net/vivastream/disney-marketinganalyticsoptimization-14907727
- https://www.thinkwithgoogle.com/intl/en-145/marketing-strategies/video/through-marketing-mix-modeling-loreal-uncovers-youtubes-ability-deliver-sales/
- https://www.jogordonconsulting.com/blog/marketing-mix-modelling-3-case-studies-from-2020
- https://marketingeffectiveness.nielsen.com/our-solutions/marketing-mix-modeling/
- https://www.ashokcharan.com/Marketing-Analytics/~mx-mmm-what-if-analysis.php
- https://www.wsj.com/articles/SB112415492969313998
- https://videoadnews.com/2020/09/14/econometric-modelling-in-advertising-explained/
- https://www.ebiquity.com/news-insights/blog/can-econometrics-prove-the-value-of-influencers/
- https://www.jstor.org/stable/3149922?seq=1
- https://www.jstor.org/stable/3151017?seq=1
- https://www.sequentpartners.com/case-studies-in-holistic-marketing-mix-modeling/
- https://www.treasuredata.com/resources/a-forbes-cmo-practice-report-for-marketing-effectiveness/
- https://www.thedrum.com/opinion/2020/03/16/marketing-mix-marketing-effect-modelling
- https://www.marketingevolution.com/knowledge-center/changing-approach-to-marketing-mix-modeling
- https://nathanbrixius.wordpress.com/2013/11/26/marketing-mix-i/
- https://www.scanmarqed.com/marketing-mix-modeling
- https://www.cmswire.com/cms/customer-experience/forrester-wave-highlights-marketing-mix-modeling-vendors-021092.php
- https://www.linkedin.com/pulse/death-marketing-mix-modeling-we-know-michael-wolfe/
- https://www.sellforte.com/marketing-mix-modeling
- https://towardsdatascience.com/market-mix-modeling-101-part-2-95c5e147c8a3
- https://www.arymalabs.com/Blogs.aspx
- https://www.nielsen.com/uk/en/solutions/capabilities/marketing-mix-modeling/
- https://web.archive.org/web/20190327101933/https://www.tvba.co.uk/article/route-to-market-finding-the-back-door-to-tough-markets
- https://fospha.com/case-studies/joined-up-strategy-driven-by-marketing-mix-modelling
- https://www.businesswire.com/news/home/20200602005541/en/How-Marketing-Mix-Modeling-Helped-a-Food-and-Beverage-Company-to-Gain-Visibility-into-Consumer-Buying-Behavior-A-Case-Study-by-Quantzig
- https://www.businesswire.com/news/home/20200807005016/en/Success-Story---Marketing-mix-modeling-helps-increase-MROI-for-a-US-based-telecommunication-service-provider-Quantzig
- https://www.thinkwithgoogle.com/marketing-strategies/data-and-measurement/marketing-mix-modeling-tutorial/
- http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41854.pdf
- https://hbr.org/2013/03/advertising-analytics-20
- https://towardsdatascience.com/how-to-revise-your-marketing-mix-model-to-capture-covid-19-impact-863b65982408
- https://www.youtube.com/playlist?list=PLwJRxp3blEvZyQBTTOMFRP_TDaSdly3gU
- https://c3metrics.com/whats-the-difference-between-mmm-mta/
- https://medium.com/@gustavobramao/mmm-vs-gbhmmm-ebe537ccf15b
- https://towardsdatascience.com/explaining-feature-importance-by-example-of-a-random-forest-d9166011959e
- https://www.marketingattribution.com/marketing-mix-models/
- https://bottomlineanalytics.com/our-thinking/
- https://economagician.co/2014/05/30/an-example-of-a-bad-market-mix-model/
- https://towardsdatascience.com/machine-learning-vs-econometrics-in-the-real-world-4058095b1013
- https://services.google.com/fh/files/misc/article_marketing_mix_modeling_final.pdf
- https://www.latentview.com/marketing-mix-modeling/
- https://www.investopedia.com/terms/e/econometrics.asp
- https://www.nielsen.com/sa/en/insights/article/2019/5-important-questions-to-ask-your-marketing-mix-vendor/
- https://www.marketingiq.co.uk/tv-media-planning-terms-calculating-media-reach-and-frequency-using-tvrs/
- https://towardsdatascience.com/predicting-sales-611cb5a252de
- https://www.cpgdatainsights.com/answer-business-questions/volume-decomp-part-1/
- https://www.thinkbox.tv/research/demand-generator/
- https://www.quora.com/What-kind-of-econometrics-can-I-do-with-Python
- https://www.jstor.org/stable/40206298
- http://www.upfie.net/
- https://medium.com/@vince.shields913/econometrics-with-python-pt-1-646b6eeff7da
- https://www.kdnuggets.com/2018/12/machine-learning-explainability-interpretability-ai.html
- https://www.kaggle.com/learn/machine-learning-explainability
- https://towardsdatascience.com/an-overview-of-model-explainability-in-modern-machine-learning-fc0f22c8c29a
- https://stats.stackexchange.com/questions/150975/linear-regression-with-diminishing-returns
- https://stats.stackexchange.com/questions/27185/whether-to-include-x-and-x2-in-regression-model-examining-diminishing-retur
- https://stats.stackexchange.com/questions/80559/why-is-functional-form-so-important-when-specifying-models/80563#80563
- https://www.dummies.com/education/economics/econometrics/the-linear-log-model-in-econometrics/
- https://stats.stackexchange.com/questions/356117/how-to-fit-exponential-y-a1-expbx-function-to-a-given-data-set-especially
- http://www.real-statistics.com/regression/exponential-regression-models/exponential-regression/
- https://stats.idre.ucla.edu/stata/dae/multivariate-regression-analysis/
- https://www.kaggle.com/fayejavad/marketing-linear-multiple-regression
- https://towardsdatascience.com/perform-regression-diagnostics-and-tackle-uncertainties-of-linear-models-1372a03b1f56
- https://towardsdatascience.com/fisher-test-for-regression-analysis-1e1687867259
- https://www.coursera.org/lecture/uva-darden-market-analytics/marketing-mix-models-XCCSf
- https://www.kaggle.com/rishph7/market-mix-model
- https://github.com/palitr/Budget-Optimization-in-Ecommerce-using-Market-Mix-Modelling
- https://towardsdatascience.com/market-mix-modelling-application-with-mlr-60b18bd3dc81
- https://sites.google.com/site/2015pcsu/data-science/marketing-mix-modeling-to-find-the-best-advertising-route
- http://www.andrewwalterowens.com/post/111019666926/marketing-mix-modeling-using-statsmodels-part-1
- http://arxiv.org/ftp/arxiv/papers/1403/1403.7971.pdf
- https://medium.com/swlh/marketing-mix-modelling-step-by-step-part-1-702c793d91fd
- https://analyticsartist.wordpress.com/2014/08/17/marketing-mix-modeling-explained-with-r/
- http://datafeedtoolbox.com/marketing-mix-model-for-all-using-r-for-mmm/
- https://www.datasciencecentral.com/profiles/blogs/market-mix-modeling-mmm
- https://rpubs.com/nihil0/mmm01
- https://www.rdocumentation.org/packages/bayesm/versions/3.0-2/topics/cheese
- https://www.quora.com/Marketing-mix-modelling-What-are-the-best-and-most-practical-statistical-techniques-to-use-for-MMM
- https://medium.com/@yasimk_87248/marketing-mix-modeling-for-marketers-de406a988757
- https://web.archive.org/web/20200525194505/https://online-behavior.com/analytics/statistical-significance
- https://proofanalytics.ai/how-it-works-automated-mmm/
- https://www.measured.com/products
- https://mma.com/solutions/marketing-mix-modeling/
- https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
- https://www.datacamp.com/community/tutorials/tutorial-ridge-lasso-elastic-net
- https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html
- https://towardsdatascience.com/marketing-channel-attribution-with-markov-chains-in-python-part-2-the-complete-walkthrough-733c65b23323
- https://medium.com/@mortenhegewald/marketing-channel-attribution-using-markov-chains-101-in-python-78fb181ebf1e
- https://stats.stackexchange.com/questions/74622/converting-standardized-betas-back-to-original-variables
- https://stattrek.com/multiple-regression/interaction.aspx
- https://stackoverflow.com/questions/31029340/how-to-adjust-scaled-scikit-learn-logicistic-regression-coeffs-to-score-a-non-sc
- https://www.youtube.com/watch?v=68ABAU_V8qI
- https://medium.com/towards-artificial-intelligence/understanding-non-linear-regression-fbef9a396b71
- https://www.advancedwebranking.com/ctrstudy/
- https://realpython.com/simpy-simulating-with-python/
- https://www.searchviu.com/en/machine-learning-seo-predicting-rankings/
- https://www.forbes.com/sites/gilpress/2016/03/23/data-preparation-most-time-consuming-least-enjoyable-data-science-task-survey-says/?sh=3b7582c86f63
- https://arxiv.org/pdf/2106.03322.pdf
- https://medium.com/@Marko_Mi/advertising-adstock-theory-85cc9e26ea0e
- http://www.17bigdata.com/python-stan-implementation-of-multiplicative-marketing-mix-model/
- https://analyticsartist.wordpress.com/2014/01/31/adstock-rate-deriving-with-analytical-methods/
- https://storage.googleapis.com/pub-tools-public-publication-data/pdf/b20467a5c27b86c08cceed56fc72ceadb875184a.pdf
- https://github.com/psu4/Marketing-Mix-Modeling-/blob/master/Marketing-Mix-Modeling.py
- https://medium.com/swlh/marketing-response-curves-the-science-of-diminishing-returns-and-saturation-f8cf226e8dc5
- https://www.youtube.com/watch?v=4N0FFzGYfTs
- https://towardsdatascience.com/types-of-interaction-effects-in-market-mix-modeling-mmm-95247f3de36e
- https://twitter.com/RichardFergie/status/1461653687697387524?s=20
- read this: http://brucehardie.com/
- https://betanalpha.github.io/assets/case_studies/falling.html
- https://betanalpha.github.io/assets/case_studies/pystan_workflow.html
- https://twiecki.io/blog/2019/01/14/supply_chain/
- Remake this Statistical Significance code on Saxifrage: https://colab.research.google.com/drive/1DogSh8asM2-13Lv0SC69xRmbQ_DBM5rF?authuser=3
- Lambda School Inferential Statistics Lecture https://colab.research.google.com/drive/1HWgUTIGiuLqDd2538b0p-Xu4O8WBahZ5?authuser=1
- Lambda School Inferential Statistics Assignment https://drive.google.com/open?id=1XPYoZesZT0asuZdQjuurKeJgC-Ytl-Xd
- Bayesian spam filter https://colab.research.google.com/drive/1GKIN_RM3r3JC9R-3AE9ZuyMcPjdG6DH4
- https://quotefancy.com/quote/2401115/Pedro-Domingos-Each-of-the-five-tribes-of-machine-learning-has-its-own-master-algorithm-a
- https://github.com/pymc-devs/resources/tree/master/BCM
- https://github.com/pymc-devs/resources/tree/master/BSM
- https://github.com/pymc-devs/resources/tree/master/BDA3
- https://stats.stackexchange.com/questions/500260/pymc3-implementation-of-bayesian-mmm-poor-posterior-inference
- https://www.youtube.com/watch?v=SWMaoBbIp04
- https://www.youtube.com/watch?v=7tSFNhQO3jg
- https://www.youtube.com/watch?v=ZxR3mw-Znzc
- https://www.youtube.com/watch?v=uxGhjXS3ILE
- https://www.youtube.com/watch?v=appLxcMLT9Y
- https://www.youtube.com/watch?v=BrK7X_XlGB8
- https://www.pymc-labs.io/blog-posts/bayesian-media-mix-modeling-for-marketing-optimization/
- https://www.pymc-labs.io/blog-posts/reducing-customer-acquisition-costs-how-we-helped-optimizing-hellofreshs-marketing-budget/
- http://www.joshuakim.io/marketing-mix-modelling-with-bayesian-regression/
- https://www.coursera.org/learn/bayesian
- https://statswithr.github.io/book/introduction-to-bayesian-regression.html#sec:simple-linear
- https://twiecki.io/blog/2017/02/08/bayesian-hierchical-non-centered/
- https://towardsdatascience.com/bayesian-hierarchical-modeling-in-pymc3-d113c97f5149
- https://docs.pymc.io/en/stable/pymc-examples/examples/case_studies/multilevel_modeling.html
- https://twitter.com/tvladeck/status/1462447221304143894?t=Tm9-OmYEQ-QYivzmed7nJQ&s=03
- https://twitter.com/Mike_Kaminsky/status/1462439240487350276?t=bf_asUh8eXUwUMzTpYNUEQ&s=03
- https://twitter.com/RichardFergie/status/1462425574161453056?t=GT1WmTBdNJH_mJq-VaYAZw&s=03
- https://tvladeck.substack.com/p/did-you-control-for-_?s=03
- https://vincentk1991.github.io/Bayesian-regression-tutorial/
- https://stackoverflow.com/questions/39677240/multivariate-linear-regression-in-pymc3
- https://docs.pymc.io/en/stable/pymc-examples/examples/generalized_linear_models/GLM-linear.html
- https://www.chrisstucchio.com/blog/2017/bayesian_linear_regression.html
- https://twitter.com/emollick/status/1462265543495491591?t=-orv6yDbuswMJA7bMYudOQ&s=03
- https://stats.stackexchange.com/questions/140713/making-predictions-with-log-log-regression-model
- https://davegiles.blogspot.com/2014/12/s.html
- https://stats.stackexchange.com/questions/171386/what-are-bayesian-p-values
- https://royalsocietypublishing.org/doi/10.1098/rsbl.2019.0174
- https://juanitorduz.github.io/fb_prophet/
- https://stats.stackexchange.com/questions/238297/how-to-determine-appropriate-lagged-features-for-learning-systems-with-states
- https://proceedings.neurips.cc/paper/2008/file/7380ad8a673226ae47fce7bff88e9c33-Paper.pdf
- https://www.youtube.com/watch?v=DJ0c7Bm5Djk&t=16809s
- https://www.eigenfoo.xyz/_posts/2018-06-19-bayesian-modelling-cookbook/
- https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7
- https://www.semanticscholar.org/paper/Probabilistic-programming-in-Python-using-PyMC3-Salvatier-Wiecki/8085b60ce1771647f11ccc4728397275b502f359?p2df
- https://www.quantstart.com/articles/Bayesian-Linear-Regression-Models-with-PyMC3/
- https://twiecki.io/blog/2013/08/12/bayesian-glms-1/
- https://www.datasciencecentral.com/k-means-a-step-towards-marketing-mix-modeling/
- https://vincentk1991.github.io/adstock-pyro/
- https://discourse.pymc.io/t/geometric-adstock-paramter-estimation-in-pymc3-and-theano-using-theano-scan/1864
- http://www.17bigdata.com/marketing-mix-modelling-mmm-a-potential-solution/
- https://www.sciencedirect.com/science/article/abs/pii/S016781161500066X
- https://juanitorduz.github.io/pymc_mmm/
- https://towardsdatascience.com/python-stan-implementation-of-multiplicative-marketing-mix-model-with-deep-dive-into-adstock-a7320865b334
- https://blog.asana.com/2022/01/marketing-measurement-capabilities/#close
--------------------------------------------------------------------------------