├── src
│   └── hommmer
│       ├── __init__.py
│       ├── features
│       │   ├── interaction.py
│       │   ├── bfe.py
│       │   ├── bif.py
│       │   ├── ffe.py
│       │   ├── power_saturation.py
│       │   ├── lag.py
│       │   ├── geometric_adstock.py
│       │   ├── hill_saturation.py
│       │   ├── scaled_saturation.py
│       │   ├── normalize.py
│       │   ├── denormalize.py
│       │   ├── s_curve_saturation.py
│       │   ├── loss_function.py
│       │   ├── optimizer.py
│       │   ├── rfe.py
│       │   ├── __init__.py
│       │   ├── vif.py
│       │   ├── seasonal_decomp.py
│       │   ├── prophet_seasonality.py
│       │   ├── delayed_adstock.py
│       │   ├── weibull_adstock.py
│       │   └── weibull_adstock_delayed.py
│       ├── cleaners
│       │   ├── count_na.py
│       │   ├── describe_data.py
│       │   ├── drop_cols.py
│       │   ├── get_cols_containing.py
│       │   ├── cat_to_dummies.py
│       │   ├── rename_column.py
│       │   ├── count_na_cols.py
│       │   ├── count_dup_cols.py
│       │   ├── guess_numerical_variables.py
│       │   ├── add_X_labels.py
│       │   ├── del_X_labels.py
│       │   ├── make_column_index.py
│       │   ├── end_of_month.py
│       │   ├── payday_dummies.py
│       │   ├── get_all_X_labels.py
│       │   ├── drop_n_rows.py
│       │   ├── start_of_month.py
│       │   ├── str_to_dummy.py
│       │   ├── guess_date_column.py
│       │   ├── make_date_index.py
│       │   ├── convert_date.py
│       │   ├── guess_y_column.py
│       │   ├── standard_scaler.py
│       │   ├── modify_labels.py
│       │   ├── guess_categorical_variables.py
│       │   ├── train_test_split.py
│       │   ├── days_in_month.py
│       │   ├── guess_media_columns.py
│       │   ├── date_dummies.py
│       │   ├── make_geodate_index.py
│       │   ├── remove_outliers.py
│       │   ├── date_range_dummies.py
│       │   ├── group_weekly.py
│       │   ├── group_monthly.py
│       │   ├── transpose_data.py
│       │   ├── clean_numeric.py
│       │   ├── merge_data.py
│       │   ├── categorize_campaigns.py
│       │   ├── shift_dummies.py
│       │   ├── interpolate_dates.py
│       │   ├── week_commencing.py
│       │   ├── holiday_dummies.py
│       │   ├── unstack_data.py
│       │   ├── interpolate_weekly.py
│       │   ├── interpolate_monthly.py
│       │   └── __init__.py
│       ├── helpers
│       │   ├── its_working.py
│       │   ├── __init__.py
│       │   ├── logging.py
│       │   ├── exp_ex_zeros.py
│       │   ├── log_ex_zeros.py
│       │   └── check_metric.py
│       ├── metrics
│       │   ├── spend_share.py
│       │   ├── effect_share.py
│       │   ├── max_error.py
│       │   ├── dummy_median.py
│       │   ├── dummy_constant.py
│       │   ├── dummy_mean.py
│       │   ├── mse.py
│       │   ├── mae.py
│       │   ├── mdape.py
│       │   ├── degrees_of_freedom.py
│       │   ├── rmse.py
│       │   ├── smape.py
│       │   ├── log_accuracy_ratio.py
│       │   ├── mfe.py
│       │   ├── rsquared.py
│       │   ├── durbin_watson.py
│       │   ├── vars_obs.py
│       │   ├── condition_number.py
│       │   ├── breuschpagan.py
│       │   ├── mda.py
│       │   ├── nrmse.py
│       │   ├── harvey_collier.py
│       │   ├── ljungbox.py
│       │   ├── rainbox.py
│       │   ├── mape.py
│       │   ├── jarque_bera.py
│       │   ├── mase.py
│       │   ├── __init__.py
│       │   └── decomp_rssd.py
│       ├── charts
│       │   ├── lineplot.py
│       │   ├── pairplot.py
│       │   ├── __init__.py
│       │   ├── y_corr.py
│       │   ├── heatmap.py
│       │   ├── partial_dependence.py
│       │   ├── accuracy.py
│       │   └── response.py
│       ├── connectors
│       │   ├── __init__.py
│       │   ├── covid_mobility.py
│       │   ├── nasa_weather.py
│       │   ├── search_trends.py
│       │   └── colab_helpers.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── Ridge.py
│       │   ├── Linear.py
│       │   ├── LogLinear.py
│       │   ├── LogLog.py
│       │   ├── DeepLearning.py
│       │   └── Model.py
│       ├── datasets
│       │   ├── __init__.py
│       │   ├── add_noise.py
│       │   ├── load_duff.py
│       │   ├── load_holidays.py
│       │   ├── scale_feature.py
│       │   ├── make_dates.py
│       │   ├── make_data.py
│       │   └── duff.csv
│       └── main.py
├── website
│   ├── public
│   │   └── favicon.ico
│   ├── postcss.config.js
│   ├── pages
│   │   ├── _app.js
│   │   └── index.js
│   ├── package.json
│   ├── .gitignore
│   └── tailwind.config.js
├── setup.py
├── LICENSE
├── TODO.md
├── .gitignore
├── README.md
├── CONTRIBUTING.md
└── SOURCES.md
/src/hommmer/__init__.py:
--------------------------------------------------------------------------------
1 | from .main import build
2 | from .datasets import load_duff
--------------------------------------------------------------------------------
/src/hommmer/features/interaction.py:
--------------------------------------------------------------------------------
1 | def interaction(x1, x2):
2 | return x1 * x2
--------------------------------------------------------------------------------
/src/hommmer/cleaners/count_na.py:
--------------------------------------------------------------------------------
1 | def count_na(df):
2 | return df.isna().sum().sum()
--------------------------------------------------------------------------------
/src/hommmer/helpers/its_working.py:
--------------------------------------------------------------------------------
1 | def its_working():
2 | print("It's working! 4")
--------------------------------------------------------------------------------
/src/hommmer/cleaners/describe_data.py:
--------------------------------------------------------------------------------
1 | def describe_data(df):
2 | return df.describe().T
--------------------------------------------------------------------------------
/src/hommmer/features/bfe.py:
--------------------------------------------------------------------------------
1 | def bfe(y, X):
2 | # backward feature elimination
3 | pass
--------------------------------------------------------------------------------
/src/hommmer/features/bif.py:
--------------------------------------------------------------------------------
1 | def bif():
2 | # find best incremental feature
3 | pass
--------------------------------------------------------------------------------
/src/hommmer/features/ffe.py:
--------------------------------------------------------------------------------
1 | def ffe(y, X):
2 | # forward feature enhancement
3 | pass
--------------------------------------------------------------------------------
/src/hommmer/features/power_saturation.py:
--------------------------------------------------------------------------------
1 | def power_saturation(x, beta):
2 | return x ** beta
--------------------------------------------------------------------------------
/src/hommmer/features/lag.py:
--------------------------------------------------------------------------------
1 | def lag(series, periods):
2 | return series.shift(periods).fillna(0)
--------------------------------------------------------------------------------
/src/hommmer/cleaners/drop_cols.py:
--------------------------------------------------------------------------------
1 | def drop_cols(df, columns):
2 | df.drop(columns, axis=1, inplace=True)
--------------------------------------------------------------------------------
/src/hommmer/metrics/spend_share.py:
--------------------------------------------------------------------------------
1 | def spend_share(X_df):
2 | return (X_df.sum()/X_df.sum().sum()).values
--------------------------------------------------------------------------------
/website/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hammer-mt/hommmer/HEAD/website/public/favicon.ico
--------------------------------------------------------------------------------
/src/hommmer/cleaners/get_cols_containing.py:
--------------------------------------------------------------------------------
1 | def get_cols_containing(columns, containing):
2 | return [x for x in columns if containing in x]
--------------------------------------------------------------------------------
/src/hommmer/metrics/effect_share.py:
--------------------------------------------------------------------------------
1 | def effect_share(contribution_df):
2 | return (contribution_df.sum()/contribution_df.sum().sum()).values
--------------------------------------------------------------------------------
/src/hommmer/charts/lineplot.py:
--------------------------------------------------------------------------------
1 | import seaborn as sns
2 |
3 | def lineplot(df, x_label, y_label):
4 | sns.lineplot(data=df, x=x_label,y=y_label)
--------------------------------------------------------------------------------
/src/hommmer/cleaners/cat_to_dummies.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def cat_to_dummies(df, columns):
4 | return pd.get_dummies(df, columns=columns)
--------------------------------------------------------------------------------
/src/hommmer/cleaners/rename_column.py:
--------------------------------------------------------------------------------
1 | def rename_column(df, column_label, new_name):
2 | df.rename(columns={column_label: new_name}, inplace=True)
--------------------------------------------------------------------------------
/src/hommmer/metrics/max_error.py:
--------------------------------------------------------------------------------
1 | from sklearn.metrics import max_error as sk_max_error
2 |
3 | def max_error(y_actual, y_pred):
4 |     # alias the sklearn import so this wrapper doesn't shadow itself and recurse
5 |     return sk_max_error(y_actual, y_pred)
--------------------------------------------------------------------------------
/src/hommmer/charts/pairplot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import seaborn as sns
3 |
4 | def pairplot(df, y_label):
5 | sns.pairplot(df)
6 | plt.show()
--------------------------------------------------------------------------------
/src/hommmer/connectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .covid_mobility import covid_mobility
2 | from .nasa_weather import nasa_weather
3 | from .search_trends import search_trends
--------------------------------------------------------------------------------
/src/hommmer/features/geometric_adstock.py:
--------------------------------------------------------------------------------
1 | import statsmodels.tsa.api as tsa
2 |
3 | def geometric_adstock(x, theta):
4 | return tsa.filters.recursive_filter(x, theta)
--------------------------------------------------------------------------------
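A quick sanity check of the decay behaviour (hypothetical numbers): `recursive_filter` applies y[t] = x[t] + theta * y[t-1], so a single burst of spend decays geometrically.

    import statsmodels.tsa.api as tsa

    # a one-period burst of 100 decays by theta=0.5 each period
    print(list(tsa.filters.recursive_filter([100, 0, 0, 0], 0.5)))
    # [100.0, 50.0, 25.0, 12.5]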
/src/hommmer/cleaners/count_na_cols.py:
--------------------------------------------------------------------------------
1 | def count_na_cols(df):
2 | missing = df.isna().sum() * 100 / len(df)
3 | return missing[missing > 0].sort_values(ascending=False)
--------------------------------------------------------------------------------
/src/hommmer/features/hill_saturation.py:
--------------------------------------------------------------------------------
1 | # https://github.com/sibylhe/mmm_stan#13-diminishing-return
2 | def hill_saturation(x, ec, slope):
3 | return 1 / (1 + (x / ec)**(-slope))
--------------------------------------------------------------------------------
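By construction the Hill curve passes through 0.5 when spend equals `ec`, so a hypothetical spot check looks like:

    from hommmer.features import hill_saturation

    hill_saturation(50, ec=50, slope=2)   # 0.5: half saturation at x == ec
    hill_saturation(100, ec=50, slope=2)  # 0.8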
/src/hommmer/metrics/dummy_median.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def dummy_median(y_actual):
4 | # dummy median predictor
5 | return np.full(y_actual.shape, np.median(y_actual))
--------------------------------------------------------------------------------
/src/hommmer/cleaners/count_dup_cols.py:
--------------------------------------------------------------------------------
1 | def count_dup_cols(df):
2 |     # percentage of duplicated values per column (df.duplicated() alone returns a row-level scalar that can't be indexed)
3 |     duplicate = df.apply(lambda col: col.duplicated().sum()) * 100 / len(df)
4 |     return duplicate[duplicate > 0].sort_values(ascending=False)
--------------------------------------------------------------------------------
/src/hommmer/cleaners/guess_numerical_variables.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def guess_numerical_variables(df):
4 | return list(df.select_dtypes(include=[np.number]).columns.values)
--------------------------------------------------------------------------------
/src/hommmer/metrics/dummy_constant.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def dummy_constant(y_actual, constant):
4 | # dummy constant predictor
5 | return np.full(y_actual.shape, constant)
--------------------------------------------------------------------------------
/src/hommmer/metrics/dummy_mean.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def dummy_mean(y_actual):
4 | # dummy mean predictor
5 | return np.full(y_actual.shape, np.mean(y_actual))
6 |
--------------------------------------------------------------------------------
/src/hommmer/metrics/mse.py:
--------------------------------------------------------------------------------
1 | from sklearn import metrics
2 |
3 | def mse(y_actual, y_pred):
4 | # mean square error
5 | return round(metrics.mean_squared_error(y_actual, y_pred), 3)
--------------------------------------------------------------------------------
/src/hommmer/metrics/mae.py:
--------------------------------------------------------------------------------
1 | from sklearn import metrics
2 |
3 | def mae(y_actual, y_pred):
4 | # mean absolute error
5 | return round(metrics.mean_absolute_error(y_actual, y_pred),3)
--------------------------------------------------------------------------------
/src/hommmer/charts/__init__.py:
--------------------------------------------------------------------------------
1 | from .accuracy import accuracy
2 | from .heatmap import heatmap
3 | from .pairplot import pairplot
4 | from .response import response
5 | from .y_corr import y_corr
--------------------------------------------------------------------------------
/src/hommmer/cleaners/add_X_labels.py:
--------------------------------------------------------------------------------
1 | def add_X_labels(X_labels, add_cols):
2 | for x in add_cols:
3 | if x not in X_labels:
4 | X_labels.append(x)
5 |
6 | return X_labels
--------------------------------------------------------------------------------
/src/hommmer/cleaners/del_X_labels.py:
--------------------------------------------------------------------------------
1 | def del_X_labels(X_labels, del_cols):
2 | for x in del_cols:
3 | if x in X_labels:
4 | X_labels.remove(x)
5 |
6 | return X_labels
--------------------------------------------------------------------------------
/src/hommmer/cleaners/make_column_index.py:
--------------------------------------------------------------------------------
1 | def make_column_index(df, column_label):
2 | df.index = df[column_label]
3 | df.drop(column_label, axis=1, inplace=True)
4 | df.index.name = None
--------------------------------------------------------------------------------
/src/hommmer/metrics/mdape.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def mdape(y_actual, y_pred):
4 | # median absolute percentage error
5 | return np.median(np.abs((y_actual - y_pred) / y_actual)) * 100
--------------------------------------------------------------------------------
/src/hommmer/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .Linear import Linear
2 | from .LogLinear import LogLinear
3 | from .LogLog import LogLog
4 | from .Ridge import Ridge
5 | from .DeepLearning import DeepLearning
--------------------------------------------------------------------------------
/src/hommmer/features/scaled_saturation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def scaled_saturation(x, alpha=None):
4 | if alpha is None:
5 | alpha = x.max()
6 | return alpha * (1 - np.exp(x/-alpha))
--------------------------------------------------------------------------------
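Since the formula is alpha * (1 - exp(-x / alpha)), the response rises towards the `alpha` ceiling; a hypothetical check:

    import numpy as np
    from hommmer.features import scaled_saturation

    x = np.array([0.0, 100.0, 1000.0])
    scaled_saturation(x, alpha=100)  # ~[0.0, 63.2, 100.0], saturating at alpha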
/src/hommmer/metrics/degrees_of_freedom.py:
--------------------------------------------------------------------------------
1 | def degrees_of_freedom(num_obs, num_params):
2 | # https://machinelearningmastery.com/degrees-of-freedom-in-machine-learning/
3 | return num_obs - num_params
--------------------------------------------------------------------------------
/src/hommmer/cleaners/end_of_month.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def end_of_month(df, date_col):
4 | end_of_month = pd.to_datetime(df[date_col]) + pd.offsets.MonthEnd(1)
5 | df['end_of_month'] = end_of_month
6 |
--------------------------------------------------------------------------------
/src/hommmer/cleaners/payday_dummies.py:
--------------------------------------------------------------------------------
1 | def add_payday_dummies(df, date_label):
2 |     df['payday'] = df[date_label].apply(lambda x: 1 if x.strftime('%d') in ('14','15','16','30','31','01','02') else 0)  # %d is zero-padded, so '01'/'02', never '1'/'2'
3 |
4 | return df
--------------------------------------------------------------------------------
/src/hommmer/charts/y_corr.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 |
3 | def y_corr(df, y_label):
4 | plt.figure(figsize=(15,6))
5 | bars = df.corr()[y_label].sort_values(ascending=False).plot(kind='bar')
6 | plt.show()
--------------------------------------------------------------------------------
/src/hommmer/cleaners/get_all_X_labels.py:
--------------------------------------------------------------------------------
1 | def get_all_X_labels(columns, y_label, date_label):
2 | X_labels = columns.copy()
3 | X_labels.remove(y_label)
4 | X_labels.remove(date_label)
5 |
6 | return X_labels
--------------------------------------------------------------------------------
/src/hommmer/metrics/rmse.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import metrics
3 |
4 | def rmse(y_actual, y_pred):
5 | # root mean square error
6 | return round(np.sqrt(metrics.mean_squared_error(y_actual, y_pred)), 3)
--------------------------------------------------------------------------------
/src/hommmer/charts/heatmap.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import seaborn as sns
3 |
4 | def heatmap(df):
5 | plt.figure(figsize=(15,6))
6 | heatmap = sns.heatmap(df.corr(), annot=True, cmap="Blues")
7 | plt.show()
--------------------------------------------------------------------------------
/src/hommmer/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .make_data import make_data
2 | from .scale_feature import scale_feature
3 | from .make_dates import make_dates
4 | from .load_duff import load_duff
5 | from .load_holidays import load_holidays
--------------------------------------------------------------------------------
/src/hommmer/features/normalize.py:
--------------------------------------------------------------------------------
1 | # standardize variable
2 | def normalize(x, method="mean"):
3 | if method == "minmax":
4 | return (x-x.min())/(x.max()-x.min())
5 | else:
6 | return (x - x.mean())/x.std()
7 |
--------------------------------------------------------------------------------
/src/hommmer/datasets/add_noise.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def add_noise(series):
4 |     series = np.array(series, dtype=float)  # cast to float so integer series accept the float noise below
5 |     series += np.random.normal(scale=0.1, size=series.shape)
6 | series = np.squeeze(series)
7 | return series
--------------------------------------------------------------------------------
/src/hommmer/helpers/__init__.py:
--------------------------------------------------------------------------------
1 | from .logging import init_logging, log
2 | from .its_working import its_working
3 | from .check_metric import check_metric
4 | from .exp_ex_zeros import exp_ex_zeros
5 | from .log_ex_zeros import log_ex_zeros
--------------------------------------------------------------------------------
/src/hommmer/cleaners/drop_n_rows.py:
--------------------------------------------------------------------------------
1 | def drop_n_rows(df, n=1, top=False):
2 | if top:
3 | df.drop(df.head(n).index, inplace=True) # drop first n rows
4 | else:
5 | df.drop(df.tail(n).index, inplace=True) # drop last n rows
--------------------------------------------------------------------------------
/src/hommmer/metrics/smape.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def smape(y_actual, y_pred):
4 | # symmetric mean absolute percentage error
5 | return 100/len(y_actual) * np.sum(2 * np.abs(y_pred - y_actual) / (np.abs(y_actual) + np.abs(y_pred)))
--------------------------------------------------------------------------------
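A one-line worked example: with an actual of 100 and a forecast of 110, SMAPE is 100 * (2 * 10 / 210), roughly 9.52.

    import numpy as np
    from hommmer.metrics import smape

    smape(np.array([100]), np.array([110]))  # ~9.524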
/src/hommmer/cleaners/start_of_month.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def start_of_month(df, date_col):
4 | start_of_month = (df[date_col].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1))
5 | df['start_of_month'] = start_of_month
--------------------------------------------------------------------------------
/src/hommmer/metrics/log_accuracy_ratio.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # the logarithm of the accuracy ratio (the ratio of the forecasted to the actual value)
4 | # https://agupubs.onlinelibrary.wiley.com/doi/full/10.1002/2017SW001669
5 |
6 | def log_accuracy_ratio(y_actual, y_pred):
7 |     # mean log accuracy ratio: log(forecast / actual), per the reference above
8 |     return np.mean(np.log(np.array(y_pred) / np.array(y_actual)))
--------------------------------------------------------------------------------
/src/hommmer/metrics/mfe.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | # https://datasciencestunt.com/mean-directional-accuracy-of-time-series-forecast/
3 |
4 | def mfe(y_actual, y_pred):
5 | # mean forecast error or forecast bias
6 | return np.mean(y_actual - y_pred)
--------------------------------------------------------------------------------
/src/hommmer/metrics/rsquared.py:
--------------------------------------------------------------------------------
1 | from sklearn import metrics
2 |
3 | def rsquared(y_actual, y_pred):
4 | # r squared
5 | value = round(metrics.r2_score(y_actual, y_pred), 3)
6 | passed = "✔️" if value > 0.8 else "❌"
7 | return value, passed
--------------------------------------------------------------------------------
/src/hommmer/cleaners/str_to_dummy.py:
--------------------------------------------------------------------------------
1 | d = {
2 | "y": 1, "yes": 1, "t": 1, "true": 1, "on": 1, "1": 1,
3 | "n": 0, "no": 0, "f": 0, "false": 0, "off": 0, "0": 0
4 | }
5 |
6 | def str_to_dummy(series):
7 |     return series.str.lower().map(d).astype(int)  # lower() alone isn't defined on a Series; use the .str accessor
--------------------------------------------------------------------------------
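With the `.str` accessor fix above, a hypothetical call maps mixed-case flags to 0/1:

    import pandas as pd

    str_to_dummy(pd.Series(["Yes", "no", "TRUE", "0"]))  # -> 1, 0, 1, 0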
/website/postcss.config.js:
--------------------------------------------------------------------------------
1 | // If you want to use other PostCSS plugins, see the following:
2 | // https://tailwindcss.com/docs/using-with-preprocessors
3 | module.exports = {
4 | plugins: {
5 | tailwindcss: {},
6 | autoprefixer: {},
7 | },
8 | }
9 |
--------------------------------------------------------------------------------
/src/hommmer/cleaners/guess_date_column.py:
--------------------------------------------------------------------------------
1 | def guess_date_column(columns):
2 |     lowered = [x.lower() for x in columns]
3 |     guesses = ['date', 'day', 'week', 'month']
4 |     for x in guesses:
5 |         if x in lowered:
6 |             return columns[lowered.index(x)]  # return the original column label, not the lowercased guess
7 |     return None
--------------------------------------------------------------------------------
/src/hommmer/cleaners/make_date_index.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def make_date_index(df, date_label):
4 | df[date_label] = pd.to_datetime(df[date_label])
5 | df.index = df[date_label]
6 | df.drop(date_label, axis=1, inplace=True)
7 | df.index.name = None
--------------------------------------------------------------------------------
/src/hommmer/charts/partial_dependence.py:
--------------------------------------------------------------------------------
1 | from sklearn.inspection import plot_partial_dependence
2 | # https://scikit-learn.org/stable/modules/partial_dependence.html
3 |
4 | def partial_dependence(model, X_test, features):
5 | plot_partial_dependence(model, X_test, features)
--------------------------------------------------------------------------------
/src/hommmer/metrics/durbin_watson.py:
--------------------------------------------------------------------------------
1 | import statsmodels.stats.api as sms
2 |
3 | def durbin_watson(residuals):
4 | # tests for autocorrelation
5 | # durbin watson should be between 1.5 and 2.5
6 | test = sms.durbin_watson(residuals)
7 | return ('Durbin Watson', test)
--------------------------------------------------------------------------------
/src/hommmer/cleaners/convert_date.py:
--------------------------------------------------------------------------------
1 | import datetime as dt
2 |
3 | def convert_date(date, from_format="%m/%d/%Y", to_format="%Y-%m-%d"):
4 | date_str = str(date)
5 | date_obj = dt.datetime.strptime(date_str, from_format)
6 | return dt.datetime.strftime(date_obj, to_format)
--------------------------------------------------------------------------------
/src/hommmer/cleaners/guess_y_column.py:
--------------------------------------------------------------------------------
1 | def guess_y_column(columns):
2 |     guesses = ['revenue', 'sales', 'conversions', 'purchases']
3 |     lowered = [x.lower() for x in columns]
4 |     for x in guesses:
5 |         if x in lowered:
6 |             return columns[lowered.index(x)]  # return the original column label, not the lowercased guess
7 |     return None
8 |
--------------------------------------------------------------------------------
/src/hommmer/metrics/vars_obs.py:
--------------------------------------------------------------------------------
1 | def vars_obs(df):
2 |     # 7 - 10 observations per variable, so divide rows by columns (not the reverse)
3 |     # https://storage.googleapis.com/pub-tools-public-publication-data/pdf/2d0395bc7d4d13ddedef54d744ba7748e8ba8dd1.pdf
4 |     return df.shape[0] / df.shape[1] >= 7, df.shape[0] / df.shape[1]
--------------------------------------------------------------------------------
/src/hommmer/cleaners/standard_scaler.py:
--------------------------------------------------------------------------------
1 | from sklearn.preprocessing import StandardScaler
2 |
3 | def standard_scaler(X_train, X_test):
4 | scaler = StandardScaler()
5 | X_train = scaler.fit_transform(X_train)
6 | X_test = scaler.transform(X_test)
7 | return X_train, X_test
--------------------------------------------------------------------------------
/src/hommmer/metrics/condition_number.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | def condition_number(X):
3 | # tests for multicollinearity
4 | # condition number should be less than 30
5 | value = round(np.linalg.cond(X))
6 | passed = "✔️" if value < 30 else "❌"
7 | return value, passed
--------------------------------------------------------------------------------
/src/hommmer/cleaners/modify_labels.py:
--------------------------------------------------------------------------------
1 | def modify_labels(text, labels, prefix=False, sep=" | "):
2 |     modified_labels = []
3 |     for x in labels:
4 |         if prefix:
5 |             modified_labels.append(f"{text}{sep}{x}")
6 |         else:
7 |             modified_labels.append(f"{x}{sep}{text}")
8 |
9 |     return modified_labels
--------------------------------------------------------------------------------
/src/hommmer/features/denormalize.py:
--------------------------------------------------------------------------------
1 | # https://stackoverflow.com/questions/51471672/reverse-z-score-pandas-dataframe
2 | def denormalize(x_trans, x, method="mean"):
3 | if method == "minmax":
4 | return (x.max()-x.min())*x_trans+x.min()
5 | else:
6 | return x_trans*x.std()+x.mean()
--------------------------------------------------------------------------------
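normalize and denormalize are inverses for the same reference series; a hypothetical round trip (pandas uses the sample std, ddof=1):

    import pandas as pd
    from hommmer.features import normalize, denormalize

    x = pd.Series([10.0, 20.0, 30.0])
    z = normalize(x)   # z-scores: [-1.0, 0.0, 1.0]
    denormalize(z, x)  # back to [10.0, 20.0, 30.0]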
/src/hommmer/features/s_curve_saturation.py:
--------------------------------------------------------------------------------
1 | # https://facebookexperimental.github.io/Robyn/docs/variable-transformations/
2 | def s_curve_saturation(x, alpha, gamma):
3 | """
4 | x = array
5 | alpha = shape
6 | gamma = inflection
7 | """
8 | return x**alpha / (x ** alpha + gamma ** alpha)
--------------------------------------------------------------------------------
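Because the denominator is x^alpha + gamma^alpha, the curve crosses 0.5 exactly at x == gamma; hypothetical values:

    from hommmer.features import s_curve_saturation

    s_curve_saturation(50, alpha=2, gamma=50)   # 0.5: gamma is the inflection point
    s_curve_saturation(100, alpha=2, gamma=50)  # 0.8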
/src/hommmer/metrics/breuschpagan.py:
--------------------------------------------------------------------------------
1 | import statsmodels.stats.api as sms
2 | from statsmodels.compat import lzip
3 |
4 | def breuschpagan(residuals, exog):
5 |     # tests for heteroskedasticity (null hypothesis: homoskedastic residuals)
6 |     # p-value should be greater than 0.05
7 |     name = ['Lagrange', 'p-value', 'f-value', 'f p-value']
8 |     test = sms.het_breuschpagan(residuals, exog)
9 |     return lzip(name, test)
--------------------------------------------------------------------------------
/src/hommmer/helpers/logging.py:
--------------------------------------------------------------------------------
1 | # default so log() works even before init_logging() is called
2 | VERBOSE = False
3 |
4 | # print logs if verbose
5 | def log(string):
6 |     if VERBOSE:
7 |         print(string)
8 |
9 | # set a global variable for logging
10 | def init_logging(verbose):
11 |     global VERBOSE
12 |     VERBOSE = bool(verbose)
--------------------------------------------------------------------------------
/src/hommmer/metrics/mda.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | # https://datasciencestunt.com/mean-directional-accuracy-of-time-series-forecast/
3 |
4 | def mda(y_actual, y_pred):
5 | # mean directional accuracy
6 | return np.mean((np.sign(y_actual[1:] - y_actual[:-1]) == np.sign(y_pred[1:] - y_pred[:-1])).astype(int))
--------------------------------------------------------------------------------
/src/hommmer/cleaners/guess_categorical_variables.py:
--------------------------------------------------------------------------------
1 | def guess_categorical_variables(df):
2 |     cat_vars = []
3 |     for x in df.columns:
4 |         # compare as a set: value_counts() orders by frequency, not by value
5 |         values = set(df[x].dropna().unique())
6 |         if values and values <= {0, 1}:  # {True, False} equals {0, 1} in Python
7 |             cat_vars.append(x)
8 |
9 |     return cat_vars
--------------------------------------------------------------------------------
/src/hommmer/metrics/nrmse.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import metrics
3 |
4 | def nrmse(y_actual, y_pred):
5 | # normalized root mean square error
6 | value = round(np.sqrt(metrics.mean_squared_error(y_actual, y_pred)) / np.mean(y_actual), 3)
7 | passed = "✔️" if value < 0.15 else "❌"
8 | return value, passed
--------------------------------------------------------------------------------
/src/hommmer/cleaners/train_test_split.py:
--------------------------------------------------------------------------------
1 | from sklearn.model_selection import train_test_split as sk_train_test_split
2 |
3 | def train_test_split(df, y_label, X_labels):
4 |     X = df[X_labels]
5 |     y = df[y_label]
6 |
7 |     # alias the sklearn import so this wrapper doesn't call itself recursively
8 |     X_train, X_test, y_train, y_test = sk_train_test_split(X, y, test_size=0.3, random_state=0)
9 |     return X_train, X_test, y_train, y_test
--------------------------------------------------------------------------------
/src/hommmer/cleaners/days_in_month.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def days_in_month(df, date_col):
4 | start_of_month = (df[date_col].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1))
5 | end_of_month = pd.to_datetime(df[date_col]) + pd.offsets.MonthEnd(1)
6 | df['days_in_month'] = (end_of_month - start_of_month).dt.days + 1
--------------------------------------------------------------------------------
/src/hommmer/datasets/load_duff.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 |
4 | def load_duff(download=True):
5 |     path = os.path.join(os.path.dirname(__file__), "duff.csv")
6 |     df = pd.read_csv(path)
7 |     if download:
8 |         df.to_csv('duff.csv', index=None)
9 |         print("saving duff.csv")
10 |     return df  # return the DataFrame whether or not a copy was saved
--------------------------------------------------------------------------------
/src/hommmer/connectors/covid_mobility.py:
--------------------------------------------------------------------------------
1 | def covid_mobility(df, sub_region_1=None):
2 |     if sub_region_1 is None:
3 |         data = df[df['sub_region_1'].isnull()]
4 |     else:
5 |         data = df[df['sub_region_1'] == sub_region_1]
6 |         data = data[data['sub_region_2'].isnull()]  # filter the already-selected region, not the original df
7 |
8 |     data.reset_index(inplace=True)
9 |     return data[data.columns[9:]]
--------------------------------------------------------------------------------
/src/hommmer/datasets/load_holidays.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 |
4 | def load_holidays(download=True):
5 |     path = os.path.join(os.path.dirname(__file__), "holidays.csv")
6 |     df = pd.read_csv(path)
7 |     if download:
8 |         df.to_csv('holidays.csv', index=None)
9 |         print("saving holidays.csv")
10 |     return df  # return the DataFrame whether or not a copy was saved
--------------------------------------------------------------------------------
/src/hommmer/helpers/exp_ex_zeros.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | def exp_ex_zeros(series):
5 |     np_array = np.array(series.values, dtype=float)  # np.float was removed from NumPy; the builtin float is equivalent
6 | out = np.zeros_like(np_array)
7 | exponent = np.exp(np_array, where=np_array!=0, out=out)
8 | exp_series = pd.Series(exponent, name=series.name, index=series.index)
9 | return exp_series
--------------------------------------------------------------------------------
/src/hommmer/helpers/log_ex_zeros.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | def log_ex_zeros(series):
5 |     np_array = np.array(series.values, dtype=float)  # np.float was removed from NumPy; the builtin float is equivalent
6 | out = np.zeros_like(np_array)
7 | logged = np.log(np_array, where=np_array!=0, out=out)
8 | log_series = pd.Series(logged, name=series.name, index=series.index)
9 | return log_series
--------------------------------------------------------------------------------
/src/hommmer/cleaners/guess_media_columns.py:
--------------------------------------------------------------------------------
1 | def guess_media_columns(columns):
2 |     guesses = ['cost', 'spend', 'impression', 'spent', 'clicks']
3 |     media_columns = []
4 |     for x in guesses:
5 |         for y in columns:
6 |             # match case-insensitively, return the original label, and avoid duplicates
7 |             if x in y.lower() and y not in media_columns:
8 |                 media_columns.append(y)
9 |
10 |     return media_columns
--------------------------------------------------------------------------------
/src/hommmer/metrics/harvey_collier.py:
--------------------------------------------------------------------------------
1 | import statsmodels.stats.api as sms
2 | from statsmodels.compat import lzip
3 |
4 | # need a way to run without passing results object
5 | def harvey_collier(residuals, results, exog):
6 |     # p-value should be greater than 0.05 (null hypothesis: the linear specification is correct)
7 | name = ['t value', 'p value']
8 | test = sms.linear_harvey_collier(results)
9 |
10 | return lzip(name, test)
--------------------------------------------------------------------------------
/src/hommmer/cleaners/date_dummies.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def date_dummies(df):
4 |     # expects a two-column df: [date, dummy label], one row per event
5 |     dr = pd.date_range(start=df['date'].min(), end=df['date'].max())
6 |     date_df = pd.DataFrame({'date': dr})
7 |     for _, row in df.iterrows():
8 |         date_df[row[1]] = (date_df['date'] == row[0])
9 |
10 |     date_df.iloc[:, 1:] = date_df.iloc[:, 1:].astype(int)
11 |     return date_df
--------------------------------------------------------------------------------
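The positional row[0]/row[1] access implies that two-column layout; a hypothetical events frame:

    import pandas as pd

    events = pd.DataFrame({'date': pd.to_datetime(['2021-06-01', '2021-06-03']),
                           'name': ['sale', 'launch']})
    date_dummies(events)  # daily rows from 2021-06-01 to 2021-06-03 with 0/1 'sale' and 'launch' columns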
/src/hommmer/datasets/scale_feature.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 |
4 | def scale_feature(series, min_value=None, max_value=None):
5 | # if no min or max values supplied
6 | if min_value is None:
7 | min_value = 0
8 | if max_value is None:
9 | max_value = series.max() * 100
10 |
11 | return np.interp(series, (series.min(), series.max()), (min_value, max_value))
--------------------------------------------------------------------------------
/src/hommmer/metrics/ljungbox.py:
--------------------------------------------------------------------------------
1 | import statsmodels.stats.api as sms
2 | from statsmodels.compat import lzip
3 |
4 | def ljungbox(residuals, X_labels):
5 |     # tests for autocorrelation (null hypothesis: no autocorrelation)
6 |     # p-value should be greater than 0.05
7 |     name = ['Ljung-Box stat', 'p-value']
8 |     lags = int(min(len(X_labels) / 2 - 2, 40))  # lags must be an integer
9 |     test = sms.acorr_ljungbox(residuals, lags=[lags])
10 |     return lzip(name, test)
--------------------------------------------------------------------------------
/src/hommmer/cleaners/make_geodate_index.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def make_geodate_index(df, date_label, geo_label):
4 | key_label = f"{date_label}${geo_label}"
5 | # df[key_label] = pd.to_datetime(df[date_label]).astype(str) + "$" + df[geo_label]
6 | df[key_label] = df[date_label] + "$" + df[geo_label]
7 | df.index = df[key_label]
8 | df.drop(key_label, axis=1, inplace=True)
9 | df.index.name = None
--------------------------------------------------------------------------------
/src/hommmer/metrics/rainbox.py:
--------------------------------------------------------------------------------
1 | import statsmodels.stats.api as sms
2 | from statsmodels.compat import lzip
3 |
4 | # need a way to run without passing results object
5 | def rainbox(residuals, results, X_labels):
6 | # tests for linearity
7 |     # p-value should be greater than 0.05 (null hypothesis: the relationship is linear)
8 | name = ['rainbow F stat', 'rainbow F stat p-value']
9 | test = sms.linear_rainbow(results)
10 | return lzip(name, test)
--------------------------------------------------------------------------------
/src/hommmer/metrics/mape.py:
--------------------------------------------------------------------------------
1 | from sklearn import metrics
2 | import numpy as np
3 |
4 | #https://www.researchgate.net/post/Is-there-a-cut-off-point-for-the-mean-absolute-percentage-error-MAPE
5 |
6 | def mape(y_actual, y_pred):
7 |     # mean absolute error scaled by the mean of actuals (a WMAPE-style variant of MAPE)
8 | value = round(metrics.mean_absolute_error(y_actual, y_pred)/np.mean(y_actual),3)
9 | passed = "✔️" if value < 0.15 else "❌"
10 | return value, passed
--------------------------------------------------------------------------------
/src/hommmer/metrics/jarque_bera.py:
--------------------------------------------------------------------------------
1 | import statsmodels.stats.api as sms
2 | from statsmodels.compat import lzip
3 |
4 | def jarque_bera(residuals):
5 | # Tests for normality of the residuals
6 | # skewness should be between -2 and 2
7 | # kurtosis should be between -7 and 7
8 | name = ['Jarque-Bera', 'Chi^2 prob', 'Skewness', 'Kurtosis']
9 | test = sms.jarque_bera(residuals)
10 |
11 | return lzip(name, test)
--------------------------------------------------------------------------------
/src/hommmer/cleaners/remove_outliers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def remove_outliers(series, num_std_devs=3):
4 | mean = np.mean(series)
5 | std_dev = np.std(series)
6 | outliers_cutoff = std_dev * num_std_devs
7 | lower_limit = mean - outliers_cutoff
8 | upper_limit = mean + outliers_cutoff
9 |
10 | no_outliers = series.apply(lambda x: mean if x > upper_limit or x < lower_limit else x)
11 |
12 | return no_outliers
--------------------------------------------------------------------------------
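Despite the name, outliers are replaced with the series mean rather than dropped; a hypothetical check:

    import pandas as pd

    s = pd.Series([10, 12, 11, 9, 10, 11, 200])
    remove_outliers(s, num_std_devs=2)  # the 200 is swapped for the mean (~37.6); the rest pass through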
/website/pages/_app.js:
--------------------------------------------------------------------------------
1 | import "tailwindcss/tailwind.css";
2 |
3 | import TagManager from "react-gtm-module";
4 | import { useEffect } from "react";
5 |
6 | function MyApp({ Component, pageProps }) {
7 | const tagManagerArgs = {
8 | gtmId: "GTM-P24ZPZM",
9 | };
10 | useEffect(() => {
11 | TagManager.initialize(tagManagerArgs);
12 | }, []);
13 |
14 |   return <Component {...pageProps} />;
15 | }
16 |
17 | export default MyApp;
18 |
--------------------------------------------------------------------------------
/src/hommmer/cleaners/date_range_dummies.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def date_range_dummies(df):
4 |     # expects a three-column df: [start date, end date, dummy label], one row per event
5 |     dr = pd.date_range(start=df['start'].min(), end=df['end'].max())
6 |
7 |     date_df = pd.DataFrame({'date': dr})
8 |
9 |     for _, row in df.iterrows():
10 |         date_df[row[2]] = (date_df['date'] >= row[0]) & (date_df['date'] <= row[1])
11 |
12 |     date_df.iloc[:, 1:] = date_df.iloc[:, 1:].astype(int)
13 |     return date_df
--------------------------------------------------------------------------------
/src/hommmer/cleaners/group_weekly.py:
--------------------------------------------------------------------------------
1 | def group_weekly(df, date_col):
2 | weekly = df.copy()
3 | weekly['week'] = weekly[date_col].dt.isocalendar().week
4 | weekly['year'] = weekly[date_col].dt.isocalendar().year
5 | weekly['year_week'] = weekly['year'].astype(str) + "-" + weekly['week'].astype(str)
6 | weekly = weekly.groupby('year_week').sum()
7 | weekly.drop(['week', 'year'], axis=1, inplace=True)
8 | weekly.reset_index(inplace=True)
9 | return weekly
--------------------------------------------------------------------------------
/src/hommmer/datasets/make_dates.py:
--------------------------------------------------------------------------------
1 | import datetime as dt
2 | import pandas as pd
3 |
4 | def make_dates(days=180, end_date=None):
5 | if end_date:
6 | end_date = dt.datetime.strptime(end_date, "%Y-%m-%d")
7 | else:
8 | end_date = dt.datetime.today()
9 |
10 | start_date = end_date - dt.timedelta(days-1)
11 | dates = pd.date_range(start_date, periods=days, freq='D')
12 | dates = pd.Series(dates.strftime("%Y-%m-%d"))
13 | return dates
--------------------------------------------------------------------------------
/src/hommmer/cleaners/group_monthly.py:
--------------------------------------------------------------------------------
1 | def group_monthly(df, date_col):
2 | monthly = df.copy()
3 | monthly['month'] = monthly[date_col].dt.month
4 | monthly['year'] = monthly[date_col].dt.isocalendar().year
5 | monthly['year_month'] = monthly['year'].astype(str) + "-" + monthly['month'].astype(str)
6 | monthly = monthly.groupby('year_month').sum()
7 | monthly.drop(['month', 'year'], axis=1, inplace=True)
8 | monthly.reset_index(inplace=True)
9 | return monthly
--------------------------------------------------------------------------------
/src/hommmer/cleaners/transpose_data.py:
--------------------------------------------------------------------------------
1 | def transpose_data(df, date_col=None):
2 | if date_col is None:
3 | date_col = df.columns[0]
4 |
5 | transposed = df.T.copy()
6 | transposed.columns = transposed.iloc[0]
7 | transposed.drop(transposed.index[0], inplace=True)
8 | transposed.reset_index(inplace=True)
9 | transposed.rename(columns={"index": date_col}, inplace=True)
10 | transposed = transposed.rename_axis(None, axis = 1)
11 | return transposed
--------------------------------------------------------------------------------
/src/hommmer/connectors/nasa_weather.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def nasa_weather(df):
4 | year = df['YEAR'].astype(str)
5 | month = df['MO'].astype(str)
6 | day = df['DY'].astype(str)
7 |
8 | month = month.apply(lambda x: '0'+x if len(x) == 1 else x)
9 | day = day.apply(lambda x: '0'+x if len(x) == 1 else x)
10 |
11 | df['date'] = pd.to_datetime(year + "-" + month + "-" + day)
12 | df = df[['date', 'T2M_RANGE', 'T2M_MAX', 'T2M_MIN', 'T2M']]
13 |
14 | return df
--------------------------------------------------------------------------------
/website/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "private": true,
3 | "scripts": {
4 | "dev": "next dev",
5 | "build": "next build",
6 | "start": "next start"
7 | },
8 | "dependencies": {
9 | "@heroicons/react": "^1.0.5",
10 | "next": "^12.0.4",
11 | "react": "^17.0.2",
12 | "react-dom": "^17.0.2",
13 | "react-gtm-module": "^2.0.11"
14 | },
15 | "devDependencies": {
16 | "autoprefixer": "^10.2.6",
17 | "postcss": "^8.3.5",
18 | "tailwindcss": "^2.2.4"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/hommmer/cleaners/clean_numeric.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def clean_numeric(series):
4 | series = series.fillna(0)
5 | series = series.astype(str)
6 | series = series.apply(lambda x: x.replace(',',''))
7 | series = series.apply(lambda x: x.replace('$',''))
8 | series = series.apply(lambda x: x.replace('£',''))
9 | series = series.apply(lambda x: x.replace('€',''))
10 | series = series.apply(lambda x: x.replace('%',''))
11 | series = pd.to_numeric(series)
12 |
13 | return series
--------------------------------------------------------------------------------
/src/hommmer/metrics/mase.py:
--------------------------------------------------------------------------------
1 | from sklearn import metrics
2 | import numpy as np
3 | # https://github.com/CamDavidsonPilon/Python-Numerics/blob/master/TimeSeries/MASE.py
4 | # https://medium.com/@ashishdce/mean-absolute-scaled-error-mase-in-forecasting-8f3aecc21968
5 |
6 | def mase(y_train, y_test, y_pred):
7 | # mean absolute scaled error
8 | n = y_train.shape[0]
9 |     naive = np.abs(np.diff(y_train)).sum()/(n-1)  # mean absolute one-step change, not the absolute net change
10 | mae = metrics.mean_absolute_error(y_test, y_pred)
11 |
12 | return round(mae/naive,3)
--------------------------------------------------------------------------------
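A worked example of the scaling: the denominator is the in-sample naive (one-step) MAE, so a model MAE of 1 against a naive MAE of 5/3 gives 0.6.

    import numpy as np
    from hommmer.metrics import mase

    y_train = np.array([10, 12, 11, 13])  # naive MAE = (2 + 1 + 2) / 3 = 5/3
    mase(y_train, np.array([14, 15]), np.array([13, 16]))  # model MAE 1 -> 1 / (5/3) = 0.6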
/src/hommmer/cleaners/merge_data.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | def merge_data(left_df, right_df, date_col="date"):
4 |
5 | # get clean copies of data with date format
6 | left_df = left_df.copy()
7 | left_df[date_col] = pd.to_datetime(left_df[date_col])
8 |
9 | right_df = right_df.copy()
10 | right_df[date_col] = pd.to_datetime(right_df[date_col])
11 |
12 | # join data together
13 | merged_df = left_df.merge(right_df, on=date_col, how='left')
14 | merged_df.fillna(0, inplace=True)
15 | return merged_df
--------------------------------------------------------------------------------
/website/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.js
7 |
8 | # testing
9 | /coverage
10 |
11 | # next.js
12 | /.next/
13 | /out/
14 |
15 | # production
16 | /build
17 |
18 | # misc
19 | .DS_Store
20 | *.pem
21 |
22 | # debug
23 | npm-debug.log*
24 | yarn-debug.log*
25 | yarn-error.log*
26 |
27 | # local env files
28 | .env.local
29 | .env.development.local
30 | .env.test.local
31 | .env.production.local
32 |
33 | # vercel
34 | .vercel
35 |
--------------------------------------------------------------------------------
/website/tailwind.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | mode: "jit",
3 | purge: ["./pages/**/*.{js,ts,jsx,tsx}", "./components/**/*.{js,ts,jsx,tsx}"],
4 | darkMode: false, // or 'media' or 'class'
5 | theme: {
6 | extend: {
7 | visibility: ["hover"],
8 | colors: {
9 | "pants-blue": "#70d1fe",
10 | "skin-yellow": "#fed90f",
11 | "stubble-brown": "#d1b271",
12 | "shoe-black": "#424f46",
13 | "donut-pink": "#ff66ff",
14 | },
15 | },
16 | },
17 | variants: {
18 | extend: {},
19 | },
20 | plugins: [],
21 | };
22 |
--------------------------------------------------------------------------------
/src/hommmer/cleaners/categorize_campaigns.py:
--------------------------------------------------------------------------------
1 | def categorize_campaigns(campaign_name, categories=None):
2 | if categories is None:
3 | categories = {
4 | "prospecting": ['prosp'],
5 | "remarketing": ['remar', 'retar']
6 | }
7 |
8 | campaign_name = campaign_name.lower()
9 |
10 | campaign_category = "uncategorized"
11 |
12 | for category, containing in categories.items():
13 | for text in containing:
14 | if text in campaign_name:
15 | campaign_category = category
16 |
17 | return campaign_category
--------------------------------------------------------------------------------
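Hypothetical campaign names against the default substring rules:

    categorize_campaigns("FB_Prospecting_US")  # 'prospecting'
    categorize_campaigns("Retargeting - UK")   # 'remarketing'
    categorize_campaigns("Brand Search")       # 'uncategorized'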
/src/hommmer/cleaners/shift_dummies.py:
--------------------------------------------------------------------------------
1 | def shift_dummies(df, col, shift):
2 |     shift_cols = []
3 |     shift_sign = "+" if shift >= 0 else "-"
4 |     step = 1 if shift >= 0 else -1
5 |
6 |     # step from 0 towards `shift` so negative shifts also generate columns
7 |     for t in range(0, shift, step):
8 |         col_name = f"{col} t{shift_sign}{abs(t)}"
9 |         df[col_name] = df[col].shift(t)
10 |         shift_cols.append(col_name)
11 |
12 |     prefix = "post" if shift_sign == "+" else "pre"
13 |
14 |     # summary dummy flagging any row covered by the shifted columns
15 |     col_name = f"{prefix}-{col} {shift_sign}{abs(shift)}"
16 |     df[col_name] = (df[shift_cols].sum(axis=1) > 0).astype(int)
17 |     shift_cols.append(col_name)
18 |     return shift_cols
--------------------------------------------------------------------------------
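With the loop fix above, a hypothetical positive shift of 2 adds the lagged columns plus a post-period summary dummy:

    import pandas as pd

    df = pd.DataFrame({'promo': [0, 1, 0, 0]})
    shift_dummies(df, 'promo', 2)  # ['promo t+0', 'promo t+1', 'post-promo +2'], added to df in place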
/src/hommmer/features/loss_function.py:
--------------------------------------------------------------------------------
1 | def loss_function(X_values, X_media, X_org):
2 | # X_media = {
3 | # "labels": ["facebook", "tiktok"],
4 | # "coefs": [6.454, 1.545],
5 | # "drs": [0.6, 0.7]
6 | # }
7 | # X_org = {
8 | # "labels": ["const"],
9 | # "coefs": [-27.5],
10 | # "values": [1]
11 | # }
12 | y = 0
13 | for i in range(len(X_values)):
14 | transform = X_values[i] ** X_media["drs"][i]
15 | contrib = X_media["coefs"][i] * transform
16 | y += contrib
17 |
18 |     for i in range(len(X_org["coefs"])):  # iterate the coefficient list, not the dict's keys
19 | contrib = X_org["coefs"][i] * X_org["values"][i]
20 | y += contrib
21 |
22 | return -y
--------------------------------------------------------------------------------
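Evaluating the commented example directly: 6.454 * 100^0.6 + 1.545 * 50^0.7 - 27.5 is roughly 98.7, returned negated so the optimizer can minimise it.

    X_media = {"labels": ["facebook", "tiktok"], "coefs": [6.454, 1.545], "drs": [0.6, 0.7]}
    X_org = {"labels": ["const"], "coefs": [-27.5], "values": [1]}
    loss_function([100, 50], X_media, X_org)  # ~-98.7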
/src/hommmer/helpers/check_metric.py:
--------------------------------------------------------------------------------
1 | from hommmer.metrics import *
2 |
3 | def check_metric(metric_label, model):
4 | X_test = model.X_test
5 | y_test = model.y_test
6 | y_pred = model.predict(X=X_test)
7 |
8 | if metric_label == 'nrmse':
9 | return nrmse(y_test, y_pred)
10 | elif metric_label == 'rsquared':
11 | return rsquared(y_test, y_pred)
12 | elif metric_label == 'decomp-rssd':
13 | contrib_df = model.contribution()[model.media_labels]
14 | media_X_df = model.X_actual[model.media_labels]
15 | return decomp_rssd(effect_share(contrib_df), spend_share(media_X_df))
16 | elif metric_label == 'cond-no':
17 | return condition_number(model.X_train)
18 | elif metric_label == 'mape':
19 | return mape(y_test, y_pred)
--------------------------------------------------------------------------------
/src/hommmer/connectors/search_trends.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import datetime as dt
3 |
4 | def search_trends(df):
5 | # delete any '<' signs for low volume days
6 | for c in df.select_dtypes(include=['object']).columns[1:]:
7 | df[c] = df[c].str.replace('<', '')
8 | df[c] = pd.to_numeric(df[c])
9 |
10 | date_col = df.columns[0]
11 | df[date_col] = pd.to_datetime(df[date_col])
12 | df.set_index(date_col, inplace=True)
13 | df_reindexed = df.reindex(pd.date_range(start=df.index.min(),
14 | end=df.index.max() + dt.timedelta(days=6), freq='1D'))
15 | df = df_reindexed.interpolate(method='linear')
16 | df = df.round(1)
17 | df.reset_index(inplace=True)
18 | df.rename({'index': 'date'}, axis=1, inplace=True)
19 | return df
--------------------------------------------------------------------------------
/src/hommmer/features/optimizer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.optimize as sco
3 |
4 | from .loss_function import loss_function
5 |
6 | def optimizer(X_media, X_org, budget):
7 | args = (X_media, X_org) # pass non-optimized values into model_function
8 | len_X_media = len(X_media['labels'])
9 | guesses = len_X_media*[budget/len_X_media,] # starting guesses: divide budget evenly
10 | con_1 = {'type': 'eq', 'fun': lambda X: np.sum(X) - budget} # so we can't go over budget
11 | constraints = (con_1)
12 | bound = (0, budget) # spend for a channel can't be negative or higher than budget
13 | bounds = tuple(bound for x in range(len_X_media))
14 | solution = sco.minimize(loss_function, x0=guesses, args=args, method='SLSQP', constraints=constraints, bounds=bounds)
15 | return (-1 * solution.fun), solution.x
--------------------------------------------------------------------------------
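A hypothetical end-to-end call, reusing the dict layout documented in loss_function:

    from hommmer.features import optimizer

    X_media = {"labels": ["facebook", "tiktok"], "coefs": [6.454, 1.545], "drs": [0.6, 0.7]}
    X_org = {"labels": ["const"], "coefs": [-27.5], "values": [1]}
    best_y, best_split = optimizer(X_media, X_org, budget=1000)
    # best_split sums to the budget; each channel's spend is bounded to [0, budget]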
/src/hommmer/cleaners/interpolate_dates.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | def interpolate_dates(df, date_col=None):
5 | data = df.copy()
6 | if date_col is None:
7 | date_col = data.columns[0]
8 |
9 | data[date_col] = pd.to_datetime(data[date_col])
10 |
11 | dr = pd.date_range(start=data[date_col].min(), end=data[date_col].max(), freq='1D')
12 |
13 | date_df = pd.DataFrame({f'{date_col}': dr})
14 |
15 | merged = date_df.merge(data, how='left', on=date_col)
16 | reindexed = merged.set_index(date_col)
17 |
18 | reindexed.replace({0: np.nan}, inplace=True)
19 | resampled = reindexed.interpolate(method='linear')
20 | resampled = resampled.reset_index()
21 | resampled.rename({'index': date_col}, axis=1, inplace=True)
22 | resampled.fillna(0, inplace=True)
23 | return resampled
--------------------------------------------------------------------------------
/src/hommmer/cleaners/week_commencing.py:
--------------------------------------------------------------------------------
1 | import datetime as dt
2 |
3 | def week_commencing(date_str=None, date_format="%Y-%m-%d"):
4 | # https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
5 |
6 | # if no date supplied, default to today
7 | if date_str is None:
8 | today = dt.datetime.today()
9 | date_str = today.strftime(date_format)
10 |
11 | # parse the date string into a datetime object
12 | date = dt.datetime.strptime(date_str, date_format)
13 |
14 | # get the year and week number from the datetime
15 | year_week = dt.datetime.strftime(date, "%Y-%W")
16 |
17 | # hack to get the monday of the week
18 | monday = dt.datetime.strptime(f"{year_week}-1", "%Y-%W-%w")
19 |
20 | # return the monday date in the same format
21 | return dt.datetime.strftime(monday, date_format)
--------------------------------------------------------------------------------
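A hypothetical date check (2021-11-24 was a Wednesday):

    week_commencing("2021-11-24")  # '2021-11-22', the Monday of that week
    week_commencing()              # Monday of the current week, in %Y-%m-%d format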
/src/hommmer/features/rfe.py:
--------------------------------------------------------------------------------
1 | from sklearn.linear_model import LinearRegression
2 | from sklearn.feature_selection import RFE
3 | import pandas as pd
4 |
5 | def rfe(df, y_label, X_labels, max_features=None):
6 | if max_features is None:
7 | # A rule-of-thumb for a minimum number of data points for a stable linear regression
8 | # are 7-10 data points per parameter.
9 | # https://storage.googleapis.com/pub-tools-public-publication-data/pdf/2d0395bc7d4d13ddedef54d744ba7748e8ba8dd1.pdf
10 | max_features = max(round(df.shape[0]/7),1)
11 |
12 | rfe = RFE(LinearRegression(), n_features_to_select=max_features).fit(df[X_labels], df[y_label])
13 | rfe_keep = pd.Series(rfe.support_)
14 | rfe_keep.index = X_labels
15 |
16 | rfe_df = pd.DataFrame({'rfe_keep': rfe_keep})
17 | rfe_df['rfe_ranking'] = rfe.ranking_
18 | return rfe_keep, rfe_df
--------------------------------------------------------------------------------
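A hypothetical run on synthetic data, forcing two survivors so the elimination actually happens:

    import numpy as np
    import pandas as pd
    from hommmer.features import rfe

    df = pd.DataFrame(np.random.rand(70, 5), columns=list('abcde'))
    df['y'] = 3 * df['a'] + df['b']
    keep, rfe_df = rfe(df, 'y', list('abcde'), max_features=2)
    # keep is a boolean Series indexed by feature; 'a' and 'b' should survive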
/src/hommmer/features/__init__.py:
--------------------------------------------------------------------------------
1 | from .bfe import bfe
2 | from .loss_function import loss_function
3 | from .optimizer import optimizer
4 | from .delayed_adstock import delayed_adstock
5 | from .geometric_adstock import geometric_adstock
6 | from .hill_saturation import hill_saturation
7 | from .power_saturation import power_saturation
8 | from .s_curve_saturation import s_curve_saturation
9 | from .scaled_saturation import scaled_saturation
10 | from .weibull_adstock import weibull_adstock
11 | from .weibull_adstock_delayed import weibull_adstock_delayed
12 | from .interaction import interaction
13 | from .lag import lag
14 | from .vif import vif
15 | from .ffe import ffe
16 | from .bif import bif
17 | from .rfe import rfe
18 | # from .prophet_seasonality import prophet_seasonality
19 | from .seasonal_decomp import seasonal_decomp
20 | from .normalize import normalize
21 | from .denormalize import denormalize
--------------------------------------------------------------------------------
/src/hommmer/cleaners/holiday_dummies.py:
--------------------------------------------------------------------------------
1 | import re
2 | import pandas as pd
3 |
4 | from hommmer.datasets import load_holidays
5 |
6 | def holiday_dummies(start, end, country="US", brackets=False):
7 |     all_holidays = load_holidays(download=False)
8 |     country_holidays = all_holidays[all_holidays['country'] == country].copy()  # copy to avoid SettingWithCopyWarning
9 |     if brackets == False:
10 |         country_holidays['holiday'] = country_holidays['holiday'].apply(
11 |             lambda x: re.sub(r' [\[\(].*[\]\)]', '', x))  # raw string for the regex
12 |
13 | dr = pd.date_range(start=start, end=end)
14 | date_df = pd.DataFrame({'ds': dr})
15 | for _, row in country_holidays.iterrows():
16 | if row[1] in date_df.columns:
17 | date_df[row[1]] = date_df[row[1]] | (date_df['ds'] == row[0])
18 | else:
19 | date_df[row[1]] = (date_df['ds'] == row[0])
20 |
21 | date_df.iloc[:, 1:] = date_df.iloc[:, 1:].astype(int)
22 | return date_df
23 |
24 |
--------------------------------------------------------------------------------
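
Usage depends on the bundled holidays data; assuming `load_holidays` returns a frame whose first two columns are the holiday date and name (as the positional `row[0]`/`row[1]` access implies), a call looks like:

```python
from hommmer.cleaners import holiday_dummies

# one 0/1 column per holiday name, one row per day in the range
dummies = holiday_dummies("2030-11-01", "2030-12-31", country="US")
print(dummies.head())
```
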
/src/hommmer/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from .nrmse import nrmse
2 | from .rsquared import rsquared
3 | from .breuschpagan import breuschpagan
4 | from .condition_number import condition_number
5 | from .durbin_watson import durbin_watson
6 | from .jarque_bera import jarque_bera
7 | from .ljungbox import ljungbox
8 | from .mae import mae
9 | from .mape import mape
10 | from .mse import mse
11 | from .rmse import rmse
12 | from .degrees_of_freedom import degrees_of_freedom
13 | from .decomp_rssd import decomp_rssd
14 | from .harvey_collier import harvey_collier
15 | from .rainbox import rainbox
16 | from .vars_obs import vars_obs
17 | from .mdape import mdape
18 | from .smape import smape
19 | from .mda import mda
20 | from .mase import mase
21 | from .mfe import mfe
22 | from .log_accuracy_ratio import log_accuracy_ratio
23 | from .max_error import max_error
24 | from .dummy_constant import dummy_constant
25 | from .dummy_mean import dummy_mean
26 | from .dummy_median import dummy_median
27 | from .effect_share import effect_share
28 | from .spend_share import spend_share
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """The setup script."""
2 | import pathlib
3 | from setuptools import setup, find_packages
4 | HERE = pathlib.Path(__file__).parent
5 | VERSION = "0.0.0.4"
6 | PACKAGE_NAME = "hommmer"
7 | AUTHOR = "mike taylor"
8 | AUTHOR_EMAIL = "mike@saxifrage.xyz"
9 | URL = "https://github.com/hammer-mt/hommmer"
10 | LICENSE = "MIT"
11 | DESCRIPTION = "A simple Marketing Mix Modeling library in Python"
12 | LONG_DESCRIPTION = (HERE / "README.md").read_text(encoding='utf8')
13 | LONG_DESC_TYPE = "text/markdown"
14 | INSTALL_REQUIRES = [
15 | "numpy",
16 | "pandas",
17 | "matplotlib",
18 | "statsmodels",
19 | "typing",
20 | "sklearn",
21 | "seaborn",
22 | # "prophet"
23 | ]
24 | setup(name=PACKAGE_NAME,
25 | version=VERSION,
26 | description=DESCRIPTION,
27 | long_description=LONG_DESCRIPTION,
28 | long_description_content_type=LONG_DESC_TYPE,
29 | author=AUTHOR,
30 | license=LICENSE,
31 | author_email=AUTHOR_EMAIL,
32 | url=URL,
33 | install_requires=INSTALL_REQUIRES,
34 | package_dir={"": "src"},
35 | packages=find_packages(where="src")
36 | )
37 |
--------------------------------------------------------------------------------
/src/hommmer/features/vif.py:
--------------------------------------------------------------------------------
1 | from statsmodels.stats.outliers_influence import variance_inflation_factor
2 | import numpy as np
3 | import pandas as pd
4 | np.seterr(divide='ignore', invalid='ignore') # hide error warning for vif
5 |
6 | def vif(df, X_labels, max_vif=5):
7 | # Variance Inflation Factor (VIF)
8 | # tests for colinearity: A VIF of over 10 for some feature indicates that over 90%
9 | # of the variance in that feature is explained by the remaining features. Over 100
10 | # indicates over 99%. Best practice is to keep variables with a VIF less than 5.
11 |
12 | X = df[X_labels]
13 | X_np = np.array(X)
14 |
15 | vif_results = [(X.columns[i], variance_inflation_factor(X_np, i)) for i in range(X_np.shape[1])]
16 |     vif_df = pd.DataFrame(vif_results, columns=['idx', 'vif']).set_index('idx')
17 |     vif_df.index.name = None
18 |     vif_df['vif_keep'] = vif_df['vif'] < max_vif
19 | 
20 |     vif_keep = list(vif_df[vif_df['vif_keep']].index.values)
21 | 
22 |     return vif_keep, vif_df
--------------------------------------------------------------------------------
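
A small demonstration of the collinearity check: a near-duplicate column should blow past the VIF threshold while an independent one stays near 1.

```python
import numpy as np
import pandas as pd
from hommmer.features import vif

rng = np.random.default_rng(1)
x1 = rng.normal(size=100)
df = pd.DataFrame({
    "x1": x1,
    "x2": 2 * x1 + rng.normal(scale=0.01, size=100),  # nearly collinear with x1
    "x3": rng.normal(size=100),
})
keep, vif_df = vif(df, ["x1", "x2", "x3"])
print(vif_df)  # x1 and x2 should show huge VIFs; only x3 stays under 5
print(keep)    # expected: ['x3']
```
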
/src/hommmer/connectors/colab_helpers.py:
--------------------------------------------------------------------------------
1 | ### NOTE: these functions only work in Google Colab
2 |
3 | def save_local(df, file_name='abt'):
4 | from google.colab import files
5 | file_name = file_name + '.csv'
6 | df.to_csv(file_name, index=False)
7 | files.download(file_name)
8 |
9 |
10 | def upload_local():
11 | from google.colab import files
12 | uploaded = files.upload()
13 | return uploaded
14 |
15 |
16 | def load_gsheet(url, offset=None):
17 | from google.colab import auth
18 | import gspread
19 | from oauth2client.client import GoogleCredentials
20 | import pandas as pd
21 | # authorize google sheets
22 | auth.authenticate_user()
23 |
24 | gc = gspread.authorize(GoogleCredentials.get_application_default())
25 |
26 | spreadsheet = gc.open_by_url(url)
27 | sheet = spreadsheet.get_worksheet(0)
28 |
29 | if offset is None:
30 | df = pd.DataFrame(sheet.get_all_records())
31 | else:
32 | df = pd.DataFrame(sheet.get_all_values()[offset:])
33 | df.columns = df.iloc[0]
34 | df.drop([0], inplace=True)
35 |
36 | return df
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Michael Taylor
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/hommmer/datasets/make_data.py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import make_regression
2 | import pandas as pd
3 |
4 | from .make_dates import make_dates
5 |
6 | # generate regression dataset
7 | def make_data(target_name="y", num_variables=5, num_significant=4, num_observations=180, noise=30):
8 | # Make sure not more significant than variables
9 | if num_significant > num_variables:
10 | num_significant = num_variables
11 |
12 | # Generate the regression data
13 | features, target = make_regression(n_samples=num_observations,
14 | n_features=num_variables,
15 | n_informative=num_significant,
16 | n_targets=1,
17 | noise=noise)
18 |
19 |     variable_names = ['x'+str(i) for i in range(num_variables)]
20 |
21 | # Create dataframe
22 | df = pd.DataFrame(features, columns=variable_names)
23 |
24 | # Add target data
25 | df[target_name] = target
26 |
27 | # Add dates
28 | df['date'] = make_dates(days=num_observations)
29 |
30 | return df
31 |
--------------------------------------------------------------------------------
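
A usage sketch (assuming `make_data` is exported from `hommmer.datasets` the way `load_duff` is):

```python
from hommmer.datasets import make_data

df = make_data(num_variables=3, num_significant=2, num_observations=52)
print(df.columns.tolist())  # ['x0', 'x1', 'x2', 'y', 'date']
print(len(df))              # 52
```
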
/src/hommmer/cleaners/unstack_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | def unstack_data(df, metric_column, unstack_column, date_column='date'):
5 |
6 |     # make a copy of the data with just the columns we need
7 | data = df[[date_column, metric_column, unstack_column]].copy()
8 |
9 | # convert the metric column to numeric
10 | data[metric_column] = pd.to_numeric(data[metric_column])
11 |
12 | # pivot the data set
13 | pivoted = pd.pivot_table(data, index=[date_column], values=[metric_column], columns=[unstack_column], aggfunc=[np.sum])
14 |
15 | # drop level and reset index
16 | pivoted.columns = pivoted.columns.droplevel(0)
17 | pivoted.columns.name = None
18 | pivoted = pivoted.reset_index()
19 | pivoted.columns = [col[1] for col in pivoted.columns]
20 |
21 | # rename unstacked metric columns
22 | metric_columns = list(pivoted.columns[1:])
23 | metric_columns = [f"{c} | {metric_column}" for c in metric_columns]
24 | pivoted.columns = [date_column] + metric_columns
25 |
26 |     # replace missing values with zeros
27 | pivoted.fillna(0, inplace=True)
28 |
29 | # return the pivoted data
30 | return pivoted
--------------------------------------------------------------------------------
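
A before/after sketch of `unstack_data`, turning long-format channel rows into one metric column per channel:

```python
import pandas as pd
from hommmer.cleaners import unstack_data

long_df = pd.DataFrame({
    "date":    ["2030-01-07", "2030-01-07", "2030-01-14", "2030-01-14"],
    "spend":   [100, 50, 120, 60],
    "channel": ["facebook", "google", "facebook", "google"],
})
wide = unstack_data(long_df, metric_column="spend", unstack_column="channel")
print(wide.columns.tolist())  # ['date', 'facebook | spend', 'google | spend']
```
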
/src/hommmer/models/Ridge.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import statsmodels.api as sm
3 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python
4 | from sklearn.linear_model import Ridge as SKRidge # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html
5 |
6 | from .Model import Model
7 |
8 | class Ridge(Model):
9 | def __init__(self, y, X, media_labels, settings):
10 | # inheritance and start timer
11 | super().__init__(y, X, media_labels, settings, "Ridge")
12 | start = timer()
13 |
14 | # fit the model
15 | self._model = self._fit()
16 |
17 | # init required properties
18 | self.coefficients = self._coefficients()
19 |
20 | # finish running
21 | end = timer()
22 | self.runtime = end - start # Time in seconds, e.g. 5.38091952400282
23 |
24 | # log model locally
25 | self._save()
26 |
27 | ### EDIT BELOW HERE ###
28 |
29 | # fit the model
30 | def _fit(self):
31 | return SKRidge(alpha=0.01, fit_intercept=False).fit(self.X_train, self.y_train)
32 |
33 | # get the coefficients
34 | def _coefficients(self):
35 | return self._model.coef_
36 |
--------------------------------------------------------------------------------
/src/hommmer/metrics/decomp_rssd.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | # https://github.com/facebookexperimental/Robyn/issues/82#issuecomment-845846447
3 | # https://github.com/facebookexperimental/Robyn/issues/110
4 | # https://github.com/facebookexperimental/Robyn/blob/dbd8d1f0e640265d5c0a1c3750e51ccf5e3e117d/source/fb_robyn.func.R#L1177
5 | # https://github.com/facebookexperimental/Robyn/issues/95
6 |
7 | # Decomposition distance (DECOMP.RSSD, decomposition root-sum-square distance) is a major innovation of Facebook Robyn.
8 | # The intuition: suppose you spend 90% on TV and 10% on FB. If the model credits 10% of the effect to TV and 90% to FB,
9 | # you'd probably not believe the result, no matter how low the model error (NRMSE) is. If instead you get 80% TV and 20%
10 | # FB as effect share, it seems more realistic. Hence the logic: minimise the distance between share of spend and share
11 | # of effect. It's really about ruling out the most extreme decompositions so the remaining results are more plausible.
12 |
13 | # decomposition root sum of squared distance
14 | def decomp_rssd(effect_share, spend_share):
15 | value = round(np.sqrt(sum((np.array(effect_share)-np.array(spend_share))**2)),3)
16 | passed = "✔️" if value < 0.5 else "❌"
17 | return value, passed
18 |
19 |
--------------------------------------------------------------------------------
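
A worked example of the distance: a 90/10 spend split with a 10/90 effect split is the implausible extreme the metric penalises, while 90/10 spend vs 80/20 effect passes.

```python
from hommmer.metrics import decomp_rssd

# extreme mismatch: sqrt((0.1-0.9)**2 + (0.9-0.1)**2) ≈ 1.131 -> fails
print(decomp_rssd([0.1, 0.9], [0.9, 0.1]))  # (1.131, '❌')

# plausible: sqrt((0.8-0.9)**2 + (0.2-0.1)**2) ≈ 0.141 -> passes (< 0.5)
print(decomp_rssd([0.8, 0.2], [0.9, 0.1]))  # (0.141, '✔️')
```
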
/src/hommmer/cleaners/interpolate_weekly.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import datetime as dt
4 |
5 | def interpolate_weekly(df, date_col=None, resample_col=None):
6 |
7 |     if date_col is None:
8 |         date_col = df.columns[0]
9 | 
10 |     if resample_col is None:
11 |         resample_col = df.columns[1]
12 |
13 | data = df[[date_col, resample_col]].copy()
14 |
15 | data[date_col] = data[date_col].apply(lambda x: dt.datetime.strptime(f"{x}-1", "%Y-%W-%w")) # mondays
16 | data[date_col] = pd.to_datetime(data[date_col]) # datetime
17 | data.set_index(date_col, inplace=True)
18 | data_reindexed = data.reindex(pd.date_range(start=data.index.min(),
19 | end=data.index.max() + dt.timedelta(days=6),
20 | freq='1D'))
21 |
22 | col_to_resample = data_reindexed.columns[0]
23 | data_reindexed[col_to_resample] = pd.to_numeric(data_reindexed[col_to_resample])
24 |     data_reindexed[col_to_resample] = data_reindexed[col_to_resample].replace({0: np.nan})
25 | interpolated = data_reindexed.interpolate(method='linear')
26 | interpolated = interpolated / 7
27 | interpolated.reset_index(inplace=True)
28 | interpolated.rename({'index': 'date'}, axis=1, inplace=True)
29 |
30 | return interpolated
--------------------------------------------------------------------------------
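
Worth noting: the date column must hold `"%Y-%W"` year-week strings (the function appends `-1` to parse each week's Monday). A minimal sketch:

```python
import pandas as pd
from hommmer.cleaners import interpolate_weekly

weekly = pd.DataFrame({"week":  ["2030-01", "2030-02", "2030-03"],
                       "sales": [700, 1400, 700]})
daily = interpolate_weekly(weekly, date_col="week", resample_col="sales")
print(daily.head(8))  # one row per day; weekly totals interpolated, then divided by 7
```
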
/src/hommmer/features/seasonal_decomp.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.seasonal import seasonal_decompose
2 | import matplotlib.pyplot as plt
3 | import pandas as pd
4 | # https://juanitorduz.github.io/fb_prophet/
5 |
6 | def seasonal_decomp(df, target_col, date_col, freq="D"):
7 | # freq == "W-Mon" or "W-Sun" or "D"
8 | pred_df = df[[date_col, target_col]].copy()
9 | pred_df.rename(columns={date_col:'ds', target_col:'y'}, inplace=True)
10 | pred_df = pred_df.set_index('ds').asfreq(freq)
11 | decomp_obj = seasonal_decompose(
12 | x=pred_df['y'],
13 | model='additive'
14 | )
15 | fig, ax = plt.subplots(4, 1, figsize=(12, 12))
16 |
17 | # Observed time series.
18 | decomp_obj.observed.plot(ax=ax[0])
19 | ax[0].set(title='observed')
20 | # Trend component.
21 | decomp_obj.trend.plot(label='fit', ax=ax[1])
22 | ax[1].set(title='trend')
23 | # Seasonal component.
24 | decomp_obj.seasonal.plot(label='fit', ax=ax[2])
25 | ax[2].set(title='seasonal')
26 | # Residual.
27 | decomp_obj.resid.plot(label='fit', ax=ax[3])
28 | ax[3].set(title='resid')
29 |
30 | fig.suptitle('Time Series Decomposition', y=1.01)
31 | plt.tight_layout()
32 | decomp_df = pd.DataFrame([decomp_obj.observed, decomp_obj.trend, decomp_obj.seasonal, decomp_obj.resid])
33 | decomp_df.fillna(0, inplace=True)
34 | return decomp_df
--------------------------------------------------------------------------------
/src/hommmer/features/prophet_seasonality.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from prophet import Prophet  # use the same package as the diagnostics import below (fbprophet was renamed to prophet)
4 | from prophet.diagnostics import performance_metrics, cross_validation
5 |
6 | def prophet_seasonality(df, target_col, date_col, country="US", p=30, freq="D"):
7 | pred_df = df[[date_col, target_col]].copy()
8 | pred_df.rename(columns={date_col:'ds', target_col:'y'}, inplace=True)
9 | daily = True if freq == "D" else False
10 | m = Prophet(yearly_seasonality=True,weekly_seasonality=daily,seasonality_mode='multiplicative') #instantiate Prophet
11 | m.add_country_holidays(country_name=country)
12 |
13 | #fit the model
14 | m.fit(pred_df)
15 |
16 | # predict the future
17 |     future = m.make_future_dataframe(periods=p, freq=freq)
18 |
19 | #use the data in the future dataframe to predict y and insert it into a new dataframe
20 | forecast = m.predict(future)
21 |
22 | #let's see how the prediction worked
23 | m.plot(forecast, figsize=(40,10))
24 |
25 | #let's see how the seasonality worked to predict the y
26 | m.plot_components(forecast)
27 |
28 | # Cross validate your performances
29 | df_cv = cross_validation(m, initial='400 days', period='200 days', horizon = '60 days')
30 | #define how many days of prediction have the lowest mape
31 |
32 | df_p = performance_metrics(df_cv)
33 |
34 | return forecast, df_p
35 |
--------------------------------------------------------------------------------
/src/hommmer/charts/accuracy.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 | import matplotlib.ticker as mtick
4 |
5 | def accuracy(y_actual, y_pred, metric=None):
6 | # set up figure and subplots
7 | fig, ax = plt.subplots(figsize=(14,8), nrows=2, ncols=1, gridspec_kw={'height_ratios': [3, 1]})
8 |
9 | # create plot df
10 | plot_df = pd.DataFrame()
11 | plot_df['Actual'] = y_actual
12 | plot_df['Predicted'] = y_pred
13 | plot_df['Error'] = (y_pred - y_actual) / y_actual * 100
14 |
15 | # plot actual vs predicted on grid
16 | plot_df[['Actual', 'Predicted']].plot(ax=ax[0], ylabel=y_actual.name)
17 |
18 |     if metric:
19 |         ax[0].annotate(f'{metric[0]} = {metric[1]}', xy=(0.05, 0.92), xycoords='axes fraction')
20 |
21 | ax[0].legend(loc="upper center", bbox_to_anchor=(0.5, 1.12), ncol=2)
22 | ax[0].grid(True, which='both')
23 |
24 | # plot error on grid
25 | plot_df[['Error']].plot(ax=ax[1], color='red')
26 | ax[1].grid(True, which='both')
27 | ax[1].legend(loc="upper center", bbox_to_anchor=(0.5, 1.35), ncol=2)
28 | fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
29 | yticks = mtick.FormatStrFormatter(fmt)
30 | ax[1].yaxis.set_major_formatter(yticks)
31 |
32 | # show plots
33 | fig.autofmt_xdate(rotation=45)
34 | plt.gcf().suptitle("Actual vs Predicted", fontsize=20)
35 |
36 | plt.show()
--------------------------------------------------------------------------------
/src/hommmer/features/delayed_adstock.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def delayed_adstock(x, L, P, D):
4 | '''
5 | params:
6 | x: original media variable, array
7 | L: length
8 | P: peak, delay in effect
9 | D: decay, retain rate
10 | returns:
11 | array, adstocked media variable
12 | '''
13 | # https://github.com/sibylhe/mmm_stan#12-adstock
14 | # prepend x with zeros equal to the length -1
15 | x = np.append(np.zeros(L-1), x)
16 |
17 | # create an array of zeros equal to the length
18 | weights = np.zeros(L)
19 |
20 | # loop through each day in length
21 | for l in range(L):
22 | # weight is decay to the power of index - peak squared
23 | weight = D**((l-P)**2)
24 | # add weight to weights in the right place (from back to front)
25 | weights[L-1-l] = weight
26 |
27 | # create an empty list
28 | adstocked_x = []
29 | # loop through length - 1 up to len(x)
30 | for i in range(L-1, len(x)):
31 | # get array of x from index - length + 1 to index + 1
32 | x_array = x[i-L+1:i+1]
33 | # sum the x_array * weights / sum(weights) to get adstock value
34 | xi = sum(x_array * weights)/sum(weights)
35 | # append adstocked value to adstocked_x
36 | adstocked_x.append(xi)
37 |
38 | # convert adstocked_x into an np.array
39 | adstocked_x = np.array(adstocked_x)
40 |
41 | # return adstocked_x
42 | return adstocked_x
--------------------------------------------------------------------------------
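
A quick numeric check of the delayed carryover: a single burst of spend should peak `P` periods later and decay across the `L`-period window.

```python
import numpy as np
from hommmer.features import delayed_adstock

spend = np.array([100, 0, 0, 0, 0, 0])
adstocked = delayed_adstock(spend, L=4, P=1, D=0.8)
print(adstocked.round(2))  # ≈ [26.58, 33.23, 26.58, 13.61, 0., 0.] — peak one period after the burst
```
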
/src/hommmer/cleaners/interpolate_monthly.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | def interpolate_monthly(df, date_col=None, resample_col=None):
5 |
6 |     if date_col is None:
7 |         date_col = df.columns[0]
8 | 
9 |     if resample_col is None:
10 |         resample_col = df.columns[1]
11 |
12 | data = df[[date_col, resample_col]].copy()
13 |
14 | data[date_col] = pd.to_datetime(data[date_col], format="%Y-%m")
15 | data['start_of_month'] = (data[date_col].dt.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1))
16 | data['end_of_month'] = pd.to_datetime(data['start_of_month']) + pd.offsets.MonthEnd(1)
17 | data['days_in_month'] = (data['end_of_month'] - data['start_of_month']).dt.days + 1
18 | data[resample_col] = data[resample_col] / data['days_in_month']
19 | data['date'] = data['start_of_month']
20 |
21 | dr = pd.date_range(start=data.start_of_month.min(),
22 | end=data.end_of_month.max(),
23 | freq='1D')
24 | date_df = pd.DataFrame({'date': dr})
25 | merged = date_df.merge(data, how='left', on='date')
26 | reindexed = merged.set_index('date')
27 |
28 | resampled = reindexed[resample_col]
29 | resampled.replace({0:np.nan}, inplace=True)
30 | resampled = resampled.interpolate(method='linear')
31 | resampled = resampled.reset_index()
32 | resampled.rename({'index': 'date'}, axis=1, inplace=True)
33 | resampled.fillna(0, inplace=True)
34 | return resampled
--------------------------------------------------------------------------------
/src/hommmer/models/Linear.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import statsmodels.api as sm
3 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python
4 |
5 | from .Model import Model
6 |
7 | class Linear(Model):
8 | def __init__(self, y, X, media_labels, settings):
9 | # inheritance and start timer
10 | super().__init__(y, X, media_labels, settings, "Linear")
11 | start = timer()
12 |
13 | # fit the model
14 | self._model = self._fit()
15 |
16 | # init required properties
17 | self.coefficients = self._coefficients()
18 |
19 | # finish running
20 | end = timer()
21 | self.runtime = end - start # Time in seconds, e.g. 5.38091952400282
22 |
23 | # log model locally
24 | self._save()
25 |
26 | ### EDIT BELOW HERE ###
27 |
28 | # fit the model
29 | def _fit(self):
30 | return sm.OLS(self.y_train, self.X_train).fit()
31 |
32 | # get the coefficients
33 | def _coefficients(self):
34 | return self._model.params.values
35 |
36 | # get the pvalues
37 | def _pvalues(self):
38 | return self._model.pvalues
39 |
40 | # calculate the confidence intervals
41 | def _confidence_intervals(self):
42 | conf_int_df = self._model.conf_int()
43 | conf_int_df.columns = ["lower", "upper"]
44 | conf_int_df['uncertainty'] = (conf_int_df["upper"] - conf_int_df["lower"]) / np.mean(self.y_train) * 100
45 | return conf_int_df
46 |
47 |
48 |
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/src/hommmer/features/weibull_adstock.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # https://github.com/annalectnl/weibull-adstock/blob/master/adstock_weibull_annalect.pdf
4 | # https://towardsdatascience.com/python-stan-implementation-of-multiplicative-marketing-mix-model-with-deep-dive-into-adstock-a7320865b334
5 | def weibull_adstock(x, window, k):
6 | '''
7 | params:
8 | x: original media variable, array
9 | window: length
10 | k: shape
11 | returns:
12 | array, adstocked media variable
13 | '''
14 | # prepend x with zeros equal to the window - 1
15 | x = np.append(np.zeros(window-1), x)
16 |
17 | # create an array of zeros equal to the window
18 | weights = np.zeros(window)
19 |
20 | # lambda is window / (-ln(0.001)) to the power of 1/k
21 | lam = window / (-np.log(0.001))**(1/k)
22 |
23 | # loop through each day in window
24 | for l in range(window):
25 | # weight is minus lag/lambda to the power of k exponentiated
26 | weight = np.exp(-(l/lam)**k)
27 |         # add weight to weights in the right place (filling from back to front)
28 | weights[window-1-l] = weight
29 |
30 | # create an empty list
31 | adstocked_x = []
32 | # loop through window - 1 up to len(x)
33 | for i in range(window-1, len(x)):
34 | # get array of x from index - length + 1 to index + 1
35 | x_array = x[i-window+1:i+1]
36 | # sum the x_array * weights / sum(weights) to get adstock value
37 | xi = sum(x_array * weights)/sum(weights)
38 | # append adstocked value to adstocked_x
39 | adstocked_x.append(xi)
40 |
41 | # convert adstocked_x into an np.array
42 | adstocked_x = np.array(adstocked_x)
43 |
44 | # return adstocked_x
45 | return adstocked_x
--------------------------------------------------------------------------------
/src/hommmer/charts/response.py:
--------------------------------------------------------------------------------
1 | from sklearn import linear_model
2 | import pandas as pd
3 | import matplotlib.pyplot as plt
4 | def response(df, interval, transformer, **kwargs):
5 |     df['spend_transformed'] = transformer(df['spend'], **kwargs)
6 |     predict_y = 'conversions' # variable you're predicting
7 |     dependent_X = ['spend_transformed'] # variables you're using to predict
8 |
9 | y = df[predict_y]
10 | X = df[dependent_X]
11 |
12 | model = linear_model.LinearRegression()
13 | model.fit(X, y)
14 |
15 | xmax = df['spend'].max()
16 | xmax_round = xmax if xmax % interval == 0 else xmax + interval - xmax % interval
17 |
18 | resp_df = pd.DataFrame({
19 | "spend": range(interval, int(xmax_round), interval)
20 | })
21 |
22 |     resp_df['spend_transformed'] = transformer(resp_df['spend'], **kwargs)
23 | X_resp = resp_df[['spend_transformed']]
24 | resp_df['forecast'] = model.predict(X_resp)
25 | resp_df['forecast'] = resp_df['forecast'].round().astype(int)
26 | resp_df['CPA'] = round(resp_df['spend'] / resp_df['forecast'],2)
27 |
28 | resp_df.plot(x='spend', y='forecast', kind='line', figsize=(10,5), style='.-')
29 | plt.title('Response Curve', y=1.12)
30 | plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.12), ncol=2)
31 | r_sq = round(model.score(X, y), 2)
32 | plt.annotate(f'R-squared = {r_sq}', xy=(0.05, 0.90), xycoords='axes fraction')
33 |     plt.annotate(f'Transform = {transformer.__name__}', xy=(0.05, 0.80), xycoords='axes fraction')
34 | plt.annotate(f'{"".join([f"{k}={v} " for k,v in kwargs.items()])}', xy=(0.05, 0.70), xycoords='axes fraction')
35 |
36 |     plt.xticks(resp_df['spend']) # force a tick at every spend interval
37 | plt.ylabel('Conversions')
38 |
39 | plt.show();
40 | return resp_df
--------------------------------------------------------------------------------
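
A usage sketch for the response curve, assuming a frame with `spend` and `conversions` columns; the saturation transform's extra keyword is passed through `**kwargs` (the name `alpha` below is purely illustrative — use whatever keyword the transform actually accepts):

```python
import pandas as pd
from hommmer.charts import response
from hommmer.features import power_saturation

df = pd.DataFrame({"spend":       [100, 200, 300, 400, 500],
                   "conversions": [50, 80, 100, 112, 120]})
# 'alpha' is a hypothetical keyword for power_saturation's exponent
resp_df = response(df, interval=100, transformer=power_saturation, alpha=0.5)
```
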
/src/hommmer/features/weibull_adstock_delayed.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # https://github.com/annalectnl/weibull-adstock/blob/master/adstock_weibull_annalect.pdf
4 | # https://towardsdatascience.com/python-stan-implementation-of-multiplicative-marketing-mix-model-with-deep-dive-into-adstock-a7320865b334
5 | def weibull_adstock_delayed(x, window, k, p):
6 | '''
7 | params:
8 | x: original media variable, array
9 | window: length
10 | k: shape
11 | p: peak
12 | returns:
13 | array, adstocked media variable
14 | '''
15 | # prepend x with zeros equal to the window - 1
16 | x = np.append(np.zeros(window-1), x)
17 |
18 | # create an array of zeros equal to the window
19 | weights = np.zeros(window)
20 |
21 | # lambda is window / (-ln(0.001)) to the power of 1/k
22 | lam = window / (-np.log(0.001))**(1/k)
23 |
24 | # loop through each day in window
25 | for l in range(window):
26 | # weight is minus lag/lambda to the power of k exponentiated with a delay
27 | delayed_weight = np.exp(-((l-p)/lam)**k)
28 |         # add weight to weights in the right place (filling from back to front)
29 | weights[window-1-l] = delayed_weight
30 |
31 | # create an empty list
32 | adstocked_x = []
33 | # loop through window - 1 up to len(x)
34 | for i in range(window-1, len(x)):
35 | # get array of x from index - length + 1 to index + 1
36 | x_array = x[i-window+1:i+1]
37 | # sum the x_array * weights / sum(weights) to get adstock value
38 | xi = sum(x_array * weights)/sum(weights)
39 | # append adstocked value to adstocked_x
40 | adstocked_x.append(xi)
41 |
42 | # convert adstocked_x into an np.array
43 | adstocked_x = np.array(adstocked_x)
44 |
45 | # return adstocked_x
46 | return adstocked_x
--------------------------------------------------------------------------------
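
One caution on the delayed variant: for lags `l < p` the base `(l - p) / lam` is negative, and numpy yields NaN when a negative float is raised to a non-integer power, so fractional `k` values poison the normalised weights. Integer shape parameters (or wrapping the lag term in `np.abs`) avoid this:

```python
import numpy as np

print(np.float64(-0.5) ** 1.5)  # nan — what happens inside the weight loop when l < p
print(np.float64(-0.5) ** 2)    # 0.25 — integer shape parameters are safe
```
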
/TODO.md:
--------------------------------------------------------------------------------
1 | # Todo List
2 |
3 | - multiplier rules by channel
4 | - bayesian MCMC
5 | - embedded / hierarchical modeling
6 | - rainbox test
7 | - harvey collier test
8 | - goldfeld quandt test
9 | - margin of error / ci
10 | - principal components
11 | - shuffle dataset sklearn.utils
12 | - gradientboostingregressor
13 | - plot partial dependence
14 | - cascading benchmark priors
15 | - model calibration with lift / geo tests
16 | - rollback update windows
17 | - creative scorecard grading
18 | - nevergrad hyperparameter optimization
19 | - train test split
20 | - cross validation
21 | - simulated annealing feature selection with random hill jumping
22 | - forward feature selection
23 | - automated anomaly dummies
24 | - model pipelines
25 | - etl connectors (facebook, google, etc)
26 | - expected value of information gain
27 | - geo interpolation
28 | - automated time lags
29 | - simulator model training
30 | - naive baseline validation
31 | - dummy baseline validation
32 | - nvar incremental shading feature selection (1 var models, 2 var models, etc)
33 | - kpi tree rollup models
34 | - gaap accounting for MMM
35 | - cohort time to value
36 | - scaling values
37 | - post-selection inference
38 | - principal component analysis
39 | - original correlated variables
40 | - AIC / BIC selection criteria
41 | - channel deprivation chart
42 | - pareto front chart
43 | - decomposition waterfall chart
44 | - share of spend vs share of effect chart
45 | - response curves chart
46 | - variable histogram chart
47 | - export to google slides / powerpoint
48 | - export to excel / csv
49 | - export to png / jpeg
50 | - last x periods train test split
51 | - seasonality with facebook prophet
52 | - national holidays by country
53 | - fix mapes div/0 with + np.finfo(float).eps
54 | - walk forward validation
55 | - facebook prophet variable input trend forecasting
56 | - forecast on accuracy chart with train test split
57 |
--------------------------------------------------------------------------------
/src/hommmer/cleaners/__init__.py:
--------------------------------------------------------------------------------
1 | from .week_commencing import week_commencing
2 | from .unstack_data import unstack_data
3 | from .clean_numeric import clean_numeric
4 | from .date_range_dummies import date_range_dummies
5 | from .days_in_month import days_in_month
6 | from .end_of_month import end_of_month
7 | from .group_monthly import group_monthly
8 | from .group_weekly import group_weekly
9 | from .date_dummies import date_dummies
10 | from .interpolate_monthly import interpolate_monthly
11 | from .interpolate_weekly import interpolate_weekly
12 | from .make_column_index import make_column_index
13 | from .remove_outliers import remove_outliers
14 | from .rename_column import rename_column
15 | from .start_of_month import start_of_month
16 | from .transpose_data import transpose_data
19 | from .categorize_campaigns import categorize_campaigns
20 | from .merge_data import merge_data
21 | from .guess_date_column import guess_date_column
22 | from .guess_y_column import guess_y_column
23 | from .guess_media_columns import guess_media_columns
24 | from .add_X_labels import add_X_labels
25 | from .del_X_labels import del_X_labels
26 | from .get_all_X_labels import get_all_X_labels
27 | from .get_cols_containing import get_cols_containing
28 | from .str_to_dummy import str_to_dummy
29 | from .cat_to_dummies import cat_to_dummies
30 | from .drop_cols import drop_cols
31 | from .convert_date import convert_date
32 | from .drop_n_rows import drop_n_rows
33 | from .count_na import count_na
34 | from .count_na_cols import count_na_cols
35 | from .interpolate_dates import interpolate_dates
36 | from .count_dup_cols import count_dup_cols
37 | from .modify_labels import modify_labels
38 | from .describe_data import describe_data
39 | from .standard_scaler import standard_scaler
40 | from .train_test_split import train_test_split
41 | from .make_date_index import make_date_index
42 | from .holiday_dummies import holiday_dummies
43 | from .guess_categorical_variables import guess_categorical_variables
44 | from .guess_numerical_variables import guess_numerical_variables
45 | from .make_geodate_index import make_geodate_index
46 |
47 |
--------------------------------------------------------------------------------
/src/hommmer/datasets/duff.csv:
--------------------------------------------------------------------------------
1 | date,sales,facebook,tiktok,google,emails,blog,search,sold_out
2 | 2029-12-31,1976,0,0,1580.74,184,322,100,0
3 | 2030-01-07,722,0,0,196.73,0,447,65,0
4 | 2030-01-14,800,0,0,498.89,0,469,57,0
5 | 2030-01-21,812,0,0,346.76,0,461,70,0
6 | 2030-01-28,887,0,0,646.23,0,454,71,0
7 | 2030-02-04,916,0,0,633.05,67,503,72,0
8 | 2030-02-11,921,0,0,378.24,0,510,82,0
9 | 2030-02-18,956,0,0,522.66,0,529,84,0
10 | 2030-02-25,883,0,0,144.75,0,556,82,0
11 | 2030-03-04,875,0,0,299.05,0,554,68,0
12 | 2030-03-11,921,0,0,284.25,0,558,75,0
13 | 2030-03-18,920,0,0,254.57,0,580,71,0
14 | 2030-03-25,969,0,0,253.71,0,580,67,0
15 | 2030-04-01,891,0,0,164.9,764,597,57,0
16 | 2030-04-08,993,0,0,223.16,997,604,62,0
17 | 2030-04-15,942,0,0,122.97,0,552,65,0
18 | 2030-04-22,1060,0,0,271.75,833,631,70,0
19 | 2030-04-29,1056,0,0,333.63,0,677,62,0
20 | 2030-05-06,1043,0,0,437.34,0,711,58,0
21 | 2030-05-13,1104,0,0,465.92,0,784,68,0
22 | 2030-05-20,1109,0,0,427.07,1220,723,57,0
23 | 2030-05-27,1108,0,0,213.95,23,677,74,0
24 | 2030-06-03,1303,268.83,0,556.79,0,729,65,0
25 | 2030-06-10,1361,259.54,0,443.61,270,729,73,0
26 | 2030-06-17,1482,470.94,0,1056.08,150,741,77,0
27 | 2030-06-24,1592,691.37,0,1035.98,0,773,90,0
28 | 2030-07-01,2097,820.81,0,824.07,0,721,100,0
29 | 2030-07-08,1575,875.65,0,675,0,833,84,0
30 | 2030-07-15,1551,856.12,0,517.92,218,1060,74,0
31 | 2030-07-22,1511,818.55,0,214.05,0,1080,90,0
32 | 2030-07-29,1524,863.49,0,632.07,0,1036,74,0
33 | 2030-08-05,1529,844.6,0,680.1,1083,1091,80,0
34 | 2030-08-12,778,385.1,0,373.61,0,895,74,0.5
35 | 2030-08-19,68,0,0,0,0,925,74,1
36 | 2030-08-26,1469,788.88,0,1548.51,0,1009,66,0
37 | 2030-09-02,1497,783.7,0,598.4,1159,901,80,0
38 | 2030-09-09,1510,1494.76,0,198.91,0,1093,64,0
39 | 2030-09-16,1737,1957.76,0,296.15,1151,1359,75,0
40 | 2030-09-23,1527,1982.06,0,463.13,0,1688,71,0
41 | 2030-09-30,1440,1416.94,0,497.73,0,1490,73,0
42 | 2030-10-07,2101,3791.06,145,622.07,0,1739,71,0
43 | 2030-10-14,1946,3943.91,72.53,700.78,0,1954,70,0
44 | 2030-10-21,1825,3499.61,646.74,605.02,0,1945,69,0
45 | 2030-10-28,1916,3479.21,885.01,658.08,0,1728,73,0
46 | 2030-11-04,1825,3367.33,1221.43,1552.87,0,1968,69,0
47 | 2030-11-11,1941,3413.13,2798.73,735.96,0,2261,76,0
48 | 2030-11-18,1753,3503.93,1948.8,546.8,0,3016,77,0
49 | 2030-11-25,1676,3492.87,2697.06,638.9,0,1879,66,0
50 | 2030-12-02,1969,3743.92,2653.56,1597.17,238,2334,77,0
51 | 2030-12-09,1849,3348.18,1895.71,1033.51,0,2674,76,0
52 | 2030-12-16,1880,3410.33,2386.76,928.93,0,2700,82,0
53 | 2030-12-23,2216,3740.37,2012.53,625.26,0,1663,100,0
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # Local notebooks for testing
132 | notebooks/
133 | .ipynb
--------------------------------------------------------------------------------
/src/hommmer/models/LogLinear.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import statsmodels.api as sm
3 | import pandas as pd
4 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python
5 |
6 | from .Model import Model
7 | from hommmer.helpers import log_ex_zeros
8 |
9 | # https://www.spencertom.com/2020/08/29/marketing-mix-modeling-mmm-part-3-of-3/
10 | # https://stats.stackexchange.com/questions/140713/making-predictions-with-log-log-regression-model
11 | # https://davegiles.blogspot.com/2014/12/s.html
12 | class LogLinear(Model):
13 | def __init__(self, y, X, media_labels, settings):
14 | # inheritance and start timer
15 | super().__init__(y, X, media_labels, settings, "LogLinear")
16 | start = timer()
17 |
18 | # fit the model
19 | self._model = self._fit()
20 |
21 | # init required properties
22 | self.coefficients = self._coefficients()
23 |
24 | # finish running
25 | end = timer()
26 | self.runtime = end - start # Time in seconds, e.g. 5.38091952400282
27 |
28 | # log model locally
29 | self._save()
30 |
31 | ### EDIT BELOW HERE ###
32 |
33 | # fit the model
34 | def _fit(self):
35 | logged_y = np.log(self.y_train + 1)
36 | return sm.OLS(logged_y, self.X_train).fit() # log y
37 |
38 | # get the coefficients
39 | def _coefficients(self):
40 | return self._model.params.values
41 |
42 | # get the pvalues
43 | def _pvalues(self):
44 | return self._model.pvalues
45 |
46 | # calculate the confidence intervals
47 | def _confidence_intervals(self):
48 | conf_int_df = self._model.conf_int()
49 | conf_int_df.columns = ["lower", "upper"]
50 |         return (conf_int_df["upper"] - conf_int_df["lower"]) / np.mean(np.log(self.y_train + 1)) * 100  # match the log(y + 1) used in _fit
51 |
52 | ### OVERRIDE BASE FUNCS ###
53 | def contribution(self, X=None):
54 |         if X is None:
55 | X = self.X_actual
56 |
57 | coef_df = pd.DataFrame({'coefficient': self.coefficients}, index=X.columns)
58 |
59 | y_pred_log = self._model.predict(X)
60 | y_pred = np.exp(y_pred_log) - 1 # transform log y back into y
61 | data = []
62 | for x in list(X.columns):
63 | contrib = coef_df['coefficient'].loc[x] * X[x]
64 | data.append(contrib)
65 |
66 | log_contrib_df = pd.DataFrame(data).T
67 | contrib_df = log_contrib_df.copy()
68 | # transform log contribs by using share
69 | for x in contrib_df.columns:
70 | contrib_share = log_contrib_df[x] / y_pred_log
71 | contrib_df[x] = y_pred * contrib_share
72 |
73 | return contrib_df
74 |
75 |
76 |
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/src/hommmer/models/LogLog.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import statsmodels.api as sm
3 | import pandas as pd
4 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python
5 |
6 | from .Model import Model
7 | from hommmer.helpers import log_ex_zeros
8 |
9 | # https://www.spencertom.com/2020/08/29/marketing-mix-modeling-mmm-part-3-of-3/
10 | # https://stats.stackexchange.com/questions/140713/making-predictions-with-log-log-regression-model
11 | # https://davegiles.blogspot.com/2014/12/s.html
12 | class LogLog(Model):
13 | def __init__(self, y, X, media_labels, settings):
14 | # inheritance and start timer
15 | super().__init__(y, X, media_labels, settings, "LogLog")
16 | start = timer()
17 |
18 | # fit the model
19 | self._model = self._fit()
20 |
21 | # init required properties
22 | self.coefficients = self._coefficients()
23 |
24 | # finish running
25 | end = timer()
26 | self.runtime = end - start # Time in seconds, e.g. 5.38091952400282
27 |
28 | # log model locally
29 | self._save()
30 |
31 | ### EDIT BELOW HERE ###
32 |
33 | # fit the model
34 | def _fit(self):
35 | logged_y = np.log(self.y_train + 1)
36 | logged_X = self.X_train.copy()
37 | for x in list(self.X_train.columns):
38 | logged_X[x] = np.log(self.X_train[x] + 1)
39 |
40 | return sm.OLS(logged_y, logged_X).fit() # log both y and X
41 |
42 | # get the coefficients
43 | def _coefficients(self):
44 | return self._model.params.values
45 |
46 | # get the pvalues
47 | def _pvalues(self):
48 | return self._model.pvalues
49 |
50 | # calculate the confidence intervals
51 | def _confidence_intervals(self):
52 |         conf_int_df = self._model.conf_int()
53 |         conf_int_df.columns = ["lower", "upper"]
54 |         return (conf_int_df["upper"] - conf_int_df["lower"]) / np.mean(np.log(self.y_train + 1)) * 100  # match the log(y + 1) used in _fit
55 |
56 | ### OVERRIDE BASE FUNCS ###
57 | def contribution(self, X=None):
58 |         if X is None:
59 | X = self.X_actual
60 |
61 | coef_df = pd.DataFrame({'coefficient': self.coefficients}, index=X.columns)
62 |
63 | X_log = np.log(X+1)
64 | y_pred_log = self._model.predict(X_log)
65 | y_pred = np.exp(y_pred_log) - 1 # transform log y back into y
66 |
67 | data = []
68 | for x in list(X.columns):
69 | contrib = coef_df['coefficient'].loc[x] * np.log(X[x] + 1)
70 | data.append(contrib)
71 |
72 | log_contrib_df = pd.DataFrame(data).T
73 | contrib_df = log_contrib_df.copy()
74 | # transform log contribs by using share
75 | for x in contrib_df.columns:
76 | contrib_share = log_contrib_df[x] / y_pred_log
77 | contrib_df[x] = y_pred * contrib_share
78 |
79 | return contrib_df
--------------------------------------------------------------------------------
/src/hommmer/models/DeepLearning.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import seaborn as sns
3 | import matplotlib.pyplot as plt
4 | from sklearn.compose import ColumnTransformer
5 | from sklearn.model_selection import GridSearchCV
6 | from sklearn.neural_network import MLPRegressor
7 | from sklearn.preprocessing import QuantileTransformer, OneHotEncoder, Normalizer
8 | from sklearn import set_config
9 |
10 | from hommmer.cleaners import guess_categorical_variables, guess_numerical_variables
11 | set_config(display='diagram')
12 | from sklearn.pipeline import Pipeline
13 | from sklearn.model_selection import train_test_split,cross_validate
14 | from joblib import load, dump
15 | from sklearn.inspection import permutation_importance, plot_partial_dependence
16 | from sklearn.metrics import mean_absolute_error as mae
17 | from sklearn.utils import shuffle
18 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python
19 |
20 | from .Model import Model
21 |
22 | class DeepLearning(Model):
23 | def __init__(self, y, X, media_labels, settings):
24 | # inheritance and start timer
25 | super().__init__(y, X, media_labels, settings, "DeepLearning")
26 | start = timer()
27 |
28 | # fit the model
29 | self._model = self._fit()
30 |
31 | # finish running
32 | end = timer()
33 | self.runtime = end - start # Time in seconds, e.g. 5.38091952400282
34 |
35 | # log model locally
36 | self._save()
37 |
38 | ### EDIT BELOW HERE ###
39 |
40 | # fit the model
41 | def _fit(self):
42 | all_features = list(self.X_train.columns)
43 | categorical = guess_categorical_variables(self.X_train)
44 | numerical = guess_numerical_variables(self.X_train.drop(categorical, axis=1))
45 | transformers =[
46 | ('one hot', OneHotEncoder(handle_unknown='ignore'), categorical),
47 | ('scaler', QuantileTransformer(), numerical),
48 | ('normalizer',Normalizer(), all_features)
49 | ]
50 | ct = ColumnTransformer(transformers)
51 | steps =[
52 | ('column_transformer', ct),
53 | ('model', MLPRegressor(solver='lbfgs'))
54 |             # solver 'lbfgs' suits datasets with fewer than ~1000 rows; for larger datasets use 'adam'
55 | ]
56 | pipeline= Pipeline(steps)
57 | param_space={
58 | 'column_transformer__scaler__n_quantiles':[80,100,120],
59 | 'column_transformer__normalizer':[ Normalizer(), 'passthrough' ],
60 | 'model__hidden_layer_sizes':[(35,35),(50,50),(75,75)],
61 | 'model__alpha':[0.005, 0.001]
62 | }
63 |
64 |         # pass the search space as param_grid, the pipeline to run, the number of cross-validation folds (cv) and the verbosity
65 | grid = GridSearchCV(pipeline, param_grid=param_space, cv=3, verbose=2)
66 | grid.fit(self.X_train, self.y_train)
67 | return grid.best_estimator_
68 |
69 | ### OVERRIDE ###
70 | def contribution(self, X=None):
71 |         if X is None:
72 | X = self.X_actual
73 |
74 | res = permutation_importance(self._model, X, self.y_actual, n_repeats=10)
75 |
76 |         # collect permutation importances: one row per repeat, one column per feature
77 |         imp = pd.DataFrame(res['importances'].T, columns=X.columns)
78 |         # NOTE: returns importances rather than per-period contributions
79 |         return imp
--------------------------------------------------------------------------------
/src/hommmer/main.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | from .helpers import log, init_logging
4 | from .cleaners import make_date_index, make_geodate_index
5 | from .features import geometric_adstock, power_saturation
6 | from .models import Linear, LogLinear, LogLog, Ridge, DeepLearning
7 |
8 | def build(path, target, media, organic=None, date=None, geo=None, adstock=None, saturation=None, override=None):
9 | # default settings
10 | settings = {
11 | "file": path,
12 | "model": 'linear',
13 | "geo": geo,
14 | "split": 0.15,
15 | "metric": 'nrmse',
16 | "verbose": False
17 | }
18 |
19 |     # apply user-supplied overrides (None default avoids the mutable-argument pitfall)
20 |     settings.update(override or {})
21 |
22 | init_logging(settings['verbose'])
23 |
24 | # load the dataframe and get the X_labels
25 | df = pd.read_csv(path)
26 | df.fillna(0, inplace=True)
27 | X_labels = list(df.columns)
28 |
29 | # guess date if not set
30 | if date is None:
31 | if 'date' in df.columns:
32 | date = 'date'
33 | elif 'Date' in df.columns:
34 | date = 'Date'
35 | else:
36 | date = df.columns[0]
37 |
38 | # if organic is not set, set it by removing media vars
39 | if organic is None:
40 | organic = X_labels.copy()
41 | for x in media:
42 | organic.remove(x)
43 | # if organic is set, remove anything not in media or organic
44 | else:
45 | for x in df.columns:
46 | if x not in media and x not in organic:
47 | X_labels.remove(x)
48 |
49 | # log model info
50 | log("building a model")
51 | log(f"file: {path}")
52 | log(f"y = {target}")
53 | log(f"X = {', '.join(X_labels)}")
54 | log(f"vars: {len(X_labels)}")
55 | log(f"obs: {df.shape[0]}")
56 | log(f"settings: {settings}")
57 |
58 | # make date the index
59 | if geo is None:
60 | make_date_index(df, date)
61 | else:
62 | make_geodate_index(df, date, geo)
63 |
64 | # adstock transform
65 | if adstock:
66 | for i in range(len(media)):
67 | x_label = media[i]
68 | theta = adstock[i]
69 | if theta > 0:
70 | trans_label = x_label+" θ="+str(theta)
71 | df[trans_label] = geometric_adstock(df[x_label], theta)
72 | X_labels.append(trans_label)
73 | X_labels.remove(x_label)
74 | media[i] = trans_label
75 |
76 | # saturation transform
77 | if saturation:
78 | for i in range(len(media)):
79 | x_label = media[i]
80 | alpha = saturation[i]
81 | if alpha > 0:
82 | trans_label = x_label+" α="+str(alpha)
83 | df[trans_label] = power_saturation(df[x_label], 1-alpha)
84 | X_labels.append(trans_label)
85 | X_labels.remove(x_label)
86 | media[i] = trans_label
87 |
88 | # assign the y and X frames
89 | y = df[target]
90 | X = df[X_labels]
91 |
92 | # run model
93 | if settings['model'] == 'linear':
94 | return Linear(y, X, media, settings)
95 | elif settings['model'] == 'log-linear':
96 | return LogLinear(y, X, media, settings)
97 | elif settings['model'] == 'log-log':
98 | return LogLog(y, X, media, settings)
99 | elif settings['model'] == 'ridge':
100 | return Ridge(y, X, media, settings)
101 | elif settings['model'] == 'deep-learning':
102 | return DeepLearning(y, X, media, settings)
103 | else:
104 | all_models = {
105 | 'linear': Linear(y, X, media, settings),
106 | 'log-linear': LogLinear(y, X, media, settings),
107 | 'log-log': LogLog(y, X, media, settings),
108 | # 'ridge': Ridge(y, X, media, settings),
109 | # 'deep-learning': DeepLearning(y, X, media, settings)
110 | }
111 | accuracies = [{"model": x, f"{settings['metric']}": all_models[x].metric(settings['metric'])} for x in all_models.keys()]
112 | # min_error = min(all_models.keys(), key=lambda x: all_models[x].metric(settings['metric']))
113 | min_error = min(accuracies, key=lambda x: x[settings['metric']])
114 | return all_models[min_error['model']]
115 |
116 |
117 |
118 |
119 |
120 |
121 |
--------------------------------------------------------------------------------
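
Pulling the pipeline together, an illustrative call (the theta/alpha values are made up) showing per-channel adstock thetas and saturation alphas in the same order as `media`, plus a settings override:

```python
import hommmer as mmm

mmm.load_duff()  # writes duff.csv locally, per the README
media = ["facebook", "tiktok", "google"]
model = mmm.build(
    "duff.csv", "sales", media,
    adstock=[0.5, 0.3, 0.2],     # geometric adstock theta per channel
    saturation=[0.2, 0.2, 0.2],  # power saturation alpha per channel
    override={"model": "ridge", "verbose": True},
)
model.show()
```
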
/src/hommmer/models/Model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import datetime as dt
3 | import pandas as pd
4 | import numpy as np
5 | from IPython.display import display
6 | from timeit import default_timer as timer # https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python
7 | from sklearn.model_selection import train_test_split
8 |
9 | from hommmer.charts import accuracy
10 | from hommmer.helpers import check_metric
11 |
12 | class Model():
13 | def __init__(self, y, X, media_labels, settings, model):
14 | # set timestamp
15 | self.timestamp = dt.datetime.today().strftime('%Y-%m-%d %H:%M')
16 |
17 | # train-test split
18 | if settings['split']:
19 | X_train, X_test, y_train, y_test = train_test_split(X, y,
20 | test_size=settings['split'], random_state=0)
21 | else:
22 | X_train, X_test, y_train, y_test = X, X, y, y
23 |
24 | self.settings = settings
25 | self.model = model
26 | self.runtime = None
27 |
28 | # assign X and y
29 | self.X_actual = X
30 | self.y_actual = y
31 | self.X_train = X_train
32 | self.y_train = y_train
33 | self.X_test = X_test
34 | self.y_test = y_test
35 | self.media_labels = media_labels
36 |
37 | # placeholders
38 | self.coefficients = []
39 |
40 | def _fit(self, y, X):
41 | return None
42 |
43 | def results(self):
44 | results_df = pd.DataFrame(self.contribution().sum(), columns=['contribution'])
45 | results_df['share'] = results_df['contribution'] / results_df['contribution'].sum() * 100
46 | results_df['coefficient'] = self.coefficients
47 | results_df['pvalue'] = self._pvalues()
48 | results_df = pd.concat([results_df, self._confidence_intervals()], axis=1)
49 |
50 | return np.around(results_df, 3)
51 |
52 | def contribution(self, X=None):
53 |         if X is None:
54 | X = self.X_actual
55 |
56 | coef_df = pd.DataFrame({'coefficient': self.coefficients}, index=X.columns)
57 |
58 | data = []
59 | for x in list(X.columns):
60 | contrib = coef_df['coefficient'].loc[x] * X[x]
61 | data.append(contrib)
62 |
63 | contrib_df = pd.DataFrame(data).T
64 |
65 | return contrib_df
66 |
67 | def predict(self, X=None):
68 | contribution = self.contribution(X)
69 | y_pred = contribution.sum(axis=1)
70 | return y_pred
71 |
72 | def metrics(self, metric_labels):
73 | metrics = []
74 | for metric in metric_labels:
75 | value = check_metric(metric, self)
76 | metrics.append((metric, value))
77 | for label, output in metrics:
78 | print(f"{output[1]} {label}: {output[0]}")
79 |
80 | def metric(self, metric_label):
81 | value = check_metric(metric_label, self)
82 | return value[0]
83 |
84 | def _save(self):
85 | file = self.settings['file']
86 | file_paths = file.split('/')
87 | filename = file_paths.pop()
88 | file_paths.append("models-"+filename)
89 | file_loc = '/'.join(file_paths)
90 | models_output = pd.DataFrame.from_dict([{
91 | 'file': self.settings['file'],
92 | 'model': self.model,
93 | 'metric': self.settings['metric'],
94 | 'error': self.metric(self.settings['metric']),
95 | 'timestamp': dt.datetime.today().strftime('%Y-%m-%d %H:%M'),
96 | 'runtime': self.runtime,
97 | 'y_label': self.y_train.name,
98 | 'X_labels': ', '.join(list(self.X_train.columns)),
99 | }])
100 | if os.path.isfile(file_loc):
101 | # save to existing file
102 | loaded_models = pd.read_csv(file_loc)
103 |             all_models = pd.concat([loaded_models, models_output])
104 | all_models.to_csv(file_loc, index=False)
105 | print("added model to existing file")
106 | else:
107 | # save new model file
108 | models_output.to_csv(file_loc, index=False)
109 | print("added new model file locally")
110 |
111 |     def show(self, charts=True, metrics=True, results=True):
112 |         if charts:
113 |             accuracy(self.y_actual, self.predict())
114 |         if metrics:
115 |             self.metrics(["rsquared", "nrmse", "mape", "decomp-rssd", "cond-no"])
116 |         if results:
117 |             display(self.results())
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # hommmer
2 |
3 | A simple Marketing Mix Modeling library in Python.
4 |
5 | \*\*\* **_NOTE: this library is in alpha and not yet working._** \*\*\*
6 |
7 | ## Quick start
8 |
9 | ### 1. Install the library
10 |
11 | > `pip install hommmer`
12 |
13 | ### 2. Build the model
14 |
15 | ```python
16 | # import the library
17 | import hommmer as mmm
18 |
19 | # download example data
20 | mmm.load_duff()
21 |
22 | # list media columns
23 | media = ['facebook', 'google', 'tiktok']
24 |
25 | # build the model
26 | model = mmm.build('duff.csv', 'sales', media)
27 | ```
28 |
29 | #### Required
30 |
31 | - **path**: the location of the file with your data
32 | - **target**: the column with your conversions or conversion value
33 | - **media**: a list of the columns with media spend
34 |
35 | #### Optional
36 |
37 | - **organic**: a list of the organic columns. default: everything not listed in `media`.
38 | - **date**: the column with your date labels (YYYY-MM-DD). default: `date`
39 | - **verbose**: see what the model is doing by printing logs. default: `False`
40 | - **override**: use custom settings for aspects of the model. default: `{}`
41 |
42 | Provide at least 1 year of weekly data where the `date` column is the start of the week (Monday).
43 |
44 | ### 3. Use the results
45 |
46 | ```python
47 | # show the charts and metrics
48 | model.show()
49 |
50 | # save locally to png and csv
51 | model.save()
52 | ```
53 |
54 | ### Other features
55 |
56 | Our solution is fully automated, but if you want to build a model manually, or use our helper functions for cleaning data, you can import from our sublibraries.
57 |
58 | ```python
59 | from hommmer.cleaners import transpose_data
60 | from hommmer.features import geometric_adstock
61 | from hommmer.charts import accuracy
62 | from hommmer.metrics import nrmse
63 | from hommmer.models import Linear
64 | ```
65 |
66 | ## About Marketing Mix Modeling
67 |
68 | Marketing Mix Modeling (MMM) was introduced in the 1960s to match spikes and dips in sales to actions taken in marketing. No user data required - it's privacy-friendly, adblocker-proof and works across all channels (even offline).
69 |
70 | What used to be a 3-6 month, $50k+ job for the Fortune 500, is now an always-on, automated source of truth for startups like [Harry's](https://ladder.io/blog/attribution-technique), [HelloFresh](https://engineering.hellofresh.com/bayesian-media-mix-modeling-using-pymc3-for-fun-and-profit-2bd4667504e6) and [Monday․com](https://www.youtube.com/watch?v=p-YbHMCUycw). Even Facebook and Google are getting in on the game with [research papers](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46001.pdf) and [open-source projects](https://facebookexperimental.github.io/Robyn/).
71 |
72 | ## About hommmer
73 |
74 | Most modeling libraries, like [Statsmodels](https://www.statsmodels.org/stable/index.html), [scikit-learn](https://scikit-learn.org/stable/) and [Facebook's Robyn](https://facebookexperimental.github.io/Robyn/), cater to statisticians and data scientists. They offer complex configuration options and advanced algorithms that, in practice, are only accessible to the biggest companies spending millions on marketing, who can afford to spend 3-6 months on a solution.
75 |
76 | So most Marketing Mix Modeling by small businesses and startups is [done in Excel](https://www.saxifrage.xyz/post/econometrics-gsheets). But there are things you can't do in Excel, like automatically building 1,130 models to see which one works best. We'd like MMM to be in the hands of more people, but that can't happen if you need to be a nuclear physicist to use it.
77 |
78 | `hommmer` is built for the rest of us: the 'everyman' (of any gender) modeling hobbyist, for whom MMM is just one of many jobs on the todo list. It's designed to be simple to use but powerful underneath, without getting you into trouble. Over-simplifying things will annoy the statisticians (Doh!), but it'll make allocating budget quick and easy.
79 |
80 | ## Design Principles
81 |
82 | ### 1. Excel is the operating system
83 |
84 | Full compatibility with Excel / GSheets / CSV for importing and exporting.
85 |
86 | ### 2. Don't make me think
87 |
88 | All user input should be treated as error. Everything needs a good default.
89 |
90 | ### 3. Good is better than great
91 |
92 | Where there's a choice between optimization and usefulness, take the latter.
93 |
94 | ### 4. Better data beats fancier algorithms
95 |
96 | We focus on helper functions to clean data, and treat algorithms as commodities.
97 |
98 | ### 5. We know less than the client
99 |
100 | Assume the client knows what they're doing, then try to prove otherwise.
101 |
102 | ## Contributors
103 |
104 | These people are building `hommmer` for fun in their spare time. Cheers! 🍻
105 |
106 | <!-- All Contributors table (HTML) renders here -->
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | Thank you for investing your time in contributing to our project! Any contribution you make will be reflected on [https://www.hommmer.org/](https://www.hommmer.org/).
4 |
5 | ## New contributor guide
6 |
7 | See the [README](README.md) to get an overview of the project. Ideas for improvement are in [TODO](TODO.md). We're operating under the MIT Open Source [license](LICENSE) so you can still use anything you contribute (but so can the rest of us).
8 |
9 | ## Getting started
10 |
11 | ### Issues
12 |
13 | #### Create a new issue
14 |
15 | If you spot a problem with the docs, [search if an issue already exists](https://docs.github.com/en/github/searching-for-information-on-github/searching-on-github/searching-issues-and-pull-requests#search-by-the-title-body-or-comments). If a related issue doesn't exist, you can open a [new issue](https://github.com/hammer-mt/hommmer/issues/new).
16 |
17 | #### Solve an issue
18 |
19 | Scan through our [existing issues](https://github.com/hammer-mt/hommmer/issues) to find one that interests you. Leave a comment on the issue asking to pick it up, so maintainers know you want to work on it.
20 |
21 | ### Make Changes
22 |
23 | #### Prerequisites
24 |
25 | Make sure you have the following installed in your development environment:
26 |
27 | - [Python](https://www.python.org/downloads/)
28 |
29 | #### Development Workflow
30 |
31 | Follow the steps below to get the package working locally:
32 |
33 | 1. Create a personal fork of the project on GitHub and clone locally
34 |
35 | ```shell
36 | # Using HTTPS
37 | git clone https://github.com/your-username/hommmer.git
38 |
39 | # Or using SSH
40 | git clone git@github.com:your-username/hommmer.git
41 | ```
42 |
43 | 2. Add the original repository as a remote called `upstream`
44 |
45 | ```shell
46 | git remote add upstream https://github.com/hammer-mt/hommmer.git
47 | ```
48 |
49 | 3. Make sure to pull upstream changes into your local repository
50 |
51 | ```shell
52 | git fetch upstream
53 | ```
54 |
55 | 4. Create a new branch to work from
56 |
57 | ```shell
58 | git checkout -b branchname
59 | ```
60 |
61 | 5. Activate a virtual environment
62 |
63 | ```shell
64 | python -m venv venv
65 |
66 | # On Windows
67 | venv\Scripts\activate
68 |
69 | # On macOS / Linux
70 | source ./venv/bin/activate
71 | ```
72 |
73 | 6. Install the package as editable
74 |
75 | ```shell
76 | # Install from the cloned repo (inside Jupyter, prefix with %, i.e. `%pip install -e ...`)
77 | pip install -e your/local/path
78 | ```
79 |
80 | I like working from Jupyter Notebook (Anaconda) because if you run `%load_ext autoreload` then `%autoreload 2`, the module auto-reloads on every saved change to your local package! Note: restart the kernel if you run into an error with classes.
81 |
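82 | In a notebook cell, that workflow looks like this (the same magics as above):
83 |
84 | ```python
85 | # enable auto-reloading of edited modules in IPython / Jupyter
86 | %load_ext autoreload
87 | %autoreload 2
88 |
89 | import hommmer as mmm  # saved changes to your local clone now reload automatically
90 | ```
91 |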
82 | 7. Make your changes / contributions
83 |
84 | Make sure to follow the code style of the project, run any tests (if available) and add / update the documentation as needed.
85 |
86 | Squash your commits with git's [interactive rebase](http://git-scm.com/docs/git-rebase) (create a new branch if necessary). Write your commit messages in the present tense (what does it do to the code?). Push your changes to your fork on GitHub, the remote `origin`.
87 |
88 | ```shell
89 | # Squash commits, fix up commit messages etc.
90 | git rebase -i origin/main
91 |
92 | # Push your branch to your fork on GitHub
93 | git push origin branchname
94 | ```
95 |
96 | ### Pull Request
97 |
98 | When you're done making the changes, open a pull request, often referred to as a PR. You do this on GitHub from your fork of the project. Target the `develop` branch if there is one, else go for `main`.
99 |
100 | - Fill out the PR description with a summary of your changes, so reviewers can understand what you changed and why.
101 | - Don't forget to [link PR to issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) if you are solving one.
102 | - Enable the checkbox to [allow maintainer edits](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/allowing-changes-to-a-pull-request-branch-created-from-a-fork) so the branch can be updated for a merge. Once you submit your PR, a maintainer will review your proposal. We may ask questions or request additional information.
103 | - We may ask for changes to be made before a PR can be merged, either using [suggested changes](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/incorporating-feedback-in-your-pull-request) or pull request comments. You can apply suggested changes directly through the UI. You can make any other changes in your fork, then commit them to your branch.
104 | - As you update your PR and apply changes, mark each conversation as [resolved](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/commenting-on-a-pull-request#resolving-conversations).
105 | - If you run into any merge issues, check out this [git tutorial](https://lab.github.com/githubtraining/managing-merge-conflicts) to help you resolve merge conflicts and other issues.
106 | - Once the pull request is approved and merged you can pull the changes from upstream to your local repo and delete your extra branch(es).
107 |
108 | ### Your PR is merged!
109 |
110 | Congratulations :tada::tada: The hommmer team thanks you!
111 |
112 | Once your PR is merged, we will add you to the All Contributors Table in the [`README.md`](./README.md#all-contributors).
113 |
114 | ### Publishing to PyPI
115 |
116 | This is more a note to self, because I keep forgetting. (The steps are consolidated into one copy-paste block after the list.)
117 |
118 | 1. `pip install twine`
119 | 2. `cd Documents\Projects\hommmer`
120 | 3. update the version number in `setup.py`
121 | 4. `python setup.py sdist bdist_wheel`
122 | 5. delete old versions in the `dist` folder
123 | 6. `twine check dist/*`
124 | 7. `twine upload --repository-url https://test.pypi.org/legacy/ dist/*` (optional: dry run on TestPyPI)
125 | 8. `twine upload dist/*`
126 |
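127 | The same steps as one shell sketch (assumes a Unix-like shell; `path/to/hommmer` is a placeholder, and here `dist/` is cleared before the build rather than after):
128 |
129 | ```shell
130 | pip install twine
131 | cd path/to/hommmer            # project root; bump the version in setup.py first
132 | rm -rf dist/                  # step 5: clear old builds so dist/ only holds the new version
133 | python setup.py sdist bdist_wheel
134 | twine check dist/*
135 | twine upload --repository-url https://test.pypi.org/legacy/ dist/*  # optional TestPyPI dry run
136 | twine upload dist/*
137 | ```
138 |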
127 | Resources:
128 |
129 | - [Using TestPyPi](https://packaging.python.org/guides/using-testpypi/)
130 | - [Building a Python Package and Publishing on PyPi (The Python Package Index)](https://www.section.io/engineering-education/building-a-python-package-and-publishing-on-pypi/)
131 | - [Packaging Python Projects](https://packaging.python.org/tutorials/packaging-projects/)
132 |
--------------------------------------------------------------------------------
/website/pages/index.js:
--------------------------------------------------------------------------------
1 | import Head from "next/head";
2 | import { useState, useRef } from "react";
3 | import { DuplicateIcon, CheckIcon } from "@heroicons/react/solid";
4 |
5 | export default function Home() {
6 | const copyAreaRef = useRef(null);
7 | const [copiedText, setCopiedText] = useState("");
8 |
9 | const quickStartInstructions = [
10 | { title: "Install the package", code: "pip install hommmer" },
11 | { title: "Import the library", code: "import hommmer as mmm" },
12 | { title: "Download example data", code: "mmm.load_duff()" },
13 | {
14 | title: "Build your model",
15 | code: "media = ['facebook', 'google', 'tiktok']\nmodel = mmm.build('duff.csv', 'sales', media)",
16 | },
17 | { title: "Display the results", code: "model.show()" },
18 | ];
19 |
20 | const handleClickToCopy = (text) => {
21 | copyAreaRef.current.value = text;
22 | copyAreaRef.current.select();
23 | document.execCommand("copy");
24 | copyAreaRef.current.value = "";
25 | copyAreaRef.current.blur();
26 | setCopiedText(text);
27 | };
28 | return (
29 |
30 |
31 |
32 | hommmer: A simple Marketing Mix Modeling library in Python.
33 |
34 |
35 |
36 |
37 |
38 |
39 |
47 |
48 |
49 | A simple Marketing Mix Modeling library in Python.
50 |
51 |
52 |
53 |
54 |
Quick Start
55 |
56 | {quickStartInstructions.map((instruction) => {
57 | return (
58 | <>
59 |
{instruction.title}:
60 |
{
63 | handleClickToCopy(instruction.code);
64 | }}
65 | >
66 |
67 |
68 | {instruction.code}
69 |
70 | {copiedText === instruction.code ? (
71 |
75 | ) : (
76 |
80 | )}
81 |
82 |
83 | >
84 | );
85 | })}
86 |
87 |
97 |
98 |
99 |
141 |
142 |
143 |
167 |
168 | );
169 | }
170 |
--------------------------------------------------------------------------------
/SOURCES.md:
--------------------------------------------------------------------------------
1 | ## Sources
2 |
3 | Wherever we use code directly we reference it in the code comments, but this content gave us inspiration for the project more generally.
4 |
5 | - [Facebook Robyn](https://facebookexperimental.github.io/Robyn/) – variable transformations, mission
6 | - [Bayesian Media Mix Modeling using PyMC3, for Fun and Profit](https://engineering.hellofresh.com/bayesian-media-mix-modeling-using-pymc3-for-fun-and-profit-2bd4667504e6) – bayesian methods, process
7 | - [Bayesian Methods for Media Mix Modeling with Carryover and Shape Effects](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46001.pdf) – bayesian methods, variable transformations
9 | - [Bayesian Methods for Hackers](https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers) – how to build bayesian models, PyMC3
10 | - [Statistical Rethinking: A Bayesian Course Using R and Stan](https://github.com/rmcelreath/statrethinking_winter2019) – how to interpret bayesian stats, STAN
11 | - [A Hierarchical Bayesian Approach to Improve Media Mix Models Using Category Data](https://research.google/pubs/pub45999/) – hierarchical models
12 | - [Introduction to Bayesian Methods for MMM](https://getrecast.com/bayesian-methods-for-mmm/) – running Bayesian MMM, variable distributions
13 | - [Feature Selection For Machine Learning in Python](https://machinelearningmastery.com/feature-selection-machine-learning-python/) – feature selection
14 | - [The Five Linear Regression Assumptions: Testing on the Kaggle Housing Price Dataset](https://boostedml.com/2018/08/testing-linear-regression-assumptions-the-kaggle-housing-price-dataset.html) – statistical tests
15 | - [A Complete Guide To Linear Regression In Python](https://www.listendata.com/2018/01/linear-regression-in-python.html) – standard linear regression
16 | - [Advertising Adstock – Concept & Formula](https://analyticsartist.wordpress.com/2013/11/02/calculating-adstock-effect/) – adstocks
17 | - [Advertising Diminishing Returns & Saturation](https://analyticsartist.wordpress.com/2015/03/08/advertising-diminishing-returns-saturation/) – diminishing returns
18 | - [Building and Validating Media Mix Models](https://github.com/mecommerce/ThirdLove-Tech-Blog/blob/master/Media_Mix_Model/ThirdLove_MMM_Whitepaper.pdf) – process, history of MMM
19 | - [Market Mix Modeling using Sales data](https://www.kaggle.com/imdineshgrewal/market-mix-modeling-using-sales-data) – canonical example of MMM on Kaggle
20 | - [DT MART: Market Mix Modeling](https://www.kaggle.com/datatattle/dt-mart-market-mix-modeling) – data cleaning, process
21 | - [Multiple Regression with Google Sheets XL Miner](https://www.youtube.com/watch?v=YhBU92eyNRo) – inspired my first post on MMM, "[Econometrics in GSheets](https://www.saxifrage.xyz/post/econometrics-gsheets)"
22 | - [How to create a basic Marketing Mix Model in scikit-learn](https://practicaldatascience.co.uk/machine-learning/how-to-create-a-basic-marketing-mix-model-in-scikit-learn) – using multiple algos, scikit learn
23 | - [Python/STAN Implementation of Multiplicative Marketing Mix Model](https://towardsdatascience.com/python-stan-implementation-of-multiplicative-marketing-mix-model-with-deep-dive-into-adstock-a7320865b334) – multiplicative models, seasonality
24 | - [Carryover and Shape Effects in Media Mix Modeling: Paper Review](https://towardsdatascience.com/carryover-and-shape-effects-in-media-mix-modeling-paper-review-fd699b509e2d) – Bayesian, PyMC3, adstocks & diminishing returns
25 | - [Modeling adstock using Weibull transformations](https://github.com/annalectnl/weibull-adstock/blob/master/adstock_weibull_annalect.pdf) – Adstocks
26 | - [Media mix models are the future of mobile advertising](https://mobiledevmemo.com/media-mix-models-are-the-future-of-mobile-advertising/) – trends, history, context, strategy
27 | - [Geo-level Bayesian Hierarchical Media Mix Modeling](https://research.google/pubs/pub46000/) – geo models
28 | - [Challenges and Opportunities in Media Mix Modeling](https://research.google/pubs/pub45998/) – trends, history, context, strategy
29 | - [Meet the geniuses behind our BI tool BigBrain](https://engineering.monday.com/meet-the-geniuses-behind-our-bi-tool-bigbrain/) – real world usage
30 | - [Market Mix Model - ElecKart | Kaggle](https://www.kaggle.com/goyalshalini93/market-mix-model-eleckart) – process, data cleaning
31 | - [Data Science Primer | Elite Data Science](https://elitedatascience.com/primer) – data cleaning, process, algorithms = commodities, better data > fancier algorithms
32 | - [How to Calculate Feature Importance With Python](https://machinelearningmastery.com/calculate-feature-importance-with-python/) – feature importances, feature selection
33 | - [Marketing Mix Modeling MMM (Part 3 of 3)](https://www.spencertom.com/2020/08/29/marketing-mix-modeling-mmm-part-3-of-3/) – Log-Linear Models, Log-Log Models
34 |
35 | ## Reading List
36 |
37 | Here's everything we have collected on our list to read, but haven't had a chance to yet. Feel free to add to the list or let us know if something isn't worth our time.
38 |
39 | - https://github.com/dps/montesheet
40 | - https://www.facebook.com/fbgaminghome/blog/marketers/the-future-is-modeled
41 | - https://www.microprediction.com/blog/prophet
42 | - https://www.youtube.com/watch?v=B7ZWehBHVw0
43 | - https://www.latticeworkinsights.com/press/we-evaluated-3-media-mix-models-so-you-dont-have-to
44 | - https://ekimetrics.com/wp-content/uploads/2020/05/Ekimetrics_Facebook_White-paper.pdf?fbclid=IwAR1mvLJ8zcVO567q-3nv21c2DF57kA_eAQWRp1KI4a56eDYGMIPIQ1ieduI
45 | - https://www.marketingevolution.com/marketing-essentials/media-mix-modeling
46 | - https://stat.ethz.ch/R-manual/R-devel/library/stats/html/Weibull.html?fbclid=IwAR0Fygnw1TtxYckg9IYDPPhWwAYrdY666l0Tw0RAJvBhsxanm91uSB3SZP4
47 | - https://blog.brandops.io/tracking-brand-and-demand-the-4-methods-placeholder-title
48 | - https://research.google/pubs/pub41854/
50 | - https://www.adroll.com/blog/marketing-analytics/first-last-touch-attribution-why-its-out-of-style
51 | - https://blackwoodseven.com/the-next-generation-of-marketing-mix-modeling-is-bayesian/
52 | - https://www.youtube.com/watch?v=UznM_-_760Y
53 | - https://www.facebook.com/business/news/insights/5-ways-to-adjust-marketing-mix-models-for-unexpected-events
54 | - https://www.forbes.com/sites/forbesagencycouncil/2020/05/18/digital-marketing-in-a-cookie-less-internet/?sh=7fa7bc3121e2
55 | - https://motamem.org/wp-content/uploads/2019/07/Borden-1984_The-concept-of-marketing-mix.pdf
56 | - https://www.youtube.com/watch?v=UznM_-_760Y&t=908s
57 | - https://www.youtube.com/watch?v=p-YbHMCUycw
58 | - https://www.warc.com/newsandopinion/opinion/the-econometric-hero-and-five-questions-every-cmo-should-ask-about-mmm/4214?es_id=13fa7619cd
59 | - https://www2.deloitte.com/content/dam/Deloitte/es/Documents/estrategia/Deloitte-es-estrategia-y-operaciones-combinacion-mmm-cle.pdf
60 | - https://www.thinkwithgoogle.com/intl/en-gb/marketing-strategies/data-and-measurement/roi-marketing-mix-models/
61 | - https://www.nielsen.com/us/en/insights/article/2017/when-it-comes-to-advertising-effectiveness-what-is-key/
62 | - https://stackoverflow.com/questions/21765794/python-constrained-non-linear-optimization
63 | - https://towardsdatascience.com/efficient-frontier-portfolio-optimisation-in-python-e7844051e7f
64 | - https://medium.com/analytics-vidhya/marketing-mix-model-guide-with-dataset-using-python-r-and-excel-4e319be47b4
65 | - https://www.ashokcharan.com/Marketing-Analytics/~mx-mmm-sales-response-function.php
66 | - https://justrthings.com/2017/12/30/a-multivariate-approach-to-adstock-rate-modeling-in-r/
67 | - http://www.17bigdata.com/robyn-mmm-step-by-step-guide-a-beta-project-from-facebook-marketing-science/
68 | - https://www.themarketingtechnologist.co/the-gam-approach-to-spend-your-money-more-efficiently/
69 | - https://multithreaded.stitchfix.com/blog/2015/07/30/gam/
70 | - https://rstudio-pubs-static.s3.amazonaws.com/294627_5f7e9a449b6c442e806a4743f1b4f8a7.html
71 | - https://www.facebook.com/business/news/insights/considerations-for-creating-modern-marketing-mix-models
72 | - https://www.listendata.com/2019/09/marketing-mix-modeling.html
73 | - https://towardsdatascience.com/building-a-simple-marketing-mix-model-with-ols-571ac3d5b64f
74 | - https://www.facebook.com/business/news/insights/a-full-funnel-approach-how-brand-marketing-drive-short-term-sales
75 | - https://quantmar.com/8/What-is-media-mix-modeling
76 | - https://aaltodoc.aalto.fi/handle/123456789/26743
77 | - https://www.forbes.com/sites/scottmcdonald1/2018/01/23/measuring-the-roi-of-marketing-ab-tests-vs-market-mix-models-vs-multi-touch-attribution/
78 | - https://www.iab.com/insights/the-essential-guide-to-marketing-mix-modeling-and-multi-touch-attribution/
79 | - https://link.springer.com/article/10.1057/jma.2014.3
80 | - https://danaleeling.blogspot.com/2019/09/graphical-confidence-intervals-for.html
81 | - https://www.certificationanswers.com/en/a-marketing-manager-wants-to-use-an-attribution-model-that-includes-both-converting-and-non-converting-paths-in-order-to-evaluate-individual-customer-paths-which-attribution-model-will-fulfill-this-r/
82 | - https://towardsdatascience.com/market-mix-modeling-mmm-101-3d094df976f9
83 | - https://wiki.q-researchsoftware.com/wiki/Driver_(Importance)_Analysis
84 | - https://www.real-statistics.com/multiple-regression/multiple-regression-analysis/multiple-regression-analysis-excel/
85 | - https://www.forbes.com/sites/onmarketing/2012/11/28/the-downside-of-marketing-mix-models-is-theres-no-upside-for-cmos/
86 | - https://sd-group.com.au/en/blog/market-mix-vs-multi-touch-attribution-model
87 | - https://www.slideshare.net/wolfeman02/shows-approach-which-expands-the-breadth-of-what-marketingmix-models-c
88 | - https://bottomlineanalytics.com/brand-content-drivers-modeling-optimizing-content-marketing/
89 | - https://www.slideshare.net/PeterCain1/dynamic-marketing-mix-modelling
90 | - https://blog.hurree.co/blog/marketing-mix-modeling
91 | - https://www.analytic-edge.com/is-marketing-mix-modeling-only-for-fortune-500-companies-think-again/
92 | - https://towardsdatascience.com/causal-vs-statistical-inference-3f2c3e617220
93 | - https://www.futuremarketinsights.com/reports/marketing-mix-optimisation-market
94 | - https://uk.news.yahoo.com/success-story-marketing-mix-modeling-130500042.html
95 | - https://uk.sganalytics.com/case-study/analytics/market-mix-modeling-what-if-simulator-insurance/
96 | - https://www.slideshare.net/vivastream/disney-marketinganalyticsoptimization-14907727
97 | - https://www.thinkwithgoogle.com/intl/en-145/marketing-strategies/video/through-marketing-mix-modeling-loreal-uncovers-youtubes-ability-deliver-sales/
98 | - https://www.jogordonconsulting.com/blog/marketing-mix-modelling-3-case-studies-from-2020
99 | - https://marketingeffectiveness.nielsen.com/our-solutions/marketing-mix-modeling/
100 | - https://www.ashokcharan.com/Marketing-Analytics/~mx-mmm-what-if-analysis.php
101 | - https://www.wsj.com/articles/SB112415492969313998
102 | - https://videoadnews.com/2020/09/14/econometric-modelling-in-advertising-explained/
103 | - https://www.ebiquity.com/news-insights/blog/can-econometrics-prove-the-value-of-influencers/
104 | - https://www.jstor.org/stable/3149922?seq=1
105 | - https://www.jstor.org/stable/3151017?seq=1
106 | - https://www.sequentpartners.com/case-studies-in-holistic-marketing-mix-modeling/
107 | - https://www.treasuredata.com/resources/a-forbes-cmo-practice-report-for-marketing-effectiveness/
108 | - https://www.thedrum.com/opinion/2020/03/16/marketing-mix-marketing-effect-modelling
109 | - https://www.marketingevolution.com/knowledge-center/changing-approach-to-marketing-mix-modeling
110 | - https://nathanbrixius.wordpress.com/2013/11/26/marketing-mix-i/
111 | - https://www.scanmarqed.com/marketing-mix-modeling
112 | - https://www.cmswire.com/cms/customer-experience/forrester-wave-highlights-marketing-mix-modeling-vendors-021092.php
113 | - https://www.linkedin.com/pulse/death-marketing-mix-modeling-we-know-michael-wolfe/
114 | - https://www.sellforte.com/marketing-mix-modeling
115 | - https://towardsdatascience.com/market-mix-modeling-101-part-2-95c5e147c8a3
116 | - https://www.arymalabs.com/Blogs.aspx
117 | - https://www.nielsen.com/uk/en/solutions/capabilities/marketing-mix-modeling/
118 | - https://web.archive.org/web/20190327101933/https://www.tvba.co.uk/article/route-to-market-finding-the-back-door-to-tough-markets
119 | - https://fospha.com/case-studies/joined-up-strategy-driven-by-marketing-mix-modelling
120 | - https://www.businesswire.com/news/home/20200602005541/en/How-Marketing-Mix-Modeling-Helped-a-Food-and-Beverage-Company-to-Gain-Visibility-into-Consumer-Buying-Behavior-A-Case-Study-by-Quantzig
121 | - https://www.businesswire.com/news/home/20200807005016/en/Success-Story---Marketing-mix-modeling-helps-increase-MROI-for-a-US-based-telecommunication-service-provider-Quantzig
122 | - https://www.thinkwithgoogle.com/marketing-strategies/data-and-measurement/marketing-mix-modeling-tutorial/
124 | - http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41854.pdf
126 | - https://hbr.org/2013/03/advertising-analytics-20
128 | - https://towardsdatascience.com/how-to-revise-your-marketing-mix-model-to-capture-covid-19-impact-863b65982408
129 | - https://www.youtube.com/playlist?list=PLwJRxp3blEvZyQBTTOMFRP_TDaSdly3gU
130 | - https://c3metrics.com/whats-the-difference-between-mmm-mta/
131 | - https://medium.com/@gustavobramao/mmm-vs-gbhmmm-ebe537ccf15b
133 | - https://towardsdatascience.com/explaining-feature-importance-by-example-of-a-random-forest-d9166011959e
134 | - https://www.marketingattribution.com/marketing-mix-models/
135 | - https://bottomlineanalytics.com/our-thinking/
136 | - https://economagician.co/2014/05/30/an-example-of-a-bad-market-mix-model/
137 | - https://towardsdatascience.com/machine-learning-vs-econometrics-in-the-real-world-4058095b1013
140 | - https://services.google.com/fh/files/misc/article_marketing_mix_modeling_final.pdf
142 | - https://www.latentview.com/marketing-mix-modeling/
143 | - https://www.investopedia.com/terms/e/econometrics.asp
145 | - https://www.nielsen.com/sa/en/insights/article/2019/5-important-questions-to-ask-your-marketing-mix-vendor/
146 | - https://www.marketingiq.co.uk/tv-media-planning-terms-calculating-media-reach-and-frequency-using-tvrs/
147 | - https://towardsdatascience.com/predicting-sales-611cb5a252de
148 | - https://www.cpgdatainsights.com/answer-business-questions/volume-decomp-part-1/
149 | - https://www.thinkbox.tv/research/demand-generator/
150 | - https://www.quora.com/What-kind-of-econometrics-can-I-do-with-Python
151 | - https://www.jstor.org/stable/40206298
152 | - http://www.upfie.net/
153 | - https://medium.com/@vince.shields913/econometrics-with-python-pt-1-646b6eeff7da
154 | - https://www.kdnuggets.com/2018/12/machine-learning-explainability-interpretability-ai.html
155 | - https://www.kaggle.com/learn/machine-learning-explainability
156 | - https://towardsdatascience.com/an-overview-of-model-explainability-in-modern-machine-learning-fc0f22c8c29a
157 | - https://stats.stackexchange.com/questions/150975/linear-regression-with-diminishing-returns
158 | - https://stats.stackexchange.com/questions/27185/whether-to-include-x-and-x2-in-regression-model-examining-diminishing-retur
159 | - https://stats.stackexchange.com/questions/80559/why-is-functional-form-so-important-when-specifying-models/80563#80563
160 | - https://www.dummies.com/education/economics/econometrics/the-linear-log-model-in-econometrics/
161 | - https://stats.stackexchange.com/questions/356117/how-to-fit-exponential-y-a1-expbx-function-to-a-given-data-set-especially
162 | - http://www.real-statistics.com/regression/exponential-regression-models/exponential-regression/
163 | - https://stats.idre.ucla.edu/stata/dae/multivariate-regression-analysis/
164 | - https://www.kaggle.com/fayejavad/marketing-linear-multiple-regression
165 | - https://towardsdatascience.com/perform-regression-diagnostics-and-tackle-uncertainties-of-linear-models-1372a03b1f56
166 | - https://towardsdatascience.com/fisher-test-for-regression-analysis-1e1687867259
167 | - https://www.coursera.org/lecture/uva-darden-market-analytics/marketing-mix-models-XCCSf
168 | - https://www.kaggle.com/rishph7/market-mix-model
169 | - https://github.com/palitr/Budget-Optimization-in-Ecommerce-using-Market-Mix-Modelling
170 | - https://towardsdatascience.com/market-mix-modelling-application-with-mlr-60b18bd3dc81
171 | - https://sites.google.com/site/2015pcsu/data-science/marketing-mix-modeling-to-find-the-best-advertising-route
172 | - http://www.andrewwalterowens.com/post/111019666926/marketing-mix-modeling-using-statsmodels-part-1
173 | - http://arxiv.org/ftp/arxiv/papers/1403/1403.7971.pdf
174 | - https://medium.com/swlh/marketing-mix-modelling-step-by-step-part-1-702c793d91fd
175 | - https://analyticsartist.wordpress.com/2014/08/17/marketing-mix-modeling-explained-with-r/
176 | - http://datafeedtoolbox.com/marketing-mix-model-for-all-using-r-for-mmm/
177 | - https://www.datasciencecentral.com/profiles/blogs/market-mix-modeling-mmm
178 | - https://rpubs.com/nihil0/mmm01
179 | - https://www.rdocumentation.org/packages/bayesm/versions/3.0-2/topics/cheese
180 | - https://www.quora.com/Marketing-mix-modelling-What-are-the-best-and-most-practical-statistical-techniques-to-use-for-MMM
181 | - https://medium.com/@yasimk_87248/marketing-mix-modeling-for-marketers-de406a988757
182 | - https://web.archive.org/web/20200525194505/https://online-behavior.com/analytics/statistical-significance
183 | - https://proofanalytics.ai/how-it-works-automated-mmm/
184 | - https://www.measured.com/products
185 | - https://mma.com/solutions/marketing-mix-modeling/
186 | - https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
187 | - https://www.datacamp.com/community/tutorials/tutorial-ridge-lasso-elastic-net
188 | - https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html
189 | - https://towardsdatascience.com/marketing-channel-attribution-with-markov-chains-in-python-part-2-the-complete-walkthrough-733c65b23323
190 | - https://medium.com/@mortenhegewald/marketing-channel-attribution-using-markov-chains-101-in-python-78fb181ebf1e
191 | - https://stats.stackexchange.com/questions/74622/converting-standardized-betas-back-to-original-variables
192 | - https://stattrek.com/multiple-regression/interaction.aspx
193 | - https://stackoverflow.com/questions/31029340/how-to-adjust-scaled-scikit-learn-logicistic-regression-coeffs-to-score-a-non-sc
194 | - https://www.youtube.com/watch?v=68ABAU_V8qI
195 | - https://medium.com/towards-artificial-intelligence/understanding-non-linear-regression-fbef9a396b71
196 | - https://www.advancedwebranking.com/ctrstudy/
197 | - https://realpython.com/simpy-simulating-with-python/
198 | - https://www.searchviu.com/en/machine-learning-seo-predicting-rankings/
199 | - https://www.forbes.com/sites/gilpress/2016/03/23/data-preparation-most-time-consuming-least-enjoyable-data-science-task-survey-says/?sh=3b7582c86f63
200 | - https://arxiv.org/pdf/2106.03322.pdf
201 | - https://medium.com/@Marko_Mi/advertising-adstock-theory-85cc9e26ea0e
202 | - http://www.17bigdata.com/python-stan-implementation-of-multiplicative-marketing-mix-model/
203 | - https://analyticsartist.wordpress.com/2014/01/31/adstock-rate-deriving-with-analytical-methods/
204 | - https://storage.googleapis.com/pub-tools-public-publication-data/pdf/b20467a5c27b86c08cceed56fc72ceadb875184a.pdf
205 | - https://github.com/psu4/Marketing-Mix-Modeling-/blob/master/Marketing-Mix-Modeling.py
206 | - https://medium.com/swlh/marketing-response-curves-the-science-of-diminishing-returns-and-saturation-f8cf226e8dc5
207 | - https://www.youtube.com/watch?v=4N0FFzGYfTs
208 | - https://towardsdatascience.com/types-of-interaction-effects-in-market-mix-modeling-mmm-95247f3de36e
209 | - https://twitter.com/RichardFergie/status/1461653687697387524?s=20
210 | - read this: http://brucehardie.com/
211 | - https://betanalpha.github.io/assets/case_studies/falling.html
212 | - https://betanalpha.github.io/assets/case_studies/pystan_workflow.html
213 | - https://twiecki.io/blog/2019/01/14/supply_chain/
214 | - Remake this Statistical Significance code on Saxifrage: https://colab.research.google.com/drive/1DogSh8asM2-13Lv0SC69xRmbQ_DBM5rF?authuser=3
215 | - Lambda School Inferential Statistics Lecture https://colab.research.google.com/drive/1HWgUTIGiuLqDd2538b0p-Xu4O8WBahZ5?authuser=1
216 | - Lambda School Inferential Statistics Assignment https://drive.google.com/open?id=1XPYoZesZT0asuZdQjuurKeJgC-Ytl-Xd
217 | - Bayesian spam filter https://colab.research.google.com/drive/1GKIN_RM3r3JC9R-3AE9ZuyMcPjdG6DH4
218 | - https://quotefancy.com/quote/2401115/Pedro-Domingos-Each-of-the-five-tribes-of-machine-learning-has-its-own-master-algorithm-a
219 | - https://github.com/pymc-devs/resources/tree/master/BCM
220 | - https://github.com/pymc-devs/resources/tree/master/BSM
221 | - https://github.com/pymc-devs/resources/tree/master/BDA3
222 | - https://stats.stackexchange.com/questions/500260/pymc3-implementation-of-bayesian-mmm-poor-posterior-inference
223 | - https://www.youtube.com/watch?v=SWMaoBbIp04
224 | - https://www.youtube.com/watch?v=7tSFNhQO3jg
225 | - https://www.youtube.com/watch?v=ZxR3mw-Znzc
226 | - https://www.youtube.com/watch?v=uxGhjXS3ILE
227 | - https://www.youtube.com/watch?v=appLxcMLT9Y
228 | - https://www.youtube.com/watch?v=BrK7X_XlGB8
229 | - https://www.pymc-labs.io/blog-posts/bayesian-media-mix-modeling-for-marketing-optimization/
231 | - https://www.pymc-labs.io/blog-posts/reducing-customer-acquisition-costs-how-we-helped-optimizing-hellofreshs-marketing-budget/
232 | - http://www.joshuakim.io/marketing-mix-modelling-with-bayesian-regression/
233 | - https://www.coursera.org/learn/bayesian
234 | - https://statswithr.github.io/book/introduction-to-bayesian-regression.html#sec:simple-linear
235 | - https://twiecki.io/blog/2017/02/08/bayesian-hierchical-non-centered/
236 | - https://towardsdatascience.com/bayesian-hierarchical-modeling-in-pymc3-d113c97f5149
237 | - https://docs.pymc.io/en/stable/pymc-examples/examples/case_studies/multilevel_modeling.html
238 | - https://twitter.com/tvladeck/status/1462447221304143894?t=Tm9-OmYEQ-QYivzmed7nJQ&s=03
239 | - https://twitter.com/Mike_Kaminsky/status/1462439240487350276?t=bf_asUh8eXUwUMzTpYNUEQ&s=03
240 | - https://twitter.com/RichardFergie/status/1462425574161453056?t=GT1WmTBdNJH_mJq-VaYAZw&s=03
241 | - https://tvladeck.substack.com/p/did-you-control-for-_?s=03
242 | - https://vincentk1991.github.io/Bayesian-regression-tutorial/
243 | - https://stackoverflow.com/questions/39677240/multivariate-linear-regression-in-pymc3
244 | - https://docs.pymc.io/en/stable/pymc-examples/examples/generalized_linear_models/GLM-linear.html
245 | - https://www.chrisstucchio.com/blog/2017/bayesian_linear_regression.html
246 | - https://twitter.com/emollick/status/1462265543495491591?t=-orv6yDbuswMJA7bMYudOQ&s=03
247 | - https://stats.stackexchange.com/questions/140713/making-predictions-with-log-log-regression-model
248 | - https://davegiles.blogspot.com/2014/12/s.html
249 | - https://stats.stackexchange.com/questions/171386/what-are-bayesian-p-values
250 | - https://royalsocietypublishing.org/doi/10.1098/rsbl.2019.0174
251 | - https://juanitorduz.github.io/fb_prophet/
252 | - https://stats.stackexchange.com/questions/238297/how-to-determine-appropriate-lagged-features-for-learning-systems-with-states
253 | - https://proceedings.neurips.cc/paper/2008/file/7380ad8a673226ae47fce7bff88e9c33-Paper.pdf
254 | - https://www.youtube.com/watch?v=DJ0c7Bm5Djk&t=16809s
255 | - https://www.eigenfoo.xyz/_posts/2018-06-19-bayesian-modelling-cookbook/
256 | - https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7
257 | - https://www.semanticscholar.org/paper/Probabilistic-programming-in-Python-using-PyMC3-Salvatier-Wiecki/8085b60ce1771647f11ccc4728397275b502f359?p2df
258 | - https://www.quantstart.com/articles/Bayesian-Linear-Regression-Models-with-PyMC3/
259 | - https://twiecki.io/blog/2013/08/12/bayesian-glms-1/
260 | - https://www.datasciencecentral.com/k-means-a-step-towards-marketing-mix-modeling/
261 | - https://vincentk1991.github.io/adstock-pyro/
262 | - https://discourse.pymc.io/t/geometric-adstock-paramter-estimation-in-pymc3-and-theano-using-theano-scan/1864
263 | - http://www.17bigdata.com/marketing-mix-modelling-mmm-a-potential-solution/
264 | - https://www.sciencedirect.com/science/article/abs/pii/S016781161500066X
265 | - https://juanitorduz.github.io/pymc_mmm/
266 | - https://towardsdatascience.com/python-stan-implementation-of-multiplicative-marketing-mix-model-with-deep-dive-into-adstock-a7320865b334
267 | - https://blog.asana.com/2022/01/marketing-measurement-capabilities/#close
268 |
--------------------------------------------------------------------------------