├── .vscode └── settings.json ├── 01_customer_segmentation ├── 01_customer_segmentation.py └── 02_customer_segmentation.R ├── 02_demand_forecasting ├── 01_demand_forecasting.py └── 02_demand_forecasting.R ├── 03_credit_risk ├── 01_credit_risk.py └── 02_credit_risk.R ├── 04_fraud_detection ├── 01_fraud_detection.py └── 02_fraud_detection.R ├── 05_supply_chain_optimization ├── 01_supply_chain_optimization.py └── 02_supply_chain_optimization.R ├── README.md └── img └── 10_business_problems.jpg /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "r.lsp.diagnostics": false 3 | } -------------------------------------------------------------------------------- /01_customer_segmentation/01_customer_segmentation.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 1: CUSTOMER SEGMENTATION 3 | 4 | # Libraries 5 | import pandas as pd 6 | from sklearn.cluster import KMeans 7 | import matplotlib.pyplot as plt 8 | 9 | # Sample data creation 10 | data = { 11 | 'Age': [25, 47, 35, 45, 22, 34, 52, 23, 40, 60], 12 | 'Annual Income (k$)': [25, 60, 29, 55, 20, 40, 50, 15, 60, 30], 13 | 'Spending Score (1-100)': [30, 55, 35, 50, 45, 50, 30, 25, 70, 40] 14 | } 15 | 16 | df = pd.DataFrame(data) 17 | 18 | # Using KMeans for clustering 19 | kmeans = KMeans(n_clusters=3, random_state=42) 20 | df['Cluster'] = kmeans.fit_predict(df[['Age', 'Annual Income (k$)', 'Spending Score (1-100)']]) 21 | 22 | # Plotting the clusters 23 | plt.figure(figsize=(10, 6)) 24 | colors = ['red', 'green', 'blue'] 25 | for i in range(3): 26 | plt.scatter(df[df['Cluster'] == i]['Age'], 27 | df[df['Cluster'] == i]['Annual Income (k$)'], 28 | label=f'Cluster {i+1}', 29 | c=colors[i]) 30 | 31 | plt.title('Customer Segmentation') 32 | plt.xlabel('Age') 33 | plt.ylabel('Annual Income (k$)') 34 | plt.legend() 35 | plt.show() 36 | -------------------------------------------------------------------------------- /01_customer_segmentation/02_customer_segmentation.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 1: CUSTOMER SEGMENTATION 3 | 4 | library(tidyverse) 5 | 6 | # Creating sample data 7 | data <- tibble( 8 | Age = c(25, 47, 35, 45, 22, 34, 52, 23, 40, 60), 9 | Annual_Income_k = c(25, 60, 29, 55, 20, 40, 50, 15, 60, 30), 10 | Spending_Score = c(30, 55, 35, 50, 45, 50, 30, 25, 70, 40) 11 | ) 12 | 13 | # Performing K-means clustering 14 | set.seed(42) # for reproducibility 15 | clusters <- kmeans(data, centers = 3, nstart = 25) 16 | 17 | # Adding cluster results to the data frame 18 | data$Cluster <- as.factor(clusters$cluster) 19 | 20 | # Plotting the clusters 21 | ggplot(data, aes(x = Age, y = Annual_Income_k, color = Cluster)) + 22 | geom_point(aes(size = Spending_Score), alpha = 0.6) + 23 | scale_color_manual(values = c("red", "green", "blue")) + 24 | labs(title = "Customer Segmentation with K-means Clustering", 25 | x = "Age", 26 | y = "Annual Income (k$)", 27 | color = "Cluster") + 28 | theme_minimal() 29 | -------------------------------------------------------------------------------- /02_demand_forecasting/01_demand_forecasting.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 2: DEMAND FORECASTING 3 | 4 | # libraries 5 | import pandas as pd 6 | import 
numpy as np 7 | from xgboost import XGBRegressor 8 | from sklearn.model_selection import train_test_split 9 | from sklearn.preprocessing import OneHotEncoder 10 | from sklearn.compose import ColumnTransformer 11 | from sklearn.pipeline import Pipeline 12 | from sklearn.metrics import mean_squared_error 13 | import matplotlib.pyplot as plt 14 | from datetime import datetime, timedelta 15 | 16 | # Set the seed for reproducibility 17 | np.random.seed(123) 18 | 19 | # Generate the date sequence 20 | date_seq = pd.date_range(start="2020-01-01", end="2021-12-31", freq='D') 21 | 22 | # Create the dataframe 23 | data = pd.DataFrame({ 24 | 'date': date_seq, 25 | 'demand': np.round(100 + np.sin(np.arange(len(date_seq)) / 20) * 50 + np.random.normal(0, 10, len(date_seq))) 26 | }) 27 | 28 | # Convert date column to datetime and sort the data 29 | data['date'] = pd.to_datetime(data['date']) 30 | data.sort_values('date', inplace=True) 31 | 32 | # Split the data into training and testing sets 33 | split_date = pd.Timestamp('2021-06-01') 34 | train_data = data[data['date'] < split_date] 35 | test_data = data[data['date'] >= split_date] 36 | 37 | def create_features(df): 38 | df['dayofyear'] = df['date'].dt.dayofyear 39 | df['dayofweek'] = df['date'].dt.dayofweek 40 | df['month'] = df['date'].dt.month 41 | 42 | # Fourier features for capturing seasonality 43 | for k in range(1, 3): # K=2 fourier pairs 44 | df[f'sin{k}'] = np.sin(df['dayofyear'] * (2. * np.pi * k / 365.25)) 45 | df[f'cos{k}'] = np.cos(df['dayofyear'] * (2. * np.pi * k / 365.25)) 46 | return df 47 | 48 | train_features = create_features(train_data.copy()) 49 | test_features = create_features(test_data.copy()) 50 | 51 | # Define the features and the target 52 | feature_columns = ['dayofyear', 'dayofweek', 'month', 'sin1', 'cos1', 'sin2', 'cos2'] 53 | target_column = 'demand' 54 | 55 | # Model Pipeline 56 | pipeline = Pipeline(steps=[ 57 | ('encoder', ColumnTransformer( 58 | transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), ['dayofweek', 'month'])], 59 | remainder='passthrough')), 60 | ('xgb', XGBRegressor(n_estimators=100, learning_rate=0.1, objective='reg:squarederror')) 61 | ]) 62 | 63 | # Fit the model 64 | pipeline.fit(train_features[feature_columns], train_features[target_column]) 65 | 66 | # Predict using the model 67 | train_preds = pipeline.predict(train_features[feature_columns]) 68 | test_preds = pipeline.predict(test_features[feature_columns]) 69 | 70 | # Evaluate the model 71 | train_rmse = np.sqrt(mean_squared_error(train_features[target_column], train_preds)) 72 | test_rmse = np.sqrt(mean_squared_error(test_features[target_column], test_preds)) 73 | 74 | print(f"Train RMSE: {train_rmse}, Test RMSE: {test_rmse}") 75 | 76 | # Create future dates for forecasting 77 | future_dates = pd.date_range(start=test_data['date'].max() + timedelta(days=1), periods=180, freq='D') 78 | future_data = pd.DataFrame({'date': future_dates}) 79 | future_features = create_features(future_data.copy()) 80 | 81 | # Forecast future demand 82 | future_preds = pipeline.predict(future_features[feature_columns]) 83 | 84 | # Plot the results 85 | plt.figure(figsize=(14, 7)) 86 | plt.plot(train_data['date'], train_data['demand'], label='Train Data') 87 | plt.plot(test_data['date'], test_data['demand'], label='Test Data') 88 | plt.plot(future_dates, future_preds, label='Forecast', color='red') 89 | plt.legend() 90 | plt.title('Demand Forecast') 91 | plt.xlabel('Date') 92 | plt.ylabel('Demand') 93 | plt.show() 94 | 
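# Optional extension (a minimal sketch, not part of the original script): report MAE and MAPE
# on the held-out window in addition to RMSE. MAE is in demand units and is often easier to
# communicate, while MAPE gives a scale-free percentage error. Assumes the pipeline above has
# already been fit and that test_preds exists; MAPE also assumes demand is never zero.
from sklearn.metrics import mean_absolute_error

test_mae = mean_absolute_error(test_features[target_column], test_preds)
test_mape = np.mean(np.abs((test_features[target_column] - test_preds) / test_features[target_column])) * 100
print(f"Test MAE: {test_mae:.2f}, Test MAPE: {test_mape:.2f}%")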
-------------------------------------------------------------------------------- /02_demand_forecasting/02_demand_forecasting.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 2: DEMAND FORECASTING 3 | 4 | # libraries 5 | library(tidyverse) 6 | library(lubridate) 7 | library(timetk) 8 | library(modeltime) 9 | library(tidymodels) 10 | 11 | # Example data creation 12 | set.seed(123) 13 | date_seq <- seq(as.Date("2020-01-01"), as.Date("2021-12-31"), by="day") 14 | data <- tibble( 15 | date = date_seq, 16 | demand = round(100 + sin(seq_along(date_seq)/20) * 50 + rnorm(length(date_seq), mean = 0, sd = 10)) 17 | ) 18 | 19 | # Convert date column to a proper date format and ensure it's sorted 20 | data <- data %>% 21 | mutate(date = as.Date(date)) %>% 22 | arrange(date) 23 | 24 | # Split the data into training and testing 25 | split_date <- as.Date("2021-06-01") 26 | train_data <- data %>% filter(date < split_date) 27 | test_data <- data %>% filter(date >= split_date) 28 | 29 | 30 | # Define the recipe 31 | recipe_spec <- recipe(demand ~ date, data = train_data) %>% 32 | step_timeseries_signature(date) %>% 33 | step_fourier(date, period = 365, K = 2) %>% 34 | step_rm(date) %>% 35 | step_dummy(all_nominal_predictors(), one_hot = TRUE) 36 | 37 | 38 | # Define the model 39 | model_spec <- boost_tree() %>% 40 | set_engine("xgboost") %>% 41 | set_mode("regression") 42 | 43 | # Fit the model 44 | model_fit <- workflow() %>% 45 | add_model(model_spec) %>% 46 | add_recipe(recipe_spec) %>% 47 | fit(train_data) 48 | 49 | # Create future dataframe for forecasting 50 | future_dates <- tibble(date = seq(max(train_data$date) + 1, by="day", length.out=180)) 51 | 52 | # Forecast 53 | forecast <- modeltime_table(model_fit) %>% 54 | modeltime_forecast(new_data = future_dates, actual_data = train_data) 55 | 56 | # Plot the results 57 | forecast %>% 58 | plot_modeltime_forecast( 59 | .legend_show = TRUE, 60 | .interactive = FALSE 61 | ) -------------------------------------------------------------------------------- /03_credit_risk/01_credit_risk.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 3: CREDIT RISK 3 | 4 | # 1. Import Libraries 5 | import numpy as np 6 | import pandas as pd 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.linear_model import LogisticRegression 9 | from sklearn.preprocessing import StandardScaler 10 | from sklearn.metrics import accuracy_score, classification_report 11 | 12 | # 2. Generate Data 13 | # Let's assume we have two features: credit_score and annual_income, and the target is credit_risk (0 for low risk, 1 for high risk) 14 | np.random.seed(0) 15 | data_size = 1000 16 | credit_scores = np.random.normal(600, 100, data_size) 17 | annual_incomes = np.random.normal(50000, 15000, data_size) 18 | credit_risks = (credit_scores < 580) | (annual_incomes < 30000) # Simplified risk criteria 19 | 20 | # Create a DataFrame 21 | df = pd.DataFrame({ 22 | 'credit_score': credit_scores, 23 | 'annual_income': annual_incomes, 24 | 'credit_risk': credit_risks.astype(int) 25 | }) 26 | 27 | # 3. 
Preprocess Data 28 | # Split data into features and target 29 | X = df[['credit_score', 'annual_income']] 30 | y = df['credit_risk'] 31 | 32 | # Split data into training and test sets 33 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 34 | 35 | # Scale the features 36 | scaler = StandardScaler() 37 | X_train_scaled = scaler.fit_transform(X_train) 38 | X_test_scaled = scaler.transform(X_test) 39 | 40 | # 4. Train a Model 41 | model = LogisticRegression() 42 | model.fit(X_train_scaled, y_train) 43 | 44 | # 5. Evaluate the Model 45 | y_pred = model.predict(X_test_scaled) 46 | accuracy = accuracy_score(y_test, y_pred) 47 | report = classification_report(y_test, y_pred) 48 | 49 | print(f"Accuracy: {accuracy:.2f}") 50 | print("Classification Report:") 51 | print(report) -------------------------------------------------------------------------------- /03_credit_risk/02_credit_risk.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 3: CREDIT RISK 3 | 4 | # Install and load necessary packages 5 | library(tidyverse) 6 | library(tidymodels) 7 | 8 | # Set seed for reproducibility 9 | set.seed(0) 10 | 11 | # 2. Generate Data 12 | data_size <- 1000 13 | credit_scores <- rnorm(data_size, mean=600, sd=100) 14 | annual_incomes <- rnorm(data_size, mean=50000, sd=15000) 15 | credit_risks <- ifelse(credit_scores < 580 | annual_incomes < 30000, 1, 0) 16 | 17 | # Create a tibble 18 | data <- tibble( 19 | credit_score = credit_scores, 20 | annual_income = annual_incomes, 21 | credit_risk = factor(credit_risks, levels = c(0, 1)) 22 | ) 23 | 24 | # 3. Preprocess Data 25 | split <- initial_split(data, prop = 0.8) 26 | train_data <- training(split) 27 | test_data <- testing(split) 28 | 29 | # Recipe for preprocessing 30 | recipe <- recipe(credit_risk ~ ., data = train_data) %>% 31 | step_scale(all_predictors()) %>% 32 | step_center(all_predictors()) 33 | 34 | # 4. Train a Model 35 | logit_model <- logistic_reg() %>% 36 | set_engine("glm") %>% 37 | set_mode("classification") 38 | 39 | workflow <- workflow() %>% 40 | add_recipe(recipe) %>% 41 | add_model(logit_model) %>% 42 | fit(data = train_data) 43 | 44 | # 5. 
Evaluate the Model 45 | test_results <- workflow %>% 46 | predict(new_data = test_data) %>% 47 | bind_cols(test_data) %>% 48 | metrics(truth = credit_risk, estimate = .pred_class) 49 | 50 | accuracy <- test_results %>% 51 | filter(.metric == "accuracy") 52 | 53 | cat("Accuracy:", accuracy$.estimate, "\n") 54 | conf_mat <- workflow %>% 55 | predict(new_data = test_data) %>% 56 | bind_cols(test_data) %>% 57 | conf_mat(truth = credit_risk, estimate = .pred_class) 58 | 59 | autoplot(conf_mat, type = "heatmap") 60 | -------------------------------------------------------------------------------- /04_fraud_detection/01_fraud_detection.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 4: FRAUD DETECTION 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from sklearn.preprocessing import LabelEncoder 7 | from sklearn.model_selection import train_test_split 8 | import xgboost as xgb 9 | from sklearn.metrics import classification_report 10 | import matplotlib.pyplot as plt 11 | 12 | # Set random seed for reproducibility 13 | np.random.seed(0) 14 | 15 | # Generate synthetic data 16 | data_size = 1000 17 | transaction_types = np.random.choice(['type1', 'type2', 'type3'], size=data_size) 18 | transaction_amounts = np.random.exponential(scale=200, size=data_size) 19 | age_of_account_days = np.random.normal(loc=365, scale=100, size=data_size) 20 | 21 | # Conditional probabilities for being fraudulent 22 | fraudulent = [] 23 | for i in range(data_size): 24 | # Higher chance of fraud for type1 with high transaction amount and low account age 25 | if transaction_types[i] == 'type1' and transaction_amounts[i] > 100 and age_of_account_days[i] < 365: 26 | fraudulent.append(np.random.choice([0, 1], p=[0.1, 0.9])) # 90% chance of being fraudulent 27 | else: 28 | fraudulent.append(np.random.choice([0, 1], p=[0.99, 0.01])) # 1% chance as normal 29 | 30 | df = pd.DataFrame({ 31 | 'transaction_amount': transaction_amounts, 32 | 'transaction_type': transaction_types, 33 | 'age_of_account_days': age_of_account_days, 34 | 'fraudulent': fraudulent 35 | }) 36 | 37 | df_untransformed = df.copy() 38 | 39 | # Encode categorical data 40 | df['transaction_type'] = LabelEncoder().fit_transform(df['transaction_type']) 41 | 42 | # Prepare data for training 43 | X = df.drop('fraudulent', axis=1) 44 | y = df['fraudulent'] 45 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 46 | 47 | # Train the XGBoost model 48 | model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss') 49 | model.fit(X_train, y_train) 50 | 51 | # Predictions 52 | predictions_df = pd.DataFrame(model.predict(X_test), columns=['predict_class']) 53 | predictions_proba_df = pd.DataFrame(model.predict_proba(X_test), columns=["p0", "p1"]) 54 | 55 | # Test Set Evaluation 56 | fraud_scoring_df = pd.concat([ 57 | df_untransformed.iloc[X_test.index.values].reset_index(), predictions_df, 58 | predictions_proba_df 59 | ], axis=1) \ 60 | .set_index("index") 61 | 62 | fraud_scoring_df.sort_values('p1', ascending=False) 63 | 64 | print(classification_report(fraud_scoring_df['fraudulent'], fraud_scoring_df['predict_class'])) 65 | -------------------------------------------------------------------------------- /04_fraud_detection/02_fraud_detection.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 4: 
FRAUD DETECTION 3 | 4 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 5 | # PROJECT 4: FRAUD DETECTION 6 | 7 | library(tidyverse) 8 | library(tidymodels) 9 | library(xgboost) 10 | 11 | # Generate synthetic data 12 | set.seed(0) 13 | data_size <- 1000 14 | transaction_types <- sample(c("type1", "type2", "type3"), size = data_size, replace = TRUE) # nolint 15 | transaction_amounts <- rexp(n = data_size, rate = 1 / 200) 16 | age_of_account_days <- rnorm(n = data_size, mean = 365, sd = 100) 17 | fraudulent <- vector("numeric", length = data_size) 18 | 19 | for (i in 1:data_size) { 20 | if (transaction_types[i] == "type1" && transaction_amounts[i] > 100 && age_of_account_days[i] < 365) { 21 | fraudulent[i] <- sample(c(0, 1), size = 1, prob = c(0.1, 0.9)) # 90% chance of being fraudulent 22 | } else { 23 | fraudulent[i] <- sample(c(0, 1), size = 1, prob = c(0.99, 0.01)) # 1% chance as normal 24 | } 25 | } 26 | 27 | df <- tibble( 28 | transaction_amount = transaction_amounts, 29 | transaction_type = transaction_types, 30 | age_of_account_days = age_of_account_days, 31 | fraudulent = as.factor(fraudulent) 32 | ) %>% rowid_to_column() 33 | 34 | df_untransformed <- df 35 | 36 | # Data preparation 37 | df <- df %>% 38 | mutate(transaction_type = as.numeric(as.factor(transaction_type))) # Encode categorical data as numeric 39 | 40 | # Splitting the data 41 | set.seed(0) 42 | split <- initial_split(df, prop = 0.8) 43 | train_data <- training(split) 44 | test_data <- testing(split) 45 | 46 | # XGBoost Model Setup 47 | xgb_spec <- boost_tree(trees = 50, tree_depth = 6, min_n = 10) %>% 48 | set_engine("xgboost", eval_metric = "logloss") %>% 49 | set_mode("classification") 50 | 51 | # Fit the Model 52 | xgb_fit <- xgb_spec %>% 53 | fit(fraudulent ~ . - rowid, data = train_data) 54 | 55 | # Make Predictions 56 | test_results <- test_data %>% 57 | select(-fraudulent) %>% 58 | predict(xgb_fit, new_data = ., type = "prob") %>% 59 | mutate(predict_class = as.numeric(.pred_1 > 0.5) %>% as.factor()) %>% 60 | bind_cols( 61 | df_untransformed %>% filter(rowid %in% test_data$rowid) 62 | ) 63 | 64 | # Evaluate the Model 65 | test_results %>% 66 | select(fraudulent, predict_class) %>% 67 | yardstick::metrics(truth = fraudulent, estimate = predict_class) 68 | 69 | # Confusion Matrix 70 | conf_mat(test_results, truth = fraudulent, estimate = predict_class) 71 | 72 | # Visualization 73 | test_results %>% 74 | ggplot(aes(x = transaction_amount, fill = as.factor(fraudulent))) + 75 | geom_histogram(position = "identity", alpha = 0.5, bins = 30) + 76 | scale_fill_manual(values = c("gray", "red"), labels = c("Legitimate", "Fraudulent")) + 77 | labs( 78 | title = "Transaction Amount Distribution", 79 | x = "Transaction Amount", 80 | y = "Frequency", 81 | fill = "Transaction Type" 82 | ) + 83 | theme_minimal() 84 | -------------------------------------------------------------------------------- /05_supply_chain_optimization/01_supply_chain_optimization.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 5: SUPPLY CHAIN OPTIMIZATION 3 | 4 | import pulp 5 | 6 | # Create a problem variable: 7 | prob = pulp.LpProblem("Supply_Chain_Optimization", pulp.LpMinimize) 8 | 9 | # Define decision variables: 10 | production_units = pulp.LpVariable("Production_units", lowBound=0, cat='Continuous') 11 | transport_units = pulp.LpVariable("Transport_units", lowBound=0, cat='Continuous') 12 | 13 | # Define costs: 14 | 
production_cost_per_unit = 10 15 | transport_cost_per_unit = 5 16 | storage_cost_per_unit = 2 17 | 18 | # Objective function (storage cost is charged per transported unit): 19 | prob += production_cost_per_unit * production_units + transport_cost_per_unit * transport_units + storage_cost_per_unit * transport_units, "Total_Cost" 20 | 21 | # Constraints: 22 | prob += production_units <= 1000, "Max_Production_Capacity" 23 | prob += transport_units <= production_units, "Transport_Less_Than_Production" 24 | prob += transport_units >= 800, "Min_Demand_Fulfillment" 25 | 26 | # Solve the problem: 27 | prob.solve() 28 | 29 | # Print the results: 30 | print("Status:", pulp.LpStatus[prob.status]) 31 | print("Optimal Production units:", production_units.varValue) 32 | print("Optimal Transport units:", transport_units.varValue) 33 | print("Total Cost:", pulp.value(prob.objective)) 34 | -------------------------------------------------------------------------------- /05_supply_chain_optimization/02_supply_chain_optimization.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 5: SUPPLY CHAIN OPTIMIZATION 3 | 4 | # Load the lpSolve package 5 | library(lpSolve) 6 | 7 | # Define the costs 8 | production_cost_per_unit <- 10 9 | transport_cost_per_unit <- 5 # Transportation cost 10 | storage_cost_per_unit <- 2 # Storage cost 11 | 12 | # Total transport cost including storage 13 | total_transport_cost_per_unit <- transport_cost_per_unit + storage_cost_per_unit 14 | 15 | # Objective function coefficients 16 | objective <- c(production_cost_per_unit, total_transport_cost_per_unit) 17 | 18 | # Constraints matrix 19 | # Production units, Transport units 20 | constraints <- matrix(c(1, 0, # Production <= 1000 units 21 | 1, -1, # Production units >= Transport units 22 | 0, 1), # Transport units >= 800 units 23 | nrow=3, byrow=TRUE) 24 | 25 | # Right-hand side of the constraints 26 | rhs <- c(1000, 0, 800) 27 | 28 | # Directions of the constraints 29 | directions <- c("<=", ">=", ">=") 30 | 31 | # Solve the linear programming problem 32 | solution <- lp("min", objective, constraints, directions, rhs) 33 | 34 | # Check whether the solution is optimal 35 | if(solution$status == 0) { 36 | print("Solution is optimal") 37 | print(paste("Optimal Production units:", solution$solution[1])) 38 | print(paste("Optimal Transport units:", solution$solution[2])) 39 | print(paste("Total Cost:", solution$objval)) 40 | } else if(solution$status == 2) { 41 | # lpSolve reports status 2 when no feasible solution exists 42 | print("No feasible solution was found") 43 | } else { 44 | print(paste("Solver returned a non-optimal status code:", solution$status)) 45 | } 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 10 Business Problems that Can Be Solved with Data Science 2 | 3 | ## Project Goal: 4 | 5 | The goal of this project is to help data scientists kickstart their own projects with basic business problems that businesses, Fortune 500 companies, and high-demand industries need solved. 6 | 7 | Each project comes with synthetic (fake) data and can be used to jumpstart more in-depth analysis.
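For example, a script can be pointed at real data by swapping the synthetic data frame for a CSV load. A minimal sketch for the customer segmentation example, assuming a hypothetical `customers.csv` with the same three columns used in the script:

```python
import pandas as pd

# Hypothetical file and column names -- adjust to your own dataset.
df = pd.read_csv("customers.csv")
df = df[['Age', 'Annual Income (k$)', 'Spending Score (1-100)']].dropna()
# The rest of 01_customer_segmentation/01_customer_segmentation.py runs unchanged.
```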
8 | 9 | ## Work in Progress (Coming Soon): 10 | 11 | The following top 10 business problems are planned to be covered: 12 | 13 | ![Top 10 Business Problems](/img/10_business_problems.jpg) 14 | 15 | ## Going Further 16 | 17 | For data scientists who want to learn how to solve more advanced business problems with Python and R: 18 | 19 | **(No prerequisites required if you start with the 1st course)** 20 | 21 | 1. [**2-Course Python Track**](https://university.business-science.io/p/2-course-python-track?el=github): Through 759 lessons, 63 hours of video, 5 Challenges, and 4 projects, learn data science, machine learning, lifecycle management, APIs, Web Applications, ROI, Business Problem Solving, and a host of niche data science skills. 22 | 2. [**5-Course R-Track**](https://university.business-science.io/p/5-course-bundle-machine-learning-web-apps-time-series?el=github): Through 1,848 lessons, 153 hours of video training, 24 Challenges, and 8 advanced projects, learn expert-level data science skills, including Business Problem Solving for Data Scientists, Advanced Machine Learning, ROI, Time Series Forecasting, Shiny Web Applications, and more. -------------------------------------------------------------------------------- /img/10_business_problems.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/business-science/10_python_r_business_problems/292d91f72a47af5cd6d2805a122f2f0e295b88c2/img/10_business_problems.jpg --------------------------------------------------------------------------------