├── .vscode └── settings.json ├── 01_customer_segmentation ├── 01_customer_segmentation.py └── 02_customer_segmentation.R ├── 02_demand_forecasting ├── 01_demand_forecasting.py └── 02_demand_forecasting.R ├── 03_credit_risk ├── 01_credit_risk.py └── 02_credit_risk.R ├── 04_fraud_detection ├── 01_fraud_detection.py └── 02_fraud_detection.R ├── 05_supply_chain_optimization ├── 01_supply_chain_optimization.py └── 02_supply_chain_optimization.R ├── README.md └── img └── 10_business_problems.jpg /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "r.lsp.diagnostics": false 3 | } -------------------------------------------------------------------------------- /01_customer_segmentation/01_customer_segmentation.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 1: CUSTOMER SEGMENTATION 3 | 4 | # Libraries 5 | import pandas as pd 6 | from sklearn.cluster import KMeans 7 | import matplotlib.pyplot as plt 8 | 9 | # Sample data creation 10 | data = { 11 | 'Age': [25, 47, 35, 45, 22, 34, 52, 23, 40, 60], 12 | 'Annual Income (k$)': [25, 60, 29, 55, 20, 40, 50, 15, 60, 30], 13 | 'Spending Score (1-100)': [30, 55, 35, 50, 45, 50, 30, 25, 70, 40] 14 | } 15 | 16 | df = pd.DataFrame(data) 17 | 18 | # Using KMeans for clustering 19 | kmeans = KMeans(n_clusters=3, random_state=42) 20 | df['Cluster'] = kmeans.fit_predict(df[['Age', 'Annual Income (k$)', 'Spending Score (1-100)']]) 21 | 22 | # Plotting the clusters 23 | plt.figure(figsize=(10, 6)) 24 | colors = ['red', 'green', 'blue'] 25 | for i in range(3): 26 | plt.scatter(df[df['Cluster'] == i]['Age'], 27 | df[df['Cluster'] == i]['Annual Income (k$)'], 28 | label=f'Cluster {i+1}', 29 | c=colors[i]) 30 | 31 | plt.title('Customer Segmentation') 32 | plt.xlabel('Age') 33 | plt.ylabel('Annual Income (k$)') 34 | plt.legend() 35 | plt.show() 36 | -------------------------------------------------------------------------------- /01_customer_segmentation/02_customer_segmentation.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 1: CUSTOMER SEGMENTATION 3 | 4 | library(tidyverse) 5 | 6 | # Creating sample data 7 | data <- tibble( 8 | Age = c(25, 47, 35, 45, 22, 34, 52, 23, 40, 60), 9 | Annual_Income_k = c(25, 60, 29, 55, 20, 40, 50, 15, 60, 30), 10 | Spending_Score = c(30, 55, 35, 50, 45, 50, 30, 25, 70, 40) 11 | ) 12 | 13 | # Performing K-means clustering 14 | set.seed(42) # for reproducibility 15 | clusters <- kmeans(data, centers = 3, nstart = 25) 16 | 17 | # Adding cluster results to the data frame 18 | data$Cluster <- as.factor(clusters$cluster) 19 | 20 | # Plotting the clusters 21 | ggplot(data, aes(x = Age, y = Annual_Income_k, color = Cluster)) + 22 | geom_point(aes(size = Spending_Score), alpha = 0.6) + 23 | scale_color_manual(values = c("red", "green", "blue")) + 24 | labs(title = "Customer Segmentation with K-means Clustering", 25 | x = "Age", 26 | y = "Annual Income (k$)", 27 | color = "Cluster") + 28 | theme_minimal() 29 | -------------------------------------------------------------------------------- /02_demand_forecasting/01_demand_forecasting.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 2: DEMAND FORECASTING 3 | 4 | # libraries 5 | import pandas as pd 6 | import 
numpy as np 7 | from xgboost import XGBRegressor 8 | from sklearn.model_selection import train_test_split 9 | from sklearn.preprocessing import OneHotEncoder 10 | from sklearn.compose import ColumnTransformer 11 | from sklearn.pipeline import Pipeline 12 | from sklearn.metrics import mean_squared_error 13 | import matplotlib.pyplot as plt 14 | from datetime import datetime, timedelta 15 | 16 | # Set the seed for reproducibility 17 | np.random.seed(123) 18 | 19 | # Generate the date sequence 20 | date_seq = pd.date_range(start="2020-01-01", end="2021-12-31", freq='D') 21 | 22 | # Create the dataframe 23 | data = pd.DataFrame({ 24 | 'date': date_seq, 25 | 'demand': np.round(100 + np.sin(np.arange(len(date_seq)) / 20) * 50 + np.random.normal(0, 10, len(date_seq))) 26 | }) 27 | 28 | # Convert date column to datetime and sort the data 29 | data['date'] = pd.to_datetime(data['date']) 30 | data.sort_values('date', inplace=True) 31 | 32 | # Split the data into training and testing sets 33 | split_date = pd.Timestamp('2021-06-01') 34 | train_data = data[data['date'] < split_date] 35 | test_data = data[data['date'] >= split_date] 36 | 37 | def create_features(df): 38 | df['dayofyear'] = df['date'].dt.dayofyear 39 | df['dayofweek'] = df['date'].dt.dayofweek 40 | df['month'] = df['date'].dt.month 41 | 42 | # Fourier features for capturing seasonality 43 | for k in range(1, 3): # K=2 fourier pairs 44 | df[f'sin{k}'] = np.sin(df['dayofyear'] * (2. * np.pi * k / 365.25)) 45 | df[f'cos{k}'] = np.cos(df['dayofyear'] * (2. * np.pi * k / 365.25)) 46 | return df 47 | 48 | train_features = create_features(train_data.copy()) 49 | test_features = create_features(test_data.copy()) 50 | 51 | # Define the features and the target 52 | feature_columns = ['dayofyear', 'dayofweek', 'month', 'sin1', 'cos1', 'sin2', 'cos2'] 53 | target_column = 'demand' 54 | 55 | # Model Pipeline 56 | pipeline = Pipeline(steps=[ 57 | ('encoder', ColumnTransformer( 58 | transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), ['dayofweek', 'month'])], 59 | remainder='passthrough')), 60 | ('xgb', XGBRegressor(n_estimators=100, learning_rate=0.1, objective='reg:squarederror')) 61 | ]) 62 | 63 | # Fit the model 64 | pipeline.fit(train_features[feature_columns], train_features[target_column]) 65 | 66 | # Predict using the model 67 | train_preds = pipeline.predict(train_features[feature_columns]) 68 | test_preds = pipeline.predict(test_features[feature_columns]) 69 | 70 | # Evaluate the model 71 | train_rmse = np.sqrt(mean_squared_error(train_features[target_column], train_preds)) 72 | test_rmse = np.sqrt(mean_squared_error(test_features[target_column], test_preds)) 73 | 74 | print(f"Train RMSE: {train_rmse}, Test RMSE: {test_rmse}") 75 | 76 | # Create future dates for forecasting 77 | future_dates = pd.date_range(start=test_data['date'].max() + timedelta(days=1), periods=180, freq='D') 78 | future_data = pd.DataFrame({'date': future_dates}) 79 | future_features = create_features(future_data.copy()) 80 | 81 | # Forecast future demand 82 | future_preds = pipeline.predict(future_features[feature_columns]) 83 | 84 | # Plot the results 85 | plt.figure(figsize=(14, 7)) 86 | plt.plot(train_data['date'], train_data['demand'], label='Train Data') 87 | plt.plot(test_data['date'], test_data['demand'], label='Test Data') 88 | plt.plot(future_dates, future_preds, label='Forecast', color='red') 89 | plt.legend() 90 | plt.title('Demand Forecast') 91 | plt.xlabel('Date') 92 | plt.ylabel('Demand') 93 | plt.show() 94 | 
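# Optional extension (a minimal sketch, not part of the original script): report MAE and MAPE
# on the held-out window in addition to RMSE. MAE is in demand units and is often easier to
# communicate, while MAPE gives a scale-free percentage error. Assumes the pipeline above has
# already been fit and that test_preds exists; MAPE also assumes demand is never zero.
from sklearn.metrics import mean_absolute_error

test_mae = mean_absolute_error(test_features[target_column], test_preds)
test_mape = np.mean(np.abs((test_features[target_column] - test_preds) / test_features[target_column])) * 100
print(f"Test MAE: {test_mae:.2f}, Test MAPE: {test_mape:.2f}%")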
-------------------------------------------------------------------------------- /02_demand_forecasting/02_demand_forecasting.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 2: DEMAND FORECASTING 3 | 4 | # libraries 5 | library(tidyverse) 6 | library(lubridate) 7 | library(timetk) 8 | library(modeltime) 9 | library(tidymodels) 10 | 11 | # Example data creation 12 | set.seed(123) 13 | date_seq <- seq(as.Date("2020-01-01"), as.Date("2021-12-31"), by="day") 14 | data <- tibble( 15 | date = date_seq, 16 | demand = round(100 + sin(seq_along(date_seq)/20) * 50 + rnorm(length(date_seq), mean = 0, sd = 10)) 17 | ) 18 | 19 | # Convert date column to a proper date format and ensure it's sorted 20 | data <- data %>% 21 | mutate(date = as.Date(date)) %>% 22 | arrange(date) 23 | 24 | # Split the data into training and testing 25 | split_date <- as.Date("2021-06-01") 26 | train_data <- data %>% filter(date < split_date) 27 | test_data <- data %>% filter(date >= split_date) 28 | 29 | 30 | # Define the recipe 31 | recipe_spec <- recipe(demand ~ date, data = train_data) %>% 32 | step_timeseries_signature(date) %>% 33 | step_fourier(date, period = 365, K = 2) %>% 34 | step_rm(date) %>% 35 | step_dummy(all_nominal_predictors(), one_hot = TRUE) 36 | 37 | 38 | # Define the model 39 | model_spec <- boost_tree() %>% 40 | set_engine("xgboost") %>% 41 | set_mode("regression") 42 | 43 | # Fit the model 44 | model_fit <- workflow() %>% 45 | add_model(model_spec) %>% 46 | add_recipe(recipe_spec) %>% 47 | fit(train_data) 48 | 49 | # Create future dataframe for forecasting 50 | future_dates <- tibble(date = seq(max(train_data$date) + 1, by="day", length.out=180)) 51 | 52 | # Forecast 53 | forecast <- modeltime_table(model_fit) %>% 54 | modeltime_forecast(new_data = future_dates, actual_data = train_data) 55 | 56 | # Plot the results 57 | forecast %>% 58 | plot_modeltime_forecast( 59 | .legend_show = TRUE, 60 | .interactive = FALSE 61 | ) -------------------------------------------------------------------------------- /03_credit_risk/01_credit_risk.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 3: CREDIT RISK 3 | 4 | # 1. Import Libraries 5 | import numpy as np 6 | import pandas as pd 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.linear_model import LogisticRegression 9 | from sklearn.preprocessing import StandardScaler 10 | from sklearn.metrics import accuracy_score, classification_report 11 | 12 | # 2. Generate Data 13 | # Let's assume we have two features: credit_score and annual_income, and the target is credit_risk (0 for low risk, 1 for high risk) 14 | np.random.seed(0) 15 | data_size = 1000 16 | credit_scores = np.random.normal(600, 100, data_size) 17 | annual_incomes = np.random.normal(50000, 15000, data_size) 18 | credit_risks = (credit_scores < 580) | (annual_incomes < 30000) # Simplified risk criteria 19 | 20 | # Create a DataFrame 21 | df = pd.DataFrame({ 22 | 'credit_score': credit_scores, 23 | 'annual_income': annual_incomes, 24 | 'credit_risk': credit_risks.astype(int) 25 | }) 26 | 27 | # 3. 
Preprocess Data 28 | # Split data into features and target 29 | X = df[['credit_score', 'annual_income']] 30 | y = df['credit_risk'] 31 | 32 | # Split data into training and test sets 33 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 34 | 35 | # Scale the features 36 | scaler = StandardScaler() 37 | X_train_scaled = scaler.fit_transform(X_train) 38 | X_test_scaled = scaler.transform(X_test) 39 | 40 | # 4. Train a Model 41 | model = LogisticRegression() 42 | model.fit(X_train_scaled, y_train) 43 | 44 | # 5. Evaluate the Model 45 | y_pred = model.predict(X_test_scaled) 46 | accuracy = accuracy_score(y_test, y_pred) 47 | report = classification_report(y_test, y_pred) 48 | 49 | print(f"Accuracy: {accuracy:.2f}") 50 | print("Classification Report:") 51 | print(report) -------------------------------------------------------------------------------- /03_credit_risk/02_credit_risk.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 3: CREDIT RISK 3 | 4 | # Install and load necessary packages 5 | library(tidyverse) 6 | library(tidymodels) 7 | 8 | # Set seed for reproducibility 9 | set.seed(0) 10 | 11 | # 2. Generate Data 12 | data_size <- 1000 13 | credit_scores <- rnorm(data_size, mean=600, sd=100) 14 | annual_incomes <- rnorm(data_size, mean=50000, sd=15000) 15 | credit_risks <- ifelse(credit_scores < 580 | annual_incomes < 30000, 1, 0) 16 | 17 | # Create a tibble 18 | data <- tibble( 19 | credit_score = credit_scores, 20 | annual_income = annual_incomes, 21 | credit_risk = factor(credit_risks, levels = c(0, 1)) 22 | ) 23 | 24 | # 3. Preprocess Data 25 | split <- initial_split(data, prop = 0.8) 26 | train_data <- training(split) 27 | test_data <- testing(split) 28 | 29 | # Recipe for preprocessing 30 | recipe <- recipe(credit_risk ~ ., data = train_data) %>% 31 | step_scale(all_predictors()) %>% 32 | step_center(all_predictors()) 33 | 34 | # 4. Train a Model 35 | logit_model <- logistic_reg() %>% 36 | set_engine("glm") %>% 37 | set_mode("classification") 38 | 39 | workflow <- workflow() %>% 40 | add_recipe(recipe) %>% 41 | add_model(logit_model) %>% 42 | fit(data = train_data) 43 | 44 | # 5. 
Evaluate the Model 45 | test_results <- workflow %>% 46 | predict(new_data = test_data) %>% 47 | bind_cols(test_data) %>% 48 | metrics(truth = credit_risk, estimate = .pred_class) 49 | 50 | accuracy <- test_results %>% 51 | filter(.metric == "accuracy") 52 | 53 | cat("Accuracy:", accuracy$.estimate, "\n") 54 | conf_mat <- workflow %>% 55 | predict(new_data = test_data) %>% 56 | bind_cols(test_data) %>% 57 | conf_mat(truth = credit_risk, estimate = .pred_class) 58 | 59 | autoplot(conf_mat, type = "heatmap") 60 | -------------------------------------------------------------------------------- /04_fraud_detection/01_fraud_detection.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 4: FRAUD DETECTION 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from sklearn.preprocessing import LabelEncoder 7 | from sklearn.model_selection import train_test_split 8 | import xgboost as xgb 9 | from sklearn.metrics import classification_report 10 | import matplotlib.pyplot as plt 11 | 12 | # Set random seed for reproducibility 13 | np.random.seed(0) 14 | 15 | # Generate synthetic data 16 | data_size = 1000 17 | transaction_types = np.random.choice(['type1', 'type2', 'type3'], size=data_size) 18 | transaction_amounts = np.random.exponential(scale=200, size=data_size) 19 | age_of_account_days = np.random.normal(loc=365, scale=100, size=data_size) 20 | 21 | # Conditional probabilities for being fraudulent 22 | fraudulent = [] 23 | for i in range(data_size): 24 | # Higher chance of fraud for type1 with high transaction amount and low account age 25 | if transaction_types[i] == 'type1' and transaction_amounts[i] > 100 and age_of_account_days[i] < 365: 26 | fraudulent.append(np.random.choice([0, 1], p=[0.1, 0.9])) # 90% chance of being fraudulent 27 | else: 28 | fraudulent.append(np.random.choice([0, 1], p=[0.99, 0.01])) # 1% chance as normal 29 | 30 | df = pd.DataFrame({ 31 | 'transaction_amount': transaction_amounts, 32 | 'transaction_type': transaction_types, 33 | 'age_of_account_days': age_of_account_days, 34 | 'fraudulent': fraudulent 35 | }) 36 | 37 | df_untransformed = df.copy() 38 | 39 | # Encode categorical data 40 | df['transaction_type'] = LabelEncoder().fit_transform(df['transaction_type']) 41 | 42 | # Prepare data for training 43 | X = df.drop('fraudulent', axis=1) 44 | y = df['fraudulent'] 45 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 46 | 47 | # Train the XGBoost model 48 | model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss') 49 | model.fit(X_train, y_train) 50 | 51 | # Predictions 52 | predictions_df = pd.DataFrame(model.predict(X_test), columns=['predict_class']) 53 | predictions_proba_df = pd.DataFrame(model.predict_proba(X_test), columns=["p0", "p1"]) 54 | 55 | # Test Set Evaluation 56 | fraud_scoring_df = pd.concat([ 57 | df_untransformed.iloc[X_test.index.values].reset_index(), predictions_df, 58 | predictions_proba_df 59 | ], axis=1) \ 60 | .set_index("index") 61 | 62 | fraud_scoring_df.sort_values('p1', ascending=False) 63 | 64 | print(classification_report(fraud_scoring_df['fraudulent'], fraud_scoring_df['predict_class'])) 65 | -------------------------------------------------------------------------------- /04_fraud_detection/02_fraud_detection.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 4: 
FRAUD DETECTION 3 | 4 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 5 | # PROJECT 4: FRAUD DETECTION 6 | 7 | library(tidyverse) 8 | library(tidymodels) 9 | library(xgboost) 10 | 11 | # Generate synthetic data 12 | set.seed(0) 13 | data_size <- 1000 14 | transaction_types <- sample(c("type1", "type2", "type3"), size = data_size, replace = TRUE) # nolint 15 | transaction_amounts <- rexp(n = data_size, rate = 1 / 200) 16 | age_of_account_days <- rnorm(n = data_size, mean = 365, sd = 100) 17 | fraudulent <- vector("numeric", length = data_size) 18 | 19 | for (i in 1:data_size) { 20 | if (transaction_types[i] == "type1" && transaction_amounts[i] > 100 && age_of_account_days[i] < 365) { 21 | fraudulent[i] <- sample(c(0, 1), size = 1, prob = c(0.1, 0.9)) # 90% chance of being fraudulent 22 | } else { 23 | fraudulent[i] <- sample(c(0, 1), size = 1, prob = c(0.99, 0.01)) # 1% chance as normal 24 | } 25 | } 26 | 27 | df <- tibble( 28 | transaction_amount = transaction_amounts, 29 | transaction_type = transaction_types, 30 | age_of_account_days = age_of_account_days, 31 | fraudulent = as.factor(fraudulent) 32 | ) %>% rowid_to_column() 33 | 34 | df_untransformed <- df 35 | 36 | # Data preparation 37 | df <- df %>% 38 | mutate(transaction_type = as.numeric(as.factor(transaction_type))) # Encode categorical data as numeric 39 | 40 | # Splitting the data 41 | set.seed(0) 42 | split <- initial_split(df, prop = 0.8) 43 | train_data <- training(split) 44 | test_data <- testing(split) 45 | 46 | # XGBoost Model Setup 47 | xgb_spec <- boost_tree(trees = 50, tree_depth = 6, min_n = 10) %>% 48 | set_engine("xgboost", eval_metric = "logloss") %>% 49 | set_mode("classification") 50 | 51 | # Fit the Model 52 | xgb_fit <- xgb_spec %>% 53 | fit(fraudulent ~ . - rowid, data = train_data) 54 | 55 | # Make Predictions 56 | test_results <- test_data %>% 57 | select(-fraudulent) %>% 58 | predict(xgb_fit, new_data = ., type = "prob") %>% 59 | mutate(predict_class = as.numeric(.pred_1 > 0.5) %>% as.factor()) %>% 60 | bind_cols( 61 | df_untransformed %>% filter(rowid %in% test_data$rowid) 62 | ) 63 | 64 | # Evaluate the Model 65 | test_results %>% 66 | select(fraudulent, predict_class) %>% 67 | yardstick::metrics(truth = fraudulent, estimate = predict_class) 68 | 69 | # Confusion Matrix 70 | conf_mat(test_results, truth = fraudulent, estimate = predict_class) 71 | 72 | # Visualization 73 | test_results %>% 74 | ggplot(aes(x = transaction_amount, fill = as.factor(fraudulent))) + 75 | geom_histogram(position = "identity", alpha = 0.5, bins = 30) + 76 | scale_fill_manual(values = c("gray", "red"), labels = c("Legitimate", "Fraudulent")) + 77 | labs( 78 | title = "Transaction Amount Distribution", 79 | x = "Transaction Amount", 80 | y = "Frequency", 81 | fill = "Transaction Type" 82 | ) + 83 | theme_minimal() 84 | -------------------------------------------------------------------------------- /05_supply_chain_optimization/01_supply_chain_optimization.py: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 5: SUPPLY CHAIN OPTIMIZATION 3 | 4 | import pulp 5 | 6 | # Create a problem variable: 7 | prob = pulp.LpProblem("Supply_Chain_Optimization", pulp.LpMinimize) 8 | 9 | # Define decision variables: 10 | production_units = pulp.LpVariable("Production_units", lowBound=0, cat='Continuous') 11 | transport_units = pulp.LpVariable("Transport_units", lowBound=0, cat='Continuous') 12 | 13 | # Define costs: 14 | 
production_cost_per_unit = 10 15 | transport_cost_per_unit = 5 16 | storage_cost_per_unit = 2 17 | 18 | # Objective function (storage cost is charged per transported unit): 19 | prob += production_cost_per_unit * production_units + transport_cost_per_unit * transport_units + storage_cost_per_unit * transport_units, "Total_Cost" 20 | 21 | # Constraints: 22 | prob += production_units <= 1000, "Max_Production_Capacity" 23 | prob += transport_units <= production_units, "Transport_Less_Than_Production" 24 | prob += transport_units >= 800, "Min_Demand_Fulfillment" 25 | 26 | # Solve the problem: 27 | prob.solve() 28 | 29 | # Print the results: 30 | print("Status:", pulp.LpStatus[prob.status]) 31 | print("Optimal Production units:", production_units.varValue) 32 | print("Optimal Transport units:", transport_units.varValue) 33 | print("Total Cost:", pulp.value(prob.objective)) 34 | -------------------------------------------------------------------------------- /05_supply_chain_optimization/02_supply_chain_optimization.R: -------------------------------------------------------------------------------- 1 | # BUSINESS PROBLEMS THAT CAN BE SOLVED WITH DATA SCIENCE 2 | # PROJECT 5: SUPPLY CHAIN OPTIMIZATION 3 | 4 | # Load the lpSolve package 5 | library(lpSolve) 6 | 7 | # Define the costs 8 | production_cost_per_unit <- 10 9 | transport_cost_per_unit <- 5 # Transportation cost 10 | storage_cost_per_unit <- 2 # Storage cost 11 | 12 | # Total transport cost including storage 13 | total_transport_cost_per_unit <- transport_cost_per_unit + storage_cost_per_unit 14 | 15 | # Objective function coefficients 16 | objective <- c(production_cost_per_unit, total_transport_cost_per_unit) 17 | 18 | # Constraints matrix 19 | # Production units, Transport units 20 | constraints <- matrix(c(1, 0, # Production <= 1000 units 21 | 1, -1, # Production units >= Transport units 22 | 0, 1), # Transport units >= 800 units 23 | nrow=3, byrow=TRUE) 24 | 25 | # Right-hand side of the constraints 26 | rhs <- c(1000, 0, 800) 27 | 28 | # Directions of the constraints 29 | directions <- c("<=", ">=", ">=") 30 | 31 | # Solve the linear programming problem 32 | solution <- lp("min", objective, constraints, directions, rhs) 33 | 34 | # Check whether the solution is optimal 35 | if(solution$status == 0) { 36 | print("Solution is optimal") 37 | print(paste("Optimal Production units:", solution$solution[1])) 38 | print(paste("Optimal Transport units:", solution$solution[2])) 39 | print(paste("Total Cost:", solution$objval)) 40 | } else if(solution$status == 2) { 41 | # lpSolve reports status 2 when no feasible solution exists 42 | print("No feasible solution was found") 43 | } else { 44 | print(paste("Solver returned a non-optimal status code:", solution$status)) 45 | } 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 10 Business Problems that Can Be Solved with Data Science 2 | 3 | ## Project Goal: 4 | 5 | The goal of this project is to help data scientists kickstart their own projects with basic business problems that businesses, Fortune 500 companies, and high-demand industries need solved. 6 | 7 | Each project comes with synthetic (fake) data and can be used to jumpstart more in-depth analysis.
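For example, a script can be pointed at real data by swapping the synthetic data frame for a CSV load. A minimal sketch for the customer segmentation example, assuming a hypothetical `customers.csv` with the same three columns used in the script:

```python
import pandas as pd

# Hypothetical file and column names -- adjust to your own dataset.
df = pd.read_csv("customers.csv")
df = df[['Age', 'Annual Income (k$)', 'Spending Score (1-100)']].dropna()
# The rest of 01_customer_segmentation/01_customer_segmentation.py runs unchanged.
```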
8 | 9 | ## Work in Progress (Coming Soon): 10 | 11 | The following top 10 business problems are planned to be covered: 12 | 13 | ![Top 10 Business Problems](/img/10_business_problems.jpg) 14 | 15 | ## Going Further 16 | 17 | For data scientists who want to learn how to solve more advanced business problems with Python and R: 18 | 19 | **(No prerequisites required if you start with the 1st course)** 20 | 21 | 1. [**2-Course Python Track**](https://university.business-science.io/p/2-course-python-track?el=github): Through 759 lessons, 63 hours of video, 5 Challenges, and 4 projects, learn data science, machine learning, lifecycle management, APIs, Web Applications, ROI, Business Problem Solving, and a host of niche data science skills. 22 | 2. [**5-Course R-Track**](https://university.business-science.io/p/5-course-bundle-machine-learning-web-apps-time-series?el=github): Through 1,848 lessons, 153 hours of video training, 24 Challenges, and 8 advanced projects, learn expert-level data science skills, including Business Problem Solving for Data Scientists, Advanced Machine Learning, ROI, Time Series Forecasting, Shiny Web Applications, and more. -------------------------------------------------------------------------------- /img/10_business_problems.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/business-science/10_python_r_business_problems/292d91f72a47af5cd6d2805a122f2f0e295b88c2/img/10_business_problems.jpg --------------------------------------------------------------------------------