# Random Forest Regression
#
# Fits a random forest to the position-level -> salary data, predicts the
# salary for level 6.5, and plots the fitted step function over the data.
#
# Reconstructed from a flattened repository dump that held three files:
#   Position_Salaries.csv, README.md, random_forest_regression.py
#
# The script reads Position_Salaries.csv from the current working directory.
# Contents of that file as they appeared in the dump:
#
#   Position,Level,Salary
#   Business Analyst,1,45000
#   Junior Consultant,2,50000
#   Senior Consultant,3,60000
#   Manager,4,80000
#   Country Manager,5,110000
#   Region Manager,6,150000
#   Partner,7,200000
#   Senior Partner,8,300000
#   C-level,9,500000
#   CEO,10,1000000

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Importing the dataset
dataset = pd.read_csv('Position_Salaries.csv')
# The 1:2 slice (rather than a plain 1) keeps X two-dimensional, one column
# wide, which is the feature-matrix layout scikit-learn estimators expect.
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# No train/test split and no feature scaling are applied: the model is fitted
# on every row of the dataset. If a hold-out split is added later, note that
# train_test_split lives in sklearn.model_selection (the old
# sklearn.cross_validation module has been removed from scikit-learn), and
# StandardScaler.fit_transform needs 2-D input for the target as well.

# Fitting Random Forest Regression to the dataset
# random_state is pinned so repeated runs build the same forest.
regressor = RandomForestRegressor(n_estimators=10, random_state=0)
regressor.fit(X, y)

# Predicting a new result
# predict() requires a 2-D array of shape (n_samples, n_features); passing the
# bare scalar 6.5 raises ValueError, so wrap it as one sample with one feature.
y_pred = regressor.predict([[6.5]])

# Visualising the Random Forest Regression results (higher resolution)
# Use the array's own min()/max() so np.arange receives scalars; the builtin
# min(X)/max(X) on a 2-D array return 1-element arrays instead. The grid stops
# just short of X.max() because np.arange excludes its stop value.
X_grid = np.arange(X.min(), X.max(), 0.01).reshape(-1, 1)
plt.scatter(X, y, color='red')
plt.plot(X_grid, regressor.predict(X_grid), color='blue')
plt.title('Truth or Bluff (Random Forest Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()