├── .DS_Store
├── README.md
└── Turkish Food Price
├── .DS_Store
└── Applied-Statistical-Methods-for-collecting-and-cleaning-the-data
├── README.md
└── Statistical_Cleaner.ipynb
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leventozdemir/Full-Stack-Data-Science-Projects/5fe667cad3c7b9ae427eee6758f45331d8d68084/.DS_Store
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Full-Stack-Data-Science-Projects
2 | Collecting data, analyzing the data, and building ML models
3 |
--------------------------------------------------------------------------------
/Turkish Food Price/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leventozdemir/Full-Stack-Data-Science-Projects/5fe667cad3c7b9ae427eee6758f45331d8d68084/Turkish Food Price/.DS_Store
--------------------------------------------------------------------------------
/Turkish Food Price/Applied-Statistical-Methods-for-collecting-and-cleaning-the-data/README.md:
--------------------------------------------------------------------------------
1 | # Applied-Statistical-Methods-for-cleaning-data-📊
2 | 
3 |
4 | ## First let’s define the dataset:
5 | Global Food Prices Database (WFP): this dataset contains global food price data from the World Food Programme, covering foods such as maize, rice, beans, fish, and sugar for 76 countries and some 1,500 markets. It is updated weekly but consists largely of monthly data. The data goes back as far as 1992 for a few countries, although many countries started reporting from 2003 or thereafter.
6 |
7 | ### Source: https://data.humdata.org/dataset/wfp-food-prices
8 |
9 | ### License: Creative Commons Attribution for Intergovernmental Organisations .
10 |
11 | ## Exploring the story of this dataset:
12 | ### We will start by importing the libraries:
13 | import numpy as np
14 | import pandas as pd
15 | import matplotlib.pyplot as plt
16 | %matplotlib inline
17 | import seaborn as sns
18 | ### Now let’s load the data:
19 | path = '../input/food-price-me/wfpvam_foodprices.csv'
20 | data = pd.read_csv(path,low_memory=False)
21 | ### Story Time:
22 | print(data.columns)
23 | #Output: Index(['adm0_id', 'adm0_name', 'adm1_id', 'adm1_name', 'mkt_id', 'mkt_name',
24 | 'cm_id', 'cm_name', 'cur_id', 'cur_name', 'pt_id', 'pt_name', 'um_id',
25 | 'um_name', 'mp_month', 'mp_year', 'mp_price', 'mp_commoditysource'],
26 | dtype='object')
27 | print(data.shape)
28 | #Output: (2004959, 18)
29 | #### So we have 2004959 samples and 18 features
30 | ### Let’s see how many countries are there
31 | print(data.adm0_name.unique().shape)
32 | #Output: (98,)
33 | #### so we have 98 countries
34 | print(data.adm0_name.unique())
35 | #Output:array(['Afghanistan', 'Algeria', 'Angola', 'Argentina', 'Armenia',
36 | 'Azerbaijan', 'Bangladesh', 'Bassas da India', 'Belarus', 'Benin',
37 | 'Bhutan', 'Bolivia', 'Burkina Faso', 'Burundi', 'Cambodia',
38 | 'Cameroon', 'Cape Verde', 'Central African Republic', 'Chad',
39 | 'China', 'Colombia', 'Congo', 'Costa Rica', "Cote d'Ivoire",
40 | 'Democratic Republic of the Congo', 'Djibouti',
41 | 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Eritrea',
42 | 'Ethiopia', 'Gabon', 'Gambia', 'Georgia', 'Ghana', 'Guatemala',
43 | 'Guinea', 'Guinea-Bissau', 'Haiti', 'Honduras', 'Indonesia',
44 | 'Iran (Islamic Republic of)', 'Iraq', 'Japan', 'Jordan',
45 | 'Kazakhstan', 'Kenya', 'Kyrgyzstan',
46 | "Lao People's Democratic Republic", 'Lebanon', 'Lesotho',
47 | 'Liberia', 'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania',
48 | 'Mexico', 'Moldova Republic of', 'Mongolia', 'Mozambique',
49 | 'Myanmar', 'Namibia', 'Nepal', 'Nicaragua', 'Niger', 'Nigeria',
50 | 'Pakistan', 'Panama', 'Paraguay', 'Peru', 'Philippines',
51 | 'Russian Federation', 'Rwanda', 'Senegal', 'Sierra Leone',
52 | 'Somalia', 'South Africa', 'South Sudan', 'Sri Lanka',
53 | 'State of Palestine', 'Sudan', 'Swaziland', 'Syrian Arab Republic',
54 | 'Tajikistan', 'Thailand', 'Timor-Leste', 'Togo', 'Turkey',
55 | 'Uganda', 'Ukraine', 'United Republic of Tanzania', 'Venezuela',
56 | 'Viet Nam', 'Yemen', 'Zambia', 'Zimbabwe'], dtype=object)
57 | ## From these 98 countries I chose Turkey.
58 | data_TR=data.loc[data.adm0_name=='Turkey']
59 | data_TR.isnull().sum()
60 | #Output:
61 | adm0_id 0
62 | adm0_name 0
63 | adm1_id 0
64 | adm1_name 10319
65 | mkt_id 0
66 | mkt_name 0
67 | cm_id 0
68 | cm_name 0
69 | cur_id 0
70 | cur_name 0
71 | pt_id 0
72 | pt_name 0
73 | um_id 0
74 | um_name 0
75 | mp_month 0
76 | mp_year 0
77 | mp_price 0
78 | mp_commoditysource 10319
79 | data_TR.describe().transpose()
80 | 
81 |
82 | ### Looking at the table, we can see that the features with
83 | ### Standard Deviation = 0 are not useful.
84 | #### Standard Deviation = 0 means that all the values in that column are equal to each other
85 | ### So we will drop adm0_id, cur_id and pt_id
86 | data_TR['cur_name'].value_counts()
87 | #Output: TRY 10319
88 | ### So cur_name is a categorical feature that equals ‘TRY’ in every row, so we will drop it.
89 | print(data_TR.mkt_id.unique())
90 | #Output:[1319 2053 2054 2055]
91 | print(data_TR['mkt_id'].value_counts())
92 | #Output:
93 | 1319 3366
94 | 2055 2318
95 | 2054 2318
96 | 2053 2317
97 | Name: mkt_id, dtype: int64
98 | ### mkt_id has 4 different integers that represent the mkt_name values, so I prefer to drop it and keep mkt_name, which is easier for users to understand.
99 | #### Note: if we kept mkt_id, we would have to normalize it before using prediction algorithms.
100 | ### Let’s drop these columns and look at the data again
101 | data_TR = data_TR.drop(['mp_commoditysource','adm1_name','adm0_name','mkt_id','adm1_id','cur_id','pt_id', 'adm0_id', 'pt_name', 'cur_name'],axis=1)
102 | print(data_TR.describe().transpose())
103 |
104 |
105 | ### Rename the features to make them easier for users to understand:
106 | data_TR.columns=['Place', 'ProductId', 'ProductName', 'UmId', 'UmName', 'Month', 'Year', 'Price']
107 | ### Split the data into training data (before 2020) and test data (from 2020 to 2021):
108 | data_train = data_TR.loc[data_TR.Year<2020]
109 | data_test = data_TR.loc[data_TR.Year>=2020]
110 | ### Save the data to csv files:
111 | data_train.to_csv('train.csv', index=False)
112 | data_test.to_csv('test.csv', index=False)
113 | ### Now it’s your turn: download the data from the source, get the full code from my GitHub, choose the country you want, and create your own dataset.
114 | ## Food Prices in Turkey on Kaggle: https://www.kaggle.com/leventoz/food-prices-in-turkey
115 | ## Check this also on Medium: https://leventozdemir.medium.com/applied-statistical-methods-for-cleaning-data-6872e9604dba
116 | # Keep Coding…
117 |
--------------------------------------------------------------------------------
/Turkish Food Price/Applied-Statistical-Methods-for-collecting-and-cleaning-the-data/Statistical_Cleaner.ipynb:
--------------------------------------------------------------------------------
1 | {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import numpy as np\nimport pandas as pd \nimport matplotlib.pyplot as plt\n%matplotlib inline\nimport seaborn as sns","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:27.977678Z","iopub.execute_input":"2021-07-14T07:23:27.978308Z","iopub.status.idle":"2021-07-14T07:23:27.984246Z","shell.execute_reply.started":"2021-07-14T07:23:27.978271Z","shell.execute_reply":"2021-07-14T07:23:27.983429Z"},"trusted":true},"execution_count":51,"outputs":[]},{"cell_type":"code","source":"path = '../input/food-price-me/wfpvam_foodprices.csv'\ndata = pd.read_csv(path,low_memory=False)","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:28.137639Z","iopub.execute_input":"2021-07-14T07:23:28.138234Z","iopub.status.idle":"2021-07-14T07:23:33.345233Z","shell.execute_reply.started":"2021-07-14T07:23:28.138197Z","shell.execute_reply":"2021-07-14T07:23:33.344399Z"},"trusted":true},"execution_count":52,"outputs":[]},{"cell_type":"code","source":"data.columns","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:33.346396Z","iopub.execute_input":"2021-07-14T07:23:33.346818Z","iopub.status.idle":"2021-07-14T07:23:33.352657Z","shell.execute_reply.started":"2021-07-14T07:23:33.346787Z","shell.execute_reply":"2021-07-14T07:23:33.351941Z"},"trusted":true},"execution_count":53,"outputs":[{"execution_count":53,"output_type":"execute_result","data":{"text/plain":"Index(['adm0_id', 'adm0_name', 'adm1_id', 'adm1_name', 'mkt_id', 'mkt_name',\n 'cm_id', 'cm_name', 'cur_id', 'cur_name', 'pt_id', 'pt_name', 'um_id',\n 'um_name', 'mp_month', 'mp_year', 'mp_price', 'mp_commoditysource'],\n 
dtype='object')"},"metadata":{}}]},{"cell_type":"code","source":"data.shape","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:33.354415Z","iopub.execute_input":"2021-07-14T07:23:33.354849Z","iopub.status.idle":"2021-07-14T07:23:33.372273Z","shell.execute_reply.started":"2021-07-14T07:23:33.354818Z","shell.execute_reply":"2021-07-14T07:23:33.371002Z"},"trusted":true},"execution_count":54,"outputs":[{"execution_count":54,"output_type":"execute_result","data":{"text/plain":"(2004959, 18)"},"metadata":{}}]},{"cell_type":"code","source":"data","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:33.374251Z","iopub.execute_input":"2021-07-14T07:23:33.374554Z","iopub.status.idle":"2021-07-14T07:23:33.416931Z","shell.execute_reply.started":"2021-07-14T07:23:33.374528Z","shell.execute_reply":"2021-07-14T07:23:33.415835Z"},"trusted":true},"execution_count":55,"outputs":[{"execution_count":55,"output_type":"execute_result","data":{"text/plain":" adm0_id adm0_name adm1_id adm1_name mkt_id mkt_name cm_id \\\n0 1.0 Afghanistan 272 Badakhshan 266 Fayzabad 55 \n1 1.0 Afghanistan 272 Badakhshan 266 Fayzabad 55 \n2 1.0 Afghanistan 272 Badakhshan 266 Fayzabad 55 \n3 1.0 Afghanistan 272 Badakhshan 266 Fayzabad 55 \n4 1.0 Afghanistan 272 Badakhshan 266 Fayzabad 55 \n... ... ... ... ... ... ... ... \n2004954 271.0 Zimbabwe 3444 Midlands 5585 Matibi 887 \n2004955 271.0 Zimbabwe 3444 Midlands 5585 Matibi 887 \n2004956 271.0 Zimbabwe 3444 Midlands 5585 Matibi 887 \n2004957 271.0 Zimbabwe 3444 Midlands 5585 Matibi 887 \n2004958 271.0 Zimbabwe 3444 Midlands 5585 Matibi 887 \n\n cm_name cur_id cur_name pt_id pt_name um_id \\\n0 Bread - Retail 0.0 AFN 15 Retail 5 \n1 Bread - Retail 0.0 AFN 15 Retail 5 \n2 Bread - Retail 0.0 AFN 15 Retail 5 \n3 Bread - Retail 0.0 AFN 15 Retail 5 \n4 Bread - Retail 0.0 AFN 15 Retail 5 \n... ... ... ... ... ... ... 
\n2004954 Fish (kapenta) - Retail 0.0 ZWL 15 Retail 5 \n2004955 Fish (kapenta) - Retail 0.0 ZWL 15 Retail 5 \n2004956 Fish (kapenta) - Retail 0.0 ZWL 15 Retail 5 \n2004957 Fish (kapenta) - Retail 0.0 ZWL 15 Retail 5 \n2004958 Fish (kapenta) - Retail 0.0 ZWL 15 Retail 5 \n\n um_name mp_month mp_year mp_price mp_commoditysource \n0 KG 1 2014 50.0000 NaN \n1 KG 2 2014 50.0000 NaN \n2 KG 3 2014 50.0000 NaN \n3 KG 4 2014 50.0000 NaN \n4 KG 5 2014 50.0000 NaN \n... ... ... ... ... ... \n2004954 KG 7 2020 866.6666 NaN \n2004955 KG 8 2020 800.0000 NaN \n2004956 KG 12 2020 1187.5000 NaN \n2004957 KG 2 2021 1123.3333 NaN \n2004958 KG 3 2021 1390.0000 NaN \n\n[2004959 rows x 18 columns]","text/html":"
\n\n
\n \n \n | \n adm0_id | \n adm0_name | \n adm1_id | \n adm1_name | \n mkt_id | \n mkt_name | \n cm_id | \n cm_name | \n cur_id | \n cur_name | \n pt_id | \n pt_name | \n um_id | \n um_name | \n mp_month | \n mp_year | \n mp_price | \n mp_commoditysource | \n
\n \n \n \n 0 | \n 1.0 | \n Afghanistan | \n 272 | \n Badakhshan | \n 266 | \n Fayzabad | \n 55 | \n Bread - Retail | \n 0.0 | \n AFN | \n 15 | \n Retail | \n 5 | \n KG | \n 1 | \n 2014 | \n 50.0000 | \n NaN | \n
\n \n 1 | \n 1.0 | \n Afghanistan | \n 272 | \n Badakhshan | \n 266 | \n Fayzabad | \n 55 | \n Bread - Retail | \n 0.0 | \n AFN | \n 15 | \n Retail | \n 5 | \n KG | \n 2 | \n 2014 | \n 50.0000 | \n NaN | \n
\n \n 2 | \n 1.0 | \n Afghanistan | \n 272 | \n Badakhshan | \n 266 | \n Fayzabad | \n 55 | \n Bread - Retail | \n 0.0 | \n AFN | \n 15 | \n Retail | \n 5 | \n KG | \n 3 | \n 2014 | \n 50.0000 | \n NaN | \n
\n \n 3 | \n 1.0 | \n Afghanistan | \n 272 | \n Badakhshan | \n 266 | \n Fayzabad | \n 55 | \n Bread - Retail | \n 0.0 | \n AFN | \n 15 | \n Retail | \n 5 | \n KG | \n 4 | \n 2014 | \n 50.0000 | \n NaN | \n
\n \n 4 | \n 1.0 | \n Afghanistan | \n 272 | \n Badakhshan | \n 266 | \n Fayzabad | \n 55 | \n Bread - Retail | \n 0.0 | \n AFN | \n 15 | \n Retail | \n 5 | \n KG | \n 5 | \n 2014 | \n 50.0000 | \n NaN | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 2004954 | \n 271.0 | \n Zimbabwe | \n 3444 | \n Midlands | \n 5585 | \n Matibi | \n 887 | \n Fish (kapenta) - Retail | \n 0.0 | \n ZWL | \n 15 | \n Retail | \n 5 | \n KG | \n 7 | \n 2020 | \n 866.6666 | \n NaN | \n
\n \n 2004955 | \n 271.0 | \n Zimbabwe | \n 3444 | \n Midlands | \n 5585 | \n Matibi | \n 887 | \n Fish (kapenta) - Retail | \n 0.0 | \n ZWL | \n 15 | \n Retail | \n 5 | \n KG | \n 8 | \n 2020 | \n 800.0000 | \n NaN | \n
\n \n 2004956 | \n 271.0 | \n Zimbabwe | \n 3444 | \n Midlands | \n 5585 | \n Matibi | \n 887 | \n Fish (kapenta) - Retail | \n 0.0 | \n ZWL | \n 15 | \n Retail | \n 5 | \n KG | \n 12 | \n 2020 | \n 1187.5000 | \n NaN | \n
\n \n 2004957 | \n 271.0 | \n Zimbabwe | \n 3444 | \n Midlands | \n 5585 | \n Matibi | \n 887 | \n Fish (kapenta) - Retail | \n 0.0 | \n ZWL | \n 15 | \n Retail | \n 5 | \n KG | \n 2 | \n 2021 | \n 1123.3333 | \n NaN | \n
\n \n 2004958 | \n 271.0 | \n Zimbabwe | \n 3444 | \n Midlands | \n 5585 | \n Matibi | \n 887 | \n Fish (kapenta) - Retail | \n 0.0 | \n ZWL | \n 15 | \n Retail | \n 5 | \n KG | \n 3 | \n 2021 | \n 1390.0000 | \n NaN | \n
\n \n
\n
2004959 rows × 18 columns
\n
"},"metadata":{}}]},{"cell_type":"code","source":"data.adm0_name.unique()","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:33.418495Z","iopub.execute_input":"2021-07-14T07:23:33.418936Z","iopub.status.idle":"2021-07-14T07:23:33.582096Z","shell.execute_reply.started":"2021-07-14T07:23:33.418871Z","shell.execute_reply":"2021-07-14T07:23:33.580924Z"},"trusted":true},"execution_count":56,"outputs":[{"execution_count":56,"output_type":"execute_result","data":{"text/plain":"array(['Afghanistan', 'Algeria', 'Angola', 'Argentina', 'Armenia',\n 'Azerbaijan', 'Bangladesh', 'Bassas da India', 'Belarus', 'Benin',\n 'Bhutan', 'Bolivia', 'Burkina Faso', 'Burundi', 'Cambodia',\n 'Cameroon', 'Cape Verde', 'Central African Republic', 'Chad',\n 'China', 'Colombia', 'Congo', 'Costa Rica', \"Cote d'Ivoire\",\n 'Democratic Republic of the Congo', 'Djibouti',\n 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Eritrea',\n 'Ethiopia', 'Gabon', 'Gambia', 'Georgia', 'Ghana', 'Guatemala',\n 'Guinea', 'Guinea-Bissau', 'Haiti', 'Honduras', 'Indonesia',\n 'Iran (Islamic Republic of)', 'Iraq', 'Japan', 'Jordan',\n 'Kazakhstan', 'Kenya', 'Kyrgyzstan',\n \"Lao People's Democratic Republic\", 'Lebanon', 'Lesotho',\n 'Liberia', 'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania',\n 'Mexico', 'Moldova Republic of', 'Mongolia', 'Mozambique',\n 'Myanmar', 'Namibia', 'Nepal', 'Nicaragua', 'Niger', 'Nigeria',\n 'Pakistan', 'Panama', 'Paraguay', 'Peru', 'Philippines',\n 'Russian Federation', 'Rwanda', 'Senegal', 'Sierra Leone',\n 'Somalia', 'South Africa', 'South Sudan', 'Sri Lanka',\n 'State of Palestine', 'Sudan', 'Swaziland', 'Syrian Arab Republic',\n 'Tajikistan', 'Thailand', 'Timor-Leste', 'Togo', 'Turkey',\n 'Uganda', 'Ukraine', 'United Republic of Tanzania', 'Venezuela',\n 'Viet Nam', 'Yemen', 'Zambia', 'Zimbabwe'], 
dtype=object)"},"metadata":{}}]},{"cell_type":"code","source":"data.adm0_name.unique().shape","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:33.583418Z","iopub.execute_input":"2021-07-14T07:23:33.583815Z","iopub.status.idle":"2021-07-14T07:23:33.746587Z","shell.execute_reply.started":"2021-07-14T07:23:33.583776Z","shell.execute_reply":"2021-07-14T07:23:33.745378Z"},"trusted":true},"execution_count":57,"outputs":[{"execution_count":57,"output_type":"execute_result","data":{"text/plain":"(98,)"},"metadata":{}}]},{"cell_type":"code","source":"data_TR=data.loc[data.adm0_name=='Turkey']","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:33.748031Z","iopub.execute_input":"2021-07-14T07:23:33.748327Z","iopub.status.idle":"2021-07-14T07:23:34.046690Z","shell.execute_reply.started":"2021-07-14T07:23:33.748298Z","shell.execute_reply":"2021-07-14T07:23:34.045204Z"},"trusted":true},"execution_count":58,"outputs":[]},{"cell_type":"code","source":"data_TR","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:34.051185Z","iopub.execute_input":"2021-07-14T07:23:34.051513Z","iopub.status.idle":"2021-07-14T07:23:34.091606Z","shell.execute_reply.started":"2021-07-14T07:23:34.051484Z","shell.execute_reply":"2021-07-14T07:23:34.090805Z"},"trusted":true},"execution_count":59,"outputs":[{"execution_count":59,"output_type":"execute_result","data":{"text/plain":" adm0_id adm0_name adm1_id adm1_name mkt_id mkt_name \\\n1826544 249.0 Turkey 0 NaN 1319 National Average \n1826545 249.0 Turkey 0 NaN 1319 National Average \n1826546 249.0 Turkey 0 NaN 1319 National Average \n1826547 249.0 Turkey 0 NaN 1319 National Average \n1826548 249.0 Turkey 0 NaN 1319 National Average \n... ... ... ... ... ... ... 
\n1836858 249.0 Turkey 3057 NaN 2055 Izmir \n1836859 249.0 Turkey 3057 NaN 2055 Izmir \n1836860 249.0 Turkey 3057 NaN 2055 Izmir \n1836861 249.0 Turkey 3057 NaN 2055 Izmir \n1836862 249.0 Turkey 3057 NaN 2055 Izmir \n\n cm_id cm_name cur_id cur_name pt_id pt_name \\\n1826544 52 Rice - Retail 0.0 TRY 15 Retail \n1826545 52 Rice - Retail 0.0 TRY 15 Retail \n1826546 52 Rice - Retail 0.0 TRY 15 Retail \n1826547 52 Rice - Retail 0.0 TRY 15 Retail \n1826548 52 Rice - Retail 0.0 TRY 15 Retail \n... ... ... ... ... ... ... \n1836858 502 Cocoa (powder) - Retail 0.0 TRY 15 Retail \n1836859 502 Cocoa (powder) - Retail 0.0 TRY 15 Retail \n1836860 502 Cocoa (powder) - Retail 0.0 TRY 15 Retail \n1836861 502 Cocoa (powder) - Retail 0.0 TRY 15 Retail \n1836862 502 Cocoa (powder) - Retail 0.0 TRY 15 Retail \n\n um_id um_name mp_month mp_year mp_price mp_commoditysource \n1826544 5 KG 5 2013 4.4920 NaN \n1826545 5 KG 6 2013 4.5786 NaN \n1826546 5 KG 11 2013 4.7865 NaN \n1826547 5 KG 12 2013 5.1337 NaN \n1826548 5 KG 1 2014 5.5099 NaN \n... ... ... ... ... ... ... \n1836858 5 KG 1 2021 68.2002 NaN \n1836859 5 KG 2 2021 68.6250 NaN \n1836860 5 KG 3 2021 71.2310 NaN \n1836861 5 KG 4 2021 73.2609 NaN \n1836862 5 KG 5 2021 75.6859 NaN \n\n[10319 rows x 18 columns]","text/html":"\n\n
\n \n \n | \n adm0_id | \n adm0_name | \n adm1_id | \n adm1_name | \n mkt_id | \n mkt_name | \n cm_id | \n cm_name | \n cur_id | \n cur_name | \n pt_id | \n pt_name | \n um_id | \n um_name | \n mp_month | \n mp_year | \n mp_price | \n mp_commoditysource | \n
\n \n \n \n 1826544 | \n 249.0 | \n Turkey | \n 0 | \n NaN | \n 1319 | \n National Average | \n 52 | \n Rice - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 5 | \n 2013 | \n 4.4920 | \n NaN | \n
\n \n 1826545 | \n 249.0 | \n Turkey | \n 0 | \n NaN | \n 1319 | \n National Average | \n 52 | \n Rice - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 6 | \n 2013 | \n 4.5786 | \n NaN | \n
\n \n 1826546 | \n 249.0 | \n Turkey | \n 0 | \n NaN | \n 1319 | \n National Average | \n 52 | \n Rice - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 11 | \n 2013 | \n 4.7865 | \n NaN | \n
\n \n 1826547 | \n 249.0 | \n Turkey | \n 0 | \n NaN | \n 1319 | \n National Average | \n 52 | \n Rice - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 12 | \n 2013 | \n 5.1337 | \n NaN | \n
\n \n 1826548 | \n 249.0 | \n Turkey | \n 0 | \n NaN | \n 1319 | \n National Average | \n 52 | \n Rice - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 1 | \n 2014 | \n 5.5099 | \n NaN | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 1836858 | \n 249.0 | \n Turkey | \n 3057 | \n NaN | \n 2055 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 1 | \n 2021 | \n 68.2002 | \n NaN | \n
\n \n 1836859 | \n 249.0 | \n Turkey | \n 3057 | \n NaN | \n 2055 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 2 | \n 2021 | \n 68.6250 | \n NaN | \n
\n \n 1836860 | \n 249.0 | \n Turkey | \n 3057 | \n NaN | \n 2055 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 3 | \n 2021 | \n 71.2310 | \n NaN | \n
\n \n 1836861 | \n 249.0 | \n Turkey | \n 3057 | \n NaN | \n 2055 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 4 | \n 2021 | \n 73.2609 | \n NaN | \n
\n \n 1836862 | \n 249.0 | \n Turkey | \n 3057 | \n NaN | \n 2055 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 0.0 | \n TRY | \n 15 | \n Retail | \n 5 | \n KG | \n 5 | \n 2021 | \n 75.6859 | \n NaN | \n
\n \n
\n
10319 rows × 18 columns
\n
"},"metadata":{}}]},{"cell_type":"code","source":"data_TR.isnull().sum()","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:34.092725Z","iopub.execute_input":"2021-07-14T07:23:34.093043Z","iopub.status.idle":"2021-07-14T07:23:34.110394Z","shell.execute_reply.started":"2021-07-14T07:23:34.093014Z","shell.execute_reply":"2021-07-14T07:23:34.109169Z"},"trusted":true},"execution_count":60,"outputs":[{"execution_count":60,"output_type":"execute_result","data":{"text/plain":"adm0_id 0\nadm0_name 0\nadm1_id 0\nadm1_name 10319\nmkt_id 0\nmkt_name 0\ncm_id 0\ncm_name 0\ncur_id 0\ncur_name 0\npt_id 0\npt_name 0\num_id 0\num_name 0\nmp_month 0\nmp_year 0\nmp_price 0\nmp_commoditysource 10319\ndtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"data_TR.describe().transpose()","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:23:34.111606Z","iopub.execute_input":"2021-07-14T07:23:34.112049Z","iopub.status.idle":"2021-07-14T07:23:34.163009Z","shell.execute_reply.started":"2021-07-14T07:23:34.112006Z","shell.execute_reply":"2021-07-14T07:23:34.161910Z"},"trusted":true},"execution_count":61,"outputs":[{"execution_count":61,"output_type":"execute_result","data":{"text/plain":" count mean std min 25% \\\nadm0_id 10319.0 249.000000 0.000000 249.0000 249.00000 \nadm1_id 10319.0 2051.964822 1427.840863 0.0000 0.00000 \nmkt_id 10319.0 1814.247214 344.599663 1319.0000 1319.00000 \ncm_id 10319.0 256.765384 141.970221 52.0000 114.00000 \ncur_id 10319.0 0.000000 0.000000 0.0000 0.00000 \npt_id 10319.0 15.000000 0.000000 15.0000 15.00000 \num_id 10319.0 15.449462 25.787134 5.0000 5.00000 \nmp_month 10319.0 6.176277 3.540073 1.0000 3.00000 \nmp_year 10319.0 2018.266208 1.818305 2013.0000 2017.00000 \nmp_price 10319.0 17.295317 24.410582 0.2539 3.57865 \nmp_commoditysource 0.0 NaN NaN NaN NaN \n\n 50% 75% max \nadm0_id 249.0000 249.00000 249.0000 \nadm1_id 3023.0000 3056.00000 3057.0000 \nmkt_id 2053.0000 2054.00000 2055.0000 \ncm_id 239.0000 388.00000 502.0000 
\ncur_id 0.0000 0.00000 0.0000 \npt_id 15.0000 15.00000 15.0000 \num_id 5.0000 5.00000 102.0000 \nmp_month 6.0000 9.00000 12.0000 \nmp_year 2018.0000 2020.00000 2021.0000 \nmp_price 6.4378 23.85455 167.8585 \nmp_commoditysource NaN NaN NaN ","text/html":"\n\n
\n \n \n | \n count | \n mean | \n std | \n min | \n 25% | \n 50% | \n 75% | \n max | \n
\n \n \n \n adm0_id | \n 10319.0 | \n 249.000000 | \n 0.000000 | \n 249.0000 | \n 249.00000 | \n 249.0000 | \n 249.00000 | \n 249.0000 | \n
\n \n adm1_id | \n 10319.0 | \n 2051.964822 | \n 1427.840863 | \n 0.0000 | \n 0.00000 | \n 3023.0000 | \n 3056.00000 | \n 3057.0000 | \n
\n \n mkt_id | \n 10319.0 | \n 1814.247214 | \n 344.599663 | \n 1319.0000 | \n 1319.00000 | \n 2053.0000 | \n 2054.00000 | \n 2055.0000 | \n
\n \n cm_id | \n 10319.0 | \n 256.765384 | \n 141.970221 | \n 52.0000 | \n 114.00000 | \n 239.0000 | \n 388.00000 | \n 502.0000 | \n
\n \n cur_id | \n 10319.0 | \n 0.000000 | \n 0.000000 | \n 0.0000 | \n 0.00000 | \n 0.0000 | \n 0.00000 | \n 0.0000 | \n
\n \n pt_id | \n 10319.0 | \n 15.000000 | \n 0.000000 | \n 15.0000 | \n 15.00000 | \n 15.0000 | \n 15.00000 | \n 15.0000 | \n
\n \n um_id | \n 10319.0 | \n 15.449462 | \n 25.787134 | \n 5.0000 | \n 5.00000 | \n 5.0000 | \n 5.00000 | \n 102.0000 | \n
\n \n mp_month | \n 10319.0 | \n 6.176277 | \n 3.540073 | \n 1.0000 | \n 3.00000 | \n 6.0000 | \n 9.00000 | \n 12.0000 | \n
\n \n mp_year | \n 10319.0 | \n 2018.266208 | \n 1.818305 | \n 2013.0000 | \n 2017.00000 | \n 2018.0000 | \n 2020.00000 | \n 2021.0000 | \n
\n \n mp_price | \n 10319.0 | \n 17.295317 | \n 24.410582 | \n 0.2539 | \n 3.57865 | \n 6.4378 | \n 23.85455 | \n 167.8585 | \n
\n \n mp_commoditysource | \n 0.0 | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n NaN | \n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"print(data_TR.mkt_id.unique())\nprint(data_TR['mkt_id'].value_counts())\n","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:58:36.695886Z","iopub.execute_input":"2021-07-14T07:58:36.696403Z","iopub.status.idle":"2021-07-14T07:58:36.704231Z","shell.execute_reply.started":"2021-07-14T07:58:36.696371Z","shell.execute_reply":"2021-07-14T07:58:36.703229Z"},"trusted":true},"execution_count":86,"outputs":[{"name":"stdout","text":"1319 3366\n2055 2318\n2054 2318\n2053 2317\nName: mkt_id, dtype: int64\n[1319 2053 2054 2055]\n","output_type":"stream"}]},{"cell_type":"code","source":"data_TR =data_TR.drop(['mp_commoditysource','adm1_name','adm0_name','mkt_id','adm1_id','cur_id','pt_id', 'adm0_id', 'pt_name', 'cur_name'],axis=1)","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:05.866851Z","iopub.execute_input":"2021-07-14T07:17:05.867324Z","iopub.status.idle":"2021-07-14T07:17:05.873272Z","shell.execute_reply.started":"2021-07-14T07:17:05.867286Z","shell.execute_reply":"2021-07-14T07:17:05.872101Z"},"trusted":true},"execution_count":34,"outputs":[]},{"cell_type":"code","source":"data_TR","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:05.874619Z","iopub.execute_input":"2021-07-14T07:17:05.874893Z","iopub.status.idle":"2021-07-14T07:17:05.905487Z","shell.execute_reply.started":"2021-07-14T07:17:05.874869Z","shell.execute_reply":"2021-07-14T07:17:05.904511Z"},"trusted":true},"execution_count":35,"outputs":[{"execution_count":35,"output_type":"execute_result","data":{"text/plain":" mkt_name cm_id cm_name um_id um_name \\\n1826544 National Average 52 Rice - Retail 5 KG \n1826545 National Average 52 Rice - Retail 5 KG \n1826546 National Average 52 Rice - Retail 5 KG \n1826547 National Average 52 Rice - Retail 5 KG \n1826548 National Average 52 Rice - Retail 5 KG \n... ... ... ... ... ... 
\n1836858 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836859 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836860 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836861 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836862 Izmir 502 Cocoa (powder) - Retail 5 KG \n\n mp_month mp_year mp_price \n1826544 5 2013 4.4920 \n1826545 6 2013 4.5786 \n1826546 11 2013 4.7865 \n1826547 12 2013 5.1337 \n1826548 1 2014 5.5099 \n... ... ... ... \n1836858 1 2021 68.2002 \n1836859 2 2021 68.6250 \n1836860 3 2021 71.2310 \n1836861 4 2021 73.2609 \n1836862 5 2021 75.6859 \n\n[10319 rows x 8 columns]","text/html":"\n\n
\n \n \n | \n mkt_name | \n cm_id | \n cm_name | \n um_id | \n um_name | \n mp_month | \n mp_year | \n mp_price | \n
\n \n \n \n 1826544 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 5 | \n 2013 | \n 4.4920 | \n
\n \n 1826545 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 6 | \n 2013 | \n 4.5786 | \n
\n \n 1826546 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 11 | \n 2013 | \n 4.7865 | \n
\n \n 1826547 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 12 | \n 2013 | \n 5.1337 | \n
\n \n 1826548 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 1 | \n 2014 | \n 5.5099 | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 1836858 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 1 | \n 2021 | \n 68.2002 | \n
\n \n 1836859 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 2 | \n 2021 | \n 68.6250 | \n
\n \n 1836860 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 3 | \n 2021 | \n 71.2310 | \n
\n \n 1836861 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 4 | \n 2021 | \n 73.2609 | \n
\n \n 1836862 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 5 | \n 2021 | \n 75.6859 | \n
\n \n
\n
10319 rows × 8 columns
\n
"},"metadata":{}}]},{"cell_type":"code","source":"data_TR.describe().transpose()","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:05.907128Z","iopub.execute_input":"2021-07-14T07:17:05.907793Z","iopub.status.idle":"2021-07-14T07:17:05.945131Z","shell.execute_reply.started":"2021-07-14T07:17:05.907747Z","shell.execute_reply":"2021-07-14T07:17:05.944165Z"},"trusted":true},"execution_count":36,"outputs":[{"execution_count":36,"output_type":"execute_result","data":{"text/plain":" count mean std min 25% 50% \\\ncm_id 10319.0 256.765384 141.970221 52.0000 114.00000 239.0000 \num_id 10319.0 15.449462 25.787134 5.0000 5.00000 5.0000 \nmp_month 10319.0 6.176277 3.540073 1.0000 3.00000 6.0000 \nmp_year 10319.0 2018.266208 1.818305 2013.0000 2017.00000 2018.0000 \nmp_price 10319.0 17.295317 24.410582 0.2539 3.57865 6.4378 \n\n 75% max \ncm_id 388.00000 502.0000 \num_id 5.00000 102.0000 \nmp_month 9.00000 12.0000 \nmp_year 2020.00000 2021.0000 \nmp_price 23.85455 167.8585 ","text/html":"\n\n
\n \n \n | \n count | \n mean | \n std | \n min | \n 25% | \n 50% | \n 75% | \n max | \n
\n \n \n \n cm_id | \n 10319.0 | \n 256.765384 | \n 141.970221 | \n 52.0000 | \n 114.00000 | \n 239.0000 | \n 388.00000 | \n 502.0000 | \n
\n \n um_id | \n 10319.0 | \n 15.449462 | \n 25.787134 | \n 5.0000 | \n 5.00000 | \n 5.0000 | \n 5.00000 | \n 102.0000 | \n
\n \n mp_month | \n 10319.0 | \n 6.176277 | \n 3.540073 | \n 1.0000 | \n 3.00000 | \n 6.0000 | \n 9.00000 | \n 12.0000 | \n
\n \n mp_year | \n 10319.0 | \n 2018.266208 | \n 1.818305 | \n 2013.0000 | \n 2017.00000 | \n 2018.0000 | \n 2020.00000 | \n 2021.0000 | \n
\n \n mp_price | \n 10319.0 | \n 17.295317 | \n 24.410582 | \n 0.2539 | \n 3.57865 | \n 6.4378 | \n 23.85455 | \n 167.8585 | \n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"data_TR","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:06.359432Z","iopub.execute_input":"2021-07-14T07:17:06.359757Z","iopub.status.idle":"2021-07-14T07:17:06.386295Z","shell.execute_reply.started":"2021-07-14T07:17:06.359728Z","shell.execute_reply":"2021-07-14T07:17:06.385324Z"},"trusted":true},"execution_count":39,"outputs":[{"execution_count":39,"output_type":"execute_result","data":{"text/plain":" mkt_name cm_id cm_name um_id um_name \\\n1826544 National Average 52 Rice - Retail 5 KG \n1826545 National Average 52 Rice - Retail 5 KG \n1826546 National Average 52 Rice - Retail 5 KG \n1826547 National Average 52 Rice - Retail 5 KG \n1826548 National Average 52 Rice - Retail 5 KG \n... ... ... ... ... ... \n1836858 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836859 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836860 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836861 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836862 Izmir 502 Cocoa (powder) - Retail 5 KG \n\n mp_month mp_year mp_price \n1826544 5 2013 4.4920 \n1826545 6 2013 4.5786 \n1826546 11 2013 4.7865 \n1826547 12 2013 5.1337 \n1826548 1 2014 5.5099 \n... ... ... ... \n1836858 1 2021 68.2002 \n1836859 2 2021 68.6250 \n1836860 3 2021 71.2310 \n1836861 4 2021 73.2609 \n1836862 5 2021 75.6859 \n\n[10319 rows x 8 columns]","text/html":"\n\n
\n \n \n | \n mkt_name | \n cm_id | \n cm_name | \n um_id | \n um_name | \n mp_month | \n mp_year | \n mp_price | \n
\n \n \n \n 1826544 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 5 | \n 2013 | \n 4.4920 | \n
\n \n 1826545 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 6 | \n 2013 | \n 4.5786 | \n
\n \n 1826546 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 11 | \n 2013 | \n 4.7865 | \n
\n \n 1826547 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 12 | \n 2013 | \n 5.1337 | \n
\n \n 1826548 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 1 | \n 2014 | \n 5.5099 | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 1836858 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 1 | \n 2021 | \n 68.2002 | \n
\n \n 1836859 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 2 | \n 2021 | \n 68.6250 | \n
\n \n 1836860 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 3 | \n 2021 | \n 71.2310 | \n
\n \n 1836861 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 4 | \n 2021 | \n 73.2609 | \n
\n \n 1836862 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 5 | \n 2021 | \n 75.6859 | \n
\n \n
\n
10319 rows × 8 columns
\n
"},"metadata":{}}]},{"cell_type":"code","source":"data_TR.columns=['Place', 'ProductId', 'ProductName', 'UmId', 'UmName', 'Month', 'Year', 'Price']","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:06.387590Z","iopub.execute_input":"2021-07-14T07:17:06.388168Z","iopub.status.idle":"2021-07-14T07:17:06.398400Z","shell.execute_reply.started":"2021-07-14T07:17:06.388125Z","shell.execute_reply":"2021-07-14T07:17:06.397645Z"},"trusted":true},"execution_count":40,"outputs":[]},{"cell_type":"code","source":"data_TR","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:06.399623Z","iopub.execute_input":"2021-07-14T07:17:06.400042Z","iopub.status.idle":"2021-07-14T07:17:06.429423Z","shell.execute_reply.started":"2021-07-14T07:17:06.400002Z","shell.execute_reply":"2021-07-14T07:17:06.428669Z"},"trusted":true},"execution_count":41,"outputs":[{"execution_count":41,"output_type":"execute_result","data":{"text/plain":" Place ProductId ProductName UmId UmName \\\n1826544 National Average 52 Rice - Retail 5 KG \n1826545 National Average 52 Rice - Retail 5 KG \n1826546 National Average 52 Rice - Retail 5 KG \n1826547 National Average 52 Rice - Retail 5 KG \n1826548 National Average 52 Rice - Retail 5 KG \n... ... ... ... ... ... \n1836858 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836859 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836860 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836861 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836862 Izmir 502 Cocoa (powder) - Retail 5 KG \n\n Month Year Price \n1826544 5 2013 4.4920 \n1826545 6 2013 4.5786 \n1826546 11 2013 4.7865 \n1826547 12 2013 5.1337 \n1826548 1 2014 5.5099 \n... ... ... ... \n1836858 1 2021 68.2002 \n1836859 2 2021 68.6250 \n1836860 3 2021 71.2310 \n1836861 4 2021 73.2609 \n1836862 5 2021 75.6859 \n\n[10319 rows x 8 columns]","text/html":"\n\n
\n \n \n | \n Place | \n ProductId | \n ProductName | \n UmId | \n UmName | \n Month | \n Year | \n Price | \n
\n \n \n \n 1826544 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 5 | \n 2013 | \n 4.4920 | \n
\n \n 1826545 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 6 | \n 2013 | \n 4.5786 | \n
\n \n 1826546 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 11 | \n 2013 | \n 4.7865 | \n
\n \n 1826547 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 12 | \n 2013 | \n 5.1337 | \n
\n \n 1826548 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 1 | \n 2014 | \n 5.5099 | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 1836858 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 1 | \n 2021 | \n 68.2002 | \n
\n \n 1836859 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 2 | \n 2021 | \n 68.6250 | \n
\n \n 1836860 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 3 | \n 2021 | \n 71.2310 | \n
\n \n 1836861 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 4 | \n 2021 | \n 73.2609 | \n
\n \n 1836862 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 5 | \n 2021 | \n 75.6859 | \n
\n \n
\n
10319 rows × 8 columns
\n
"},"metadata":{}}]},{"cell_type":"code","source":"data_train = data_TR.loc[data_TR.Year<2020]","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:06.430875Z","iopub.execute_input":"2021-07-14T07:17:06.431523Z","iopub.status.idle":"2021-07-14T07:17:06.438222Z","shell.execute_reply.started":"2021-07-14T07:17:06.431480Z","shell.execute_reply":"2021-07-14T07:17:06.437493Z"},"trusted":true},"execution_count":42,"outputs":[]},{"cell_type":"code","source":"data_train","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:06.439329Z","iopub.execute_input":"2021-07-14T07:17:06.439861Z","iopub.status.idle":"2021-07-14T07:17:06.470620Z","shell.execute_reply.started":"2021-07-14T07:17:06.439810Z","shell.execute_reply":"2021-07-14T07:17:06.469791Z"},"trusted":true},"execution_count":43,"outputs":[{"execution_count":43,"output_type":"execute_result","data":{"text/plain":" Place ProductId ProductName UmId UmName \\\n1826544 National Average 52 Rice - Retail 5 KG \n1826545 National Average 52 Rice - Retail 5 KG \n1826546 National Average 52 Rice - Retail 5 KG \n1826547 National Average 52 Rice - Retail 5 KG \n1826548 National Average 52 Rice - Retail 5 KG \n... ... ... ... ... ... \n1836841 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836842 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836843 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836844 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836845 Izmir 502 Cocoa (powder) - Retail 5 KG \n\n Month Year Price \n1826544 5 2013 4.4920 \n1826545 6 2013 4.5786 \n1826546 11 2013 4.7865 \n1826547 12 2013 5.1337 \n1826548 1 2014 5.5099 \n... ... ... ... \n1836841 8 2019 47.9040 \n1836842 9 2019 49.1176 \n1836843 10 2019 50.8347 \n1836844 11 2019 51.7985 \n1836845 12 2019 51.6071 \n\n[7381 rows x 8 columns]","text/html":"\n\n
\n \n \n | \n Place | \n ProductId | \n ProductName | \n UmId | \n UmName | \n Month | \n Year | \n Price | \n
\n \n \n \n 1826544 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 5 | \n 2013 | \n 4.4920 | \n
\n \n 1826545 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 6 | \n 2013 | \n 4.5786 | \n
\n \n 1826546 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 11 | \n 2013 | \n 4.7865 | \n
\n \n 1826547 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 12 | \n 2013 | \n 5.1337 | \n
\n \n 1826548 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 1 | \n 2014 | \n 5.5099 | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 1836841 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 8 | \n 2019 | \n 47.9040 | \n
\n \n 1836842 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 9 | \n 2019 | \n 49.1176 | \n
\n \n 1836843 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 10 | \n 2019 | \n 50.8347 | \n
\n \n 1836844 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 11 | \n 2019 | \n 51.7985 | \n
\n \n 1836845 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 12 | \n 2019 | \n 51.6071 | \n
\n \n
\n
7381 rows × 8 columns
\n
"},"metadata":{}}]},{"cell_type":"code","source":"data_test = data_TR.loc[data_TR.Year>2020]","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:06.473536Z","iopub.execute_input":"2021-07-14T07:17:06.473981Z","iopub.status.idle":"2021-07-14T07:17:06.485661Z","shell.execute_reply.started":"2021-07-14T07:17:06.473952Z","shell.execute_reply":"2021-07-14T07:17:06.484606Z"},"trusted":true},"execution_count":44,"outputs":[]},{"cell_type":"code","source":"data_test","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:06.487381Z","iopub.execute_input":"2021-07-14T07:17:06.488055Z","iopub.status.idle":"2021-07-14T07:17:06.518437Z","shell.execute_reply.started":"2021-07-14T07:17:06.488010Z","shell.execute_reply":"2021-07-14T07:17:06.517526Z"},"trusted":true},"execution_count":45,"outputs":[{"execution_count":45,"output_type":"execute_result","data":{"text/plain":" Place ProductId ProductName UmId UmName \\\n1826622 National Average 52 Rice - Retail 5 KG \n1826623 National Average 52 Rice - Retail 5 KG \n1826624 National Average 52 Rice - Retail 5 KG \n1826625 National Average 52 Rice - Retail 5 KG \n1826626 National Average 52 Rice - Retail 5 KG \n... ... ... ... ... ... \n1836858 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836859 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836860 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836861 Izmir 502 Cocoa (powder) - Retail 5 KG \n1836862 Izmir 502 Cocoa (powder) - Retail 5 KG \n\n Month Year Price \n1826622 1 2021 11.9249 \n1826623 2 2021 12.8107 \n1826624 3 2021 12.8107 \n1826625 4 2021 12.8719 \n1826626 5 2021 12.9061 \n... ... ... ... \n1836858 1 2021 68.2002 \n1836859 2 2021 68.6250 \n1836860 3 2021 71.2310 \n1836861 4 2021 73.2609 \n1836862 5 2021 75.6859 \n\n[868 rows x 8 columns]","text/html":"\n\n
\n \n \n | \n Place | \n ProductId | \n ProductName | \n UmId | \n UmName | \n Month | \n Year | \n Price | \n
\n \n \n \n 1826622 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 1 | \n 2021 | \n 11.9249 | \n
\n \n 1826623 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 2 | \n 2021 | \n 12.8107 | \n
\n \n 1826624 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 3 | \n 2021 | \n 12.8107 | \n
\n \n 1826625 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 4 | \n 2021 | \n 12.8719 | \n
\n \n 1826626 | \n National Average | \n 52 | \n Rice - Retail | \n 5 | \n KG | \n 5 | \n 2021 | \n 12.9061 | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 1836858 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 1 | \n 2021 | \n 68.2002 | \n
\n \n 1836859 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 2 | \n 2021 | \n 68.6250 | \n
\n \n 1836860 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 3 | \n 2021 | \n 71.2310 | \n
\n \n 1836861 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 4 | \n 2021 | \n 73.2609 | \n
\n \n 1836862 | \n Izmir | \n 502 | \n Cocoa (powder) - Retail | \n 5 | \n KG | \n 5 | \n 2021 | \n 75.6859 | \n
\n \n
\n
868 rows × 8 columns
\n
"},"metadata":{}}]},{"cell_type":"code","source":"data_train.to_csv('train.csv', index=False)","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:06.519652Z","iopub.execute_input":"2021-07-14T07:17:06.519940Z","iopub.status.idle":"2021-07-14T07:17:06.566357Z","shell.execute_reply.started":"2021-07-14T07:17:06.519911Z","shell.execute_reply":"2021-07-14T07:17:06.565362Z"},"trusted":true},"execution_count":46,"outputs":[]},{"cell_type":"code","source":"data_test.to_csv('test.csv', index=False)","metadata":{"execution":{"iopub.status.busy":"2021-07-14T07:17:06.567542Z","iopub.execute_input":"2021-07-14T07:17:06.567824Z","iopub.status.idle":"2021-07-14T07:17:06.577608Z","shell.execute_reply.started":"2021-07-14T07:17:06.567797Z","shell.execute_reply":"2021-07-14T07:17:06.576694Z"},"trusted":true},"execution_count":47,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
--------------------------------------------------------------------------------