├── .gitignore ├── README.md ├── data-scraping ├── .gitignore ├── load.py ├── requirements.txt ├── scraping_main.py └── transform.py └── testing-exercise ├── .gitignore ├── calculator ├── __init__.py └── arithmetic.py ├── main.py └── tests ├── test_arithmetic.py └── test_main.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dbs2025-dataprocessing 2 | This repository is designed for hands-on exercises at the Coding Camp powered by DBS 2025, specifically for the 6th ILT session on Data Processing. 3 | -------------------------------------------------------------------------------- /data-scraping/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | product.csv -------------------------------------------------------------------------------- /data-scraping/load.py: -------------------------------------------------------------------------------- 1 | def export_to_csv(df, name): 2 | return df.to_csv(f"{name}.csv") -------------------------------------------------------------------------------- /data-scraping/requirements.txt: -------------------------------------------------------------------------------- 1 | sqlalchemy~=2.0 2 | psycopg2-binary~=2.9 3 | pandas~=2.2 4 | requests~=2.32 5 | beautifulsoup4~=4.12 -------------------------------------------------------------------------------- /data-scraping/scraping_main.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pandas as pd 3 | import requests 4 | 5 | from bs4 import BeautifulSoup 6 | from transform import transform_data, transform_to_DataFrame # Mengimpor fungsi dari modul transform 7 | from load import export_to_csv # Mengimpor fungsi dari modul load 8 | 9 | 
# ---- /data-scraping/scraping_main.py (continued: constants and functions) ----

# Browser-like User-Agent header sent with every request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
    )
}


def fetching_content(url, timeout=30):
    """Fetch the raw HTML content of the given URL.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as bytes, or ``None`` when the request failed
        (the error is printed).
    """
    try:
        # The request itself is inside the ``try`` so that connection errors
        # and timeouts are reported and turned into ``None`` too, not only
        # HTTP error statuses. The ``with`` block closes the session.
        with requests.Session() as session:
            response = session.get(url, headers=HEADERS, timeout=timeout)
            response.raise_for_status()
            return response.content
    except requests.exceptions.RequestException as e:
        print(f"Terjadi kesalahan ketika melakukan requests terhadap {url}: {e}")
        return None


def extract_book_data(article):
    """Extract title, price, availability and rating from one article element.

    Args:
        article: Parsed HTML element for a single product.

    Returns:
        A dict with the keys ``Title``, ``Price``, ``Availability`` and
        ``Rating``.
    """
    book_title = article.find('h3').text
    product_element = article.find('div', class_='product_price')
    price = product_element.find('p', class_='price_color').text
    availability_element = product_element.find('p', class_='instock availability')
    available = "Available" if availability_element else "Not Available"

    # The second CSS class of the star-rating paragraph is used as the rating.
    rating_element = article.find('p', class_='star-rating')
    rating = rating_element['class'][1] if rating_element else "Rating not found"

    return {
        "Title": book_title,
        "Price": price,
        "Availability": available,
        "Rating": rating,
    }


def scrape_book(base_url, start_page=1, delay=2):
    """Scrape every page reachable from ``start_page`` and collect the books.

    Args:
        base_url: URL template with one ``{}`` placeholder for the page number.
        start_page: Page number to start from.
        delay: Seconds to sleep before requesting the next page.

    Returns:
        A list of dicts as produced by ``extract_book_data``.
    """
    data = []
    page_number = start_page

    while True:
        url = base_url.format(page_number)
        print(f"Scraping halaman: {url}")

        content = fetching_content(url)
        if not content:
            break  # Stop when the page could not be fetched.

        soup = BeautifulSoup(content, "html.parser")
        data.extend(
            extract_book_data(article)
            for article in soup.find_all('article', class_='product_pod')
        )

        if not soup.find('li', class_='next'):
            break  # No "next" button: this was the last page.

        page_number += 1
        time.sleep(delay)  # Pause before requesting the next page.

    return data


def main():
    """Run the whole pipeline: scrape, transform and export to CSV."""
    BASE_URL = 'https://books.toscrape.com/catalogue/page-{}.html'
    all_books_data = scrape_book(BASE_URL)
    if all_books_data:
        df = transform_to_DataFrame(all_books_data)  # List of dicts -> DataFrame.
        df = transform_data(df, 20000)  # Clean and convert the columns.
        export_to_csv(df, "product")
    else:
        print("Tidak ada data yang ditemukan.")


if __name__ == '__main__':
    main()


# ---- /data-scraping/transform.py ----

import pandas as pd


def transform_to_DataFrame(data):
    """Convert the scraped records into a DataFrame."""
    return pd.DataFrame(data)


def transform_data(data, exchange_rate):
    """Apply all column transformations and return a new DataFrame.

    The input frame is left untouched; all work happens on a copy.

    Args:
        data: DataFrame with the columns ``Title``, ``Price``,
            ``Availability`` and ``Rating``.
        exchange_rate: Multiplier applied to the pound price to obtain
            ``Price_in_rupiah``.

    Returns:
        A DataFrame without ``Price`` and with the added columns
        ``Price_in_pounds`` and ``Price_in_rupiah``.
    """
    # Work on a copy so the caller's DataFrame is not modified as a side effect.
    data = data.copy()

    # Price: strip the currency symbol and convert to float.
    data['Price_in_pounds'] = data['Price'].replace('£', '', regex=True).astype(float)

    # Rating: map the word labels to integers. Labels that are not in the
    # mapping are kept as they are. ``map`` is used instead of ``replace``
    # because the latter relies on deprecated silent downcasting.
    rating_mapping = {
        'One': 1,
        'Two': 2,
        'Three': 3,
        'Four': 4,
        'Five': 5,
    }
    data['Rating'] = data['Rating'].map(lambda label: rating_mapping.get(label, label))

    # Exchange rate: convert the pound price and truncate to an integer.
    data['Price_in_rupiah'] = (data['Price_in_pounds'] * exchange_rate).astype(int)

    # Drop the now redundant raw price column.
    data = data.drop(columns=['Price'])

    # Store the text columns with the pandas string dtype.
    data['Title'] = data['Title'].astype('string')
    data['Availability'] = data['Availability'].astype('string')

    return data
# ---- /testing-exercise/.gitignore ----
# __pycache__
# .venv
# htmlcov
# .coverage
# testing-exercise/calculator/__pycache__


# ---- /testing-exercise/calculator/__init__.py ----

# Re-export the arithmetic helpers at package level.
from .arithmetic import *


# ---- /testing-exercise/calculator/arithmetic.py ----

def get_numbers():
    """Prompt for space-separated numbers and return them as floats.

    Returns:
        A list with at least two floats.

    Raises:
        ValueError: If a token cannot be converted to ``float`` or fewer
            than two numbers were entered.
    """
    raw_input = input("MASUKAN ANGKA => Masukkan angka dengan spasi sebagai pemisahnya (gunakan titik '.' jika ingin "
                      "menggunakan desimal): ")

    # Split on whitespace and convert every token to float.
    try:
        number_list = [float(token) for token in raw_input.split()]
    except ValueError as err:
        raise ValueError(
            "Masukkan angka dengan spasi sebagai pemisah dan gunakan '.' ketika menggunakan desimal."
        ) from err

    # Every operation needs at least two operands.
    if len(number_list) <= 1:
        raise ValueError("Harap masukkan Angka lebih dari satu!")

    return number_list


def addition(numbers):
    """Return the sum of ``numbers`` (added left to right), rounded to 2 decimals."""
    result = numbers[0]
    for number in numbers[1:]:
        result += number
    return round(result, 2)


def subtraction(numbers):
    """Subtract every following number from the first one; rounded to 2 decimals."""
    result = numbers[0]
    for number in numbers[1:]:
        result -= number
    return round(result, 2)


def multiplication(numbers):
    """Return the product of ``numbers``, rounded to 2 decimals."""
    result = numbers[0]
    for number in numbers[1:]:
        result *= number
    return round(result, 2)


def division(numbers):
    """Divide the first number by every following one; rounded to 2 decimals.

    Raises:
        ZeroDivisionError: If any divisor is zero.
    """
    result = numbers[0]
    try:
        for number in numbers[1:]:
            result /= number
    except ZeroDivisionError as err:
        raise ZeroDivisionError("Anda tidak bisa membagi bilangan dengan angka 0!") from err

    return round(result, 2)


# ---- /testing-exercise/main.py ----

from calculator import get_numbers, addition, subtraction, multiplication, division
import sys


def main():
    """Run one calculator interaction: show the menu, read input, print the result.

    Exits the program through ``sys.exit`` when the chosen option is not in
    the menu. Errors raised while reading or computing are printed instead
    of propagated.
    """
    operations = {
        "1": ("Penjumlahan", addition),
        "2": ("Pengurangan", subtraction),
        "3": ("Perkalian", multiplication),
        "4": ("Pembagian", division)
    }
    print("==========================")
    print("Aplikasi Kalkulator")
    print("==========================")
    print("Pilih operasi aritmetika!")
    for key, (name, _) in operations.items():
        print(f"{key}. {name}")
    print("==========================")

    choice = input("PILIH OPERASI => Masukkan Pilihan Anda: ")
    if choice not in operations:
        sys.exit("Keluar Program!")

    # The original wrapped everything in ``while True`` but left the loop on
    # every path, so a single pass is equivalent.
    try:
        numbers = get_numbers()

        _, operation = operations[choice]
        result = operation(numbers)
        print(f"Hasil: {result}")
    except ValueError as ve:
        print(f"Terjadi kesalahan ValueError: {ve}")
    except ZeroDivisionError as ze:
        print(f"Terjadi kesalahan ZeroDivisionError: {ze}")
    except Exception as e:
        print(f"Terjadi kesalahan {e}")


if __name__ == "__main__":
    main()


# ---- /testing-exercise/tests/test_arithmetic.py ----

from calculator.arithmetic import get_numbers, addition, subtraction, multiplication, division
from unittest import TestCase
from unittest.mock import patch


class TestArithmetic(TestCase):
    """Unit tests for the arithmetic helpers."""

    def setUp(self):
        self.positive_numbers = [6, 12]
        self.negative_numbers = [-6, -12]
        self.mix_numbers = [-6, 12]

    @patch('builtins.input', return_value='6 12 3')
    def test_get_numbers(self, mock_input):
        result = get_numbers()
        expected = [6, 12, 3]
        self.assertEqual(result, expected)

    # Input that uses something other than spaces as the separator.
    @patch('builtins.input', return_value='6,12,3')
    def test_get_numbers_with_comma_as_separator(self, mock_input):
        with self.assertRaises(ValueError) as context:
            get_numbers()
        self.assertEqual(str(context.exception), "Masukkan angka dengan spasi sebagai pemisah dan gunakan '.' "
                                                 "ketika menggunakan desimal.")

    @patch('builtins.input', return_value='6')
    def test_get_numbers_with_one_number(self, mock_input):
        with self.assertRaises(ValueError) as context:
            get_numbers()
        self.assertEqual(str(context.exception), "Harap masukkan Angka lebih dari satu!")

    def test_addition_positive_number(self):
        result = addition(self.positive_numbers)
        expected = 18
        self.assertEqual(result, expected)

    def test_addition_negative_number(self):
        result = addition(self.negative_numbers)
        expected = -18
        self.assertEqual(result, expected)

    def test_addition_mix_number(self):
        result = addition(self.mix_numbers)
        expected = 6
        self.assertEqual(result, expected)

    def test_subtraction_positive_number(self):
        result = subtraction(self.positive_numbers)
        expected = -6
        self.assertEqual(result, expected)

    def test_subtraction_negative_number(self):
        result = subtraction(self.negative_numbers)
        expected = 6
        self.assertEqual(result, expected)

    def test_subtraction_mix_number(self):
        result = subtraction(self.mix_numbers)
        expected = -18
        self.assertEqual(result, expected)

    def test_multiplication_positive_number(self):
        result = multiplication(self.positive_numbers)
        expected = 72
        self.assertEqual(result, expected)

    def test_multiplication_negative_number(self):
        result = multiplication(self.negative_numbers)
        expected = 72
        self.assertEqual(result, expected)

    def test_multiplication_mix_number(self):
        result = multiplication(self.mix_numbers)
        expected = -72
        self.assertEqual(result, expected)

    def test_division_positive_number(self):
        result = division(self.positive_numbers)
        expected = 0.5
        self.assertEqual(result, expected)

    def test_division_negative_number(self):
        result = division(self.negative_numbers)
        expected = 0.5
        self.assertEqual(result, expected)

    def test_division_mix_number(self):
        result = division(self.mix_numbers)
        expected = -0.5
        self.assertEqual(result, expected)

    def test_division_divide_by_zero(self):
        with self.assertRaises(ZeroDivisionError) as context:
            numbers = [12, 0]
            division(numbers)
        self.assertEqual(str(context.exception), "Anda tidak bisa membagi bilangan dengan angka 0!")


# ---- /testing-exercise/tests/test_main.py ----

from unittest import TestCase, mock
from unittest.mock import patch
from main import main


class TestIntegrations(TestCase):
    """Integration tests that drive ``main`` with patched input and output."""

    @patch('builtins.input', return_value='q')
    def test_quit_program(self, mock_input_ops):
        with self.assertRaises(SystemExit):
            main()

    @patch('main.get_numbers', return_value=[2.0,2.1])
    @patch('main.input', return_value='1')
    def test_addition(self, mock_input_ops, mock_value):
        with mock.patch('builtins.print') as mock_print:
            main()
            mock_print.assert_called_with('Hasil: 4.1')

    @patch('builtins.input', return_value='2,2')
    @patch('main.input', return_value='1')
    def test_value_error(self, mock_input_ops, mock_value):
        with mock.patch('builtins.print') as mock_print:
            main()
            mock_print.assert_called_with("Terjadi kesalahan ValueError: Masukkan angka dengan spasi sebagai pemisah "
                                          "dan gunakan '.' ketika menggunakan desimal.")

    @patch('builtins.input', return_value='2 0')
    @patch('main.input', return_value='4')
    def test_zero_division_error(self, mock_input_ops, mock_value):
        with mock.patch('builtins.print') as mock_print:
            main()
            mock_print.assert_called_with("Terjadi kesalahan ZeroDivisionError: "
                                          "Anda tidak bisa membagi bilangan dengan angka 0!")

    @patch('main.get_numbers', side_effect=TypeError("Invalid Data Type!"))
    @patch('main.input', return_value='4')
    def test_general_exception(self, mock_input_ops, mock_value):
        with mock.patch('builtins.print') as mock_print:
            main()
            mock_print.assert_called_with("Terjadi kesalahan Invalid Data Type!")