├── __init__.py ├── tests ├── __init__.py ├── dsn_test.txt ├── test_db_access.py ├── base_test_case.py ├── test_mexico.py ├── test_database_string_builder.py ├── test_review_reviewers.py ├── test_location.py ├── test_integration.py └── test_db_orm.py ├── locations ├── __init__.py ├── defaultlocation.py └── mexico.py ├── .gitignore ├── env_sample ├── env_docker ├── requirements.txt ├── dbmate.sh ├── db └── migrations │ ├── 20210822153820_add_word_count_column_reviews_table.sql │ ├── 20210816085255_create_locations_table.sql │ └── 20210810140754_create_hotels_table.sql ├── errors.py ├── review_orm.py ├── hotel_orm.py ├── reviewer_orm.py ├── location.py ├── test_locations.py ├── review_storage.py └── db.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /locations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .idea 3 | venv 4 | db/schema.sql 5 | **.**.pyc 6 | -------------------------------------------------------------------------------- /env_sample: -------------------------------------------------------------------------------- 1 | DATABASE_URL="postgres://postgres@0.0.0.0:5432/somedatabase?sslmode=disable" 2 | -------------------------------------------------------------------------------- /tests/dsn_test.txt: -------------------------------------------------------------------------------- 1 | postgres://postgres@0.0.0.0:5432/upwork_wes_hotel_reviews_test?sslmode=disable 
-------------------------------------------------------------------------------- /env_docker: -------------------------------------------------------------------------------- 1 | DATABASE_URL="postgres://postgres:postgres@db:5432/upwork_wes_hotel_reviews?sslmode=disable" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | configparser 2 | psycopg2-binary 3 | click 4 | python-dotenv 5 | nltk 6 | pycountry -------------------------------------------------------------------------------- /tests/test_db_access.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | if __name__ == '__main__': 4 | unittest.main() 5 | -------------------------------------------------------------------------------- /dbmate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/zsh 2 | 3 | # run dbmate with default database configuration 4 | dbmate $1 5 | # run dbmate with test database configuration 6 | URL=`cat tests/dsn_test.txt` 7 | dbmate --url=$URL $1 8 | -------------------------------------------------------------------------------- /db/migrations/20210822153820_add_word_count_column_reviews_table.sql: -------------------------------------------------------------------------------- 1 | -- migrate:up 2 | 3 | alter table reviews add column word_count integer default 0; 4 | 5 | 6 | -- migrate:down 7 | 8 | alter table reviews drop column word_count; 9 | -------------------------------------------------------------------------------- /tests/base_test_case.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from review_store.db import DB 4 | 5 | 6 | class BaseTestCase(TestCase): 7 | 8 | def init(self) -> None: 9 | with open('tests/dsn_test.txt', 'r') as fp: 10 | dsn = fp.readline() 11 | 
class DuplicateRowError(Exception):
    """Raised when a row that must be unique already exists.

    Attributes:
        data: the payload describing the offending row, stored exactly as
            it was passed in.
    """

    def __init__(self, data):
        # Forward ``data`` to Exception: calling the base initialiser with no
        # arguments would reset ``args`` to () and leave the exception with an
        # empty message and no way to be re-created from ``args``.
        super().__init__(data)
        self.data = data

    def __str__(self):
        return f"duplicate row: {self.data!r}"


class NotFoundException(Exception):
    """Raised when a lookup matches no record.

    Attributes:
        message: human-readable description supplied by the raiser.
    """

    def __init__(self, message):
        # Pass the message on explicitly so ``str(exc)`` and ``exc.args``
        # do not depend on what the base class captures implicitly.
        super().__init__(message)
        self.message = message
class ReviewerOrm(DbModel):
    """ORM model for one row of the ``reviewers`` table."""

    table_name = 'reviewers'
    # Columns whose values are supplied by the caller through ``data``.
    fields = ['name', 'address', 'location_id']
    # SQL expressions for database-computed columns (handed to DbModel as
    # ``expr``; how DbModel applies them is defined outside this view).
    # Ids come from the table's own sequence, which the hotels/reviewers
    # migration creates as ``reviewers_seq``; drawing them from
    # ``reviews_seq`` interleaved reviewer ids with review ids.
    # NOTE(review): on a database that already holds reviewers, advance
    # ``reviewers_seq`` past max(reviewers.id) before deploying this change,
    # otherwise new ids can collide with existing primary keys.
    expr = {
        "id": "nextval('reviewers_seq')",
        "date_created": "now()"
    }

    def __init__(self, db, data=None, by_fields=None):
        """Build a reviewer model.

        :param db: database handle passed through to ``DbModel``
        :param data: dict of column values for a new reviewer
        :param by_fields: dict of column values identifying an existing reviewer
        """
        super().__init__(db, {
            'table': ReviewerOrm.table_name,
            'fields': ReviewerOrm.fields,
            'expr': ReviewerOrm.expr,
            "data": data,
            "by_fields": by_fields
        })

    def validate(self):
        """Reviewers have no mandatory columns, so every instance is valid."""
        return True


def new_reviewer_from_data(db, data):
    """Build an unsaved reviewer from a parsed CSV row.

    :param db: database handle
    :param data: row dict holding the ``Reviewer`` and ``Reviewer Address`` keys
    :return: ``ReviewerOrm`` carrying the reviewer's name and address
    """
    # The constructor takes a dict of column values; the reviewer name and
    # address strings used to be passed positionally as ``data`` and
    # ``by_fields``, which are both expected to be dicts.
    return ReviewerOrm(db, data={
        'name': data['Reviewer'],
        'address': data['Reviewer Address'],
    })
def class_for_name(module_name, class_name):
    """Import ``module_name`` and return its attribute ``class_name``.

    :param module_name: dotted module path to import
    :param class_name: name of the attribute to fetch from that module
    :raises ImportError: if the module cannot be imported
    :raises AttributeError: if the module has no such attribute
    """
    module = importlib.import_module(module_name)
    return getattr(module, class_name)


class Location(DbModel):
    """Factory that dispatches to a country-specific location model.

    A model named ``Foo`` is looked up as class ``Foo`` in module
    ``locations.foo`` (the model name lower-cased).
    """

    @staticmethod
    def _model_class(db_model):
        """Resolve a model name such as ``'Mexico'`` to its class."""
        return class_for_name(f'locations.{db_model.lower()}', db_model)

    @staticmethod
    def new_location(db, address, db_model='Mexico'):
        """Create a location object by parsing ``address``.

        :param db: database
        :param address: raw address string to parse
        :param db_model: name of the location model to use
        :return: instance of the selected location model
        """
        return Location._model_class(db_model)(db, address)

    @staticmethod
    def load(db, location_id, db_model='Mexico'):
        """Load a stored location through the selected model's ``load``.

        :param db: database
        :param location_id: id of the row in the locations table
        :param db_model: name of the location model to use; defaults to the
            same model as ``new_location``
        :return: whatever the selected model's ``load`` returns
        """
        return Location._model_class(db_model).load(db, location_id)
Washington welcomes the decision.''' 29 | 30 | for l in TEST_LOCATIONS: 31 | r = geo_text.extract(input_text=l) 32 | print(r) 33 | 34 | 35 | if __name__ == '__main__': 36 | parse_locations() 37 | -------------------------------------------------------------------------------- /db/migrations/20210810140754_create_hotels_table.sql: -------------------------------------------------------------------------------- 1 | -- migrate:up 2 | create table organizations 3 | ( 4 | id bigint not null constraint organizations_pkey primary key, 5 | date_created timestamp not null, 6 | date_updated timestamp, 7 | name varchar(255) 8 | ); 9 | 10 | create table hotels 11 | ( 12 | id bigint not null constraint hotels_pkey primary key, 13 | date_created timestamp not null, 14 | date_updated timestamp, 15 | hotel_name varchar(255) unique, 16 | address varchar(255), 17 | organization_id bigint references organizations 18 | ); 19 | 20 | create table reviewers 21 | ( 22 | id bigint not null constraint reviewers_pkey primary key, 23 | date_created timestamp not null, 24 | date_updated timestamp, 25 | name varchar(255), 26 | address varchar(255) 27 | ); 28 | 29 | create table reviews 30 | ( 31 | id bigint not null constraint reviews_pkey primary key, 32 | date_created timestamp not null, 33 | date_updated timestamp, 34 | title varchar(255), 35 | full_review text, 36 | review_date varchar(10), 37 | rating integer, 38 | hotel_id bigint references hotels, 39 | reviewer_id bigint references reviewers 40 | ); 41 | 42 | create sequence organizations_seq; 43 | create sequence hotels_seq; 44 | create sequence reviewers_seq; 45 | create sequence reviews_seq; 46 | 47 | -- migrate:down 48 | drop table organizations cascade ; 49 | drop table hotels cascade ; 50 | drop table reviews cascade ; 51 | drop table reviewers cascade ; 52 | drop sequence reviewers_seq; 53 | drop sequence reviews_seq; 54 | drop sequence hotels_seq; 55 | drop sequence organizations_seq; 56 | 
class MyTestCase(BaseTestCase):
    """Stores one review for a saved hotel/reviewer pair and checks its word count."""

    def setUp(self) -> None:
        # Fixtures: a location shared by one reviewer and one hotel.
        self.init()
        self.location = Location.new_location(self.db, "Hatirjheel, Dhaka")
        self.location.save()
        reviewer_fields = {
            "name": "Davis S",
            "address": "Niketon, Dhaka",
            "location_id": self.location.id,
        }
        hotel_fields = {
            "hotel_name": "Sheraton",
            "address": "Hatirjheel, Dhaka",
            "location_id": self.location.id,
        }
        self.reviewer = ReviewerOrm(self.db, data=reviewer_fields)
        self.hotel = HotelOrm(self.db, data=hotel_fields)
        self.reviewer.save()
        self.hotel.save()

    def test_review(self):
        full_text = (
            'After Cabo trips for 30 years, our first time at 1 Homes was one of the best. '
            'Impeccable service and such kind and accomodating staff, especially Marcos and '
            'Johel. The residences, pool, and views are beautiful as well. We will be back!'
        )
        data = {
            'Reviewer': 'Devan S',
            'Date': 'Aug-21',
            'Reviewer Address': 'Riverside, California',
            'Review Title': 'First Time at 1 Homes',
            'Review Star': '5',
            'Full Review': full_text,
        }
        # Word count is the number of whitespace-separated tokens in the text.
        word_total = len(data['Full Review'].split())
        review = ReviewOrm(self.db, {
            'title': data['Review Title'],
            'rating': data['Review Star'],
            'full_review': data['Full Review'],
            'review_date': data['Date'],
            'word_count': word_total,
            'reviewer_id': self.reviewer.id,
            'hotel_id': self.hotel.id,
        })
        review.save()
        self.assertEqual(review.get('word_count'), 42)
class DefaultLocation(DbModel):
    """
    Default location handler: a comma-separated address made of a city and a
    country, optionally with a state between them.
    """

    def __init__(self, db, data, oid=None):
        """Build a location from a stored row or from a raw address.

        :param db: database
        :param data: when ``oid`` is given, a sequence
            ``[city, country, state, address]``; otherwise the raw address string
        :param oid: id of an already stored location, if any
        """
        super().__init__(db)
        if oid:
            self.id = oid
            self.city, self.country, self.state, self.address = data[0], data[1], data[2], data[3]
            return

        # Not stored yet; same starting state the Mexico model uses.
        self.id = None
        self.address = data
        self.city = None
        self.state = None
        self.country = None
        if data.strip() == '':
            return

        # Split "Reviewer Address" on commas:
        #   3 parts -> city, state, country
        #   2 parts -> city, country
        #   anything else -> only the first part is kept, as the city
        parts = [s.strip() for s in data.split(',')]
        if len(parts) == 3:
            self.city, self.state, self.country = parts
        elif len(parts) == 2:
            self.city, self.country = parts
        else:
            # Use the first part; ``data[0]`` would be just the first
            # character of the raw string.
            self.city = parts[0]

    def save(self):
        """Insert this location and remember the returned id.

        :return: True when the insert produced an id, False otherwise
        """
        q = DefaultLocation.build_query()
        self.id = self.db.execute(q, (self.city, self.country, self.state, self.address))
        return bool(self.id)

    @staticmethod
    def load(db, oid):
        """Load the stored location with id ``oid``.

        :param db: database
        :param oid: id of the row in the locations table
        :return: DefaultLocation populated from that row
        """
        # Bind the id as a query parameter, as Mexico.load does, instead of
        # formatting it into the SQL text.
        # NOTE(review): unpacking fails if db.load returns no row - confirm
        # how callers want a missing id reported.
        city, state, country, address = db.load(
            "select city, state, country, address from locations where id = %s", (oid,)
        )
        return DefaultLocation(db, [city, country, state, address], oid)

    @staticmethod
    def build_query():
        """Return the parameterised insert statement used by ``save``."""
        return """insert into locations (id, date_created, city, country, state, address)
        values (nextval('locations_seq'), now(), %s, %s, %s, %s) returning id"""
' 22 | 'Impeccable service and such kind and accomodating staff, especially Marcos and ' 23 | 'Johel. The residences, pool, and views are beautiful as well. We will be back!'} 24 | hotel_id, location_id, reviewer_location_id, review_id, reviewer_id = parse_row(self.db, test_row) 25 | # check hotel data 26 | hotel = HotelOrm(self.db, by_fields={"id": hotel_id}) 27 | self.assertEqual(test_row['Hotel'], hotel.get('hotel_name')) 28 | # check review data 29 | review = ReviewOrm(self.db, by_fields={"id": review_id}) 30 | self.assertEqual(review.get('reviewer_id'), reviewer_id) 31 | self.assertEqual(review.get('hotel_id'), hotel_id) 32 | # check reviewer data 33 | reviewer = ReviewerOrm(self.db, by_fields={"id": review.get('reviewer_id')}) 34 | self.assertEqual(reviewer.get('id'), review.get('reviewer_id')) 35 | # check location data 36 | location = Location.load(self.db, location_id, 'Mexico') 37 | self.assertEqual('Cabo San Lucas', location.city) 38 | self.assertEqual('Mexico', location.country) 39 | self.assertEqual('Baja California Sur', location.state) 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /tests/test_db_orm.py: -------------------------------------------------------------------------------- 1 | from review_store.db import build_select_query 2 | from review_store.hotel_orm import HotelOrm 3 | from review_store.location import Location 4 | from review_store.tests.base_test_case import BaseTestCase 5 | 6 | 7 | class TestDbOrm(BaseTestCase): 8 | 9 | def setUp(self) -> None: 10 | self.init() 11 | self.db.execute_no_result('truncate locations cascade') 12 | self.location = Location.new_location(self.db, "Kind of 23493 Mexico", 'Mexico') 13 | self.location.save() 14 | 15 | def test_select_query_build_multi_fields(self): 16 | q = build_select_query( 17 | 'hotels', 18 | ['hotel_name', 'address', 'location_id', 'id', 'date_created'], 19 | {'hotel_name': 'Test', 
# Mexican states keyed by the first two digits of the postal code. A key is
# either a single prefix ("23") or an inclusive range written with an ASCII
# hyphen ("21-22").
STATE_DATA = {
    "00-16": "CDMX",
    "20": "Aguascalientes",
    "21-22": "Baja California",
    "23": "Baja California Sur",
    "24": "Campeche",
    "29-30": "Chiapas",
    "31-33": "Chihuahua",
    "25-27": "Coahuila",
    "28": "Colima",
    "34-35": "Durango",
    "36-38": "Guanajuato",
    "39-41": "Guerrero",
    "42-43": "Hidalgo",
    "44-49": "Jalisco",
    "50-57": "México",
    "58-61": "Michoacán",
    "62": "Morelos",
    "63": "Nayarit",
    "64-67": "Nuevo León",
    "68-71": "Oaxaca",
    "72-75": "Puebla",
    "76": "Querétaro",
    "77": "Quintana Roo",
    "78-79": "San Luis Potosí",
    "80-82": "Sinaloa",
    "83-85": "Sonora",
    "86": "Tabasco",
    "87-89": "Tamaulipas",
    "90": "Tlaxcala",
    "91-96": "Veracruz",
    "97": "Yucatán",
    "98-99": "Zacatecas"
}


def get_state_from_zip(postal_code):
    """Return the Mexican state for a postal code.

    Only the first two characters of ``postal_code`` are used.

    :param postal_code: postal code as a string, e.g. ``'23410'``
    :return: state name from ``STATE_DATA``
    :raises IndexError: if no entry covers the two-digit prefix
    :raises ValueError: if the first two characters are not digits
    """
    two_digits = int(postal_code[:2])
    for prefix_range, state in STATE_DATA.items():
        # "23" partitions to ("23", "", ""), so a single prefix is treated
        # as a one-element range.
        low, _, high = prefix_range.partition('-')
        if int(low) <= two_digits <= int(high or low):
            return state
    raise IndexError("No key found")
def parse_row(db, row):
    """Persist one CSV record: hotel location, hotel, reviewer and review.

    :param db: database handle handed to the location and ORM classes.
    :param row: mapping for one CSV record. Reads the keys ``Address``,
        ``Reviewer Address``, ``Reviewer``, ``Review Title``,
        ``Review Star``, ``Full Review`` and ``Date`` (``Hotel`` is read by
        ``find_or_create_hotel``).
    :returns: ``[hotel.id, location.id, reviewer_location.id, review.id,
        reviewer.id]``.
    """
    # Hotel location: only insert when the lookup did not resolve an id.
    location = Location.new_location(db, row['Address'], 'Mexico')
    if not location.id:
        location.save()
    hotel = find_or_create_hotel(db, row, location)

    # Reviewer location is always saved.
    reviewer_location = Location.new_location(db, row['Reviewer Address'], 'DefaultLocation')
    reviewer_location.save()

    # The reviewer must point at their own address/location, not the hotel's.
    reviewer = ReviewerOrm(db, data={
        "name": row['Reviewer'],
        'address': row['Reviewer Address'],
        'location_id': reviewer_location.id,
    })
    reviewer.save()

    full_review = row['Full Review']
    review = ReviewOrm(db, data={
        'title': row['Review Title'],
        'rating': row['Review Star'],
        'full_review': full_review,
        'review_date': row['Date'],
        # Whitespace-separated token count of the review body.
        'word_count': len(full_review.split()),
        'reviewer_id': reviewer.id,
        'hotel_id': hotel.id,
    })
    review.save()
    return [hotel.id, location.id, reviewer_location.id, review.id, reviewer.id]
def get_db(kwargs):
    """Build a ``DB`` from the command-line options.

    :param kwargs: click option dict; must contain ``db`` and ``db_user``
        and may contain ``db_pass``, ``db_host`` and ``db_port``.
    :returns: a ``DB`` built from the resulting connection string.
    """
    # CLI option name -> connection-string keyword accepted by set_key().
    option_to_dsn_key = {'db_pass': 'password', 'db_host': 'host', 'db_port': 'port'}
    db_builder = DatabaseStringBuilder(kwargs['db'], kwargs['db_user'])
    for option, dsn_key in option_to_dsn_key.items():
        value = kwargs.get(option)
        # Options default to None when not given; leave those out of the DSN
        # instead of emitting e.g. "password=None".
        if value is None:
            continue
        db_builder.set_key(dsn_key, value)
    return DB(db_builder.get_connection_string())
def build_query(table, keys, expr):
    """Build a parameterized ``insert ... returning id`` statement.

    :param table: table name, interpolated verbatim.
    :param keys: column names whose values are bound at execute time; each
        gets a ``%s`` placeholder.
    :param expr: mapping of column name to a raw SQL expression (e.g. a
        sequence call or ``now()``) that is inlined verbatim.
    :returns: the SQL string, with ``expr`` columns first, then ``keys``.

    NOTE: ``table``, the column names and the ``expr`` values are pasted
    into the SQL text, so they must come from trusted code, never from user
    input. Only the values for ``keys`` are passed as bound parameters.
    """
    # Materialise once: the caller may hand in a one-shot iterable.
    keys = list(keys)
    # Join a single flat list so an empty ``expr`` or empty ``keys`` cannot
    # leave a dangling comma in the statement.
    columns = [*expr.keys(), *keys]
    values = [*expr.values(), *('%s' for _ in keys)]
    return """insert into %s (%s)
    values(%s) returning id""" % (table, ', '.join(columns), ', '.join(values))
class DbModel:
    """Base class for table-backed models.

    Subclasses pass an ``orm_data`` dict with the keys ``table``,
    ``fields``, ``expr``, ``by_fields`` and ``data`` and must override
    :meth:`validate`.
    """

    def __init__(self, db, orm_data=None):
        """Load an existing row (``by_fields``) or stage a new one (``data``).

        :param db: database handle; its ``dsn`` is used to open the ORM.
        :param orm_data: optional dict describing the table (see class
            docstring). When omitted only ``db``/``db_orm``/``id`` are set.
        :raises NotFoundException: when ``by_fields`` matches no row.
        """
        self.db = db
        # NOTE(review): this builds a new DBOrm per model instance; if DBOrm
        # opens a connection each time, consider sharing one.
        self.db_orm = DBOrm(db.dsn)
        # Always defined so callers can test ``model.id`` before save().
        self.id = None
        if not orm_data:
            return

        self.table_name = orm_data['table']
        self.data = {}
        # Set on both paths so save() never hits a missing attribute.
        self.expr = orm_data['expr']

        if orm_data['by_fields']:
            # Lookup path: fetch the declared fields plus expression columns.
            columns = [*orm_data['fields'], *orm_data['expr'].keys()]
            result = self.db_orm.load(orm_data['table'], columns, orm_data['by_fields'])
            if not result:
                raise NotFoundException("No such hotel found")
            for column, value in zip(columns, result):
                self.data[column] = value
            self.id = self.data['id']
        else:
            # Create path: keep only the values for declared fields.
            supplied = orm_data['data']
            for key in supplied:
                if key in orm_data['fields']:
                    self.data[key] = supplied[key]
            if 'id' in supplied:
                self.id = supplied['id']

    def validate(self):
        """Return True when the staged data may be saved; subclasses override."""
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError.
        raise NotImplementedError

    def get(self, key):
        """Return the stored value for column *key* (KeyError if absent)."""
        return self.data[key]

    def set(self, key, val):
        """Store *val* for column *key*."""
        self.data[key] = val

    def save(self):
        """Insert the staged row if it validates.

        :returns: True when a row id was returned by the insert, else False.
        """
        if not self.validate():
            return False
        self.id = self.db_orm.insert_row(
            self.table_name, self.data, self.expr
        )
        return bool(self.id)