├── .github └── workflows │ └── main.yaml ├── books_recommender ├── __init__.py ├── config │ ├── __init__.py │ └── configuration.py ├── entity │ ├── __init__.py │ └── config_entity.py ├── logger │ ├── __init__.py │ └── log.py ├── pipeline │ ├── __init__.py │ └── training_pipeline.py ├── utils │ ├── __init__.py │ └── util.py ├── components │ ├── __init__.py │ ├── stage_03_model_trainer.py │ ├── stage_00_data_ingestion.py │ ├── stage_02_data_transformation.py │ └── stage_01_data_validation.py ├── exception │ ├── __init__.py │ └── exception_handler.py └── constant │ └── __init__.py ├── requirements.txt ├── .dockerignore ├── templates ├── 1.png ├── 2.png ├── intro.jpeg └── book_names.pkl ├── Dockerfile ├── config └── config.yaml ├── setup.py ├── LICENSE ├── .gitignore ├── app.py ├── README.md └── notebook └── Books Recommender data analysis.ipynb /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /books_recommender/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /books_recommender/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /books_recommender/entity/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /books_recommender/logger/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /books_recommender/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /books_recommender/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /books_recommender/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /books_recommender/exception/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | numpy 3 | pandas 4 | scikit-learn 5 | notebook 6 | PyYAML 7 | -e . -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Environments 2 | .env 3 | .venv 4 | env/ 5 | venv/ 6 | ENV/ 7 | env.bak/ 8 | venv.bak/ -------------------------------------------------------------------------------- /templates/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/1.png -------------------------------------------------------------------------------- /templates/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/2.png -------------------------------------------------------------------------------- /templates/intro.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/intro.jpeg 
-------------------------------------------------------------------------------- /templates/book_names.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/book_names.pkl -------------------------------------------------------------------------------- /books_recommender/constant/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | ROOT_DIR = os.getcwd() 5 | # Main config file path 6 | CONFIG_FOLDER_NAME = "config" 7 | CONFIG_FILE_NAME = "config.yaml" 8 | CONFIG_FILE_PATH = os.path.join(ROOT_DIR,CONFIG_FOLDER_NAME,CONFIG_FILE_NAME) -------------------------------------------------------------------------------- /books_recommender/utils/util.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import sys 3 | from books_recommender.exception.exception_handler import AppException 4 | 5 | 6 | 7 | def read_yaml_file(file_path:str)->dict: 8 | """ 9 | Reads a YAML file and returns the contents as a dictionary. 10 | file_path: str 11 | """ 12 | try: 13 | with open(file_path, 'rb') as yaml_file: 14 | return yaml.safe_load(yaml_file) 15 | except Exception as e: 16 | raise AppException(e,sys) from e -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # app/Dockerfile 2 | 3 | FROM python:3.7-slim 4 | 5 | # COPY . /app 6 | 7 | EXPOSE 8501 8 | 9 | WORKDIR /app 10 | 11 | RUN apt-get update && apt-get install -y \ 12 | build-essential \ 13 | software-properties-common \ 14 | git \ 15 | && rm -rf /var/lib/apt/lists/* 16 | 17 | RUN git clone https://github.com/entbappy/ML-Based-Book-Recommender-System.git . 18 | 19 | COPY . 
/app 20 | 21 | RUN pip3 install -r requirements.txt 22 | 23 | ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"] -------------------------------------------------------------------------------- /books_recommender/logger/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from datetime import datetime 4 | 5 | 6 | # Creating logs directory to store log in files 7 | LOG_DIR = "logs" 8 | LOG_DIR = os.path.join(os.getcwd(), LOG_DIR) 9 | 10 | #Creating LOG_DIR if it does not exists. 11 | os.makedirs(LOG_DIR, exist_ok=True) 12 | 13 | 14 | # Creating file name for log file based on current timestamp 15 | CURRENT_TIME_STAMP = f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}" 16 | file_name = f"log_{CURRENT_TIME_STAMP}.log" 17 | 18 | #Creating file path for projects. 19 | log_file_path = os.path.join(LOG_DIR, file_name) 20 | 21 | 22 | logging.basicConfig(filename=log_file_path, 23 | filemode='w', 24 | format='[%(asctime)s] %(name)s - %(levelname)s - %(message)s', 25 | level=logging.NOTSET) 26 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | artifacts_config: 2 | artifacts_dir: artifacts 3 | 4 | data_ingestion_config: 5 | dataset_download_url: https://github.com/entbappy/Branching-tutorial/raw/master/books_data.zip 6 | dataset_dir: dataset 7 | ingested_dir: ingested_data 8 | raw_data_dir: raw_data 9 | 10 | 11 | data_validation_config: 12 | clean_data_dir: clean_data 13 | serialized_objects_dir: serialized_objects 14 | books_csv_file: BX-Books.csv 15 | ratings_csv_file: BX-Book-Ratings.csv 16 | 17 | 18 | data_transformation_config: 19 | transformed_data_dir: transformed_data 20 | 21 | 22 | model_trainer_config: 23 | trained_model_dir: trained_model 24 | trained_model_name: model.pkl 25 | 26 | 27 | recommendation_config: 28 | 
poster_api_url: https://api.themoviedb.org/3/movie/{}?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", "r", encoding="utf-8") as f: 4 | long_description = f.read() 5 | 6 | ## edit below variables as per your requirements - 7 | REPO_NAME = "ML Based Books Recommender System" 8 | AUTHOR_USER_NAME = "BOKTIAR AHMED BAPPY" 9 | SRC_REPO = "books_recommender" 10 | LIST_OF_REQUIREMENTS = [] 11 | 12 | 13 | setup( 14 | name=SRC_REPO, 15 | version="0.0.1", 16 | author="BOKTIAR AHMED BAPPY", 17 | description="A small local packages for ML based books recommendations", 18 | long_description=long_description, 19 | long_description_content_type="text/markdown", 20 | url="https://github.com/entbappy/ML-Based-Book-Recommender-System", 21 | author_email="boktiar@ineuron.ai", 22 | packages=find_packages(), 23 | license="MIT", 24 | python_requires=">=3.7", 25 | install_requires=LIST_OF_REQUIREMENTS 26 | ) 27 | -------------------------------------------------------------------------------- /books_recommender/pipeline/training_pipeline.py: -------------------------------------------------------------------------------- 1 | from books_recommender.components.stage_00_data_ingestion import DataIngestion 2 | from books_recommender.components.stage_01_data_validation import DataValidation 3 | from books_recommender.components.stage_02_data_transformation import DataTransformation 4 | from books_recommender.components.stage_03_model_trainer import ModelTrainer 5 | 6 | 7 | 8 | class TrainingPipeline: 9 | def __init__(self): 10 | self.data_ingestion = DataIngestion() 11 | self.data_validation = DataValidation() 12 | self.data_transformation = DataTransformation() 13 | self.model_trainer = ModelTrainer() 14 | 15 | 16 | def 
start_training_pipeline(self): 17 | """ 18 | Starts the training pipeline 19 | :return: none 20 | """ 21 | self.data_ingestion.initiate_data_ingestion() 22 | self.data_validation.initiate_data_validation() 23 | self.data_transformation.initiate_data_transformation() 24 | self.model_trainer.initiate_model_trainer() 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 iNeuron Intelligence Private Limited 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /books_recommender/entity/config_entity.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | DataIngestionConfig = namedtuple("DatasetConfig", ["dataset_download_url", 4 | "raw_data_dir", 5 | "ingested_dir"]) 6 | 7 | DataValidationConfig = namedtuple("DataValidationConfig", ["clean_data_dir", 8 | "books_csv_file", 9 | "ratings_csv_file", 10 | "serialized_objects_dir"]) 11 | 12 | 13 | DataTransformationConfig = namedtuple("DataTransformationConfig", ["clean_data_file_path", 14 | "transformed_data_dir"]) 15 | 16 | 17 | 18 | ModelTrainerConfig = namedtuple("ModelTrainerConfig", ["transformed_data_file_dir", 19 | "trained_model_dir", 20 | "trained_model_name"]) 21 | 22 | 23 | 24 | ModelRecommendationConfig = namedtuple("ModelRecommendationConfig", ["book_name_serialized_objects", 25 | "book_pivot_serialized_objects", 26 | "final_rating_serialized_objects", 27 | "trained_model_path"]) 28 | 29 | 30 | -------------------------------------------------------------------------------- /books_recommender/exception/exception_handler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | class AppException(Exception): 6 | """ 7 | Organization: iNeuron Intelligence Private Limited 8 | AppException is customized exception class designed to capture refined details about exception 9 | such as python script file line number along with error message 10 | With custom exception one can easily spot source of error and provide quick fix. 
11 | 12 | """ 13 | 14 | def __init__(self, error_message: Exception, error_detail: sys): 15 | """ 16 | :param error_message: error message in string format 17 | """ 18 | super().__init__(error_message) 19 | self.error_message = AppException.error_message_detail(error_message, error_detail=error_detail) 20 | 21 | @staticmethod 22 | def error_message_detail(error:Exception, error_detail:sys): 23 | """ 24 | error: Exception object raise from module 25 | error_detail: is sys module contains detail information about system execution information. 26 | """ 27 | _, _, exc_tb = error_detail.exc_info() 28 | #extracting file name from exception traceback 29 | file_name = exc_tb.tb_frame.f_code.co_filename 30 | 31 | #preparing error message 32 | error_message = f"Error occurred python script name [{file_name}]" \ 33 | f" line number [{exc_tb.tb_lineno}] error message [{error}]." 34 | 35 | return error_message 36 | 37 | def __repr__(self): 38 | """ 39 | Formating object of AppException 40 | """ 41 | return AppException.__name__.__str__() 42 | 43 | def __str__(self): 44 | """ 45 | Formating how a object should be visible if used in print statement. 
46 | """ 47 | return self.error_message -------------------------------------------------------------------------------- /books_recommender/components/stage_03_model_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pickle 4 | from sklearn.neighbors import NearestNeighbors 5 | from scipy.sparse import csr_matrix 6 | from books_recommender.logger.log import logging 7 | from books_recommender.config.configuration import AppConfiguration 8 | from books_recommender.exception.exception_handler import AppException 9 | 10 | 11 | class ModelTrainer: 12 | def __init__(self, app_config = AppConfiguration()): 13 | try: 14 | self.model_trainer_config = app_config.get_model_trainer_config() 15 | except Exception as e: 16 | raise AppException(e, sys) from e 17 | 18 | 19 | def train(self): 20 | try: 21 | #loading pivot data 22 | book_pivot = pickle.load(open(self.model_trainer_config.transformed_data_file_dir,'rb')) 23 | book_sparse = csr_matrix(book_pivot) 24 | #Training model 25 | model = NearestNeighbors(algorithm= 'brute') 26 | model.fit(book_sparse) 27 | 28 | #Saving model object for recommendations 29 | os.makedirs(self.model_trainer_config.trained_model_dir, exist_ok=True) 30 | file_name = os.path.join(self.model_trainer_config.trained_model_dir,self.model_trainer_config.trained_model_name) 31 | pickle.dump(model,open(file_name,'wb')) 32 | logging.info(f"Saving final model to {file_name}") 33 | 34 | except Exception as e: 35 | raise AppException(e, sys) from e 36 | 37 | 38 | 39 | def initiate_model_trainer(self): 40 | try: 41 | logging.info(f"{'='*20}Model Trainer log started.{'='*20} ") 42 | self.train() 43 | logging.info(f"{'='*20}Model Trainer log completed.{'='*20} \n\n") 44 | except Exception as e: 45 | raise AppException(e, sys) from e 46 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | artifacts/* 131 | -------------------------------------------------------------------------------- /books_recommender/components/stage_00_data_ingestion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from six.moves import urllib 4 | import zipfile 5 | from books_recommender.logger.log import logging 6 | from books_recommender.exception.exception_handler import AppException 7 | from books_recommender.config.configuration import AppConfiguration 8 | 9 | class DataIngestion: 10 | 11 | def __init__(self, app_config = AppConfiguration()): 12 | """ 13 | DataIngestion Intialization 14 | data_ingestion_config: DataIngestionConfig 15 | """ 16 | try: 17 | logging.info(f"{'='*20}Data Ingestion log started.{'='*20} ") 18 | self.data_ingestion_config= app_config.get_data_ingestion_config() 19 | except Exception as e: 20 | raise AppException(e, sys) from e 21 | 22 | 23 | def download_data(self): 24 | """ 25 | Fetch the data from the url 26 | 27 | """ 28 | try: 29 | 30 | dataset_url = self.data_ingestion_config.dataset_download_url 31 | zip_download_dir = self.data_ingestion_config.raw_data_dir 32 | os.makedirs(zip_download_dir, exist_ok=True) 33 | data_file_name = os.path.basename(dataset_url) 34 | zip_file_path = os.path.join(zip_download_dir, data_file_name) 35 | logging.info(f"Downloading data from {dataset_url} into file 
{zip_file_path}") 36 | urllib.request.urlretrieve(dataset_url,zip_file_path) 37 | logging.info(f"Downloaded data from {dataset_url} into file {zip_file_path}") 38 | return zip_file_path 39 | 40 | except Exception as e: 41 | raise AppException(e, sys) from e 42 | 43 | 44 | def extract_zip_file(self,zip_file_path: str): 45 | """ 46 | zip_file_path: str 47 | Extracts the zip file into the data directory 48 | Function returns None 49 | """ 50 | try: 51 | ingested_dir = self.data_ingestion_config.ingested_dir 52 | os.makedirs(ingested_dir, exist_ok=True) 53 | with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: 54 | zip_ref.extractall(ingested_dir) 55 | logging.info(f"Extracting zip file: {zip_file_path} into dir: {ingested_dir}") 56 | except Exception as e: 57 | raise AppException(e,sys) from e 58 | 59 | 60 | def initiate_data_ingestion(self): 61 | try: 62 | zip_file_path = self.download_data() 63 | self.extract_zip_file(zip_file_path=zip_file_path) 64 | logging.info(f"{'='*20}Data Ingestion log completed.{'='*20} \n\n") 65 | except Exception as e: 66 | raise AppException(e, sys) from e -------------------------------------------------------------------------------- /books_recommender/components/stage_02_data_transformation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pickle 4 | import pandas as pd 5 | from books_recommender.logger.log import logging 6 | from books_recommender.config.configuration import AppConfiguration 7 | from books_recommender.exception.exception_handler import AppException 8 | 9 | 10 | 11 | class DataTransformation: 12 | def __init__(self, app_config = AppConfiguration()): 13 | try: 14 | self.data_transformation_config = app_config.get_data_transformation_config() 15 | self.data_validation_config= app_config.get_data_validation_config() 16 | except Exception as e: 17 | raise AppException(e, sys) from e 18 | 19 | 20 | 21 | def get_data_transformer(self): 22 | try: 23 | 
df = pd.read_csv(self.data_transformation_config.clean_data_file_path) 24 | # Lets create a pivot table 25 | book_pivot = df.pivot_table(columns='user_id', index='title', values= 'rating') 26 | logging.info(f" Shape of book pivot table: {book_pivot.shape}") 27 | book_pivot.fillna(0, inplace=True) 28 | 29 | #saving pivot table data 30 | os.makedirs(self.data_transformation_config.transformed_data_dir, exist_ok=True) 31 | pickle.dump(book_pivot,open(os.path.join(self.data_transformation_config.transformed_data_dir,"transformed_data.pkl"),'wb')) 32 | logging.info(f"Saved pivot table data to {self.data_transformation_config.transformed_data_dir}") 33 | 34 | #keeping books name 35 | book_names = book_pivot.index 36 | 37 | #saving book_names objects for web app 38 | os.makedirs(self.data_validation_config.serialized_objects_dir, exist_ok=True) 39 | pickle.dump(book_names,open(os.path.join(self.data_validation_config.serialized_objects_dir, "book_names.pkl"),'wb')) 40 | logging.info(f"Saved book_names serialization object to {self.data_validation_config.serialized_objects_dir}") 41 | 42 | #saving book_pivot objects for web app 43 | os.makedirs(self.data_validation_config.serialized_objects_dir, exist_ok=True) 44 | pickle.dump(book_pivot,open(os.path.join(self.data_validation_config.serialized_objects_dir, "book_pivot.pkl"),'wb')) 45 | logging.info(f"Saved book_pivot serialization object to {self.data_validation_config.serialized_objects_dir}") 46 | 47 | except Exception as e: 48 | raise AppException(e, sys) from e 49 | 50 | 51 | 52 | def initiate_data_transformation(self): 53 | try: 54 | logging.info(f"{'='*20}Data Transformation log started.{'='*20} ") 55 | self.get_data_transformer() 56 | logging.info(f"{'='*20}Data Transformation log completed.{'='*20} \n\n") 57 | except Exception as e: 58 | raise AppException(e, sys) from e 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- 
/books_recommender/components/stage_01_data_validation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import ast 4 | import pandas as pd 5 | import pickle 6 | from books_recommender.logger.log import logging 7 | from books_recommender.config.configuration import AppConfiguration 8 | from books_recommender.exception.exception_handler import AppException 9 | 10 | 11 | 12 | class DataValidation: 13 | def __init__(self, app_config = AppConfiguration()): 14 | try: 15 | self.data_validation_config= app_config.get_data_validation_config() 16 | except Exception as e: 17 | raise AppException(e, sys) from e 18 | 19 | 20 | 21 | def preprocess_data(self): 22 | try: 23 | ratings = pd.read_csv(self.data_validation_config.ratings_csv_file, sep=";", error_bad_lines=False, encoding='latin-1') 24 | books = pd.read_csv(self.data_validation_config.books_csv_file, sep=";", error_bad_lines=False, encoding='latin-1') 25 | 26 | logging.info(f" Shape of ratings data file: {ratings.shape}") 27 | logging.info(f" Shape of books data file: {books.shape}") 28 | 29 | #Here Image URL columns is important for the poster. 
So, we will keep it 30 | books = books[['ISBN','Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher','Image-URL-L']] 31 | # Lets rename some weird column names in books 32 | books.rename(columns={"Book-Title":'title', 33 | 'Book-Author':'author', 34 | "Year-Of-Publication":'year', 35 | "Publisher":"publisher", 36 | "Image-URL-L":"image_url"},inplace=True) 37 | 38 | 39 | # Lets rename some weird column names in ratings 40 | ratings.rename(columns={"User-ID":'user_id', 41 | 'Book-Rating':'rating'},inplace=True) 42 | 43 | # Lets store users who had at least rated more than 200 books 44 | x = ratings['user_id'].value_counts() > 200 45 | y = x[x].index 46 | ratings = ratings[ratings['user_id'].isin(y)] 47 | 48 | # Now join ratings with books 49 | ratings_with_books = ratings.merge(books, on='ISBN') 50 | number_rating = ratings_with_books.groupby('title')['rating'].count().reset_index() 51 | number_rating.rename(columns={'rating':'num_of_rating'},inplace=True) 52 | final_rating = ratings_with_books.merge(number_rating, on='title') 53 | 54 | # Lets take those books which got at least 50 rating of user 55 | final_rating = final_rating[final_rating['num_of_rating'] >= 50] 56 | 57 | # lets drop the duplicates 58 | final_rating.drop_duplicates(['user_id','title'],inplace=True) 59 | logging.info(f" Shape of the final clean dataset: {final_rating.shape}") 60 | 61 | # Saving the cleaned data for transformation 62 | os.makedirs(self.data_validation_config.clean_data_dir, exist_ok=True) 63 | final_rating.to_csv(os.path.join(self.data_validation_config.clean_data_dir,'clean_data.csv'), index = False) 64 | logging.info(f"Saved cleaned data to {self.data_validation_config.clean_data_dir}") 65 | 66 | 67 | #saving final_rating objects for web app 68 | os.makedirs(self.data_validation_config.serialized_objects_dir, exist_ok=True) 69 | pickle.dump(final_rating,open(os.path.join(self.data_validation_config.serialized_objects_dir, "final_rating.pkl"),'wb')) 70 | 
logging.info(f"Saved final_rating serialization object to {self.data_validation_config.serialized_objects_dir}") 71 | 72 | except Exception as e: 73 | raise AppException(e, sys) from e 74 | 75 | 76 | def initiate_data_validation(self): 77 | try: 78 | logging.info(f"{'='*20}Data Validation log started.{'='*20} ") 79 | self.preprocess_data() 80 | logging.info(f"{'='*20}Data Validation log completed.{'='*20} \n\n") 81 | except Exception as e: 82 | raise AppException(e, sys) from e 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pickle 4 | import streamlit as st 5 | import numpy as np 6 | from books_recommender.logger.log import logging 7 | from books_recommender.config.configuration import AppConfiguration 8 | from books_recommender.pipeline.training_pipeline import TrainingPipeline 9 | from books_recommender.exception.exception_handler import AppException 10 | 11 | 12 | class Recommendation: 13 | def __init__(self,app_config = AppConfiguration()): 14 | try: 15 | self.recommendation_config= app_config.get_recommendation_config() 16 | except Exception as e: 17 | raise AppException(e, sys) from e 18 | 19 | 20 | def fetch_poster(self,suggestion): 21 | try: 22 | book_name = [] 23 | ids_index = [] 24 | poster_url = [] 25 | book_pivot = pickle.load(open(self.recommendation_config.book_pivot_serialized_objects,'rb')) 26 | final_rating = pickle.load(open(self.recommendation_config.final_rating_serialized_objects,'rb')) 27 | 28 | for book_id in suggestion: 29 | book_name.append(book_pivot.index[book_id]) 30 | 31 | for name in book_name[0]: 32 | ids = np.where(final_rating['title'] == name)[0][0] 33 | ids_index.append(ids) 34 | 35 | for idx in ids_index: 36 | url = final_rating.iloc[idx]['image_url'] 37 | poster_url.append(url) 38 | 39 | return poster_url 40 | 41 | except Exception as e: 42 | raise 
AppException(e, sys) from e 43 | 44 | 45 | 46 | def recommend_book(self,book_name): 47 | try: 48 | books_list = [] 49 | model = pickle.load(open(self.recommendation_config.trained_model_path,'rb')) 50 | book_pivot = pickle.load(open(self.recommendation_config.book_pivot_serialized_objects,'rb')) 51 | book_id = np.where(book_pivot.index == book_name)[0][0] 52 | distance, suggestion = model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1), n_neighbors=6 ) 53 | 54 | poster_url = self.fetch_poster(suggestion) 55 | 56 | for i in range(len(suggestion)): 57 | books = book_pivot.index[suggestion[i]] 58 | for j in books: 59 | books_list.append(j) 60 | return books_list , poster_url 61 | 62 | except Exception as e: 63 | raise AppException(e, sys) from e 64 | 65 | 66 | def train_engine(self): 67 | try: 68 | obj = TrainingPipeline() 69 | obj.start_training_pipeline() 70 | st.text("Training Completed!") 71 | logging.info(f"Recommended successfully!") 72 | except Exception as e: 73 | raise AppException(e, sys) from e 74 | 75 | 76 | def recommendations_engine(self,selected_books): 77 | try: 78 | recommended_books,poster_url = self.recommend_book(selected_books) 79 | col1, col2, col3, col4, col5 = st.columns(5) 80 | with col1: 81 | st.text(recommended_books[1]) 82 | st.image(poster_url[1]) 83 | with col2: 84 | st.text(recommended_books[2]) 85 | st.image(poster_url[2]) 86 | 87 | with col3: 88 | st.text(recommended_books[3]) 89 | st.image(poster_url[3]) 90 | with col4: 91 | st.text(recommended_books[4]) 92 | st.image(poster_url[4]) 93 | with col5: 94 | st.text(recommended_books[5]) 95 | st.image(poster_url[5]) 96 | except Exception as e: 97 | raise AppException(e, sys) from e 98 | 99 | 100 | 101 | if __name__ == "__main__": 102 | st.header('ML Based Books Recommender System') 103 | st.text("This is a collaborative filtering based recommendation system!") 104 | 105 | obj = Recommendation() 106 | 107 | #Training 108 | if st.button('Train Recommender System'): 109 | 
obj.train_engine() 110 | 111 | book_names = pickle.load(open(os.path.join('templates','book_names.pkl') ,'rb')) 112 | selected_books = st.selectbox( 113 | "Type or select a book from the dropdown", 114 | book_names) 115 | 116 | #recommendation 117 | if st.button('Show Recommendation'): 118 | obj.recommendations_engine(selected_books) 119 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project: ML Based Book Recommender System ! | Collaborative Filtering Based 2 | 3 | workflow 4 | 5 | Recommendation systems are becoming increasingly important in today’s extremely busy world. People are always short on time with the myriad tasks they need to accomplish in the limited 24 hours. Therefore, the recommendation systems are important as they help them make the right choices, without having to expend their cognitive resources. 6 | 7 | The purpose of a recommendation system basically is to search for content that would be interesting to an individual. Moreover, it involves a number of factors to create personalised lists of useful and interesting content specific to each user/individual. Recommendation systems are Artificial Intelligence based algorithms that skim through all possible options and create a customized list of items that are interesting and relevant to an individual. These results are based on their profile, search/browsing history, what other people with similar traits/demographics are watching, and how likely are you to watch those movies. This is achieved through predictive modeling and heuristics with the data available. 8 | 9 | 10 | # Note: 11 | If you want to understand this entire project overflow, please refer the jupyter notebook file inside notebook folder. 
12 | 13 | # Types of Recommendation System : 14 | 15 | ### 1 ) Content Based : 16 | 17 | - Content-based systems, which use characteristic information and take item attributes into consideration . 18 | 19 | - Twitter , Youtube . 20 | 21 | - Which music you are listening , what singer are you watching . Form embeddings for the features . 22 | 23 | - User specific actions or similar items recommendation . 24 | 25 | - It will create a vector of it . 26 | 27 | - These systems make recommendations using a user's item and profile features. They hypothesize that if a user was interested in an item in the past, they will once again be interested in it in the future 28 | 29 | - One issue that arises is making obvious recommendations because of excessive specialization (user A is only interested in categories B, C, and D, and the system is not able to recommend items outside those categories, even though they could be interesting to them). 30 | 31 | ### 2 ) Collaborative Based : 32 | 33 | - Collaborative filtering systems, which are based on user-item interactions. 34 | 35 | - Clusters of users with same ratings , similar users . 36 | 37 | - Book recommendation , so use cluster mechanism . 38 | 39 | - We take only one parameter , ratings or comments . 40 | 41 | - In short, collaborative filtering systems are based on the assumption that if a user likes item A and another user likes the same item A as well as another item, item B, the first user could also be interested in the second item . 42 | 43 | - Issues are : 44 | 45 | - User-Item nXn matrix , so computationally expensive . 46 | 47 | - Only famous items will get recommended . 48 | 49 | - New items might not get recommended at all . 50 | 51 | ### 3 ) Hybrid Based : 52 | 53 | - Hybrid systems, which combine both types of information with the aim of avoiding problems that are generated when working with just one kind. 54 | 55 | - Combination of both and used nowadays . 56 | 57 | - Uses : word2vec , embedding .
58 | 59 | # About this project: 60 | 61 | This is a collaborative filtering based books recommender system & a streamlit web application that can recommend various kinds of similar books based on an user interest. 62 | 63 | 64 | 65 | # Demo: 66 | 67 | workflow 68 | 69 | workflow 70 | 71 | 72 | 73 | # Dataset has been used: 74 | 75 | * [Dataset link](https://www.kaggle.com/ra4u12/bookrecommendation) 76 | 77 | # Concept used to build the model.pkl file : NearestNeighbors 78 | 79 | 1 . Load the data 80 | 81 | 2 . Initialise the value of k 82 | 83 | 3 . For getting the predicted class, iterate from 1 to total number of training data points 84 | 85 | 4 . Calculate the distance between test data and each row of training data. Here we will use Euclidean distance as our distance metric since it’s the most popular method. 86 | 87 | 5 . Sort the calculated distances in ascending order based on distance values 88 | 89 | 6 . Get top k rows from the sorted array 90 | 91 | # Built With 92 | 1. streamlit 93 | 2. Machine learning 94 | 3. sklearn 95 | 96 | # How to run? 97 | ### STEPS: 98 | 99 | Clone the repository 100 | 101 | ```bash 102 | https://github.com/entbappy/ML-Based-Book-Recommender-System.git 103 | ``` 104 | ### STEP 01- Create a conda environment after opening the repository 105 | 106 | ```bash 107 | conda create -n books python=3.7.10 -y 108 | ``` 109 | 110 | ```bash 111 | conda activate books 112 | ``` 113 | 114 | 115 | ### STEP 02- install the requirements 116 | ```bash 117 | pip install -r requirements.txt 118 | ``` 119 | 120 | 121 | Now run, 122 | ```bash 123 | streamlit run app.py 124 | ``` 125 | 126 | ```bash 127 | Note: Before clicking on show recommendations first of all click on Train Recommender System for generating models 128 | ``` 129 | 130 | # How to run in Docker? 131 | 132 | #### Build a Docker image 133 | The docker build command builds an image from a Dockerfile . 
Run the following command from the app/ directory on your server to build the image: 134 | 135 | 136 | ```bash 137 | docker build -t streamlit . 138 | ``` 139 | 140 | The -t flag is used to tag the image. Here, we have tagged the image streamlit. If you run: 141 | 142 | ```bash 143 | docker images 144 | ``` 145 | You should see a streamlit image under the REPOSITORY column. For example: 146 | 147 | ```bash 148 | REPOSITORY TAG IMAGE ID CREATED SIZE 149 | streamlit latest 70b0759a094d About a minute ago 1.02GB 150 | ``` 151 | 152 | #### Run the Docker container 153 | Now that you have built the image, you can run the container by executing: 154 | 155 | ```bash 156 | docker run -p 8501:8501 streamlit 157 | ``` 158 | 159 | The -p flag publishes the container’s port 8501 to your server’s 8501 port. 160 | 161 | If all went well, you should see an output similar to the following: 162 | 163 | ```bash 164 | $ docker run -p 8501:8501 streamlit 165 | 166 | You can now view your Streamlit app in your browser. 
167 | 168 | URL: http://127.0.0.1:8501/ 169 | ``` 170 | 171 | To view your app, users can browse to http://0.0.0.0:8501 or http://127.0.0.1:8501/ 172 | 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /books_recommender/config/configuration.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from books_recommender.logger.log import logging 4 | from books_recommender.utils.util import read_yaml_file 5 | from books_recommender.exception.exception_handler import AppException 6 | from books_recommender.entity.config_entity import DataIngestionConfig, DataValidationConfig, DataTransformationConfig, ModelTrainerConfig, ModelRecommendationConfig 7 | from books_recommender.constant import * 8 | 9 | 10 | class AppConfiguration: 11 | def __init__(self, config_file_path: str = CONFIG_FILE_PATH): 12 | try: 13 | self.configs_info = read_yaml_file(file_path=config_file_path) 14 | except Exception as e: 15 | raise AppException(e, sys) from e 16 | 17 | 18 | def get_data_ingestion_config(self) -> DataIngestionConfig: 19 | try: 20 | data_ingestion_config = self.configs_info['data_ingestion_config'] 21 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir'] 22 | dataset_dir = data_ingestion_config['dataset_dir'] 23 | 24 | ingested_data_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['ingested_dir']) 25 | raw_data_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['raw_data_dir']) 26 | 27 | response = DataIngestionConfig( 28 | dataset_download_url = data_ingestion_config['dataset_download_url'], 29 | raw_data_dir = raw_data_dir, 30 | ingested_dir = ingested_data_dir 31 | ) 32 | 33 | logging.info(f"Data Ingestion Config: {response}") 34 | return response 35 | 36 | except Exception as e: 37 | raise AppException(e, sys) from e 38 | 39 | 40 | 41 | def get_data_validation_config(self) -> DataValidationConfig: 42 | 
try: 43 | data_validation_config = self.configs_info['data_validation_config'] 44 | data_ingestion_config = self.configs_info['data_ingestion_config'] 45 | dataset_dir = data_ingestion_config['dataset_dir'] 46 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir'] 47 | books_csv_file = data_validation_config['books_csv_file'] 48 | ratings_csv_file = data_validation_config['ratings_csv_file'] 49 | 50 | books_csv_file_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['ingested_dir'], books_csv_file) 51 | ratings_csv_file_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['ingested_dir'], ratings_csv_file) 52 | clean_data_path = os.path.join(artifacts_dir, dataset_dir, data_validation_config['clean_data_dir']) 53 | serialized_objects_dir = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir']) 54 | 55 | response = DataValidationConfig( 56 | clean_data_dir = clean_data_path, 57 | books_csv_file = books_csv_file_dir, 58 | ratings_csv_file = ratings_csv_file_dir, 59 | serialized_objects_dir = serialized_objects_dir 60 | ) 61 | 62 | logging.info(f"Data Validation Config: {response}") 63 | return response 64 | 65 | except Exception as e: 66 | raise AppException(e, sys) from e 67 | 68 | 69 | 70 | def get_data_transformation_config(self) -> DataTransformationConfig: 71 | try: 72 | data_transformation_config = self.configs_info['data_transformation_config'] 73 | data_validation_config = self.configs_info['data_validation_config'] 74 | data_ingestion_config = self.configs_info['data_ingestion_config'] 75 | dataset_dir = data_ingestion_config['dataset_dir'] 76 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir'] 77 | 78 | clean_data_file_path = os.path.join(artifacts_dir, dataset_dir, data_validation_config['clean_data_dir'],'clean_data.csv') 79 | transformed_data_dir = os.path.join(artifacts_dir, dataset_dir, data_transformation_config['transformed_data_dir']) 80 | 81 | response = 
DataTransformationConfig( 82 | clean_data_file_path = clean_data_file_path, 83 | transformed_data_dir = transformed_data_dir 84 | ) 85 | 86 | logging.info(f"Data Transformation Config: {response}") 87 | return response 88 | 89 | except Exception as e: 90 | raise AppException(e, sys) from e 91 | 92 | 93 | 94 | def get_model_trainer_config(self) -> ModelTrainerConfig: 95 | try: 96 | model_trainer_config = self.configs_info['model_trainer_config'] 97 | data_transformation_config = self.configs_info['data_transformation_config'] 98 | data_ingestion_config = self.configs_info['data_ingestion_config'] 99 | dataset_dir = data_ingestion_config['dataset_dir'] 100 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir'] 101 | 102 | 103 | 104 | transformed_data_file_dir = os.path.join(artifacts_dir, dataset_dir, data_transformation_config['transformed_data_dir'], 'transformed_data.pkl') 105 | trained_model_dir = os.path.join(artifacts_dir, model_trainer_config['trained_model_dir']) 106 | trained_model_name = model_trainer_config['trained_model_name'] 107 | 108 | response = ModelTrainerConfig( 109 | transformed_data_file_dir = transformed_data_file_dir, 110 | trained_model_dir = trained_model_dir, 111 | trained_model_name = trained_model_name 112 | ) 113 | 114 | logging.info(f"Model Trainer Config: {response}") 115 | return response 116 | 117 | except Exception as e: 118 | raise AppException(e, sys) from e 119 | 120 | 121 | 122 | def get_recommendation_config(self) -> ModelRecommendationConfig: 123 | try: 124 | recommendation_config = self.configs_info['recommendation_config'] 125 | model_trainer_config = self.configs_info['model_trainer_config'] 126 | data_validation_config = self.configs_info['data_validation_config'] 127 | trained_model_name = model_trainer_config['trained_model_name'] 128 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir'] 129 | trained_model_dir = os.path.join(artifacts_dir, model_trainer_config['trained_model_dir']) 
130 | poster_api = recommendation_config['poster_api_url'] 131 | 132 | 133 | book_name_serialized_objects = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'], 'book_names.pkl') 134 | book_pivot_serialized_objects = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'], 'book_pivot.pkl') 135 | final_rating_serialized_objects = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'], 'final_rating.pkl') 136 | 137 | trained_model_path = os.path.join(trained_model_dir,trained_model_name) 138 | 139 | response = ModelRecommendationConfig( 140 | book_name_serialized_objects = book_name_serialized_objects, 141 | book_pivot_serialized_objects = book_pivot_serialized_objects, 142 | final_rating_serialized_objects = final_rating_serialized_objects, 143 | trained_model_path = trained_model_path 144 | ) 145 | 146 | logging.info(f"Model Recommendation Config: {response}") 147 | return response 148 | 149 | except Exception as e: 150 | raise AppException(e, sys) from e -------------------------------------------------------------------------------- /notebook/Books Recommender data analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Books Recommender system using clustering\n", 8 | "Collaborative filtering\n", 9 | "- Dataset :- https://www.kaggle.com/ra4u12/bookrecommendation" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# Importing necessary library\n", 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "# import matplotlib.pyplot as plt\n", 22 | "# import seaborn as sns" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stderr", 32 | "output_type": "stream", 33 | "text": [ 34 | 
"b'Skipping line 6452: expected 8 fields, saw 9\\nSkipping line 43667: expected 8 fields, saw 10\\nSkipping line 51751: expected 8 fields, saw 9\\n'\n", 35 | "b'Skipping line 92038: expected 8 fields, saw 9\\nSkipping line 104319: expected 8 fields, saw 9\\nSkipping line 121768: expected 8 fields, saw 9\\n'\n", 36 | "b'Skipping line 144058: expected 8 fields, saw 9\\nSkipping line 150789: expected 8 fields, saw 9\\nSkipping line 157128: expected 8 fields, saw 9\\nSkipping line 180189: expected 8 fields, saw 9\\nSkipping line 185738: expected 8 fields, saw 9\\n'\n", 37 | "b'Skipping line 209388: expected 8 fields, saw 9\\nSkipping line 220626: expected 8 fields, saw 9\\nSkipping line 227933: expected 8 fields, saw 11\\nSkipping line 228957: expected 8 fields, saw 10\\nSkipping line 245933: expected 8 fields, saw 9\\nSkipping line 251296: expected 8 fields, saw 9\\nSkipping line 259941: expected 8 fields, saw 9\\nSkipping line 261529: expected 8 fields, saw 9\\n'\n", 38 | "C:\\Anaconda\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3146: DtypeWarning: Columns (3) have mixed types.Specify dtype option on import or set low_memory=False.\n", 39 | " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "books = pd.read_csv('data/BX-Books.csv', sep=\";\", error_bad_lines=False, encoding='latin-1')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/html": [ 55 | "
\n", 56 | "\n", 69 | "\n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | "
ISBNBook-TitleBook-AuthorYear-Of-PublicationPublisherImage-URL-SImage-URL-MImage-URL-L
00195153448Classical MythologyMark P. O. Morford2002Oxford University Presshttp://images.amazon.com/images/P/0195153448.0...http://images.amazon.com/images/P/0195153448.0...http://images.amazon.com/images/P/0195153448.0...
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canadahttp://images.amazon.com/images/P/0002005018.0...http://images.amazon.com/images/P/0002005018.0...http://images.amazon.com/images/P/0002005018.0...
20060973129Decision in NormandyCarlo D'Este1991HarperPerennialhttp://images.amazon.com/images/P/0060973129.0...http://images.amazon.com/images/P/0060973129.0...http://images.amazon.com/images/P/0060973129.0...
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Girouxhttp://images.amazon.com/images/P/0374157065.0...http://images.amazon.com/images/P/0374157065.0...http://images.amazon.com/images/P/0374157065.0...
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Companyhttp://images.amazon.com/images/P/0393045218.0...http://images.amazon.com/images/P/0393045218.0...http://images.amazon.com/images/P/0393045218.0...
\n", 141 | "
" 142 | ], 143 | "text/plain": [ 144 | " ISBN Book-Title \\\n", 145 | "0 0195153448 Classical Mythology \n", 146 | "1 0002005018 Clara Callan \n", 147 | "2 0060973129 Decision in Normandy \n", 148 | "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n", 149 | "4 0393045218 The Mummies of Urumchi \n", 150 | "\n", 151 | " Book-Author Year-Of-Publication Publisher \\\n", 152 | "0 Mark P. O. Morford 2002 Oxford University Press \n", 153 | "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n", 154 | "2 Carlo D'Este 1991 HarperPerennial \n", 155 | "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n", 156 | "4 E. J. W. Barber 1999 W. W. Norton & Company \n", 157 | "\n", 158 | " Image-URL-S \\\n", 159 | "0 http://images.amazon.com/images/P/0195153448.0... \n", 160 | "1 http://images.amazon.com/images/P/0002005018.0... \n", 161 | "2 http://images.amazon.com/images/P/0060973129.0... \n", 162 | "3 http://images.amazon.com/images/P/0374157065.0... \n", 163 | "4 http://images.amazon.com/images/P/0393045218.0... \n", 164 | "\n", 165 | " Image-URL-M \\\n", 166 | "0 http://images.amazon.com/images/P/0195153448.0... \n", 167 | "1 http://images.amazon.com/images/P/0002005018.0... \n", 168 | "2 http://images.amazon.com/images/P/0060973129.0... \n", 169 | "3 http://images.amazon.com/images/P/0374157065.0... \n", 170 | "4 http://images.amazon.com/images/P/0393045218.0... \n", 171 | "\n", 172 | " Image-URL-L \n", 173 | "0 http://images.amazon.com/images/P/0195153448.0... \n", 174 | "1 http://images.amazon.com/images/P/0002005018.0... \n", 175 | "2 http://images.amazon.com/images/P/0060973129.0... \n", 176 | "3 http://images.amazon.com/images/P/0374157065.0... \n", 177 | "4 http://images.amazon.com/images/P/0393045218.0... 
" 178 | ] 179 | }, 180 | "execution_count": 3, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "books.head()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 4, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "'http://images.amazon.com/images/P/0671027387.01.LZZZZZZZ.jpg'" 198 | ] 199 | }, 200 | "execution_count": 4, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "books.iloc[237]['Image-URL-L']" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 5, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "# !curl \"http://images.amazon.com/images/P/0195153448.01.THUMBZZZ.jpg\" --out.png\n", 216 | "# !curl http://images.amazon.com/images/P/0060973129.01.THUMBZZZ.jpg --output some.jpg" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 6, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/plain": [ 227 | "(271360, 8)" 228 | ] 229 | }, 230 | "execution_count": 6, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "books.shape" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 7, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "data": { 246 | "text/plain": [ 247 | "Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',\n", 248 | " 'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],\n", 249 | " dtype='object')" 250 | ] 251 | }, 252 | "execution_count": 7, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "books.columns" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "#### Conclution:\n", 266 | "Here Image URL columns is important for the poster. 
So, we will keep it" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 8, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "books = books[['ISBN','Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher','Image-URL-L']]" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 9, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/html": [ 286 | "
\n", 287 | "\n", 300 | "\n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | "
ISBNBook-TitleBook-AuthorYear-Of-PublicationPublisherImage-URL-L
00195153448Classical MythologyMark P. O. Morford2002Oxford University Presshttp://images.amazon.com/images/P/0195153448.0...
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canadahttp://images.amazon.com/images/P/0002005018.0...
20060973129Decision in NormandyCarlo D'Este1991HarperPerennialhttp://images.amazon.com/images/P/0060973129.0...
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Girouxhttp://images.amazon.com/images/P/0374157065.0...
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Companyhttp://images.amazon.com/images/P/0393045218.0...
\n", 360 | "
" 361 | ], 362 | "text/plain": [ 363 | " ISBN Book-Title \\\n", 364 | "0 0195153448 Classical Mythology \n", 365 | "1 0002005018 Clara Callan \n", 366 | "2 0060973129 Decision in Normandy \n", 367 | "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n", 368 | "4 0393045218 The Mummies of Urumchi \n", 369 | "\n", 370 | " Book-Author Year-Of-Publication Publisher \\\n", 371 | "0 Mark P. O. Morford 2002 Oxford University Press \n", 372 | "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n", 373 | "2 Carlo D'Este 1991 HarperPerennial \n", 374 | "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n", 375 | "4 E. J. W. Barber 1999 W. W. Norton & Company \n", 376 | "\n", 377 | " Image-URL-L \n", 378 | "0 http://images.amazon.com/images/P/0195153448.0... \n", 379 | "1 http://images.amazon.com/images/P/0002005018.0... \n", 380 | "2 http://images.amazon.com/images/P/0060973129.0... \n", 381 | "3 http://images.amazon.com/images/P/0374157065.0... \n", 382 | "4 http://images.amazon.com/images/P/0393045218.0... " 383 | ] 384 | }, 385 | "execution_count": 9, 386 | "metadata": {}, 387 | "output_type": "execute_result" 388 | } 389 | ], 390 | "source": [ 391 | "books.head()" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 10, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "# Lets remane some wierd columns name\n", 401 | "books.rename(columns={\"Book-Title\":'title',\n", 402 | " 'Book-Author':'author',\n", 403 | " \"Year-Of-Publication\":'year',\n", 404 | " \"Publisher\":\"publisher\",\n", 405 | " \"Image-URL-L\":\"image_url\"},inplace=True)" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 11, 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/html": [ 416 | "
\n", 417 | "\n", 430 | "\n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | "
ISBNtitleauthoryearpublisherimage_url
00195153448Classical MythologyMark P. O. Morford2002Oxford University Presshttp://images.amazon.com/images/P/0195153448.0...
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canadahttp://images.amazon.com/images/P/0002005018.0...
20060973129Decision in NormandyCarlo D'Este1991HarperPerennialhttp://images.amazon.com/images/P/0060973129.0...
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Girouxhttp://images.amazon.com/images/P/0374157065.0...
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Companyhttp://images.amazon.com/images/P/0393045218.0...
\n", 490 | "
" 491 | ], 492 | "text/plain": [ 493 | " ISBN title \\\n", 494 | "0 0195153448 Classical Mythology \n", 495 | "1 0002005018 Clara Callan \n", 496 | "2 0060973129 Decision in Normandy \n", 497 | "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n", 498 | "4 0393045218 The Mummies of Urumchi \n", 499 | "\n", 500 | " author year publisher \\\n", 501 | "0 Mark P. O. Morford 2002 Oxford University Press \n", 502 | "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n", 503 | "2 Carlo D'Este 1991 HarperPerennial \n", 504 | "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n", 505 | "4 E. J. W. Barber 1999 W. W. Norton & Company \n", 506 | "\n", 507 | " image_url \n", 508 | "0 http://images.amazon.com/images/P/0195153448.0... \n", 509 | "1 http://images.amazon.com/images/P/0002005018.0... \n", 510 | "2 http://images.amazon.com/images/P/0060973129.0... \n", 511 | "3 http://images.amazon.com/images/P/0374157065.0... \n", 512 | "4 http://images.amazon.com/images/P/0393045218.0... " 513 | ] 514 | }, 515 | "execution_count": 11, 516 | "metadata": {}, 517 | "output_type": "execute_result" 518 | } 519 | ], 520 | "source": [ 521 | "books.head()" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": 12, 527 | "metadata": {}, 528 | "outputs": [], 529 | "source": [ 530 | "# Now load the second dataframe\n", 531 | "\n", 532 | "users = pd.read_csv('data/BX-Users.csv', sep=\";\", error_bad_lines=False, encoding='latin-1')" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 13, 538 | "metadata": {}, 539 | "outputs": [ 540 | { 541 | "data": { 542 | "text/html": [ 543 | "
\n", 544 | "\n", 557 | "\n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | "
User-IDLocationAge
01nyc, new york, usaNaN
12stockton, california, usa18.0
23moscow, yukon territory, russiaNaN
34porto, v.n.gaia, portugal17.0
45farnborough, hants, united kingdomNaN
\n", 599 | "
" 600 | ], 601 | "text/plain": [ 602 | " User-ID Location Age\n", 603 | "0 1 nyc, new york, usa NaN\n", 604 | "1 2 stockton, california, usa 18.0\n", 605 | "2 3 moscow, yukon territory, russia NaN\n", 606 | "3 4 porto, v.n.gaia, portugal 17.0\n", 607 | "4 5 farnborough, hants, united kingdom NaN" 608 | ] 609 | }, 610 | "execution_count": 13, 611 | "metadata": {}, 612 | "output_type": "execute_result" 613 | } 614 | ], 615 | "source": [ 616 | "users.head()" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 14, 622 | "metadata": {}, 623 | "outputs": [ 624 | { 625 | "data": { 626 | "text/plain": [ 627 | "(278858, 3)" 628 | ] 629 | }, 630 | "execution_count": 14, 631 | "metadata": {}, 632 | "output_type": "execute_result" 633 | } 634 | ], 635 | "source": [ 636 | "users.shape" 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": 15, 642 | "metadata": {}, 643 | "outputs": [], 644 | "source": [ 645 | "# Lets remane some wierd columns name\n", 646 | "users.rename(columns={\"User-ID\":'user_id',\n", 647 | " 'Location':'location',\n", 648 | " \"Age\":'age'},inplace=True)" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 16, 654 | "metadata": {}, 655 | "outputs": [ 656 | { 657 | "data": { 658 | "text/html": [ 659 | "
\n", 660 | "\n", 673 | "\n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | "
user_idlocationage
01nyc, new york, usaNaN
12stockton, california, usa18.0
\n", 697 | "
" 698 | ], 699 | "text/plain": [ 700 | " user_id location age\n", 701 | "0 1 nyc, new york, usa NaN\n", 702 | "1 2 stockton, california, usa 18.0" 703 | ] 704 | }, 705 | "execution_count": 16, 706 | "metadata": {}, 707 | "output_type": "execute_result" 708 | } 709 | ], 710 | "source": [ 711 | "users.head(2)" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": 17, 717 | "metadata": {}, 718 | "outputs": [], 719 | "source": [ 720 | "# Now load the third dataframe\n", 721 | "\n", 722 | "ratings = pd.read_csv('data/BX-Book-Ratings.csv', sep=\";\", error_bad_lines=False, encoding='latin-1')" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": 18, 728 | "metadata": {}, 729 | "outputs": [ 730 | { 731 | "data": { 732 | "text/html": [ 733 | "
\n", 734 | "\n", 747 | "\n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | "
User-IDISBNBook-Rating
0276725034545104X0
127672601550612245
227672704465208020
3276729052165615X3
427672905217950286
\n", 789 | "
" 790 | ], 791 | "text/plain": [ 792 | " User-ID ISBN Book-Rating\n", 793 | "0 276725 034545104X 0\n", 794 | "1 276726 0155061224 5\n", 795 | "2 276727 0446520802 0\n", 796 | "3 276729 052165615X 3\n", 797 | "4 276729 0521795028 6" 798 | ] 799 | }, 800 | "execution_count": 18, 801 | "metadata": {}, 802 | "output_type": "execute_result" 803 | } 804 | ], 805 | "source": [ 806 | "ratings.head()" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": 19, 812 | "metadata": {}, 813 | "outputs": [ 814 | { 815 | "data": { 816 | "text/plain": [ 817 | "(1149780, 3)" 818 | ] 819 | }, 820 | "execution_count": 19, 821 | "metadata": {}, 822 | "output_type": "execute_result" 823 | } 824 | ], 825 | "source": [ 826 | "ratings.shape" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": 20, 832 | "metadata": {}, 833 | "outputs": [], 834 | "source": [ 835 | "# Lets remane some wierd columns name\n", 836 | "ratings.rename(columns={\"User-ID\":'user_id',\n", 837 | " 'Book-Rating':'rating'},inplace=True)" 838 | ] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "execution_count": 21, 843 | "metadata": {}, 844 | "outputs": [ 845 | { 846 | "data": { 847 | "text/html": [ 848 | "
\n", 849 | "\n", 862 | "\n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | "
user_idISBNrating
0276725034545104X0
127672601550612245
\n", 886 | "
" 887 | ], 888 | "text/plain": [ 889 | " user_id ISBN rating\n", 890 | "0 276725 034545104X 0\n", 891 | "1 276726 0155061224 5" 892 | ] 893 | }, 894 | "execution_count": 21, 895 | "metadata": {}, 896 | "output_type": "execute_result" 897 | } 898 | ], 899 | "source": [ 900 | "ratings.head(2)" 901 | ] 902 | }, 903 | { 904 | "cell_type": "markdown", 905 | "metadata": {}, 906 | "source": [ 907 | "### Conclution:\n", 908 | "Now we have 3 dataframes\n", 909 | "- books\n", 910 | "- users\n", 911 | "- ratings" 912 | ] 913 | }, 914 | { 915 | "cell_type": "code", 916 | "execution_count": 22, 917 | "metadata": {}, 918 | "outputs": [ 919 | { 920 | "name": "stdout", 921 | "output_type": "stream", 922 | "text": [ 923 | "(271360, 6)\n", 924 | "(278858, 3)\n", 925 | "(1149780, 3)\n" 926 | ] 927 | } 928 | ], 929 | "source": [ 930 | "print(books.shape, users.shape, ratings.shape, sep='\\n')\n", 931 | "\n" 932 | ] 933 | }, 934 | { 935 | "cell_type": "code", 936 | "execution_count": 23, 937 | "metadata": {}, 938 | "outputs": [ 939 | { 940 | "data": { 941 | "text/plain": [ 942 | "11676 13602\n", 943 | "198711 7550\n", 944 | "153662 6109\n", 945 | "98391 5891\n", 946 | "35859 5850\n", 947 | " ... 
\n", 948 | "158698 1\n", 949 | "17920 1\n", 950 | "277135 1\n", 951 | "275086 1\n", 952 | "187812 1\n", 953 | "Name: user_id, Length: 105283, dtype: int64" 954 | ] 955 | }, 956 | "execution_count": 23, 957 | "metadata": {}, 958 | "output_type": "execute_result" 959 | } 960 | ], 961 | "source": [ 962 | "ratings['user_id'].value_counts()" 963 | ] 964 | }, 965 | { 966 | "cell_type": "code", 967 | "execution_count": 24, 968 | "metadata": {}, 969 | "outputs": [ 970 | { 971 | "data": { 972 | "text/plain": [ 973 | "(105283,)" 974 | ] 975 | }, 976 | "execution_count": 24, 977 | "metadata": {}, 978 | "output_type": "execute_result" 979 | } 980 | ], 981 | "source": [ 982 | "ratings['user_id'].value_counts().shape" 983 | ] 984 | }, 985 | { 986 | "cell_type": "code", 987 | "execution_count": 25, 988 | "metadata": {}, 989 | "outputs": [ 990 | { 991 | "data": { 992 | "text/plain": [ 993 | "(105283,)" 994 | ] 995 | }, 996 | "execution_count": 25, 997 | "metadata": {}, 998 | "output_type": "execute_result" 999 | } 1000 | ], 1001 | "source": [ 1002 | "ratings['user_id'].unique().shape" 1003 | ] 1004 | }, 1005 | { 1006 | "cell_type": "code", 1007 | "execution_count": 26, 1008 | "metadata": {}, 1009 | "outputs": [], 1010 | "source": [ 1011 | "# Lets store users who had at least rated more than 200 books\n", 1012 | "x = ratings['user_id'].value_counts() > 200" 1013 | ] 1014 | }, 1015 | { 1016 | "cell_type": "code", 1017 | "execution_count": 27, 1018 | "metadata": {}, 1019 | "outputs": [ 1020 | { 1021 | "data": { 1022 | "text/plain": [ 1023 | "(899,)" 1024 | ] 1025 | }, 1026 | "execution_count": 27, 1027 | "metadata": {}, 1028 | "output_type": "execute_result" 1029 | } 1030 | ], 1031 | "source": [ 1032 | "x[x].shape" 1033 | ] 1034 | }, 1035 | { 1036 | "cell_type": "code", 1037 | "execution_count": 28, 1038 | "metadata": {}, 1039 | "outputs": [], 1040 | "source": [ 1041 | "y= x[x].index" 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "code", 1046 | "execution_count": 29, 1047 | 
"metadata": {}, 1048 | "outputs": [ 1049 | { 1050 | "data": { 1051 | "text/plain": [ 1052 | "Int64Index([ 11676, 198711, 153662, 98391, 35859, 212898, 278418, 76352,\n", 1053 | " 110973, 235105,\n", 1054 | " ...\n", 1055 | " 260183, 155916, 44296, 73681, 59727, 28634, 188951, 9856,\n", 1056 | " 268622, 274808],\n", 1057 | " dtype='int64', length=899)" 1058 | ] 1059 | }, 1060 | "execution_count": 29, 1061 | "metadata": {}, 1062 | "output_type": "execute_result" 1063 | } 1064 | ], 1065 | "source": [ 1066 | "y" 1067 | ] 1068 | }, 1069 | { 1070 | "cell_type": "code", 1071 | "execution_count": 30, 1072 | "metadata": {}, 1073 | "outputs": [], 1074 | "source": [ 1075 | "ratings = ratings[ratings['user_id'].isin(y)]" 1076 | ] 1077 | }, 1078 | { 1079 | "cell_type": "code", 1080 | "execution_count": 31, 1081 | "metadata": {}, 1082 | "outputs": [ 1083 | { 1084 | "data": { 1085 | "text/html": [ 1086 | "
\n", 1087 | "\n", 1100 | "\n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | "
user_idISBNrating
1456277427002542730X10
145727742700262174570
1458277427003008685X8
145927742700306153210
146027742700600020500
\n", 1142 | "
" 1143 | ], 1144 | "text/plain": [ 1145 | " user_id ISBN rating\n", 1146 | "1456 277427 002542730X 10\n", 1147 | "1457 277427 0026217457 0\n", 1148 | "1458 277427 003008685X 8\n", 1149 | "1459 277427 0030615321 0\n", 1150 | "1460 277427 0060002050 0" 1151 | ] 1152 | }, 1153 | "execution_count": 31, 1154 | "metadata": {}, 1155 | "output_type": "execute_result" 1156 | } 1157 | ], 1158 | "source": [ 1159 | "ratings.head()" 1160 | ] 1161 | }, 1162 | { 1163 | "cell_type": "code", 1164 | "execution_count": 32, 1165 | "metadata": {}, 1166 | "outputs": [ 1167 | { 1168 | "data": { 1169 | "text/plain": [ 1170 | "(526356, 3)" 1171 | ] 1172 | }, 1173 | "execution_count": 32, 1174 | "metadata": {}, 1175 | "output_type": "execute_result" 1176 | } 1177 | ], 1178 | "source": [ 1179 | "ratings.shape" 1180 | ] 1181 | }, 1182 | { 1183 | "cell_type": "code", 1184 | "execution_count": 33, 1185 | "metadata": {}, 1186 | "outputs": [], 1187 | "source": [ 1188 | "# Now join ratings with books\n", 1189 | "\n", 1190 | "ratings_with_books = ratings.merge(books, on='ISBN')" 1191 | ] 1192 | }, 1193 | { 1194 | "cell_type": "code", 1195 | "execution_count": 34, 1196 | "metadata": {}, 1197 | "outputs": [ 1198 | { 1199 | "data": { 1200 | "text/html": [ 1201 | "
\n", 1202 | "\n", 1215 | "\n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | "
user_idISBNratingtitleauthoryearpublisherimage_url
0277427002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
13363002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
211676002542730X6Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
312538002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
413552002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...
\n", 1287 | "
" 1288 | ], 1289 | "text/plain": [ 1290 | " user_id ISBN rating \\\n", 1291 | "0 277427 002542730X 10 \n", 1292 | "1 3363 002542730X 0 \n", 1293 | "2 11676 002542730X 6 \n", 1294 | "3 12538 002542730X 10 \n", 1295 | "4 13552 002542730X 0 \n", 1296 | "\n", 1297 | " title author year \\\n", 1298 | "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1299 | "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1300 | "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1301 | "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1302 | "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1303 | "\n", 1304 | " publisher \\\n", 1305 | "0 John Wiley & Sons Inc \n", 1306 | "1 John Wiley & Sons Inc \n", 1307 | "2 John Wiley & Sons Inc \n", 1308 | "3 John Wiley & Sons Inc \n", 1309 | "4 John Wiley & Sons Inc \n", 1310 | "\n", 1311 | " image_url \n", 1312 | "0 http://images.amazon.com/images/P/002542730X.0... \n", 1313 | "1 http://images.amazon.com/images/P/002542730X.0... \n", 1314 | "2 http://images.amazon.com/images/P/002542730X.0... \n", 1315 | "3 http://images.amazon.com/images/P/002542730X.0... \n", 1316 | "4 http://images.amazon.com/images/P/002542730X.0... 
" 1317 | ] 1318 | }, 1319 | "execution_count": 34, 1320 | "metadata": {}, 1321 | "output_type": "execute_result" 1322 | } 1323 | ], 1324 | "source": [ 1325 | "ratings_with_books.head()" 1326 | ] 1327 | }, 1328 | { 1329 | "cell_type": "code", 1330 | "execution_count": 35, 1331 | "metadata": {}, 1332 | "outputs": [ 1333 | { 1334 | "data": { 1335 | "text/plain": [ 1336 | "(487671, 8)" 1337 | ] 1338 | }, 1339 | "execution_count": 35, 1340 | "metadata": {}, 1341 | "output_type": "execute_result" 1342 | } 1343 | ], 1344 | "source": [ 1345 | "ratings_with_books.shape" 1346 | ] 1347 | }, 1348 | { 1349 | "cell_type": "code", 1350 | "execution_count": 36, 1351 | "metadata": {}, 1352 | "outputs": [], 1353 | "source": [ 1354 | "number_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()" 1355 | ] 1356 | }, 1357 | { 1358 | "cell_type": "code", 1359 | "execution_count": 37, 1360 | "metadata": {}, 1361 | "outputs": [ 1362 | { 1363 | "data": { 1364 | "text/html": [ 1365 | "
\n", 1366 | "\n", 1379 | "\n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | "
titlerating
0A Light in the Storm: The Civil War Diary of ...2
1Always Have Popsicles1
2Apple Magic (The Collector's series)1
3Beyond IBM: Leadership Marketing and Finance ...1
4Clifford Visita El Hospital (Clifford El Gran...1
\n", 1415 | "
" 1416 | ], 1417 | "text/plain": [ 1418 | " title rating\n", 1419 | "0 A Light in the Storm: The Civil War Diary of ... 2\n", 1420 | "1 Always Have Popsicles 1\n", 1421 | "2 Apple Magic (The Collector's series) 1\n", 1422 | "3 Beyond IBM: Leadership Marketing and Finance ... 1\n", 1423 | "4 Clifford Visita El Hospital (Clifford El Gran... 1" 1424 | ] 1425 | }, 1426 | "execution_count": 37, 1427 | "metadata": {}, 1428 | "output_type": "execute_result" 1429 | } 1430 | ], 1431 | "source": [ 1432 | "number_rating.head()" 1433 | ] 1434 | }, 1435 | { 1436 | "cell_type": "code", 1437 | "execution_count": 38, 1438 | "metadata": {}, 1439 | "outputs": [], 1440 | "source": [ 1441 | "number_rating.rename(columns={'rating':'num_of_rating'},inplace=True)" 1442 | ] 1443 | }, 1444 | { 1445 | "cell_type": "code", 1446 | "execution_count": 39, 1447 | "metadata": {}, 1448 | "outputs": [ 1449 | { 1450 | "data": { 1451 | "text/html": [ 1452 | "
\n", 1453 | "\n", 1466 | "\n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | "
titlenum_of_rating
0A Light in the Storm: The Civil War Diary of ...2
1Always Have Popsicles1
2Apple Magic (The Collector's series)1
3Beyond IBM: Leadership Marketing and Finance ...1
4Clifford Visita El Hospital (Clifford El Gran...1
\n", 1502 | "
" 1503 | ], 1504 | "text/plain": [ 1505 | " title num_of_rating\n", 1506 | "0 A Light in the Storm: The Civil War Diary of ... 2\n", 1507 | "1 Always Have Popsicles 1\n", 1508 | "2 Apple Magic (The Collector's series) 1\n", 1509 | "3 Beyond IBM: Leadership Marketing and Finance ... 1\n", 1510 | "4 Clifford Visita El Hospital (Clifford El Gran... 1" 1511 | ] 1512 | }, 1513 | "execution_count": 39, 1514 | "metadata": {}, 1515 | "output_type": "execute_result" 1516 | } 1517 | ], 1518 | "source": [ 1519 | "number_rating.head()" 1520 | ] 1521 | }, 1522 | { 1523 | "cell_type": "code", 1524 | "execution_count": 40, 1525 | "metadata": {}, 1526 | "outputs": [], 1527 | "source": [ 1528 | "final_rating = ratings_with_books.merge(number_rating, on='title')" 1529 | ] 1530 | }, 1531 | { 1532 | "cell_type": "code", 1533 | "execution_count": 41, 1534 | "metadata": {}, 1535 | "outputs": [ 1536 | { 1537 | "data": { 1538 | "text/html": [ 1539 | "
\n", 1540 | "\n", 1553 | "\n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | "
user_idISBNratingtitleauthoryearpublisherimage_urlnum_of_rating
0277427002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
13363002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
211676002542730X6Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
312538002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
413552002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
\n", 1631 | "
" 1632 | ], 1633 | "text/plain": [ 1634 | " user_id ISBN rating \\\n", 1635 | "0 277427 002542730X 10 \n", 1636 | "1 3363 002542730X 0 \n", 1637 | "2 11676 002542730X 6 \n", 1638 | "3 12538 002542730X 10 \n", 1639 | "4 13552 002542730X 0 \n", 1640 | "\n", 1641 | " title author year \\\n", 1642 | "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1643 | "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1644 | "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1645 | "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1646 | "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1647 | "\n", 1648 | " publisher \\\n", 1649 | "0 John Wiley & Sons Inc \n", 1650 | "1 John Wiley & Sons Inc \n", 1651 | "2 John Wiley & Sons Inc \n", 1652 | "3 John Wiley & Sons Inc \n", 1653 | "4 John Wiley & Sons Inc \n", 1654 | "\n", 1655 | " image_url num_of_rating \n", 1656 | "0 http://images.amazon.com/images/P/002542730X.0... 82 \n", 1657 | "1 http://images.amazon.com/images/P/002542730X.0... 82 \n", 1658 | "2 http://images.amazon.com/images/P/002542730X.0... 82 \n", 1659 | "3 http://images.amazon.com/images/P/002542730X.0... 82 \n", 1660 | "4 http://images.amazon.com/images/P/002542730X.0... 
82 " 1661 | ] 1662 | }, 1663 | "execution_count": 41, 1664 | "metadata": {}, 1665 | "output_type": "execute_result" 1666 | } 1667 | ], 1668 | "source": [ 1669 | "final_rating.head()" 1670 | ] 1671 | }, 1672 | { 1673 | "cell_type": "code", 1674 | "execution_count": 42, 1675 | "metadata": {}, 1676 | "outputs": [ 1677 | { 1678 | "data": { 1679 | "text/plain": [ 1680 | "(487671, 9)" 1681 | ] 1682 | }, 1683 | "execution_count": 42, 1684 | "metadata": {}, 1685 | "output_type": "execute_result" 1686 | } 1687 | ], 1688 | "source": [ 1689 | "final_rating.shape" 1690 | ] 1691 | }, 1692 | { 1693 | "cell_type": "code", 1694 | "execution_count": 43, 1695 | "metadata": {}, 1696 | "outputs": [], 1697 | "source": [ 1698 | "# Lets take those books which got at least 50 rating of user\n", 1699 | "\n", 1700 | "final_rating = final_rating[final_rating['num_of_rating'] >= 50]" 1701 | ] 1702 | }, 1703 | { 1704 | "cell_type": "code", 1705 | "execution_count": 44, 1706 | "metadata": {}, 1707 | "outputs": [ 1708 | { 1709 | "data": { 1710 | "text/html": [ 1711 | "
\n", 1712 | "\n", 1725 | "\n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | "
user_idISBNratingtitleauthoryearpublisherimage_urlnum_of_rating
0277427002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
13363002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
211676002542730X6Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
312538002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
413552002542730X0Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inchttp://images.amazon.com/images/P/002542730X.0...82
\n", 1803 | "
" 1804 | ], 1805 | "text/plain": [ 1806 | " user_id ISBN rating \\\n", 1807 | "0 277427 002542730X 10 \n", 1808 | "1 3363 002542730X 0 \n", 1809 | "2 11676 002542730X 6 \n", 1810 | "3 12538 002542730X 10 \n", 1811 | "4 13552 002542730X 0 \n", 1812 | "\n", 1813 | " title author year \\\n", 1814 | "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1815 | "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1816 | "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1817 | "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1818 | "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n", 1819 | "\n", 1820 | " publisher \\\n", 1821 | "0 John Wiley & Sons Inc \n", 1822 | "1 John Wiley & Sons Inc \n", 1823 | "2 John Wiley & Sons Inc \n", 1824 | "3 John Wiley & Sons Inc \n", 1825 | "4 John Wiley & Sons Inc \n", 1826 | "\n", 1827 | " image_url num_of_rating \n", 1828 | "0 http://images.amazon.com/images/P/002542730X.0... 82 \n", 1829 | "1 http://images.amazon.com/images/P/002542730X.0... 82 \n", 1830 | "2 http://images.amazon.com/images/P/002542730X.0... 82 \n", 1831 | "3 http://images.amazon.com/images/P/002542730X.0... 82 \n", 1832 | "4 http://images.amazon.com/images/P/002542730X.0... 
82 " 1833 | ] 1834 | }, 1835 | "execution_count": 44, 1836 | "metadata": {}, 1837 | "output_type": "execute_result" 1838 | } 1839 | ], 1840 | "source": [ 1841 | "final_rating.head()" 1842 | ] 1843 | }, 1844 | { 1845 | "cell_type": "code", 1846 | "execution_count": 45, 1847 | "metadata": {}, 1848 | "outputs": [ 1849 | { 1850 | "data": { 1851 | "text/plain": [ 1852 | "(61853, 9)" 1853 | ] 1854 | }, 1855 | "execution_count": 45, 1856 | "metadata": {}, 1857 | "output_type": "execute_result" 1858 | } 1859 | ], 1860 | "source": [ 1861 | "final_rating.shape" 1862 | ] 1863 | }, 1864 | { 1865 | "cell_type": "code", 1866 | "execution_count": 46, 1867 | "metadata": {}, 1868 | "outputs": [], 1869 | "source": [ 1870 | "# lets drop the duplicates\n", 1871 | "final_rating.drop_duplicates(['user_id','title'],inplace=True)" 1872 | ] 1873 | }, 1874 | { 1875 | "cell_type": "code", 1876 | "execution_count": 47, 1877 | "metadata": {}, 1878 | "outputs": [ 1879 | { 1880 | "data": { 1881 | "text/plain": [ 1882 | "(59850, 9)" 1883 | ] 1884 | }, 1885 | "execution_count": 47, 1886 | "metadata": {}, 1887 | "output_type": "execute_result" 1888 | } 1889 | ], 1890 | "source": [ 1891 | "final_rating.shape" 1892 | ] 1893 | }, 1894 | { 1895 | "cell_type": "code", 1896 | "execution_count": 48, 1897 | "metadata": {}, 1898 | "outputs": [], 1899 | "source": [ 1900 | "# Lets create a pivot table\n", 1901 | "book_pivot = final_rating.pivot_table(columns='user_id', index='title', values= 'rating')" 1902 | ] 1903 | }, 1904 | { 1905 | "cell_type": "code", 1906 | "execution_count": 49, 1907 | "metadata": {}, 1908 | "outputs": [ 1909 | { 1910 | "data": { 1911 | "text/html": [ 1912 | "
\n", 1913 | "\n", 1926 | "\n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | " \n", 1943 | " \n", 1944 | " \n", 1945 | " \n", 1946 | " \n", 1947 | " \n", 1948 | " \n", 1949 | " \n", 1950 | " \n", 1951 | " \n", 1952 | " \n", 1953 | " \n", 1954 | " \n", 1955 | " \n", 1956 | " \n", 1957 | " \n", 1958 | " \n", 1959 | " \n", 1960 | " \n", 1961 | " \n", 1962 | " \n", 1963 | " \n", 1964 | " \n", 1965 | " \n", 1966 | " \n", 1967 | " \n", 1968 | " \n", 1969 | " \n", 1970 | " \n", 1971 | " \n", 1972 | " \n", 1973 | " \n", 1974 | " \n", 1975 | " \n", 1976 | " \n", 1977 | " \n", 1978 | " \n", 1979 | " \n", 1980 | " \n", 1981 | " \n", 1982 | " \n", 1983 | " \n", 1984 | " \n", 1985 | " \n", 1986 | " \n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | " \n", 1995 | " \n", 1996 | " \n", 1997 | " \n", 1998 | " \n", 1999 | " \n", 2000 | " \n", 2001 | " \n", 2002 | " \n", 2003 | " \n", 2004 | " \n", 2005 | " \n", 2006 | " \n", 2007 | " \n", 2008 | " \n", 2009 | " \n", 2010 | " \n", 2011 | " \n", 2012 | " \n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | " \n", 2038 | " \n", 2039 | " \n", 2040 | " \n", 2041 | " \n", 2042 | " \n", 2043 | " \n", 2044 | " \n", 2045 | " \n", 2046 | " \n", 2047 | " \n", 2048 | " \n", 2049 | " \n", 2050 | " \n", 2051 | " \n", 2052 | " \n", 2053 | " \n", 2054 | " \n", 2055 | " \n", 2056 | " \n", 2057 | " \n", 2058 | " \n", 2059 | " \n", 2060 | " \n", 2061 | " \n", 2062 | " \n", 2063 | " \n", 2064 | " \n", 2065 | " \n", 2066 | " \n", 2067 | " 
\n", 2068 | " \n", 2069 | " \n", 2070 | " \n", 2071 | " \n", 2072 | " \n", 2073 | " \n", 2074 | " \n", 2075 | " \n", 2076 | " \n", 2077 | " \n", 2078 | " \n", 2079 | " \n", 2080 | " \n", 2081 | " \n", 2082 | " \n", 2083 | " \n", 2084 | " \n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | " \n", 2096 | " \n", 2097 | " \n", 2098 | " \n", 2099 | " \n", 2100 | " \n", 2101 | " \n", 2102 | " \n", 2103 | " \n", 2104 | " \n", 2105 | " \n", 2106 | " \n", 2107 | " \n", 2108 | " \n", 2109 | " \n", 2110 | " \n", 2111 | " \n", 2112 | " \n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | 
" \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | "
user_id254227627662977336337574017438562426251...274004274061274301274308274808275970277427277478277639278418
title
19849.0NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaN0.0NaNNaNNaNNaN
1st to Die: A NovelNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2nd ChanceNaN10.0NaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaN0.0NaNNaNNaNNaN0.0NaN
4 BlondesNaNNaNNaNNaNNaNNaNNaNNaNNaN0.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
84 Charing Cross RoadNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaN10.0NaNNaNNaNNaN
..................................................................
Year of WondersNaNNaNNaN7.0NaNNaNNaNNaN7.0NaN...NaNNaNNaNNaNNaN0.0NaNNaNNaNNaN
You Belong To MeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Zen and the Art of Motorcycle Maintenance: An Inquiry into ValuesNaNNaNNaNNaN0.0NaNNaNNaNNaN0.0...NaNNaNNaNNaNNaN0.0NaNNaNNaNNaN
ZoyaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\\O\\\" Is for Outlaw\"NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN8.0NaNNaNNaNNaNNaNNaNNaN
\n", 2244 | "

742 rows × 888 columns

\n", 2245 | "
" 2246 | ], 2247 | "text/plain": [ 2248 | "user_id 254 2276 2766 \\\n", 2249 | "title \n", 2250 | "1984 9.0 NaN NaN \n", 2251 | "1st to Die: A Novel NaN NaN NaN \n", 2252 | "2nd Chance NaN 10.0 NaN \n", 2253 | "4 Blondes NaN NaN NaN \n", 2254 | "84 Charing Cross Road NaN NaN NaN \n", 2255 | "... ... ... ... \n", 2256 | "Year of Wonders NaN NaN NaN \n", 2257 | "You Belong To Me NaN NaN NaN \n", 2258 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", 2259 | "Zoya NaN NaN NaN \n", 2260 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", 2261 | "\n", 2262 | "user_id 2977 3363 3757 \\\n", 2263 | "title \n", 2264 | "1984 NaN NaN NaN \n", 2265 | "1st to Die: A Novel NaN NaN NaN \n", 2266 | "2nd Chance NaN NaN NaN \n", 2267 | "4 Blondes NaN NaN NaN \n", 2268 | "84 Charing Cross Road NaN NaN NaN \n", 2269 | "... ... ... ... \n", 2270 | "Year of Wonders 7.0 NaN NaN \n", 2271 | "You Belong To Me NaN NaN NaN \n", 2272 | "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n", 2273 | "Zoya NaN NaN NaN \n", 2274 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", 2275 | "\n", 2276 | "user_id 4017 4385 6242 \\\n", 2277 | "title \n", 2278 | "1984 NaN NaN NaN \n", 2279 | "1st to Die: A Novel NaN NaN NaN \n", 2280 | "2nd Chance NaN NaN NaN \n", 2281 | "4 Blondes NaN NaN NaN \n", 2282 | "84 Charing Cross Road NaN NaN NaN \n", 2283 | "... ... ... ... \n", 2284 | "Year of Wonders NaN NaN 7.0 \n", 2285 | "You Belong To Me NaN NaN NaN \n", 2286 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", 2287 | "Zoya NaN NaN NaN \n", 2288 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", 2289 | "\n", 2290 | "user_id 6251 ... 274004 \\\n", 2291 | "title ... \n", 2292 | "1984 NaN ... NaN \n", 2293 | "1st to Die: A Novel NaN ... NaN \n", 2294 | "2nd Chance NaN ... NaN \n", 2295 | "4 Blondes 0.0 ... NaN \n", 2296 | "84 Charing Cross Road NaN ... NaN \n", 2297 | "... ... ... ... \n", 2298 | "Year of Wonders NaN ... NaN \n", 2299 | "You Belong To Me NaN ... 
NaN \n", 2300 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... NaN \n", 2301 | "Zoya NaN ... NaN \n", 2302 | "\\O\\\" Is for Outlaw\" NaN ... NaN \n", 2303 | "\n", 2304 | "user_id 274061 274301 274308 \\\n", 2305 | "title \n", 2306 | "1984 NaN NaN NaN \n", 2307 | "1st to Die: A Novel NaN NaN NaN \n", 2308 | "2nd Chance NaN NaN 0.0 \n", 2309 | "4 Blondes NaN NaN NaN \n", 2310 | "84 Charing Cross Road NaN NaN NaN \n", 2311 | "... ... ... ... \n", 2312 | "Year of Wonders NaN NaN NaN \n", 2313 | "You Belong To Me NaN NaN NaN \n", 2314 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", 2315 | "Zoya NaN NaN NaN \n", 2316 | "\\O\\\" Is for Outlaw\" NaN 8.0 NaN \n", 2317 | "\n", 2318 | "user_id 274808 275970 277427 \\\n", 2319 | "title \n", 2320 | "1984 NaN 0.0 NaN \n", 2321 | "1st to Die: A Novel NaN NaN NaN \n", 2322 | "2nd Chance NaN NaN NaN \n", 2323 | "4 Blondes NaN NaN NaN \n", 2324 | "84 Charing Cross Road NaN 10.0 NaN \n", 2325 | "... ... ... ... \n", 2326 | "Year of Wonders NaN 0.0 NaN \n", 2327 | "You Belong To Me NaN NaN NaN \n", 2328 | "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n", 2329 | "Zoya NaN NaN NaN \n", 2330 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", 2331 | "\n", 2332 | "user_id 277478 277639 278418 \n", 2333 | "title \n", 2334 | "1984 NaN NaN NaN \n", 2335 | "1st to Die: A Novel NaN NaN NaN \n", 2336 | "2nd Chance NaN 0.0 NaN \n", 2337 | "4 Blondes NaN NaN NaN \n", 2338 | "84 Charing Cross Road NaN NaN NaN \n", 2339 | "... ... ... ... \n", 2340 | "Year of Wonders NaN NaN NaN \n", 2341 | "You Belong To Me NaN NaN NaN \n", 2342 | "Zen and the Art of Motorcycle Maintenance: An I... 
NaN NaN NaN \n", 2343 | "Zoya NaN NaN NaN \n", 2344 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", 2345 | "\n", 2346 | "[742 rows x 888 columns]" 2347 | ] 2348 | }, 2349 | "execution_count": 49, 2350 | "metadata": {}, 2351 | "output_type": "execute_result" 2352 | } 2353 | ], 2354 | "source": [ 2355 | "book_pivot" 2356 | ] 2357 | }, 2358 | { 2359 | "cell_type": "code", 2360 | "execution_count": 50, 2361 | "metadata": {}, 2362 | "outputs": [ 2363 | { 2364 | "data": { 2365 | "text/plain": [ 2366 | "(742, 888)" 2367 | ] 2368 | }, 2369 | "execution_count": 50, 2370 | "metadata": {}, 2371 | "output_type": "execute_result" 2372 | } 2373 | ], 2374 | "source": [ 2375 | "book_pivot.shape" 2376 | ] 2377 | }, 2378 | { 2379 | "cell_type": "code", 2380 | "execution_count": 51, 2381 | "metadata": {}, 2382 | "outputs": [], 2383 | "source": [ 2384 | "book_pivot.fillna(0, inplace=True)" 2385 | ] 2386 | }, 2387 | { 2388 | "cell_type": "code", 2389 | "execution_count": 52, 2390 | "metadata": {}, 2391 | "outputs": [ 2392 | { 2393 | "data": { 2394 | "text/html": [ 2395 | "
\n", 2396 | "\n", 2409 | "\n", 2410 | " \n", 2411 | " \n", 2412 | " \n", 2413 | " \n", 2414 | " \n", 2415 | " \n", 2416 | " \n", 2417 | " \n", 2418 | " \n", 2419 | " \n", 2420 | " \n", 2421 | " \n", 2422 | " \n", 2423 | " \n", 2424 | " \n", 2425 | " \n", 2426 | " \n", 2427 | " \n", 2428 | " \n", 2429 | " \n", 2430 | " \n", 2431 | " \n", 2432 | " \n", 2433 | " \n", 2434 | " \n", 2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | " \n", 2443 | " \n", 2444 | " \n", 2445 | " \n", 2446 | " \n", 2447 | " \n", 2448 | " \n", 2449 | " \n", 2450 | " \n", 2451 | " \n", 2452 | " \n", 2453 | " \n", 2454 | " \n", 2455 | " \n", 2456 | " \n", 2457 | " \n", 2458 | " \n", 2459 | " \n", 2460 | " \n", 2461 | " \n", 2462 | " \n", 2463 | " \n", 2464 | " \n", 2465 | " \n", 2466 | " \n", 2467 | " \n", 2468 | " \n", 2469 | " \n", 2470 | " \n", 2471 | " \n", 2472 | " \n", 2473 | " \n", 2474 | " \n", 2475 | " \n", 2476 | " \n", 2477 | " \n", 2478 | " \n", 2479 | " \n", 2480 | " \n", 2481 | " \n", 2482 | " \n", 2483 | " \n", 2484 | " \n", 2485 | " \n", 2486 | " \n", 2487 | " \n", 2488 | " \n", 2489 | " \n", 2490 | " \n", 2491 | " \n", 2492 | " \n", 2493 | " \n", 2494 | " \n", 2495 | " \n", 2496 | " \n", 2497 | " \n", 2498 | " \n", 2499 | " \n", 2500 | " \n", 2501 | " \n", 2502 | " \n", 2503 | " \n", 2504 | " \n", 2505 | " \n", 2506 | " \n", 2507 | " \n", 2508 | " \n", 2509 | " \n", 2510 | " \n", 2511 | " \n", 2512 | " \n", 2513 | " \n", 2514 | " \n", 2515 | " \n", 2516 | " \n", 2517 | " \n", 2518 | " \n", 2519 | " \n", 2520 | " \n", 2521 | " \n", 2522 | " \n", 2523 | " \n", 2524 | " \n", 2525 | " \n", 2526 | " \n", 2527 | " \n", 2528 | " \n", 2529 | " \n", 2530 | " \n", 2531 | " \n", 2532 | " \n", 2533 | " \n", 2534 | " \n", 2535 | " \n", 2536 | " \n", 2537 | " \n", 2538 | " \n", 2539 | " \n", 2540 | " \n", 2541 | " \n", 2542 | " \n", 2543 | " \n", 2544 | " \n", 2545 | " \n", 2546 | " \n", 2547 | " \n", 2548 | " \n", 2549 | " \n", 2550 | " 
\n", 2551 | " \n", 2552 | " \n", 2553 | " \n", 2554 | " \n", 2555 | " \n", 2556 | " \n", 2557 | " \n", 2558 | " \n", 2559 | " \n", 2560 | " \n", 2561 | " \n", 2562 | " \n", 2563 | " \n", 2564 | " \n", 2565 | " \n", 2566 | " \n", 2567 | " \n", 2568 | " \n", 2569 | " \n", 2570 | " \n", 2571 | " \n", 2572 | " \n", 2573 | " \n", 2574 | " \n", 2575 | " \n", 2576 | " \n", 2577 | " \n", 2578 | " \n", 2579 | " \n", 2580 | " \n", 2581 | " \n", 2582 | " \n", 2583 | " \n", 2584 | " \n", 2585 | " \n", 2586 | " \n", 2587 | " \n", 2588 | " \n", 2589 | " \n", 2590 | " \n", 2591 | " \n", 2592 | " \n", 2593 | " \n", 2594 | " \n", 2595 | " \n", 2596 | " \n", 2597 | " \n", 2598 | " \n", 2599 | " \n", 2600 | " \n", 2601 | " \n", 2602 | " \n", 2603 | " \n", 2604 | " \n", 2605 | " \n", 2606 | " \n", 2607 | " \n", 2608 | " \n", 2609 | " \n", 2610 | " \n", 2611 | " \n", 2612 | " \n", 2613 | " \n", 2614 | " \n", 2615 | " \n", 2616 | " \n", 2617 | " \n", 2618 | " \n", 2619 | " \n", 2620 | " \n", 2621 | " \n", 2622 | " \n", 2623 | " \n", 2624 | " \n", 2625 | " \n", 2626 | " \n", 2627 | " \n", 2628 | " \n", 2629 | " \n", 2630 | " \n", 2631 | " \n", 2632 | " \n", 2633 | " \n", 2634 | " \n", 2635 | " \n", 2636 | " \n", 2637 | " \n", 2638 | " \n", 2639 | " \n", 2640 | " \n", 2641 | " \n", 2642 | " \n", 2643 | " \n", 2644 | " \n", 2645 | " \n", 2646 | " \n", 2647 | " \n", 2648 | " \n", 2649 | " \n", 2650 | " \n", 2651 | " \n", 2652 | " \n", 2653 | " \n", 2654 | " \n", 2655 | " \n", 2656 | " \n", 2657 | " \n", 2658 | " \n", 2659 | " \n", 2660 | " \n", 2661 | " \n", 2662 | " \n", 2663 | " \n", 2664 | " \n", 2665 | " \n", 2666 | " \n", 2667 | " \n", 2668 | " \n", 2669 | " \n", 2670 | " \n", 2671 | " \n", 2672 | " \n", 2673 | " \n", 2674 | " \n", 2675 | " \n", 2676 | " \n", 2677 | " \n", 2678 | " \n", 2679 | " \n", 2680 | " \n", 2681 | " \n", 2682 | " \n", 2683 | " \n", 2684 | " \n", 2685 | " \n", 2686 | " \n", 2687 | " \n", 2688 | " \n", 2689 | " \n", 2690 | " \n", 2691 | " \n", 2692 | " \n", 2693 | 
" \n", 2694 | " \n", 2695 | " \n", 2696 | " \n", 2697 | " \n", 2698 | " \n", 2699 | " \n", 2700 | " \n", 2701 | " \n", 2702 | " \n", 2703 | " \n", 2704 | " \n", 2705 | " \n", 2706 | " \n", 2707 | " \n", 2708 | " \n", 2709 | " \n", 2710 | " \n", 2711 | " \n", 2712 | " \n", 2713 | " \n", 2714 | " \n", 2715 | " \n", 2716 | " \n", 2717 | " \n", 2718 | " \n", 2719 | " \n", 2720 | " \n", 2721 | " \n", 2722 | " \n", 2723 | " \n", 2724 | " \n", 2725 | " \n", 2726 | "
user_id254227627662977336337574017438562426251...274004274061274301274308274808275970277427277478277639278418
title
19849.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
1st to Die: A Novel0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
2nd Chance0.010.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4 Blondes0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
84 Charing Cross Road0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.010.00.00.00.00.0
..................................................................
Year of Wonders0.00.00.07.00.00.00.00.07.00.0...0.00.00.00.00.00.00.00.00.00.0
You Belong To Me0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
Zoya0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\\O\\\" Is for Outlaw\"0.00.00.00.00.00.00.00.00.00.0...0.00.08.00.00.00.00.00.00.00.0
\n", 2727 | "

742 rows × 888 columns

\n", 2728 | "
" 2729 | ], 2730 | "text/plain": [ 2731 | "user_id 254 2276 2766 \\\n", 2732 | "title \n", 2733 | "1984 9.0 0.0 0.0 \n", 2734 | "1st to Die: A Novel 0.0 0.0 0.0 \n", 2735 | "2nd Chance 0.0 10.0 0.0 \n", 2736 | "4 Blondes 0.0 0.0 0.0 \n", 2737 | "84 Charing Cross Road 0.0 0.0 0.0 \n", 2738 | "... ... ... ... \n", 2739 | "Year of Wonders 0.0 0.0 0.0 \n", 2740 | "You Belong To Me 0.0 0.0 0.0 \n", 2741 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", 2742 | "Zoya 0.0 0.0 0.0 \n", 2743 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", 2744 | "\n", 2745 | "user_id 2977 3363 3757 \\\n", 2746 | "title \n", 2747 | "1984 0.0 0.0 0.0 \n", 2748 | "1st to Die: A Novel 0.0 0.0 0.0 \n", 2749 | "2nd Chance 0.0 0.0 0.0 \n", 2750 | "4 Blondes 0.0 0.0 0.0 \n", 2751 | "84 Charing Cross Road 0.0 0.0 0.0 \n", 2752 | "... ... ... ... \n", 2753 | "Year of Wonders 7.0 0.0 0.0 \n", 2754 | "You Belong To Me 0.0 0.0 0.0 \n", 2755 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", 2756 | "Zoya 0.0 0.0 0.0 \n", 2757 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", 2758 | "\n", 2759 | "user_id 4017 4385 6242 \\\n", 2760 | "title \n", 2761 | "1984 0.0 0.0 0.0 \n", 2762 | "1st to Die: A Novel 0.0 0.0 0.0 \n", 2763 | "2nd Chance 0.0 0.0 0.0 \n", 2764 | "4 Blondes 0.0 0.0 0.0 \n", 2765 | "84 Charing Cross Road 0.0 0.0 0.0 \n", 2766 | "... ... ... ... \n", 2767 | "Year of Wonders 0.0 0.0 7.0 \n", 2768 | "You Belong To Me 0.0 0.0 0.0 \n", 2769 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", 2770 | "Zoya 0.0 0.0 0.0 \n", 2771 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", 2772 | "\n", 2773 | "user_id 6251 ... 274004 \\\n", 2774 | "title ... \n", 2775 | "1984 0.0 ... 0.0 \n", 2776 | "1st to Die: A Novel 0.0 ... 0.0 \n", 2777 | "2nd Chance 0.0 ... 0.0 \n", 2778 | "4 Blondes 0.0 ... 0.0 \n", 2779 | "84 Charing Cross Road 0.0 ... 0.0 \n", 2780 | "... ... ... ... \n", 2781 | "Year of Wonders 0.0 ... 0.0 \n", 2782 | "You Belong To Me 0.0 ... 
0.0 \n", 2783 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... 0.0 \n", 2784 | "Zoya 0.0 ... 0.0 \n", 2785 | "\\O\\\" Is for Outlaw\" 0.0 ... 0.0 \n", 2786 | "\n", 2787 | "user_id 274061 274301 274308 \\\n", 2788 | "title \n", 2789 | "1984 0.0 0.0 0.0 \n", 2790 | "1st to Die: A Novel 0.0 0.0 0.0 \n", 2791 | "2nd Chance 0.0 0.0 0.0 \n", 2792 | "4 Blondes 0.0 0.0 0.0 \n", 2793 | "84 Charing Cross Road 0.0 0.0 0.0 \n", 2794 | "... ... ... ... \n", 2795 | "Year of Wonders 0.0 0.0 0.0 \n", 2796 | "You Belong To Me 0.0 0.0 0.0 \n", 2797 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", 2798 | "Zoya 0.0 0.0 0.0 \n", 2799 | "\\O\\\" Is for Outlaw\" 0.0 8.0 0.0 \n", 2800 | "\n", 2801 | "user_id 274808 275970 277427 \\\n", 2802 | "title \n", 2803 | "1984 0.0 0.0 0.0 \n", 2804 | "1st to Die: A Novel 0.0 0.0 0.0 \n", 2805 | "2nd Chance 0.0 0.0 0.0 \n", 2806 | "4 Blondes 0.0 0.0 0.0 \n", 2807 | "84 Charing Cross Road 0.0 10.0 0.0 \n", 2808 | "... ... ... ... \n", 2809 | "Year of Wonders 0.0 0.0 0.0 \n", 2810 | "You Belong To Me 0.0 0.0 0.0 \n", 2811 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", 2812 | "Zoya 0.0 0.0 0.0 \n", 2813 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", 2814 | "\n", 2815 | "user_id 277478 277639 278418 \n", 2816 | "title \n", 2817 | "1984 0.0 0.0 0.0 \n", 2818 | "1st to Die: A Novel 0.0 0.0 0.0 \n", 2819 | "2nd Chance 0.0 0.0 0.0 \n", 2820 | "4 Blondes 0.0 0.0 0.0 \n", 2821 | "84 Charing Cross Road 0.0 0.0 0.0 \n", 2822 | "... ... ... ... \n", 2823 | "Year of Wonders 0.0 0.0 0.0 \n", 2824 | "You Belong To Me 0.0 0.0 0.0 \n", 2825 | "Zen and the Art of Motorcycle Maintenance: An I... 
0.0 0.0 0.0 \n", 2826 | "Zoya 0.0 0.0 0.0 \n", 2827 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", 2828 | "\n", 2829 | "[742 rows x 888 columns]" 2830 | ] 2831 | }, 2832 | "execution_count": 52, 2833 | "metadata": {}, 2834 | "output_type": "execute_result" 2835 | } 2836 | ], 2837 | "source": [ 2838 | "book_pivot" 2839 | ] 2840 | }, 2841 | { 2842 | "cell_type": "markdown", 2843 | "metadata": {}, 2844 | "source": [ 2845 | "# Training Model" 2846 | ] 2847 | }, 2848 | { 2849 | "cell_type": "code", 2850 | "execution_count": 53, 2851 | "metadata": {}, 2852 | "outputs": [], 2853 | "source": [ 2854 | "from scipy.sparse import csr_matrix" 2855 | ] 2856 | }, 2857 | { 2858 | "cell_type": "code", 2859 | "execution_count": 54, 2860 | "metadata": {}, 2861 | "outputs": [], 2862 | "source": [ 2863 | "book_sparse = csr_matrix(book_pivot)" 2864 | ] 2865 | }, 2866 | { 2867 | "cell_type": "code", 2868 | "execution_count": 55, 2869 | "metadata": {}, 2870 | "outputs": [ 2871 | { 2872 | "data": { 2873 | "text/plain": [ 2874 | "scipy.sparse.csr.csr_matrix" 2875 | ] 2876 | }, 2877 | "execution_count": 55, 2878 | "metadata": {}, 2879 | "output_type": "execute_result" 2880 | } 2881 | ], 2882 | "source": [ 2883 | "type(book_sparse)" 2884 | ] 2885 | }, 2886 | { 2887 | "cell_type": "code", 2888 | "execution_count": 56, 2889 | "metadata": {}, 2890 | "outputs": [], 2891 | "source": [ 2892 | "# Now import our clustering algoritm which is Nearest Neighbors this is an unsupervised ml algo\n", 2893 | "from sklearn.neighbors import NearestNeighbors\n", 2894 | "model = NearestNeighbors(algorithm= 'brute')" 2895 | ] 2896 | }, 2897 | { 2898 | "cell_type": "code", 2899 | "execution_count": 57, 2900 | "metadata": {}, 2901 | "outputs": [ 2902 | { 2903 | "data": { 2904 | "text/plain": [ 2905 | "NearestNeighbors(algorithm='brute')" 2906 | ] 2907 | }, 2908 | "execution_count": 57, 2909 | "metadata": {}, 2910 | "output_type": "execute_result" 2911 | } 2912 | ], 2913 | "source": [ 2914 | "model.fit(book_sparse)" 
2915 | ] 2916 | }, 2917 | { 2918 | "cell_type": "code", 2919 | "execution_count": 58, 2920 | "metadata": {}, 2921 | "outputs": [], 2922 | "source": [ 2923 | "distance, suggestion = model.kneighbors(book_pivot.iloc[237,:].values.reshape(1,-1), n_neighbors=6 )" 2924 | ] 2925 | }, 2926 | { 2927 | "cell_type": "code", 2928 | "execution_count": 59, 2929 | "metadata": {}, 2930 | "outputs": [ 2931 | { 2932 | "data": { 2933 | "text/plain": [ 2934 | "array([[ 0. , 68.78953409, 69.5413546 , 72.64296249, 76.83098333,\n", 2935 | " 77.28518616]])" 2936 | ] 2937 | }, 2938 | "execution_count": 59, 2939 | "metadata": {}, 2940 | "output_type": "execute_result" 2941 | } 2942 | ], 2943 | "source": [ 2944 | "distance" 2945 | ] 2946 | }, 2947 | { 2948 | "cell_type": "code", 2949 | "execution_count": 60, 2950 | "metadata": {}, 2951 | "outputs": [ 2952 | { 2953 | "data": { 2954 | "text/plain": [ 2955 | "array([[237, 240, 238, 241, 184, 536]], dtype=int64)" 2956 | ] 2957 | }, 2958 | "execution_count": 60, 2959 | "metadata": {}, 2960 | "output_type": "execute_result" 2961 | } 2962 | ], 2963 | "source": [ 2964 | "suggestion" 2965 | ] 2966 | }, 2967 | { 2968 | "cell_type": "code", 2969 | "execution_count": 61, 2970 | "metadata": {}, 2971 | "outputs": [ 2972 | { 2973 | "data": { 2974 | "text/plain": [ 2975 | "user_id\n", 2976 | "254 9.0\n", 2977 | "2276 0.0\n", 2978 | "2766 0.0\n", 2979 | "2977 0.0\n", 2980 | "3363 0.0\n", 2981 | " ... 
\n", 2982 | "275970 9.0\n", 2983 | "277427 0.0\n", 2984 | "277478 0.0\n", 2985 | "277639 0.0\n", 2986 | "278418 0.0\n", 2987 | "Name: Harry Potter and the Sorcerer's Stone (Book 1), Length: 888, dtype: float64" 2988 | ] 2989 | }, 2990 | "execution_count": 61, 2991 | "metadata": {}, 2992 | "output_type": "execute_result" 2993 | } 2994 | ], 2995 | "source": [ 2996 | "book_pivot.iloc[241,:]" 2997 | ] 2998 | }, 2999 | { 3000 | "cell_type": "code", 3001 | "execution_count": 62, 3002 | "metadata": {}, 3003 | "outputs": [ 3004 | { 3005 | "name": "stdout", 3006 | "output_type": "stream", 3007 | "text": [ 3008 | "Index(['Harry Potter and the Chamber of Secrets (Book 2)',\n", 3009 | " 'Harry Potter and the Prisoner of Azkaban (Book 3)',\n", 3010 | " 'Harry Potter and the Goblet of Fire (Book 4)',\n", 3011 | " 'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',\n", 3012 | " 'The Cradle Will Fall'],\n", 3013 | " dtype='object', name='title')\n" 3014 | ] 3015 | } 3016 | ], 3017 | "source": [ 3018 | "for i in range(len(suggestion)):\n", 3019 | " print(book_pivot.index[suggestion[i]])" 3020 | ] 3021 | }, 3022 | { 3023 | "cell_type": "code", 3024 | "execution_count": 63, 3025 | "metadata": {}, 3026 | "outputs": [ 3027 | { 3028 | "data": { 3029 | "text/plain": [ 3030 | "'4 Blondes'" 3031 | ] 3032 | }, 3033 | "execution_count": 63, 3034 | "metadata": {}, 3035 | "output_type": "execute_result" 3036 | } 3037 | ], 3038 | "source": [ 3039 | "book_pivot.index[3]" 3040 | ] 3041 | }, 3042 | { 3043 | "cell_type": "code", 3044 | "execution_count": 64, 3045 | "metadata": {}, 3046 | "outputs": [], 3047 | "source": [ 3048 | "#keeping books name\n", 3049 | "book_names = book_pivot.index" 3050 | ] 3051 | }, 3052 | { 3053 | "cell_type": "code", 3054 | "execution_count": 65, 3055 | "metadata": {}, 3056 | "outputs": [ 3057 | { 3058 | "data": { 3059 | "text/plain": [ 3060 | "'2nd Chance'" 3061 | ] 3062 | }, 3063 | "execution_count": 65, 3064 | "metadata": {}, 3065 | "output_type": 
"execute_result" 3066 | } 3067 | ], 3068 | "source": [ 3069 | "book_names[2]" 3070 | ] 3071 | }, 3072 | { 3073 | "cell_type": "code", 3074 | "execution_count": 66, 3075 | "metadata": {}, 3076 | "outputs": [ 3077 | { 3078 | "data": { 3079 | "text/plain": [ 3080 | "3" 3081 | ] 3082 | }, 3083 | "execution_count": 66, 3084 | "metadata": {}, 3085 | "output_type": "execute_result" 3086 | } 3087 | ], 3088 | "source": [ 3089 | "np.where(book_pivot.index == '4 Blondes')[0][0]" 3090 | ] 3091 | }, 3092 | { 3093 | "cell_type": "markdown", 3094 | "metadata": {}, 3095 | "source": [ 3096 | "# find url" 3097 | ] 3098 | }, 3099 | { 3100 | "cell_type": "code", 3101 | "execution_count": 67, 3102 | "metadata": {}, 3103 | "outputs": [], 3104 | "source": [ 3105 | "# final_rating['title'].value_counts()\n", 3106 | "ids = np.where(final_rating['title'] == \"Harry Potter and the Chamber of Secrets (Book 2)\")[0][0]" 3107 | ] 3108 | }, 3109 | { 3110 | "cell_type": "code", 3111 | "execution_count": 68, 3112 | "metadata": {}, 3113 | "outputs": [ 3114 | { 3115 | "data": { 3116 | "text/plain": [ 3117 | "'http://images.amazon.com/images/P/0439064872.01.LZZZZZZZ.jpg'" 3118 | ] 3119 | }, 3120 | "execution_count": 68, 3121 | "metadata": {}, 3122 | "output_type": "execute_result" 3123 | } 3124 | ], 3125 | "source": [ 3126 | "final_rating.iloc[ids]['image_url']" 3127 | ] 3128 | }, 3129 | { 3130 | "cell_type": "code", 3131 | "execution_count": 69, 3132 | "metadata": {}, 3133 | "outputs": [], 3134 | "source": [ 3135 | "book_name = []\n", 3136 | "for book_id in suggestion:\n", 3137 | " book_name.append(book_pivot.index[book_id])\n", 3138 | " \n", 3139 | " " 3140 | ] 3141 | }, 3142 | { 3143 | "cell_type": "code", 3144 | "execution_count": 70, 3145 | "metadata": {}, 3146 | "outputs": [ 3147 | { 3148 | "data": { 3149 | "text/plain": [ 3150 | "Index(['Harry Potter and the Chamber of Secrets (Book 2)',\n", 3151 | " 'Harry Potter and the Prisoner of Azkaban (Book 3)',\n", 3152 | " 'Harry Potter and the Goblet 
of Fire (Book 4)',\n", 3153 | " 'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',\n", 3154 | " 'The Cradle Will Fall'],\n", 3155 | " dtype='object', name='title')" 3156 | ] 3157 | }, 3158 | "execution_count": 70, 3159 | "metadata": {}, 3160 | "output_type": "execute_result" 3161 | } 3162 | ], 3163 | "source": [ 3164 | "book_name[0]" 3165 | ] 3166 | }, 3167 | { 3168 | "cell_type": "code", 3169 | "execution_count": 71, 3170 | "metadata": {}, 3171 | "outputs": [], 3172 | "source": [ 3173 | "ids_index = []\n", 3174 | "for name in book_name[0]: \n", 3175 | " ids = np.where(final_rating['title'] == name)[0][0]\n", 3176 | " ids_index.append(ids)" 3177 | ] 3178 | }, 3179 | { 3180 | "cell_type": "code", 3181 | "execution_count": 72, 3182 | "metadata": {}, 3183 | "outputs": [ 3184 | { 3185 | "name": "stdout", 3186 | "output_type": "stream", 3187 | "text": [ 3188 | "http://images.amazon.com/images/P/0439064872.01.LZZZZZZZ.jpg\n", 3189 | "http://images.amazon.com/images/P/0439136369.01.LZZZZZZZ.jpg\n", 3190 | "http://images.amazon.com/images/P/0439139597.01.LZZZZZZZ.jpg\n", 3191 | "http://images.amazon.com/images/P/043936213X.01.LZZZZZZZ.jpg\n", 3192 | "http://images.amazon.com/images/P/0446604232.01.LZZZZZZZ.jpg\n", 3193 | "http://images.amazon.com/images/P/0440115450.01.LZZZZZZZ.jpg\n" 3194 | ] 3195 | } 3196 | ], 3197 | "source": [ 3198 | "for idx in ids_index:\n", 3199 | " url = final_rating.iloc[idx]['image_url']\n", 3200 | " print(url)" 3201 | ] 3202 | }, 3203 | { 3204 | "cell_type": "code", 3205 | "execution_count": 73, 3206 | "metadata": {}, 3207 | "outputs": [], 3208 | "source": [ 3209 | "import pickle\n", 3210 | "pickle.dump(model,open('artifacts/model.pkl','wb'))\n", 3211 | "pickle.dump(book_names,open('artifacts/book_names.pkl','wb'))\n", 3212 | "pickle.dump(final_rating,open('artifacts/final_rating.pkl','wb'))\n", 3213 | "pickle.dump(book_pivot,open('artifacts/book_pivot.pkl','wb'))" 3214 | ] 3215 | }, 3216 | { 3217 | "cell_type": "markdown", 3218 | 
"metadata": {}, 3219 | "source": [ 3220 | "# Testing model" 3221 | ] 3222 | }, 3223 | { 3224 | "cell_type": "code", 3225 | "execution_count": 74, 3226 | "metadata": {}, 3227 | "outputs": [], 3228 | "source": [ 3229 | "def recommend_book(book_name):\n", 3230 | " book_id = np.where(book_pivot.index == book_name)[0][0]\n", 3231 | " distance, suggestion = model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1), n_neighbors=6 )\n", 3232 | " \n", 3233 | " for i in range(len(suggestion)):\n", 3234 | " books = book_pivot.index[suggestion[i]]\n", 3235 | " for j in books:\n", 3236 | " if j == book_name:\n", 3237 | " print(f\"You searched '{book_name}'\\n\")\n", 3238 | " print(\"The suggestion books are: \\n\")\n", 3239 | " else:\n", 3240 | " print(j)" 3241 | ] 3242 | }, 3243 | { 3244 | "cell_type": "code", 3245 | "execution_count": 75, 3246 | "metadata": {}, 3247 | "outputs": [ 3248 | { 3249 | "name": "stdout", 3250 | "output_type": "stream", 3251 | "text": [ 3252 | "You searched 'Harry Potter and the Chamber of Secrets (Book 2)'\n", 3253 | "\n", 3254 | "The suggestion books are: \n", 3255 | "\n", 3256 | "Harry Potter and the Prisoner of Azkaban (Book 3)\n", 3257 | "Harry Potter and the Goblet of Fire (Book 4)\n", 3258 | "Harry Potter and the Sorcerer's Stone (Book 1)\n", 3259 | "Exclusive\n", 3260 | "The Cradle Will Fall\n" 3261 | ] 3262 | } 3263 | ], 3264 | "source": [ 3265 | "book_name = \"Harry Potter and the Chamber of Secrets (Book 2)\"\n", 3266 | "recommend_book(book_name)" 3267 | ] 3268 | }, 3269 | { 3270 | "cell_type": "code", 3271 | "execution_count": null, 3272 | "metadata": {}, 3273 | "outputs": [], 3274 | "source": [] 3275 | } 3276 | ], 3277 | "metadata": { 3278 | "kernelspec": { 3279 | "display_name": "Python 3 (ipykernel)", 3280 | "language": "python", 3281 | "name": "python3" 3282 | }, 3283 | "language_info": { 3284 | "codemirror_mode": { 3285 | "name": "ipython", 3286 | "version": 3 3287 | }, 3288 | "file_extension": ".py", 3289 | "mimetype": 
"text/x-python", 3290 | "name": "python", 3291 | "nbconvert_exporter": "python", 3292 | "pygments_lexer": "ipython3", 3293 | "version": "3.7.13" 3294 | } 3295 | }, 3296 | "nbformat": 4, 3297 | "nbformat_minor": 4 3298 | } 3299 | --------------------------------------------------------------------------------