├── .github
│   └── workflows
│       └── main.yaml
├── books_recommender
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   └── configuration.py
│   ├── entity
│   │   ├── __init__.py
│   │   └── config_entity.py
│   ├── logger
│   │   ├── __init__.py
│   │   └── log.py
│   ├── pipeline
│   │   ├── __init__.py
│   │   └── training_pipeline.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── util.py
│   ├── components
│   │   ├── __init__.py
│   │   ├── stage_00_data_ingestion.py
│   │   ├── stage_01_data_validation.py
│   │   ├── stage_02_data_transformation.py
│   │   └── stage_03_model_trainer.py
│   ├── exception
│   │   ├── __init__.py
│   │   └── exception_handler.py
│   └── constant
│       └── __init__.py
├── requirements.txt
├── .dockerignore
├── templates
│   ├── 1.png
│   ├── 2.png
│   ├── intro.jpeg
│   └── book_names.pkl
├── Dockerfile
├── config
│   └── config.yaml
├── setup.py
├── LICENSE
├── .gitignore
├── app.py
├── README.md
└── notebook
    └── Books Recommender data analysis.ipynb
/.github/workflows/main.yaml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/config/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/entity/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/logger/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/pipeline/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/components/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/exception/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | numpy
3 | pandas
4 | scikit-learn
5 | scipy   # imported directly by stage_03_model_trainer.py
6 | six     # imported directly by stage_00_data_ingestion.py
7 | notebook
8 | PyYAML
9 | -e .
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Environments
2 | .env
3 | .venv
4 | env/
5 | venv/
6 | ENV/
7 | env.bak/
8 | venv.bak/
--------------------------------------------------------------------------------
/templates/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/1.png
--------------------------------------------------------------------------------
/templates/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/2.png
--------------------------------------------------------------------------------
/templates/intro.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/intro.jpeg
--------------------------------------------------------------------------------
/templates/book_names.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/book_names.pkl
--------------------------------------------------------------------------------
/books_recommender/constant/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 |
4 | ROOT_DIR = os.getcwd()
5 | # Main config file path
6 | CONFIG_FOLDER_NAME = "config"
7 | CONFIG_FILE_NAME = "config.yaml"
8 | CONFIG_FILE_PATH = os.path.join(ROOT_DIR, CONFIG_FOLDER_NAME, CONFIG_FILE_NAME)
--------------------------------------------------------------------------------
/books_recommender/utils/util.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | import sys
3 | from books_recommender.exception.exception_handler import AppException
4 |
5 |
6 |
7 | def read_yaml_file(file_path: str) -> dict:
8 |     """
9 |     Reads a YAML file and returns its contents as a dictionary.
10 | 
11 |     :param file_path: path to the YAML file
12 |     """
13 |     try:
14 |         with open(file_path, 'rb') as yaml_file:
15 |             return yaml.safe_load(yaml_file)
16 |     except Exception as e:
17 |         raise AppException(e, sys) from e
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # app/Dockerfile
2 | 
3 | FROM python:3.7-slim
4 | 
5 | EXPOSE 8501
6 | 
7 | WORKDIR /app
8 | 
9 | RUN apt-get update && apt-get install -y \
10 |     build-essential \
11 |     software-properties-common \
12 |     git \
13 |     && rm -rf /var/lib/apt/lists/*
14 | 
15 | # Clone the repository into /app, then overlay any local changes from the build context
16 | RUN git clone https://github.com/entbappy/ML-Based-Book-Recommender-System.git .
17 | 
18 | COPY . /app
19 | 
21 | RUN pip3 install -r requirements.txt
22 |
23 | ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
--------------------------------------------------------------------------------
/books_recommender/logger/log.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | from datetime import datetime
4 |
5 |
6 | # Directory where log files will be stored
7 | LOG_DIR = "logs"
8 | LOG_DIR = os.path.join(os.getcwd(), LOG_DIR)
9 | 
10 | # Create LOG_DIR if it does not exist.
11 | os.makedirs(LOG_DIR, exist_ok=True)
12 | 
13 | 
14 | # File name for the log file, based on the current timestamp
15 | CURRENT_TIME_STAMP = f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
16 | file_name = f"log_{CURRENT_TIME_STAMP}.log"
17 | 
18 | # Full path of the log file for this run
19 | log_file_path = os.path.join(LOG_DIR, file_name)
20 |
21 |
22 | logging.basicConfig(filename=log_file_path,
23 | filemode='w',
24 | format='[%(asctime)s] %(name)s - %(levelname)s - %(message)s',
25 | level=logging.NOTSET)
26 |
--------------------------------------------------------------------------------
/config/config.yaml:
--------------------------------------------------------------------------------
1 | artifacts_config:
2 | artifacts_dir: artifacts
3 |
4 | data_ingestion_config:
5 | dataset_download_url: https://github.com/entbappy/Branching-tutorial/raw/master/books_data.zip
6 | dataset_dir: dataset
7 | ingested_dir: ingested_data
8 | raw_data_dir: raw_data
9 |
10 |
11 | data_validation_config:
12 | clean_data_dir: clean_data
13 | serialized_objects_dir: serialized_objects
14 | books_csv_file: BX-Books.csv
15 | ratings_csv_file: BX-Book-Ratings.csv
16 |
17 |
18 | data_transformation_config:
19 | transformed_data_dir: transformed_data
20 |
21 |
22 | model_trainer_config:
23 | trained_model_dir: trained_model
24 | trained_model_name: model.pkl
25 |
26 |
27 | recommendation_config:
28 |   # NOTE: appears to be leftover from a movie-recommender template; configuration.py reads this
29 |   # key but never uses it, and app.py serves posters from the dataset's image_url column instead
30 |   poster_api_url: https://api.themoviedb.org/3/movie/{}?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | with open("README.md", "r", encoding="utf-8") as f:
4 | long_description = f.read()
5 |
6 | ## Edit the variables below as per your requirements -
7 | REPO_NAME = "ML Based Books Recommender System"
8 | AUTHOR_USER_NAME = "BOKTIAR AHMED BAPPY"
9 | SRC_REPO = "books_recommender"
10 | LIST_OF_REQUIREMENTS = []
11 |
12 |
13 | setup(
14 | name=SRC_REPO,
15 | version="0.0.1",
16 | author="BOKTIAR AHMED BAPPY",
17 | description="A small local package for ML-based book recommendations",
18 | long_description=long_description,
19 | long_description_content_type="text/markdown",
20 | url="https://github.com/entbappy/ML-Based-Book-Recommender-System",
21 | author_email="boktiar@ineuron.ai",
22 | packages=find_packages(),
23 | license="MIT",
24 | python_requires=">=3.7",
25 | install_requires=LIST_OF_REQUIREMENTS
26 | )
27 |
--------------------------------------------------------------------------------
/books_recommender/pipeline/training_pipeline.py:
--------------------------------------------------------------------------------
1 | from books_recommender.components.stage_00_data_ingestion import DataIngestion
2 | from books_recommender.components.stage_01_data_validation import DataValidation
3 | from books_recommender.components.stage_02_data_transformation import DataTransformation
4 | from books_recommender.components.stage_03_model_trainer import ModelTrainer
5 |
6 |
7 |
8 | class TrainingPipeline:
9 | def __init__(self):
10 | self.data_ingestion = DataIngestion()
11 | self.data_validation = DataValidation()
12 | self.data_transformation = DataTransformation()
13 | self.model_trainer = ModelTrainer()
14 |
15 |
16 | def start_training_pipeline(self):
17 |         """
18 |         Runs all pipeline stages in order: ingestion, validation, transformation, model training.
19 |         :return: None
20 |         """
21 | self.data_ingestion.initiate_data_ingestion()
22 | self.data_validation.initiate_data_validation()
23 | self.data_transformation.initiate_data_transformation()
24 | self.model_trainer.initiate_model_trainer()
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 iNeuron Intelligence Private Limited
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/books_recommender/entity/config_entity.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | DataIngestionConfig = namedtuple("DataIngestionConfig", ["dataset_download_url",
4 | "raw_data_dir",
5 | "ingested_dir"])
6 |
7 | DataValidationConfig = namedtuple("DataValidationConfig", ["clean_data_dir",
8 | "books_csv_file",
9 | "ratings_csv_file",
10 | "serialized_objects_dir"])
11 |
12 |
13 | DataTransformationConfig = namedtuple("DataTransformationConfig", ["clean_data_file_path",
14 | "transformed_data_dir"])
15 |
16 |
17 |
18 | ModelTrainerConfig = namedtuple("ModelTrainerConfig", ["transformed_data_file_dir",
19 | "trained_model_dir",
20 | "trained_model_name"])
21 |
22 |
23 |
24 | ModelRecommendationConfig = namedtuple("ModelRecommendationConfig", ["book_name_serialized_objects",
25 | "book_pivot_serialized_objects",
26 | "final_rating_serialized_objects",
27 | "trained_model_path"])
28 |
29 |
30 |
--------------------------------------------------------------------------------
/books_recommender/exception/exception_handler.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 |
5 | class AppException(Exception):
6 |     """
7 |     Organization: iNeuron Intelligence Private Limited
8 |     AppException is a customized exception class designed to capture refined details about an
9 |     exception, such as the Python script file name and line number, along with the error message.
10 |     With a custom exception one can easily spot the source of an error and provide a quick fix.
11 | 
12 |     """
13 |
14 | def __init__(self, error_message: Exception, error_detail: sys):
15 |         """
16 |         :param error_message: exception object raised from a module
17 |         :param error_detail: the sys module, used to fetch traceback details
18 |         """
18 | super().__init__(error_message)
19 | self.error_message = AppException.error_message_detail(error_message, error_detail=error_detail)
20 |
21 | @staticmethod
22 | def error_message_detail(error:Exception, error_detail:sys):
23 |         """
24 |         error: Exception object raised from a module
25 |         error_detail: the sys module; contains detailed information about the execution state
26 |         """
27 | _, _, exc_tb = error_detail.exc_info()
28 |         # Extract the file name from the exception traceback
29 |         file_name = exc_tb.tb_frame.f_code.co_filename
30 | 
31 |         # Prepare the error message
32 |         error_message = f"Error occurred in python script [{file_name}]" \
33 |                         f" at line number [{exc_tb.tb_lineno}] error message [{error}]."
34 |
35 | return error_message
36 |
37 | def __repr__(self):
38 |         """
39 |         Formatting of the AppException object representation.
40 |         """
41 |         return AppException.__name__.__str__()
42 | 
43 |     def __str__(self):
44 |         """
45 |         Formatting of how the object should appear when used in a print statement.
46 |         """
47 | return self.error_message
--------------------------------------------------------------------------------
/books_recommender/components/stage_03_model_trainer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pickle
4 | from sklearn.neighbors import NearestNeighbors
5 | from scipy.sparse import csr_matrix
6 | from books_recommender.logger.log import logging
7 | from books_recommender.config.configuration import AppConfiguration
8 | from books_recommender.exception.exception_handler import AppException
9 |
10 |
11 | class ModelTrainer:
12 | def __init__(self, app_config = AppConfiguration()):
13 | try:
14 | self.model_trainer_config = app_config.get_model_trainer_config()
15 | except Exception as e:
16 | raise AppException(e, sys) from e
17 |
18 |
19 | def train(self):
20 | try:
21 | #loading pivot data
22 | book_pivot = pickle.load(open(self.model_trainer_config.transformed_data_file_dir,'rb'))
23 | book_sparse = csr_matrix(book_pivot)
24 | #Training model
25 | model = NearestNeighbors(algorithm= 'brute')
26 | model.fit(book_sparse)
27 |
28 | #Saving model object for recommendations
29 | os.makedirs(self.model_trainer_config.trained_model_dir, exist_ok=True)
30 | file_name = os.path.join(self.model_trainer_config.trained_model_dir,self.model_trainer_config.trained_model_name)
31 | pickle.dump(model,open(file_name,'wb'))
32 | logging.info(f"Saving final model to {file_name}")
33 |
34 | except Exception as e:
35 | raise AppException(e, sys) from e
36 |
37 |
38 |
39 | def initiate_model_trainer(self):
40 | try:
41 | logging.info(f"{'='*20}Model Trainer log started.{'='*20} ")
42 | self.train()
43 | logging.info(f"{'='*20}Model Trainer log completed.{'='*20} \n\n")
44 | except Exception as e:
45 | raise AppException(e, sys) from e
46 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 | artifacts/*
131 |
--------------------------------------------------------------------------------
/books_recommender/components/stage_00_data_ingestion.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from six.moves import urllib
4 | import zipfile
5 | from books_recommender.logger.log import logging
6 | from books_recommender.exception.exception_handler import AppException
7 | from books_recommender.config.configuration import AppConfiguration
8 |
9 | class DataIngestion:
10 |
11 | def __init__(self, app_config = AppConfiguration()):
12 |         """
13 |         DataIngestion initialization
14 |         data_ingestion_config: DataIngestionConfig
15 |         """
16 | try:
17 | logging.info(f"{'='*20}Data Ingestion log started.{'='*20} ")
18 | self.data_ingestion_config= app_config.get_data_ingestion_config()
19 | except Exception as e:
20 | raise AppException(e, sys) from e
21 |
22 |
23 | def download_data(self):
24 | """
25 | Fetch the data from the url
26 |
27 | """
28 | try:
29 |
30 | dataset_url = self.data_ingestion_config.dataset_download_url
31 | zip_download_dir = self.data_ingestion_config.raw_data_dir
32 | os.makedirs(zip_download_dir, exist_ok=True)
33 | data_file_name = os.path.basename(dataset_url)
34 | zip_file_path = os.path.join(zip_download_dir, data_file_name)
35 | logging.info(f"Downloading data from {dataset_url} into file {zip_file_path}")
36 | urllib.request.urlretrieve(dataset_url,zip_file_path)
37 | logging.info(f"Downloaded data from {dataset_url} into file {zip_file_path}")
38 | return zip_file_path
39 |
40 | except Exception as e:
41 | raise AppException(e, sys) from e
42 |
43 |
44 | def extract_zip_file(self,zip_file_path: str):
45 |         """
46 |         Extracts the zip file into the ingested data directory.
47 |         zip_file_path: str
48 |         :return: None
49 |         """
50 | try:
51 | ingested_dir = self.data_ingestion_config.ingested_dir
52 | os.makedirs(ingested_dir, exist_ok=True)
53 | with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
54 | zip_ref.extractall(ingested_dir)
55 | logging.info(f"Extracting zip file: {zip_file_path} into dir: {ingested_dir}")
56 | except Exception as e:
57 | raise AppException(e,sys) from e
58 |
59 |
60 | def initiate_data_ingestion(self):
61 | try:
62 | zip_file_path = self.download_data()
63 | self.extract_zip_file(zip_file_path=zip_file_path)
64 | logging.info(f"{'='*20}Data Ingestion log completed.{'='*20} \n\n")
65 | except Exception as e:
66 | raise AppException(e, sys) from e
--------------------------------------------------------------------------------
/books_recommender/components/stage_02_data_transformation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pickle
4 | import pandas as pd
5 | from books_recommender.logger.log import logging
6 | from books_recommender.config.configuration import AppConfiguration
7 | from books_recommender.exception.exception_handler import AppException
8 |
9 |
10 |
11 | class DataTransformation:
12 | def __init__(self, app_config = AppConfiguration()):
13 | try:
14 | self.data_transformation_config = app_config.get_data_transformation_config()
15 | self.data_validation_config= app_config.get_data_validation_config()
16 | except Exception as e:
17 | raise AppException(e, sys) from e
18 |
19 |
20 |
21 | def get_data_transformer(self):
22 | try:
23 | df = pd.read_csv(self.data_transformation_config.clean_data_file_path)
24 |             # Create a pivot table: titles as rows, user_ids as columns, ratings as values
25 | book_pivot = df.pivot_table(columns='user_id', index='title', values= 'rating')
26 | logging.info(f" Shape of book pivot table: {book_pivot.shape}")
27 | book_pivot.fillna(0, inplace=True)
28 |
29 | #saving pivot table data
30 | os.makedirs(self.data_transformation_config.transformed_data_dir, exist_ok=True)
31 | pickle.dump(book_pivot,open(os.path.join(self.data_transformation_config.transformed_data_dir,"transformed_data.pkl"),'wb'))
32 | logging.info(f"Saved pivot table data to {self.data_transformation_config.transformed_data_dir}")
33 |
34 |             # Keep the book titles (the pivot table index)
35 | book_names = book_pivot.index
36 |
37 | #saving book_names objects for web app
38 | os.makedirs(self.data_validation_config.serialized_objects_dir, exist_ok=True)
39 | pickle.dump(book_names,open(os.path.join(self.data_validation_config.serialized_objects_dir, "book_names.pkl"),'wb'))
40 | logging.info(f"Saved book_names serialization object to {self.data_validation_config.serialized_objects_dir}")
41 |
42 | #saving book_pivot objects for web app
43 | os.makedirs(self.data_validation_config.serialized_objects_dir, exist_ok=True)
44 | pickle.dump(book_pivot,open(os.path.join(self.data_validation_config.serialized_objects_dir, "book_pivot.pkl"),'wb'))
45 | logging.info(f"Saved book_pivot serialization object to {self.data_validation_config.serialized_objects_dir}")
46 |
47 | except Exception as e:
48 | raise AppException(e, sys) from e
49 |
50 |
51 |
52 | def initiate_data_transformation(self):
53 | try:
54 | logging.info(f"{'='*20}Data Transformation log started.{'='*20} ")
55 | self.get_data_transformer()
56 | logging.info(f"{'='*20}Data Transformation log completed.{'='*20} \n\n")
57 | except Exception as e:
58 | raise AppException(e, sys) from e
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/books_recommender/components/stage_01_data_validation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pandas as pd
4 | import pickle
6 | from books_recommender.logger.log import logging
7 | from books_recommender.config.configuration import AppConfiguration
8 | from books_recommender.exception.exception_handler import AppException
9 |
10 |
11 |
12 | class DataValidation:
13 | def __init__(self, app_config = AppConfiguration()):
14 | try:
15 | self.data_validation_config= app_config.get_data_validation_config()
16 | except Exception as e:
17 | raise AppException(e, sys) from e
18 |
19 |
20 |
21 | def preprocess_data(self):
22 | try:
23 |             # error_bad_lines was deprecated in pandas 1.3 and removed in 2.0; on newer pandas use on_bad_lines='skip'
24 |             ratings = pd.read_csv(self.data_validation_config.ratings_csv_file, sep=";", error_bad_lines=False, encoding='latin-1')
25 |             books = pd.read_csv(self.data_validation_config.books_csv_file, sep=";", error_bad_lines=False, encoding='latin-1')
25 |
26 | logging.info(f" Shape of ratings data file: {ratings.shape}")
27 | logging.info(f" Shape of books data file: {books.shape}")
28 |
29 |             # The Image-URL-L column is needed for the poster, so we keep it
30 |             books = books[['ISBN','Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher','Image-URL-L']]
31 |             # Rename some unwieldy column names in books
32 |             books.rename(columns={"Book-Title":'title',
33 |                                   'Book-Author':'author',
34 |                                   "Year-Of-Publication":'year',
35 |                                   "Publisher":"publisher",
36 |                                   "Image-URL-L":"image_url"},inplace=True)
37 | 
38 | 
39 |             # Rename some unwieldy column names in ratings
40 |             ratings.rename(columns={"User-ID":'user_id',
41 |                                     'Book-Rating':'rating'},inplace=True)
42 |
43 |             # Keep only users who rated more than 200 books
44 | x = ratings['user_id'].value_counts() > 200
45 | y = x[x].index
46 | ratings = ratings[ratings['user_id'].isin(y)]
47 |
48 | # Now join ratings with books
49 | ratings_with_books = ratings.merge(books, on='ISBN')
50 | number_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()
51 | number_rating.rename(columns={'rating':'num_of_rating'},inplace=True)
52 | final_rating = ratings_with_books.merge(number_rating, on='title')
53 |
54 |             # Keep only books that received at least 50 ratings
55 | final_rating = final_rating[final_rating['num_of_rating'] >= 50]
56 |
57 |             # Drop duplicate (user_id, title) pairs
58 | final_rating.drop_duplicates(['user_id','title'],inplace=True)
59 | logging.info(f" Shape of the final clean dataset: {final_rating.shape}")
60 |
61 | # Saving the cleaned data for transformation
62 | os.makedirs(self.data_validation_config.clean_data_dir, exist_ok=True)
63 | final_rating.to_csv(os.path.join(self.data_validation_config.clean_data_dir,'clean_data.csv'), index = False)
64 | logging.info(f"Saved cleaned data to {self.data_validation_config.clean_data_dir}")
65 |
66 |
67 | #saving final_rating objects for web app
68 | os.makedirs(self.data_validation_config.serialized_objects_dir, exist_ok=True)
69 | pickle.dump(final_rating,open(os.path.join(self.data_validation_config.serialized_objects_dir, "final_rating.pkl"),'wb'))
70 | logging.info(f"Saved final_rating serialization object to {self.data_validation_config.serialized_objects_dir}")
71 |
72 | except Exception as e:
73 | raise AppException(e, sys) from e
74 |
75 |
76 | def initiate_data_validation(self):
77 | try:
78 | logging.info(f"{'='*20}Data Validation log started.{'='*20} ")
79 | self.preprocess_data()
80 | logging.info(f"{'='*20}Data Validation log completed.{'='*20} \n\n")
81 | except Exception as e:
82 | raise AppException(e, sys) from e
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pickle
4 | import streamlit as st
5 | import numpy as np
6 | from books_recommender.logger.log import logging
7 | from books_recommender.config.configuration import AppConfiguration
8 | from books_recommender.pipeline.training_pipeline import TrainingPipeline
9 | from books_recommender.exception.exception_handler import AppException
10 |
11 |
12 | class Recommendation:
13 | def __init__(self,app_config = AppConfiguration()):
14 | try:
15 | self.recommendation_config= app_config.get_recommendation_config()
16 | except Exception as e:
17 | raise AppException(e, sys) from e
18 |
19 |
20 | def fetch_poster(self,suggestion):
21 | try:
22 | book_name = []
23 | ids_index = []
24 | poster_url = []
25 | book_pivot = pickle.load(open(self.recommendation_config.book_pivot_serialized_objects,'rb'))
26 | final_rating = pickle.load(open(self.recommendation_config.final_rating_serialized_objects,'rb'))
27 |
28 |             # suggestion is a 2-D array of neighbor indices, so book_name[0] holds the recommended titles
29 |             for book_id in suggestion:
30 |                 book_name.append(book_pivot.index[book_id])
30 |
31 | for name in book_name[0]:
32 | ids = np.where(final_rating['title'] == name)[0][0]
33 | ids_index.append(ids)
34 |
35 | for idx in ids_index:
36 | url = final_rating.iloc[idx]['image_url']
37 | poster_url.append(url)
38 |
39 | return poster_url
40 |
41 | except Exception as e:
42 | raise AppException(e, sys) from e
43 |
44 |
45 |
46 | def recommend_book(self,book_name):
47 | try:
48 | books_list = []
49 | model = pickle.load(open(self.recommendation_config.trained_model_path,'rb'))
50 | book_pivot = pickle.load(open(self.recommendation_config.book_pivot_serialized_objects,'rb'))
51 | book_id = np.where(book_pivot.index == book_name)[0][0]
52 | distance, suggestion = model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1), n_neighbors=6 )
53 |
54 | poster_url = self.fetch_poster(suggestion)
55 |
56 | for i in range(len(suggestion)):
57 | books = book_pivot.index[suggestion[i]]
58 | for j in books:
59 | books_list.append(j)
60 | return books_list , poster_url
61 |
62 | except Exception as e:
63 | raise AppException(e, sys) from e
64 |
65 |
66 | def train_engine(self):
67 | try:
68 | obj = TrainingPipeline()
69 | obj.start_training_pipeline()
70 | st.text("Training Completed!")
71 |             logging.info("Training completed successfully!")
72 | except Exception as e:
73 | raise AppException(e, sys) from e
74 |
75 |
76 | def recommendations_engine(self,selected_books):
77 | try:
78 | recommended_books,poster_url = self.recommend_book(selected_books)
79 |             col1, col2, col3, col4, col5 = st.columns(5)
80 |             # Index 0 is the selected book itself, so display neighbors 1-5
81 |             with col1:
81 | st.text(recommended_books[1])
82 | st.image(poster_url[1])
83 | with col2:
84 | st.text(recommended_books[2])
85 | st.image(poster_url[2])
86 |
87 | with col3:
88 | st.text(recommended_books[3])
89 | st.image(poster_url[3])
90 | with col4:
91 | st.text(recommended_books[4])
92 | st.image(poster_url[4])
93 | with col5:
94 | st.text(recommended_books[5])
95 | st.image(poster_url[5])
96 | except Exception as e:
97 | raise AppException(e, sys) from e
98 |
99 |
100 |
101 | if __name__ == "__main__":
102 | st.header('ML Based Books Recommender System')
103 | st.text("This is a collaborative filtering based recommendation system!")
104 |
105 | obj = Recommendation()
106 |
107 | #Training
108 | if st.button('Train Recommender System'):
109 | obj.train_engine()
110 |
111 | book_names = pickle.load(open(os.path.join('templates','book_names.pkl') ,'rb'))
112 | selected_books = st.selectbox(
113 | "Type or select a book from the dropdown",
114 | book_names)
115 |
116 | #recommendation
117 | if st.button('Show Recommendation'):
118 | obj.recommendations_engine(selected_books)
119 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Project: ML-Based Book Recommender System | Collaborative Filtering Based
2 |
3 |
4 |
5 | Recommendation systems are becoming increasingly important in today's extremely busy world. People are always short on time with the myriad tasks they need to accomplish in the limited 24 hours. Therefore, recommendation systems are important as they help users make the right choices without having to expend their cognitive resources.
6 |
7 | The purpose of a recommendation system is essentially to search for content that would be interesting to an individual. Moreover, it involves a number of factors to create personalised lists of useful and interesting content specific to each user. Recommendation systems are Artificial Intelligence based algorithms that skim through all possible options and create a customized list of items that are interesting and relevant to an individual. These results are based on the user's profile, search/browsing history, what other people with similar traits/demographics are consuming, and how likely the user is to consume those items. This is achieved through predictive modeling and heuristics with the data available.
8 |
9 |
10 | # Note:
11 | If you want to understand the entire project workflow, please refer to the Jupyter notebook inside the notebook folder.
12 |
13 | # Types of Recommendation System :
14 |
15 | ### 1) Content Based:
16 | 
17 | - Content-based systems use characteristic information and take item attributes into consideration.
18 | 
19 | - Examples: Twitter, YouTube.
20 | 
21 | - Which music you are listening to, which artists you follow: embeddings are formed from these features.
22 | 
23 | - Recommendations are driven by user-specific actions or similar items.
24 | 
25 | - Each item is represented as a feature vector (a toy sketch follows this list).
26 | 
27 | - These systems make recommendations using a user's item and profile features. They hypothesize that if a user was interested in an item in the past, they will once again be interested in it in the future.
28 | 
29 | - One issue that arises is making obvious recommendations because of excessive specialization (user A is only interested in categories B, C, and D, and the system is not able to recommend items outside those categories, even though they could be interesting to them).
30 |
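For intuition, here is a toy sketch of the content-based idea. This project itself is collaborative-filtering based, and the item descriptions below are made up purely for illustration:

```python
# Toy content-based recommender: items become TF-IDF vectors of their
# descriptions, and vector similarity drives the recommendations.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical item descriptions (not from this project's dataset)
descriptions = [
    "epic fantasy dragons magic kingdom",
    "space opera starships aliens empire",
    "fantasy magic school young wizard",
]

vectors = TfidfVectorizer().fit_transform(descriptions)  # item x term matrix
similarity = cosine_similarity(vectors)                  # item x item similarity

# Items most similar to item 0, excluding item 0 itself
print(similarity[0].argsort()[::-1][1:])  # [2 1]: the other fantasy title ranks first
```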
31 | ### 2) Collaborative Based:
32 | 
33 | - Collaborative filtering systems are based on user-item interactions.
34 | 
35 | - Clusters of users with the same ratings, i.e. similar users.
36 | 
37 | - Book recommendation, so a cluster mechanism is used.
38 | 
39 | - Only one parameter is taken: ratings or comments.
40 | 
41 | - In short, collaborative filtering systems are based on the assumption that if a user likes item A and another user likes the same item A as well as another item, item B, the first user could also be interested in the second item.
42 | 
43 | - Issues are:
44 | 
45 |   - The user-item matrix is huge (n x m), so it is computationally expensive.
46 | 
47 |   - Only famous items will get recommended.
48 | 
49 |   - New items might not get recommended at all.
50 |
51 | ### 3 ) Hybrid Based :
52 |
53 | - Hybrid systems, which combine both types of information with the aim of avoiding problems that are generated when working with just one kind.
54 |
55 | - Combination of both and used now a days .
56 |
57 | - Uses : word2vec , embedding .
58 |
59 | # About this project:
60 |
61 | This is a collaborative filtering based book recommender system: a Streamlit web application that can recommend various similar books based on a user's interests (a rough sketch of the core idea follows).
62 |
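As a rough sketch of the core idea, mirroring stage_01_data_validation.py and stage_02_data_transformation.py but with toy ratings in place of the real CSV files:

```python
import pandas as pd

# Toy ratings standing in for the cleaned BX dataset
ratings = pd.DataFrame({
    "user_id": [1, 1, 2, 2, 3],
    "title":   ["A", "B", "A", "C", "B"],
    "rating":  [8, 7, 9, 5, 6],
})

# Same transformation as stage_02: titles as rows, users as columns,
# missing ratings filled with 0
book_pivot = ratings.pivot_table(columns="user_id", index="title", values="rating")
book_pivot.fillna(0, inplace=True)
print(book_pivot)
```

Each book is then a vector of per-user ratings, and books whose rating vectors are close are treated as similar.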
63 |
64 |
65 | # Demo:
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | # Dataset used:
74 |
75 | * [Dataset link](https://www.kaggle.com/ra4u12/bookrecommendation)
76 |
77 | # Concept used to build the model.pkl file: NearestNeighbors
78 |
79 | 1. Load the data
80 | 
81 | 2. Initialise the value of k (the number of neighbors)
82 | 
83 | 3. To find the nearest neighbors of a query point, iterate over all training data points
84 | 
85 | 4. Calculate the distance between the query point and each row of training data. Here we use Euclidean distance as our distance metric, since it's the most popular method.
86 | 
87 | 5. Sort the calculated distances in ascending order based on distance values
88 | 
89 | 6. Get the top k rows from the sorted array (see the sketch below)
90 |
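A minimal sketch of how this is applied here, mirroring stage_03_model_trainer.py and recommend_book in app.py; the pivot table below is toy data standing in for transformed_data.pkl:

```python
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Toy title-by-user rating matrix standing in for transformed_data.pkl
book_pivot = pd.DataFrame(
    [[8, 9, 0], [7, 0, 6], [0, 5, 0], [8, 8, 5]],
    index=["A", "B", "C", "D"], columns=[1, 2, 3],
)

# Same steps as stage_03: sparse matrix + brute-force (Euclidean) neighbor search
model = NearestNeighbors(algorithm="brute")
model.fit(csr_matrix(book_pivot.values))

# Same lookup as recommend_book: neighbors of one title's rating vector
book_id = np.where(book_pivot.index == "A")[0][0]
distance, suggestion = model.kneighbors(
    book_pivot.iloc[book_id, :].values.reshape(1, -1), n_neighbors=3
)
print(book_pivot.index[suggestion[0]])  # "A" itself first, then its nearest titles
```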
91 | # Built With
92 | 1. streamlit
93 | 2. Machine learning
94 | 3. sklearn
95 |
96 | # How to run?
97 | ### STEPS:
98 |
99 | Clone the repository
100 | 
101 | ```bash
102 | git clone https://github.com/entbappy/ML-Based-Book-Recommender-System.git
103 | ```
104 | ### STEP 01- Create a conda environment after opening the repository
105 |
106 | ```bash
107 | conda create -n books python=3.7.10 -y
108 | ```
109 |
110 | ```bash
111 | conda activate books
112 | ```
113 |
114 |
115 | ### STEP 02- install the requirements
116 | ```bash
117 | pip install -r requirements.txt
118 | ```
119 |
120 |
121 | Now run,
122 | ```bash
123 | streamlit run app.py
124 | ```
125 |
126 | ```bash
127 | Note: Before clicking on "Show Recommendation", first click on "Train Recommender System" to generate the models.
128 | ```
129 |
130 | # How to run in Docker?
131 |
132 | #### Build a Docker image
133 | The docker build command builds an image from a Dockerfile. Run the following command from the project root on your server to build the image:
134 |
135 |
136 | ```bash
137 | docker build -t streamlit .
138 | ```
139 |
140 | The -t flag is used to tag the image. Here, we have tagged the image streamlit. If you run:
141 |
142 | ```bash
143 | docker images
144 | ```
145 | You should see a streamlit image under the REPOSITORY column. For example:
146 |
147 | ```bash
148 | REPOSITORY TAG IMAGE ID CREATED SIZE
149 | streamlit latest 70b0759a094d About a minute ago 1.02GB
150 | ```
151 |
152 | #### Run the Docker container
153 | Now that you have built the image, you can run the container by executing:
154 |
155 | ```bash
156 | docker run -p 8501:8501 streamlit
157 | ```
158 |
159 | The -p flag publishes the container’s port 8501 to your server’s 8501 port.
160 |
161 | If all went well, you should see an output similar to the following:
162 |
163 | ```bash
164 | $ docker run -p 8501:8501 streamlit
165 |
166 | You can now view your Streamlit app in your browser.
167 |
168 | URL: http://127.0.0.1:8501/
169 | ```
170 |
171 | To view your app, browse to http://0.0.0.0:8501 or http://127.0.0.1:8501/
172 |
173 |
174 |
175 |
176 |
--------------------------------------------------------------------------------
/books_recommender/config/configuration.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from books_recommender.logger.log import logging
4 | from books_recommender.utils.util import read_yaml_file
5 | from books_recommender.exception.exception_handler import AppException
6 | from books_recommender.entity.config_entity import DataIngestionConfig, DataValidationConfig, DataTransformationConfig, ModelTrainerConfig, ModelRecommendationConfig
7 | from books_recommender.constant import *
8 |
9 |
10 | class AppConfiguration:
11 | def __init__(self, config_file_path: str = CONFIG_FILE_PATH):
12 | try:
13 | self.configs_info = read_yaml_file(file_path=config_file_path)
14 | except Exception as e:
15 | raise AppException(e, sys) from e
16 |
17 |
18 | def get_data_ingestion_config(self) -> DataIngestionConfig:
19 | try:
20 | data_ingestion_config = self.configs_info['data_ingestion_config']
21 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
22 | dataset_dir = data_ingestion_config['dataset_dir']
23 |
24 | ingested_data_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['ingested_dir'])
25 | raw_data_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['raw_data_dir'])
26 |
27 | response = DataIngestionConfig(
28 | dataset_download_url = data_ingestion_config['dataset_download_url'],
29 | raw_data_dir = raw_data_dir,
30 | ingested_dir = ingested_data_dir
31 | )
32 |
33 | logging.info(f"Data Ingestion Config: {response}")
34 | return response
35 |
36 | except Exception as e:
37 | raise AppException(e, sys) from e
38 |
39 |
40 |
41 | def get_data_validation_config(self) -> DataValidationConfig:
42 | try:
43 | data_validation_config = self.configs_info['data_validation_config']
44 | data_ingestion_config = self.configs_info['data_ingestion_config']
45 | dataset_dir = data_ingestion_config['dataset_dir']
46 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
47 | books_csv_file = data_validation_config['books_csv_file']
48 | ratings_csv_file = data_validation_config['ratings_csv_file']
49 |
50 | books_csv_file_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['ingested_dir'], books_csv_file)
51 | ratings_csv_file_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['ingested_dir'], ratings_csv_file)
52 | clean_data_path = os.path.join(artifacts_dir, dataset_dir, data_validation_config['clean_data_dir'])
53 | serialized_objects_dir = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'])
54 |
55 | response = DataValidationConfig(
56 | clean_data_dir = clean_data_path,
57 | books_csv_file = books_csv_file_dir,
58 | ratings_csv_file = ratings_csv_file_dir,
59 | serialized_objects_dir = serialized_objects_dir
60 | )
61 |
62 | logging.info(f"Data Validation Config: {response}")
63 | return response
64 |
65 | except Exception as e:
66 | raise AppException(e, sys) from e
67 |
68 |
69 |
70 | def get_data_transformation_config(self) -> DataTransformationConfig:
71 | try:
72 | data_transformation_config = self.configs_info['data_transformation_config']
73 | data_validation_config = self.configs_info['data_validation_config']
74 | data_ingestion_config = self.configs_info['data_ingestion_config']
75 | dataset_dir = data_ingestion_config['dataset_dir']
76 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
77 |
78 | clean_data_file_path = os.path.join(artifacts_dir, dataset_dir, data_validation_config['clean_data_dir'],'clean_data.csv')
79 | transformed_data_dir = os.path.join(artifacts_dir, dataset_dir, data_transformation_config['transformed_data_dir'])
80 |
81 | response = DataTransformationConfig(
82 | clean_data_file_path = clean_data_file_path,
83 | transformed_data_dir = transformed_data_dir
84 | )
85 |
86 | logging.info(f"Data Transformation Config: {response}")
87 | return response
88 |
89 | except Exception as e:
90 | raise AppException(e, sys) from e
91 |
92 |
93 |
94 | def get_model_trainer_config(self) -> ModelTrainerConfig:
95 | try:
96 | model_trainer_config = self.configs_info['model_trainer_config']
97 | data_transformation_config = self.configs_info['data_transformation_config']
98 | data_ingestion_config = self.configs_info['data_ingestion_config']
99 | dataset_dir = data_ingestion_config['dataset_dir']
100 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
101 |
102 |
103 |
104 | transformed_data_file_dir = os.path.join(artifacts_dir, dataset_dir, data_transformation_config['transformed_data_dir'], 'transformed_data.pkl')
105 | trained_model_dir = os.path.join(artifacts_dir, model_trainer_config['trained_model_dir'])
106 | trained_model_name = model_trainer_config['trained_model_name']
107 |
108 | response = ModelTrainerConfig(
109 | transformed_data_file_dir = transformed_data_file_dir,
110 | trained_model_dir = trained_model_dir,
111 | trained_model_name = trained_model_name
112 | )
113 |
114 | logging.info(f"Model Trainer Config: {response}")
115 | return response
116 |
117 | except Exception as e:
118 | raise AppException(e, sys) from e
119 |
120 |
121 |
122 | def get_recommendation_config(self) -> ModelRecommendationConfig:
123 | try:
124 | recommendation_config = self.configs_info['recommendation_config']
125 | model_trainer_config = self.configs_info['model_trainer_config']
126 | data_validation_config = self.configs_info['data_validation_config']
127 | trained_model_name = model_trainer_config['trained_model_name']
128 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
129 | trained_model_dir = os.path.join(artifacts_dir, model_trainer_config['trained_model_dir'])
130 |             poster_api = recommendation_config['poster_api_url']  # read but currently unused; posters come from the dataset's image_url column
131 |
132 |
133 | book_name_serialized_objects = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'], 'book_names.pkl')
134 | book_pivot_serialized_objects = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'], 'book_pivot.pkl')
135 | final_rating_serialized_objects = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'], 'final_rating.pkl')
136 |
137 | trained_model_path = os.path.join(trained_model_dir,trained_model_name)
138 |
139 | response = ModelRecommendationConfig(
140 | book_name_serialized_objects = book_name_serialized_objects,
141 | book_pivot_serialized_objects = book_pivot_serialized_objects,
142 | final_rating_serialized_objects = final_rating_serialized_objects,
143 | trained_model_path = trained_model_path
144 | )
145 |
146 | logging.info(f"Model Recommendation Config: {response}")
147 | return response
148 |
149 | except Exception as e:
150 | raise AppException(e, sys) from e
--------------------------------------------------------------------------------
/notebook/Books Recommender data analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Books Recommender system using clustering\n",
8 | "Collaborative filtering\n",
9 | "- Dataset :- https://www.kaggle.com/ra4u12/bookrecommendation"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "# Importing necessary library\n",
19 | "import pandas as pd\n",
20 | "import numpy as np\n",
21 | "# import matplotlib.pyplot as plt\n",
22 | "# import seaborn as sns"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 2,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stderr",
32 | "output_type": "stream",
33 | "text": [
34 | "b'Skipping line 6452: expected 8 fields, saw 9\\nSkipping line 43667: expected 8 fields, saw 10\\nSkipping line 51751: expected 8 fields, saw 9\\n'\n",
35 | "b'Skipping line 92038: expected 8 fields, saw 9\\nSkipping line 104319: expected 8 fields, saw 9\\nSkipping line 121768: expected 8 fields, saw 9\\n'\n",
36 | "b'Skipping line 144058: expected 8 fields, saw 9\\nSkipping line 150789: expected 8 fields, saw 9\\nSkipping line 157128: expected 8 fields, saw 9\\nSkipping line 180189: expected 8 fields, saw 9\\nSkipping line 185738: expected 8 fields, saw 9\\n'\n",
37 | "b'Skipping line 209388: expected 8 fields, saw 9\\nSkipping line 220626: expected 8 fields, saw 9\\nSkipping line 227933: expected 8 fields, saw 11\\nSkipping line 228957: expected 8 fields, saw 10\\nSkipping line 245933: expected 8 fields, saw 9\\nSkipping line 251296: expected 8 fields, saw 9\\nSkipping line 259941: expected 8 fields, saw 9\\nSkipping line 261529: expected 8 fields, saw 9\\n'\n",
38 | "C:\\Anaconda\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3146: DtypeWarning: Columns (3) have mixed types.Specify dtype option on import or set low_memory=False.\n",
39 | " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n"
40 | ]
41 | }
42 | ],
43 | "source": [
44 | "books = pd.read_csv('data/BX-Books.csv', sep=\";\", error_bad_lines=False, encoding='latin-1')"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 3,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "data": {
143 | "text/plain": [
144 | " ISBN Book-Title \\\n",
145 | "0 0195153448 Classical Mythology \n",
146 | "1 0002005018 Clara Callan \n",
147 | "2 0060973129 Decision in Normandy \n",
148 | "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
149 | "4 0393045218 The Mummies of Urumchi \n",
150 | "\n",
151 | " Book-Author Year-Of-Publication Publisher \\\n",
152 | "0 Mark P. O. Morford 2002 Oxford University Press \n",
153 | "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
154 | "2 Carlo D'Este 1991 HarperPerennial \n",
155 | "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
156 | "4 E. J. W. Barber 1999 W. W. Norton & Company \n",
157 | "\n",
158 | " Image-URL-S \\\n",
159 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
160 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
161 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
162 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
163 | "4 http://images.amazon.com/images/P/0393045218.0... \n",
164 | "\n",
165 | " Image-URL-M \\\n",
166 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
167 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
168 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
169 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
170 | "4 http://images.amazon.com/images/P/0393045218.0... \n",
171 | "\n",
172 | " Image-URL-L \n",
173 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
174 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
175 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
176 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
177 | "4 http://images.amazon.com/images/P/0393045218.0... "
178 | ]
179 | },
180 | "execution_count": 3,
181 | "metadata": {},
182 | "output_type": "execute_result"
183 | }
184 | ],
185 | "source": [
186 | "books.head()"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 4,
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/plain": [
197 | "'http://images.amazon.com/images/P/0671027387.01.LZZZZZZZ.jpg'"
198 | ]
199 | },
200 | "execution_count": 4,
201 | "metadata": {},
202 | "output_type": "execute_result"
203 | }
204 | ],
205 | "source": [
206 | "books.iloc[237]['Image-URL-L']"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 5,
212 | "metadata": {},
213 | "outputs": [],
214 | "source": [
215 | "# !curl \"http://images.amazon.com/images/P/0195153448.01.THUMBZZZ.jpg\" --out.png\n",
216 | "# !curl http://images.amazon.com/images/P/0060973129.01.THUMBZZZ.jpg --output some.jpg"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 6,
222 | "metadata": {},
223 | "outputs": [
224 | {
225 | "data": {
226 | "text/plain": [
227 | "(271360, 8)"
228 | ]
229 | },
230 | "execution_count": 6,
231 | "metadata": {},
232 | "output_type": "execute_result"
233 | }
234 | ],
235 | "source": [
236 | "books.shape"
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": 7,
242 | "metadata": {},
243 | "outputs": [
244 | {
245 | "data": {
246 | "text/plain": [
247 | "Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',\n",
248 | " 'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],\n",
249 | " dtype='object')"
250 | ]
251 | },
252 | "execution_count": 7,
253 | "metadata": {},
254 | "output_type": "execute_result"
255 | }
256 | ],
257 | "source": [
258 | "books.columns"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 |     "#### Conclusion:\n",
266 |     "The Image-URL column is important for the poster, so we will keep it."
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": 8,
272 | "metadata": {},
273 | "outputs": [],
274 | "source": [
275 | "books = books[['ISBN','Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher','Image-URL-L']]"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 9,
281 | "metadata": {},
282 | "outputs": [
283 | {
284 | "data": {
362 | "text/plain": [
363 | " ISBN Book-Title \\\n",
364 | "0 0195153448 Classical Mythology \n",
365 | "1 0002005018 Clara Callan \n",
366 | "2 0060973129 Decision in Normandy \n",
367 | "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
368 | "4 0393045218 The Mummies of Urumchi \n",
369 | "\n",
370 | " Book-Author Year-Of-Publication Publisher \\\n",
371 | "0 Mark P. O. Morford 2002 Oxford University Press \n",
372 | "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
373 | "2 Carlo D'Este 1991 HarperPerennial \n",
374 | "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
375 | "4 E. J. W. Barber 1999 W. W. Norton & Company \n",
376 | "\n",
377 | " Image-URL-L \n",
378 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
379 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
380 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
381 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
382 | "4 http://images.amazon.com/images/P/0393045218.0... "
383 | ]
384 | },
385 | "execution_count": 9,
386 | "metadata": {},
387 | "output_type": "execute_result"
388 | }
389 | ],
390 | "source": [
391 | "books.head()"
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": 10,
397 | "metadata": {},
398 | "outputs": [],
399 | "source": [
400 | "# Let's rename some awkwardly named columns\n",
401 | "books.rename(columns={\"Book-Title\":'title',\n",
402 | " 'Book-Author':'author',\n",
403 | " \"Year-Of-Publication\":'year',\n",
404 | " \"Publisher\":\"publisher\",\n",
405 | " \"Image-URL-L\":\"image_url\"},inplace=True)"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 11,
411 | "metadata": {},
412 | "outputs": [
413 | {
414 | "data": {
492 | "text/plain": [
493 | " ISBN title \\\n",
494 | "0 0195153448 Classical Mythology \n",
495 | "1 0002005018 Clara Callan \n",
496 | "2 0060973129 Decision in Normandy \n",
497 | "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
498 | "4 0393045218 The Mummies of Urumchi \n",
499 | "\n",
500 | " author year publisher \\\n",
501 | "0 Mark P. O. Morford 2002 Oxford University Press \n",
502 | "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
503 | "2 Carlo D'Este 1991 HarperPerennial \n",
504 | "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
505 | "4 E. J. W. Barber 1999 W. W. Norton & Company \n",
506 | "\n",
507 | " image_url \n",
508 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
509 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
510 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
511 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
512 | "4 http://images.amazon.com/images/P/0393045218.0... "
513 | ]
514 | },
515 | "execution_count": 11,
516 | "metadata": {},
517 | "output_type": "execute_result"
518 | }
519 | ],
520 | "source": [
521 | "books.head()"
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": 12,
527 | "metadata": {},
528 | "outputs": [],
529 | "source": [
530 | "# Now load the second dataframe\n",
531 | "\n",
532 | "users = pd.read_csv('data/BX-Users.csv', sep=\";\", error_bad_lines=False, encoding='latin-1')"
533 | ]
534 | },
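{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0. A minimal sketch of the equivalent call on newer pandas, assuming the same file layout:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Equivalent load on pandas >= 1.3: on_bad_lines='skip' replaces the\n",
"# deprecated error_bad_lines=False and silently drops malformed rows.\n",
"users = pd.read_csv('data/BX-Users.csv', sep=';', on_bad_lines='skip', encoding='latin-1')"
]
},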
535 | {
536 | "cell_type": "code",
537 | "execution_count": 13,
538 | "metadata": {},
539 | "outputs": [
540 | {
541 | "data": {
601 | "text/plain": [
602 | " User-ID Location Age\n",
603 | "0 1 nyc, new york, usa NaN\n",
604 | "1 2 stockton, california, usa 18.0\n",
605 | "2 3 moscow, yukon territory, russia NaN\n",
606 | "3 4 porto, v.n.gaia, portugal 17.0\n",
607 | "4 5 farnborough, hants, united kingdom NaN"
608 | ]
609 | },
610 | "execution_count": 13,
611 | "metadata": {},
612 | "output_type": "execute_result"
613 | }
614 | ],
615 | "source": [
616 | "users.head()"
617 | ]
618 | },
619 | {
620 | "cell_type": "code",
621 | "execution_count": 14,
622 | "metadata": {},
623 | "outputs": [
624 | {
625 | "data": {
626 | "text/plain": [
627 | "(278858, 3)"
628 | ]
629 | },
630 | "execution_count": 14,
631 | "metadata": {},
632 | "output_type": "execute_result"
633 | }
634 | ],
635 | "source": [
636 | "users.shape"
637 | ]
638 | },
639 | {
640 | "cell_type": "code",
641 | "execution_count": 15,
642 | "metadata": {},
643 | "outputs": [],
644 | "source": [
645 | "# Let's rename some awkwardly named columns\n",
646 | "users.rename(columns={\"User-ID\":'user_id',\n",
647 | " 'Location':'location',\n",
648 | " \"Age\":'age'},inplace=True)"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": 16,
654 | "metadata": {},
655 | "outputs": [
656 | {
657 | "data": {
699 | "text/plain": [
700 | " user_id location age\n",
701 | "0 1 nyc, new york, usa NaN\n",
702 | "1 2 stockton, california, usa 18.0"
703 | ]
704 | },
705 | "execution_count": 16,
706 | "metadata": {},
707 | "output_type": "execute_result"
708 | }
709 | ],
710 | "source": [
711 | "users.head(2)"
712 | ]
713 | },
714 | {
715 | "cell_type": "code",
716 | "execution_count": 17,
717 | "metadata": {},
718 | "outputs": [],
719 | "source": [
720 | "# Now load the third dataframe\n",
721 | "\n",
722 | "ratings = pd.read_csv('data/BX-Book-Ratings.csv', sep=\";\", error_bad_lines=False, encoding='latin-1')"
723 | ]
724 | },
725 | {
726 | "cell_type": "code",
727 | "execution_count": 18,
728 | "metadata": {},
729 | "outputs": [
730 | {
731 | "data": {
791 | "text/plain": [
792 | " User-ID ISBN Book-Rating\n",
793 | "0 276725 034545104X 0\n",
794 | "1 276726 0155061224 5\n",
795 | "2 276727 0446520802 0\n",
796 | "3 276729 052165615X 3\n",
797 | "4 276729 0521795028 6"
798 | ]
799 | },
800 | "execution_count": 18,
801 | "metadata": {},
802 | "output_type": "execute_result"
803 | }
804 | ],
805 | "source": [
806 | "ratings.head()"
807 | ]
808 | },
809 | {
810 | "cell_type": "code",
811 | "execution_count": 19,
812 | "metadata": {},
813 | "outputs": [
814 | {
815 | "data": {
816 | "text/plain": [
817 | "(1149780, 3)"
818 | ]
819 | },
820 | "execution_count": 19,
821 | "metadata": {},
822 | "output_type": "execute_result"
823 | }
824 | ],
825 | "source": [
826 | "ratings.shape"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": 20,
832 | "metadata": {},
833 | "outputs": [],
834 | "source": [
835 | "# Let's rename some awkwardly named columns\n",
836 | "ratings.rename(columns={\"User-ID\":'user_id',\n",
837 | " 'Book-Rating':'rating'},inplace=True)"
838 | ]
839 | },
840 | {
841 | "cell_type": "code",
842 | "execution_count": 21,
843 | "metadata": {},
844 | "outputs": [
845 | {
846 | "data": {
888 | "text/plain": [
889 | " user_id ISBN rating\n",
890 | "0 276725 034545104X 0\n",
891 | "1 276726 0155061224 5"
892 | ]
893 | },
894 | "execution_count": 21,
895 | "metadata": {},
896 | "output_type": "execute_result"
897 | }
898 | ],
899 | "source": [
900 | "ratings.head(2)"
901 | ]
902 | },
903 | {
904 | "cell_type": "markdown",
905 | "metadata": {},
906 | "source": [
907 | "### Conclusion:\n",
908 | "We now have three dataframes:\n",
909 | "- books\n",
910 | "- users\n",
911 | "- ratings"
912 | ]
913 | },
914 | {
915 | "cell_type": "code",
916 | "execution_count": 22,
917 | "metadata": {},
918 | "outputs": [
919 | {
920 | "name": "stdout",
921 | "output_type": "stream",
922 | "text": [
923 | "(271360, 6)\n",
924 | "(278858, 3)\n",
925 | "(1149780, 3)\n"
926 | ]
927 | }
928 | ],
929 | "source": [
930 | "print(books.shape, users.shape, ratings.shape, sep='\\n')\n",
931 | "\n"
932 | ]
933 | },
934 | {
935 | "cell_type": "code",
936 | "execution_count": 23,
937 | "metadata": {},
938 | "outputs": [
939 | {
940 | "data": {
941 | "text/plain": [
942 | "11676 13602\n",
943 | "198711 7550\n",
944 | "153662 6109\n",
945 | "98391 5891\n",
946 | "35859 5850\n",
947 | " ... \n",
948 | "158698 1\n",
949 | "17920 1\n",
950 | "277135 1\n",
951 | "275086 1\n",
952 | "187812 1\n",
953 | "Name: user_id, Length: 105283, dtype: int64"
954 | ]
955 | },
956 | "execution_count": 23,
957 | "metadata": {},
958 | "output_type": "execute_result"
959 | }
960 | ],
961 | "source": [
962 | "ratings['user_id'].value_counts()"
963 | ]
964 | },
965 | {
966 | "cell_type": "code",
967 | "execution_count": 24,
968 | "metadata": {},
969 | "outputs": [
970 | {
971 | "data": {
972 | "text/plain": [
973 | "(105283,)"
974 | ]
975 | },
976 | "execution_count": 24,
977 | "metadata": {},
978 | "output_type": "execute_result"
979 | }
980 | ],
981 | "source": [
982 | "ratings['user_id'].value_counts().shape"
983 | ]
984 | },
985 | {
986 | "cell_type": "code",
987 | "execution_count": 25,
988 | "metadata": {},
989 | "outputs": [
990 | {
991 | "data": {
992 | "text/plain": [
993 | "(105283,)"
994 | ]
995 | },
996 | "execution_count": 25,
997 | "metadata": {},
998 | "output_type": "execute_result"
999 | }
1000 | ],
1001 | "source": [
1002 | "ratings['user_id'].unique().shape"
1003 | ]
1004 | },
1005 | {
1006 | "cell_type": "code",
1007 | "execution_count": 26,
1008 | "metadata": {},
1009 | "outputs": [],
1010 | "source": [
1011 | "# Let's find the users who have rated more than 200 books\n",
1012 | "x = ratings['user_id'].value_counts() > 200"
1013 | ]
1014 | },
1015 | {
1016 | "cell_type": "code",
1017 | "execution_count": 27,
1018 | "metadata": {},
1019 | "outputs": [
1020 | {
1021 | "data": {
1022 | "text/plain": [
1023 | "(899,)"
1024 | ]
1025 | },
1026 | "execution_count": 27,
1027 | "metadata": {},
1028 | "output_type": "execute_result"
1029 | }
1030 | ],
1031 | "source": [
1032 | "x[x].shape"
1033 | ]
1034 | },
1035 | {
1036 | "cell_type": "code",
1037 | "execution_count": 28,
1038 | "metadata": {},
1039 | "outputs": [],
1040 | "source": [
1041 | "y= x[x].index"
1042 | ]
1043 | },
1044 | {
1045 | "cell_type": "code",
1046 | "execution_count": 29,
1047 | "metadata": {},
1048 | "outputs": [
1049 | {
1050 | "data": {
1051 | "text/plain": [
1052 | "Int64Index([ 11676, 198711, 153662, 98391, 35859, 212898, 278418, 76352,\n",
1053 | " 110973, 235105,\n",
1054 | " ...\n",
1055 | " 260183, 155916, 44296, 73681, 59727, 28634, 188951, 9856,\n",
1056 | " 268622, 274808],\n",
1057 | " dtype='int64', length=899)"
1058 | ]
1059 | },
1060 | "execution_count": 29,
1061 | "metadata": {},
1062 | "output_type": "execute_result"
1063 | }
1064 | ],
1065 | "source": [
1066 | "y"
1067 | ]
1068 | },
1069 | {
1070 | "cell_type": "code",
1071 | "execution_count": 30,
1072 | "metadata": {},
1073 | "outputs": [],
1074 | "source": [
1075 | "ratings = ratings[ratings['user_id'].isin(y)]"
1076 | ]
1077 | },
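{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same active-user filter can be written in one pass; a minimal sketch, equivalent to the `value_counts`/`isin` steps above (`ratings_active` is a hypothetical name for the result):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One-pass alternative: map each row's user_id to that user's total\n",
"# rating count, then keep rows where the count exceeds 200.\n",
"counts = ratings['user_id'].value_counts()\n",
"ratings_active = ratings[ratings['user_id'].map(counts) > 200]"
]
},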
1078 | {
1079 | "cell_type": "code",
1080 | "execution_count": 31,
1081 | "metadata": {},
1082 | "outputs": [
1083 | {
1084 | "data": {
1144 | "text/plain": [
1145 | " user_id ISBN rating\n",
1146 | "1456 277427 002542730X 10\n",
1147 | "1457 277427 0026217457 0\n",
1148 | "1458 277427 003008685X 8\n",
1149 | "1459 277427 0030615321 0\n",
1150 | "1460 277427 0060002050 0"
1151 | ]
1152 | },
1153 | "execution_count": 31,
1154 | "metadata": {},
1155 | "output_type": "execute_result"
1156 | }
1157 | ],
1158 | "source": [
1159 | "ratings.head()"
1160 | ]
1161 | },
1162 | {
1163 | "cell_type": "code",
1164 | "execution_count": 32,
1165 | "metadata": {},
1166 | "outputs": [
1167 | {
1168 | "data": {
1169 | "text/plain": [
1170 | "(526356, 3)"
1171 | ]
1172 | },
1173 | "execution_count": 32,
1174 | "metadata": {},
1175 | "output_type": "execute_result"
1176 | }
1177 | ],
1178 | "source": [
1179 | "ratings.shape"
1180 | ]
1181 | },
1182 | {
1183 | "cell_type": "code",
1184 | "execution_count": 33,
1185 | "metadata": {},
1186 | "outputs": [],
1187 | "source": [
1188 | "# Now join ratings with books\n",
1189 | "\n",
1190 | "ratings_with_books = ratings.merge(books, on='ISBN')"
1191 | ]
1192 | },
1193 | {
1194 | "cell_type": "code",
1195 | "execution_count": 34,
1196 | "metadata": {},
1197 | "outputs": [
1198 | {
1199 | "data": {
1289 | "text/plain": [
1290 | " user_id ISBN rating \\\n",
1291 | "0 277427 002542730X 10 \n",
1292 | "1 3363 002542730X 0 \n",
1293 | "2 11676 002542730X 6 \n",
1294 | "3 12538 002542730X 10 \n",
1295 | "4 13552 002542730X 0 \n",
1296 | "\n",
1297 | " title author year \\\n",
1298 | "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1299 | "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1300 | "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1301 | "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1302 | "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1303 | "\n",
1304 | " publisher \\\n",
1305 | "0 John Wiley & Sons Inc \n",
1306 | "1 John Wiley & Sons Inc \n",
1307 | "2 John Wiley & Sons Inc \n",
1308 | "3 John Wiley & Sons Inc \n",
1309 | "4 John Wiley & Sons Inc \n",
1310 | "\n",
1311 | " image_url \n",
1312 | "0 http://images.amazon.com/images/P/002542730X.0... \n",
1313 | "1 http://images.amazon.com/images/P/002542730X.0... \n",
1314 | "2 http://images.amazon.com/images/P/002542730X.0... \n",
1315 | "3 http://images.amazon.com/images/P/002542730X.0... \n",
1316 | "4 http://images.amazon.com/images/P/002542730X.0... "
1317 | ]
1318 | },
1319 | "execution_count": 34,
1320 | "metadata": {},
1321 | "output_type": "execute_result"
1322 | }
1323 | ],
1324 | "source": [
1325 | "ratings_with_books.head()"
1326 | ]
1327 | },
1328 | {
1329 | "cell_type": "code",
1330 | "execution_count": 35,
1331 | "metadata": {},
1332 | "outputs": [
1333 | {
1334 | "data": {
1335 | "text/plain": [
1336 | "(487671, 8)"
1337 | ]
1338 | },
1339 | "execution_count": 35,
1340 | "metadata": {},
1341 | "output_type": "execute_result"
1342 | }
1343 | ],
1344 | "source": [
1345 | "ratings_with_books.shape"
1346 | ]
1347 | },
1348 | {
1349 | "cell_type": "code",
1350 | "execution_count": 36,
1351 | "metadata": {},
1352 | "outputs": [],
1353 | "source": [
1354 | "number_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()"
1355 | ]
1356 | },
1357 | {
1358 | "cell_type": "code",
1359 | "execution_count": 37,
1360 | "metadata": {},
1361 | "outputs": [
1362 | {
1363 | "data": {
1417 | "text/plain": [
1418 | " title rating\n",
1419 | "0 A Light in the Storm: The Civil War Diary of ... 2\n",
1420 | "1 Always Have Popsicles 1\n",
1421 | "2 Apple Magic (The Collector's series) 1\n",
1422 | "3 Beyond IBM: Leadership Marketing and Finance ... 1\n",
1423 | "4 Clifford Visita El Hospital (Clifford El Gran... 1"
1424 | ]
1425 | },
1426 | "execution_count": 37,
1427 | "metadata": {},
1428 | "output_type": "execute_result"
1429 | }
1430 | ],
1431 | "source": [
1432 | "number_rating.head()"
1433 | ]
1434 | },
1435 | {
1436 | "cell_type": "code",
1437 | "execution_count": 38,
1438 | "metadata": {},
1439 | "outputs": [],
1440 | "source": [
1441 | "number_rating.rename(columns={'rating':'num_of_rating'},inplace=True)"
1442 | ]
1443 | },
1444 | {
1445 | "cell_type": "code",
1446 | "execution_count": 39,
1447 | "metadata": {},
1448 | "outputs": [
1449 | {
1450 | "data": {
1504 | "text/plain": [
1505 | " title num_of_rating\n",
1506 | "0 A Light in the Storm: The Civil War Diary of ... 2\n",
1507 | "1 Always Have Popsicles 1\n",
1508 | "2 Apple Magic (The Collector's series) 1\n",
1509 | "3 Beyond IBM: Leadership Marketing and Finance ... 1\n",
1510 | "4 Clifford Visita El Hospital (Clifford El Gran... 1"
1511 | ]
1512 | },
1513 | "execution_count": 39,
1514 | "metadata": {},
1515 | "output_type": "execute_result"
1516 | }
1517 | ],
1518 | "source": [
1519 | "number_rating.head()"
1520 | ]
1521 | },
1522 | {
1523 | "cell_type": "code",
1524 | "execution_count": 40,
1525 | "metadata": {},
1526 | "outputs": [],
1527 | "source": [
1528 | "final_rating = ratings_with_books.merge(number_rating, on='title')"
1529 | ]
1530 | },
1531 | {
1532 | "cell_type": "code",
1533 | "execution_count": 41,
1534 | "metadata": {},
1535 | "outputs": [
1536 | {
1537 | "data": {
1633 | "text/plain": [
1634 | " user_id ISBN rating \\\n",
1635 | "0 277427 002542730X 10 \n",
1636 | "1 3363 002542730X 0 \n",
1637 | "2 11676 002542730X 6 \n",
1638 | "3 12538 002542730X 10 \n",
1639 | "4 13552 002542730X 0 \n",
1640 | "\n",
1641 | " title author year \\\n",
1642 | "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1643 | "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1644 | "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1645 | "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1646 | "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1647 | "\n",
1648 | " publisher \\\n",
1649 | "0 John Wiley & Sons Inc \n",
1650 | "1 John Wiley & Sons Inc \n",
1651 | "2 John Wiley & Sons Inc \n",
1652 | "3 John Wiley & Sons Inc \n",
1653 | "4 John Wiley & Sons Inc \n",
1654 | "\n",
1655 | " image_url num_of_rating \n",
1656 | "0 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1657 | "1 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1658 | "2 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1659 | "3 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1660 | "4 http://images.amazon.com/images/P/002542730X.0... 82 "
1661 | ]
1662 | },
1663 | "execution_count": 41,
1664 | "metadata": {},
1665 | "output_type": "execute_result"
1666 | }
1667 | ],
1668 | "source": [
1669 | "final_rating.head()"
1670 | ]
1671 | },
1672 | {
1673 | "cell_type": "code",
1674 | "execution_count": 42,
1675 | "metadata": {},
1676 | "outputs": [
1677 | {
1678 | "data": {
1679 | "text/plain": [
1680 | "(487671, 9)"
1681 | ]
1682 | },
1683 | "execution_count": 42,
1684 | "metadata": {},
1685 | "output_type": "execute_result"
1686 | }
1687 | ],
1688 | "source": [
1689 | "final_rating.shape"
1690 | ]
1691 | },
1692 | {
1693 | "cell_type": "code",
1694 | "execution_count": 43,
1695 | "metadata": {},
1696 | "outputs": [],
1697 | "source": [
1698 | "# Let's keep only books that received at least 50 ratings\n",
1699 | "\n",
1700 | "final_rating = final_rating[final_rating['num_of_rating'] >= 50]"
1701 | ]
1702 | },
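{
"cell_type": "markdown",
"metadata": {},
"source": [
"The merge with `number_rating` can also be skipped; a sketch of the same >= 50 filter using a groupby transform (`final_rating_alt` is a hypothetical name):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Equivalent filter without the helper dataframe: transform('count')\n",
"# broadcasts each title's rating count back onto every row. Same rows\n",
"# as final_rating, minus the num_of_rating column.\n",
"final_rating_alt = ratings_with_books[\n",
"    ratings_with_books.groupby('title')['rating'].transform('count') >= 50\n",
"]"
]
},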
1703 | {
1704 | "cell_type": "code",
1705 | "execution_count": 44,
1706 | "metadata": {},
1707 | "outputs": [
1708 | {
1709 | "data": {
1805 | "text/plain": [
1806 | " user_id ISBN rating \\\n",
1807 | "0 277427 002542730X 10 \n",
1808 | "1 3363 002542730X 0 \n",
1809 | "2 11676 002542730X 6 \n",
1810 | "3 12538 002542730X 10 \n",
1811 | "4 13552 002542730X 0 \n",
1812 | "\n",
1813 | " title author year \\\n",
1814 | "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1815 | "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1816 | "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1817 | "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1818 | "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1819 | "\n",
1820 | " publisher \\\n",
1821 | "0 John Wiley & Sons Inc \n",
1822 | "1 John Wiley & Sons Inc \n",
1823 | "2 John Wiley & Sons Inc \n",
1824 | "3 John Wiley & Sons Inc \n",
1825 | "4 John Wiley & Sons Inc \n",
1826 | "\n",
1827 | " image_url num_of_rating \n",
1828 | "0 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1829 | "1 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1830 | "2 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1831 | "3 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1832 | "4 http://images.amazon.com/images/P/002542730X.0... 82 "
1833 | ]
1834 | },
1835 | "execution_count": 44,
1836 | "metadata": {},
1837 | "output_type": "execute_result"
1838 | }
1839 | ],
1840 | "source": [
1841 | "final_rating.head()"
1842 | ]
1843 | },
1844 | {
1845 | "cell_type": "code",
1846 | "execution_count": 45,
1847 | "metadata": {},
1848 | "outputs": [
1849 | {
1850 | "data": {
1851 | "text/plain": [
1852 | "(61853, 9)"
1853 | ]
1854 | },
1855 | "execution_count": 45,
1856 | "metadata": {},
1857 | "output_type": "execute_result"
1858 | }
1859 | ],
1860 | "source": [
1861 | "final_rating.shape"
1862 | ]
1863 | },
1864 | {
1865 | "cell_type": "code",
1866 | "execution_count": 46,
1867 | "metadata": {},
1868 | "outputs": [],
1869 | "source": [
1870 | "# Let's drop duplicate (user_id, title) pairs\n",
1871 | "final_rating.drop_duplicates(['user_id','title'],inplace=True)"
1872 | ]
1873 | },
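{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since `final_rating` is a filtered slice at this point, the in-place call above can trigger pandas' SettingWithCopyWarning; a safer, non-inplace sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reassigning avoids mutating what may be a view of the original frame.\n",
"final_rating = final_rating.drop_duplicates(['user_id', 'title'])"
]
},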
1874 | {
1875 | "cell_type": "code",
1876 | "execution_count": 47,
1877 | "metadata": {},
1878 | "outputs": [
1879 | {
1880 | "data": {
1881 | "text/plain": [
1882 | "(59850, 9)"
1883 | ]
1884 | },
1885 | "execution_count": 47,
1886 | "metadata": {},
1887 | "output_type": "execute_result"
1888 | }
1889 | ],
1890 | "source": [
1891 | "final_rating.shape"
1892 | ]
1893 | },
1894 | {
1895 | "cell_type": "code",
1896 | "execution_count": 48,
1897 | "metadata": {},
1898 | "outputs": [],
1899 | "source": [
1900 | "# Let's create a title-by-user pivot table of ratings\n",
1901 | "book_pivot = final_rating.pivot_table(columns='user_id', index='title', values= 'rating')"
1902 | ]
1903 | },
1904 | {
1905 | "cell_type": "code",
1906 | "execution_count": 49,
1907 | "metadata": {},
1908 | "outputs": [
1909 | {
1910 | "data": {
2247 | "text/plain": [
2248 | "user_id 254 2276 2766 \\\n",
2249 | "title \n",
2250 | "1984 9.0 NaN NaN \n",
2251 | "1st to Die: A Novel NaN NaN NaN \n",
2252 | "2nd Chance NaN 10.0 NaN \n",
2253 | "4 Blondes NaN NaN NaN \n",
2254 | "84 Charing Cross Road NaN NaN NaN \n",
2255 | "... ... ... ... \n",
2256 | "Year of Wonders NaN NaN NaN \n",
2257 | "You Belong To Me NaN NaN NaN \n",
2258 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
2259 | "Zoya NaN NaN NaN \n",
2260 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2261 | "\n",
2262 | "user_id 2977 3363 3757 \\\n",
2263 | "title \n",
2264 | "1984 NaN NaN NaN \n",
2265 | "1st to Die: A Novel NaN NaN NaN \n",
2266 | "2nd Chance NaN NaN NaN \n",
2267 | "4 Blondes NaN NaN NaN \n",
2268 | "84 Charing Cross Road NaN NaN NaN \n",
2269 | "... ... ... ... \n",
2270 | "Year of Wonders 7.0 NaN NaN \n",
2271 | "You Belong To Me NaN NaN NaN \n",
2272 | "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n",
2273 | "Zoya NaN NaN NaN \n",
2274 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2275 | "\n",
2276 | "user_id 4017 4385 6242 \\\n",
2277 | "title \n",
2278 | "1984 NaN NaN NaN \n",
2279 | "1st to Die: A Novel NaN NaN NaN \n",
2280 | "2nd Chance NaN NaN NaN \n",
2281 | "4 Blondes NaN NaN NaN \n",
2282 | "84 Charing Cross Road NaN NaN NaN \n",
2283 | "... ... ... ... \n",
2284 | "Year of Wonders NaN NaN 7.0 \n",
2285 | "You Belong To Me NaN NaN NaN \n",
2286 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
2287 | "Zoya NaN NaN NaN \n",
2288 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2289 | "\n",
2290 | "user_id 6251 ... 274004 \\\n",
2291 | "title ... \n",
2292 | "1984 NaN ... NaN \n",
2293 | "1st to Die: A Novel NaN ... NaN \n",
2294 | "2nd Chance NaN ... NaN \n",
2295 | "4 Blondes 0.0 ... NaN \n",
2296 | "84 Charing Cross Road NaN ... NaN \n",
2297 | "... ... ... ... \n",
2298 | "Year of Wonders NaN ... NaN \n",
2299 | "You Belong To Me NaN ... NaN \n",
2300 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... NaN \n",
2301 | "Zoya NaN ... NaN \n",
2302 | "\\O\\\" Is for Outlaw\" NaN ... NaN \n",
2303 | "\n",
2304 | "user_id 274061 274301 274308 \\\n",
2305 | "title \n",
2306 | "1984 NaN NaN NaN \n",
2307 | "1st to Die: A Novel NaN NaN NaN \n",
2308 | "2nd Chance NaN NaN 0.0 \n",
2309 | "4 Blondes NaN NaN NaN \n",
2310 | "84 Charing Cross Road NaN NaN NaN \n",
2311 | "... ... ... ... \n",
2312 | "Year of Wonders NaN NaN NaN \n",
2313 | "You Belong To Me NaN NaN NaN \n",
2314 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
2315 | "Zoya NaN NaN NaN \n",
2316 | "\\O\\\" Is for Outlaw\" NaN 8.0 NaN \n",
2317 | "\n",
2318 | "user_id 274808 275970 277427 \\\n",
2319 | "title \n",
2320 | "1984 NaN 0.0 NaN \n",
2321 | "1st to Die: A Novel NaN NaN NaN \n",
2322 | "2nd Chance NaN NaN NaN \n",
2323 | "4 Blondes NaN NaN NaN \n",
2324 | "84 Charing Cross Road NaN 10.0 NaN \n",
2325 | "... ... ... ... \n",
2326 | "Year of Wonders NaN 0.0 NaN \n",
2327 | "You Belong To Me NaN NaN NaN \n",
2328 | "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n",
2329 | "Zoya NaN NaN NaN \n",
2330 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2331 | "\n",
2332 | "user_id 277478 277639 278418 \n",
2333 | "title \n",
2334 | "1984 NaN NaN NaN \n",
2335 | "1st to Die: A Novel NaN NaN NaN \n",
2336 | "2nd Chance NaN 0.0 NaN \n",
2337 | "4 Blondes NaN NaN NaN \n",
2338 | "84 Charing Cross Road NaN NaN NaN \n",
2339 | "... ... ... ... \n",
2340 | "Year of Wonders NaN NaN NaN \n",
2341 | "You Belong To Me NaN NaN NaN \n",
2342 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
2343 | "Zoya NaN NaN NaN \n",
2344 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2345 | "\n",
2346 | "[742 rows x 888 columns]"
2347 | ]
2348 | },
2349 | "execution_count": 49,
2350 | "metadata": {},
2351 | "output_type": "execute_result"
2352 | }
2353 | ],
2354 | "source": [
2355 | "book_pivot"
2356 | ]
2357 | },
2358 | {
2359 | "cell_type": "code",
2360 | "execution_count": 50,
2361 | "metadata": {},
2362 | "outputs": [
2363 | {
2364 | "data": {
2365 | "text/plain": [
2366 | "(742, 888)"
2367 | ]
2368 | },
2369 | "execution_count": 50,
2370 | "metadata": {},
2371 | "output_type": "execute_result"
2372 | }
2373 | ],
2374 | "source": [
2375 | "book_pivot.shape"
2376 | ]
2377 | },
2378 | {
2379 | "cell_type": "code",
2380 | "execution_count": 51,
2381 | "metadata": {},
2382 | "outputs": [],
2383 | "source": [
2384 | "book_pivot.fillna(0, inplace=True)"
2385 | ]
2386 | },
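{
"cell_type": "markdown",
"metadata": {},
"source": [
"The pivot and the fill can be combined in one call, since `pivot_table` accepts a `fill_value` argument:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Same result in a single step: fill_value=0 replaces the NaNs that\n",
"# appear wherever a user never rated a given title.\n",
"book_pivot = final_rating.pivot_table(columns='user_id', index='title',\n",
"                                      values='rating', fill_value=0)"
]
},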
2387 | {
2388 | "cell_type": "code",
2389 | "execution_count": 52,
2390 | "metadata": {},
2391 | "outputs": [
2392 | {
2393 | "data": {
2730 | "text/plain": [
2731 | "user_id 254 2276 2766 \\\n",
2732 | "title \n",
2733 | "1984 9.0 0.0 0.0 \n",
2734 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2735 | "2nd Chance 0.0 10.0 0.0 \n",
2736 | "4 Blondes 0.0 0.0 0.0 \n",
2737 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2738 | "... ... ... ... \n",
2739 | "Year of Wonders 0.0 0.0 0.0 \n",
2740 | "You Belong To Me 0.0 0.0 0.0 \n",
2741 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2742 | "Zoya 0.0 0.0 0.0 \n",
2743 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2744 | "\n",
2745 | "user_id 2977 3363 3757 \\\n",
2746 | "title \n",
2747 | "1984 0.0 0.0 0.0 \n",
2748 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2749 | "2nd Chance 0.0 0.0 0.0 \n",
2750 | "4 Blondes 0.0 0.0 0.0 \n",
2751 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2752 | "... ... ... ... \n",
2753 | "Year of Wonders 7.0 0.0 0.0 \n",
2754 | "You Belong To Me 0.0 0.0 0.0 \n",
2755 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2756 | "Zoya 0.0 0.0 0.0 \n",
2757 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2758 | "\n",
2759 | "user_id 4017 4385 6242 \\\n",
2760 | "title \n",
2761 | "1984 0.0 0.0 0.0 \n",
2762 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2763 | "2nd Chance 0.0 0.0 0.0 \n",
2764 | "4 Blondes 0.0 0.0 0.0 \n",
2765 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2766 | "... ... ... ... \n",
2767 | "Year of Wonders 0.0 0.0 7.0 \n",
2768 | "You Belong To Me 0.0 0.0 0.0 \n",
2769 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2770 | "Zoya 0.0 0.0 0.0 \n",
2771 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2772 | "\n",
2773 | "user_id 6251 ... 274004 \\\n",
2774 | "title ... \n",
2775 | "1984 0.0 ... 0.0 \n",
2776 | "1st to Die: A Novel 0.0 ... 0.0 \n",
2777 | "2nd Chance 0.0 ... 0.0 \n",
2778 | "4 Blondes 0.0 ... 0.0 \n",
2779 | "84 Charing Cross Road 0.0 ... 0.0 \n",
2780 | "... ... ... ... \n",
2781 | "Year of Wonders 0.0 ... 0.0 \n",
2782 | "You Belong To Me 0.0 ... 0.0 \n",
2783 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... 0.0 \n",
2784 | "Zoya 0.0 ... 0.0 \n",
2785 | "\\O\\\" Is for Outlaw\" 0.0 ... 0.0 \n",
2786 | "\n",
2787 | "user_id 274061 274301 274308 \\\n",
2788 | "title \n",
2789 | "1984 0.0 0.0 0.0 \n",
2790 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2791 | "2nd Chance 0.0 0.0 0.0 \n",
2792 | "4 Blondes 0.0 0.0 0.0 \n",
2793 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2794 | "... ... ... ... \n",
2795 | "Year of Wonders 0.0 0.0 0.0 \n",
2796 | "You Belong To Me 0.0 0.0 0.0 \n",
2797 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2798 | "Zoya 0.0 0.0 0.0 \n",
2799 | "\\O\\\" Is for Outlaw\" 0.0 8.0 0.0 \n",
2800 | "\n",
2801 | "user_id 274808 275970 277427 \\\n",
2802 | "title \n",
2803 | "1984 0.0 0.0 0.0 \n",
2804 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2805 | "2nd Chance 0.0 0.0 0.0 \n",
2806 | "4 Blondes 0.0 0.0 0.0 \n",
2807 | "84 Charing Cross Road 0.0 10.0 0.0 \n",
2808 | "... ... ... ... \n",
2809 | "Year of Wonders 0.0 0.0 0.0 \n",
2810 | "You Belong To Me 0.0 0.0 0.0 \n",
2811 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2812 | "Zoya 0.0 0.0 0.0 \n",
2813 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2814 | "\n",
2815 | "user_id 277478 277639 278418 \n",
2816 | "title \n",
2817 | "1984 0.0 0.0 0.0 \n",
2818 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2819 | "2nd Chance 0.0 0.0 0.0 \n",
2820 | "4 Blondes 0.0 0.0 0.0 \n",
2821 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2822 | "... ... ... ... \n",
2823 | "Year of Wonders 0.0 0.0 0.0 \n",
2824 | "You Belong To Me 0.0 0.0 0.0 \n",
2825 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2826 | "Zoya 0.0 0.0 0.0 \n",
2827 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2828 | "\n",
2829 | "[742 rows x 888 columns]"
2830 | ]
2831 | },
2832 | "execution_count": 52,
2833 | "metadata": {},
2834 | "output_type": "execute_result"
2835 | }
2836 | ],
2837 | "source": [
2838 | "book_pivot"
2839 | ]
2840 | },
2841 | {
2842 | "cell_type": "markdown",
2843 | "metadata": {},
2844 | "source": [
2845 | "# Training the Model"
2846 | ]
2847 | },
2848 | {
2849 | "cell_type": "code",
2850 | "execution_count": 53,
2851 | "metadata": {},
2852 | "outputs": [],
2853 | "source": [
2854 | "from scipy.sparse import csr_matrix"
2855 | ]
2856 | },
2857 | {
2858 | "cell_type": "code",
2859 | "execution_count": 54,
2860 | "metadata": {},
2861 | "outputs": [],
2862 | "source": [
2863 | "book_sparse = csr_matrix(book_pivot)"
2864 | ]
2865 | },
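{
"cell_type": "markdown",
"metadata": {},
"source": [
"The conversion pays off because the matrix is mostly zeros and CSR stores only the non-zero entries; a rough size comparison over the main CSR arrays:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Compare dense vs. sparse storage for the same ratings matrix.\n",
"dense_bytes = book_pivot.values.nbytes\n",
"sparse_bytes = (book_sparse.data.nbytes + book_sparse.indices.nbytes\n",
"                + book_sparse.indptr.nbytes)\n",
"print(dense_bytes, sparse_bytes)"
]
},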
2866 | {
2867 | "cell_type": "code",
2868 | "execution_count": 55,
2869 | "metadata": {},
2870 | "outputs": [
2871 | {
2872 | "data": {
2873 | "text/plain": [
2874 | "scipy.sparse.csr.csr_matrix"
2875 | ]
2876 | },
2877 | "execution_count": 55,
2878 | "metadata": {},
2879 | "output_type": "execute_result"
2880 | }
2881 | ],
2882 | "source": [
2883 | "type(book_sparse)"
2884 | ]
2885 | },
2886 | {
2887 | "cell_type": "code",
2888 | "execution_count": 56,
2889 | "metadata": {},
2890 | "outputs": [],
2891 | "source": [
2892 | "# Import NearestNeighbors, an unsupervised algorithm for similarity search\n",
2893 | "from sklearn.neighbors import NearestNeighbors\n",
2894 | "model = NearestNeighbors(algorithm= 'brute')"
2895 | ]
2896 | },
2897 | {
2898 | "cell_type": "code",
2899 | "execution_count": 57,
2900 | "metadata": {},
2901 | "outputs": [
2902 | {
2903 | "data": {
2904 | "text/plain": [
2905 | "NearestNeighbors(algorithm='brute')"
2906 | ]
2907 | },
2908 | "execution_count": 57,
2909 | "metadata": {},
2910 | "output_type": "execute_result"
2911 | }
2912 | ],
2913 | "source": [
2914 | "model.fit(book_sparse)"
2915 | ]
2916 | },
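{
"cell_type": "markdown",
"metadata": {},
"source": [
"By default `NearestNeighbors` uses Euclidean (Minkowski, p=2) distance. On sparse rating vectors, cosine distance is a common alternative worth trying; a minimal sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cosine variant: with algorithm='brute', scikit-learn accepts\n",
"# metric='cosine' directly on sparse input.\n",
"model_cosine = NearestNeighbors(algorithm='brute', metric='cosine')\n",
"model_cosine.fit(book_sparse)"
]
},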
2917 | {
2918 | "cell_type": "code",
2919 | "execution_count": 58,
2920 | "metadata": {},
2921 | "outputs": [],
2922 | "source": [
2923 | "distance, suggestion = model.kneighbors(book_pivot.iloc[237,:].values.reshape(1,-1), n_neighbors=6 )"
2924 | ]
2925 | },
2926 | {
2927 | "cell_type": "code",
2928 | "execution_count": 59,
2929 | "metadata": {},
2930 | "outputs": [
2931 | {
2932 | "data": {
2933 | "text/plain": [
2934 | "array([[ 0. , 68.78953409, 69.5413546 , 72.64296249, 76.83098333,\n",
2935 | " 77.28518616]])"
2936 | ]
2937 | },
2938 | "execution_count": 59,
2939 | "metadata": {},
2940 | "output_type": "execute_result"
2941 | }
2942 | ],
2943 | "source": [
2944 | "distance"
2945 | ]
2946 | },
2947 | {
2948 | "cell_type": "code",
2949 | "execution_count": 60,
2950 | "metadata": {},
2951 | "outputs": [
2952 | {
2953 | "data": {
2954 | "text/plain": [
2955 | "array([[237, 240, 238, 241, 184, 536]], dtype=int64)"
2956 | ]
2957 | },
2958 | "execution_count": 60,
2959 | "metadata": {},
2960 | "output_type": "execute_result"
2961 | }
2962 | ],
2963 | "source": [
2964 | "suggestion"
2965 | ]
2966 | },
2967 | {
2968 | "cell_type": "code",
2969 | "execution_count": 61,
2970 | "metadata": {},
2971 | "outputs": [
2972 | {
2973 | "data": {
2974 | "text/plain": [
2975 | "user_id\n",
2976 | "254 9.0\n",
2977 | "2276 0.0\n",
2978 | "2766 0.0\n",
2979 | "2977 0.0\n",
2980 | "3363 0.0\n",
2981 | " ... \n",
2982 | "275970 9.0\n",
2983 | "277427 0.0\n",
2984 | "277478 0.0\n",
2985 | "277639 0.0\n",
2986 | "278418 0.0\n",
2987 | "Name: Harry Potter and the Sorcerer's Stone (Book 1), Length: 888, dtype: float64"
2988 | ]
2989 | },
2990 | "execution_count": 61,
2991 | "metadata": {},
2992 | "output_type": "execute_result"
2993 | }
2994 | ],
2995 | "source": [
2996 | "book_pivot.iloc[241,:]"
2997 | ]
2998 | },
2999 | {
3000 | "cell_type": "code",
3001 | "execution_count": 62,
3002 | "metadata": {},
3003 | "outputs": [
3004 | {
3005 | "name": "stdout",
3006 | "output_type": "stream",
3007 | "text": [
3008 | "Index(['Harry Potter and the Chamber of Secrets (Book 2)',\n",
3009 | " 'Harry Potter and the Prisoner of Azkaban (Book 3)',\n",
3010 | " 'Harry Potter and the Goblet of Fire (Book 4)',\n",
3011 | " 'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',\n",
3012 | " 'The Cradle Will Fall'],\n",
3013 | " dtype='object', name='title')\n"
3014 | ]
3015 | }
3016 | ],
3017 | "source": [
3018 | "for i in range(len(suggestion)):\n",
3019 | " print(book_pivot.index[suggestion[i]])"
3020 | ]
3021 | },
3022 | {
3023 | "cell_type": "code",
3024 | "execution_count": 63,
3025 | "metadata": {},
3026 | "outputs": [
3027 | {
3028 | "data": {
3029 | "text/plain": [
3030 | "'4 Blondes'"
3031 | ]
3032 | },
3033 | "execution_count": 63,
3034 | "metadata": {},
3035 | "output_type": "execute_result"
3036 | }
3037 | ],
3038 | "source": [
3039 | "book_pivot.index[3]"
3040 | ]
3041 | },
3042 | {
3043 | "cell_type": "code",
3044 | "execution_count": 64,
3045 | "metadata": {},
3046 | "outputs": [],
3047 | "source": [
3048 | "#keeping books name\n",
3049 | "book_names = book_pivot.index"
3050 | ]
3051 | },
3052 | {
3053 | "cell_type": "code",
3054 | "execution_count": 65,
3055 | "metadata": {},
3056 | "outputs": [
3057 | {
3058 | "data": {
3059 | "text/plain": [
3060 | "'2nd Chance'"
3061 | ]
3062 | },
3063 | "execution_count": 65,
3064 | "metadata": {},
3065 | "output_type": "execute_result"
3066 | }
3067 | ],
3068 | "source": [
3069 | "book_names[2]"
3070 | ]
3071 | },
3072 | {
3073 | "cell_type": "code",
3074 | "execution_count": 66,
3075 | "metadata": {},
3076 | "outputs": [
3077 | {
3078 | "data": {
3079 | "text/plain": [
3080 | "3"
3081 | ]
3082 | },
3083 | "execution_count": 66,
3084 | "metadata": {},
3085 | "output_type": "execute_result"
3086 | }
3087 | ],
3088 | "source": [
3089 | "np.where(book_pivot.index == '4 Blondes')[0][0]"
3090 | ]
3091 | },
3092 | {
3093 | "cell_type": "markdown",
3094 | "metadata": {},
3095 | "source": [
3096 | "# find url"
3097 | ]
3098 | },
3099 | {
3100 | "cell_type": "code",
3101 | "execution_count": 67,
3102 | "metadata": {},
3103 | "outputs": [],
3104 | "source": [
3105 | "# final_rating['title'].value_counts()\n",
3106 | "ids = np.where(final_rating['title'] == \"Harry Potter and the Chamber of Secrets (Book 2)\")[0][0]"
3107 | ]
3108 | },
3109 | {
3110 | "cell_type": "code",
3111 | "execution_count": 68,
3112 | "metadata": {},
3113 | "outputs": [
3114 | {
3115 | "data": {
3116 | "text/plain": [
3117 | "'http://images.amazon.com/images/P/0439064872.01.LZZZZZZZ.jpg'"
3118 | ]
3119 | },
3120 | "execution_count": 68,
3121 | "metadata": {},
3122 | "output_type": "execute_result"
3123 | }
3124 | ],
3125 | "source": [
3126 | "final_rating.iloc[ids]['image_url']"
3127 | ]
3128 | },
3129 | {
3130 | "cell_type": "code",
3131 | "execution_count": 69,
3132 | "metadata": {},
3133 | "outputs": [],
3134 | "source": [
3135 | "book_name = []\n",
3136 | "for book_id in suggestion:\n",
3137 | " book_name.append(book_pivot.index[book_id])\n",
3138 | " \n",
3139 | " "
3140 | ]
3141 | },
3142 | {
3143 | "cell_type": "code",
3144 | "execution_count": 70,
3145 | "metadata": {},
3146 | "outputs": [
3147 | {
3148 | "data": {
3149 | "text/plain": [
3150 | "Index(['Harry Potter and the Chamber of Secrets (Book 2)',\n",
3151 | " 'Harry Potter and the Prisoner of Azkaban (Book 3)',\n",
3152 | " 'Harry Potter and the Goblet of Fire (Book 4)',\n",
3153 | " 'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',\n",
3154 | " 'The Cradle Will Fall'],\n",
3155 | " dtype='object', name='title')"
3156 | ]
3157 | },
3158 | "execution_count": 70,
3159 | "metadata": {},
3160 | "output_type": "execute_result"
3161 | }
3162 | ],
3163 | "source": [
3164 | "book_name[0]"
3165 | ]
3166 | },
3167 | {
3168 | "cell_type": "code",
3169 | "execution_count": 71,
3170 | "metadata": {},
3171 | "outputs": [],
3172 | "source": [
3173 | "ids_index = []\n",
3174 | "for name in book_name[0]: \n",
3175 | " ids = np.where(final_rating['title'] == name)[0][0]\n",
3176 | " ids_index.append(ids)"
3177 | ]
3178 | },
3179 | {
3180 | "cell_type": "code",
3181 | "execution_count": 72,
3182 | "metadata": {},
3183 | "outputs": [
3184 | {
3185 | "name": "stdout",
3186 | "output_type": "stream",
3187 | "text": [
3188 | "http://images.amazon.com/images/P/0439064872.01.LZZZZZZZ.jpg\n",
3189 | "http://images.amazon.com/images/P/0439136369.01.LZZZZZZZ.jpg\n",
3190 | "http://images.amazon.com/images/P/0439139597.01.LZZZZZZZ.jpg\n",
3191 | "http://images.amazon.com/images/P/043936213X.01.LZZZZZZZ.jpg\n",
3192 | "http://images.amazon.com/images/P/0446604232.01.LZZZZZZZ.jpg\n",
3193 | "http://images.amazon.com/images/P/0440115450.01.LZZZZZZZ.jpg\n"
3194 | ]
3195 | }
3196 | ],
3197 | "source": [
3198 | "for idx in ids_index:\n",
3199 | " url = final_rating.iloc[idx]['image_url']\n",
3200 | " print(url)"
3201 | ]
3202 | },
3203 | {
3204 | "cell_type": "code",
3205 | "execution_count": 73,
3206 | "metadata": {},
3207 | "outputs": [],
3208 | "source": [
3209 | "import pickle\n",
3210 | "pickle.dump(model,open('artifacts/model.pkl','wb'))\n",
3211 | "pickle.dump(book_names,open('artifacts/book_names.pkl','wb'))\n",
3212 | "pickle.dump(final_rating,open('artifacts/final_rating.pkl','wb'))\n",
3213 | "pickle.dump(book_pivot,open('artifacts/book_pivot.pkl','wb'))"
3214 | ]
3215 | },
3216 | {
3217 | "cell_type": "markdown",
3218 | "metadata": {},
3219 | "source": [
3220 | "# Testing model"
3221 | ]
3222 | },
3223 | {
3224 | "cell_type": "code",
3225 | "execution_count": 74,
3226 | "metadata": {},
3227 | "outputs": [],
3228 | "source": [
3229 | "def recommend_book(book_name):\n",
3230 | " book_id = np.where(book_pivot.index == book_name)[0][0]\n",
3231 | " distance, suggestion = model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1), n_neighbors=6 )\n",
3232 | " \n",
3233 | " for i in range(len(suggestion)):\n",
3234 | " books = book_pivot.index[suggestion[i]]\n",
3235 | " for j in books:\n",
3236 | " if j == book_name:\n",
3237 | " print(f\"You searched '{book_name}'\\n\")\n",
3238 | " print(\"The suggestion books are: \\n\")\n",
3239 | " else:\n",
3240 | " print(j)"
3241 | ]
3242 | },
3243 | {
3244 | "cell_type": "code",
3245 | "execution_count": 75,
3246 | "metadata": {},
3247 | "outputs": [
3248 | {
3249 | "name": "stdout",
3250 | "output_type": "stream",
3251 | "text": [
3252 | "You searched 'Harry Potter and the Chamber of Secrets (Book 2)'\n",
3253 | "\n",
3254 | "The suggestion books are: \n",
3255 | "\n",
3256 | "Harry Potter and the Prisoner of Azkaban (Book 3)\n",
3257 | "Harry Potter and the Goblet of Fire (Book 4)\n",
3258 | "Harry Potter and the Sorcerer's Stone (Book 1)\n",
3259 | "Exclusive\n",
3260 | "The Cradle Will Fall\n"
3261 | ]
3262 | }
3263 | ],
3264 | "source": [
3265 | "book_name = \"Harry Potter and the Chamber of Secrets (Book 2)\"\n",
3266 | "recommend_book(book_name)"
3267 | ]
3268 | },
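{
"cell_type": "markdown",
"metadata": {},
"source": [
"The four pickles written above are the artifacts a serving process, such as the Streamlit app, would load. A minimal loading sketch (assuming the same `artifacts/` paths; `loaded_model` and `loaded_names` are illustrative names):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reload the serialized model and the title index exactly as they were dumped\n",
"with open('artifacts/model.pkl', 'rb') as f:\n",
"    loaded_model = pickle.load(f)\n",
"with open('artifacts/book_names.pkl', 'rb') as f:\n",
"    loaded_names = pickle.load(f)"
]
},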
3269 | {
3270 | "cell_type": "code",
3271 | "execution_count": null,
3272 | "metadata": {},
3273 | "outputs": [],
3274 | "source": []
3275 | }
3276 | ],
3277 | "metadata": {
3278 | "kernelspec": {
3279 | "display_name": "Python 3 (ipykernel)",
3280 | "language": "python",
3281 | "name": "python3"
3282 | },
3283 | "language_info": {
3284 | "codemirror_mode": {
3285 | "name": "ipython",
3286 | "version": 3
3287 | },
3288 | "file_extension": ".py",
3289 | "mimetype": "text/x-python",
3290 | "name": "python",
3291 | "nbconvert_exporter": "python",
3292 | "pygments_lexer": "ipython3",
3293 | "version": "3.7.13"
3294 | }
3295 | },
3296 | "nbformat": 4,
3297 | "nbformat_minor": 4
3298 | }
3299 |
--------------------------------------------------------------------------------