├── .github
│   └── workflows
│       └── main.yaml
├── books_recommender
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   └── configuration.py
│   ├── entity
│   │   ├── __init__.py
│   │   └── config_entity.py
│   ├── logger
│   │   ├── __init__.py
│   │   └── log.py
│   ├── pipeline
│   │   ├── __init__.py
│   │   └── training_pipeline.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── util.py
│   ├── components
│   │   ├── __init__.py
│   │   ├── stage_00_data_ingestion.py
│   │   ├── stage_01_data_validation.py
│   │   ├── stage_02_data_transformation.py
│   │   └── stage_03_model_trainer.py
│   ├── exception
│   │   ├── __init__.py
│   │   └── exception_handler.py
│   └── constant
│       └── __init__.py
├── requirements.txt
├── .dockerignore
├── templates
│   ├── 1.png
│   ├── 2.png
│   ├── intro.jpeg
│   └── book_names.pkl
├── Dockerfile
├── config
│   └── config.yaml
├── setup.py
├── LICENSE
├── .gitignore
├── app.py
├── README.md
└── notebook
    └── Books Recommender data analysis.ipynb
/.github/workflows/main.yaml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/config/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/entity/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/logger/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/pipeline/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/components/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/books_recommender/exception/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | numpy
3 | pandas
4 | scikit-learn
5 | scipy   # imported directly by stage_03_model_trainer.py
6 | six     # imported directly by stage_00_data_ingestion.py
7 | notebook
8 | PyYAML
9 | -e .
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Environments
2 | .env
3 | .venv
4 | env/
5 | venv/
6 | ENV/
7 | env.bak/
8 | venv.bak/
--------------------------------------------------------------------------------
/templates/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/1.png
--------------------------------------------------------------------------------
/templates/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/2.png
--------------------------------------------------------------------------------
/templates/intro.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/intro.jpeg
--------------------------------------------------------------------------------
/templates/book_names.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/ML-Based-Book-Recommender-System/HEAD/templates/book_names.pkl
--------------------------------------------------------------------------------
/books_recommender/constant/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 |
4 | ROOT_DIR = os.getcwd()
5 | # Main config file path
6 | CONFIG_FOLDER_NAME = "config"
7 | CONFIG_FILE_NAME = "config.yaml"
8 | CONFIG_FILE_PATH = os.path.join(ROOT_DIR, CONFIG_FOLDER_NAME, CONFIG_FILE_NAME)
--------------------------------------------------------------------------------
/books_recommender/utils/util.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | import sys
3 | from books_recommender.exception.exception_handler import AppException
4 |
5 |
6 |
7 | def read_yaml_file(file_path: str) -> dict:
8 |     """
9 |     Reads a YAML file and returns its contents as a dictionary.
10 | 
11 |     :param file_path: path to the YAML file
12 |     """
13 |     try:
14 |         with open(file_path, 'rb') as yaml_file:
15 |             return yaml.safe_load(yaml_file)
16 |     except Exception as e:
17 |         raise AppException(e, sys) from e
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # app/Dockerfile
2 | 
3 | FROM python:3.7-slim
4 | 
5 | EXPOSE 8501
6 | 
7 | WORKDIR /app
8 | 
9 | RUN apt-get update && apt-get install -y \
10 |     build-essential \
11 |     software-properties-common \
12 |     git \
13 |     && rm -rf /var/lib/apt/lists/*
14 | 
15 | # Clone the repository into /app, then overlay any local changes from the build context
16 | RUN git clone https://github.com/entbappy/ML-Based-Book-Recommender-System.git .
17 | 
18 | COPY . /app
19 | 
21 | RUN pip3 install -r requirements.txt
22 |
23 | ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
--------------------------------------------------------------------------------
/books_recommender/logger/log.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | from datetime import datetime
4 |
5 |
6 | # Directory where log files will be stored
7 | LOG_DIR = "logs"
8 | LOG_DIR = os.path.join(os.getcwd(), LOG_DIR)
9 | 
10 | # Create LOG_DIR if it does not exist.
11 | os.makedirs(LOG_DIR, exist_ok=True)
12 | 
13 | 
14 | # File name for the log file, based on the current timestamp
15 | CURRENT_TIME_STAMP = f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
16 | file_name = f"log_{CURRENT_TIME_STAMP}.log"
17 | 
18 | # Full path of the log file for this run
19 | log_file_path = os.path.join(LOG_DIR, file_name)
20 |
21 |
22 | logging.basicConfig(filename=log_file_path,
23 | filemode='w',
24 | format='[%(asctime)s] %(name)s - %(levelname)s - %(message)s',
25 | level=logging.NOTSET)
26 |
--------------------------------------------------------------------------------
/config/config.yaml:
--------------------------------------------------------------------------------
1 | artifacts_config:
2 | artifacts_dir: artifacts
3 |
4 | data_ingestion_config:
5 | dataset_download_url: https://github.com/entbappy/Branching-tutorial/raw/master/books_data.zip
6 | dataset_dir: dataset
7 | ingested_dir: ingested_data
8 | raw_data_dir: raw_data
9 |
10 |
11 | data_validation_config:
12 | clean_data_dir: clean_data
13 | serialized_objects_dir: serialized_objects
14 | books_csv_file: BX-Books.csv
15 | ratings_csv_file: BX-Book-Ratings.csv
16 |
17 |
18 | data_transformation_config:
19 | transformed_data_dir: transformed_data
20 |
21 |
22 | model_trainer_config:
23 | trained_model_dir: trained_model
24 | trained_model_name: model.pkl
25 |
26 |
27 | recommendation_config:
28 |   # NOTE: appears to be leftover from a movie-recommender template; configuration.py reads this
29 |   # key but never uses it, and app.py serves posters from the dataset's image_url column instead
30 |   poster_api_url: https://api.themoviedb.org/3/movie/{}?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | with open("README.md", "r", encoding="utf-8") as f:
4 | long_description = f.read()
5 |
6 | ## Edit the variables below as per your requirements -
7 | REPO_NAME = "ML Based Books Recommender System"
8 | AUTHOR_USER_NAME = "BOKTIAR AHMED BAPPY"
9 | SRC_REPO = "books_recommender"
10 | LIST_OF_REQUIREMENTS = []
11 |
12 |
13 | setup(
14 | name=SRC_REPO,
15 | version="0.0.1",
16 | author="BOKTIAR AHMED BAPPY",
17 | description="A small local package for ML-based book recommendations",
18 | long_description=long_description,
19 | long_description_content_type="text/markdown",
20 | url="https://github.com/entbappy/ML-Based-Book-Recommender-System",
21 | author_email="boktiar@ineuron.ai",
22 | packages=find_packages(),
23 | license="MIT",
24 | python_requires=">=3.7",
25 | install_requires=LIST_OF_REQUIREMENTS
26 | )
27 |
--------------------------------------------------------------------------------
/books_recommender/pipeline/training_pipeline.py:
--------------------------------------------------------------------------------
1 | from books_recommender.components.stage_00_data_ingestion import DataIngestion
2 | from books_recommender.components.stage_01_data_validation import DataValidation
3 | from books_recommender.components.stage_02_data_transformation import DataTransformation
4 | from books_recommender.components.stage_03_model_trainer import ModelTrainer
5 |
6 |
7 |
8 | class TrainingPipeline:
9 | def __init__(self):
10 | self.data_ingestion = DataIngestion()
11 | self.data_validation = DataValidation()
12 | self.data_transformation = DataTransformation()
13 | self.model_trainer = ModelTrainer()
14 |
15 |
16 | def start_training_pipeline(self):
17 |         """
18 |         Runs all pipeline stages in order: ingestion, validation, transformation, model training.
19 |         :return: None
20 |         """
21 | self.data_ingestion.initiate_data_ingestion()
22 | self.data_validation.initiate_data_validation()
23 | self.data_transformation.initiate_data_transformation()
24 | self.model_trainer.initiate_model_trainer()
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 iNeuron Intelligence Private Limited
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/books_recommender/entity/config_entity.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | DataIngestionConfig = namedtuple("DataIngestionConfig", ["dataset_download_url",
4 | "raw_data_dir",
5 | "ingested_dir"])
6 |
7 | DataValidationConfig = namedtuple("DataValidationConfig", ["clean_data_dir",
8 | "books_csv_file",
9 | "ratings_csv_file",
10 | "serialized_objects_dir"])
11 |
12 |
13 | DataTransformationConfig = namedtuple("DataTransformationConfig", ["clean_data_file_path",
14 | "transformed_data_dir"])
15 |
16 |
17 |
18 | ModelTrainerConfig = namedtuple("ModelTrainerConfig", ["transformed_data_file_dir",
19 | "trained_model_dir",
20 | "trained_model_name"])
21 |
22 |
23 |
24 | ModelRecommendationConfig = namedtuple("ModelRecommendationConfig", ["book_name_serialized_objects",
25 | "book_pivot_serialized_objects",
26 | "final_rating_serialized_objects",
27 | "trained_model_path"])
28 |
29 |
30 |
--------------------------------------------------------------------------------
/books_recommender/exception/exception_handler.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 |
5 | class AppException(Exception):
6 |     """
7 |     Organization: iNeuron Intelligence Private Limited
8 |     AppException is a customized exception class designed to capture refined details about an
9 |     exception, such as the Python script file name and line number, along with the error message.
10 |     With a custom exception one can easily spot the source of an error and provide a quick fix.
11 | 
12 |     """
13 |
14 | def __init__(self, error_message: Exception, error_detail: sys):
15 |         """
16 |         :param error_message: exception object raised from a module
17 |         :param error_detail: the sys module, used to fetch traceback details
18 |         """
18 | super().__init__(error_message)
19 | self.error_message = AppException.error_message_detail(error_message, error_detail=error_detail)
20 |
21 | @staticmethod
22 | def error_message_detail(error:Exception, error_detail:sys):
23 |         """
24 |         error: Exception object raised from a module
25 |         error_detail: the sys module; contains detailed information about the execution state
26 |         """
27 | _, _, exc_tb = error_detail.exc_info()
28 |         # Extract the file name from the exception traceback
29 |         file_name = exc_tb.tb_frame.f_code.co_filename
30 | 
31 |         # Prepare the error message
32 |         error_message = f"Error occurred in python script [{file_name}]" \
33 |                         f" at line number [{exc_tb.tb_lineno}] error message [{error}]."
34 |
35 | return error_message
36 |
37 | def __repr__(self):
38 |         """
39 |         Formatting of the AppException object representation.
40 |         """
41 |         return AppException.__name__.__str__()
42 | 
43 |     def __str__(self):
44 |         """
45 |         Formatting of how the object should appear when used in a print statement.
46 |         """
47 | return self.error_message
--------------------------------------------------------------------------------
/books_recommender/components/stage_03_model_trainer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pickle
4 | from sklearn.neighbors import NearestNeighbors
5 | from scipy.sparse import csr_matrix
6 | from books_recommender.logger.log import logging
7 | from books_recommender.config.configuration import AppConfiguration
8 | from books_recommender.exception.exception_handler import AppException
9 |
10 |
11 | class ModelTrainer:
12 | def __init__(self, app_config = AppConfiguration()):
13 | try:
14 | self.model_trainer_config = app_config.get_model_trainer_config()
15 | except Exception as e:
16 | raise AppException(e, sys) from e
17 |
18 |
19 | def train(self):
20 | try:
21 | #loading pivot data
22 | book_pivot = pickle.load(open(self.model_trainer_config.transformed_data_file_dir,'rb'))
23 | book_sparse = csr_matrix(book_pivot)
24 | #Training model
25 | model = NearestNeighbors(algorithm= 'brute')
26 | model.fit(book_sparse)
27 |
28 | #Saving model object for recommendations
29 | os.makedirs(self.model_trainer_config.trained_model_dir, exist_ok=True)
30 | file_name = os.path.join(self.model_trainer_config.trained_model_dir,self.model_trainer_config.trained_model_name)
31 | pickle.dump(model,open(file_name,'wb'))
32 | logging.info(f"Saving final model to {file_name}")
33 |
34 | except Exception as e:
35 | raise AppException(e, sys) from e
36 |
37 |
38 |
39 | def initiate_model_trainer(self):
40 | try:
41 | logging.info(f"{'='*20}Model Trainer log started.{'='*20} ")
42 | self.train()
43 | logging.info(f"{'='*20}Model Trainer log completed.{'='*20} \n\n")
44 | except Exception as e:
45 | raise AppException(e, sys) from e
46 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 | artifacts/*
131 |
--------------------------------------------------------------------------------
/books_recommender/components/stage_00_data_ingestion.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from six.moves import urllib
4 | import zipfile
5 | from books_recommender.logger.log import logging
6 | from books_recommender.exception.exception_handler import AppException
7 | from books_recommender.config.configuration import AppConfiguration
8 |
9 | class DataIngestion:
10 |
11 | def __init__(self, app_config = AppConfiguration()):
12 |         """
13 |         DataIngestion initialization
14 |         data_ingestion_config: DataIngestionConfig
15 |         """
16 | try:
17 | logging.info(f"{'='*20}Data Ingestion log started.{'='*20} ")
18 | self.data_ingestion_config= app_config.get_data_ingestion_config()
19 | except Exception as e:
20 | raise AppException(e, sys) from e
21 |
22 |
23 | def download_data(self):
24 | """
25 | Fetch the data from the url
26 |
27 | """
28 | try:
29 |
30 | dataset_url = self.data_ingestion_config.dataset_download_url
31 | zip_download_dir = self.data_ingestion_config.raw_data_dir
32 | os.makedirs(zip_download_dir, exist_ok=True)
33 | data_file_name = os.path.basename(dataset_url)
34 | zip_file_path = os.path.join(zip_download_dir, data_file_name)
35 | logging.info(f"Downloading data from {dataset_url} into file {zip_file_path}")
36 | urllib.request.urlretrieve(dataset_url,zip_file_path)
37 | logging.info(f"Downloaded data from {dataset_url} into file {zip_file_path}")
38 | return zip_file_path
39 |
40 | except Exception as e:
41 | raise AppException(e, sys) from e
42 |
43 |
44 | def extract_zip_file(self,zip_file_path: str):
45 |         """
46 |         Extracts the zip file into the ingested data directory.
47 |         zip_file_path: str
48 |         :return: None
49 |         """
50 | try:
51 | ingested_dir = self.data_ingestion_config.ingested_dir
52 | os.makedirs(ingested_dir, exist_ok=True)
53 | with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
54 | zip_ref.extractall(ingested_dir)
55 | logging.info(f"Extracting zip file: {zip_file_path} into dir: {ingested_dir}")
56 | except Exception as e:
57 | raise AppException(e,sys) from e
58 |
59 |
60 | def initiate_data_ingestion(self):
61 | try:
62 | zip_file_path = self.download_data()
63 | self.extract_zip_file(zip_file_path=zip_file_path)
64 | logging.info(f"{'='*20}Data Ingestion log completed.{'='*20} \n\n")
65 | except Exception as e:
66 | raise AppException(e, sys) from e
--------------------------------------------------------------------------------
/books_recommender/components/stage_02_data_transformation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pickle
4 | import pandas as pd
5 | from books_recommender.logger.log import logging
6 | from books_recommender.config.configuration import AppConfiguration
7 | from books_recommender.exception.exception_handler import AppException
8 |
9 |
10 |
11 | class DataTransformation:
12 | def __init__(self, app_config = AppConfiguration()):
13 | try:
14 | self.data_transformation_config = app_config.get_data_transformation_config()
15 | self.data_validation_config= app_config.get_data_validation_config()
16 | except Exception as e:
17 | raise AppException(e, sys) from e
18 |
19 |
20 |
21 | def get_data_transformer(self):
22 | try:
23 | df = pd.read_csv(self.data_transformation_config.clean_data_file_path)
24 |             # Create a pivot table: titles as rows, user_ids as columns, ratings as values
25 | book_pivot = df.pivot_table(columns='user_id', index='title', values= 'rating')
26 | logging.info(f" Shape of book pivot table: {book_pivot.shape}")
27 | book_pivot.fillna(0, inplace=True)
28 |
29 | #saving pivot table data
30 | os.makedirs(self.data_transformation_config.transformed_data_dir, exist_ok=True)
31 | pickle.dump(book_pivot,open(os.path.join(self.data_transformation_config.transformed_data_dir,"transformed_data.pkl"),'wb'))
32 | logging.info(f"Saved pivot table data to {self.data_transformation_config.transformed_data_dir}")
33 |
34 |             # Keep the book titles (the pivot table index)
35 | book_names = book_pivot.index
36 |
37 | #saving book_names objects for web app
38 | os.makedirs(self.data_validation_config.serialized_objects_dir, exist_ok=True)
39 | pickle.dump(book_names,open(os.path.join(self.data_validation_config.serialized_objects_dir, "book_names.pkl"),'wb'))
40 | logging.info(f"Saved book_names serialization object to {self.data_validation_config.serialized_objects_dir}")
41 |
42 | #saving book_pivot objects for web app
43 | os.makedirs(self.data_validation_config.serialized_objects_dir, exist_ok=True)
44 | pickle.dump(book_pivot,open(os.path.join(self.data_validation_config.serialized_objects_dir, "book_pivot.pkl"),'wb'))
45 | logging.info(f"Saved book_pivot serialization object to {self.data_validation_config.serialized_objects_dir}")
46 |
47 | except Exception as e:
48 | raise AppException(e, sys) from e
49 |
50 |
51 |
52 | def initiate_data_transformation(self):
53 | try:
54 | logging.info(f"{'='*20}Data Transformation log started.{'='*20} ")
55 | self.get_data_transformer()
56 | logging.info(f"{'='*20}Data Transformation log completed.{'='*20} \n\n")
57 | except Exception as e:
58 | raise AppException(e, sys) from e
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/books_recommender/components/stage_01_data_validation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pandas as pd
4 | import pickle
6 | from books_recommender.logger.log import logging
7 | from books_recommender.config.configuration import AppConfiguration
8 | from books_recommender.exception.exception_handler import AppException
9 |
10 |
11 |
12 | class DataValidation:
13 | def __init__(self, app_config = AppConfiguration()):
14 | try:
15 | self.data_validation_config= app_config.get_data_validation_config()
16 | except Exception as e:
17 | raise AppException(e, sys) from e
18 |
19 |
20 |
21 | def preprocess_data(self):
22 | try:
23 |             # error_bad_lines was deprecated in pandas 1.3 and removed in 2.0; on newer pandas use on_bad_lines='skip'
24 |             ratings = pd.read_csv(self.data_validation_config.ratings_csv_file, sep=";", error_bad_lines=False, encoding='latin-1')
25 |             books = pd.read_csv(self.data_validation_config.books_csv_file, sep=";", error_bad_lines=False, encoding='latin-1')
25 |
26 | logging.info(f" Shape of ratings data file: {ratings.shape}")
27 | logging.info(f" Shape of books data file: {books.shape}")
28 |
29 |             # The Image-URL-L column is needed for the poster, so we keep it
30 |             books = books[['ISBN','Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher','Image-URL-L']]
31 |             # Rename some unwieldy column names in books
32 |             books.rename(columns={"Book-Title":'title',
33 |                                   'Book-Author':'author',
34 |                                   "Year-Of-Publication":'year',
35 |                                   "Publisher":"publisher",
36 |                                   "Image-URL-L":"image_url"},inplace=True)
37 | 
38 | 
39 |             # Rename some unwieldy column names in ratings
40 |             ratings.rename(columns={"User-ID":'user_id',
41 |                                     'Book-Rating':'rating'},inplace=True)
42 |
43 |             # Keep only users who rated more than 200 books
44 | x = ratings['user_id'].value_counts() > 200
45 | y = x[x].index
46 | ratings = ratings[ratings['user_id'].isin(y)]
47 |
48 | # Now join ratings with books
49 | ratings_with_books = ratings.merge(books, on='ISBN')
50 | number_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()
51 | number_rating.rename(columns={'rating':'num_of_rating'},inplace=True)
52 | final_rating = ratings_with_books.merge(number_rating, on='title')
53 |
54 |             # Keep only books that received at least 50 ratings
55 | final_rating = final_rating[final_rating['num_of_rating'] >= 50]
56 |
57 |             # Drop duplicate (user_id, title) pairs
58 | final_rating.drop_duplicates(['user_id','title'],inplace=True)
59 | logging.info(f" Shape of the final clean dataset: {final_rating.shape}")
60 |
61 | # Saving the cleaned data for transformation
62 | os.makedirs(self.data_validation_config.clean_data_dir, exist_ok=True)
63 | final_rating.to_csv(os.path.join(self.data_validation_config.clean_data_dir,'clean_data.csv'), index = False)
64 | logging.info(f"Saved cleaned data to {self.data_validation_config.clean_data_dir}")
65 |
66 |
67 | #saving final_rating objects for web app
68 | os.makedirs(self.data_validation_config.serialized_objects_dir, exist_ok=True)
69 | pickle.dump(final_rating,open(os.path.join(self.data_validation_config.serialized_objects_dir, "final_rating.pkl"),'wb'))
70 | logging.info(f"Saved final_rating serialization object to {self.data_validation_config.serialized_objects_dir}")
71 |
72 | except Exception as e:
73 | raise AppException(e, sys) from e
74 |
75 |
76 | def initiate_data_validation(self):
77 | try:
78 | logging.info(f"{'='*20}Data Validation log started.{'='*20} ")
79 | self.preprocess_data()
80 | logging.info(f"{'='*20}Data Validation log completed.{'='*20} \n\n")
81 | except Exception as e:
82 | raise AppException(e, sys) from e
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pickle
4 | import streamlit as st
5 | import numpy as np
6 | from books_recommender.logger.log import logging
7 | from books_recommender.config.configuration import AppConfiguration
8 | from books_recommender.pipeline.training_pipeline import TrainingPipeline
9 | from books_recommender.exception.exception_handler import AppException
10 |
11 |
12 | class Recommendation:
13 | def __init__(self,app_config = AppConfiguration()):
14 | try:
15 | self.recommendation_config= app_config.get_recommendation_config()
16 | except Exception as e:
17 | raise AppException(e, sys) from e
18 |
19 |
20 | def fetch_poster(self,suggestion):
21 | try:
22 | book_name = []
23 | ids_index = []
24 | poster_url = []
25 | book_pivot = pickle.load(open(self.recommendation_config.book_pivot_serialized_objects,'rb'))
26 | final_rating = pickle.load(open(self.recommendation_config.final_rating_serialized_objects,'rb'))
27 |
28 |             # suggestion is a 2-D array of neighbor indices, so book_name[0] holds the recommended titles
29 |             for book_id in suggestion:
30 |                 book_name.append(book_pivot.index[book_id])
30 |
31 | for name in book_name[0]:
32 | ids = np.where(final_rating['title'] == name)[0][0]
33 | ids_index.append(ids)
34 |
35 | for idx in ids_index:
36 | url = final_rating.iloc[idx]['image_url']
37 | poster_url.append(url)
38 |
39 | return poster_url
40 |
41 | except Exception as e:
42 | raise AppException(e, sys) from e
43 |
44 |
45 |
46 | def recommend_book(self,book_name):
47 | try:
48 | books_list = []
49 | model = pickle.load(open(self.recommendation_config.trained_model_path,'rb'))
50 | book_pivot = pickle.load(open(self.recommendation_config.book_pivot_serialized_objects,'rb'))
51 | book_id = np.where(book_pivot.index == book_name)[0][0]
52 | distance, suggestion = model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1), n_neighbors=6 )
53 |
54 | poster_url = self.fetch_poster(suggestion)
55 |
56 | for i in range(len(suggestion)):
57 | books = book_pivot.index[suggestion[i]]
58 | for j in books:
59 | books_list.append(j)
60 | return books_list , poster_url
61 |
62 | except Exception as e:
63 | raise AppException(e, sys) from e
64 |
65 |
66 | def train_engine(self):
67 | try:
68 | obj = TrainingPipeline()
69 | obj.start_training_pipeline()
70 | st.text("Training Completed!")
71 |             logging.info("Training completed successfully!")
72 | except Exception as e:
73 | raise AppException(e, sys) from e
74 |
75 |
76 | def recommendations_engine(self,selected_books):
77 | try:
78 | recommended_books,poster_url = self.recommend_book(selected_books)
79 |             col1, col2, col3, col4, col5 = st.columns(5)
80 |             # Index 0 is the selected book itself, so display neighbors 1-5
81 |             with col1:
81 | st.text(recommended_books[1])
82 | st.image(poster_url[1])
83 | with col2:
84 | st.text(recommended_books[2])
85 | st.image(poster_url[2])
86 |
87 | with col3:
88 | st.text(recommended_books[3])
89 | st.image(poster_url[3])
90 | with col4:
91 | st.text(recommended_books[4])
92 | st.image(poster_url[4])
93 | with col5:
94 | st.text(recommended_books[5])
95 | st.image(poster_url[5])
96 | except Exception as e:
97 | raise AppException(e, sys) from e
98 |
99 |
100 |
101 | if __name__ == "__main__":
102 | st.header('ML Based Books Recommender System')
103 | st.text("This is a collaborative filtering based recommendation system!")
104 |
105 | obj = Recommendation()
106 |
107 | #Training
108 | if st.button('Train Recommender System'):
109 | obj.train_engine()
110 |
111 | book_names = pickle.load(open(os.path.join('templates','book_names.pkl') ,'rb'))
112 | selected_books = st.selectbox(
113 | "Type or select a book from the dropdown",
114 | book_names)
115 |
116 | #recommendation
117 | if st.button('Show Recommendation'):
118 | obj.recommendations_engine(selected_books)
119 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Project: ML-Based Book Recommender System | Collaborative Filtering Based
2 |
3 |
4 |
5 | Recommendation systems are becoming increasingly important in today's extremely busy world. People are always short on time with the myriad tasks they need to accomplish in the limited 24 hours. Therefore, recommendation systems are important as they help users make the right choices without having to expend their cognitive resources.
6 |
7 | The purpose of a recommendation system is essentially to search for content that would be interesting to an individual. Moreover, it involves a number of factors to create personalised lists of useful and interesting content specific to each user. Recommendation systems are Artificial Intelligence based algorithms that skim through all possible options and create a customized list of items that are interesting and relevant to an individual. These results are based on the user's profile, search/browsing history, what other people with similar traits/demographics are consuming, and how likely the user is to consume those items. This is achieved through predictive modeling and heuristics with the data available.
8 |
9 |
10 | # Note:
11 | If you want to understand the entire project workflow, please refer to the Jupyter notebook inside the notebook folder.
12 |
13 | # Types of Recommendation System :
14 |
15 | ### 1) Content Based:
16 | 
17 | - Content-based systems use characteristic information and take item attributes into consideration.
18 | 
19 | - Examples: Twitter, YouTube.
20 | 
21 | - Which music you are listening to, which artists you follow: embeddings are formed from these features.
22 | 
23 | - Recommendations are driven by user-specific actions or similar items.
24 | 
25 | - Each item is represented as a feature vector (a toy sketch follows this list).
26 | 
27 | - These systems make recommendations using a user's item and profile features. They hypothesize that if a user was interested in an item in the past, they will once again be interested in it in the future.
28 | 
29 | - One issue that arises is making obvious recommendations because of excessive specialization (user A is only interested in categories B, C, and D, and the system is not able to recommend items outside those categories, even though they could be interesting to them).
30 |
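For intuition, here is a toy sketch of the content-based idea. This project itself is collaborative-filtering based, and the item descriptions below are made up purely for illustration:

```python
# Toy content-based recommender: items become TF-IDF vectors of their
# descriptions, and vector similarity drives the recommendations.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical item descriptions (not from this project's dataset)
descriptions = [
    "epic fantasy dragons magic kingdom",
    "space opera starships aliens empire",
    "fantasy magic school young wizard",
]

vectors = TfidfVectorizer().fit_transform(descriptions)  # item x term matrix
similarity = cosine_similarity(vectors)                  # item x item similarity

# Items most similar to item 0, excluding item 0 itself
print(similarity[0].argsort()[::-1][1:])  # [2 1]: the other fantasy title ranks first
```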
31 | ### 2) Collaborative Based:
32 | 
33 | - Collaborative filtering systems are based on user-item interactions.
34 | 
35 | - Clusters of users with the same ratings, i.e. similar users.
36 | 
37 | - Book recommendation, so a cluster mechanism is used.
38 | 
39 | - Only one parameter is taken: ratings or comments.
40 | 
41 | - In short, collaborative filtering systems are based on the assumption that if a user likes item A and another user likes the same item A as well as another item, item B, the first user could also be interested in the second item.
42 | 
43 | - Issues are:
44 | 
45 |   - The user-item matrix is huge (n x m), so it is computationally expensive.
46 | 
47 |   - Only famous items will get recommended.
48 | 
49 |   - New items might not get recommended at all.
50 |
51 | ### 3 ) Hybrid Based :
52 |
53 | - Hybrid systems, which combine both types of information with the aim of avoiding problems that are generated when working with just one kind.
54 |
55 | - Combination of both and used now a days .
56 |
57 | - Uses : word2vec , embedding .
58 |
59 | # About this project:
60 |
61 | This is a collaborative filtering based book recommender system: a Streamlit web application that can recommend various similar books based on a user's interests (a rough sketch of the core idea follows).
62 |
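As a rough sketch of the core idea, mirroring stage_01_data_validation.py and stage_02_data_transformation.py but with toy ratings in place of the real CSV files:

```python
import pandas as pd

# Toy ratings standing in for the cleaned BX dataset
ratings = pd.DataFrame({
    "user_id": [1, 1, 2, 2, 3],
    "title":   ["A", "B", "A", "C", "B"],
    "rating":  [8, 7, 9, 5, 6],
})

# Same transformation as stage_02: titles as rows, users as columns,
# missing ratings filled with 0
book_pivot = ratings.pivot_table(columns="user_id", index="title", values="rating")
book_pivot.fillna(0, inplace=True)
print(book_pivot)
```

Each book is then a vector of per-user ratings, and books whose rating vectors are close are treated as similar.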
63 |
64 |
65 | # Demo:
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | # Dataset used:
74 |
75 | * [Dataset link](https://www.kaggle.com/ra4u12/bookrecommendation)
76 |
77 | # Concept used to build the model.pkl file: NearestNeighbors
78 |
79 | 1. Load the data
80 | 
81 | 2. Initialise the value of k (the number of neighbors)
82 | 
83 | 3. To find the nearest neighbors of a query point, iterate over all training data points
84 | 
85 | 4. Calculate the distance between the query point and each row of training data. Here we use Euclidean distance as our distance metric, since it's the most popular method.
86 | 
87 | 5. Sort the calculated distances in ascending order based on distance values
88 | 
89 | 6. Get the top k rows from the sorted array (see the sketch below)
90 |
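A minimal sketch of how this is applied here, mirroring stage_03_model_trainer.py and recommend_book in app.py; the pivot table below is toy data standing in for transformed_data.pkl:

```python
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Toy title-by-user rating matrix standing in for transformed_data.pkl
book_pivot = pd.DataFrame(
    [[8, 9, 0], [7, 0, 6], [0, 5, 0], [8, 8, 5]],
    index=["A", "B", "C", "D"], columns=[1, 2, 3],
)

# Same steps as stage_03: sparse matrix + brute-force (Euclidean) neighbor search
model = NearestNeighbors(algorithm="brute")
model.fit(csr_matrix(book_pivot.values))

# Same lookup as recommend_book: neighbors of one title's rating vector
book_id = np.where(book_pivot.index == "A")[0][0]
distance, suggestion = model.kneighbors(
    book_pivot.iloc[book_id, :].values.reshape(1, -1), n_neighbors=3
)
print(book_pivot.index[suggestion[0]])  # "A" itself first, then its nearest titles
```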
91 | # Built With
92 | 1. streamlit
93 | 2. Machine learning
94 | 3. sklearn
95 |
96 | # How to run?
97 | ### STEPS:
98 |
99 | Clone the repository
100 | 
101 | ```bash
102 | git clone https://github.com/entbappy/ML-Based-Book-Recommender-System.git
103 | ```
104 | ### STEP 01- Create a conda environment after opening the repository
105 |
106 | ```bash
107 | conda create -n books python=3.7.10 -y
108 | ```
109 |
110 | ```bash
111 | conda activate books
112 | ```
113 |
114 |
115 | ### STEP 02- install the requirements
116 | ```bash
117 | pip install -r requirements.txt
118 | ```
119 |
120 |
121 | Now run,
122 | ```bash
123 | streamlit run app.py
124 | ```
125 |
126 | ```bash
127 | Note: Before clicking on "Show Recommendation", first click on "Train Recommender System" to generate the models.
128 | ```
129 |
130 | # How to run in Docker?
131 |
132 | #### Build a Docker image
133 | The docker build command builds an image from a Dockerfile. Run the following command from the project root on your server to build the image:
134 |
135 |
136 | ```bash
137 | docker build -t streamlit .
138 | ```
139 |
140 | The -t flag is used to tag the image. Here, we have tagged the image streamlit. If you run:
141 |
142 | ```bash
143 | docker images
144 | ```
145 | You should see a streamlit image under the REPOSITORY column. For example:
146 |
147 | ```bash
148 | REPOSITORY TAG IMAGE ID CREATED SIZE
149 | streamlit latest 70b0759a094d About a minute ago 1.02GB
150 | ```
151 |
152 | #### Run the Docker container
153 | Now that you have built the image, you can run the container by executing:
154 |
155 | ```bash
156 | docker run -p 8501:8501 streamlit
157 | ```
158 |
159 | The -p flag publishes the container’s port 8501 to your server’s 8501 port.
160 |
161 | If all went well, you should see an output similar to the following:
162 |
163 | ```bash
164 | $ docker run -p 8501:8501 streamlit
165 |
166 | You can now view your Streamlit app in your browser.
167 |
168 | URL: http://127.0.0.1:8501/
169 | ```
170 |
171 | To view your app, browse to http://0.0.0.0:8501 or http://127.0.0.1:8501/
172 |
173 |
174 |
175 |
176 |
--------------------------------------------------------------------------------
/books_recommender/config/configuration.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from books_recommender.logger.log import logging
4 | from books_recommender.utils.util import read_yaml_file
5 | from books_recommender.exception.exception_handler import AppException
6 | from books_recommender.entity.config_entity import DataIngestionConfig, DataValidationConfig, DataTransformationConfig, ModelTrainerConfig, ModelRecommendationConfig
7 | from books_recommender.constant import *
8 |
9 |
10 | class AppConfiguration:
11 | def __init__(self, config_file_path: str = CONFIG_FILE_PATH):
12 | try:
13 | self.configs_info = read_yaml_file(file_path=config_file_path)
14 | except Exception as e:
15 | raise AppException(e, sys) from e
16 |
17 |
18 | def get_data_ingestion_config(self) -> DataIngestionConfig:
19 | try:
20 | data_ingestion_config = self.configs_info['data_ingestion_config']
21 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
22 | dataset_dir = data_ingestion_config['dataset_dir']
23 |
24 | ingested_data_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['ingested_dir'])
25 | raw_data_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['raw_data_dir'])
26 |
27 | response = DataIngestionConfig(
28 | dataset_download_url = data_ingestion_config['dataset_download_url'],
29 | raw_data_dir = raw_data_dir,
30 | ingested_dir = ingested_data_dir
31 | )
32 |
33 | logging.info(f"Data Ingestion Config: {response}")
34 | return response
35 |
36 | except Exception as e:
37 | raise AppException(e, sys) from e
38 |
39 |
40 |
41 | def get_data_validation_config(self) -> DataValidationConfig:
42 | try:
43 | data_validation_config = self.configs_info['data_validation_config']
44 | data_ingestion_config = self.configs_info['data_ingestion_config']
45 | dataset_dir = data_ingestion_config['dataset_dir']
46 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
47 | books_csv_file = data_validation_config['books_csv_file']
48 | ratings_csv_file = data_validation_config['ratings_csv_file']
49 |
50 | books_csv_file_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['ingested_dir'], books_csv_file)
51 | ratings_csv_file_dir = os.path.join(artifacts_dir, dataset_dir, data_ingestion_config['ingested_dir'], ratings_csv_file)
52 | clean_data_path = os.path.join(artifacts_dir, dataset_dir, data_validation_config['clean_data_dir'])
53 | serialized_objects_dir = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'])
54 |
55 | response = DataValidationConfig(
56 | clean_data_dir = clean_data_path,
57 | books_csv_file = books_csv_file_dir,
58 | ratings_csv_file = ratings_csv_file_dir,
59 | serialized_objects_dir = serialized_objects_dir
60 | )
61 |
62 | logging.info(f"Data Validation Config: {response}")
63 | return response
64 |
65 | except Exception as e:
66 | raise AppException(e, sys) from e
67 |
68 |
69 |
70 | def get_data_transformation_config(self) -> DataTransformationConfig:
71 | try:
72 | data_transformation_config = self.configs_info['data_transformation_config']
73 | data_validation_config = self.configs_info['data_validation_config']
74 | data_ingestion_config = self.configs_info['data_ingestion_config']
75 | dataset_dir = data_ingestion_config['dataset_dir']
76 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
77 |
78 | clean_data_file_path = os.path.join(artifacts_dir, dataset_dir, data_validation_config['clean_data_dir'],'clean_data.csv')
79 | transformed_data_dir = os.path.join(artifacts_dir, dataset_dir, data_transformation_config['transformed_data_dir'])
80 |
81 | response = DataTransformationConfig(
82 | clean_data_file_path = clean_data_file_path,
83 | transformed_data_dir = transformed_data_dir
84 | )
85 |
86 | logging.info(f"Data Transformation Config: {response}")
87 | return response
88 |
89 | except Exception as e:
90 | raise AppException(e, sys) from e
91 |
92 |
93 |
94 | def get_model_trainer_config(self) -> ModelTrainerConfig:
95 | try:
96 | model_trainer_config = self.configs_info['model_trainer_config']
97 | data_transformation_config = self.configs_info['data_transformation_config']
98 | data_ingestion_config = self.configs_info['data_ingestion_config']
99 | dataset_dir = data_ingestion_config['dataset_dir']
100 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
101 |
102 |
103 |
104 | transformed_data_file_dir = os.path.join(artifacts_dir, dataset_dir, data_transformation_config['transformed_data_dir'], 'transformed_data.pkl')
105 | trained_model_dir = os.path.join(artifacts_dir, model_trainer_config['trained_model_dir'])
106 | trained_model_name = model_trainer_config['trained_model_name']
107 |
108 | response = ModelTrainerConfig(
109 | transformed_data_file_dir = transformed_data_file_dir,
110 | trained_model_dir = trained_model_dir,
111 | trained_model_name = trained_model_name
112 | )
113 |
114 | logging.info(f"Model Trainer Config: {response}")
115 | return response
116 |
117 | except Exception as e:
118 | raise AppException(e, sys) from e
119 |
120 |
121 |
122 | def get_recommendation_config(self) -> ModelRecommendationConfig:
123 | try:
124 | recommendation_config = self.configs_info['recommendation_config']
125 | model_trainer_config = self.configs_info['model_trainer_config']
126 | data_validation_config = self.configs_info['data_validation_config']
127 | trained_model_name = model_trainer_config['trained_model_name']
128 | artifacts_dir = self.configs_info['artifacts_config']['artifacts_dir']
129 | trained_model_dir = os.path.join(artifacts_dir, model_trainer_config['trained_model_dir'])
130 |             poster_api = recommendation_config['poster_api_url']  # read but currently unused; posters come from the dataset's image_url column
131 |
132 |
133 | book_name_serialized_objects = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'], 'book_names.pkl')
134 | book_pivot_serialized_objects = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'], 'book_pivot.pkl')
135 | final_rating_serialized_objects = os.path.join(artifacts_dir, data_validation_config['serialized_objects_dir'], 'final_rating.pkl')
136 |
137 | trained_model_path = os.path.join(trained_model_dir,trained_model_name)
138 |
139 | response = ModelRecommendationConfig(
140 | book_name_serialized_objects = book_name_serialized_objects,
141 | book_pivot_serialized_objects = book_pivot_serialized_objects,
142 | final_rating_serialized_objects = final_rating_serialized_objects,
143 | trained_model_path = trained_model_path
144 | )
145 |
146 | logging.info(f"Model Recommendation Config: {response}")
147 | return response
148 |
149 | except Exception as e:
150 | raise AppException(e, sys) from e
--------------------------------------------------------------------------------
/notebook/Books Recommender data analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Books Recommender system using clustering\n",
8 | "Collaborative filtering\n",
9 | "- Dataset :- https://www.kaggle.com/ra4u12/bookrecommendation"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "# Importing necessary library\n",
19 | "import pandas as pd\n",
20 | "import numpy as np\n",
21 | "# import matplotlib.pyplot as plt\n",
22 | "# import seaborn as sns"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 2,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stderr",
32 | "output_type": "stream",
33 | "text": [
34 | "b'Skipping line 6452: expected 8 fields, saw 9\\nSkipping line 43667: expected 8 fields, saw 10\\nSkipping line 51751: expected 8 fields, saw 9\\n'\n",
35 | "b'Skipping line 92038: expected 8 fields, saw 9\\nSkipping line 104319: expected 8 fields, saw 9\\nSkipping line 121768: expected 8 fields, saw 9\\n'\n",
36 | "b'Skipping line 144058: expected 8 fields, saw 9\\nSkipping line 150789: expected 8 fields, saw 9\\nSkipping line 157128: expected 8 fields, saw 9\\nSkipping line 180189: expected 8 fields, saw 9\\nSkipping line 185738: expected 8 fields, saw 9\\n'\n",
37 | "b'Skipping line 209388: expected 8 fields, saw 9\\nSkipping line 220626: expected 8 fields, saw 9\\nSkipping line 227933: expected 8 fields, saw 11\\nSkipping line 228957: expected 8 fields, saw 10\\nSkipping line 245933: expected 8 fields, saw 9\\nSkipping line 251296: expected 8 fields, saw 9\\nSkipping line 259941: expected 8 fields, saw 9\\nSkipping line 261529: expected 8 fields, saw 9\\n'\n",
38 | "C:\\Anaconda\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3146: DtypeWarning: Columns (3) have mixed types.Specify dtype option on import or set low_memory=False.\n",
39 | " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n"
40 | ]
41 | }
42 | ],
43 | "source": [
44 | "books = pd.read_csv('data/BX-Books.csv', sep=\";\", error_bad_lines=False, encoding='latin-1')"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 3,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "data": {
143 | "text/plain": [
144 | " ISBN Book-Title \\\n",
145 | "0 0195153448 Classical Mythology \n",
146 | "1 0002005018 Clara Callan \n",
147 | "2 0060973129 Decision in Normandy \n",
148 | "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
149 | "4 0393045218 The Mummies of Urumchi \n",
150 | "\n",
151 | " Book-Author Year-Of-Publication Publisher \\\n",
152 | "0 Mark P. O. Morford 2002 Oxford University Press \n",
153 | "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
154 | "2 Carlo D'Este 1991 HarperPerennial \n",
155 | "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
156 | "4 E. J. W. Barber 1999 W. W. Norton & Company \n",
157 | "\n",
158 | " Image-URL-S \\\n",
159 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
160 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
161 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
162 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
163 | "4 http://images.amazon.com/images/P/0393045218.0... \n",
164 | "\n",
165 | " Image-URL-M \\\n",
166 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
167 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
168 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
169 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
170 | "4 http://images.amazon.com/images/P/0393045218.0... \n",
171 | "\n",
172 | " Image-URL-L \n",
173 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
174 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
175 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
176 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
177 | "4 http://images.amazon.com/images/P/0393045218.0... "
178 | ]
179 | },
180 | "execution_count": 3,
181 | "metadata": {},
182 | "output_type": "execute_result"
183 | }
184 | ],
185 | "source": [
186 | "books.head()"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 4,
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/plain": [
197 | "'http://images.amazon.com/images/P/0671027387.01.LZZZZZZZ.jpg'"
198 | ]
199 | },
200 | "execution_count": 4,
201 | "metadata": {},
202 | "output_type": "execute_result"
203 | }
204 | ],
205 | "source": [
206 | "books.iloc[237]['Image-URL-L']"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 5,
212 | "metadata": {},
213 | "outputs": [],
214 | "source": [
215 | "# !curl \"http://images.amazon.com/images/P/0195153448.01.THUMBZZZ.jpg\" --out.png\n",
216 | "# !curl http://images.amazon.com/images/P/0060973129.01.THUMBZZZ.jpg --output some.jpg"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 6,
222 | "metadata": {},
223 | "outputs": [
224 | {
225 | "data": {
226 | "text/plain": [
227 | "(271360, 8)"
228 | ]
229 | },
230 | "execution_count": 6,
231 | "metadata": {},
232 | "output_type": "execute_result"
233 | }
234 | ],
235 | "source": [
236 | "books.shape"
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": 7,
242 | "metadata": {},
243 | "outputs": [
244 | {
245 | "data": {
246 | "text/plain": [
247 | "Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',\n",
248 | " 'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],\n",
249 | " dtype='object')"
250 | ]
251 | },
252 | "execution_count": 7,
253 | "metadata": {},
254 | "output_type": "execute_result"
255 | }
256 | ],
257 | "source": [
258 | "books.columns"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 |     "#### Conclusion:\n",
266 |     "The Image-URL column is important for the poster, so we will keep it."
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": 8,
272 | "metadata": {},
273 | "outputs": [],
274 | "source": [
275 | "books = books[['ISBN','Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher','Image-URL-L']]"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 9,
281 | "metadata": {},
282 | "outputs": [
283 | {
284 | "data": {
362 | "text/plain": [
363 | " ISBN Book-Title \\\n",
364 | "0 0195153448 Classical Mythology \n",
365 | "1 0002005018 Clara Callan \n",
366 | "2 0060973129 Decision in Normandy \n",
367 | "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
368 | "4 0393045218 The Mummies of Urumchi \n",
369 | "\n",
370 | " Book-Author Year-Of-Publication Publisher \\\n",
371 | "0 Mark P. O. Morford 2002 Oxford University Press \n",
372 | "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
373 | "2 Carlo D'Este 1991 HarperPerennial \n",
374 | "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
375 | "4 E. J. W. Barber 1999 W. W. Norton & Company \n",
376 | "\n",
377 | " Image-URL-L \n",
378 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
379 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
380 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
381 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
382 | "4 http://images.amazon.com/images/P/0393045218.0... "
383 | ]
384 | },
385 | "execution_count": 9,
386 | "metadata": {},
387 | "output_type": "execute_result"
388 | }
389 | ],
390 | "source": [
391 | "books.head()"
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": 10,
397 | "metadata": {},
398 | "outputs": [],
399 | "source": [
400 | "# Let's rename some awkwardly named columns\n",
401 | "books.rename(columns={\"Book-Title\":'title',\n",
402 | " 'Book-Author':'author',\n",
403 | " \"Year-Of-Publication\":'year',\n",
404 | " \"Publisher\":\"publisher\",\n",
405 | " \"Image-URL-L\":\"image_url\"},inplace=True)"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 11,
411 | "metadata": {},
412 | "outputs": [
413 | {
414 | "data": {
492 | "text/plain": [
493 | " ISBN title \\\n",
494 | "0 0195153448 Classical Mythology \n",
495 | "1 0002005018 Clara Callan \n",
496 | "2 0060973129 Decision in Normandy \n",
497 | "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n",
498 | "4 0393045218 The Mummies of Urumchi \n",
499 | "\n",
500 | " author year publisher \\\n",
501 | "0 Mark P. O. Morford 2002 Oxford University Press \n",
502 | "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n",
503 | "2 Carlo D'Este 1991 HarperPerennial \n",
504 | "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n",
505 | "4 E. J. W. Barber 1999 W. W. Norton & Company \n",
506 | "\n",
507 | " image_url \n",
508 | "0 http://images.amazon.com/images/P/0195153448.0... \n",
509 | "1 http://images.amazon.com/images/P/0002005018.0... \n",
510 | "2 http://images.amazon.com/images/P/0060973129.0... \n",
511 | "3 http://images.amazon.com/images/P/0374157065.0... \n",
512 | "4 http://images.amazon.com/images/P/0393045218.0... "
513 | ]
514 | },
515 | "execution_count": 11,
516 | "metadata": {},
517 | "output_type": "execute_result"
518 | }
519 | ],
520 | "source": [
521 | "books.head()"
522 | ]
523 | },
524 | {
525 | "cell_type": "code",
526 | "execution_count": 12,
527 | "metadata": {},
528 | "outputs": [],
529 | "source": [
530 | "# Now load the second dataframe\n",
531 | "\n",
532 | "users = pd.read_csv('data/BX-Users.csv', sep=\";\", error_bad_lines=False, encoding='latin-1')"
533 | ]
534 | },
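{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0. A minimal sketch of the equivalent call on newer pandas, assuming the same file layout:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Equivalent load on pandas >= 1.3: on_bad_lines='skip' replaces the\n",
"# deprecated error_bad_lines=False and silently drops malformed rows.\n",
"users = pd.read_csv('data/BX-Users.csv', sep=';', on_bad_lines='skip', encoding='latin-1')"
]
},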
535 | {
536 | "cell_type": "code",
537 | "execution_count": 13,
538 | "metadata": {},
539 | "outputs": [
540 | {
541 | "data": {
601 | "text/plain": [
602 | " User-ID Location Age\n",
603 | "0 1 nyc, new york, usa NaN\n",
604 | "1 2 stockton, california, usa 18.0\n",
605 | "2 3 moscow, yukon territory, russia NaN\n",
606 | "3 4 porto, v.n.gaia, portugal 17.0\n",
607 | "4 5 farnborough, hants, united kingdom NaN"
608 | ]
609 | },
610 | "execution_count": 13,
611 | "metadata": {},
612 | "output_type": "execute_result"
613 | }
614 | ],
615 | "source": [
616 | "users.head()"
617 | ]
618 | },
619 | {
620 | "cell_type": "code",
621 | "execution_count": 14,
622 | "metadata": {},
623 | "outputs": [
624 | {
625 | "data": {
626 | "text/plain": [
627 | "(278858, 3)"
628 | ]
629 | },
630 | "execution_count": 14,
631 | "metadata": {},
632 | "output_type": "execute_result"
633 | }
634 | ],
635 | "source": [
636 | "users.shape"
637 | ]
638 | },
639 | {
640 | "cell_type": "code",
641 | "execution_count": 15,
642 | "metadata": {},
643 | "outputs": [],
644 | "source": [
645 | "# Let's rename some awkwardly named columns\n",
646 | "users.rename(columns={\"User-ID\":'user_id',\n",
647 | " 'Location':'location',\n",
648 | " \"Age\":'age'},inplace=True)"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": 16,
654 | "metadata": {},
655 | "outputs": [
656 | {
657 | "data": {
699 | "text/plain": [
700 | " user_id location age\n",
701 | "0 1 nyc, new york, usa NaN\n",
702 | "1 2 stockton, california, usa 18.0"
703 | ]
704 | },
705 | "execution_count": 16,
706 | "metadata": {},
707 | "output_type": "execute_result"
708 | }
709 | ],
710 | "source": [
711 | "users.head(2)"
712 | ]
713 | },
714 | {
715 | "cell_type": "code",
716 | "execution_count": 17,
717 | "metadata": {},
718 | "outputs": [],
719 | "source": [
720 | "# Now load the third dataframe\n",
721 | "\n",
722 | "ratings = pd.read_csv('data/BX-Book-Ratings.csv', sep=\";\", error_bad_lines=False, encoding='latin-1')"
723 | ]
724 | },
725 | {
726 | "cell_type": "code",
727 | "execution_count": 18,
728 | "metadata": {},
729 | "outputs": [
730 | {
731 | "data": {
791 | "text/plain": [
792 | " User-ID ISBN Book-Rating\n",
793 | "0 276725 034545104X 0\n",
794 | "1 276726 0155061224 5\n",
795 | "2 276727 0446520802 0\n",
796 | "3 276729 052165615X 3\n",
797 | "4 276729 0521795028 6"
798 | ]
799 | },
800 | "execution_count": 18,
801 | "metadata": {},
802 | "output_type": "execute_result"
803 | }
804 | ],
805 | "source": [
806 | "ratings.head()"
807 | ]
808 | },
809 | {
810 | "cell_type": "code",
811 | "execution_count": 19,
812 | "metadata": {},
813 | "outputs": [
814 | {
815 | "data": {
816 | "text/plain": [
817 | "(1149780, 3)"
818 | ]
819 | },
820 | "execution_count": 19,
821 | "metadata": {},
822 | "output_type": "execute_result"
823 | }
824 | ],
825 | "source": [
826 | "ratings.shape"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": 20,
832 | "metadata": {},
833 | "outputs": [],
834 | "source": [
835 | "# Let's rename some awkwardly named columns\n",
836 | "ratings.rename(columns={\"User-ID\":'user_id',\n",
837 | " 'Book-Rating':'rating'},inplace=True)"
838 | ]
839 | },
840 | {
841 | "cell_type": "code",
842 | "execution_count": 21,
843 | "metadata": {},
844 | "outputs": [
845 | {
846 | "data": {
888 | "text/plain": [
889 | " user_id ISBN rating\n",
890 | "0 276725 034545104X 0\n",
891 | "1 276726 0155061224 5"
892 | ]
893 | },
894 | "execution_count": 21,
895 | "metadata": {},
896 | "output_type": "execute_result"
897 | }
898 | ],
899 | "source": [
900 | "ratings.head(2)"
901 | ]
902 | },
903 | {
904 | "cell_type": "markdown",
905 | "metadata": {},
906 | "source": [
907 | "### Conclusion:\n",
908 | "We now have three dataframes:\n",
909 | "- books\n",
910 | "- users\n",
911 | "- ratings"
912 | ]
913 | },
914 | {
915 | "cell_type": "code",
916 | "execution_count": 22,
917 | "metadata": {},
918 | "outputs": [
919 | {
920 | "name": "stdout",
921 | "output_type": "stream",
922 | "text": [
923 | "(271360, 6)\n",
924 | "(278858, 3)\n",
925 | "(1149780, 3)\n"
926 | ]
927 | }
928 | ],
929 | "source": [
930 | "print(books.shape, users.shape, ratings.shape, sep='\\n')\n",
931 | "\n"
932 | ]
933 | },
934 | {
935 | "cell_type": "code",
936 | "execution_count": 23,
937 | "metadata": {},
938 | "outputs": [
939 | {
940 | "data": {
941 | "text/plain": [
942 | "11676 13602\n",
943 | "198711 7550\n",
944 | "153662 6109\n",
945 | "98391 5891\n",
946 | "35859 5850\n",
947 | " ... \n",
948 | "158698 1\n",
949 | "17920 1\n",
950 | "277135 1\n",
951 | "275086 1\n",
952 | "187812 1\n",
953 | "Name: user_id, Length: 105283, dtype: int64"
954 | ]
955 | },
956 | "execution_count": 23,
957 | "metadata": {},
958 | "output_type": "execute_result"
959 | }
960 | ],
961 | "source": [
962 | "ratings['user_id'].value_counts()"
963 | ]
964 | },
965 | {
966 | "cell_type": "code",
967 | "execution_count": 24,
968 | "metadata": {},
969 | "outputs": [
970 | {
971 | "data": {
972 | "text/plain": [
973 | "(105283,)"
974 | ]
975 | },
976 | "execution_count": 24,
977 | "metadata": {},
978 | "output_type": "execute_result"
979 | }
980 | ],
981 | "source": [
982 | "ratings['user_id'].value_counts().shape"
983 | ]
984 | },
985 | {
986 | "cell_type": "code",
987 | "execution_count": 25,
988 | "metadata": {},
989 | "outputs": [
990 | {
991 | "data": {
992 | "text/plain": [
993 | "(105283,)"
994 | ]
995 | },
996 | "execution_count": 25,
997 | "metadata": {},
998 | "output_type": "execute_result"
999 | }
1000 | ],
1001 | "source": [
1002 | "ratings['user_id'].unique().shape"
1003 | ]
1004 | },
1005 | {
1006 | "cell_type": "code",
1007 | "execution_count": 26,
1008 | "metadata": {},
1009 | "outputs": [],
1010 | "source": [
1011 | "# Let's find the users who have rated more than 200 books\n",
1012 | "x = ratings['user_id'].value_counts() > 200"
1013 | ]
1014 | },
1015 | {
1016 | "cell_type": "code",
1017 | "execution_count": 27,
1018 | "metadata": {},
1019 | "outputs": [
1020 | {
1021 | "data": {
1022 | "text/plain": [
1023 | "(899,)"
1024 | ]
1025 | },
1026 | "execution_count": 27,
1027 | "metadata": {},
1028 | "output_type": "execute_result"
1029 | }
1030 | ],
1031 | "source": [
1032 | "x[x].shape"
1033 | ]
1034 | },
1035 | {
1036 | "cell_type": "code",
1037 | "execution_count": 28,
1038 | "metadata": {},
1039 | "outputs": [],
1040 | "source": [
1041 | "y= x[x].index"
1042 | ]
1043 | },
1044 | {
1045 | "cell_type": "code",
1046 | "execution_count": 29,
1047 | "metadata": {},
1048 | "outputs": [
1049 | {
1050 | "data": {
1051 | "text/plain": [
1052 | "Int64Index([ 11676, 198711, 153662, 98391, 35859, 212898, 278418, 76352,\n",
1053 | " 110973, 235105,\n",
1054 | " ...\n",
1055 | " 260183, 155916, 44296, 73681, 59727, 28634, 188951, 9856,\n",
1056 | " 268622, 274808],\n",
1057 | " dtype='int64', length=899)"
1058 | ]
1059 | },
1060 | "execution_count": 29,
1061 | "metadata": {},
1062 | "output_type": "execute_result"
1063 | }
1064 | ],
1065 | "source": [
1066 | "y"
1067 | ]
1068 | },
1069 | {
1070 | "cell_type": "code",
1071 | "execution_count": 30,
1072 | "metadata": {},
1073 | "outputs": [],
1074 | "source": [
1075 | "ratings = ratings[ratings['user_id'].isin(y)]"
1076 | ]
1077 | },
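{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same active-user filter can be written in one pass; a minimal sketch, equivalent to the `value_counts`/`isin` steps above (`ratings_active` is a hypothetical name for the result):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One-pass alternative: map each row's user_id to that user's total\n",
"# rating count, then keep rows where the count exceeds 200.\n",
"counts = ratings['user_id'].value_counts()\n",
"ratings_active = ratings[ratings['user_id'].map(counts) > 200]"
]
},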
1078 | {
1079 | "cell_type": "code",
1080 | "execution_count": 31,
1081 | "metadata": {},
1082 | "outputs": [
1083 | {
1084 | "data": {
1144 | "text/plain": [
1145 | " user_id ISBN rating\n",
1146 | "1456 277427 002542730X 10\n",
1147 | "1457 277427 0026217457 0\n",
1148 | "1458 277427 003008685X 8\n",
1149 | "1459 277427 0030615321 0\n",
1150 | "1460 277427 0060002050 0"
1151 | ]
1152 | },
1153 | "execution_count": 31,
1154 | "metadata": {},
1155 | "output_type": "execute_result"
1156 | }
1157 | ],
1158 | "source": [
1159 | "ratings.head()"
1160 | ]
1161 | },
1162 | {
1163 | "cell_type": "code",
1164 | "execution_count": 32,
1165 | "metadata": {},
1166 | "outputs": [
1167 | {
1168 | "data": {
1169 | "text/plain": [
1170 | "(526356, 3)"
1171 | ]
1172 | },
1173 | "execution_count": 32,
1174 | "metadata": {},
1175 | "output_type": "execute_result"
1176 | }
1177 | ],
1178 | "source": [
1179 | "ratings.shape"
1180 | ]
1181 | },
1182 | {
1183 | "cell_type": "code",
1184 | "execution_count": 33,
1185 | "metadata": {},
1186 | "outputs": [],
1187 | "source": [
1188 | "# Now join ratings with books\n",
1189 | "\n",
1190 | "ratings_with_books = ratings.merge(books, on='ISBN')"
1191 | ]
1192 | },
1193 | {
1194 | "cell_type": "code",
1195 | "execution_count": 34,
1196 | "metadata": {},
1197 | "outputs": [
1198 | {
1199 | "data": {
1289 | "text/plain": [
1290 | " user_id ISBN rating \\\n",
1291 | "0 277427 002542730X 10 \n",
1292 | "1 3363 002542730X 0 \n",
1293 | "2 11676 002542730X 6 \n",
1294 | "3 12538 002542730X 10 \n",
1295 | "4 13552 002542730X 0 \n",
1296 | "\n",
1297 | " title author year \\\n",
1298 | "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1299 | "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1300 | "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1301 | "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1302 | "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1303 | "\n",
1304 | " publisher \\\n",
1305 | "0 John Wiley & Sons Inc \n",
1306 | "1 John Wiley & Sons Inc \n",
1307 | "2 John Wiley & Sons Inc \n",
1308 | "3 John Wiley & Sons Inc \n",
1309 | "4 John Wiley & Sons Inc \n",
1310 | "\n",
1311 | " image_url \n",
1312 | "0 http://images.amazon.com/images/P/002542730X.0... \n",
1313 | "1 http://images.amazon.com/images/P/002542730X.0... \n",
1314 | "2 http://images.amazon.com/images/P/002542730X.0... \n",
1315 | "3 http://images.amazon.com/images/P/002542730X.0... \n",
1316 | "4 http://images.amazon.com/images/P/002542730X.0... "
1317 | ]
1318 | },
1319 | "execution_count": 34,
1320 | "metadata": {},
1321 | "output_type": "execute_result"
1322 | }
1323 | ],
1324 | "source": [
1325 | "ratings_with_books.head()"
1326 | ]
1327 | },
1328 | {
1329 | "cell_type": "code",
1330 | "execution_count": 35,
1331 | "metadata": {},
1332 | "outputs": [
1333 | {
1334 | "data": {
1335 | "text/plain": [
1336 | "(487671, 8)"
1337 | ]
1338 | },
1339 | "execution_count": 35,
1340 | "metadata": {},
1341 | "output_type": "execute_result"
1342 | }
1343 | ],
1344 | "source": [
1345 | "ratings_with_books.shape"
1346 | ]
1347 | },
1348 | {
1349 | "cell_type": "code",
1350 | "execution_count": 36,
1351 | "metadata": {},
1352 | "outputs": [],
1353 | "source": [
1354 | "number_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()"
1355 | ]
1356 | },
1357 | {
1358 | "cell_type": "code",
1359 | "execution_count": 37,
1360 | "metadata": {},
1361 | "outputs": [
1362 | {
1363 | "data": {
1417 | "text/plain": [
1418 | " title rating\n",
1419 | "0 A Light in the Storm: The Civil War Diary of ... 2\n",
1420 | "1 Always Have Popsicles 1\n",
1421 | "2 Apple Magic (The Collector's series) 1\n",
1422 | "3 Beyond IBM: Leadership Marketing and Finance ... 1\n",
1423 | "4 Clifford Visita El Hospital (Clifford El Gran... 1"
1424 | ]
1425 | },
1426 | "execution_count": 37,
1427 | "metadata": {},
1428 | "output_type": "execute_result"
1429 | }
1430 | ],
1431 | "source": [
1432 | "number_rating.head()"
1433 | ]
1434 | },
1435 | {
1436 | "cell_type": "code",
1437 | "execution_count": 38,
1438 | "metadata": {},
1439 | "outputs": [],
1440 | "source": [
1441 | "number_rating.rename(columns={'rating':'num_of_rating'},inplace=True)"
1442 | ]
1443 | },
1444 | {
1445 | "cell_type": "code",
1446 | "execution_count": 39,
1447 | "metadata": {},
1448 | "outputs": [
1449 | {
1450 | "data": {
1504 | "text/plain": [
1505 | " title num_of_rating\n",
1506 | "0 A Light in the Storm: The Civil War Diary of ... 2\n",
1507 | "1 Always Have Popsicles 1\n",
1508 | "2 Apple Magic (The Collector's series) 1\n",
1509 | "3 Beyond IBM: Leadership Marketing and Finance ... 1\n",
1510 | "4 Clifford Visita El Hospital (Clifford El Gran... 1"
1511 | ]
1512 | },
1513 | "execution_count": 39,
1514 | "metadata": {},
1515 | "output_type": "execute_result"
1516 | }
1517 | ],
1518 | "source": [
1519 | "number_rating.head()"
1520 | ]
1521 | },
1522 | {
1523 | "cell_type": "code",
1524 | "execution_count": 40,
1525 | "metadata": {},
1526 | "outputs": [],
1527 | "source": [
1528 | "final_rating = ratings_with_books.merge(number_rating, on='title')"
1529 | ]
1530 | },
1531 | {
1532 | "cell_type": "code",
1533 | "execution_count": 41,
1534 | "metadata": {},
1535 | "outputs": [
1536 | {
1537 | "data": {
1633 | "text/plain": [
1634 | " user_id ISBN rating \\\n",
1635 | "0 277427 002542730X 10 \n",
1636 | "1 3363 002542730X 0 \n",
1637 | "2 11676 002542730X 6 \n",
1638 | "3 12538 002542730X 10 \n",
1639 | "4 13552 002542730X 0 \n",
1640 | "\n",
1641 | " title author year \\\n",
1642 | "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1643 | "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1644 | "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1645 | "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1646 | "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1647 | "\n",
1648 | " publisher \\\n",
1649 | "0 John Wiley & Sons Inc \n",
1650 | "1 John Wiley & Sons Inc \n",
1651 | "2 John Wiley & Sons Inc \n",
1652 | "3 John Wiley & Sons Inc \n",
1653 | "4 John Wiley & Sons Inc \n",
1654 | "\n",
1655 | " image_url num_of_rating \n",
1656 | "0 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1657 | "1 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1658 | "2 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1659 | "3 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1660 | "4 http://images.amazon.com/images/P/002542730X.0... 82 "
1661 | ]
1662 | },
1663 | "execution_count": 41,
1664 | "metadata": {},
1665 | "output_type": "execute_result"
1666 | }
1667 | ],
1668 | "source": [
1669 | "final_rating.head()"
1670 | ]
1671 | },
1672 | {
1673 | "cell_type": "code",
1674 | "execution_count": 42,
1675 | "metadata": {},
1676 | "outputs": [
1677 | {
1678 | "data": {
1679 | "text/plain": [
1680 | "(487671, 9)"
1681 | ]
1682 | },
1683 | "execution_count": 42,
1684 | "metadata": {},
1685 | "output_type": "execute_result"
1686 | }
1687 | ],
1688 | "source": [
1689 | "final_rating.shape"
1690 | ]
1691 | },
1692 | {
1693 | "cell_type": "code",
1694 | "execution_count": 43,
1695 | "metadata": {},
1696 | "outputs": [],
1697 | "source": [
1698 | "# Let's keep only books that received at least 50 ratings\n",
1699 | "\n",
1700 | "final_rating = final_rating[final_rating['num_of_rating'] >= 50]"
1701 | ]
1702 | },
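{
"cell_type": "markdown",
"metadata": {},
"source": [
"The merge with `number_rating` can also be skipped; a sketch of the same >= 50 filter using a groupby transform (`final_rating_alt` is a hypothetical name):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Equivalent filter without the helper dataframe: transform('count')\n",
"# broadcasts each title's rating count back onto every row. Same rows\n",
"# as final_rating, minus the num_of_rating column.\n",
"final_rating_alt = ratings_with_books[\n",
"    ratings_with_books.groupby('title')['rating'].transform('count') >= 50\n",
"]"
]
},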
1703 | {
1704 | "cell_type": "code",
1705 | "execution_count": 44,
1706 | "metadata": {},
1707 | "outputs": [
1708 | {
1709 | "data": {
1805 | "text/plain": [
1806 | " user_id ISBN rating \\\n",
1807 | "0 277427 002542730X 10 \n",
1808 | "1 3363 002542730X 0 \n",
1809 | "2 11676 002542730X 6 \n",
1810 | "3 12538 002542730X 10 \n",
1811 | "4 13552 002542730X 0 \n",
1812 | "\n",
1813 | " title author year \\\n",
1814 | "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1815 | "1 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1816 | "2 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1817 | "3 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1818 | "4 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner 1994 \n",
1819 | "\n",
1820 | " publisher \\\n",
1821 | "0 John Wiley & Sons Inc \n",
1822 | "1 John Wiley & Sons Inc \n",
1823 | "2 John Wiley & Sons Inc \n",
1824 | "3 John Wiley & Sons Inc \n",
1825 | "4 John Wiley & Sons Inc \n",
1826 | "\n",
1827 | " image_url num_of_rating \n",
1828 | "0 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1829 | "1 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1830 | "2 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1831 | "3 http://images.amazon.com/images/P/002542730X.0... 82 \n",
1832 | "4 http://images.amazon.com/images/P/002542730X.0... 82 "
1833 | ]
1834 | },
1835 | "execution_count": 44,
1836 | "metadata": {},
1837 | "output_type": "execute_result"
1838 | }
1839 | ],
1840 | "source": [
1841 | "final_rating.head()"
1842 | ]
1843 | },
1844 | {
1845 | "cell_type": "code",
1846 | "execution_count": 45,
1847 | "metadata": {},
1848 | "outputs": [
1849 | {
1850 | "data": {
1851 | "text/plain": [
1852 | "(61853, 9)"
1853 | ]
1854 | },
1855 | "execution_count": 45,
1856 | "metadata": {},
1857 | "output_type": "execute_result"
1858 | }
1859 | ],
1860 | "source": [
1861 | "final_rating.shape"
1862 | ]
1863 | },
1864 | {
1865 | "cell_type": "code",
1866 | "execution_count": 46,
1867 | "metadata": {},
1868 | "outputs": [],
1869 | "source": [
1870 | "# Let's drop duplicate (user_id, title) pairs\n",
1871 | "final_rating.drop_duplicates(['user_id','title'],inplace=True)"
1872 | ]
1873 | },
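{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since `final_rating` is a filtered slice at this point, the in-place call above can trigger pandas' SettingWithCopyWarning; a safer, non-inplace sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reassigning avoids mutating what may be a view of the original frame.\n",
"final_rating = final_rating.drop_duplicates(['user_id', 'title'])"
]
},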
1874 | {
1875 | "cell_type": "code",
1876 | "execution_count": 47,
1877 | "metadata": {},
1878 | "outputs": [
1879 | {
1880 | "data": {
1881 | "text/plain": [
1882 | "(59850, 9)"
1883 | ]
1884 | },
1885 | "execution_count": 47,
1886 | "metadata": {},
1887 | "output_type": "execute_result"
1888 | }
1889 | ],
1890 | "source": [
1891 | "final_rating.shape"
1892 | ]
1893 | },
1894 | {
1895 | "cell_type": "code",
1896 | "execution_count": 48,
1897 | "metadata": {},
1898 | "outputs": [],
1899 | "source": [
1900 | "# Let's create a title-by-user pivot table of ratings\n",
1901 | "book_pivot = final_rating.pivot_table(columns='user_id', index='title', values= 'rating')"
1902 | ]
1903 | },
1904 | {
1905 | "cell_type": "code",
1906 | "execution_count": 49,
1907 | "metadata": {},
1908 | "outputs": [
1909 | {
1910 | "data": {
2247 | "text/plain": [
2248 | "user_id 254 2276 2766 \\\n",
2249 | "title \n",
2250 | "1984 9.0 NaN NaN \n",
2251 | "1st to Die: A Novel NaN NaN NaN \n",
2252 | "2nd Chance NaN 10.0 NaN \n",
2253 | "4 Blondes NaN NaN NaN \n",
2254 | "84 Charing Cross Road NaN NaN NaN \n",
2255 | "... ... ... ... \n",
2256 | "Year of Wonders NaN NaN NaN \n",
2257 | "You Belong To Me NaN NaN NaN \n",
2258 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
2259 | "Zoya NaN NaN NaN \n",
2260 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2261 | "\n",
2262 | "user_id 2977 3363 3757 \\\n",
2263 | "title \n",
2264 | "1984 NaN NaN NaN \n",
2265 | "1st to Die: A Novel NaN NaN NaN \n",
2266 | "2nd Chance NaN NaN NaN \n",
2267 | "4 Blondes NaN NaN NaN \n",
2268 | "84 Charing Cross Road NaN NaN NaN \n",
2269 | "... ... ... ... \n",
2270 | "Year of Wonders 7.0 NaN NaN \n",
2271 | "You Belong To Me NaN NaN NaN \n",
2272 | "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n",
2273 | "Zoya NaN NaN NaN \n",
2274 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2275 | "\n",
2276 | "user_id 4017 4385 6242 \\\n",
2277 | "title \n",
2278 | "1984 NaN NaN NaN \n",
2279 | "1st to Die: A Novel NaN NaN NaN \n",
2280 | "2nd Chance NaN NaN NaN \n",
2281 | "4 Blondes NaN NaN NaN \n",
2282 | "84 Charing Cross Road NaN NaN NaN \n",
2283 | "... ... ... ... \n",
2284 | "Year of Wonders NaN NaN 7.0 \n",
2285 | "You Belong To Me NaN NaN NaN \n",
2286 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
2287 | "Zoya NaN NaN NaN \n",
2288 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2289 | "\n",
2290 | "user_id 6251 ... 274004 \\\n",
2291 | "title ... \n",
2292 | "1984 NaN ... NaN \n",
2293 | "1st to Die: A Novel NaN ... NaN \n",
2294 | "2nd Chance NaN ... NaN \n",
2295 | "4 Blondes 0.0 ... NaN \n",
2296 | "84 Charing Cross Road NaN ... NaN \n",
2297 | "... ... ... ... \n",
2298 | "Year of Wonders NaN ... NaN \n",
2299 | "You Belong To Me NaN ... NaN \n",
2300 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... NaN \n",
2301 | "Zoya NaN ... NaN \n",
2302 | "\\O\\\" Is for Outlaw\" NaN ... NaN \n",
2303 | "\n",
2304 | "user_id 274061 274301 274308 \\\n",
2305 | "title \n",
2306 | "1984 NaN NaN NaN \n",
2307 | "1st to Die: A Novel NaN NaN NaN \n",
2308 | "2nd Chance NaN NaN 0.0 \n",
2309 | "4 Blondes NaN NaN NaN \n",
2310 | "84 Charing Cross Road NaN NaN NaN \n",
2311 | "... ... ... ... \n",
2312 | "Year of Wonders NaN NaN NaN \n",
2313 | "You Belong To Me NaN NaN NaN \n",
2314 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
2315 | "Zoya NaN NaN NaN \n",
2316 | "\\O\\\" Is for Outlaw\" NaN 8.0 NaN \n",
2317 | "\n",
2318 | "user_id 274808 275970 277427 \\\n",
2319 | "title \n",
2320 | "1984 NaN 0.0 NaN \n",
2321 | "1st to Die: A Novel NaN NaN NaN \n",
2322 | "2nd Chance NaN NaN NaN \n",
2323 | "4 Blondes NaN NaN NaN \n",
2324 | "84 Charing Cross Road NaN 10.0 NaN \n",
2325 | "... ... ... ... \n",
2326 | "Year of Wonders NaN 0.0 NaN \n",
2327 | "You Belong To Me NaN NaN NaN \n",
2328 | "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n",
2329 | "Zoya NaN NaN NaN \n",
2330 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2331 | "\n",
2332 | "user_id 277478 277639 278418 \n",
2333 | "title \n",
2334 | "1984 NaN NaN NaN \n",
2335 | "1st to Die: A Novel NaN NaN NaN \n",
2336 | "2nd Chance NaN 0.0 NaN \n",
2337 | "4 Blondes NaN NaN NaN \n",
2338 | "84 Charing Cross Road NaN NaN NaN \n",
2339 | "... ... ... ... \n",
2340 | "Year of Wonders NaN NaN NaN \n",
2341 | "You Belong To Me NaN NaN NaN \n",
2342 | "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n",
2343 | "Zoya NaN NaN NaN \n",
2344 | "\\O\\\" Is for Outlaw\" NaN NaN NaN \n",
2345 | "\n",
2346 | "[742 rows x 888 columns]"
2347 | ]
2348 | },
2349 | "execution_count": 49,
2350 | "metadata": {},
2351 | "output_type": "execute_result"
2352 | }
2353 | ],
2354 | "source": [
2355 | "book_pivot"
2356 | ]
2357 | },
2358 | {
2359 | "cell_type": "code",
2360 | "execution_count": 50,
2361 | "metadata": {},
2362 | "outputs": [
2363 | {
2364 | "data": {
2365 | "text/plain": [
2366 | "(742, 888)"
2367 | ]
2368 | },
2369 | "execution_count": 50,
2370 | "metadata": {},
2371 | "output_type": "execute_result"
2372 | }
2373 | ],
2374 | "source": [
2375 | "book_pivot.shape"
2376 | ]
2377 | },
2378 | {
2379 | "cell_type": "code",
2380 | "execution_count": 51,
2381 | "metadata": {},
2382 | "outputs": [],
2383 | "source": [
2384 | "book_pivot.fillna(0, inplace=True)"
2385 | ]
2386 | },
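{
"cell_type": "markdown",
"metadata": {},
"source": [
"The pivot and the fill can be combined in one call, since `pivot_table` accepts a `fill_value` argument:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Same result in a single step: fill_value=0 replaces the NaNs that\n",
"# appear wherever a user never rated a given title.\n",
"book_pivot = final_rating.pivot_table(columns='user_id', index='title',\n",
"                                      values='rating', fill_value=0)"
]
},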
2387 | {
2388 | "cell_type": "code",
2389 | "execution_count": 52,
2390 | "metadata": {},
2391 | "outputs": [
2392 | {
2393 | "data": {
2730 | "text/plain": [
2731 | "user_id 254 2276 2766 \\\n",
2732 | "title \n",
2733 | "1984 9.0 0.0 0.0 \n",
2734 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2735 | "2nd Chance 0.0 10.0 0.0 \n",
2736 | "4 Blondes 0.0 0.0 0.0 \n",
2737 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2738 | "... ... ... ... \n",
2739 | "Year of Wonders 0.0 0.0 0.0 \n",
2740 | "You Belong To Me 0.0 0.0 0.0 \n",
2741 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2742 | "Zoya 0.0 0.0 0.0 \n",
2743 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2744 | "\n",
2745 | "user_id 2977 3363 3757 \\\n",
2746 | "title \n",
2747 | "1984 0.0 0.0 0.0 \n",
2748 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2749 | "2nd Chance 0.0 0.0 0.0 \n",
2750 | "4 Blondes 0.0 0.0 0.0 \n",
2751 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2752 | "... ... ... ... \n",
2753 | "Year of Wonders 7.0 0.0 0.0 \n",
2754 | "You Belong To Me 0.0 0.0 0.0 \n",
2755 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2756 | "Zoya 0.0 0.0 0.0 \n",
2757 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2758 | "\n",
2759 | "user_id 4017 4385 6242 \\\n",
2760 | "title \n",
2761 | "1984 0.0 0.0 0.0 \n",
2762 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2763 | "2nd Chance 0.0 0.0 0.0 \n",
2764 | "4 Blondes 0.0 0.0 0.0 \n",
2765 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2766 | "... ... ... ... \n",
2767 | "Year of Wonders 0.0 0.0 7.0 \n",
2768 | "You Belong To Me 0.0 0.0 0.0 \n",
2769 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2770 | "Zoya 0.0 0.0 0.0 \n",
2771 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2772 | "\n",
2773 | "user_id 6251 ... 274004 \\\n",
2774 | "title ... \n",
2775 | "1984 0.0 ... 0.0 \n",
2776 | "1st to Die: A Novel 0.0 ... 0.0 \n",
2777 | "2nd Chance 0.0 ... 0.0 \n",
2778 | "4 Blondes 0.0 ... 0.0 \n",
2779 | "84 Charing Cross Road 0.0 ... 0.0 \n",
2780 | "... ... ... ... \n",
2781 | "Year of Wonders 0.0 ... 0.0 \n",
2782 | "You Belong To Me 0.0 ... 0.0 \n",
2783 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... 0.0 \n",
2784 | "Zoya 0.0 ... 0.0 \n",
2785 | "\\O\\\" Is for Outlaw\" 0.0 ... 0.0 \n",
2786 | "\n",
2787 | "user_id 274061 274301 274308 \\\n",
2788 | "title \n",
2789 | "1984 0.0 0.0 0.0 \n",
2790 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2791 | "2nd Chance 0.0 0.0 0.0 \n",
2792 | "4 Blondes 0.0 0.0 0.0 \n",
2793 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2794 | "... ... ... ... \n",
2795 | "Year of Wonders 0.0 0.0 0.0 \n",
2796 | "You Belong To Me 0.0 0.0 0.0 \n",
2797 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2798 | "Zoya 0.0 0.0 0.0 \n",
2799 | "\\O\\\" Is for Outlaw\" 0.0 8.0 0.0 \n",
2800 | "\n",
2801 | "user_id 274808 275970 277427 \\\n",
2802 | "title \n",
2803 | "1984 0.0 0.0 0.0 \n",
2804 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2805 | "2nd Chance 0.0 0.0 0.0 \n",
2806 | "4 Blondes 0.0 0.0 0.0 \n",
2807 | "84 Charing Cross Road 0.0 10.0 0.0 \n",
2808 | "... ... ... ... \n",
2809 | "Year of Wonders 0.0 0.0 0.0 \n",
2810 | "You Belong To Me 0.0 0.0 0.0 \n",
2811 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2812 | "Zoya 0.0 0.0 0.0 \n",
2813 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2814 | "\n",
2815 | "user_id 277478 277639 278418 \n",
2816 | "title \n",
2817 | "1984 0.0 0.0 0.0 \n",
2818 | "1st to Die: A Novel 0.0 0.0 0.0 \n",
2819 | "2nd Chance 0.0 0.0 0.0 \n",
2820 | "4 Blondes 0.0 0.0 0.0 \n",
2821 | "84 Charing Cross Road 0.0 0.0 0.0 \n",
2822 | "... ... ... ... \n",
2823 | "Year of Wonders 0.0 0.0 0.0 \n",
2824 | "You Belong To Me 0.0 0.0 0.0 \n",
2825 | "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n",
2826 | "Zoya 0.0 0.0 0.0 \n",
2827 | "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n",
2828 | "\n",
2829 | "[742 rows x 888 columns]"
2830 | ]
2831 | },
2832 | "execution_count": 52,
2833 | "metadata": {},
2834 | "output_type": "execute_result"
2835 | }
2836 | ],
2837 | "source": [
2838 | "book_pivot"
2839 | ]
2840 | },
2841 | {
2842 | "cell_type": "markdown",
2843 | "metadata": {},
2844 | "source": [
2845 | "# Training the Model"
2846 | ]
2847 | },
2848 | {
2849 | "cell_type": "code",
2850 | "execution_count": 53,
2851 | "metadata": {},
2852 | "outputs": [],
2853 | "source": [
2854 | "from scipy.sparse import csr_matrix"
2855 | ]
2856 | },
2857 | {
2858 | "cell_type": "code",
2859 | "execution_count": 54,
2860 | "metadata": {},
2861 | "outputs": [],
2862 | "source": [
2863 | "book_sparse = csr_matrix(book_pivot)"
2864 | ]
2865 | },
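{
"cell_type": "markdown",
"metadata": {},
"source": [
"The conversion pays off because the matrix is mostly zeros and CSR stores only the non-zero entries; a rough size comparison over the main CSR arrays:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Compare dense vs. sparse storage for the same ratings matrix.\n",
"dense_bytes = book_pivot.values.nbytes\n",
"sparse_bytes = (book_sparse.data.nbytes + book_sparse.indices.nbytes\n",
"                + book_sparse.indptr.nbytes)\n",
"print(dense_bytes, sparse_bytes)"
]
},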
2866 | {
2867 | "cell_type": "code",
2868 | "execution_count": 55,
2869 | "metadata": {},
2870 | "outputs": [
2871 | {
2872 | "data": {
2873 | "text/plain": [
2874 | "scipy.sparse.csr.csr_matrix"
2875 | ]
2876 | },
2877 | "execution_count": 55,
2878 | "metadata": {},
2879 | "output_type": "execute_result"
2880 | }
2881 | ],
2882 | "source": [
2883 | "type(book_sparse)"
2884 | ]
2885 | },
2886 | {
2887 | "cell_type": "code",
2888 | "execution_count": 56,
2889 | "metadata": {},
2890 | "outputs": [],
2891 | "source": [
2892 | "# Import NearestNeighbors, an unsupervised algorithm for similarity search\n",
2893 | "from sklearn.neighbors import NearestNeighbors\n",
2894 | "model = NearestNeighbors(algorithm= 'brute')"
2895 | ]
2896 | },
2897 | {
2898 | "cell_type": "code",
2899 | "execution_count": 57,
2900 | "metadata": {},
2901 | "outputs": [
2902 | {
2903 | "data": {
2904 | "text/plain": [
2905 | "NearestNeighbors(algorithm='brute')"
2906 | ]
2907 | },
2908 | "execution_count": 57,
2909 | "metadata": {},
2910 | "output_type": "execute_result"
2911 | }
2912 | ],
2913 | "source": [
2914 | "model.fit(book_sparse)"
2915 | ]
2916 | },
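{
"cell_type": "markdown",
"metadata": {},
"source": [
"By default `NearestNeighbors` uses Euclidean (Minkowski, p=2) distance. On sparse rating vectors, cosine distance is a common alternative worth trying; a minimal sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cosine variant: with algorithm='brute', scikit-learn accepts\n",
"# metric='cosine' directly on sparse input.\n",
"model_cosine = NearestNeighbors(algorithm='brute', metric='cosine')\n",
"model_cosine.fit(book_sparse)"
]
},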
2917 | {
2918 | "cell_type": "code",
2919 | "execution_count": 58,
2920 | "metadata": {},
2921 | "outputs": [],
2922 | "source": [
2923 | "distance, suggestion = model.kneighbors(book_pivot.iloc[237,:].values.reshape(1,-1), n_neighbors=6 )"
2924 | ]
2925 | },
2926 | {
2927 | "cell_type": "code",
2928 | "execution_count": 59,
2929 | "metadata": {},
2930 | "outputs": [
2931 | {
2932 | "data": {
2933 | "text/plain": [
2934 | "array([[ 0. , 68.78953409, 69.5413546 , 72.64296249, 76.83098333,\n",
2935 | " 77.28518616]])"
2936 | ]
2937 | },
2938 | "execution_count": 59,
2939 | "metadata": {},
2940 | "output_type": "execute_result"
2941 | }
2942 | ],
2943 | "source": [
2944 | "distance"
2945 | ]
2946 | },
2947 | {
2948 | "cell_type": "code",
2949 | "execution_count": 60,
2950 | "metadata": {},
2951 | "outputs": [
2952 | {
2953 | "data": {
2954 | "text/plain": [
2955 | "array([[237, 240, 238, 241, 184, 536]], dtype=int64)"
2956 | ]
2957 | },
2958 | "execution_count": 60,
2959 | "metadata": {},
2960 | "output_type": "execute_result"
2961 | }
2962 | ],
2963 | "source": [
2964 | "suggestion"
2965 | ]
2966 | },
2967 | {
2968 | "cell_type": "code",
2969 | "execution_count": 61,
2970 | "metadata": {},
2971 | "outputs": [
2972 | {
2973 | "data": {
2974 | "text/plain": [
2975 | "user_id\n",
2976 | "254 9.0\n",
2977 | "2276 0.0\n",
2978 | "2766 0.0\n",
2979 | "2977 0.0\n",
2980 | "3363 0.0\n",
2981 | " ... \n",
2982 | "275970 9.0\n",
2983 | "277427 0.0\n",
2984 | "277478 0.0\n",
2985 | "277639 0.0\n",
2986 | "278418 0.0\n",
2987 | "Name: Harry Potter and the Sorcerer's Stone (Book 1), Length: 888, dtype: float64"
2988 | ]
2989 | },
2990 | "execution_count": 61,
2991 | "metadata": {},
2992 | "output_type": "execute_result"
2993 | }
2994 | ],
2995 | "source": [
2996 | "book_pivot.iloc[241,:]"
2997 | ]
2998 | },
2999 | {
3000 | "cell_type": "code",
3001 | "execution_count": 62,
3002 | "metadata": {},
3003 | "outputs": [
3004 | {
3005 | "name": "stdout",
3006 | "output_type": "stream",
3007 | "text": [
3008 | "Index(['Harry Potter and the Chamber of Secrets (Book 2)',\n",
3009 | " 'Harry Potter and the Prisoner of Azkaban (Book 3)',\n",
3010 | " 'Harry Potter and the Goblet of Fire (Book 4)',\n",
3011 | " 'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',\n",
3012 | " 'The Cradle Will Fall'],\n",
3013 | " dtype='object', name='title')\n"
3014 | ]
3015 | }
3016 | ],
3017 | "source": [
3018 | "for i in range(len(suggestion)):\n",
3019 | " print(book_pivot.index[suggestion[i]])"
3020 | ]
3021 | },
3022 | {
3023 | "cell_type": "code",
3024 | "execution_count": 63,
3025 | "metadata": {},
3026 | "outputs": [
3027 | {
3028 | "data": {
3029 | "text/plain": [
3030 | "'4 Blondes'"
3031 | ]
3032 | },
3033 | "execution_count": 63,
3034 | "metadata": {},
3035 | "output_type": "execute_result"
3036 | }
3037 | ],
3038 | "source": [
3039 | "book_pivot.index[3]"
3040 | ]
3041 | },
3042 | {
3043 | "cell_type": "code",
3044 | "execution_count": 64,
3045 | "metadata": {},
3046 | "outputs": [],
3047 | "source": [
3048 | "#keeping books name\n",
3049 | "book_names = book_pivot.index"
3050 | ]
3051 | },
3052 | {
3053 | "cell_type": "code",
3054 | "execution_count": 65,
3055 | "metadata": {},
3056 | "outputs": [
3057 | {
3058 | "data": {
3059 | "text/plain": [
3060 | "'2nd Chance'"
3061 | ]
3062 | },
3063 | "execution_count": 65,
3064 | "metadata": {},
3065 | "output_type": "execute_result"
3066 | }
3067 | ],
3068 | "source": [
3069 | "book_names[2]"
3070 | ]
3071 | },
3072 | {
3073 | "cell_type": "code",
3074 | "execution_count": 66,
3075 | "metadata": {},
3076 | "outputs": [
3077 | {
3078 | "data": {
3079 | "text/plain": [
3080 | "3"
3081 | ]
3082 | },
3083 | "execution_count": 66,
3084 | "metadata": {},
3085 | "output_type": "execute_result"
3086 | }
3087 | ],
3088 | "source": [
3089 | "np.where(book_pivot.index == '4 Blondes')[0][0]"
3090 | ]
3091 | },
3092 | {
3093 | "cell_type": "markdown",
3094 | "metadata": {},
3095 | "source": [
3096 | "# find url"
3097 | ]
3098 | },
3099 | {
3100 | "cell_type": "code",
3101 | "execution_count": 67,
3102 | "metadata": {},
3103 | "outputs": [],
3104 | "source": [
3105 | "# final_rating['title'].value_counts()\n",
3106 | "ids = np.where(final_rating['title'] == \"Harry Potter and the Chamber of Secrets (Book 2)\")[0][0]"
3107 | ]
3108 | },
3109 | {
3110 | "cell_type": "code",
3111 | "execution_count": 68,
3112 | "metadata": {},
3113 | "outputs": [
3114 | {
3115 | "data": {
3116 | "text/plain": [
3117 | "'http://images.amazon.com/images/P/0439064872.01.LZZZZZZZ.jpg'"
3118 | ]
3119 | },
3120 | "execution_count": 68,
3121 | "metadata": {},
3122 | "output_type": "execute_result"
3123 | }
3124 | ],
3125 | "source": [
3126 | "final_rating.iloc[ids]['image_url']"
3127 | ]
3128 | },
3129 | {
3130 | "cell_type": "code",
3131 | "execution_count": 69,
3132 | "metadata": {},
3133 | "outputs": [],
3134 | "source": [
3135 | "book_name = []\n",
3136 | "for book_id in suggestion:\n",
3137 | " book_name.append(book_pivot.index[book_id])\n",
3138 | " \n",
3139 | " "
3140 | ]
3141 | },
3142 | {
3143 | "cell_type": "code",
3144 | "execution_count": 70,
3145 | "metadata": {},
3146 | "outputs": [
3147 | {
3148 | "data": {
3149 | "text/plain": [
3150 | "Index(['Harry Potter and the Chamber of Secrets (Book 2)',\n",
3151 | " 'Harry Potter and the Prisoner of Azkaban (Book 3)',\n",
3152 | " 'Harry Potter and the Goblet of Fire (Book 4)',\n",
3153 | " 'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',\n",
3154 | " 'The Cradle Will Fall'],\n",
3155 | " dtype='object', name='title')"
3156 | ]
3157 | },
3158 | "execution_count": 70,
3159 | "metadata": {},
3160 | "output_type": "execute_result"
3161 | }
3162 | ],
3163 | "source": [
3164 | "book_name[0]"
3165 | ]
3166 | },
3167 | {
3168 | "cell_type": "code",
3169 | "execution_count": 71,
3170 | "metadata": {},
3171 | "outputs": [],
3172 | "source": [
3173 | "ids_index = []\n",
3174 | "for name in book_name[0]: \n",
3175 | " ids = np.where(final_rating['title'] == name)[0][0]\n",
3176 | " ids_index.append(ids)"
3177 | ]
3178 | },
3179 | {
3180 | "cell_type": "code",
3181 | "execution_count": 72,
3182 | "metadata": {},
3183 | "outputs": [
3184 | {
3185 | "name": "stdout",
3186 | "output_type": "stream",
3187 | "text": [
3188 | "http://images.amazon.com/images/P/0439064872.01.LZZZZZZZ.jpg\n",
3189 | "http://images.amazon.com/images/P/0439136369.01.LZZZZZZZ.jpg\n",
3190 | "http://images.amazon.com/images/P/0439139597.01.LZZZZZZZ.jpg\n",
3191 | "http://images.amazon.com/images/P/043936213X.01.LZZZZZZZ.jpg\n",
3192 | "http://images.amazon.com/images/P/0446604232.01.LZZZZZZZ.jpg\n",
3193 | "http://images.amazon.com/images/P/0440115450.01.LZZZZZZZ.jpg\n"
3194 | ]
3195 | }
3196 | ],
3197 | "source": [
3198 | "for idx in ids_index:\n",
3199 | " url = final_rating.iloc[idx]['image_url']\n",
3200 | " print(url)"
3201 | ]
3202 | },
3203 | {
3204 | "cell_type": "code",
3205 | "execution_count": 73,
3206 | "metadata": {},
3207 | "outputs": [],
3208 | "source": [
3209 | "import pickle\n",
3210 | "pickle.dump(model,open('artifacts/model.pkl','wb'))\n",
3211 | "pickle.dump(book_names,open('artifacts/book_names.pkl','wb'))\n",
3212 | "pickle.dump(final_rating,open('artifacts/final_rating.pkl','wb'))\n",
3213 | "pickle.dump(book_pivot,open('artifacts/book_pivot.pkl','wb'))"
3214 | ]
3215 | },
3216 | {
3217 | "cell_type": "markdown",
3218 | "metadata": {},
3219 | "source": [
3220 | "# Testing model"
3221 | ]
3222 | },
3223 | {
3224 | "cell_type": "code",
3225 | "execution_count": 74,
3226 | "metadata": {},
3227 | "outputs": [],
3228 | "source": [
3229 | "def recommend_book(book_name):\n",
3230 | " book_id = np.where(book_pivot.index == book_name)[0][0]\n",
3231 | " distance, suggestion = model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1), n_neighbors=6 )\n",
3232 | " \n",
3233 | " for i in range(len(suggestion)):\n",
3234 | " books = book_pivot.index[suggestion[i]]\n",
3235 | " for j in books:\n",
3236 | " if j == book_name:\n",
3237 | " print(f\"You searched '{book_name}'\\n\")\n",
3238 | " print(\"The suggestion books are: \\n\")\n",
3239 | " else:\n",
3240 | " print(j)"
3241 | ]
3242 | },
3243 | {
3244 | "cell_type": "code",
3245 | "execution_count": 75,
3246 | "metadata": {},
3247 | "outputs": [
3248 | {
3249 | "name": "stdout",
3250 | "output_type": "stream",
3251 | "text": [
3252 | "You searched 'Harry Potter and the Chamber of Secrets (Book 2)'\n",
3253 | "\n",
3254 | "The suggestion books are: \n",
3255 | "\n",
3256 | "Harry Potter and the Prisoner of Azkaban (Book 3)\n",
3257 | "Harry Potter and the Goblet of Fire (Book 4)\n",
3258 | "Harry Potter and the Sorcerer's Stone (Book 1)\n",
3259 | "Exclusive\n",
3260 | "The Cradle Will Fall\n"
3261 | ]
3262 | }
3263 | ],
3264 | "source": [
3265 | "book_name = \"Harry Potter and the Chamber of Secrets (Book 2)\"\n",
3266 | "recommend_book(book_name)"
3267 | ]
3268 | },
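{
"cell_type": "markdown",
"metadata": {},
"source": [
"The four pickles written above are the artifacts a serving process, such as the Streamlit app, would load. A minimal loading sketch (assuming the same `artifacts/` paths; `loaded_model` and `loaded_names` are illustrative names):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reload the serialized model and the title index exactly as they were dumped\n",
"with open('artifacts/model.pkl', 'rb') as f:\n",
"    loaded_model = pickle.load(f)\n",
"with open('artifacts/book_names.pkl', 'rb') as f:\n",
"    loaded_names = pickle.load(f)"
]
},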
3269 | {
3270 | "cell_type": "code",
3271 | "execution_count": null,
3272 | "metadata": {},
3273 | "outputs": [],
3274 | "source": []
3275 | }
3276 | ],
3277 | "metadata": {
3278 | "kernelspec": {
3279 | "display_name": "Python 3 (ipykernel)",
3280 | "language": "python",
3281 | "name": "python3"
3282 | },
3283 | "language_info": {
3284 | "codemirror_mode": {
3285 | "name": "ipython",
3286 | "version": 3
3287 | },
3288 | "file_extension": ".py",
3289 | "mimetype": "text/x-python",
3290 | "name": "python",
3291 | "nbconvert_exporter": "python",
3292 | "pygments_lexer": "ipython3",
3293 | "version": "3.7.13"
3294 | }
3295 | },
3296 | "nbformat": 4,
3297 | "nbformat_minor": 4
3298 | }
3299 |
--------------------------------------------------------------------------------