├── .gitattributes ├── .DS_Store ├── example.env ├── normalized_data ├── .DS_Store ├── normalized_cast.csv ├── normalized_crew.csv ├── normalized_genres.csv ├── normalized_keywords.csv ├── normalized_links.csv ├── normalized_movies.csv ├── normalized_links_small.csv ├── normalized_ratings_small.csv ├── normalized_spoken_languages.csv ├── normalized_production_companies.csv └── normalized_production_countries.csv ├── .gitignore ├── requirements.txt ├── movie_embeddings.csv ├── Dockerfile ├── .env.yaml ├── LICENSE ├── load_embeddings.py ├── validate_graph.py ├── export_embeddings_to_csv.py ├── prompts.py ├── generate_embeddings_to_csv.py ├── generate_embeddings.py ├── chatbot.py ├── app.py ├── README.md └── graph_build.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.csv filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/neo4j-vertexai-codelab/main/.DS_Store -------------------------------------------------------------------------------- /example.env: -------------------------------------------------------------------------------- 1 | NEO4J_URI= 2 | NEO4J_USER= 3 | NEO4J_PASSWORD= 4 | NEO4J_DATABASE= 5 | PROJECT_ID= 6 | LOCATION= -------------------------------------------------------------------------------- /normalized_data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidagarwal04/neo4j-vertexai-codelab/main/normalized_data/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # .gitignore 2 | .env 3 | sid-sandbox-369614-3251ea890677.json 4 | Neo4j-8ad56b84-Created-2025-02-26.txt 5 | 
movie-reco/ 6 | __pycache__/ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | gradio>=4.0.0 2 | neo4j>=5.0.0 3 | numpy>=1.20.0 4 | python-dotenv>=1.0.0 5 | google-cloud-aiplatform>=1.30.0 6 | vertexai>=0.0.1 7 | langchain-google-vertexai>=1.0.0 -------------------------------------------------------------------------------- /movie_embeddings.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a1fab724e0aed546ceb00c4f511dde0744d214faf77ecb6dd79447e6f5f95ec5 3 | size 208060796 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | EXPOSE 8080 4 | WORKDIR /app 5 | 6 | COPY . ./ 7 | 8 | RUN pip install -r requirements.txt 9 | 10 | CMD ["python", "app.py"] -------------------------------------------------------------------------------- /normalized_data/normalized_cast.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0c6ac027ce5242e280cd9fab4c3a33e193a1307fa42b7c9167801d0cac5892bb 3 | size 23889997 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_crew.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:016b006e8e2fd5fc3d9f616b6b5616366ae951a65a4fa27383b6b2ae3392eb67 3 | size 1474462 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_genres.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid
sha256:9112b9cf0a68533eda293782aac0d4ab7f3fe4db736d410be290cc7132846948 3 | size 1590341 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_keywords.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f7c465d8621560c56f22f98032652c6812fb4bc36ef0d0c4bae6fb97c9c0fb4a 3 | size 2206600 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_links.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bd8562d4b10d04d080e16caaf8027cd07e47b5da6920128d39e31cd1dd8ffc5e 3 | size 989107 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_movies.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d646375273307ddc49a9fb1bcecece013b05f748d934cf91609c066120280eb5 3 | size 19604293 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_links_small.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:772cb400f68f03cc61b112a544601d25571b168788dab7dc96f462d93059c49f 3 | size 183372 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_ratings_small.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:821795331fd974bbe7a8416f2a7eec2b548a485dedb47a073580b0a9a8a0a8d2 3 | size 2438266 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_spoken_languages.csv: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:672c2b546906dd7cf139fbf7e2b4ae534562544b5a316d495922e04e32e09ed2 3 | size 976870 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_production_companies.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2e8c4fc8ab665277ef26fd5ac7a29ed22882d9a1fc797e6b66d30416dc5d5a4c 3 | size 2193165 4 | -------------------------------------------------------------------------------- /normalized_data/normalized_production_countries.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eb53a2b6d4d2af54ac8f52277ebc3677f20b134c7a073d26b7f9310b2f226ce1 3 | size 1228890 4 | -------------------------------------------------------------------------------- /.env.yaml: -------------------------------------------------------------------------------- 1 | NEO4J_URI: "" 2 | NEO4J_USER: "" 3 | NEO4J_PASSWORD: "" 4 | NEO4J_DATABASE: "neo4j" 5 | GCP_PROJECT: "" 6 | GCP_REGION: "" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Sid Agarwal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /load_embeddings.py: -------------------------------------------------------------------------------- 1 | import os 2 | from neo4j import GraphDatabase 3 | from dotenv import load_dotenv 4 | import warnings 5 | 6 | warnings.filterwarnings("ignore") 7 | load_dotenv() 8 | 9 | class LoadEmbeddings: 10 | def __init__(self, uri, user, password, database='neo4j'): 11 | self.driver = GraphDatabase.driver(uri, auth=(user, password), database=database) 12 | 13 | def close(self): 14 | self.driver.close() 15 | 16 | def load_embeddings(self, csv_file): 17 | query = """ 18 | LOAD CSV WITH HEADERS FROM $csvFile AS row 19 | WITH row 20 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) 21 | SET m.embedding = apoc.convert.fromJsonList(row.embedding) 22 | RETURN count(m) AS count 23 | """ 24 | with self.driver.session() as session: 25 | result = session.run(query, csvFile=f'{csv_file}') 26 | count = result.single()["count"] 27 | print(f"Embeddings loaded from {csv_file}, total embeddings stored: {count}") 28 | 29 | def main(): 30 | uri = os.getenv('NEO4J_URI') 31 | user = os.getenv('NEO4J_USER') 32 | password = os.getenv('NEO4J_PASSWORD') 33 | database = os.getenv('NEO4J_DATABASE') 34 | 35 | graph = LoadEmbeddings(uri, user, password, database) 36 | 37 | # Load embeddings 38 | 
graph.load_embeddings('https://storage.googleapis.com/neo4j-vertexai-codelab/movie_embeddings.csv') 39 | 40 | graph.close() 41 | 42 | if __name__ == "__main__": 43 | main() -------------------------------------------------------------------------------- /validate_graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | from neo4j import GraphDatabase 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | # Neo4j credentials from .env 8 | NEO4J_URI = os.getenv("NEO4J_URI") 9 | NEO4J_USER = os.getenv("NEO4J_USER") 10 | NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD") 11 | 12 | driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) 13 | 14 | def fetch_node_counts(tx): 15 | result = tx.run("CALL db.labels() YIELD label RETURN label") 16 | labels = [record["label"] for record in result] 17 | for label in labels: 18 | count_query = f"MATCH (n:`{label}`) RETURN count(n) AS count" 19 | count = tx.run(count_query).single()["count"] 20 | print(f"{label}: {count} nodes") 21 | 22 | def fetch_relationship_counts(tx): 23 | result = tx.run("CALL db.relationshipTypes() YIELD relationshipType RETURN relationshipType") 24 | types = [record["relationshipType"] for record in result] 25 | for rel_type in types: 26 | count_query = f"MATCH ()-[:`{rel_type}`]->() RETURN count(*) AS count" 27 | count = tx.run(count_query).single()["count"] 28 | print(f"{rel_type}: {count} relationships") 29 | 30 | with driver.session() as session: 31 | print("\n📦 Node Counts:") 32 | session.execute_read(fetch_node_counts) 33 | 34 | print("\n🔗 Relationship Counts:") 35 | session.execute_read(fetch_relationship_counts) 36 | 37 | driver.close() 38 | -------------------------------------------------------------------------------- /export_embeddings_to_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import neo4j 4 | from dotenv import load_dotenv 5 | 6 | # Load environment variables 7 | load_dotenv() 8 | 9 | # Neo4j connection parameters 10 | NEO4J_URI = os.getenv('NEO4J_URI') 11 | NEO4J_USER = os.getenv('NEO4J_USER') 12 | NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD') 13 | 14 | def export_embeddings_to_csv(output_file='movie_embeddings.csv'): 15 | """ 16 | Export movie embeddings from Neo4j to a CSV file. 17 | This script is useful for backing up or transferring embeddings. 18 | """ 19 | # Create a Neo4j driver instance 20 | driver = neo4j.GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) 21 | 22 | try: 23 | with driver.session() as session: 24 | # Cypher query to retrieve movie embeddings 25 | query = """ 26 | MATCH (m:Movie) 27 | WHERE m.embedding IS NOT NULL 28 | RETURN m.tmdbId AS tmdbId, 29 | m.title AS title, 30 | toString(m.embedding) AS embedding 31 | """ 32 | 33 | results = session.run(query) 34 | 35 | # Open CSV file for writing 36 | with open(output_file, 'w', newline='', encoding='utf-8') as csvfile: 37 | csvwriter = csv.writer(csvfile) 38 | # Write headers 39 | csvwriter.writerow(['tmdbId', 'title', 'embedding']) 40 | 41 | # Write data 42 | for record in results: 43 | csvwriter.writerow([ 44 | record['tmdbId'], 45 | record['title'], 46 | record['embedding'] 47 | ]) 48 | 49 | print(f"Embeddings exported to {output_file}") 50 | 51 | finally: 52 | driver.close() 53 | 54 | if __name__ == '__main__': 55 | export_embeddings_to_csv() -------------------------------------------------------------------------------- /prompts.py: -------------------------------------------------------------------------------- 1 | """ 2 | LLM prompts module for movie knowledge graph interactions. 
3 | 4 | This module contains lambda functions that generate prompts for the 5 | language model interactions needed throughout the application: 6 | generating Cypher queries from natural language and 7 | summarizing query results. 8 | """ 9 | 10 | # Prompt for generating Cypher queries from natural language questions 11 | # Takes a user query, vector search context, and knowledge graph ontology 12 | # Returns a prompt instructing the LLM how to generate an appropriate Cypher query 13 | cypher_generation_prompt = lambda query, context, ontology: f""" 14 | You are an assistant working with a Neo4j movie database. 15 | 16 | Here is the ontology of the movie knowledge graph: 17 | 18 | {ontology} 19 | 20 | QUESTION: {query} 21 | 22 | Here is some relevant context from a vector search: 23 | {context} 24 | 25 | Your task is to generate a Cypher query using the ontology and context above. 26 | 27 | IMPORTANT GUIDELINES FOR CYPHER QUERIES: 28 | 1. Always start with a valid Cypher clause like MATCH, CREATE, MERGE, OPTIONAL MATCH, UNWIND, CALL, WITH, RETURN. 29 | 2. DO NOT try to escape characters or produce special characters like new line, tab, etc. IT WILL result in a syntax error. 30 | 3. Use specific node labels like Movie, Genre, Person, etc., as per the ontology. 31 | 4. Use appropriate relationships between nodes like :ACTED_IN, :DIRECTED, :HAS_GENRE, etc. 32 | 5. Filter based on user intent using WHERE clauses. 33 | 6. Use a RETURN clause to specify what to return, like movie title, overview, genre, release date, etc. 34 | 7. Use the provided context to understand entity types and relationships. 35 | 8. Do not include triple backticks ``` or ```cypher or any additional text except the generated Cypher statement in your response. 36 | 9. Do not use any properties or relationships not included in the schema. 37 | 38 | Based on this context and the question, generate an appropriate Cypher query to find the answer. 
39 | """ 40 | 41 | # Prompt for summarizing Cypher query results in natural language 42 | # Takes the original user query, Cypher results, result count, and formatted results 43 | # Returns a prompt instructing the LLM to generate a human-readable summary 44 | summarize_results_prompt = lambda query, cypher_results, result_count, formatted_cypher_results: f""" 45 | You are a friendly movie assistant helping users find films that match their preferences. 46 | 47 | The user asked: "{query}" 48 | 49 | Here’s the Cypher query that was run on the Neo4j movie knowledge graph: 50 | {cypher_results.get("query", "No query available")} 51 | 52 | Results found: {result_count} 53 | 54 | Results: 55 | {formatted_cypher_results[:4000] if result_count > 0 else "No results found."} 56 | 57 | Your task: 58 | 1. Provide a clear, engaging summary of the movies found — write as if you’re a movie enthusiast recommending films to a friend. 59 | 2. For each movie you mention, you MUST include: 60 | - The title 61 | - A brief but complete plot/overview 62 | - (Optional but helpful: release year, genre, or standout features if available) 63 | 3. Explain why each movie is a good match based on the user's request (themes, keywords, actors, etc.). 64 | 4. Do not list everything — focus on the most relevant results (top 3–5 is fine), but present them narratively with all required info. 65 | 5. If no results were found: 66 | - Suggest why (e.g., overly broad/specific query, data not available) 67 | - Offer tips to refine their query for better results next time 68 | 69 | Keep the tone conversational and informative — like you're having a chat with someone at a movie club. 
70 | """ -------------------------------------------------------------------------------- /generate_embeddings_to_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import json 4 | import vertexai 5 | from langchain_google_vertexai import VertexAIEmbeddings 6 | from neo4j import GraphDatabase 7 | from dotenv import load_dotenv 8 | 9 | # Load environment variables 10 | load_dotenv() 11 | 12 | # Neo4j connection details 13 | NEO4J_URI = os.getenv('NEO4J_URI') 14 | NEO4J_USER = os.getenv('NEO4J_USER') 15 | NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD') 16 | 17 | # Initialize Neo4j driver 18 | driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) 19 | 20 | # Set GOOGLE_APPLICATION_CREDENTIALS 21 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./service-account.json" #update this with actual .json file name linked to the service account 22 | 23 | # Initialize Google Cloud AI Platform 24 | vertexai.init(project=os.getenv("PROJECT_ID"), location=os.getenv("LOCATION")) 25 | 26 | # Vertex AI Embedding Model Endpoint 27 | embeddings = VertexAIEmbeddings(model_name="text-embedding-005") 28 | 29 | def retrieve_all_movies(): 30 | query = """ 31 | MATCH (m:Movie) 32 | WHERE m.overview IS NOT NULL 33 | AND m.overview <> '' 34 | RETURN m.tmdbId AS tmdbId, 35 | m.title AS title, 36 | m.overview AS overview 37 | """ 38 | 39 | with driver.session() as session: 40 | results = session.run(query) 41 | movies = [ 42 | { 43 | "tmdbId": row["tmdbId"], 44 | "title": row["title"], 45 | "overview": row["overview"] 46 | } for row in results 47 | ] 48 | return movies 49 | 50 | def generate_embeddings_to_csv(output_file='movie_embeddings.csv'): 51 | # Retrieve all movies 52 | movies = retrieve_all_movies() 53 | print(f"Total movies to process: {len(movies)}") 54 | 55 | # Open file in write mode to overwrite any existing file 56 | with open(output_file, 'w', newline='', encoding='utf-8') as csvfile: 57 | 
csvwriter = csv.writer(csvfile) 58 | 59 | # Write headers 60 | csvwriter.writerow(['tmdbId', 'title', 'overview', 'embedding']) 61 | 62 | # Tracking variables 63 | processed_count = 0 64 | failed_count = 0 65 | 66 | # Process each movie 67 | for movie in movies: 68 | try: 69 | # Generate embedding 70 | embedding = embeddings.embed_query(movie['overview']) 71 | 72 | if embedding: 73 | # Write to CSV 74 | csvwriter.writerow([ 75 | movie['tmdbId'], 76 | movie['title'], 77 | movie['overview'], 78 | json.dumps(embedding) 79 | ]) 80 | 81 | processed_count += 1 82 | 83 | # Print progress 84 | if processed_count % 100 == 0: 85 | print(f"Processed {processed_count} movies...") 86 | csvfile.flush() # Ensure data is written to disk 87 | else: 88 | failed_count += 1 89 | print(f"Failed to generate embedding for: {movie['title']}") 90 | 91 | except Exception as e: 92 | failed_count += 1 93 | print(f"Error processing {movie['title']}: {e}") 94 | 95 | # Final summary 96 | print("\nProcessing Complete:") 97 | print(f"Total movies processed: {processed_count}") 98 | print(f"Total movies failed: {failed_count}") 99 | 100 | def main(): 101 | generate_embeddings_to_csv() 102 | 103 | if __name__ == "__main__": 104 | main() 105 | -------------------------------------------------------------------------------- /generate_embeddings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import tempfile 5 | import vertexai 6 | 7 | from langchain_google_vertexai import VertexAI 8 | from langchain_google_vertexai import VertexAIEmbeddings 9 | from google.auth import credentials 10 | from neo4j import GraphDatabase 11 | from dotenv import load_dotenv 12 | # from langchain_community.graphs import Neo4jGraph 13 | 14 | # Load environment variables 15 | load_dotenv() 16 | 17 | # Neo4j connection details 18 | NEO4J_URI = os.getenv('NEO4J_URI') 19 | NEO4J_USER = os.getenv('NEO4J_USER') 20 | NEO4J_PASSWORD = 
os.getenv('NEO4J_PASSWORD') 21 | 22 | # Initialize Neo4j driver 23 | driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) 24 | 25 | # Set GOOGLE_APPLICATION_CREDENTIALS 26 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./service-account.json" # update this with the actual .json key file for your service account 27 | 28 | # Initialize Google Cloud AI Platform 29 | vertexai.init(project=os.getenv("PROJECT_ID"), location=os.getenv("LOCATION")) 30 | 31 | # Vertex AI Embedding Model Endpoint 32 | embeddings = VertexAIEmbeddings(model_name="text-embedding-005") 33 | 34 | 35 | # Retrieve movie plots and titles from Neo4j 36 | def retrieve_movie_plots(): 37 | query = "MATCH (m:Movie) WHERE m.embedding IS NULL RETURN m.tmdbId AS tmdbId, m.title AS title, m.overview AS overview" 38 | with driver.session() as session: 39 | results = session.run(query) 40 | movies = [{"tmdbId": row["tmdbId"], "title": row["title"], "overview": row["overview"]} for row in results] 41 | return movies 42 | 43 | # Generate embeddings for movie plots using Vertex AI and store them immediately in Neo4j 44 | def generate_and_store_embeddings(movies): 45 | for movie in movies: 46 | title = movie.get("title", "Unknown Title") # Fetch the movie title 47 | overview = str(movie.get("overview", "")) # Ensure the overview is a string, use empty string as default 48 | 49 | print(f"Generating embedding for movie: {title}") 50 | print(f"Overview for movie {title}: {overview}") 51 | 52 | # Check if the overview is not empty 53 | if overview.strip() == "": 54 | print(f"No overview available for movie: {title}. 
Skipping embedding generation.") 55 | continue 56 | 57 | try: 58 | # Generate embedding for the current overview (pass overview as a string to the embedder) 59 | embedding_result = embeddings.embed_query(overview) # Pass overview as a string 60 | # print(str(embedding_result)[:100]) 61 | # embedding = embedding_result.get("embedding", None) # Safely access the embedding from the result 62 | 63 | if embedding_result: 64 | # Store the embedding in Neo4j immediately 65 | tmdbId = movie["tmdbId"] 66 | store_embedding_in_neo4j(tmdbId, embedding_result) 67 | else: 68 | print(f"Failed to generate embedding for movie: {title}") 69 | except Exception as e: 70 | print(f"Error generating embedding for movie {title}: {e}") 71 | 72 | 73 | # Store the embedding in Neo4j 74 | def store_embedding_in_neo4j(tmdbId, embedding): 75 | query = """ 76 | MATCH (m:Movie {tmdbId: $tmdbId}) 77 | SET m.embedding = $embedding 78 | """ 79 | with driver.session() as session: 80 | session.run(query, tmdbId=tmdbId, embedding=embedding) 81 | print(f"Embedding for movie {tmdbId} successfully stored in Neo4j.") 82 | 83 | 84 | # Verify embeddings stored in Neo4j 85 | def verify_embeddings(): 86 | query = "MATCH (m:Movie) WHERE m.embedding IS NOT NULL RETURN m.title, m.embedding LIMIT 10" 87 | with driver.session() as session: 88 | results = session.run(query) 89 | for record in results: 90 | print(f"Movie: {record['m.title']}, Embedding: {np.array(record['m.embedding'])[:5]}...") # Print first 5 values 91 | 92 | 93 | # Main function to orchestrate the process 94 | def main(): 95 | # Step 1: Retrieve movie plots from Neo4j 96 | movies = retrieve_movie_plots() 97 | if not movies: 98 | print("No movies found in the Neo4j database.") 99 | return 100 | 101 | # Step 2: Generate embeddings for movie plots and store them immediately 102 | generate_and_store_embeddings(movies) 103 | 104 | # Step 3: Verify that embeddings are stored in Neo4j 105 | verify_embeddings() 106 | 107 | 108 | if __name__ == "__main__": 
109 | main() 110 | -------------------------------------------------------------------------------- /chatbot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import vertexai 3 | import numpy as np 4 | from neo4j import GraphDatabase 5 | from dotenv import load_dotenv 6 | import gradio as gr 7 | 8 | # Load environment variables 9 | load_dotenv() 10 | 11 | # Neo4j connection details 12 | NEO4J_URI = os.getenv('NEO4J_URI') 13 | NEO4J_USER = os.getenv('NEO4J_USER') 14 | NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD') 15 | NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') 16 | 17 | # Google Cloud project ID 18 | PROJECT_ID = os.getenv('PROJECT_ID') 19 | LOCATION = os.getenv('LOCATION') 20 | 21 | class Neo4jDatabase: 22 | """Class to handle Neo4j database operations.""" 23 | 24 | def __init__(self, uri, username, password, database="neo4j"): 25 | """Initialize Neo4j connection.""" 26 | self.driver = GraphDatabase.driver(uri, auth=(username, password), database=database) 27 | 28 | def close(self): 29 | """Close the driver connection.""" 30 | self.driver.close() 31 | 32 | def setup_vector_index(self): 33 | """Set up a vector index in Neo4j for the movie embeddings.""" 34 | with self.driver.session() as session: 35 | try: 36 | # Drop the existing vector index if it exists 37 | session.run("DROP INDEX overview_embeddings IF EXISTS") 38 | print("Old index dropped") 39 | except Exception as e: 40 | print(f"No index to drop: {e}") 41 | 42 | # Create a new vector index on the embedding property 43 | print("Creating new vector index") 44 | query_index = """ 45 | CREATE VECTOR INDEX overview_embeddings IF NOT EXISTS 46 | FOR (m:Movie) ON (m.embedding) 47 | OPTIONS {indexConfig: { 48 | `vector.dimensions`: 768, 49 | `vector.similarity_function`: 'cosine'}} 50 | """ 51 | session.run(query_index) 52 | print("Vector index created successfully") 53 | 54 | def get_movie_recommendations_by_vector(self, user_embedding, top_k=5): 55 | """ 56 | 
Get movie recommendations from Neo4j using vector similarity search. 57 | 58 | Args: 59 | user_embedding: Vector representation of user query 60 | top_k: Number of recommendations to return 61 | """ 62 | with self.driver.session() as session: 63 | # Vector similarity search query using the vector index 64 | query = """ 65 | CALL db.index.vector.queryNodes( 66 | 'overview_embeddings', 67 | $top_k, 68 | $embedding 69 | ) YIELD node as m, score 70 | RETURN m.title AS title, 71 | m.overview AS plot, 72 | m.release_date AS released, 73 | m.tagline AS tagline, 74 | score 75 | """ 76 | 77 | result = session.run( 78 | query, 79 | embedding=user_embedding, 80 | top_k=top_k 81 | ) 82 | 83 | recommendations = [ 84 | { 85 | "title": record["title"], 86 | "plot": record["plot"], 87 | "released": record.get("released", "Unknown"), 88 | "tagline": record.get("tagline", ""), 89 | "similarity": record.get("score", 0) 90 | } 91 | for record in result 92 | ] 93 | return recommendations 94 | 95 | class VectorService: 96 | """Class to handle vector embeddings.""" 97 | 98 | def __init__(self, project_id, location): 99 | """Initialize VertexAI.""" 100 | # Initialize Vertex AI 101 | vertexai.init(project=project_id, location=location) 102 | 103 | def generate_embedding(self, text): 104 | """ 105 | Generate embedding vector for the given text using Vertex AI text-embedding-005. 
106 | """ 107 | from vertexai.language_models import TextEmbeddingModel 108 | embedding_model = TextEmbeddingModel.from_pretrained("text-embedding-005") 109 | embeddings = embedding_model.get_embeddings([text]) 110 | return embeddings[0].values 111 | 112 | class GeminiService: 113 | """Class to handle Gemini API calls via Vertex AI.""" 114 | 115 | def __init__(self, project_id, location): 116 | """Initialize Gemini service with Vertex AI.""" 117 | # Initialize Vertex AI 118 | vertexai.init(project=project_id, location=location) 119 | 120 | # Load the generative model 121 | from vertexai.generative_models import GenerativeModel 122 | self.model = GenerativeModel("gemini-2.0-flash-001") 123 | 124 | def generate_response(self, prompt): 125 | """Generate a response using Gemini.""" 126 | response = self.model.generate_content(prompt) 127 | return response.text 128 | 129 | class MovieRecommendationApp: 130 | """Main application class that combines Neo4j and Gemini for movie recommendations.""" 131 | 132 | def __init__(self, neo4j_uri, neo4j_user, neo4j_password, neo4j_database, project_id, location): 133 | """Initialize the application with Neo4j and Gemini services.""" 134 | self.neo4j = Neo4jDatabase(neo4j_uri, neo4j_user, neo4j_password, neo4j_database) 135 | self.gemini = GeminiService(project_id, location) 136 | self.vector_service = VectorService(project_id, location) 137 | 138 | def process_query(self, user_input): 139 | """Process a user query to get movie recommendations using vector search.""" 140 | try: 141 | # Step 1: Generate embedding for user query - using the same model that was used for the movies 142 | query_embedding = self.vector_service.generate_embedding(user_input) 143 | 144 | # Step 2: Get recommendations using vector similarity search 145 | recommendations = self.neo4j.get_movie_recommendations_by_vector(query_embedding) 146 | 147 | # Step 3: Use Gemini to craft a personalized response 148 | if recommendations: 149 | movies_context = "\n".join([ 
150 | f"Movie: {rec['title']}\n" 151 | f"Plot: {rec['plot']}\n" 152 | f"Released: {rec['released']}\n" 153 | f"Tagline: {rec['tagline']}\n" 154 | f"Similarity Score: {rec['similarity']:.4f}" 155 | for rec in recommendations 156 | ]) 157 | 158 | explanation_prompt = f""" 159 | The user asked: "{user_input}" 160 | 161 | Based on their query, I found these movies (with semantic similarity scores): 162 | {movies_context} 163 | 164 | Create a friendly and helpful response that: 165 | 1. Acknowledges their request 166 | 2. Explains why these recommendations match their request (referring to plot elements, themes, etc.) 167 | 3. Presents the movies in a clear, readable format with titles, release years, and brief descriptions 168 | 4. Asks if they'd like more specific recommendations 169 | 170 | Important note: Don't simply list out all the movies with bullet points or numbers. Format it as a conversational response while still highlighting the key information about each movie. 171 | """ 172 | 173 | response = self.gemini.generate_response(explanation_prompt) 174 | else: 175 | response = f"I couldn't find any movies matching '{user_input}'. Our database might not have embeddings for all movies yet. Could you try a different query?" 176 | 177 | return response 178 | 179 | except Exception as e: 180 | return f"Sorry, I encountered an error: {str(e)}. Please try again." 
181 | 182 | def close(self): 183 | """Close all connections.""" 184 | self.neo4j.close() 185 | 186 | def handle_user_input(user_input): 187 | """Gradio interface function to process user input and return recommendations.""" 188 | app = MovieRecommendationApp( 189 | NEO4J_URI, 190 | NEO4J_USER, 191 | NEO4J_PASSWORD, 192 | NEO4J_DATABASE, 193 | PROJECT_ID, 194 | LOCATION 195 | ) 196 | 197 | try: 198 | response = app.process_query(user_input) 199 | return response 200 | finally: 201 | app.close() 202 | 203 | # Create Gradio interface 204 | iface = gr.Interface( 205 | fn=handle_user_input, 206 | inputs=gr.Textbox( 207 | placeholder="What kind of movie would you like to watch?", 208 | lines=3, 209 | label="Your movie preference" 210 | ), 211 | outputs=gr.Textbox( 212 | label="Recommendations", 213 | lines=12 214 | ), 215 | title="AI Movie Recommendation System", 216 | description="Get personalized movie recommendations using semantic search with Neo4j vector search and Google Vertex AI!", 217 | examples=[ 218 | ["I want to watch a sci-fi movie with time travel"], 219 | ["Recommend me a romantic comedy with a happy ending"], 220 | ["I'm in the mood for something with superheroes but not too serious"], 221 | ["I want a thriller that keeps me on the edge of my seat"], 222 | ["Show me movies about artificial intelligence taking over the world"] 223 | ], 224 | allow_flagging="never" 225 | ) 226 | 227 | # Initialize Neo4j and set up the vector index 228 | neo4j_db = Neo4jDatabase(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD, NEO4J_DATABASE) 229 | neo4j_db.setup_vector_index() 230 | neo4j_db.close() 231 | 232 | # Launch the Gradio app 233 | if __name__ == "__main__": 234 | iface.launch() -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from prompts import ( 2 | cypher_generation_prompt, 3 | summarize_results_prompt 4 | ) 5 | 6 | import os 7 | import vertexai 8 
| import numpy as np 9 | from neo4j import GraphDatabase 10 | from dotenv import load_dotenv 11 | import gradio as gr 12 | 13 | # Load environment variables 14 | load_dotenv() 15 | 16 | # Neo4j connection details 17 | NEO4J_URI = os.getenv('NEO4J_URI') 18 | NEO4J_USER = os.getenv('NEO4J_USER') 19 | NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD') 20 | 21 | # Google Cloud project ID 22 | PROJECT_ID = os.getenv('PROJECT_ID') 23 | LOCATION = os.getenv('LOCATION') 24 | 25 | class Neo4jDatabase: 26 | """Class to handle Neo4j database operations.""" 27 | 28 | def __init__(self, uri, username, password): 29 | """Initialize Neo4j connection.""" 30 | self.driver = GraphDatabase.driver(uri, auth=(username, password)) 31 | 32 | def close(self): 33 | """Close the driver connection.""" 34 | self.driver.close() 35 | 36 | def setup_vector_index(self): 37 | """Set up or load a vector index in Neo4j for the movie embeddings.""" 38 | with self.driver.session() as session: 39 | try: 40 | # Check if the vector index already exists 41 | check_query = """ 42 | SHOW VECTOR INDEXES YIELD name 43 | WHERE name = 'overview_embeddings' 44 | RETURN name 45 | """ 46 | result = session.run(check_query) 47 | existing_index = result.single() 48 | 49 | if existing_index: 50 | print("Vector index 'overview_embeddings' already exists. No need to create a new one.") 51 | else: 52 | # Create a new vector index if it doesn't exist 53 | print("Creating new vector index") 54 | query_index = """ 55 | CREATE VECTOR INDEX overview_embeddings 56 | FOR (m:Movie) ON (m.embedding) 57 | OPTIONS {indexConfig: { 58 | `vector.dimensions`: 768, 59 | `vector.similarity_function`: 'cosine'}} 60 | """ 61 | session.run(query_index) 62 | print("Vector index created successfully") 63 | except Exception as e: 64 | print(f"Error while setting up vector index: {e}") 65 | 66 | def get_movie_recommendations_by_vector(self, user_embedding, top_k=5): 67 | """ 68 | Get movie recommendations from Neo4j using vector similarity search. 
69 | 70 | Args: 71 | user_embedding: Vector representation of user query 72 | top_k: Number of recommendations to return 73 | """ 74 | with self.driver.session() as session: 75 | # Vector similarity search query using the vector index 76 | query = """ 77 | CALL db.index.vector.queryNodes( 78 | 'overview_embeddings', 79 | $top_k, 80 | $embedding 81 | ) YIELD node, score 82 | WITH node as m, score 83 | RETURN m.title AS title, 84 | m.overview AS plot, 85 | m.release_date AS released, 86 | m.tagline AS tagline, 87 | score 88 | ORDER BY score DESC 89 | """ 90 | 91 | result = session.run( 92 | query, 93 | embedding=user_embedding, 94 | top_k=top_k 95 | ) 96 | 97 | recommendations = [ 98 | { 99 | "title": record["title"], 100 | "plot": record["plot"], 101 | "released": record.get("released", "Unknown"), 102 | "tagline": record.get("tagline", ""), 103 | "similarity": record.get("score", 0) 104 | } 105 | for record in result 106 | ] 107 | return recommendations 108 | 109 | class VectorService: 110 | """Class to handle vector embeddings.""" 111 | 112 | def __init__(self, project_id, location): 113 | """Initialize VertexAI.""" 114 | # Initialize Vertex AI 115 | vertexai.init(project=project_id, location=location) 116 | 117 | def generate_embedding(self, text): 118 | """ 119 | Generate embedding vector for the given text using Vertex AI text-embedding-005. 
120 | """ 121 | from vertexai.language_models import TextEmbeddingModel 122 | embedding_model = TextEmbeddingModel.from_pretrained("text-embedding-005") 123 | embeddings = embedding_model.get_embeddings([text]) 124 | return embeddings[0].values 125 | 126 | class GeminiService: 127 | """Class to handle Gemini API calls via Vertex AI.""" 128 | 129 | def __init__(self, project_id, location): 130 | """Initialize Gemini service with Vertex AI.""" 131 | # Initialize Vertex AI 132 | vertexai.init(project=project_id, location=location) 133 | 134 | # Load the generative model 135 | from vertexai.generative_models import GenerativeModel 136 | self.model = GenerativeModel("gemini-2.0-flash-001") 137 | 138 | def generate_response(self, prompt): 139 | """Generate a response using Gemini.""" 140 | response = self.model.generate_content(prompt) 141 | return response.text 142 | 143 | def get_ontology_from_neo4j(driver): 144 | with driver.session() as session: 145 | result = session.run("CALL db.schema.nodeTypeProperties()") 146 | 147 | nodes = {} 148 | relationships = set() 149 | 150 | for record in result: 151 | node_labels = record["nodeLabels"] 152 | property_name = record["propertyName"] 153 | node_type = ":".join(node_labels) 154 | nodes.setdefault(node_type, set()).add(property_name) 155 | 156 | # Fetch relationships separately 157 | rel_result = session.run("CALL db.relationshipTypes() YIELD relationshipType RETURN relationshipType") 158 | for record in rel_result: 159 | relationships.add(record["relationshipType"]) 160 | 161 | # Construct ontology string 162 | ontology_str = "" 163 | 164 | for node, props in nodes.items(): 165 | prop_list = ", ".join(props) 166 | ontology_str += f"({node}) has properties: {prop_list}\n" 167 | 168 | for rel in relationships: 169 | ontology_str += f"(:Node)-[:{rel}]->(:Node)\n" 170 | 171 | return ontology_str.strip() 172 | 173 | 174 | class MovieRecommendationApp: 175 | """Main application class that combines Neo4j and Gemini for movie 
recommendations.""" 176 | 177 | def __init__(self, neo4j_uri, neo4j_user, neo4j_password, project_id, location): 178 | """Initialize the application with Neo4j and Gemini services.""" 179 | self.neo4j = Neo4jDatabase(neo4j_uri, neo4j_user, neo4j_password) 180 | self.gemini = GeminiService(project_id, location) 181 | self.vector_service = VectorService(project_id, location) 182 | 183 | def process_query(self, user_input): 184 | try: 185 | # Step 1: Vector search 186 | query_embedding = self.vector_service.generate_embedding(user_input) 187 | vector_results = self.neo4j.get_movie_recommendations_by_vector(query_embedding, top_k=5) 188 | 189 | if not vector_results: 190 | return "Sorry, no relevant results found using vector search." 191 | 192 | # Step 2: Format the vector search results as context for the LLM 193 | context = "Information from vector search:\n" 194 | for i, result in enumerate(vector_results): 195 | context += f"[Result {i+1}] Title: {result['title']}\nPlot: {result['plot']}\n\n" 196 | 197 | # Step 3: Generate Cypher query using the context and Gemini 198 | ontology = get_ontology_from_neo4j(self.neo4j.driver) 199 | cypher_prompt = cypher_generation_prompt(user_input, context, ontology) 200 | generated_query = self.gemini.generate_response(cypher_prompt).strip() 201 | 202 | 203 | if generated_query.startswith("```"): 204 | lines = generated_query.splitlines() 205 | # Remove first line (e.g., ```cypher) and last line (```) 206 | lines = [line for line in lines if not line.strip().startswith("```")] 207 | generated_query = "\n".join(lines).strip() 208 | 209 | print("Generated Cypher:\n", generated_query) 210 | 211 | # Step 4: Run Cypher query 212 | with self.neo4j.driver.session() as session: 213 | result = session.run(generated_query) 214 | records = [record.data() for record in result] 215 | 216 | # Step 5: Summarize results 217 | summary_prompt = summarize_results_prompt(user_input, {"query": generated_query, "results": records}, len(records), 
str(records)) 218 | summary = self.gemini.generate_response(summary_prompt) 219 | 220 | return summary 221 | 222 | except Exception as e: 223 | return f"Error processing query: {str(e)}" 224 | 225 | def close(self): 226 | """Close all connections.""" 227 | self.neo4j.close() 228 | 229 | def handle_user_input(user_input): 230 | """Gradio interface function to process user input and return recommendations.""" 231 | app = MovieRecommendationApp( 232 | NEO4J_URI, 233 | NEO4J_USER, 234 | NEO4J_PASSWORD, 235 | PROJECT_ID, 236 | LOCATION 237 | ) 238 | 239 | try: 240 | response = app.process_query(user_input) 241 | return response 242 | finally: 243 | app.close() 244 | 245 | # Create Gradio interface 246 | iface = gr.Interface( 247 | fn=handle_user_input, 248 | inputs=gr.Textbox( 249 | placeholder="What kind of movie would you like to watch?", 250 | lines=3, 251 | label="Your movie preference" 252 | ), 253 | outputs=gr.Textbox( 254 | label="Recommendations", 255 | lines=12 256 | ), 257 | title="Smart Movie Recommender with GraphRAG", 258 | description=( 259 | "Discover movies you’ll love — powered by Neo4j and Vertex AI!\n" 260 | "This assistant combines semantic search with knowledge graph reasoning — using vector similarity for relevant matches and LLM-generated Cypher queries for deeper insights from movie plots, genres, and relationships." 
261 | ), 262 | examples=[ 263 | ["Which time travel movies star Bruce Willis?"], 264 | ["Find romantic comedies directed by female directors."], 265 | ["Recommend sci-fi movies featuring AI and starring Keanu Reeves."], 266 | ["Show me thrillers from the 2000s with mind-bending plots."], 267 | ["List superhero movies where the villain turns good."] 268 | ], 269 | flagging_mode="never" 270 | ) 271 | 272 | # Initialize Neo4j and set up the vector index 273 | neo4j_db = Neo4jDatabase(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD) 274 | neo4j_db.setup_vector_index() 275 | neo4j_db.close() 276 | 277 | # Launch the Gradio interface 278 | iface.launch(server_name="0.0.0.0", server_port=8080) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Neo4j + Vertex AI Codelab 3 | 4 | A movie recommendation application that combines Neo4j's graph database capabilities with Google Cloud's Vertex AI to deliver intelligent, natural language-based movie recommendations. The system performs semantic vector search using Vertex AI embeddings, then leverages large language models to generate and execute Cypher queries on the Neo4j knowledge graph, enabling multi-hop reasoning and contextual recommendations powered by the GraphRAG pattern. 
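Conceptually, the retrieval step of this pipeline boils down to ranking stored movie embeddings by cosine similarity against the query embedding. Here is a toy, self-contained sketch of that step; the titles and 3-dimensional vectors below are invented for illustration, while the real app uses 768-dimensional Vertex AI embeddings and a Neo4j vector index:

```python
# Toy illustration of the vector-retrieval step of the GraphRAG pipeline.
# The movies and their 3-dim "embeddings" are invented for illustration;
# app.py uses 768-dim Vertex AI embeddings and db.index.vector.queryNodes.
import math

MOVIES = {
    "Looper": [1.0, 0.0, 0.0],
    "The Notebook": [0.0, 0.9, 0.1],
    "Primer": [0.7, 0.0, 0.3],
}

def cosine(a, b):
    """Cosine similarity, the metric the 'overview_embeddings' index uses."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

def top_k(query_embedding, k=2):
    """Return the k most similar titles, mimicking the vector index lookup."""
    scored = sorted(
        ((cosine(query_embedding, emb), title) for title, emb in MOVIES.items()),
        reverse=True,
    )
    return [title for _, title in scored[:k]]

# A query embedding close to the "time travel" movies:
print(top_k([1.0, 0.0, 0.1]))  # ['Looper', 'Primer']
```

In the real pipeline, the top matches returned by this ranking become the context that Gemini uses to generate a Cypher query over the knowledge graph.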
5 | 6 | ## 📝 Blog Post 7 | 8 | Check out the detailed explanation of this project in the blog post: [Building an Intelligent Movie Search with Neo4j and Vertex AI](https://sidagarwal04.medium.com/building-an-intelligent-movie-search-with-neo4j-and-vertex-ai-a38c75f79cf7) 9 | 10 | ## 🚀 Overview 11 | This project demonstrates how to build a GenAI-powered movie recommendation engine using the GraphRAG pattern by integrating: 12 | 13 | - **Neo4j**: A graph database for storing movie data, knowledge graph relationships, and vector embeddings 14 | - **Google Vertex AI**: For generating semantic embeddings (`text-embedding-005`) and leveraging Gemini for natural language understanding and Cypher generation 15 | - **Gradio**: To build an intuitive web interface for interactive recommendations 16 | 17 | The system performs semantic search using vector embeddings to retrieve relevant movie context, then dynamically generates Cypher queries using Gemini based on this context and the Neo4j knowledge graph schema. These Cypher queries are executed to fetch precise results, which are then summarized conversationally by Gemini — enabling a powerful, explainable, and context-aware movie recommendation experience. 18 | 19 | ## 🎬 [Live Demo](https://movies-reco-258362460261.us-central1.run.app/) 20 | 21 | ## 🧩 How It Works 22 | 23 | 1. **Data Ingestion**: Movie metadata (titles, plots, genres, actors, etc.) is loaded into a Neo4j graph database and modeled using nodes and relationships. 24 | 2. **Vector Embeddings**: Vertex AI's `text-embedding-005` model is used to generate semantic embeddings for movie descriptions, which are stored in Neo4j with a vector index. 25 | 3. **Vector Search**: When a user enters a query, the system computes its embedding and performs a vector similarity search in Neo4j to retrieve semantically relevant movies. 26 | 4.
**Cypher Query Generation**: Using the vector search results and the graph schema (ontology), Gemini generates a Cypher query tailored to the user's intent. 27 | 5. **Graph Reasoning**: The generated Cypher query is executed on the Neo4j knowledge graph to perform multi-hop reasoning and extract deeper insights or related entities. 28 | 6. **Natural Language Summary**: Gemini then summarizes the Cypher query results in a human-friendly, conversational format. 29 | 30 | ## 🗂️ Repository Structure 31 | 32 | - `example.env`: Template for required environment variables 33 | - `.env.yaml`: Cloud Run deployment environment configuration 34 | - `normalized_data/`: Contains the normalized movie dataset (CSV files) 35 | - `graph_build.py`: Loads movies, genres, actors, and relationships into Neo4j 36 | - `generate_embeddings.py`: Generates semantic vector embeddings using Vertex AI 37 | - `generate_embeddings_to_csv.py` / `export_embeddings_to_csv.py`: Scripts for generating or exporting embeddings to CSV 38 | - `load_embeddings.py`: Loads precomputed embeddings into Neo4j with a vector index 39 | - `movie_embeddings.csv`: Precomputed embeddings file (used for faster loading/testing) 40 | - `prompts.py`: Prompt templates for Gemini (Cypher generation, summarization, query repair) 41 | - `app.py`: Main application that powers the Gradio UI and implements the GraphRAG pipeline (vector search + LLM-based Cypher execution) 42 | - `Dockerfile`: Used to containerize and deploy the application (e.g., to Cloud Run) 43 | - `requirements.txt`: Python dependencies 44 | 45 | ## ⚙️ Setup and Installation 46 | 47 | ### Prerequisites 48 | 49 | - Python 3.9+ (the provided `Dockerfile` uses Python 3.11) 50 | - Neo4j database (self-hosted or [AuraDB](https://console.neo4j.io/) (**recommended**)) 51 | - Google Cloud account with Vertex AI API enabled 52 | - Service account with appropriate permissions for Vertex AI 53 | 54 | ### Environment Configuration 55 | 💡 Tip: Run these steps in [Google Cloud Shell](https://shell.cloud.google.com) for a
pre-authenticated environment with gcloud, Vertex AI SDK, and permissions already set up — no need to manually manage service account keys. 56 | 57 | 1. Clone this repository 58 | ```bash 59 | git clone https://github.com/your-username/neo4j-vertexai-codelab.git 60 | cd neo4j-vertexai-codelab 61 | ``` 62 | 2. Copy `example.env` to `.env` and fill in your configuration: 63 | ```bash 64 | NEO4J_URI=your-neo4j-connection-string 65 | NEO4J_USER=your-neo4j-username 66 | NEO4J_PASSWORD=your-neo4j-password 67 | PROJECT_ID=your-gcp-project-id 68 | LOCATION=your-gcp-location 69 | ``` 70 | 3. (Optional) Create a service account in Google Cloud and download the JSON key file 71 | - Ensure it has access to Vertex AI and Cloud Storage. 72 | - Grant roles: Vertex AI User, Storage Object Viewer, etc. 73 | 4. (Optional) Place the service account key JSON file in the project directory (referenced in `generate_embeddings.py`) 74 | - Set the path using: 75 | ```bash 76 | export GOOGLE_APPLICATION_CREDENTIALS="path/to/your-key.json" 77 | ``` 78 | 79 | ### Installation 80 | 81 | ```bash 82 | # Create and activate a virtual environment (recommended) 83 | python -m venv venv 84 | source venv/bin/activate # On Windows: venv\Scripts\activate 85 | 86 | # Install dependencies 87 | pip install -r requirements.txt 88 | ``` 89 | 90 | ## 🏃‍♀️ Running the Application 91 | 92 | ### 1. Build the Graph Database 93 | 94 | First, load movie data into Neo4j: 95 | 96 | ```bash 97 | python graph_build.py 98 | ``` 99 | 100 | ### 2. Generate Embeddings 101 | 102 | Generate vector embeddings for movie descriptions: 103 | 104 | ```bash 105 | python generate_embeddings.py 106 | ``` 107 | 108 | **Embedding CSV Utilities**: 109 | - `generate_embeddings_to_csv.py`: A one-time script used to generate `movie_embeddings.csv`, which contains pre-computed vector embeddings for movies. 110 | - `export_embeddings_to_csv.py`: A utility script to export existing embeddings from Neo4j to a CSV file. 
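For reference, the CSV these utilities produce pairs each `tmdbId` with its embedding serialized as a JSON list, which is the two-column layout the `LOAD CSV` query reads (`row.tmdbId`, `row.embedding`) and what `apoc.convert.fromJsonList` decodes on the Neo4j side. A minimal sketch of that row format; the id and the 3-dimensional vector here are illustrative stand-ins for real rows, which carry 768-dimensional embeddings:

```python
# Sketch of the movie_embeddings.csv row format: tmdbId plus a JSON-encoded
# embedding list. The id (862) and the 3-dim vector are placeholders.
import csv
import io
import json

embedding = [0.12, -0.34, 0.56]  # stand-in for a 768-dim vector

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=["tmdbId", "embedding"])
writer.writeheader()
writer.writerow({"tmdbId": 862, "embedding": json.dumps(embedding)})

# Reading the row back mirrors what apoc.convert.fromJsonList does server-side:
row = next(csv.DictReader(io.StringIO(buf.getvalue())))
assert row["tmdbId"] == "862"
assert json.loads(row["embedding"]) == embedding
```

JSON-encoding the vector keeps the list in a single CSV field despite the embedded commas, so the round trip is lossless.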
111 | 112 | **Loading Embeddings Directly from CSV**: 113 | 114 | If you want to skip generating embeddings and load precomputed embeddings (in a CSV file) directly into Neo4j, you have two options: 115 | 116 | **a. Running Cypher Directly in the Neo4j Aura Console** 117 | 118 | You can load the CSV file directly into Neo4j using the following Cypher query: 119 | 120 | ```cypher 121 | LOAD CSV WITH HEADERS FROM 'https://storage.googleapis.com/neo4j-vertexai-codelab/movie_embeddings.csv' AS row 122 | WITH row 123 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) 124 | SET m.embedding = apoc.convert.fromJsonList(row.embedding) 125 | ``` 126 | 127 | **b. Using the Python Script** 128 | 129 | Alternatively, you can run the `load_embeddings.py` script, which automates this process via the Neo4j Python driver. 130 | ```bash 131 | python load_embeddings.py 132 | ``` 133 | 134 | ### 3. Start the Recommender Chatbot 135 | 136 | Launch the Gradio web interface: 137 | 138 | ```bash 139 | python app.py 140 | ``` 141 | 142 | The application will be available at `http://0.0.0.0:8080` by default. 143 | 144 | ## 🚀 Deploying to Cloud Run 145 | Before deploying to Cloud Run, ensure your `requirements.txt` file includes all necessary dependencies for Neo4j and Vertex AI integration. Additionally, you need a `Dockerfile` to containerize your application for deployment. 146 | 147 | Both `requirements.txt` and `Dockerfile` are present in this repository: 148 | - `requirements.txt`: Lists all the Python dependencies required to run the application. 149 | - `Dockerfile`: Defines the container environment, including the base image, required packages, and how the application is executed. 150 | 151 | If you want to deploy this application to Google Cloud Run for production use, follow these steps: 152 | 153 | ### 1.
Set up Environment Variables 154 | 155 | ```bash 156 | # Set your Google Cloud project ID 157 | export GCP_PROJECT='your-project-id' # Change this as per your GCP Project ID 158 | 159 | # Set your preferred region 160 | export GCP_REGION='us-central1' # Change this as per your GCP region 161 | ``` 162 | 163 | ### 2. Create the Repository and Build the Container Image 164 | 165 | ```bash 166 | # Set the Artifact Registry repository name 167 | export AR_REPO='movies-reco' # Change this if needed 168 | 169 | # Set your service name 170 | export SERVICE_NAME='movies-reco' # Change if needed 171 | 172 | # Create the Artifact Registry repository 173 | gcloud artifacts repositories create "$AR_REPO" \ 174 | --location="$GCP_REGION" \ 175 | --repository-format=Docker 176 | 177 | # Configure Docker to use Google Cloud's Artifact Registry 178 | gcloud auth configure-docker "$GCP_REGION-docker.pkg.dev" 179 | 180 | # Build and submit the container image 181 | gcloud builds submit \ 182 | --tag "$GCP_REGION-docker.pkg.dev/$GCP_PROJECT/$AR_REPO/$SERVICE_NAME" 183 | ``` 184 | 185 | ### 3. Deploy to Cloud Run 186 | Before deployment, double-check that `requirements.txt` lists all dependencies needed for the Neo4j and Vertex AI integration. 187 | 188 | #### Setting Environment Variables from the `.env.yaml` File 189 | Before deploying to Cloud Run, create a `.env.yaml` file in your project root with the following structure: 190 | 191 | ```yaml 192 | NEO4J_URI: "bolt+s://" 193 | NEO4J_USER: "neo4j" 194 | NEO4J_PASSWORD: "" 195 | PROJECT_ID: "" 196 | LOCATION: "" 197 | ``` 198 | ✅ This YAML file is used during Cloud Run deployment to inject environment variables into your container runtime. Once set, you can proceed with the `gcloud run deploy` command. 199 | 200 | The following command deploys your application to Cloud Run using environment variables defined in `.env.yaml`.
gcloud automatically ignores commented lines in the file, and the flags below give the containerized app public (unauthenticated) access: 201 | ```bash 202 | gcloud run deploy "$SERVICE_NAME" \ 203 | --port=8080 \ 204 | --image="$GCP_REGION-docker.pkg.dev/$GCP_PROJECT/$AR_REPO/$SERVICE_NAME" \ 205 | --allow-unauthenticated \ 206 | --region=$GCP_REGION \ 207 | --platform=managed \ 208 | --project=$GCP_PROJECT \ 209 | --env-vars-file=.env.yaml 210 | ``` 211 | 212 | After deployment, your application will be accessible at a URL like: 213 | `https://movies-reco-[unique-id].us-central1.run.app/` 214 | 215 | Note: 216 | - Your `requirements.txt` should list all Python dependencies. 217 | - Make sure your application's `Dockerfile` is set up properly to run in a containerized environment. The `Dockerfile` should include a `pip install -r requirements.txt` command to ensure all dependencies are installed during the container build process. 218 | - You'll need to include your service account credentials (unless running from Google Cloud Shell directly) and environment variables in the container. 219 | 220 | ## 🧪 Example Queries 221 | 222 | - "Which time travel movies star Bruce Willis?" 223 | - "Show me thrillers from the 2000s with mind-bending plots."
224 | - "I'm in the mood for something with superheroes but not too serious" 225 | - "I want a thriller that keeps me on the edge of my seat" 226 | - "Show me movies about artificial intelligence taking over the world" 227 | 228 | ## 📚 Learning Resources 229 | 230 | - [Neo4j Vector Search Documentation](https://neo4j.com/docs/cypher-manual/current/indexes-for-vector-search/) 231 | - [Vertex AI Embeddings](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings) 232 | - [Gemini API](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini) 233 | - [Gradio Documentation](https://gradio.app/docs/) 234 | - [Cloud Run Documentation](https://cloud.google.com/run/docs) 235 | 236 | ## 🤝 Contributing 237 | 238 | Contributions are welcome! Please feel free to submit a Pull Request. 239 | 240 | ## 📄 License 241 | 242 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 243 | -------------------------------------------------------------------------------- /graph_build.py: -------------------------------------------------------------------------------- 1 | import os 2 | from neo4j import GraphDatabase 3 | from dotenv import load_dotenv 4 | import warnings 5 | 6 | warnings.filterwarnings("ignore") 7 | load_dotenv() 8 | 9 | class CreateGraph: 10 | 11 | def __init__(self, uri, user, password, database='neo4j'): 12 | self.driver = GraphDatabase.driver(uri, auth=(user, password), database=database) 13 | 14 | def close(self): 15 | self.driver.close() 16 | 17 | def db_cleanup(self): 18 | print("Doing Database Cleanup.") 19 | query = """ 20 | MATCH (n) DETACH DELETE (n) 21 | """ 22 | with self.driver.session() as session: 23 | session.run(query) 24 | print("Database Cleanup Done. 
Using blank database.") 25 | 26 | def create_constraints_indexes(self): 27 | queries = [ 28 | "CREATE CONSTRAINT unique_tmdb_id IF NOT EXISTS FOR (m:Movie) REQUIRE m.tmdbId IS UNIQUE;", 29 | "CREATE CONSTRAINT unique_movie_id IF NOT EXISTS FOR (m:Movie) REQUIRE m.movieId IS UNIQUE;", 30 | "CREATE CONSTRAINT unique_prod_id IF NOT EXISTS FOR (p:ProductionCompany) REQUIRE p.company_id IS UNIQUE;", 31 | "CREATE CONSTRAINT unique_genre_id IF NOT EXISTS FOR (g:Genre) REQUIRE g.genre_id IS UNIQUE;", 32 | "CREATE CONSTRAINT unique_lang_id IF NOT EXISTS FOR (l:SpokenLanguage) REQUIRE l.language_code IS UNIQUE;", 33 | "CREATE CONSTRAINT unique_country_id IF NOT EXISTS FOR (c:Country) REQUIRE c.country_code IS UNIQUE;", 34 | "CREATE INDEX actor_id IF NOT EXISTS FOR (p:Person) ON (p.actor_id);", 35 | "CREATE INDEX crew_id IF NOT EXISTS FOR (p:Person) ON (p.crew_id);", 36 | "CREATE INDEX movieId IF NOT EXISTS FOR (m:Movie) ON (m.movieId);", 37 | "CREATE INDEX user_id IF NOT EXISTS FOR (p:Person) ON (p.user_id);" 38 | ] 39 | with self.driver.session() as session: 40 | for query in queries: 41 | session.run(query) 42 | print("Constraints and Indexes created successfully.") 43 | 44 | 45 | def load_movies(self, csv_file, limit): 46 | query = f""" 47 | LOAD CSV WITH HEADERS FROM $csvFile AS row 48 | WITH row, toInteger(row.tmdbId) AS tmdbId 49 | WHERE tmdbId IS NOT NULL 50 | WITH row, tmdbId 51 | LIMIT {limit} 52 | MERGE (m:Movie {{tmdbId: tmdbId}}) 53 | ON CREATE SET m.title = coalesce(row.title, "None"), 54 | m.original_title = coalesce(row.original_title, "None"), 55 | m.adult = CASE 56 | WHEN toInteger(row.adult) = 1 THEN 'Yes' 57 | ELSE 'No' 58 | END, 59 | m.budget = toInteger(coalesce(row.budget, 0)), 60 | m.original_language = coalesce(row.original_language, "None"), 61 | m.revenue = toInteger(coalesce(row.revenue, 0)), 62 | m.tagline = coalesce(row.tagline, "None"), 63 | m.overview = coalesce(row.overview, "None"), 64 | m.release_date = coalesce(row.release_date, "None"), 65 
| m.runtime = toFloat(coalesce(row.runtime, 0)), 66 | m.belongs_to_collection = coalesce(row.belongs_to_collection, "None"); 67 | """ 68 | with self.driver.session() as session: 69 | session.run(query, csvFile=f'{csv_file}') 70 | print(f"Movies loaded from {csv_file} (limited to {limit} entries)") 71 | 72 | def load_genres(self, csv_file): 73 | query = """ 74 | LOAD CSV WITH HEADERS FROM $csvFile AS row 75 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) // Check if the movie exists 76 | WITH m, row 77 | MERGE (g:Genre {genre_id: toInteger(row.genre_id)}) 78 | ON CREATE SET g.genre_name = row.genre_name 79 | MERGE (m)-[:HAS_GENRE]->(g); 80 | """ 81 | with self.driver.session() as session: 82 | session.run(query, csvFile=f'{csv_file}') 83 | print(f"Genres and relationships to movies loaded from {csv_file}") 84 | 85 | def load_production_companies(self, csv_file): 86 | query = """ 87 | LOAD CSV WITH HEADERS FROM $csvFile AS row 88 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) // Check if the movie exists 89 | WITH m, row 90 | MERGE (pc:ProductionCompany {company_id: toInteger(row.company_id)}) 91 | ON CREATE SET pc.company_name = row.company_name 92 | MERGE (m)-[:PRODUCED_BY]->(pc); 93 | """ 94 | with self.driver.session() as session: 95 | session.run(query, csvFile=f'{csv_file}') 96 | print(f"Production companies and relationships to movies loaded from {csv_file}") 97 | 98 | def load_production_countries(self, csv_file): 99 | query = """ 100 | LOAD CSV WITH HEADERS FROM $csvFile AS row 101 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) // Check if the movie exists 102 | WITH m, row 103 | MERGE (c:Country {country_code: row.country_code}) 104 | ON CREATE SET c.country_name = row.country_name 105 | MERGE (m)-[:PRODUCED_IN]->(c); 106 | """ 107 | with self.driver.session() as session: 108 | session.run(query, csvFile=f'{csv_file}') 109 | print(f"Production countries and relationships to movies loaded from {csv_file}") 110 | 111 | def load_spoken_languages(self, 
csv_file): 112 | query = """ 113 | LOAD CSV WITH HEADERS FROM $csvFile AS row 114 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) // Check if the movie exists 115 | WITH m, row 116 | MERGE (l:SpokenLanguage {language_code: row.language_code}) 117 | ON CREATE SET l.language_name = row.language_name 118 | MERGE (m)-[:HAS_LANGUAGE]->(l); 119 | """ 120 | with self.driver.session() as session: 121 | session.run(query, csvFile=f'{csv_file}') 122 | print(f"Spoken languages and relationships to movies loaded from {csv_file}") 123 | 124 | def load_keywords(self, csv_file): 125 | query = """ 126 | LOAD CSV WITH HEADERS FROM $csvFile AS row 127 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) // Check if the movie exists 128 | SET m.keywords = row.keywords; 129 | """ 130 | with self.driver.session() as session: 131 | session.run(query, csvFile=f'{csv_file}') 132 | print(f"Keywords loaded from {csv_file}") 133 | 134 | def load_person_actors(self, csv_file): 135 | query1 = """ 136 | LOAD CSV WITH HEADERS FROM $csvFile AS row 137 | CALL (row){ 138 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) // Check if the movie exists 139 | WITH m, row 140 | MERGE (p:Person {actor_id: toInteger(row.actor_id)}) 141 | ON CREATE SET p.name = row.name, p.role= 'actor' 142 | MERGE (p)-[a:ACTED_IN]->(m) 143 | ON CREATE SET a.character = coalesce(row.character, "None"), a.cast_id= toInteger(row.cast_id) 144 | }IN TRANSACTIONS OF 30000 ROWS; 145 | """ 146 | with self.driver.session() as session: 147 | session.run(query1, csvFile=f'{csv_file}') 148 | print(f"Actors loaded from {csv_file}") 149 | query2 = """ 150 | MATCH (n:Person) WHERE n.role="actor" SET n:Actor 151 | """ 152 | with self.driver.session() as session: 153 | session.run(query2) 154 | print(f"Actor label created additionally") 155 | 156 | def load_person_crew(self, csv_file): 157 | query1 = """ 158 | LOAD CSV WITH HEADERS FROM $csvFile AS row 159 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) // Check if the movie exists 160 | 
MERGE (p:Person {crew_id: toInteger(row.crew_id)}) 161 | ON CREATE SET p.name = row.name, p.role = row.job 162 | WITH p, m, row, 163 | CASE 164 | WHEN row.job='Director' THEN "DIRECTED" 165 | WHEN row.job='Producer' THEN "PRODUCED" 166 | ELSE "Unknown" 167 | END AS crew_rel 168 | CALL apoc.create.relationship(p, crew_rel, {}, m) 169 | YIELD rel 170 | RETURN rel; 171 | """ 172 | with self.driver.session() as session: 173 | session.run(query1, csvFile=f'{csv_file}') 174 | print(f"Directors and Producers loaded from {csv_file}") 175 | query2 = """ 176 | MATCH (n:Person) WHERE n.role="Director" SET n:Director 177 | """ 178 | with self.driver.session() as session: 179 | session.run(query2) 180 | print(f"Director label created additionally") 181 | query3 = """ 182 | MATCH (n:Person) WHERE n.role="Producer" SET n:Producer 183 | """ 184 | with self.driver.session() as session: 185 | session.run(query3) 186 | print(f"Producer label created additionally") 187 | 188 | 189 | def load_links(self, csv_file): 190 | query = """ 191 | LOAD CSV WITH HEADERS FROM $csvFile AS row 192 | MATCH (m:Movie {tmdbId: toInteger(row.tmdbId)}) // Check if the movie exists 193 | SET m.movieId = toInteger(row.movieId), 194 | m.imdbId = row.imdbId; 195 | """ 196 | with self.driver.session() as session: 197 | session.run(query, csvFile=f'{csv_file}') 198 | print(f"Links loaded from {csv_file}") 199 | 200 | 201 | def load_ratings(self, csv_file): 202 | query1 = """ 203 | LOAD CSV WITH HEADERS FROM $csvFile AS row 204 | CALL (row){ 205 | MATCH (m:Movie {movieId: toInteger(row.movieId)}) // Check if the movie exists 206 | WITH m, row 207 | MERGE (p:Person {user_id: toInteger(row.userId)}) 208 | ON CREATE SET p.role= 'user' 209 | MERGE (p)-[r:RATED]->(m) 210 | ON CREATE SET r.rating = toFloat(row.rating), r.timestamp = toInteger(row.timestamp) 211 | }IN TRANSACTIONS OF 50000 ROWS; 212 | """ 213 | with self.driver.session() as session: 214 | session.run(query1, csvFile=f'{csv_file}') 215 | 
print(f"Ratings loaded from {csv_file}") 216 | query2 = """ 217 | MATCH (n:Person) WHERE n.role="user" SET n:User 218 | """ 219 | with self.driver.session() as session: 220 | session.run(query2) 221 | print(f"User label created additionally") 222 | 223 | 224 | 225 | def main(): 226 | uri = os.getenv('NEO4J_URI') 227 | user = os.getenv('NEO4J_USER') 228 | password = os.getenv('NEO4J_PASSWORD') 229 | database = os.getenv('NEO4J_DATABASE') 230 | 231 | graph = CreateGraph(uri, user, password, database) 232 | 233 | graph.db_cleanup() 234 | graph.create_constraints_indexes() 235 | 236 | # Load data from CSV files with a limit on entries for movies 237 | movie_limit = 12000 # Limit only applied to movies 238 | graph.load_movies('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_movies.csv', movie_limit) 239 | 240 | # Load related nodes and create relationships conditionally 241 | graph.load_genres('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_genres.csv') 242 | graph.load_production_companies('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_production_companies.csv') 243 | graph.load_production_countries('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_production_countries.csv') 244 | graph.load_spoken_languages('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_spoken_languages.csv') 245 | graph.load_keywords('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_keywords.csv') 246 | graph.load_person_actors('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_cast.csv') 247 | graph.load_person_crew('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_crew.csv') 248 | graph.load_links('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_links.csv') 249 | 
graph.load_ratings('https://storage.googleapis.com/neo4j-vertexai-codelab/normalized_data/normalized_ratings_small.csv') 250 | 251 | 252 | graph.close() 253 | 254 | if __name__ == "__main__": 255 | main() 256 | --------------------------------------------------------------------------------