├── requirements.txt
├── README.md
└── embedding.py


/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy>=1.20.0
 2 | pandas>=1.3.0
 3 | matplotlib>=3.4.0
 4 | seaborn>=0.11.0
 5 | scikit-learn>=1.0.0
 6 | scipy>=1.7.0
 7 | flask>=2.0.0
 8 | google-generativeai>=0.3.0
 9 | anthropic>=0.8.0
10 | requests>=2.27.0
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Embedding Analysis Tool for SEO
 3 | 
 4 | Read the blog post here: https://metehan.ai/blog/embedding-seo-tool-analysis/
 5 | 
 6 | ## What This Tool Does
 7 | 
 8 | This tool provides in-depth analysis of content embeddings to help with SEO optimization. It:
 9 | 
10 | -   Generates 3072-dimension embeddings of your content using Google's Gemini API
11 | 
12 | -   Creates visualizations of the embedding data, including:
13 | 
14 |     -   Overview of all embedding dimensions
15 | 
16 |     -   Top dimensions by magnitude
17 | 
18 |     -   Activation distribution histogram
19 | 
20 |     -   Dimension clusters heatmap
21 | 
22 |     -   PCA visualization of dimension segments
23 | 
24 | -   Calculates key metrics like mean values, standard deviation, significant dimensions, etc.
25 | 
26 | -   Identifies dimension clusters that may represent semantic features
27 | 
28 | -   Uses Claude 3.7 Sonnet to analyze the embedding patterns and provide:
29 | 
30 |     -   Content quality assessment
31 | 
32 |     -   Identification of semantic structures
33 | 
34 |     -   SEO optimization recommendations
35 | 
36 |     -   Analysis of topical strengths and weaknesses
37 | 
38 | ## How to Use This Tool
39 | 
40 | ### Setup:
41 | 
42 | -   Install the requirements:
43 |     
44 |     pip install -r requirements.txt
45 |     
46 | 
47 | -   Replace the API keys in the code:
48 | 
49 |     -   `GOOGLE_API_KEY` - Your Google API key with access to Gemini models
50 | 
51 |     -   `ANTHROPIC_API_KEY` - Your Anthropic API key with access to Claude 3.7 Sonnet
52 | 
53 | -   Run the application:
54 |     
55 |     python embedding.py
56 |     
57 | 
58 | -   Open your browser and navigate to:
59 |     
60 |     http://127.0.0.1:5000
61 |     
62 | 
63 | ### Using the Tool:
64 | 
65 | -   Paste your content (article, blog post, web page) into the text area
66 | 
67 | -   Click "Analyze Content"
68 | 
69 | -   Wait for the analysis to complete (this can take 30-60 seconds)
70 | 
71 | -   Review the visualizations and analysis:
72 | 
73 |     -   The embedding overview shows activation patterns across all dimensions
74 | 
75 |     -   The top dimensions chart shows which dimensions are most important
76 | 
77 |     -   The dimension clusters visualization helps identify related features
78 | 
79 |     -   The metrics section shows key statistical indicators
80 | 
81 |     -   The Claude analysis provides actionable SEO recommendations
82 | 
83 | This tool helps you understand how AI "sees" your content, which semantic features are prominent, and how to optimize for better search engine performance based on embedding patterns.
84 | 


--------------------------------------------------------------------------------
/embedding.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import numpy as np
  4 | import pandas as pd
  5 | import matplotlib
  6 | matplotlib.use('Agg')  # Use non-interactive backend
  7 | import matplotlib.pyplot as plt
  8 | from matplotlib.colors import LinearSegmentedColormap
  9 | import seaborn as sns
 10 | from sklearn.decomposition import PCA
 11 | from sklearn.manifold import TSNE
 12 | from sklearn.cluster import KMeans
 13 | import scipy.stats as stats
 14 | import base64
 15 | from io import BytesIO
 16 | import requests
 17 | from flask import Flask, request, render_template_string, jsonify
 18 | import google.generativeai as genai
 19 | from anthropic import Anthropic
 20 | 
 21 | # Configure APIs
 22 | GOOGLE_API_KEY = "xxx"  # Replace with your actual API key
 23 | ANTHROPIC_API_KEY = "xxx"  # Replace with your actual API key
 24 | 
 25 | # Initialize Gemini client
 26 | genai.configure(api_key=GOOGLE_API_KEY)
 27 | anthropic_client = Anthropic(api_key=ANTHROPIC_API_KEY)
 28 | 
 29 | app = Flask(__name__)
 30 | 
 31 | # Custom JSON encoder to handle NumPy types
 32 | class NumpyEncoder(json.JSONEncoder):
 33 |     def default(self, obj):
 34 |         if isinstance(obj, np.integer):
 35 |             return int(obj)
 36 |         if isinstance(obj, np.floating):
 37 |             return float(obj)
 38 |         if isinstance(obj, np.ndarray):
 39 |             return obj.tolist()
 40 |         return super(NumpyEncoder, self).default(obj)
 41 | 
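 42 | # Register the custom encoder with Flask (NOTE: app.json_encoder was deprecated in Flask 2.2 and removed in 2.3, where a JSON provider is needed instead)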
 43 | app.json_encoder = NumpyEncoder
 44 | 
 45 | def get_embedding(text):
 46 |     """Get embedding from Google Gemini API"""
 47 |     try:
 48 |         # FIXED: Use the correct method to get embeddings from Gemini
 49 |         # Use gemini-embedding-exp-03-07 model as specified
 50 |         response = genai.embed_content(
 51 |             model="models/gemini-embedding-exp-03-07",
 52 |             content=text,
 53 |         )
 54 |         embedding = response["embedding"]
 55 |         return embedding
 56 |     except Exception as e:
 57 |         print(f"Error getting embedding: {e}")
 58 |         # Return a random embedding for testing if API fails
 59 |         print("Using random embedding instead")
 60 |         return np.random.normal(0, 0.1, 3072).tolist()
 61 | 
 62 | def analyze_with_claude(embedding_data, content_snippet):
 63 |     """Get analysis from Claude 3.7 Sonnet"""
 64 |     try:
 65 |         message = anthropic_client.messages.create(
 66 |             model="claude-3-7-sonnet-latest",
 67 |             max_tokens=8000,
 68 |             temperature=1,
 69 |             thinking={
 70 |                 "type": "enabled",
 71 |                 "budget_tokens": 4000
 72 |             },
 73 |             system="You are an expert in SEO and NLP embedding analysis. Analyze the provided embedding data to extract insights about content quality, semantic structure, and SEO optimization opportunities. Focus on activation patterns, dimension clusters, and quality indicators. Provide actionable recommendations.",
 74 |             messages=[
 75 |                 {
 76 |                     "role": "user", 
 77 |                     "content": f"""Analyze this 3k-dimension embedding data from a content piece. Focus on quality indicators, semantic structure, and SEO implications.
 78 | 
 79 | CONTENT SNIPPET (first 4500 chars): 
 80 | {content_snippet[:18500]}...
 81 | 
 82 | EMBEDDING DATA STATISTICS:
 83 | - Dimension count: {len(embedding_data)}
 84 | - Mean value: {np.mean(embedding_data):.6f}
 85 | - Standard deviation: {np.std(embedding_data):.6f}
 86 | - Min value: {np.min(embedding_data):.6f} at dimension {np.argmin(embedding_data)}
 87 | - Max value: {np.max(embedding_data):.6f} at dimension {np.argmax(embedding_data)}
 88 | - Top 5 dimensions by magnitude: {sorted(range(len(embedding_data)), key=lambda i: abs(embedding_data[i]), reverse=True)[:5]}
 89 | 
 90 | Provide a concise analysis focusing on:
 91 | 1. Content quality assessment based on embedding patterns
 92 | 2. Key dimension clusters and their likely semantic functions
 93 | 3. SEO optimization recommendations based on the embedding structure
 94 | 4. Potential topical strengths and weaknesses"""
 95 |                 }
 96 |             ]
 97 |         )
 98 |         
 99 |         # Extract text content from Claude's response
100 |         # The content might be a list of content blocks or a single text block
101 |         if hasattr(message.content, '__iter__') and not isinstance(message.content, str):
102 |             # If content is an iterable (like a list of content blocks)
103 |             extracted_text = ""
104 |             for block in message.content:
105 |                 if hasattr(block, 'text'):
106 |                     extracted_text += block.text
107 |                 elif isinstance(block, str):
108 |                     extracted_text += block
109 |             return extracted_text
110 |         elif hasattr(message.content, 'text'):
111 |             # If content is a single TextBlock object
112 |             return message.content.text
113 |         else:
114 |             # If content is already a string or something else
115 |             return str(message.content)
116 |             
117 |     except Exception as e:
118 |         print(f"Error getting Claude analysis: {e}")
119 |         return "Error getting analysis from Claude. Please check your API key and try again."
120 | 
def plot_embedding_overview(embedding):
    """Draw every embedding value as a line chart.

    Returns the chart as a base64-encoded PNG string.
    """
    dimension_axis = range(len(embedding))

    plt.figure(figsize=(12, 6))
    plt.plot(dimension_axis, embedding)
    # Faint zero line to separate positive from negative values
    plt.axhline(y=0, color='r', linestyle='-', alpha=0.3)
    plt.xlabel('Dimension')
    plt.ylabel('Value')
    plt.title('Embedding Values Across All 3k Dimensions')
    plt.grid(True, alpha=0.3)

    # Render into memory and encode the PNG bytes as text
    png_buffer = BytesIO()
    plt.savefig(png_buffer, format='png', dpi=100)
    plt.close()
    encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode('utf-8')
137 | 
def plot_top_dimensions(embedding):
    """Draw a bar chart of the 20 dimensions with the largest absolute value.

    Bars are blue for non-negative values and red for negative ones; the
    x tick labels are the dimension indices. Returns the chart as a
    base64-encoded PNG string.
    """
    # Rank all indices by magnitude and keep the 20 strongest
    ranked = sorted(range(len(embedding)), key=lambda i: abs(embedding[i]), reverse=True)
    top_indices = ranked[:20]
    top_values = [embedding[idx] for idx in top_indices]
    bar_colors = []
    for value in top_values:
        bar_colors.append('blue' if value >= 0 else 'red')
    positions = range(len(top_indices))

    plt.figure(figsize=(12, 6))
    plt.bar(positions, top_values, color=bar_colors)
    plt.xticks(positions, top_indices, rotation=45)
    plt.xlabel('Dimension Index')
    plt.ylabel('Value')
    plt.title('Top 20 Dimensions by Magnitude')
    plt.grid(True, alpha=0.3)

    # Render into memory and encode the PNG bytes as text
    png_buffer = BytesIO()
    plt.savefig(png_buffer, format='png', dpi=100)
    plt.close()
    encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode('utf-8')
159 | 
def plot_dimension_clusters(embedding):
    """Draw the embedding as a 2-D heatmap and return it as a base64 PNG string.

    The flat vector is reshaped into a ``rows x cols`` grid, where ``cols`` is
    the largest divisor of the length that does not exceed its square root.
    For a 3072-value embedding this is the 64x48 grid; other lengths get a
    grid that fits them exactly instead of failing.

    Raises:
        ValueError: If ``embedding`` is empty.
    """
    values = np.asarray(embedding)
    total = values.size
    if total == 0:
        raise ValueError("cannot draw a heatmap for an empty embedding")

    # Pick the most square-like exact factorisation of the length.
    cols = max(d for d in range(1, int(total ** 0.5) + 1) if total % d == 0)
    rows = total // cols
    embedding_reshaped = values.reshape(rows, cols)

    plt.figure(figsize=(12, 8))
    # Create a custom colormap from blue to white to red
    cmap = LinearSegmentedColormap.from_list('BrBG', ['blue', 'white', 'red'], N=256)
    plt.imshow(embedding_reshaped, cmap=cmap, aspect='auto')
    plt.colorbar(label='Activation Value')
    plt.title(f'Embedding Clusters Heatmap (Reshaped to {rows}x{cols})')
    plt.xlabel('Dimension Group')
    plt.ylabel('Dimension Group')

    # Save plot to base64 string
    buf = BytesIO()
    plt.savefig(buf, format='png', dpi=100)
    plt.close()
    buf.seek(0)
    return base64.b64encode(buf.read()).decode('utf-8')
180 | 
def plot_pca(embedding):
    """Project 256-value segments of the embedding onto two principal components.

    The embedding is cut into consecutive segments of 256 values (a trailing
    partial segment is ignored). With at least two segments, each becomes one
    point in a PCA scatter plot, labelled with its dimension range; otherwise
    a placeholder message is drawn. Returns the chart as a base64-encoded PNG
    string.
    """
    segment_size = 256
    num_segments = len(embedding) // segment_size

    if num_segments <= 1:
        # Too few rows for a 2-component PCA: show a message instead
        plt.figure(figsize=(10, 8))
        plt.text(0.5, 0.5, "Not enough segments for PCA visualization",
                 ha='center', va='center', fontsize=12)
        plt.axis('off')
    else:
        # One row per segment, one column per position inside the segment
        usable_length = num_segments * segment_size
        data_matrix = np.asarray(embedding[:usable_length], dtype=float)
        data_matrix = data_matrix.reshape(num_segments, segment_size)

        projected = PCA(n_components=2).fit_transform(data_matrix)

        plt.figure(figsize=(10, 8))
        plt.scatter(projected[:, 0], projected[:, 1])

        # Tag every point with the dimension range it covers
        for segment_index, (pc1, pc2) in enumerate(projected):
            first_dim = segment_index * segment_size
            last_dim = first_dim + segment_size - 1
            plt.annotate(f"{first_dim}-{last_dim}", (pc1, pc2), fontsize=8)

        plt.title('PCA of Embedding Segments')
        plt.xlabel('Principal Component 1')
        plt.ylabel('Principal Component 2')
        plt.grid(True, alpha=0.3)

    # Render into memory and encode the PNG bytes as text
    png_buffer = BytesIO()
    plt.savefig(png_buffer, format='png', dpi=100)
    plt.close()
    encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode('utf-8')
227 | 
def plot_activation_histogram(embedding):
    """Draw a 50-bin histogram of the embedding values.

    A dashed red vertical line marks zero. Returns the chart as a
    base64-encoded PNG string.
    """
    plt.figure(figsize=(10, 6))
    plt.hist(embedding, bins=50, alpha=0.7, color='skyblue', edgecolor='black')
    plt.axvline(x=0, color='r', linestyle='--', alpha=0.7)
    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.title('Distribution of Embedding Values')
    plt.grid(True, alpha=0.3)

    # Render into memory and encode the PNG bytes as text
    png_buffer = BytesIO()
    plt.savefig(png_buffer, format='png', dpi=100)
    plt.close()
    encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode('utf-8')
244 | 
def analyze_embedding(embedding, significant_threshold=0.1, max_gap=5, top_n=10):
    """Compute summary metrics, activation clusters and top dimensions.

    Args:
        embedding: Sequence of embedding values.
        significant_threshold: A dimension counts as significant when its
            absolute value is strictly greater than this.
        max_gap: Two consecutive significant dimensions belong to the same
            cluster when their indices differ by at most this much.
        top_n: How many of the largest-magnitude dimensions to report.

    Returns:
        A dict with three keys, all holding native Python types so the result
        can be serialized to JSON directly:
        ``"metrics"`` (counts and statistics), ``"clusters"`` (one dict per
        cluster of two or more significant dimensions) and
        ``"top_dimensions"`` (``{"dimension", "value"}`` dicts ordered by
        descending magnitude, ties in index order).

    Raises:
        ValueError: If ``embedding`` is empty.
    """
    values = np.asarray(embedding, dtype=float)
    if values.size == 0:
        raise ValueError("embedding must contain at least one value")
    magnitudes = np.abs(values)
    significant = np.flatnonzero(magnitudes > significant_threshold)

    # Convert every NumPy scalar to a native type for JSON serialization
    metrics = {
        "dimension_count": int(values.size),
        "mean_value": float(np.mean(values)),
        "std_dev": float(np.std(values)),
        "min_value": float(np.min(values)),
        "min_dimension": int(np.argmin(values)),
        "max_value": float(np.max(values)),
        "max_dimension": int(np.argmax(values)),
        "median_value": float(np.median(values)),
        "positive_count": int(np.sum(values > 0)),
        "negative_count": int(np.sum(values < 0)),
        "zero_count": int(np.sum(values == 0)),
        "abs_mean": float(np.mean(magnitudes)),
        "significant_dims": int(significant.size),
    }

    # Split the sorted significant indices wherever the gap to the previous
    # index exceeds max_gap; each piece is one candidate cluster.
    if significant.size:
        break_points = np.flatnonzero(np.diff(significant) > max_gap) + 1
        groups = np.split(significant, break_points)
    else:
        groups = []
    # Keep only meaningful clusters (more than one dimension)
    groups = [group for group in groups if group.size > 1]

    cluster_info = []
    for cluster_id, dims in enumerate(groups, start=1):
        cluster_values = values[dims]
        peak = int(np.argmax(cluster_values))  # position of the largest signed value
        cluster_info.append({
            "id": cluster_id,
            "dimensions": dims.tolist(),
            "start_dim": int(dims[0]),
            "end_dim": int(dims[-1]),
            "size": int(dims.size),
            "avg_value": float(np.mean(cluster_values)),
            "max_value": float(cluster_values[peak]),
            "max_dim": int(dims[peak]),
        })

    # A stable sort on the negated magnitudes gives descending order while
    # keeping equal magnitudes in ascending index order.
    ranked = np.argsort(-magnitudes, kind="stable")[:top_n]
    top_dimensions = [
        {"dimension": int(idx), "value": float(values[idx])} for idx in ranked
    ]

    return {
        "metrics": metrics,
        "clusters": cluster_info,
        "top_dimensions": top_dimensions,
    }
314 | 
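315 | # HTML template for the single-page UI: the inline script POSTs the text to /analyze and renders the returned charts, metrics, clusters and Claude analysis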
316 | HTML_TEMPLATE = """<!DOCTYPE html>
317 | <html lang="en">
318 | <head>
319 |     <meta charset="UTF-8">
320 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
321 |     <title>Embedding Analysis Tool for SEO by metehan.ai</title>
322 |     <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
323 |     <script src="https://cdn.tailwindcss.com"></script>
324 |     <style>
325 |         .loading {
326 |             display: inline-block;
327 |             width: 50px;
328 |             height: 50px;
329 |             border: 3px solid rgba(0,0,0,.3);
330 |             border-radius: 50%;
331 |             border-top-color: #3b82f6;
332 |             animation: spin 1s ease-in-out infinite;
333 |         }
334 |         
335 |         @keyframes spin {
336 |             to { transform: rotate(360deg); }
337 |         }
338 |     </style>
339 | </head>
340 | <body class="bg-gray-100 min-h-screen">
341 |     <div class="container mx-auto px-4 py-8">
342 |         <h1 class="text-3xl font-bold text-center mb-8">Embedding Analysis Tool for SEO by metehan.ai</h1>
343 |         
344 |         <div class="bg-white rounded-lg shadow-md p-6 mb-8">
345 |             <h2 class="text-xl font-semibold mb-4">Content Input</h2>
346 |             <form id="content-form" class="space-y-4">
347 |                 <div>
348 |                     <label for="content" class="block text-sm font-medium text-gray-700 mb-1">Paste your content here:</label>
349 |                     <textarea id="content" name="content" rows="8" 
350 |                         class="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500"
351 |                         placeholder="Enter the content you want to analyze..."></textarea>
352 |                 </div>
353 |                 <div class="flex justify-end">
354 |                     <button type="submit" class="px-4 py-2 bg-blue-600 text-white rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500">
355 |                         Analyze Content
356 |                     </button>
357 |                 </div>
358 |             </form>
359 |         </div>
360 |         
361 |         <div id="loading-container" class="hidden flex flex-col items-center justify-center py-12">
362 |             <div class="loading mb-4"></div>
363 |             <p class="text-gray-600">Analyzing content... This may take a minute.</p>
364 |         </div>
365 |         
366 |         <div id="results-container" class="hidden space-y-8">
367 |             <div class="bg-white rounded-lg shadow-md p-6">
368 |                 <h2 class="text-xl font-semibold mb-4">Embedding Overview</h2>
369 |                 <img id="overview-chart" class="w-full h-auto" />
370 |             </div>
371 |             
372 |             <div class="grid grid-cols-1 md:grid-cols-2 gap-8">
373 |                 <div class="bg-white rounded-lg shadow-md p-6">
374 |                     <h2 class="text-xl font-semibold mb-4">Top Dimensions</h2>
375 |                     <img id="top-dimensions-chart" class="w-full h-auto" />
376 |                 </div>
377 |                 
378 |                 <div class="bg-white rounded-lg shadow-md p-6">
379 |                     <h2 class="text-xl font-semibold mb-4">Activation Distribution</h2>
380 |                     <img id="histogram-chart" class="w-full h-auto" />
381 |                 </div>
382 |             </div>
383 |             
384 |             <div class="grid grid-cols-1 md:grid-cols-2 gap-8">
385 |                 <div class="bg-white rounded-lg shadow-md p-6">
386 |                     <h2 class="text-xl font-semibold mb-4">Dimension Clusters</h2>
387 |                     <img id="clusters-chart" class="w-full h-auto" />
388 |                 </div>
389 |                 
390 |                 <div class="bg-white rounded-lg shadow-md p-6">
391 |                     <h2 class="text-xl font-semibold mb-4">PCA Visualization</h2>
392 |                     <img id="pca-chart" class="w-full h-auto" />
393 |                 </div>
394 |             </div>
395 |             
396 |             <div class="bg-white rounded-lg shadow-md p-6">
397 |                 <h2 class="text-xl font-semibold mb-4">Key Metrics</h2>
398 |                 <div id="metrics-container" class="grid grid-cols-1 md:grid-cols-3 gap-4">
399 |                     <!-- Metrics will be inserted here -->
400 |                 </div>
401 |             </div>
402 |             
403 |             <div class="bg-white rounded-lg shadow-md p-6">
404 |                 <h2 class="text-xl font-semibold mb-4">Dimension Clusters</h2>
405 |                 <div id="clusters-container" class="space-y-4">
406 |                     <!-- Clusters will be inserted here -->
407 |                 </div>
408 |             </div>
409 |             
410 |             <div class="bg-white rounded-lg shadow-md p-6">
411 |                 <h2 class="text-xl font-semibold mb-4">Claude 3.7 Sonnet Analysis</h2>
412 |                 <div id="claude-analysis" class="prose max-w-none">
413 |                     <!-- Claude analysis will be inserted here -->
414 |                 </div>
415 |             </div>
416 |         </div>
417 |     </div>
418 |     
419 |     <script>
420 |         document.getElementById('content-form').addEventListener('submit', async function(e) {
421 |             e.preventDefault();
422 |             
423 |             const content = document.getElementById('content').value.trim();
424 |             if (!content) {
425 |                 alert('Please enter some content to analyze.');
426 |                 return;
427 |             }
428 |             
429 |             // Show loading indicator
430 |             document.getElementById('loading-container').classList.remove('hidden');
431 |             document.getElementById('results-container').classList.add('hidden');
432 |             
433 |             try {
434 |                 const response = await fetch('/analyze', {
435 |                     method: 'POST',
436 |                     headers: {
437 |                         'Content-Type': 'application/json',
438 |                     },
439 |                     body: JSON.stringify({ content }),
440 |                 });
441 |                 
442 |                 if (!response.ok) {
443 |                     throw new Error('Failed to analyze content');
444 |                 }
445 |                 
446 |                 const data = await response.json();
447 |                 
448 |                 // Update charts
449 |                 document.getElementById('overview-chart').src = 'data:image/png;base64,' + data.overview_chart;
450 |                 document.getElementById('top-dimensions-chart').src = 'data:image/png;base64,' + data.top_dimensions_chart;
451 |                 document.getElementById('clusters-chart').src = 'data:image/png;base64,' + data.clusters_chart;
452 |                 document.getElementById('pca-chart').src = 'data:image/png;base64,' + data.pca_chart;
453 |                 document.getElementById('histogram-chart').src = 'data:image/png;base64,' + data.histogram_chart;
454 |                 
455 |                 // Update metrics
456 |                 const metricsContainer = document.getElementById('metrics-container');
457 |                 metricsContainer.innerHTML = '';
458 |                 
459 |                 const metrics = data.analysis.metrics;
460 |                 const metricCards = [
461 |                     { label: 'Dimensions', value: metrics.dimension_count },
462 |                     { label: 'Mean Value', value: metrics.mean_value.toFixed(6) },
463 |                     { label: 'Standard Deviation', value: metrics.std_dev.toFixed(6) },
464 |                     { label: 'Min Value', value: `${metrics.min_value.toFixed(6)} (dim ${metrics.min_dimension})` },
465 |                     { label: 'Max Value', value: `${metrics.max_value.toFixed(6)} (dim ${metrics.max_dimension})` },
466 |                     { label: 'Positive Values', value: `${metrics.positive_count} (${(metrics.positive_count/metrics.dimension_count*100).toFixed(2)}%)` },
467 |                     { label: 'Negative Values', value: `${metrics.negative_count} (${(metrics.negative_count/metrics.dimension_count*100).toFixed(2)}%)` },
468 |                     { label: 'Zero Values', value: metrics.zero_count },
469 |                     { label: 'Significant Dimensions', value: `${metrics.significant_dims} (>${0.1})` }
470 |                 ];
471 |                 
472 |                 metricCards.forEach(metric => {
473 |                     const card = document.createElement('div');
474 |                     card.className = 'bg-gray-50 p-4 rounded border border-gray-200';
475 |                     card.innerHTML = `
476 |                         <h3 class="font-medium text-gray-700">${metric.label}</h3>
477 |                         <p class="text-xl font-semibold mt-1">${metric.value}</p>
478 |                     `;
479 |                     metricsContainer.appendChild(card);
480 |                 });
481 |                 
482 |                 // Update clusters
483 |                 const clustersContainer = document.getElementById('clusters-container');
484 |                 clustersContainer.innerHTML = '';
485 |                 
486 |                 if (data.analysis.clusters.length === 0) {
487 |                     clustersContainer.innerHTML = '<p class="text-gray-500">No significant dimension clusters detected.</p>';
488 |                 } else {
489 |                     data.analysis.clusters.forEach(cluster => {
490 |                         const clusterEl = document.createElement('div');
491 |                         clusterEl.className = 'bg-gray-50 p-4 rounded border border-gray-200';
492 |                         clusterEl.innerHTML = `
493 |                             <h3 class="font-medium text-gray-700">Cluster #${cluster.id}: Dimensions ${cluster.start_dim}-${cluster.end_dim}</h3>
494 |                             <div class="grid grid-cols-1 md:grid-cols-3 gap-2 mt-2">
495 |                                 <div>
496 |                                     <span class="text-gray-600 text-sm">Size:</span>
497 |                                     <span class="font-medium">${cluster.size} dimensions</span>
498 |                                 </div>
499 |                                 <div>
500 |                                     <span class="text-gray-600 text-sm">Avg Value:</span>
501 |                                     <span class="font-medium">${cluster.avg_value.toFixed(6)}</span>
502 |                                 </div>
503 |                                 <div>
504 |                                     <span class="text-gray-600 text-sm">Max Value:</span>
505 |                                     <span class="font-medium">${cluster.max_value.toFixed(6)} (dim ${cluster.max_dim})</span>
506 |                                 </div>
507 |                             </div>
508 |                         `;
509 |                         clustersContainer.appendChild(clusterEl);
510 |                     });
511 |                 }
512 |                 
513 |                 // Update Claude analysis
514 |                 document.getElementById('claude-analysis').innerHTML = data.claude_analysis.replace(/\\n/g, '<br>');
515 |                 
516 |                 // Show results
517 |                 document.getElementById('loading-container').classList.add('hidden');
518 |                 document.getElementById('results-container').classList.remove('hidden');
519 |                 
520 |                 // Scroll to results
521 |                 document.getElementById('results-container').scrollIntoView({ behavior: 'smooth' });
522 |                 
523 |             } catch (error) {
524 |                 console.error('Error:', error);
525 |                 alert('An error occurred during analysis. Please try again.');
526 |                 document.getElementById('loading-container').classList.add('hidden');
527 |             }
528 |         });
529 |     </script>
530 | </body>
531 | </html>
532 | """
533 | 
@app.route('/')
def index():
    """Serve the single-page UI rendered from ``HTML_TEMPLATE``."""
    page = render_template_string(HTML_TEMPLATE)
    return page
537 | 
@app.route('/analyze', methods=['POST'])
def analyze():
    """Run the full analysis pipeline for the posted content.

    Expects a JSON body of the form ``{"content": "<text>"}``. Responds with
    a JSON object holding the five charts (base64 PNG strings), the metrics
    from ``analyze_embedding`` and Claude's written analysis. A missing,
    malformed or empty ``content`` yields a 400 response with an ``error``
    message instead of calling the external APIs.
    """
    # silent=True returns None instead of raising on a missing or invalid
    # JSON body, so bad requests can be answered with a clean 400.
    payload = request.get_json(silent=True)
    content = payload.get('content', '') if isinstance(payload, dict) else ''
    if not isinstance(content, str) or not content.strip():
        return jsonify({'error': 'No content provided.'}), 400

    # Get embedding from Gemini API
    embedding = get_embedding(content)

    # Generate charts
    overview_chart = plot_embedding_overview(embedding)
    top_dimensions_chart = plot_top_dimensions(embedding)
    clusters_chart = plot_dimension_clusters(embedding)
    pca_chart = plot_pca(embedding)
    histogram_chart = plot_activation_histogram(embedding)

    # Analyze embedding
    analysis = analyze_embedding(embedding)

    # Get Claude analysis
    claude_analysis = analyze_with_claude(embedding, content)

    # Return all data
    return jsonify({
        'overview_chart': overview_chart,
        'top_dimensions_chart': top_dimensions_chart,
        'clusters_chart': clusters_chart,
        'pca_chart': pca_chart,
        'histogram_chart': histogram_chart,
        'analysis': analysis,
        'claude_analysis': claude_analysis
    })
569 | 
if __name__ == '__main__':
    # Startup banner: reminds the user about the API keys and where the UI is served
    print("Starting Embedding Analysis Tool...")
    print("If you haven't already, set GOOGLE_API_KEY and ANTHROPIC_API_KEY to your actual API keys.")
    print("Visit http://127.0.0.1:5000 in your browser to use the tool.")
    # debug=True turns on Flask's reloader and interactive debugger; meant for local use only
    app.run(debug=True)
575 | 


--------------------------------------------------------------------------------