├── .env ├── .gitignore ├── README.md ├── app ├── backend │ ├── qa │ │ ├── Dockerfile │ │ ├── __pycache__ │ │ │ ├── embedder.cpython-311.pyc │ │ │ ├── endpoint_request.cpython-311.pyc │ │ │ ├── generate_answer.cpython-311.pyc │ │ │ ├── main.cpython-311.pyc │ │ │ └── utils.cpython-311.pyc │ │ ├── database_manager │ │ │ ├── __init__.py │ │ │ ├── cassandra.py │ │ │ └── redis.py │ │ ├── init.cql │ │ ├── llm_call │ │ │ ├── __init__.py │ │ │ ├── endpoint_request.py │ │ │ └── generate_answer.py │ │ ├── main.py │ │ ├── requirements.txt │ │ └── utils.py │ └── reference_doc │ │ ├── Dockerfile │ │ ├── __pycache__ │ │ ├── document_preprocessor.cpython-311.pyc │ │ ├── embedder.cpython-311.pyc │ │ ├── main.cpython-311.pyc │ │ ├── redis_vectordb.cpython-311.pyc │ │ ├── semantic_chunking.cpython-311.pyc │ │ └── utils.cpython-311.pyc │ │ ├── database_manager │ │ ├── __init__.py │ │ ├── minio.py │ │ └── redis.py │ │ ├── document │ │ ├── __init__.py │ │ ├── document_preprocessor.py │ │ ├── embedder.py │ │ └── semantic_chunking.py │ │ ├── main.py │ │ ├── requirements.txt │ │ ├── utils.py │ │ └── website_content.txt └── frontend │ ├── Dockerfile │ ├── QA.py │ ├── __pycache__ │ ├── test_ba.cpython-311.pyc │ ├── utils.cpython-310.pyc │ └── utils.cpython-311.pyc │ ├── requirements.txt │ └── utils.py ├── docker-compose.yaml ├── iac ├── ansible │ ├── deploy_jenkins │ │ ├── create_compute_instance.yaml │ │ └── deploy_jenkins.yaml │ ├── inventory │ └── vm_requirements.txt └── terraform │ ├── main.tf │ ├── terraform.tfstate.backup │ └── variables.tf ├── images ├── demo-0.png ├── demo-1.png ├── deployment.png ├── elastic.png ├── frontend.png ├── jaeger.png ├── metric.png └── system.png ├── jenkins ├── Dockerfile └── docker-compose.yaml ├── k8s-yaml ├── backend │ ├── chat.yaml │ └── doc.yaml ├── db │ ├── cassandra │ │ ├── cassandra-deployment.yaml │ │ └── cassandra-init-job.yaml │ ├── minio.yaml │ └── redis.yaml ├── frontend │ └── frontend.yaml └── nginx │ ├── nginx.conf │ └── nginx.yaml └── observability ├── elasticcloud └── deploy │ ├── README.md │ ├── eck-operator │ ├── crds.yaml │ └── operator.yaml │ ├── eck-stack │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── charts │ │ ├── eck-beats │ │ │ ├── .helmignore │ │ │ ├── Chart.yaml │ │ │ ├── examples │ │ │ │ └── filebeat_no_autodiscover.yaml │ │ │ ├── templates │ │ │ │ ├── NOTES.txt │ │ │ │ ├── _helpers.tpl │ │ │ │ ├── beats.yaml │ │ │ │ ├── cluster-role-binding.yaml │ │ │ │ ├── cluster-role.yaml │ │ │ │ ├── service-account.yaml │ │ │ │ └── tests │ │ │ │ │ ├── beats-auditbeat-example_test.yaml │ │ │ │ │ ├── beats-filebeat-example_test.yaml │ │ │ │ │ ├── beats-heartbeat-example_test.yaml │ │ │ │ │ ├── beats-metricbeat-example_test.yaml │ │ │ │ │ ├── beats-packetbeat-example_test.yaml │ │ │ │ │ └── beats_test.yaml │ │ │ └── values.yaml │ │ ├── eck-elasticsearch │ │ │ ├── .helmignore │ │ │ ├── Chart.yaml │ │ │ ├── examples │ │ │ │ ├── hot-warm-cold.yaml │ │ │ │ └── ingress │ │ │ │ │ ├── elasticsearch-ingress-aks.yaml │ │ │ │ │ ├── elasticsearch-ingress-eks-alb.yaml │ │ │ │ │ ├── elasticsearch-ingress-eks-nlb.yaml │ │ │ │ │ └── elasticsearch-ingress-gke.yaml │ │ │ ├── templates │ │ │ │ ├── NOTES.txt │ │ │ │ ├── _helpers.tpl │ │ │ │ ├── elasticsearch.yaml │ │ │ │ ├── ingress.yaml │ │ │ │ └── tests │ │ │ │ │ ├── elasticsearch_test.yaml │ │ │ │ │ └── ingress_test.yaml │ │ │ └── values.yaml │ │ └── eck-kibana │ │ │ ├── .helmignore │ │ │ ├── Chart.yaml │ │ │ ├── examples │ │ │ ├── http-configuration.yaml │ │ │ └── ingress │ │ │ │ └── kibana-gke.yaml │ │ │ ├── templates 
│ │ │ ├── NOTES.txt │ │ │ ├── _helpers.tpl │ │ │ ├── ingress.yaml │ │ │ ├── kibana.yaml │ │ │ └── tests │ │ │ │ ├── ingress_test.yaml │ │ │ │ └── kibana_test.yaml │ │ │ └── values.yaml │ ├── kkk.ym │ ├── run.sh │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ └── tests │ │ │ ├── beats_test.yaml │ │ │ ├── elastic-agent_test.yaml │ │ │ ├── elasticsearch_test.yaml │ │ │ └── kibana_test.yaml │ └── values.yaml │ └── helm-migrate.sh ├── inotify └── inotify-limits.yaml ├── metric ├── Chart.yaml ├── charts │ ├── alertmanager │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── configmap.yaml │ │ │ ├── deployment.yaml │ │ │ └── service.yaml │ │ └── values.yaml │ ├── cadvisor │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── daemonset.yaml │ │ │ └── service.yaml │ │ └── values.yaml │ ├── grafana │ │ ├── Chart.yaml │ │ ├── grafana-config │ │ │ └── dashboards │ │ │ │ └── 1860_rev31.json │ │ ├── templates │ │ │ ├── configmap.yaml │ │ │ ├── deployment.yaml │ │ │ └── service.yaml │ │ └── values.yaml │ ├── node-exporter │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── daemonset.yaml │ │ │ └── service.yaml │ │ └── values.yaml │ └── prometheus │ │ ├── Chart.yaml │ │ ├── templates │ │ ├── configmap.yaml │ │ ├── deployment.yaml │ │ ├── role.yaml │ │ ├── rolebinding.yaml │ │ ├── service.yaml │ │ └── serviceaccount.yaml │ │ └── values.yaml ├── templates │ └── _helpers.tpl └── values.yaml └── my-jaeger ├── jaeger.yaml ├── nohup.out └── port-forward.log /.env: -------------------------------------------------------------------------------- 1 | DOC_FASTAPI_PORT=8000 2 | CHAT_FASTAPI_PORT=8001 3 | FRONTEND_PORT=8501 4 | NGINX_PORT=80 5 | 6 | # MinIO credentials 7 | MINIO_ACCESS_KEY=admin 8 | MINIO_SECRET_KEY=admin123 9 | 10 | # Jaeger configuration 11 | JAEGER_AGENT_PORT=6831 12 | 13 | # Cassandra configuration 14 | CASSANDRA_PORT=9042 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | test 2 | env 3 | __pycache__/ 4 | 5 | # Ignore .terraform directory and its contents 6 | .terraform/ 7 | .github/ 8 | 9 | # Ignore .terraform.lock.hcl files 10 | *.terraform.lock.hcl 11 | 12 | # Ignore any terraform state files 13 | *.tfstate 14 | 15 | data 16 | reids_data 17 | iac/ansible/secret/ -------------------------------------------------------------------------------- /app/backend/qa/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use PyTorch base image 2 | FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-devel 3 | 4 | # Create application directory and copy files 5 | WORKDIR /app 6 | COPY . 
/app/ 7 | 8 | 9 | 10 | # Install dependencies 11 | RUN pip install --no-cache-dir -r requirements.txt 12 | 13 | # Add a wait-for-it script to handle service dependencies 14 | ADD https://raw.githubusercontent.com/vishnubob/wait-for-it/master/wait-for-it.sh /wait-for-it.sh 15 | RUN chmod +x /wait-for-it.sh 16 | 17 | # Set environment variable for application home 18 | ENV HOME=/app 19 | 20 | # Define the entrypoint to wait for Cassandra and start the application 21 | ENTRYPOINT ["/wait-for-it.sh", "cassandra:9042", "-t", "180", "--"] 22 | 23 | # Start the application 24 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8002"] 25 | -------------------------------------------------------------------------------- /app/backend/qa/__pycache__/embedder.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/qa/__pycache__/embedder.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/qa/__pycache__/endpoint_request.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/qa/__pycache__/endpoint_request.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/qa/__pycache__/generate_answer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/qa/__pycache__/generate_answer.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/qa/__pycache__/main.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/qa/__pycache__/main.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/qa/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/qa/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/qa/database_manager/__init__.py: -------------------------------------------------------------------------------- 1 | from .redis import RedisManager 2 | from .cassandra import CassandraMessageStore -------------------------------------------------------------------------------- /app/backend/qa/database_manager/cassandra.py: -------------------------------------------------------------------------------- 1 | from cassandra.cluster import Cluster 2 | from datetime import datetime, timezone 3 | from uuid import uuid1 4 | from datetime import datetime 5 | from opentelemetry import trace 6 | import os 7 | import logging 8 | 9 | logging.basicConfig( 10 | level=logging.INFO, # Default log level 11 | format="%(asctime)s [%(levelname)s] %(message)s", # Log format 12 | handlers=[ 13 | logging.StreamHandler() # Log to stdout (container best practice) 14 | ] 15 | ) 16 | logger = logging.getLogger(__name__) 17 | tracer = trace.get_tracer(__name__) 18 | 19 | class 
CassandraMessageStore: 20 | def __init__(self, cassandra_host=None, cassandra_port=None, keyspace="mlops"): 21 | self.cassandra_host = cassandra_host or os.getenv("CASSANDRA_HOST", "localhost") 22 | self.cassandra_port = cassandra_port or int(os.getenv("CASSANDRA_PORT", 9042)) 23 | self.cluster = Cluster([self.cassandra_host], port=self.cassandra_port) 24 | self.session = self.cluster.connect() 25 | self.session.set_keyspace(keyspace) 26 | 27 | def get_chat_history(self, conversation_id, limit=4): 28 | try: 29 | # Query for the most recent messages (newest first); reversed below into chronological order for the chat history 30 | query = """ 31 | SELECT user_id, conversation_id, role, message, timestamp 32 | FROM messages 33 | WHERE conversation_id = ? 34 | ORDER BY timestamp DESC 35 | LIMIT ?; 36 | """ 37 | 38 | # Execute the query 39 | prepared = self.session.prepare(query) 40 | rows = list(self.session.execute(prepared, (conversation_id, limit)))[::-1] # restore oldest-first order 41 | 42 | # Convert the rows to a list of dictionaries for easier handling 43 | chat_history = [ 44 | { 45 | "user_id": row.user_id, 46 | "conversation_id": row.conversation_id, 47 | "role": row.role, 48 | "message": row.message, 49 | "timestamp": row.timestamp, 50 | } 51 | for row in rows 52 | ] 53 | 54 | # Format the chat history as an array of strings in the format "role: message" 55 | formatted_history = [ 56 | f"{msg['role']}: {msg['message']}" for msg in chat_history 57 | ] 58 | 59 | return formatted_history 60 | except Exception as e: 61 | logger.error(f"Failed to retrieve chat history: {e}") 62 | return [] 63 | 64 | def save_message(self, user_id, conversation_id, message, role, timestamp=None): 65 | with tracer.start_as_current_span("save_message") as span: 66 | try: 67 | # Generate conversation ID (TIMEUUID) and default timestamp if not provided 68 | conversation_id = conversation_id or uuid1() # Generate TIMEUUID 69 | timestamp = timestamp or datetime.now(timezone.utc) 70 | 71 | # Set tracing attributes 72 | span.set_attribute("user_id", str(user_id)) 73 | span.set_attribute("conversation_id", str(conversation_id)) 74 | span.set_attribute("role", role) 75 | span.set_attribute("message", message) 76 | span.set_attribute("timestamp", str(timestamp)) 77 | 78 | # Insert query 79 | insert_query = """ 80 | INSERT INTO messages (user_id, conversation_id, role, message, timestamp) 81 | VALUES (?, ?, ?, ?, ?)
82 | """ 83 | prepared = self.session.prepare(insert_query) 84 | 85 | # Execute the query 86 | self.session.execute(prepared, (user_id, conversation_id, role, message, timestamp)) 87 | logger.info(f"Message saved: user_id={user_id}, conversation_id={conversation_id}") 88 | except Exception as e: 89 | span.record_exception(e) 90 | logger.error(f"Failed to save message: {e}") 91 | 92 | def close(self): 93 | self.cluster.shutdown() 94 | -------------------------------------------------------------------------------- /app/backend/qa/database_manager/redis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import redis 3 | import numpy as np 4 | from redis.exceptions import ResponseError 5 | import logging 6 | from redis.commands.search.query import Query 7 | 8 | import torch 9 | from sentence_transformers import SentenceTransformer 10 | 11 | class Embedder: 12 | def __init__(self, model_name="all-MiniLM-L12-v2"): 13 | # Use CUDA when it is available; otherwise fall back to CPU 14 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 15 | self.model = SentenceTransformer(model_name, device=device) 16 | 17 | def embed(self, doc): 18 | """Embed a single document.""" 19 | return self.model.encode(doc, convert_to_numpy=True) 20 | 21 | def embed_chunks(self, chunks): 22 | """Embed multiple chunks.""" 23 | return [ 24 | (chunk, self.embed(chunk)) 25 | for chunk in chunks 26 | ] 27 | 28 | class RedisManager: 29 | def __init__(self, redis_host=None, redis_port=None, vector_dimension=384): 30 | """ 31 | Initialize the RedisManager class. 32 | 33 | Args: 34 | redis_host (str): Hostname for Redis. Defaults to "localhost". 35 | redis_port (int): Port for Redis. Defaults to 6379. 36 | vector_dimension (int): Dimension of vector embeddings. Defaults to 384. 37 | """ 38 | self.redis_host = redis_host or os.getenv("REDIS_HOST", "localhost") 39 | self.redis_port = redis_port or int(os.getenv("REDIS_PORT", 6379)) 40 | self.redis_client = redis.Redis(host=self.redis_host, port=self.redis_port, decode_responses=False) 41 | self.vector_dimension = vector_dimension 42 | self.embedder = Embedder() 43 | self.logger = logging.getLogger(__name__) 44 | 45 | def check_index_exists(self, index_name): 46 | """ 47 | Check if an index exists in Redis. 48 | 49 | Args: 50 | index_name (str): Name of the index to check. 51 | 52 | Returns: 53 | bool: True if the index exists, False otherwise. 54 | """ 55 | try: 56 | self.redis_client.execute_command("FT.INFO", index_name) 57 | return True 58 | except ResponseError: 59 | return False 60 | 61 | def retrieve_contexts(self, query, user_id="user123", chat_id="chat456", top_k=3): 62 | """ 63 | Retrieve relevant contexts from the Redis index based on a query. 64 | 65 | Args: 66 | query (str): The query to search for. 67 | user_id (str): User ID for identifying the index. Defaults to "user123". 68 | chat_id (str): Chat ID for identifying the index. Defaults to "chat456". 69 | top_k (int): Number of top results to retrieve. Defaults to 3. 70 | Note: query embeddings are generated by this manager's own Embedder instance. 71 | 72 | Returns: 73 | list: A list of relevant documents.
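        Example (illustrative sketch; assumes a Redis Stack index for this
        user/chat has already been populated by the reference-doc service):

            manager = RedisManager()
            docs = manager.retrieve_contexts("What did the knight attack?", user_id="user123", chat_id="chat456", top_k=3)
            for doc in (docs or []):
                # Each hit exposes vector_score and text, the fields returned by the KNN search below
                print(doc.vector_score, doc.text)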
74 | """ 75 | index_name = f"reference:{user_id}:{chat_id}" 76 | 77 | if not self.check_index_exists(index_name): 78 | self.logger.info(f"Index {index_name} does not exist.") 79 | return None 80 | 81 | try: 82 | encoded_query = self.embedder.embed(query) 83 | search_query = ( 84 | Query(f'(*)=>[KNN {top_k} @vector $query_vector AS vector_score]') 85 | .sort_by('vector_score') 86 | .return_fields('vector_score', 'text') 87 | .dialect(2) 88 | ) 89 | results = self.redis_client.ft(index_name).search( 90 | search_query, 91 | {'query_vector': np.array(encoded_query, dtype=np.float32).tobytes()} 92 | ) 93 | return results.docs if results.docs else [] 94 | except Exception as e: 95 | self.logger.error(f"Error while retrieving data from {index_name}: {e}") 96 | return [] 97 | -------------------------------------------------------------------------------- /app/backend/qa/init.cql: -------------------------------------------------------------------------------- 1 | CREATE KEYSPACE IF NOT EXISTS mlops 2 | WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; 3 | 4 | USE mlops; 5 | 6 | CREATE TABLE IF NOT EXISTS users ( 7 | user_id TEXT PRIMARY KEY, 8 | username TEXT, 9 | email TEXT, 10 | created_at TIMESTAMP 11 | ); 12 | 13 | CREATE TABLE IF NOT EXISTS sessions ( 14 | user_id TEXT, 15 | conversation_id TEXT, 16 | started_at TIMESTAMP, 17 | PRIMARY KEY (user_id, conversation_id) 18 | ); 19 | 20 | CREATE TABLE IF NOT EXISTS messages ( 21 | user_id TEXT, 22 | conversation_id TEXT, 23 | role TEXT, 24 | message TEXT, 25 | timestamp TIMESTAMP, 26 | PRIMARY KEY ((conversation_id), timestamp) 27 | ); 28 | -------------------------------------------------------------------------------- /app/backend/qa/llm_call/__init__.py: -------------------------------------------------------------------------------- 1 | from .generate_answer import GenerateRAGAnswer -------------------------------------------------------------------------------- /app/backend/qa/llm_call/endpoint_request.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | from opentelemetry import trace 4 | from opentelemetry.sdk.trace import TracerProvider 5 | from opentelemetry.sdk.resources import SERVICE_NAME, Resource 6 | from opentelemetry.exporter.jaeger.thrift import JaegerExporter 7 | from opentelemetry.sdk.trace.export import BatchSpanProcessor 8 | 9 | # Configure OpenTelemetry Tracer 10 | resource = Resource(attributes={SERVICE_NAME: "chat-service"}) 11 | provider = TracerProvider(resource=resource) 12 | jaeger_exporter = JaegerExporter( 13 | agent_host_name=os.getenv("JAEGER_AGENT_HOST", "jaeger-agent.observability.svc.cluster.local"), 14 | agent_port=int(os.getenv("JAEGER_AGENT_PORT", 6831)), 15 | ) 16 | provider.add_span_processor(BatchSpanProcessor(jaeger_exporter)) 17 | trace.set_tracer_provider(provider) 18 | tracer = trace.get_tracer(__name__) 19 | 20 | # Get the model gateway IP 21 | ASM_GATEWAY_IP = os.environ.get("ASM_GATEWAY_IP", "localhost") 22 | MODEL_ENDPOINT = f"http://{ASM_GATEWAY_IP}:80/v2/models/ensemble/generate" 23 | 24 | 25 | def get_custom_model_response(message, context=None, max_tokens=250): 26 | """ 27 | Sends a request to the custom model API and retrieves the response. 28 | 29 | Parameters: 30 | message (str): The user's input message. 31 | context (str, optional): Additional context information. 32 | max_tokens (int): The maximum number of tokens to generate. 33 | 34 | Returns: 35 | str: The generated response from the model. 
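    Note: `context` is accepted for interface symmetry but is not folded into
    the request payload below.

    Example (illustrative; assumes the model-serving gateway behind
    ASM_GATEWAY_IP is reachable):

        answer = get_custom_model_response("What is a spotlist?", max_tokens=100)
        print(answer)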
36 | """ 37 | payload = { 38 | "text_input": message, 39 | "max_tokens": max_tokens, 40 | "bad_words": "", 41 | "stop_words": "", 42 | "pad_id": 2, 43 | "end_id": 2 44 | } 45 | headers = { 46 | "Host": "llama.default.example.com", 47 | "Content-Type": "application/json" 48 | } 49 | 50 | try: 51 | response = requests.post(MODEL_ENDPOINT, json=payload, headers=headers) 52 | response.raise_for_status() 53 | 54 | output = response.json().get("text_output", "") 55 | return output.strip() 56 | except Exception as e: 57 | raise Exception(f"An error occurred: {e}") 58 | 59 | 60 | def standalone_question(query="", chat_history="", max_tokens=1000): 61 | """ 62 | Create a SINGLE standalone question based on a new question and chat history. 63 | If the new question can stand on its own, return it directly. 64 | 65 | Parameters: 66 | query (str): The new question. 67 | chat_history (str): The chat history. 68 | max_tokens (int): The maximum number of tokens to generate. 69 | 70 | Returns: 71 | str: The standalone question. 72 | """ 73 | prompt = f"""Create a SINGLE standalone question. The question should be based on the New question plus the Chat history. \ 74 | If the New question can stand on its own you should return the New question. New question: \"{query}\", Chat history: \"{chat_history}\".""" 75 | 76 | return get_custom_model_response(prompt, max_tokens=max_tokens) 77 | 78 | 79 | if __name__ == '__main__': 80 | print(standalone_question("What is a spotlist?")) -------------------------------------------------------------------------------- /app/backend/qa/llm_call/generate_answer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from database_manager import RedisManager, CassandraMessageStore 4 | from .endpoint_request import standalone_question, get_openai_stream_response 5 | from datetime import datetime, timezone 6 | from datetime import datetime 7 | import logging 8 | from opentelemetry import trace 9 | from opentelemetry.sdk.trace import TracerProvider 10 | from opentelemetry.sdk.resources import SERVICE_NAME, Resource 11 | from opentelemetry.exporter.jaeger.thrift import JaegerExporter 12 | from opentelemetry.sdk.trace.export import BatchSpanProcessor 13 | 14 | 15 | # Configure logging 16 | logging.basicConfig( 17 | level=logging.INFO, # Default log level 18 | format="%(asctime)s [%(levelname)s] %(message)s", # Log format 19 | handlers=[ 20 | logging.StreamHandler() # Log to stdout (container best practice) 21 | ] 22 | ) 23 | logger = logging.getLogger(__name__) 24 | 25 | # Configure OpenTelemetry Tracer 26 | resource = Resource(attributes={SERVICE_NAME: "rag-system"}) 27 | provider = TracerProvider(resource=resource) 28 | jaeger_exporter = JaegerExporter( 29 | agent_host_name=os.getenv("JAEGER_AGENT_HOST", "jaeger-agent.observability.svc.cluster.local"), 30 | agent_port=int(os.getenv("JAEGER_AGENT_PORT", 6831)), 31 | ) 32 | provider.add_span_processor(BatchSpanProcessor(jaeger_exporter)) 33 | trace.set_tracer_provider(provider) 34 | tracer = trace.get_tracer(__name__) 35 | 36 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 37 | 38 | 39 | class GenerateRAGAnswer: 40 | def __init__(self): 41 | self.redis_manager = RedisManager() 42 | self.cassandra_manager = CassandraMessageStore() 43 | 44 | def generate_llm_answer(self, query, user_id="user123", conversation_id="chat456", chat_history=None): 45 | with tracer.start_as_current_span("generate_llm_answer") as span: 46 | span.set_attribute("query", query) 47 | 
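# RAG flow of the statements below: fetch the top-k reference chunks for this
# user/chat from Redis, condense (query + chat history) into a standalone question,
# stream the model's answer chunks back to the caller, then persist both turns to Cassandra.
# Note: get_openai_stream_response is imported above but is not defined in the
# endpoint_request module captured here; it is assumed to yield response text chunks.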
query_time = datetime.now(timezone.utc) 48 | contexts = self.redis_manager.retrieve_contexts(query, user_id, conversation_id) 49 | 50 | if chat_history: 51 | chat_history_joined = "\n".join(chat_history) 52 | else: 53 | chat_history_joined = "" 54 | standalone_final_query = standalone_question(query=query, chat_history=chat_history_joined) 55 | 56 | logger.info(f"standalone_final_query:\n {standalone_final_query}") 57 | 58 | final_response = "" 59 | for chunk in get_openai_stream_response(message=standalone_final_query, context=contexts): 60 | yield chunk 61 | final_response += chunk 62 | 63 | chat_history.append(f"user: {query}") 64 | chat_history.append(f"assistant: {final_response}") 65 | if len(chat_history) % 2 == 1: 66 | chat_history = chat_history[1:] 67 | if len(chat_history) > 4: 68 | chat_history = chat_history[2:] 69 | 70 | self.cassandra_manager.save_message(user_id=user_id, conversation_id=conversation_id, message=query, role="User", timestamp=query_time) 71 | self.cassandra_manager.save_message(user_id=user_id, conversation_id=conversation_id, message=final_response, role="Bot") 72 | 73 | -------------------------------------------------------------------------------- /app/backend/qa/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, WebSocket, WebSocketDisconnect 2 | from llm_call import GenerateRAGAnswer 3 | from database_manager import CassandraMessageStore 4 | import asyncio 5 | import os 6 | import logging 7 | import json 8 | 9 | from opentelemetry import trace 10 | from opentelemetry.sdk.trace import TracerProvider 11 | from opentelemetry.sdk.resources import SERVICE_NAME, Resource 12 | from opentelemetry.exporter.jaeger.thrift import JaegerExporter 13 | from opentelemetry.sdk.trace.export import BatchSpanProcessor 14 | 15 | 16 | # Configure OpenTelemetry Tracer 17 | resource = Resource(attributes={SERVICE_NAME: "chat-service"}) 18 | provider = TracerProvider(resource=resource) 19 | jaeger_exporter = JaegerExporter( 20 | agent_host_name="jaeger-agent.observability.svc.cluster.local", 21 | agent_port=6831, 22 | ) 23 | provider.add_span_processor(BatchSpanProcessor(jaeger_exporter)) 24 | trace.set_tracer_provider(provider) 25 | tracer = trace.get_tracer(__name__) 26 | 27 | # Configure logging 28 | logging.basicConfig( 29 | level=logging.INFO, # Default log level 30 | format="%(asctime)s [%(levelname)s] %(message)s", # Log format 31 | handlers=[ 32 | logging.StreamHandler() # Log to stdout (container best practice) 33 | ] 34 | ) 35 | logger = logging.getLogger(__name__) 36 | 37 | app = FastAPI() 38 | rag = GenerateRAGAnswer() 39 | 40 | class UserState: 41 | """Maintains state for a single user.""" 42 | def __init__(self): 43 | self.counter = 0 # Example state: a counter 44 | self.history_init = True 45 | self.cassandra = CassandraMessageStore() 46 | 47 | def retrieve_chat_history(self, conversation_id): 48 | chat_history = self.cassandra.get_chat_history(conversation_id=conversation_id) 49 | return chat_history 50 | 51 | def increment_counter(self): 52 | self.counter += 1 53 | return self.counter 54 | 55 | @app.websocket("/ws/{conversation_id}") 56 | async def websocket_message_response(websocket: WebSocket, conversation_id: str): 57 | user_state = UserState() 58 | await websocket.accept() 59 | 60 | try: 61 | while True: 62 | # Receive a message from the client (e.g., chat_id, message, timestamp) 63 | data = await asyncio.wait_for(websocket.receive_text(), timeout=300) 64 | # 
--------------------------------------------------------------------- 65 | # Assuming the message is a JSON string with fields chat_id, message, timestamp 66 | message_data = json.loads(data) 67 | user_id = message_data["user_id"] 68 | conversation_id = message_data["chat_id"] 69 | message = message_data["message"] 70 | 71 | 72 | with tracer.start_as_current_span("message") as span: 73 | span.set_attribute("user_id", user_id) 74 | span.set_attribute("conversation_id", conversation_id) 75 | span.set_attribute("message", message) 76 | 77 | try: 78 | # Generate an LLM answer using RAG (or any other method) 79 | if user_state.history_init == True: 80 | user_state.history_init = False 81 | chat_history = user_state.retrieve_chat_history(conversation_id=conversation_id) 82 | if not chat_history: 83 | chat_history = ["There is currently no message history."] 84 | 85 | generator = rag.generate_llm_answer(query=message, user_id=user_id, conversation_id=conversation_id, chat_history=chat_history) 86 | 87 | logger.info(f"chat_history: {chat_history}") 88 | 89 | # Stream responses to the WebSocket client 90 | for answer in generator: 91 | await websocket.send_text(answer) 92 | 93 | await websocket.send_text("/end") 94 | 95 | except Exception as e: 96 | span.record_exception(e) 97 | logger.error(f"Error generating response: {e}") 98 | await websocket.send_text("Error generating response") 99 | break 100 | 101 | except WebSocketDisconnect: 102 | logger.info(f"Client {user_id} disconnected") 103 | -------------------------------------------------------------------------------- /app/backend/qa/requirements.txt: -------------------------------------------------------------------------------- 1 | sentence-transformers==3.0.1 2 | torch==2.3.1 3 | transformers==4.42.4 4 | fastapi[standard] 5 | redis==5.2.1 6 | datasets 7 | ragas 8 | requests 9 | scikit-learn 10 | opentelemetry-api 11 | opentelemetry-sdk 12 | opentelemetry-exporter-jaeger-thrift 13 | cassandra-driver 14 | websockets 15 | openai==0.27.5 -------------------------------------------------------------------------------- /app/backend/reference_doc/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-devel 2 | 3 | # Create the app directory 4 | RUN mkdir /app 5 | COPY . 
/app/ 6 | 7 | # Set working directory to /app 8 | WORKDIR /app 9 | 10 | # Update apt-get and install dependencies 11 | RUN apt-get update && \ 12 | apt-get clean && \ 13 | rm -rf /var/lib/apt/lists/* 14 | 15 | # Install Python dependencies 16 | RUN pip install --no-cache-dir -r requirements.txt 17 | 18 | # Set environment variable for application home 19 | ENV HOME=/app 20 | 21 | # Start Redis and then your application 22 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8001"] 23 | 24 | -------------------------------------------------------------------------------- /app/backend/reference_doc/__pycache__/document_preprocessor.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/reference_doc/__pycache__/document_preprocessor.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/reference_doc/__pycache__/embedder.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/reference_doc/__pycache__/embedder.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/reference_doc/__pycache__/main.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/reference_doc/__pycache__/main.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/reference_doc/__pycache__/redis_vectordb.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/reference_doc/__pycache__/redis_vectordb.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/reference_doc/__pycache__/semantic_chunking.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/reference_doc/__pycache__/semantic_chunking.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/reference_doc/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/backend/reference_doc/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /app/backend/reference_doc/database_manager/__init__.py: -------------------------------------------------------------------------------- 1 | from .redis import RedisVectorIndexManager 2 | from .minio import MinioManager -------------------------------------------------------------------------------- /app/backend/reference_doc/database_manager/minio.py: -------------------------------------------------------------------------------- 1 | import os 2 | from minio import Minio 3 | import io 4 | from opentelemetry import trace 5 | from PyPDF2 import PdfReader 6 | import logging 7 | 8 | logging.basicConfig( 9 | 
level=logging.INFO, 10 | format="%(asctime)s [%(levelname)s] %(message)s", 11 | handlers=[ 12 | logging.StreamHandler() 13 | ] 14 | ) 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | class MinioManager: 19 | def __init__(self, minio_host=None, minio_port=None, minio_access_key=None, minio_secret_key=None, minio_secure=False): 20 | self.minio_host = minio_host or os.getenv("MINIO_HOST", "localhost") 21 | self.minio_port = minio_port or os.getenv("MINIO_PORT", "9000") 22 | self.minio_access_key = minio_access_key or os.getenv("MINIO_ACCESS_KEY", "admin") 23 | self.minio_secret_key = minio_secret_key or os.getenv("MINIO_SECRET_KEY", "admin123") 24 | self.minio_secure = minio_secure 25 | 26 | self.minio_client = Minio( 27 | f"{self.minio_host}:{self.minio_port}", 28 | access_key=self.minio_access_key, 29 | secret_key=self.minio_secret_key, 30 | secure=self.minio_secure 31 | ) 32 | 33 | def ensure_bucket_exists(self, bucket_name: str): 34 | with trace.get_tracer(__name__).start_as_current_span("ensure_bucket_exists") as span: 35 | if not self.minio_client.bucket_exists(bucket_name): 36 | self.minio_client.make_bucket(bucket_name) 37 | 38 | 39 | async def upload_to_minio(self, bucket_name, user_id, chat_id, upload_option, url=None, uploaded_files=None): 40 | """ 41 | Save data to MinIO. 42 | 43 | Args: 44 | bucket_name: Name of the MinIO bucket. 45 | user_id: ID of the user uploading the content. 46 | chat_id: ID of the chat associated with the upload. 47 | upload_option: Type of upload (e.g., "Upload Files" or "Website URL"). 48 | url: URL to be processed, if applicable. 49 | uploaded_files: List of uploaded file objects, if applicable. 50 | """ 51 | # Ensure the bucket exists 52 | if not self.minio_client.bucket_exists(bucket_name): 53 | self.minio_client.make_bucket(bucket_name) 54 | 55 | if upload_option == "Website URL" and url: 56 | # Handle URL uploads 57 | unique_id = hash(url) # Generate a unique ID for the URL 58 | object_name = f"users/{user_id}/chats/{chat_id}/reference-documents/urls/{unique_id}.txt" 59 | try: 60 | url_content = url.encode("utf-8") 61 | data = io.BytesIO(url_content) 62 | self.minio_client.put_object(bucket_name, object_name, data, length=len(url_content)) 63 | print(f"URL '{url}' saved successfully to MinIO.") 64 | except Exception as e: 65 | print(f"Failed to upload URL to MinIO: {e}") 66 | raise 67 | 68 | elif upload_option == "Upload Files" and uploaded_files: 69 | # Handle file uploads 70 | for uploaded_file in uploaded_files: 71 | try: 72 | file_content = None 73 | file_type = "txt" if uploaded_file.content_type == "text/plain" else "pdf" 74 | 75 | # Extract content from PDF or text file 76 | if file_type == "pdf": 77 | with io.BytesIO(await uploaded_file.read()) as file_stream: 78 | reader = PdfReader(file_stream) 79 | file_content = "".join(page.extract_text() for page in reader.pages).encode("utf-8") 80 | elif file_type == "txt": 81 | file_content = await uploaded_file.read() 82 | 83 | if not file_content: 84 | raise ValueError(f"Unable to extract content from file: {uploaded_file.filename}") 85 | 86 | # Upload the file content to MinIO 87 | object_name = f"users/{user_id}/chats/{chat_id}/reference-documents/{uploaded_file.filename}" 88 | data = io.BytesIO(file_content) 89 | self.minio_client.put_object(bucket_name, object_name, data, length=data.getbuffer().nbytes) 90 | print(f"File '{uploaded_file.filename}' uploaded successfully to MinIO.") 91 | except Exception as e: 92 | print(f"Failed to upload file '{uploaded_file.filename}' to MinIO: {e}") 
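# Re-raise so the caller can surface the per-file failure instead of silently skipping it.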
93 | raise 94 | else: 95 | print("No valid upload option or data provided.") 96 | raise ValueError("Invalid upload option or no data to upload.") 97 | 98 | async def delete_from_minio(self, bucket_name, user_id, chat_id, file_name=None, upload_option=None): 99 | """ 100 | Delete data from MinIO. 101 | 102 | Args: 103 | bucket_name: Name of the MinIO bucket. 104 | user_id: ID of the user associated with the file. 105 | chat_id: ID of the chat associated with the file. 106 | file_name: Name of the file to delete (for "Upload Files"). 107 | upload_option: Type of upload (e.g., "Upload Files" or "Website URL"). 108 | """ 109 | try: 110 | # Construct the object name based on the upload option 111 | if upload_option == "Website URL": 112 | unique_id = hash(file_name) # Generate a unique ID for the URL 113 | object_name = f"users/{user_id}/chats/{chat_id}/reference-documents/urls/{unique_id}.txt" 114 | logger.info(f"remove object name: {object_name}") 115 | elif upload_option == "Upload Files" and file_name: 116 | object_name = f"users/{user_id}/chats/{chat_id}/reference-documents/{file_name}" 117 | logger.info(f"remove object name: {object_name}") 118 | else: 119 | print("Invalid upload option or missing file name.") 120 | raise ValueError("Invalid upload option or missing file name.") 121 | 122 | # Check if the object exists 123 | if not self.minio_client.stat_object(bucket_name, object_name): 124 | print(f"Object '{object_name}' does not exist in bucket '{bucket_name}'.") 125 | raise FileNotFoundError(f"Object '{object_name}' not found.") 126 | 127 | # Delete the object from MinIO 128 | self.minio_client.remove_object(bucket_name, object_name) 129 | print(f"Object '{object_name}' deleted successfully from bucket '{bucket_name}'.") 130 | 131 | except Exception as e: 132 | print(f"Failed to delete object from MinIO: {e}") 133 | raise -------------------------------------------------------------------------------- /app/backend/reference_doc/database_manager/redis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import redis 3 | from redis.commands.search.field import TextField, VectorField 4 | from redis.commands.search.indexDefinition import IndexDefinition, IndexType 5 | from redis.exceptions import ResponseError 6 | from opentelemetry import trace 7 | import logging 8 | 9 | logging.basicConfig( 10 | level=logging.INFO, 11 | format="%(asctime)s [%(levelname)s] %(message)s", 12 | handlers=[ 13 | logging.StreamHandler() 14 | ] 15 | ) 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | class RedisVectorIndexManager: 20 | def __init__(self, redis_host=None, redis_port=None, vector_dimension=384): 21 | self.redis_host = redis_host or os.getenv("REDIS_HOST", "localhost") 22 | self.redis_port = redis_port or int(os.getenv("REDIS_PORT", 6379)) 23 | self.vector_dimension = vector_dimension 24 | self.redis_client = redis.Redis(host=self.redis_host, port=self.redis_port, decode_responses=False) 25 | self.tracer = trace.get_tracer(__name__) 26 | 27 | def create_index(self, index_name): 28 | try: 29 | # Check if the index already exists 30 | if self.redis_client.ft(index_name).info(): 31 | print(f"Index '{index_name}' already exists. 
Skipping creation.") 32 | return 33 | except ResponseError: 34 | # If _info() raises a ResponseError, the index does not exist 35 | pass 36 | 37 | # Define the schema for the index 38 | schema = ( 39 | TextField("$.text", no_stem=True, as_name="text"), 40 | TextField("$.metadata", no_stem=True, as_name="metadata"), 41 | VectorField( 42 | "$.embedding", 43 | "FLAT", 44 | { 45 | "TYPE": "FLOAT32", 46 | "DIM": self.vector_dimension, 47 | "DISTANCE_METRIC": "COSINE", 48 | }, 49 | as_name="vector", 50 | ), 51 | ) 52 | 53 | definition = IndexDefinition(prefix=[index_name], index_type=IndexType.JSON) 54 | 55 | try: 56 | # Create the index 57 | self.redis_client.ft(index_name).create_index(fields=schema, definition=definition) 58 | print(f"Index '{index_name}' created successfully.") 59 | except ResponseError as e: 60 | print(f"Error creating index '{index_name}': {e}") 61 | raise 62 | 63 | def store_chunks(self, doc_id, chunks_and_embeddings): 64 | with self.tracer.start_as_current_span("store_chunks_in_redis") as span: 65 | span.set_attribute("doc_id", doc_id) 66 | try: 67 | pipeline = self.redis_client.pipeline() 68 | for idx, (chunk, embedding) in enumerate(chunks_and_embeddings): 69 | key = f"reference:{doc_id}:{idx}" 70 | data_dict = { 71 | "metadata": doc_id, 72 | "text": chunk, 73 | "embedding": embedding.tolist() 74 | } 75 | pipeline.json().set(key, "$", data_dict) 76 | span.add_event(f"Prepared chunk {idx} for doc_id {doc_id}") 77 | 78 | # Execute the pipeline 79 | pipeline.execute() 80 | span.add_event(f"Stored all chunks for doc_id {doc_id}") 81 | 82 | # Split the doc_id by ':' 83 | parts = doc_id.split(':') 84 | user_id = parts[0] 85 | chat_id = parts[1] 86 | 87 | # Create an index 88 | index_name = f"reference:{user_id}:{chat_id}" 89 | span.add_event(f"Creating Redis index {index_name}") 90 | self.create_index(index_name) 91 | except Exception as e: 92 | span.record_exception(e) 93 | raise 94 | 95 | def delete_chunks(self, doc_id): 96 | """ 97 | Delete all chunks associated with a specific doc_id from Redis. 98 | 99 | Args: 100 | doc_id: The document ID whose chunks should be deleted. 
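        Example (illustrative; uses the same doc_id format as store_chunks and
        the module's own __main__ example):

            manager = RedisVectorIndexManager()
            manager.delete_chunks("user123:chat456")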
101 | """ 102 | with self.tracer.start_as_current_span("delete_chunks_from_redis") as span: 103 | span.set_attribute("doc_id", doc_id) 104 | try: 105 | # Fetch all keys associated with the doc_id 106 | pattern = f"reference:{doc_id}:*" 107 | keys = self.redis_client.keys(pattern) 108 | 109 | if not keys: 110 | span.add_event(f"No keys found for doc_id {doc_id}") 111 | print(f"No chunks found for doc_id {doc_id}.") 112 | return 113 | 114 | # Delete all keys matching the pattern 115 | pipeline = self.redis_client.pipeline() 116 | for key in keys: 117 | pipeline.delete(key) 118 | 119 | pipeline.execute() 120 | span.add_event(f"Deleted all chunks for doc_id {doc_id}") 121 | logger.info(f"Deleted {len(keys)} chunks for doc_id {doc_id}.") 122 | except Exception as e: 123 | span.record_exception(e) 124 | logger.info(f"Failed to delete chunks for doc_id {doc_id}: {e}") 125 | raise 126 | 127 | # Example Usage 128 | if __name__ == "__main__": 129 | redis_host = os.getenv("REDIS_HOST", "localhost") 130 | redis_port = int(os.getenv("REDIS_PORT", 6379)) 131 | vector_dimension = 384 132 | 133 | manager = RedisVectorIndexManager(redis_host, redis_port, vector_dimension) 134 | 135 | # Example document ID and chunks 136 | doc_id = "user123:chat456" 137 | chunks_and_embeddings = [ 138 | ("Chunk 1 text", [0.1, 0.2, 0.3]), 139 | ("Chunk 2 text", [0.4, 0.5, 0.6]), 140 | ] 141 | 142 | manager.store_chunks(doc_id, chunks_and_embeddings) 143 | -------------------------------------------------------------------------------- /app/backend/reference_doc/document/__init__.py: -------------------------------------------------------------------------------- 1 | from .embedder import Embedder 2 | from .semantic_chunking import SemanticChunker -------------------------------------------------------------------------------- /app/backend/reference_doc/document/document_preprocessor.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | from nltk.corpus import stopwords 3 | from nltk.stem import WordNetLemmatizer 4 | from nltk.tokenize import word_tokenize 5 | 6 | class TextPreprocessor: 7 | def __init__(self): 8 | # Ensure necessary NLTK resources are downloaded 9 | nltk.download('punkt', quiet=True) 10 | nltk.download('wordnet', quiet=True) 11 | nltk.download('averaged_perceptron_tagger', quiet=True) 12 | nltk.download('stopwords', quiet=True) 13 | nltk.download('punkt_tab', quiet=True) 14 | 15 | self.lemmatizer = WordNetLemmatizer() 16 | self.stop_words = set(stopwords.words('english')) 17 | 18 | 19 | def preprocess(self, text: str, stop_word=False, apply_lemmatize=True) -> str: 20 | """ 21 | Preprocess a single chunk of text: tokenize, remove stopwords, lemmatize, and optionally stem. 
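        Only lemmatization is implemented here; no stemmer is applied.

        Example (illustrative; with the defaults, stopwords are kept):

            >>> TextPreprocessor().preprocess("The knights were attacking windmills")
            'The knight were attacking windmill'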
22 | """ 23 | 24 | # Tokenize into words 25 | words = word_tokenize(text) 26 | 27 | # Remove stopwords 28 | if stop_word: 29 | words = [word for word in words if word.lower() not in self.stop_words] 30 | 31 | # Apply lemmatization 32 | if apply_lemmatize: 33 | words = [self.lemmatizer.lemmatize(word) for word in words] 34 | 35 | return ' '.join(words) -------------------------------------------------------------------------------- /app/backend/reference_doc/document/embedder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sentence_transformers import SentenceTransformer 3 | 4 | class Embedder: 5 | def __init__(self, model_name="all-MiniLM-L12-v2"): 6 | # Explicitly set the device to 'cpu' if CUDA is not available 7 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 8 | self.model = SentenceTransformer(model_name, device=device) 9 | 10 | def embed(self, doc): 11 | """Embed a single document.""" 12 | return self.model.encode(doc, convert_to_numpy=True) 13 | 14 | def embed_chunks(self, chunks): 15 | """Embed multiple chunks.""" 16 | return [ 17 | (chunk, self.embed(chunk)) 18 | for chunk in chunks 19 | ] 20 | -------------------------------------------------------------------------------- /app/backend/reference_doc/document/semantic_chunking.py: -------------------------------------------------------------------------------- 1 | from nltk.tokenize import sent_tokenize 2 | from sklearn.metrics.pairwise import cosine_similarity 3 | import numpy as np 4 | from .document_preprocessor import TextPreprocessor 5 | 6 | 7 | class SemanticChunker: 8 | def __init__(self, model): 9 | """Initialize the TextChunker with a specified sentence transformer model.""" 10 | self.model = model 11 | self.preprocessor = TextPreprocessor() 12 | 13 | def process_file(self, file_path, context_window=1, percentile_threshold=95, min_chunk_size=3): 14 | """ 15 | Process a text file and split it into semantically meaningful chunks. 
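        The steps below embed each sentence together with its neighboring context,
        measure cosine distances between consecutive embeddings, split where the
        distance exceeds the given percentile, and merge undersized chunks into
        their most similar neighbor. (Note: file_path is handed to _load_text,
        which treats it as raw text rather than reading from disk.)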
16 | 17 | Args: 18 | file_path: Path to the text file 19 | context_window: Number of sentences to consider on either side for context 20 | percentile_threshold: Percentile threshold for identifying breakpoints 21 | min_chunk_size: Minimum number of sentences in a chunk 22 | 23 | Returns: 24 | list: Semantically coherent text chunks 25 | """ 26 | # Process the text file 27 | sentences = self._load_text(file_path) 28 | contextualized = self._add_context(sentences, context_window) 29 | embeddings = self.model.encode(contextualized) 30 | 31 | # Create and refine chunks 32 | distances = self._calculate_distances(embeddings) 33 | breakpoints = self._identify_breakpoints(distances, percentile_threshold) 34 | initial_chunks = self._create_chunks(sentences, breakpoints) 35 | 36 | # Merge small chunks for better coherence 37 | chunk_embeddings = self.model.encode(initial_chunks) 38 | final_chunks = self._merge_small_chunks(initial_chunks, chunk_embeddings, min_chunk_size) 39 | 40 | return final_chunks 41 | 42 | def _load_text(self, text): 43 | """Load and tokenize text from a file.""" 44 | text = self.preprocessor.preprocess(text) 45 | return sent_tokenize(text) 46 | 47 | def _add_context(self, sentences, window_size): 48 | """Combine sentences with their neighbors for better context.""" 49 | contextualized = [] 50 | for i in range(len(sentences)): 51 | start = max(0, i - window_size) 52 | end = min(len(sentences), i + window_size + 1) 53 | context = ' '.join(sentences[start:end]) 54 | contextualized.append(context) 55 | return contextualized 56 | 57 | def _calculate_distances(self, embeddings): 58 | """Calculate cosine distances between consecutive embeddings.""" 59 | distances = [] 60 | for i in range(len(embeddings) - 1): 61 | similarity = cosine_similarity([embeddings[i]], [embeddings[i + 1]])[0][0] 62 | distance = 1 - similarity 63 | distances.append(distance) 64 | return distances 65 | 66 | def _identify_breakpoints(self, distances, threshold_percentile): 67 | """Find natural breaking points in the text based on semantic distances.""" 68 | threshold = np.percentile(distances, threshold_percentile) 69 | return [i for i, dist in enumerate(distances) if dist > threshold] 70 | 71 | def _create_chunks(self, sentences, breakpoints): 72 | """Create initial text chunks based on identified breakpoints.""" 73 | chunks = [] 74 | start_idx = 0 75 | 76 | for breakpoint in breakpoints: 77 | chunk = ' '.join(sentences[start_idx:breakpoint + 1]) 78 | chunks.append(chunk) 79 | start_idx = breakpoint + 1 80 | 81 | # Add the final chunk 82 | final_chunk = ' '.join(sentences[start_idx:]) 83 | chunks.append(final_chunk) 84 | 85 | return chunks 86 | 87 | def _merge_small_chunks(self, chunks, embeddings, min_size): 88 | """Merge small chunks with their most similar neighbor.""" 89 | final_chunks = [chunks[0]] 90 | merged_embeddings = [embeddings[0]] 91 | 92 | for i in range(1, len(chunks) - 1): 93 | current_chunk_size = len(chunks[i].split('. 
')) 94 | 95 | if current_chunk_size < min_size: 96 | # Calculate similarities 97 | prev_similarity = cosine_similarity([embeddings[i]], [merged_embeddings[-1]])[0][0] 98 | next_similarity = cosine_similarity([embeddings[i]], [embeddings[i + 1]])[0][0] 99 | 100 | if prev_similarity > next_similarity: 101 | # Merge with previous chunk 102 | final_chunks[-1] = f"{final_chunks[-1]} {chunks[i]}" 103 | merged_embeddings[-1] = (merged_embeddings[-1] + embeddings[i]) / 2 104 | else: 105 | # Merge with next chunk 106 | chunks[i + 1] = f"{chunks[i]} {chunks[i + 1]}" 107 | embeddings[i + 1] = (embeddings[i] + embeddings[i + 1]) / 2 108 | else: 109 | final_chunks.append(chunks[i]) 110 | merged_embeddings.append(embeddings[i]) 111 | 112 | final_chunks.append(chunks[-1]) 113 | return final_chunks 114 | 115 | -------------------------------------------------------------------------------- /app/backend/reference_doc/requirements.txt: -------------------------------------------------------------------------------- 1 | nltk==3.8.1 2 | sentence-transformers==3.0.1 3 | torch==2.3.1 4 | transformers==4.42.4 5 | fastapi[standard] 6 | minio==7.2.13 7 | PyPDF2==3.0.1 8 | beautifulsoup4==4.12.3 9 | html2text 10 | redis==5.2.1 11 | opentelemetry-api 12 | opentelemetry-sdk 13 | opentelemetry-exporter-jaeger-thrift -------------------------------------------------------------------------------- /app/backend/reference_doc/utils.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import html2text 4 | 5 | def crawl_website(url): 6 | try: 7 | response = requests.get(url, timeout=10) 8 | response.raise_for_status() 9 | soup = BeautifulSoup(response.content, "html.parser") 10 | return {"status": "success", "content": soup.get_text()} 11 | except Exception as e: 12 | return {"status": "error", "error": str(e)} 13 | 14 | def convert_html_to_text(html_content): 15 | text_maker = html2text.HTML2Text() 16 | text_maker.ignore_links = True 17 | return text_maker.handle(html_content) 18 | -------------------------------------------------------------------------------- /app/backend/reference_doc/website_content.txt: -------------------------------------------------------------------------------- 1 | "Just then, they discovered thirty or forty windmills in that plain. And as soon as don Quixote saw them, he said to his squire: “Fortune is guiding our affairs better than we could have ever hoped. Look over there, Sancho Panza, my friend, where there are thirty or more monstrous giants with whom I plan to do battle and take all their lives, and with their spoils we’ll start to get rich. This is righteous warfare, and it’s a great service to God to rid the earth of such a wicked seed.” 2 | 3 | “What giants?” said Sancho Panza. 4 | 5 | “Those that you see over there,” responded his master, “with the long arms—some of them almost two leagues long.” 6 | 7 | “Look, your grace,” responded Sancho, “what you see over there aren’t giants—they’re windmills; and what seems to be arms are the sails that rotate the millstone when they’re turned by the wind.” 8 | 9 | “It seems to me,” responded don Quixote, “that you aren’t well-versed in adventures—they are giants; and if you’re afraid, get away from here and start praying while I go into fierce and unequal battle with them.” 10 | 11 | And saying this, he spurred his horse Rocinante without heeding what his squire Sancho was shouting to him, that he was attacking windmills and not giants. 
But he was so certain they were giants that he paid no attention to his squire Sancho’s shouts, nor did he see what they were, even though he was very close. Rather, he went on shouting: “Do not flee, cowards and vile creatures, for it’s just one knight attacking you!” 12 | 13 | At this point, the wind increased a bit and the large sails began to move, which don Quixote observed and said: “Even though you wave more arms than Briaræus, you’ll have to answer to me.” 14 | 15 | When he said this—and commending himself with all his heart to his lady Dulcinea, asking her to aid him in that peril, well-covered by his shield, with his lance on the lance rest —he attacked at Rocinante’s full gallop and assailed the first windmill he came to. He gave a thrust into the sail with his lance just as a rush of air accelerated it with such fury that it broke the lance to bits, taking the horse and knight with it, and tossed him rolling onto the ground, very battered. 16 | 17 | Sancho went as fast as his donkey could take him to help his master, and when he got there, he saw that don Quixote couldn’t stir—such was the result of Rocinante’s landing on top of him. “God help us,” said Sancho. “Didn’t I tell you to watch what you were doing; that they were just windmills, and that only a person who had windmills in his head could fail to realize it?” 18 | 19 | “Keep still, Sancho, my friend,” responded don Quixote. “Things associated with war, more than others, are subject to continual change. Moreover, I believe—and it’s true—that the sage Frestón—he who robbed me of my library—has changed these giants into windmills to take away the glory of my having conquered them, such is the enmity he bears me. But in the long run, his evil cunning will have little power over the might of my sword.” 20 | 21 | “God’s will be done,” responded Sancho Panza." -------------------------------------------------------------------------------- /app/frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | 3 | # Set the working directory 4 | WORKDIR /app 5 | 6 | # Copy only requirements.txt first to leverage caching properly 7 | COPY requirements.txt /app/ 8 | 9 | # Install Python dependencies 10 | RUN pip install --no-cache-dir -r /app/requirements.txt 11 | 12 | # Copy the rest of the application code 13 | COPY . 
/app/ 14 | 15 | # Expose the desired port 16 | EXPOSE 8081 17 | 18 | # Run the Streamlit application 19 | CMD ["streamlit", "run", "QA.py", "--server.port", "8081"] 20 | -------------------------------------------------------------------------------- /app/frontend/QA.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from utils import sync_process_document, send_message, sync_delete_document 3 | from websocket import create_connection 4 | import os 5 | import time 6 | import logging 7 | 8 | logging.basicConfig( 9 | level=logging.INFO, 10 | format="%(asctime)s [%(levelname)s] %(message)s", 11 | handlers=[ 12 | logging.StreamHandler() 13 | ] 14 | ) 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | if "keep_alive_started" not in st.session_state: 19 | st.session_state["keep_alive_started"] = False 20 | if "nginx_url" not in st.session_state: 21 | st.session_state["nginx_url"] = os.getenv("NGINX_URL") 22 | 23 | 24 | def connect_websocket(chat_id): 25 | try: 26 | if "ws_connection" not in st.session_state or st.session_state.ws_connection is None: 27 | st.session_state.ws_connection = create_connection(f"ws://{st.session_state.nginx_url}/ws/{chat_id}", timeout=300, http_proxy_timeout=300) 28 | if not st.session_state.ws_connection.connected: 29 | logger.info("new connection") 30 | st.session_state.ws_connection.close() 31 | st.session_state.ws_connection = create_connection(f"ws://{st.session_state.nginx_url}/ws/{chat_id}", timeout=300, http_proxy_timeout=300) 32 | except Exception as e: 33 | st.session_state.ws_connection = None 34 | raise Exception(f"Failed to connect or reconnect to WebSocket: {e}") 35 | return st.session_state.ws_connection 36 | 37 | 38 | def send_message_with_reconnect(ws_connection, user_id, chat_id, message, max_retries=3): 39 | retries = 0 40 | while retries < max_retries: 41 | try: 42 | # Ensure WebSocket connection is active 43 | ws_connection = connect_websocket(chat_id) 44 | # Send the message and stream tokens 45 | for token in send_message(ws_connection, user_id, chat_id, message): 46 | yield token # Stream tokens to the interface 47 | break # Exit the loop if successful 48 | 49 | except Exception as e: 50 | retries += 1 51 | time.sleep(0.5) 52 | if retries >= max_retries: 53 | raise Exception(f"WebSocket error after {max_retries} retries: {e}") 54 | else: 55 | logger.info(f"Connection lost. Retrying... 
({retries}/{max_retries})") 56 | 57 | 58 | 59 | 60 | # Configure Streamlit page layout 61 | st.set_page_config(layout="wide") 62 | 63 | # Initialize session state for chat history and references 64 | if "references" not in st.session_state: 65 | st.session_state["references"] = [] # Stores reference documents or URLs 66 | if "upload_options" not in st.session_state: 67 | st.session_state["upload_options"] = [] 68 | if "messages" not in st.session_state: 69 | st.session_state["messages"] = [] # Chat messages history 70 | 71 | # Streamlit app setup 72 | st.title("Multi-Document Chatbot") 73 | st.sidebar.title("Manage Reference Documents") 74 | 75 | # Sidebar for document upload options 76 | upload_option = st.sidebar.radio("Add Reference Source", ("Website URL", "Upload Files")) 77 | 78 | # Handle document input 79 | if upload_option == "Website URL": 80 | url = st.sidebar.text_input("Enter Website URL:") 81 | if st.sidebar.button("Add URL"): 82 | if url: 83 | st.session_state["references"].append(f"{url}") 84 | st.session_state["upload_options"].append("Website URL") 85 | st.sidebar.success(f"Added URL: {url}") 86 | 87 | # TODO 88 | sync_process_document("user123", "chat456", "Website URL", url=url) 89 | 90 | elif upload_option == "Upload Files": 91 | uploaded_files = st.sidebar.file_uploader("Upload your files (txt/pdf):", type=["txt", "pdf"], accept_multiple_files=True) 92 | if st.sidebar.button("Add Files"): 93 | if uploaded_files: 94 | # TODO 95 | sync_process_document("user123", "chat456", "Upload Files", uploaded_files=uploaded_files) 96 | for uploaded_file in uploaded_files: 97 | st.session_state["references"].append(f"{uploaded_file.name}") 98 | st.session_state["upload_options"].append("Upload Files") 99 | st.sidebar.success(f"Added {len(uploaded_files)} files.") 100 | 101 | # Display current reference sources 102 | st.sidebar.subheader("Current References") 103 | if st.session_state["references"]: 104 | for i, ref in enumerate(st.session_state["references"]): 105 | col1, col2 = st.sidebar.columns([4, 1]) 106 | with col1: 107 | st.write(ref) 108 | with col2: 109 | if st.button("Remove", key=f"remove_{i}"): 110 | logger.info("Delete.....") 111 | sync_delete_document("user123", "chat456", upload_option=st.session_state["upload_options"][i], document_name=ref) 112 | st.session_state["references"].pop(i) 113 | st.session_state["upload_options"].pop(i) 114 | st.sidebar.success(f"Removed: {ref}") 115 | st.rerun() # Refresh the sidebar to reflect the changes 116 | else: 117 | st.sidebar.info("No reference documents added yet.") 118 | 119 | # Display chat interface and history 120 | # st.header("Chat Interface") 121 | for message in st.session_state["messages"]: 122 | with st.chat_message(message["role"]): 123 | st.markdown(message["content"]) 124 | 125 | # Input and chatbot response 126 | if prompt := st.chat_input("Ask your question:"): 127 | if prompt == "/clear": 128 | # Clear chat history 129 | st.session_state["messages"].clear() 130 | st.info("Chat history cleared.") 131 | else: 132 | # Display user message 133 | with st.chat_message("user"): 134 | st.markdown(prompt) 135 | # Add user message to chat history 136 | st.session_state["messages"].append({"role": "user", "content": prompt}) 137 | 138 | # Generate a simulated response 139 | with st.chat_message("assistant"): 140 | response_container = st.empty() # Placeholder for streaming response 141 | full_response = "" # Accumulate the response here 142 | ws_connection = connect_websocket(chat_id="chat456") 143 | for token in 
send_message_with_reconnect(ws_connection, user_id="user123", chat_id="chat456", message=prompt): 144 | full_response += token # Append the new token to the response 145 | response_container.write(full_response) # Update the placeholder 146 | time.sleep(0.01) 147 | st.session_state.messages.append( 148 | { 149 | "role": "assistant", 150 | "content": full_response, 151 | } 152 | ) -------------------------------------------------------------------------------- /app/frontend/__pycache__/test_ba.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/frontend/__pycache__/test_ba.cpython-311.pyc -------------------------------------------------------------------------------- /app/frontend/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/frontend/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /app/frontend/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/app/frontend/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /app/frontend/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | python-dotenv 3 | streamlit==1.36.0 4 | httpx 5 | PyPDF2==3.0.1 6 | minio==7.2.13 7 | PyMuPDF 8 | websockets 9 | websocket_client -------------------------------------------------------------------------------- /app/frontend/utils.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import os 4 | import json 5 | import httpx 6 | import streamlit as st 7 | import logging 8 | import asyncio 9 | 10 | # Configure logging 11 | logging.basicConfig( 12 | level=logging.INFO, # Default log level 13 | format="%(asctime)s [%(levelname)s] %(message)s", # Log format 14 | handlers=[ 15 | logging.StreamHandler() # Log to stdout (container best practice) 16 | ] 17 | ) 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | NGINX_URL = os.getenv("NGINX_URL") 22 | 23 | async def process_document(user_id, chat_id, upload_option, url=None, uploaded_files=None): 24 | """ 25 | Sends a request to the upload endpoint to process documents or URLs. 26 | 27 | Args: 28 | user_id (str): The user ID. 29 | chat_id (str): The chat ID. 30 | upload_option (str): The type of upload (URL or File). 31 | url (str, optional): The URL to be processed. 32 | uploaded_files (list, optional): List of uploaded file objects. 33 | 34 | Returns: 35 | dict: The API response in JSON format. 
36 | """ 37 | DOC_VECTORDB_API_URL = f'http://{NGINX_URL}/upload' 38 | 39 | files = [] 40 | if uploaded_files: 41 | for uploaded_file in uploaded_files: 42 | files.append(("uploaded_files", (uploaded_file.name, uploaded_file.getvalue(), uploaded_file.type))) 43 | 44 | # logger.info(f"files: {files}") 45 | data = { 46 | "user_id": user_id, 47 | "chat_id": chat_id, 48 | "url": url if upload_option == "Website URL" else None, 49 | } 50 | 51 | headers = {"Accept": "application/json"} 52 | 53 | # Send the POST request 54 | try: 55 | r = requests.post(DOC_VECTORDB_API_URL, data=data, files=files, headers=headers) 56 | r.raise_for_status() # Raise exception for HTTP errors 57 | return r.json() 58 | except requests.exceptions.RequestException as e: 59 | return {"status": "error", "message": str(e)} 60 | 61 | async def delete_document(user_id, chat_id, upload_option, document_name=None): 62 | """ 63 | Sends a request to the remove_document endpoint to delete documents. 64 | 65 | Args: 66 | user_id (str): The user ID. 67 | chat_id (str): The chat ID. 68 | upload_option (str): The type of upload (URL or File). 69 | document_name (str, optional): The name of the document to be deleted. 70 | 71 | Returns: 72 | dict: The API response in JSON format. 73 | """ 74 | DOC_VECTORDB_API_URL = f'http://{NGINX_URL}/remove_document' 75 | data = { 76 | "user_id": user_id, 77 | "chat_id": chat_id, 78 | "upload_option": upload_option, 79 | "document_name": document_name 80 | } 81 | 82 | logger.info(f"Remove data: {data}") 83 | 84 | headers = {"Accept": "application/json", "Content-Type": "application/json"} 85 | 86 | try: 87 | # Use POST method instead of DELETE 88 | r = requests.post(DOC_VECTORDB_API_URL, json=data, headers=headers) 89 | r.raise_for_status() # Raise exception for HTTP errors 90 | return r.json() 91 | except requests.exceptions.RequestException as e: 92 | return {"status": "error", "message": str(e)} 93 | 94 | 95 | def sync_process_document(user_id, chat_id, upload_option, url=None, uploaded_files=None): 96 | asyncio.run(process_document(user_id, chat_id, upload_option, url=url, uploaded_files=uploaded_files)) 97 | 98 | def sync_delete_document(user_id, chat_id, upload_option, document_name): 99 | asyncio.run(delete_document(user_id, chat_id, upload_option, document_name)) 100 | 101 | def send_message(ws_connection, user_id, chat_id, message): 102 | """ 103 | Sends a message via WebSocket to the server and streams the response. 104 | 105 | Args: 106 | user_id (str): The user ID. 107 | chat_id (str): The chat ID. 108 | message (str): The message to send. 109 | 110 | Yields: 111 | str: The server's response as it is received. 
112 | """ 113 | 114 | try: 115 | 116 | payload = { 117 | "user_id": user_id, 118 | "chat_id": chat_id, 119 | "message": message 120 | } 121 | ws_connection.send(json.dumps(payload)) 122 | 123 | # Stream the response without closing the connection 124 | while True: 125 | response = ws_connection.recv() 126 | if response == "/end": # End marker, exit the loop but keep the connection alive 127 | break 128 | yield response 129 | except Exception as e: 130 | # Log the error and reset the connection 131 | st.session_state.ws_connection = None # Clear the connection to force reconnection 132 | raise Exception(f"WebSocket error: {e}") 133 | 134 | def testing(): 135 | CHAT_API_URL = f'http://{NGINX_URL}/test' 136 | headers = {"Accept": "application/json"} 137 | 138 | with httpx.stream('POST', CHAT_API_URL, headers=headers, timeout=None) as r: 139 | if r.status_code != 200: 140 | raise Exception(f"Error: {r.status_code}, {r.text}") 141 | 142 | for line in r.iter_text(): 143 | yield line # Yield token for further processing (if needed) 144 | time.sleep(0.05) 145 | 146 | 147 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | cassandra: 5 | image: cassandra:latest 6 | container_name: cassandra 7 | ports: 8 | - "9042:9042" 9 | environment: 10 | - CASSANDRA_START_RPC=true 11 | - CASSANDRA_CLUSTER_NAME=MyCluster 12 | - CASSANDRA_ENDPOINT_SNITCH=GossipingPropertyFileSnitch 13 | - CASSANDRA_DC=datacenter1 14 | healthcheck: 15 | test: ["CMD", "cqlsh", "cassandra", "-e", "DESCRIBE KEYSPACES"] 16 | interval: 30s 17 | timeout: 10s 18 | retries: 5 19 | 20 | cqlsh: 21 | image: cassandra:latest 22 | depends_on: 23 | cassandra: 24 | condition: service_healthy 25 | entrypoint: > 26 | bash -c " 27 | until cqlsh cassandra -e 'DESCRIBE KEYSPACES'; do 28 | echo 'Waiting for Cassandra to be ready...'; 29 | sleep 5; 30 | done; 31 | cqlsh cassandra -f /scripts/init.cql" 32 | volumes: 33 | - ./init.cql:/scripts/init.cql 34 | 35 | minio: 36 | container_name: minio 37 | image: minio/minio 38 | ports: 39 | - "9000:9000" 40 | - "9001:9001" 41 | environment: 42 | MINIO_ROOT_USER: admin 43 | MINIO_ROOT_PASSWORD: admin123 44 | volumes: 45 | - ~/minio/data:/data 46 | command: server /data --console-address ":9001" 47 | healthcheck: 48 | test: ["CMD", "curl", "-f", "http://minio:9000/minio/health/live"] 49 | interval: 30s 50 | timeout: 10s 51 | retries: 3 52 | 53 | redis: 54 | image: redis/redis-stack-server:7.2.0-v6 55 | ports: 56 | - 6379:6379 57 | healthcheck: 58 | test: ["CMD", "redis-cli", "-h", "redis", "ping"] 59 | interval: 30s 60 | timeout: 10s 61 | retries: 3 62 | volumes: 63 | - redis_data:/data 64 | 65 | doc_management_api: 66 | container_name: doc_management_api 67 | build: ./app/backend/reference_doc 68 | env_file: 69 | - .env 70 | ports: 71 | - ${DOC_FASTAPI_PORT}:${DOC_FASTAPI_PORT} 72 | environment: 73 | - REDIS_HOST=redis 74 | - REDIS_PORT=6379 75 | - MINIO_HOST=minio 76 | - MINIO_PORT=9000 77 | - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY} 78 | - MINIO_SECRET_KEY=${MINIO_SECRET_KEY} 79 | - JAEGER_AGENT_HOST=jaeger 80 | - JAEGER_AGENT_PORT=${JAEGER_AGENT_PORT} 81 | volumes: 82 | - ./data:/data 83 | deploy: 84 | resources: 85 | limits: 86 | memory: 4g 87 | cpus: "2.0" 88 | depends_on: 89 | redis: 90 | condition: service_healthy 91 | minio: 92 | condition: service_healthy 93 | healthcheck: 94 | test: ["CMD", "curl", "-f", 
"http://doc_management_api:${DOC_FASTAPI_PORT}/health"] 95 | interval: 30s 96 | timeout: 10s 97 | retries: 3 98 | 99 | chat_api: 100 | container_name: chat_api 101 | build: ./app/backend/qa 102 | env_file: 103 | - .env 104 | ports: 105 | - ${CHAT_FASTAPI_PORT}:${CHAT_FASTAPI_PORT} 106 | environment: 107 | - JAEGER_AGENT_HOST=jaeger 108 | - JAEGER_AGENT_PORT=${JAEGER_AGENT_PORT} 109 | - REDIS_HOST=redis 110 | - REDIS_PORT=6379 111 | - CASSANDRA_PORT=9042 112 | - CASSANDRA_HOST=cassandra 113 | volumes: 114 | - ./init.cql:/init.cql 115 | depends_on: 116 | redis: 117 | condition: service_healthy 118 | cassandra: 119 | condition: service_healthy 120 | healthcheck: 121 | test: ["CMD", "curl", "-f", "http://chat_api:${CHAT_FASTAPI_PORT}/health"] 122 | interval: 30s 123 | timeout: 10s 124 | retries: 3 125 | 126 | frontend_streamlit: 127 | container_name: frontend_streamlit 128 | build: ./app/frontend 129 | ports: 130 | - ${FRONTEND_PORT}:${FRONTEND_PORT} 131 | environment: 132 | - JAEGER_AGENT_HOST=jaeger 133 | - JAEGER_AGENT_PORT=${JAEGER_AGENT_PORT} 134 | - NGINX_URL=nginx:${NGINX_PORT} 135 | depends_on: 136 | doc_management_api: 137 | condition: service_healthy 138 | chat_api: 139 | condition: service_healthy 140 | volumes: 141 | - ./app/frontend:/app 142 | healthcheck: 143 | test: ["CMD", "curl", "-f", "http://frontend_streamlit:${FRONTEND_PORT}/"] 144 | interval: 30s 145 | timeout: 10s 146 | retries: 3 147 | 148 | jaeger: 149 | image: jaegertracing/all-in-one:1.45 150 | environment: 151 | - JAEGER_AGENT_PORT=6831 152 | - ${JAEGER_AGENT_PORT}:${JAEGER_AGENT_PORT} 153 | - COLLECTOR_ZIPKIN_HTTP_PORT=:9411 154 | - COLLECTOR_OTLP_ENABLED=true 155 | ports: 156 | - 6831:6831/udp 157 | - 6832:6832/udp 158 | - 5778:5778 159 | - 16686:16686 160 | - 14268:14268 161 | - 14269:14269 162 | - 14250:14250 163 | - 9411:9411 164 | - 4317:4317 165 | - 4318:4318 166 | healthcheck: 167 | test: ["CMD", "curl", "-f", "http://jaeger:16686"] 168 | interval: 30s 169 | timeout: 10s 170 | retries: 3 171 | 172 | nginx: 173 | image: nginx:latest 174 | ports: 175 | - "80:80" 176 | volumes: 177 | - ./nginx.conf:/etc/nginx/nginx.conf:ro 178 | depends_on: 179 | chat_api: 180 | condition: service_healthy 181 | doc_management_api: 182 | condition: service_healthy 183 | healthcheck: 184 | test: ["CMD", "curl", "-f", "http://nginx"] 185 | interval: 30s 186 | timeout: 10s 187 | retries: 3 188 | 189 | volumes: 190 | redis_data: 191 | cassandra-data: 192 | 193 | networks: 194 | default: 195 | name: mlops 196 | -------------------------------------------------------------------------------- /iac/ansible/deploy_jenkins/create_compute_instance.yaml: -------------------------------------------------------------------------------- 1 | - name: Create a Compute Engine instance 2 | hosts: localhost 3 | tasks: 4 | - name: Start an instance 5 | gcp_compute_instance: 6 | name: instance-1 7 | machine_type: e2-small 8 | # Refer to https://cloud.google.com/compute/docs/images/os-details#ubuntu_lts 9 | # or use the command `gcloud compute images list --project=ubuntu-os-cloud` 10 | zone: asia-southeast1-b 11 | project: "{{project}}" 12 | # The service account is needed to create the resources 13 | auth_kind: serviceaccount 14 | service_account_file: "{{service_account_file}}" 15 | disks: 16 | - auto_delete: true 17 | boot: true 18 | initialize_params: 19 | source_image: projects/ubuntu-os-cloud/global/images/ubuntu-2204-jammy-v20230727 20 | # We use the default network with an external IP for SSH 21 | network_interfaces: 22 | - network: 23 | 
selfLink: global/networks/default 24 | access_configs: 25 | - name: External NAT 26 | type: ONE_TO_ONE_NAT 27 | tags: 28 | items: [allow-http-and-jenkins] 29 | state: present # change to absent to delete the instance 30 | 31 | - name: Create inbound firewall rule for ports 8081 and 50000 32 | gcp_compute_firewall: 33 | name: allow-port-8081-50000 34 | network: 35 | selfLink: global/networks/default 36 | allowed: 37 | - ip_protocol: TCP 38 | ports: 39 | - 8081 40 | - 50000 41 | source_ranges: 42 | - 0.0.0.0/0 # Allow traffic from any source (use a more specific source range for security) 43 | direction: INGRESS # Direction from outside to inside, EGRESS is the opposite direction 44 | target_tags: [allow-http-and-jenkins] # Apply the rule to instances with this tag 45 | description: Allow incoming traffic on ports 8081 and 50000 46 | project: "{{project}}" 47 | auth_kind: serviceaccount 48 | service_account_file: "{{service_account_file}}" 49 | state: present -------------------------------------------------------------------------------- /iac/ansible/deploy_jenkins/deploy_jenkins.yaml: -------------------------------------------------------------------------------- 1 | # https://www.digitalocean.com/community/tutorials/how-to-use-ansible-to-install-and-set-up-docker-on-ubuntu-22-04 2 | - name: Deploy Jenkins 3 | hosts: servers # Target host group from the inventory; use e.g. `servers_1, servers_2` to address multiple groups 4 | become: yes # To run commands as a superuser (e.g., sudo) 5 | vars: 6 | default_container_name: jenkins 7 | default_container_image: sieucun/jenkins:lts 8 | tasks: 9 | - name: Install aptitude 10 | apt: 11 | name: aptitude 12 | state: latest 13 | update_cache: true 14 | 15 | - name: Install prerequisites 16 | apt: 17 | pkg: 18 | - apt-transport-https 19 | - ca-certificates 20 | - curl 21 | - software-properties-common 22 | - python3-pip 23 | - virtualenv 24 | - python3-setuptools 25 | state: latest 26 | update_cache: true 27 | 28 | - name: Add Docker GPG apt Key 29 | apt_key: 30 | url: https://download.docker.com/linux/ubuntu/gpg 31 | state: present 32 | 33 | - name: Add Docker Repository 34 | apt_repository: 35 | repo: deb https://download.docker.com/linux/ubuntu jammy stable # jammy matches the Ubuntu 22.04 image provisioned above 36 | state: present 37 | 38 | - name: Update apt and install docker-ce 39 | apt: 40 | name: docker-ce 41 | state: latest 42 | update_cache: true 43 | 44 | - name: Pull the Docker image 45 | community.docker.docker_image: 46 | name: "{{ default_container_image }}" 47 | source: pull 48 | 49 | # https://docs.ansible.com/ansible/latest/collections/community/docker/docker_container_module.html 50 | - name: Create the container 51 | community.docker.docker_container: 52 | name: "{{ default_container_name }}" 53 | image: "{{ default_container_image }}" 54 | state: started 55 | privileged: true 56 | user: root 57 | volumes: 58 | - jenkins_home:/var/jenkins_home 59 | - /var/run/docker.sock:/var/run/docker.sock 60 | ports: 61 | - 8081:8080 62 | - 50000:50000 63 | detach: yes # Run the container in the background 64 | -------------------------------------------------------------------------------- /iac/ansible/inventory: -------------------------------------------------------------------------------- 1 | [servers] 2 | 34.142.146.253 ansible_ssh_private_key_file=./.ssh/jenkins -------------------------------------------------------------------------------- /iac/ansible/vm_requirements.txt: -------------------------------------------------------------------------------- 1 | ansible==8.3.0 2 | requests==2.31.0 3 | 
google-auth==2.28.1 -------------------------------------------------------------------------------- /iac/terraform/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | google = { 4 | source = "hashicorp/google" 5 | version = "4.80.0" # Provider version 6 | } 7 | } 8 | required_version = "1.9.2" # Terraform version 9 | } 10 | 11 | provider "google" { 12 | project = var.project_id 13 | region = var.region 14 | } 15 | 16 | # Define GKE Cluster 17 | resource "google_container_cluster" "primary" { 18 | name = "${var.project_id}-gke" 19 | location = var.region 20 | remove_default_node_pool = true 21 | initial_node_count = 1 22 | } 23 | 24 | # Node Pool for System Services 25 | resource "google_container_node_pool" "system_services" { 26 | name = "${var.project_id}-sys-svc-pool" 27 | location = var.region 28 | cluster = google_container_cluster.primary.name 29 | node_count = 1 30 | 31 | node_config { 32 | machine_type = "e2-standard-2" 33 | disk_size_gb = 40 34 | preemptible = false 35 | image_type = "COS_CONTAINERD" 36 | labels = { 37 | workload = "system-services" 38 | } 39 | } 40 | 41 | autoscaling { 42 | min_node_count = 1 43 | max_node_count = 3 44 | } 45 | } 46 | 47 | # Node Pool for Cassandra 48 | resource "google_container_node_pool" "cassandra" { 49 | name = "${var.project_id}-cassandra-pool" 50 | location = var.region 51 | cluster = google_container_cluster.primary.name 52 | node_count = 1 53 | 54 | node_config { 55 | machine_type = "e2-highmem-4" 56 | disk_size_gb = 40 # Reduced from 200GB 57 | preemptible = false 58 | image_type = "COS_CONTAINERD" 59 | labels = { 60 | workload = "cassandra" 61 | } 62 | } 63 | 64 | autoscaling { 65 | min_node_count = 1 66 | max_node_count = 2 67 | } 68 | } 69 | 70 | # Node Pool for Backend Doc Management 71 | resource "google_container_node_pool" "backend_doc" { 72 | name = "${var.project_id}-doc-pool" 73 | location = var.region 74 | cluster = google_container_cluster.primary.name 75 | node_count = 1 76 | 77 | node_config { 78 | machine_type = "e2-standard-4" # Optimized for balanced workloads 79 | disk_size_gb = 40 80 | preemptible = false 81 | image_type = "COS_CONTAINERD" 82 | labels = { 83 | workload = "backend-doc" 84 | } 85 | } 86 | 87 | autoscaling { 88 | min_node_count = 1 89 | max_node_count = 2 90 | } 91 | } 92 | 93 | # Node Pool for Backend Chat 94 | resource "google_container_node_pool" "backend_chat" { 95 | name = "${var.project_id}-chat-pool" 96 | location = var.region 97 | cluster = google_container_cluster.primary.name 98 | node_count = 1 99 | 100 | node_config { 101 | machine_type = "e2-standard-4" # Optimized for balanced workloads 102 | disk_size_gb = 40 103 | preemptible = false 104 | image_type = "COS_CONTAINERD" 105 | labels = { 106 | workload = "backend-chat" 107 | } 108 | } 109 | 110 | autoscaling { 111 | min_node_count = 1 112 | max_node_count = 2 113 | } 114 | } 115 | 116 | # Node Pool for Frontend and NGINX 117 | resource "google_container_node_pool" "frontend" { 118 | name = "${var.project_id}-fe-pool" 119 | location = var.region 120 | cluster = google_container_cluster.primary.name 121 | node_count = 1 122 | 123 | node_config { 124 | machine_type = "e2-medium" 125 | disk_size_gb = 40 126 | preemptible = true 127 | image_type = "COS_CONTAINERD" 128 | labels = { 129 | workload = "frontend" 130 | } 131 | } 132 | 133 | autoscaling { 134 | min_node_count = 1 135 | max_node_count = 1 136 | } 137 | } 138 | 139 | # Outputs 140 | output 
"kubernetes_cluster_name" { 141 | value = google_container_cluster.primary.name 142 | } 143 | 144 | output "kubernetes_cluster_endpoint" { 145 | value = google_container_cluster.primary.endpoint 146 | } 147 | 148 | output "kubernetes_cluster_ca_certificate" { 149 | value = google_container_cluster.primary.master_auth.0.cluster_ca_certificate 150 | } 151 | -------------------------------------------------------------------------------- /iac/terraform/variables.tf: -------------------------------------------------------------------------------- 1 | 2 | variable "project_id" { 3 | description = "The project ID to host the cluster in" 4 | default = "savvy-ceiling-446122-d5" 5 | } 6 | 7 | variable "region" { 8 | description = "The region the cluster in" 9 | default = "asia-southeast1-a" 10 | } -------------------------------------------------------------------------------- /images/demo-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/images/demo-0.png -------------------------------------------------------------------------------- /images/demo-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/images/demo-1.png -------------------------------------------------------------------------------- /images/deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/images/deployment.png -------------------------------------------------------------------------------- /images/elastic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/images/elastic.png -------------------------------------------------------------------------------- /images/frontend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/images/frontend.png -------------------------------------------------------------------------------- /images/jaeger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/images/jaeger.png -------------------------------------------------------------------------------- /images/metric.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/images/metric.png -------------------------------------------------------------------------------- /images/system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nhduong1203/Chatbot/a92862ccdaac6c9093d01073b6189f333bd6cc29/images/system.png -------------------------------------------------------------------------------- /jenkins/Dockerfile: -------------------------------------------------------------------------------- 1 | # Ref: https://hackmamba.io/blog/2022/04/running-docker-in-a-jenkins-container/ 2 | FROM jenkins/jenkins:lts-jdk17 3 | USER root 4 | RUN curl https://get.docker.com > dockerinstall && chmod 777 
dockerinstall && ./dockerinstall && \ 5 | curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && \ 6 | chmod +x ./kubectl && \ 7 | mv ./kubectl /usr/local/bin/kubectl && \ 8 | curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash 9 | USER jenkins 10 | -------------------------------------------------------------------------------- /jenkins/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | services: 3 | jenkins: 4 | build: 5 | context: . 6 | dockerfile: Dockerfile 7 | image: dinhln03/jenkins:lts 8 | container_name: jenkins 9 | privileged: true 10 | user: root 11 | ports: 12 | - 8081:8080 13 | - 50000:50000 14 | volumes: 15 | - jenkins_home:/var/jenkins_home 16 | - /var/run/docker.sock:/var/run/docker.sock 17 | volumes: 18 | jenkins_home: 19 | -------------------------------------------------------------------------------- /k8s-yaml/backend/chat.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: backend-chat 5 | spec: 6 | replicas: 2 7 | selector: 8 | matchLabels: 9 | app: backend-chat 10 | template: 11 | metadata: 12 | labels: 13 | app: backend-chat 14 | spec: 15 | nodeSelector: 16 | workload: backend-chat 17 | containers: 18 | - name: backend-chat 19 | image: sieucun/llmops-chat:latest 20 | ports: 21 | - containerPort: 8002 22 | env: 23 | - name: REDIS_HOST 24 | value: redis 25 | - name: REDIS_PORT 26 | value: "6379" 27 | - name: DOC_API_URL 28 | value: http://backend-doc:8001 29 | - name: CASSANDRA_PORT 30 | value: "9042" 31 | - name: CASSANDRA_HOST 32 | value: cassandra 33 | - name: OPENAI_API_KEY 34 | valueFrom: 35 | secretKeyRef: 36 | name: openai-api-key 37 | key: OPENAI_API_KEY 38 | --- 39 | apiVersion: v1 40 | kind: Service 41 | metadata: 42 | name: backend-chat 43 | spec: 44 | selector: 45 | app: backend-chat 46 | ports: 47 | - port: 8002 48 | targetPort: 8002 -------------------------------------------------------------------------------- /k8s-yaml/backend/doc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: backend-doc 5 | spec: 6 | replicas: 2 7 | selector: 8 | matchLabels: 9 | app: backend-doc 10 | template: 11 | metadata: 12 | labels: 13 | app: backend-doc 14 | spec: 15 | nodeSelector: 16 | workload: backend-doc 17 | containers: 18 | - name: backend-doc 19 | image: sieucun/llmops-doc:latest 20 | ports: 21 | - containerPort: 8001 22 | env: 23 | - name: REDIS_HOST 24 | value: redis 25 | - name: REDIS_PORT 26 | value: "6379" 27 | - name: MINIO_HOST 28 | value: minio 29 | - name: MINIO_PORT 30 | value: "9000" 31 | 32 | --- 33 | 34 | apiVersion: v1 35 | kind: Service 36 | metadata: 37 | name: backend-doc 38 | spec: 39 | selector: 40 | app: backend-doc 41 | ports: 42 | - port: 8001 43 | targetPort: 8001 -------------------------------------------------------------------------------- /k8s-yaml/db/cassandra/cassandra-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: cassandra 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: cassandra 10 | template: 11 | metadata: 12 | labels: 13 | app: cassandra 14 | spec: 15 | nodeSelector: 16 | 
workload: cassandra # Aligning with the cassandra node pool in Terraform 17 | containers: 18 | - name: cassandra 19 | image: cassandra:latest 20 | ports: 21 | - containerPort: 9042 22 | env: 23 | - name: CASSANDRA_CLUSTER_NAME 24 | value: "MyCluster" 25 | - name: CASSANDRA_ENDPOINT_SNITCH 26 | value: "GossipingPropertyFileSnitch" 27 | - name: CASSANDRA_DC 28 | value: "datacenter1" 29 | - name: CASSANDRA_RACK 30 | value: "rack1" 31 | volumeMounts: 32 | - name: cassandra-data 33 | mountPath: /var/lib/cassandra 34 | volumes: 35 | - name: cassandra-data 36 | persistentVolumeClaim: 37 | claimName: cassandra-pvc 38 | --- 39 | apiVersion: v1 40 | kind: PersistentVolumeClaim 41 | metadata: 42 | name: cassandra-pvc 43 | spec: 44 | accessModes: 45 | - ReadWriteOnce 46 | resources: 47 | requests: 48 | storage: 40Gi 49 | --- 50 | apiVersion: v1 51 | kind: Service 52 | metadata: 53 | name: cassandra 54 | spec: 55 | selector: 56 | app: cassandra 57 | ports: 58 | - port: 9042 59 | targetPort: 9042 60 | type: ClusterIP 61 | -------------------------------------------------------------------------------- /k8s-yaml/db/cassandra/cassandra-init-job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: cassandra-init-job 5 | spec: 6 | template: 7 | metadata: 8 | labels: 9 | app: cassandra-init 10 | spec: 11 | restartPolicy: OnFailure 12 | containers: 13 | - name: cassandra-init 14 | image: cassandra:latest 15 | command: 16 | - "bash" 17 | - "-c" 18 | - > 19 | echo "Waiting for Cassandra to be ready..."; 20 | until cqlsh cassandra -e 'DESCRIBE KEYSPACES'; do 21 | echo "Cassandra is not ready yet. Retrying in 5 seconds..."; 22 | sleep 5; 23 | done; 24 | echo "Cassandra is ready. 
Running initialization script..."; 25 | cqlsh cassandra -f /scripts/init.cql; 26 | volumeMounts: 27 | - name: cassandra-init-scripts 28 | mountPath: /scripts 29 | volumes: 30 | - name: cassandra-init-scripts 31 | configMap: 32 | name: cassandra-init-configmap 33 | --- 34 | apiVersion: v1 35 | kind: ConfigMap 36 | metadata: 37 | name: cassandra-init-configmap 38 | data: 39 | init.cql: | 40 | CREATE KEYSPACE IF NOT EXISTS mlops 41 | WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; 42 | 43 | USE mlops; 44 | 45 | CREATE TABLE IF NOT EXISTS users ( 46 | user_id TEXT PRIMARY KEY, 47 | username TEXT, 48 | email TEXT, 49 | created_at TIMESTAMP 50 | ); 51 | 52 | CREATE TABLE IF NOT EXISTS sessions ( 53 | user_id TEXT, 54 | conversation_id TEXT, 55 | started_at TIMESTAMP, 56 | PRIMARY KEY (user_id, conversation_id) 57 | ); 58 | 59 | CREATE TABLE IF NOT EXISTS messages ( 60 | user_id TEXT, 61 | conversation_id TEXT, 62 | role TEXT, 63 | message TEXT, 64 | timestamp TIMESTAMP, 65 | PRIMARY KEY ((conversation_id), timestamp) 66 | ); 67 | -------------------------------------------------------------------------------- /k8s-yaml/db/minio.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: minio-secret 5 | type: Opaque 6 | data: 7 | MINIO_ROOT_USER:YWRtaW4= # Base64 for "admin" 8 | MINIO_ROOT_PASSWORD:YWRtaW4xMjM= # Base64 for "admin123" 9 | 10 | --- 11 | 12 | apiVersion: apps/v1 13 | kind: Deployment 14 | metadata: 15 | name: minio 16 | spec: 17 | replicas: 1 18 | selector: 19 | matchLabels: 20 | app: minio 21 | template: 22 | metadata: 23 | labels: 24 | app: minio 25 | spec: 26 | nodeSelector: 27 | workload: system-services 28 | containers: 29 | - name: minio 30 | image: minio/minio:latest 31 | ports: 32 | - containerPort: 9000 33 | - containerPort: 9001 34 | env: 35 | - name: MINIO_ROOT_USER 36 | valueFrom: 37 | secretKeyRef: 38 | name: minio-secret 39 | key: MINIO_ROOT_USER 40 | - name: MINIO_ROOT_PASSWORD 41 | valueFrom: 42 | secretKeyRef: 43 | name: minio-secret 44 | key: MINIO_ROOT_PASSWORD 45 | args: 46 | - "server" 47 | - "/data" 48 | - "--console-address" 49 | - ":9001" 50 | volumeMounts: 51 | - name: minio-data 52 | mountPath: /data 53 | volumes: 54 | - name: minio-data 55 | persistentVolumeClaim: 56 | claimName: minio-pvc 57 | --- 58 | apiVersion: v1 59 | kind: PersistentVolumeClaim 60 | metadata: 61 | name: minio-pvc 62 | spec: 63 | accessModes: 64 | - ReadWriteOnce 65 | resources: 66 | requests: 67 | storage: 10Gi 68 | --- 69 | apiVersion: v1 70 | kind: Service 71 | metadata: 72 | name: minio 73 | spec: 74 | selector: 75 | app: minio 76 | ports: 77 | - name: http 78 | port: 9000 79 | targetPort: 9000 80 | - name: console 81 | port: 9001 82 | targetPort: 9001 83 | type: ClusterIP 84 | -------------------------------------------------------------------------------- /k8s-yaml/db/redis.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: redis 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: redis 10 | template: 11 | metadata: 12 | labels: 13 | app: redis 14 | spec: 15 | nodeSelector: 16 | workload: system-services # Aligning with the Terraform setup 17 | containers: 18 | - name: redis 19 | image: redis/redis-stack-server:7.2.0-v6 20 | ports: 21 | - containerPort: 6379 22 | env: 23 | - name: REDIS_ARGS 24 | value: "--save 60 1 --loglevel warning" # Example for 
fine-tuning Redis settings 25 | --- 26 | apiVersion: v1 27 | kind: Service 28 | metadata: 29 | name: redis 30 | spec: 31 | selector: 32 | app: redis 33 | ports: 34 | - port: 6379 35 | targetPort: 6379 36 | protocol: TCP 37 | type: ClusterIP 38 | -------------------------------------------------------------------------------- /k8s-yaml/frontend/frontend.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: frontend 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: frontend 10 | template: 11 | metadata: 12 | labels: 13 | app: frontend 14 | spec: 15 | nodeSelector: 16 | workload: frontend 17 | containers: 18 | - name: frontend 19 | image: sieucun/llmops-frontend:latest 20 | ports: 21 | - containerPort: 8081 # Streamlit listens on 8081 (see app/frontend/Dockerfile) 22 | env: 23 | - name: DOC_API_URL 24 | value: http://backend-doc:8001 25 | - name: CHAT_API_URL 26 | value: http://backend-chat:8002 27 | - name: NGINX_URL 28 | value: nginx:80 29 | 30 | --- 31 | 32 | apiVersion: v1 33 | kind: Service 34 | metadata: 35 | name: frontend 36 | spec: 37 | selector: 38 | app: frontend 39 | type: NodePort 40 | ports: 41 | - port: 80 # Expose the service on port 80 (optional for NodePort services) 42 | targetPort: 8081 # Forward traffic to the Streamlit application running on port 8081 43 | nodePort: 30001 # Optional: Specify a NodePort between 30000-32767 44 | 45 | -------------------------------------------------------------------------------- /k8s-yaml/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | events {} 2 | 3 | http { 4 | upstream doc_management_api { 5 | server doc_management_api:8001; 6 | } 7 | 8 | upstream chat_api { 9 | server chat_api:8002; 10 | } 11 | 12 | server { 13 | listen 80; 14 | 15 | location /upload { 16 | proxy_pass http://doc_management_api; 17 | } 18 | 19 | location /remove_document { 20 | proxy_pass http://doc_management_api; 21 | } 22 | 23 | location /ws/ { 24 | proxy_pass http://chat_api; # Proxy WebSocket traffic to the chat backend 25 | proxy_http_version 1.1; 26 | proxy_set_header Upgrade $http_upgrade; 27 | proxy_set_header Connection "Upgrade"; 28 | proxy_set_header Host $host; 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | 31 | chunked_transfer_encoding on; 32 | proxy_buffering off; 33 | proxy_cache off; 34 | 35 | # Increase timeouts for long-lived connections 36 | proxy_read_timeout 3600s; 37 | proxy_send_timeout 3600s; 38 | } 39 | 40 | # If using /test for a specific endpoint 41 | location /test { 42 | proxy_pass http://chat_api; 43 | chunked_transfer_encoding on; 44 | proxy_buffering off; 45 | proxy_cache off; 46 | proxy_http_version 1.1; 47 | 48 | # Increase timeout for long-lived connections (e.g., SSE or WebSockets) 49 | proxy_read_timeout 3600s; 50 | proxy_send_timeout 3600s; 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /k8s-yaml/nginx/nginx.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: nginx 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: nginx 10 | template: 11 | metadata: 12 | labels: 13 | app: nginx 14 | spec: 15 | nodeSelector: 16 | workload: frontend # Aligning with the frontend node pool in Terraform 17 | containers: 18 | - name: nginx 19 | image: nginx:latest 20 | ports: 21 | - containerPort: 80 22 | volumeMounts: 23 | - name: 
nginx-config-volume 24 | mountPath: /etc/nginx/nginx.conf 25 | subPath: nginx.conf 26 | volumes: 27 | - name: nginx-config-volume 28 | configMap: 29 | name: nginx-config 30 | --- 31 | apiVersion: v1 32 | kind: ConfigMap 33 | metadata: 34 | name: nginx-config 35 | data: 36 | nginx.conf: | 37 | events {} 38 | 39 | http { 40 | upstream backend-doc { 41 | server backend-doc:8001; 42 | } 43 | 44 | upstream backend-chat { 45 | server backend-chat:8002; 46 | } 47 | 48 | server { 49 | listen 80; 50 | 51 | location /upload { 52 | proxy_pass http://backend-doc; 53 | } 54 | 55 | location /remove_document { 56 | proxy_pass http://backend-doc; 57 | } 58 | 59 | location /ws/ { 60 | proxy_pass http://backend-chat; # Proxy WebSocket traffic to the chat backend 61 | proxy_http_version 1.1; 62 | proxy_set_header Upgrade $http_upgrade; 63 | proxy_set_header Connection "Upgrade"; 64 | proxy_set_header Host $host; 65 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 66 | 67 | chunked_transfer_encoding on; 68 | proxy_buffering off; 69 | proxy_cache off; 70 | 71 | # Increase timeouts for long-lived connections 72 | proxy_read_timeout 3600s; 73 | proxy_send_timeout 3600s; 74 | } 75 | 76 | # If using /test for a specific endpoint 77 | location /test { 78 | proxy_pass http://backend-chat; 79 | chunked_transfer_encoding on; 80 | proxy_buffering off; 81 | proxy_cache off; 82 | proxy_http_version 1.1; 83 | 84 | # Increase timeout for long-lived connections (e.g., SSE or WebSockets) 85 | proxy_read_timeout 3600s; 86 | proxy_send_timeout 3600s; 87 | } 88 | } 89 | } 90 | --- 91 | apiVersion: v1 92 | kind: Service 93 | metadata: 94 | name: nginx 95 | spec: 96 | selector: 97 | app: nginx 98 | ports: 99 | - port: 80 100 | targetPort: 80 101 | type: ClusterIP 102 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/README.md: -------------------------------------------------------------------------------- 1 | # ECK Operator, and ECK Resources Helm Charts 2 | 3 | [![Artifact HUB](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/elastic)](https://artifacthub.io/packages/search?repo=elastic) 4 | 5 | This directory contains the Helm chart for deploying the ECK operator, and charts for deploying any resource in the Elastic Stack individually, or as a group. 6 | 7 | The instructions below are intended to deploy the Helm charts from a local copy of this repository. Refer to https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-install-helm.html to install the charts from the official repository. 8 | 9 | ## ECK Operator Helm Chart Usage 10 | 11 | View the available settings for customizing the installation. 12 | 13 | ```sh 14 | helm show values ./eck-operator 15 | ``` 16 | 17 | Install the CRDs and deploy the operator with cluster-wide permissions to manage all namespaces. 18 | 19 | ```sh 20 | helm install elastic-operator ./eck-operator -n elastic-system --create-namespace 21 | ``` 22 | 23 | Install the operator restricted to a single namespace. 24 | 25 | ```sh 26 | # This step must be done by a cluster administrator to install the CRDs -- which are global resources. 27 | helm install elastic-operator-crds ./eck-operator/charts/eck-operator-crds 28 | 29 | # This step can be done by any user with full access to the my-namespace namespace. 
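# As the flag names suggest: installCRDs=false avoids re-installing the cluster-scoped CRDs from the step above, while managedNamespaces, createClusterScopedResources=false, and webhook.enabled=false keep the operator's permissions confined to my-namespace.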
30 | helm install elastic-operator ./eck-operator -n my-namespace --create-namespace \ 31 | --set=installCRDs=false \ 32 | --set=managedNamespaces='{my-namespace}' \ 33 | --set=createClusterScopedResources=false \ 34 | --set=webhook.enabled=false 35 | ``` 36 | 37 | ## ECK Stack Helm Chart Usage 38 | 39 | Install a quickstart Elasticsearch and Kibana resource in a cluster controlled by the ECK Operator. 40 | 41 | ```sh 42 | helm install es-kb-quickstart ./eck-stack -n elastic-stack --create-namespace 43 | ``` 44 | 45 | To see all resources installed by the helm chart: 46 | 47 | ```sh 48 | kubectl get elastic -l "app.kubernetes.io/instance"=es-kb-quickstart -n elastic-stack 49 | ``` 50 | 51 | ## ECK Helm Chart Development 52 | 53 | ### ECK Helm Chart test suite 54 | 55 | [Helm UnitTest Plugin](https://github.com/quintush/helm-unittest) is used to ensure Helm Charts render properly. 56 | 57 | #### Installation 58 | 59 | ``` 60 | helm plugin install https://github.com/quintush/helm-unittest --version 0.2.8 61 | ``` 62 | 63 | #### Running Test Suite 64 | 65 | The test suite can be run from the Makefile in the root of the project with the following command: 66 | 67 | ``` 68 | make helm-test 69 | ``` 70 | 71 | *Note* that the Makefile target runs the script in `{root}/hack/helm/test.sh` 72 | 73 | #### Manually invoking the Helm Unit Tests for a particular Chart 74 | 75 | The Helm unit tests can be manually invoked for any of the charts with the following command: 76 | 77 | ``` 78 | cd deploy/eck-stack 79 | helm unittest -3 -f 'templates/tests/*.yaml' --with-subchart=false . 80 | ``` 81 | 82 | ## Licensing 83 | 84 | The ECK Helm Charts are licensed under the [Elastic License 2.0](https://www.elastic.co/licensing/elastic-license) like the operator. They can be used with a Basic license for free. 85 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | templates/tests 25 | charts/*/templates/tests -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: eck-stack 3 | description: Elastic Stack managed by the ECK Operator 4 | kubeVersion: ">= 1.21.0-0" 5 | type: application 6 | version: 0.13.0-SNAPSHOT 7 | 8 | dependencies: 9 | - name: eck-elasticsearch 10 | condition: eck-elasticsearch.enabled 11 | version: "0.13.0-SNAPSHOT" 12 | - name: eck-kibana 13 | condition: eck-kibana.enabled 14 | version: "0.13.0-SNAPSHOT" 15 | - name: eck-beats 16 | condition: eck-beats.enabled 17 | version: "0.13.0-SNAPSHOT" 18 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/README.md: -------------------------------------------------------------------------------- 1 | # ECK-Stack 2 | 3 | ECK Stack is a Helm chart to assist in the deployment of Elastic Stack components, which are 4 | managed by the [ECK Operator](https://www.elastic.co/guide/en/cloud-on-k8s/current/index.html) 5 | 6 | ## Supported Elastic Stack Resources 7 | 8 | The following Elastic Stack resources are currently supported. 9 | 10 | - Elasticsearch 11 | - Kibana 12 | - Elastic Agent 13 | - Fleet Server 14 | - Beats 15 | - Logstash 16 | - APM Server 17 | 18 | Additional resources will be supported in future releases of this Helm Chart. 19 | 20 | ## Prerequisites 21 | 22 | - Kubernetes 1.21+ 23 | - Elastic ECK Operator 24 | 25 | ## Installing the Chart 26 | 27 | ### Installing the ECK Operator 28 | 29 | Before using this chart, the Elastic ECK Operator is required to be installed within the Kubernetes cluster. 30 | Full installation instructions can be found within [our documentation](https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-installing-eck.html) 31 | 32 | To install the ECK Operator using Helm. 
33 | 34 | ```sh 35 | # Add the Elastic Helm Repository 36 | helm repo add elastic https://helm.elastic.co && helm repo update 37 | 38 | # Install the ECK Operator cluster-wide 39 | helm install elastic-operator elastic/eck-operator -n elastic-system --create-namespace 40 | ``` 41 | 42 | Additional ECK Operator Helm installation options can be found within [our documentation](https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-install-helm.html) 43 | 44 | ### Installing the ECK Stack Chart 45 | 46 | The following will install the ECK-Stack chart using the default values, which will deploy an Elasticsearch [Quickstart Cluster](https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-deploy-elasticsearch.html), and a Kibana [Quickstart Instance](https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-deploy-kibana.html) 47 | 48 | ```sh 49 | # Add the Elastic Helm Repository 50 | helm repo add elastic https://helm.elastic.co && helm repo update 51 | 52 | # Install the ECK-Stack helm chart 53 | # This will setup a 'quickstart' Elasticsearch and Kibana resource in the 'elastic-stack' namespace 54 | helm install my-release elastic/eck-stack -n elastic-stack --create-namespace 55 | ``` 56 | 57 | More information on the different ways to use the ECK Stack chart to deploy Elastic Stack resources 58 | can be found in [our documentation](https://www.elastic.co/guide/en/cloud-on-k8s/current/index.html). 59 | 60 | ## Uninstalling the Chart 61 | 62 | To uninstall/delete the `my-release` deployment from the 'elastic-stack' namespace: 63 | 64 | ```console 65 | helm delete my-release -n elastic-stack 66 | ``` 67 | 68 | The command removes all the Elastic Stack resources associated with the chart and deletes the release. 69 | 70 | ## Configuration 71 | 72 | The following table lists the configurable parameters of the eck-stack chart and their default values. 73 | 74 | | Parameter | Description | Default | 75 | | --------- | ----------- | ------- | 76 | | `eck-elasticsearch.enabled` | If `true`, create an Elasticsearch resource (using the eck-elasticsearch Chart) | `true` | 77 | | `eck-kibana.enabled` | If `true`, create a Kibana resource (using the eck-kibana Chart) | `true` | 78 | | `eck-agent.enabled` | If `true`, create an Elastic Agent resource (using the eck-agent Chart) | `false` | 79 | | `eck-fleet-server.enabled` | If `true`, create a Fleet Server resource (using the eck-fleet-server Chart) | `false` | 80 | | `eck-logstash.enabled` | If `true`, create a Logstash resource (using the eck-logstash Chart) | `false` | 81 | | `eck-apm-server.enabled` | If `true`, create a standalone Elastic APM Server resource (using the eck-apm-server Chart) | `false` | 82 | 83 | Specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. 84 | 85 | Alternatively, a YAML file that specifies the values for the above parameters can be provided while installing the chart. For example, 86 | 87 | ```console 88 | helm install my-release -f values.yaml . 89 | ``` 90 | 91 | ## Contributing 92 | 93 | This chart is maintained at [github.com/elastic/cloud-on-k8s](https://github.com/elastic/cloud-on-k8s/tree/main/deploy/eck-stack). 94 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 
2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | templates/tests 25 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: eck-beats 3 | description: Elastic Beats managed by the ECK operator 4 | # Requirement comes from minimum version supported for eck-operator (https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s_supported_versions.html) 5 | kubeVersion: ">= 1.20.0-0" 6 | type: application 7 | version: 0.13.0-SNAPSHOT 8 | sources: 9 | - https://github.com/elastic/cloud-on-k8s 10 | - https://github.com/elastic/beats 11 | icon: https://helm.elastic.co/icons/beats.png 12 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/examples/filebeat_no_autodiscover.yaml: -------------------------------------------------------------------------------- 1 | name: filebeat 2 | version: 8.16.0-SNAPSHOT 3 | spec: 4 | type: filebeat 5 | elasticsearchRef: 6 | name: eck-elasticsearch 7 | kibanaRef: 8 | name: eck-kibana 9 | config: 10 | filebeat.inputs: 11 | - type: container 12 | paths: 13 | - /var/log/containers/*.log 14 | processors: 15 | - add_host_metadata: {} 16 | - add_cloud_metadata: {} 17 | daemonSet: 18 | podTemplate: 19 | spec: 20 | automountServiceAccountToken: true 21 | terminationGracePeriodSeconds: 30 22 | dnsPolicy: ClusterFirstWithHostNet 23 | hostNetwork: true # Allows to provide richer host metadata 24 | containers: 25 | - name: filebeat 26 | securityContext: 27 | runAsUser: 0 28 | # If using Red Hat OpenShift uncomment this: 29 | #privileged: true 30 | volumeMounts: 31 | - name: varlogcontainers 32 | mountPath: /var/log/containers 33 | - name: varlogpods 34 | mountPath: /var/log/pods 35 | - name: varlibdockercontainers 36 | mountPath: /var/lib/docker/containers 37 | volumes: 38 | - name: varlogcontainers 39 | hostPath: 40 | path: /var/log/containers 41 | - name: varlogpods 42 | hostPath: 43 | path: /var/log/pods 44 | - name: varlibdockercontainers 45 | hostPath: 46 | path: /var/lib/docker/containers 47 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 2 | 1. Check Beat status 3 | $ kubectl get beat {{ include "beat.fullname" . }} -n {{ .Release.Namespace }} 4 | 5 | 2. Check Beat pod status 6 | $ kubectl get pods --namespace={{ .Release.Namespace }} -l beat.k8s.elastic.co/name={{ include "beat.fullname" . }} 7 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 
3 | */}} 4 | {{- define "beat.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "beat.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "beat.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "beat.labels" -}} 37 | helm.sh/chart: {{ include "beat.chart" . }} 38 | {{ include "beat.selectorLabels" . }} 39 | app.kubernetes.io/managed-by: {{ .Release.Service }} 40 | {{- if .Values.labels }} 41 | {{ toYaml .Values.labels }} 42 | {{- end }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "beat.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "beat.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/beats.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: beat.k8s.elastic.co/v1beta1 3 | kind: Beat 4 | metadata: 5 | name: {{ include "beat.fullname" . }} 6 | labels: 7 | {{- include "beat.labels" . | nindent 4 }} 8 | annotations: 9 | eck.k8s.elastic.co/license: basic 10 | {{- with .Values.annotations }} 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | spec: 14 | version: {{ required "A Beat version is required" .Values.version }} 15 | {{- if and (not (hasKey .Values.spec "daemonSet")) (not (hasKey .Values.spec "deployment")) }} 16 | {{ fail "At least one of daemonSet or deployment is required for a functional Beat" }} 17 | {{- end }} 18 | {{- if not .Values.spec.type }}{{ fail "A Beat type is required" }}{{- end }} 19 | {{- toYaml .Values.spec | nindent 2 }} 20 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | {{- with .Values.clusterRoleBinding }} 2 | --- 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRoleBinding 5 | metadata: 6 | name: {{ .name }} 7 | labels: 8 | {{- include "beat.labels" $ | nindent 4 }} 9 | {{- with .labels }} 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | {{- if or $.Values.annotations .annotations }} 13 | annotations: 14 | {{- with $.Values.annotations }} 15 | {{- toYaml . | nindent 4 }} 16 | {{- end }} 17 | {{- with .annotations }} 18 | {{- toYaml . | nindent 4 }} 19 | {{- end }} 20 | {{- end }} 21 | {{- with .subjects }} 22 | subjects: 23 | {{- range . 
}}
24 |   - kind: {{ .kind }}
25 |     name: {{ .name }}
26 |     namespace: {{ .namespace | default $.Release.Namespace | quote }}
27 | {{- end }}
28 | {{- end }}
29 | {{- with .roleRef }}
30 | roleRef:
31 |   kind: {{ .kind }}
32 |   name: {{ .name }}
33 |   apiGroup: {{ .apiGroup }}
34 | {{- end }}
35 | {{- end }}
36 |
--------------------------------------------------------------------------------
/observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/cluster-role.yaml:
--------------------------------------------------------------------------------
1 | {{- with .Values.clusterRole }}
2 | ---
3 | apiVersion: rbac.authorization.k8s.io/v1
4 | kind: ClusterRole
5 | metadata:
6 |   name: {{ .name }}
7 |   labels:
8 |     {{- include "beat.labels" $ | nindent 4 }}
9 |     {{- with .labels }}
10 |     {{- toYaml . | nindent 4 }}
11 |     {{- end }}
12 |   {{- if or $.Values.annotations .annotations }}
13 |   annotations:
14 |     {{- with $.Values.annotations }}
15 |     {{- toYaml . | nindent 4 }}
16 |     {{- end }}
17 |     {{- with .annotations }}
18 |     {{- toYaml . | nindent 4 }}
19 |     {{- end }}
20 |   {{- end }}
21 | rules: {{- toYaml .rules | nindent 2 }}
22 | {{- end }}
23 |
--------------------------------------------------------------------------------
/observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/service-account.yaml:
--------------------------------------------------------------------------------
1 |
2 | {{- with .Values.serviceAccount }}
3 | ---
4 | apiVersion: v1
5 | kind: ServiceAccount
6 | metadata:
7 |   name: {{ .name }}
8 |   namespace: {{ .namespace | default $.Release.Namespace | quote }}
9 |   labels:
10 |     {{- include "beat.labels" $ | nindent 4 }}
11 |     {{- with .labels }}
12 |     {{- toYaml . | nindent 4 }}
13 |     {{- end }}
14 |   {{- if or $.Values.annotations .annotations }}
15 |   annotations:
16 |     {{- with $.Values.annotations }}
17 |     {{- toYaml . | nindent 4 }}
18 |     {{- end }}
19 |     {{- with .annotations }}
20 |     {{- toYaml . | nindent 4 }}
21 |     {{- end }}
22 |   {{- end }}
23 | {{- end }}
24 |
--------------------------------------------------------------------------------
/observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/tests/beats-auditbeat-example_test.yaml:
--------------------------------------------------------------------------------
1 | suite: test auditbeat
2 | templates:
3 |   - templates/beats.yaml
4 | tests:
5 |   - it: should render auditbeat configuration properly.
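# These suites target the helm-unittest Helm plugin, which renders the listed
# templates with the given values/set inputs and checks the asserts. A typical
# local run might look like the following sketch; the plugin source URL and
# file pattern are assumptions based on this chart's layout:
#   $ helm plugin install https://github.com/helm-unittest/helm-unittest
#   $ helm unittest -f 'templates/tests/*_test.yaml' charts/eck-beats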
6 | values: 7 | - ../../examples/auditbeat_hosts.yaml 8 | release: 9 | name: quickstart 10 | asserts: 11 | - isKind: 12 | of: Beat 13 | - equal: 14 | path: spec.elasticsearchRef.name 15 | value: eck-elasticsearch 16 | - equal: 17 | path: spec.kibanaRef.name 18 | value: eck-kibana 19 | - equal: 20 | path: spec.config.[auditbeat.modules][0].module 21 | value: file_integrity 22 | - equal: 23 | path: spec.config.[auditbeat.modules][0].paths 24 | value: 25 | - /hostfs/bin 26 | - /hostfs/usr/bin 27 | - /hostfs/sbin 28 | - /hostfs/usr/sbin 29 | - /hostfs/etc 30 | - equal: 31 | path: spec.config.[auditbeat.modules][0].scan_at_start 32 | value: true 33 | - equal: 34 | path: spec.config.[auditbeat.modules][0].recursive 35 | value: true 36 | - equal: 37 | path: spec.config.[auditbeat.modules][1].module 38 | value: auditd 39 | - equal: 40 | path: spec.daemonSet.podTemplate.spec.hostPID 41 | value: true 42 | - equal: 43 | path: spec.daemonSet.podTemplate.spec.dnsPolicy 44 | value: ClusterFirstWithHostNet 45 | - equal: 46 | path: spec.daemonSet.podTemplate.spec.hostNetwork 47 | value: true 48 | - equal: 49 | path: spec.daemonSet.podTemplate.spec.securityContext.runAsUser 50 | value: 0 51 | - equal: 52 | path: spec.daemonSet.podTemplate.spec.containers[0].name 53 | value: auditbeat 54 | - equal: 55 | path: spec.daemonSet.podTemplate.spec.containers[0].securityContext.capabilities.add 56 | value: 57 | - 'AUDIT_READ' 58 | - 'AUDIT_WRITE' 59 | - 'AUDIT_CONTROL' 60 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/tests/beats-filebeat-example_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test filebeat 2 | templates: 3 | - templates/beats.yaml 4 | tests: 5 | - it: should render filebeat configuration properly. 6 | values: 7 | - ../../examples/filebeat_no_autodiscover.yaml 8 | release: 9 | name: quickstart 10 | asserts: 11 | - isKind: 12 | of: Beat 13 | - equal: 14 | path: spec.elasticsearchRef.name 15 | value: eck-elasticsearch 16 | - equal: 17 | path: spec.kibanaRef.name 18 | value: eck-kibana 19 | - equal: 20 | path: spec.config.[filebeat.inputs][0].type 21 | value: container 22 | - equal: 23 | path: spec.config.[filebeat.inputs][0].paths 24 | value: 25 | - /var/log/containers/*.log 26 | - equal: 27 | path: spec.daemonSet.podTemplate.spec.automountServiceAccountToken 28 | value: true 29 | - equal: 30 | path: spec.daemonSet.podTemplate.spec.dnsPolicy 31 | value: ClusterFirstWithHostNet 32 | - equal: 33 | path: spec.daemonSet.podTemplate.spec.hostNetwork 34 | value: true 35 | - equal: 36 | path: spec.daemonSet.podTemplate.spec.containers[0].name 37 | value: filebeat 38 | - equal: 39 | path: spec.daemonSet.podTemplate.spec.containers[0].securityContext.runAsUser 40 | value: 0 41 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/tests/beats-heartbeat-example_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test heartbeat 2 | templates: 3 | - templates/beats.yaml 4 | tests: 5 | - it: should render heartbeat configuration properly. 
6 | values: 7 | - ../../examples/heartbeat_es_kb_health.yaml 8 | release: 9 | name: quickstart 10 | asserts: 11 | - isKind: 12 | of: Beat 13 | - equal: 14 | path: spec.elasticsearchRef.name 15 | value: eck-elasticsearch 16 | - equal: 17 | path: spec.config.[heartbeat.monitors][0].type 18 | value: tcp 19 | - equal: 20 | path: spec.config.[heartbeat.monitors][0].schedule 21 | value: '@every 5s' 22 | - equal: 23 | path: spec.config.[heartbeat.monitors][0].hosts 24 | value: 25 | - "elasticsearch-es-http.default.svc:9200" 26 | - equal: 27 | path: spec.config.[heartbeat.monitors][1].type 28 | value: tcp 29 | - equal: 30 | path: spec.config.[heartbeat.monitors][1].schedule 31 | value: '@every 5s' 32 | - equal: 33 | path: spec.config.[heartbeat.monitors][1].hosts 34 | value: 35 | - "eck-kibana-kb-http.default.svc:5601" 36 | - equal: 37 | path: spec.deployment.replicas 38 | value: 1 39 | - equal: 40 | path: spec.deployment.podTemplate.spec.securityContext.runAsUser 41 | value: 0 42 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/tests/beats-metricbeat-example_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test metricbeat 2 | templates: 3 | - templates/beats.yaml 4 | tests: 5 | - it: should render metricbeat configuration properly. 6 | values: 7 | - ../../examples/metricbeat_hosts.yaml 8 | release: 9 | name: quickstart 10 | asserts: 11 | - isKind: 12 | of: Beat 13 | - equal: 14 | path: spec.config.metricbeat.autodiscover.providers[0].hints.enabled 15 | value: "true" 16 | - equal: 17 | path: spec.config.metricbeat.autodiscover.providers[0].type 18 | value: kubernetes 19 | - equal: 20 | path: spec.config.metricbeat.modules[0].module 21 | value: system 22 | - equal: 23 | path: spec.config.metricbeat.modules[0].period 24 | value: 10s 25 | - equal: 26 | path: spec.config.metricbeat.modules[0].metricsets 27 | value: 28 | - cpu 29 | - load 30 | - memory 31 | - network 32 | - process 33 | - process_summary 34 | - equal: 35 | path: spec.config.metricbeat.modules[1].metricsets 36 | value: 37 | - filesystem 38 | - fsstat 39 | - equal: 40 | path: spec.config.metricbeat.modules[2].module 41 | value: kubernetes 42 | - equal: 43 | path: spec.config.metricbeat.modules[2].hosts 44 | value: 45 | - https://${NODE_NAME}:10250 46 | - equal: 47 | path: spec.config.metricbeat.modules[2].bearer_token_file 48 | value: /var/run/secrets/kubernetes.io/serviceaccount/token 49 | - equal: 50 | path: spec.daemonSet.podTemplate.spec.serviceAccountName 51 | value: metricbeat 52 | - equal: 53 | path: spec.daemonSet.podTemplate.spec.hostNetwork 54 | value: true 55 | --- 56 | suite: test beat cluster role 57 | templates: 58 | - templates/cluster-role.yaml 59 | tests: 60 | - it: should render cluster role in metricbeat example properly 61 | values: 62 | - ../../examples/metricbeat_hosts.yaml 63 | release: 64 | name: quickstart 65 | asserts: 66 | - isKind: 67 | of: ClusterRole 68 | - equal: 69 | path: metadata.name 70 | value: metricbeat 71 | - equal: 72 | path: rules[0].apiGroups[0] 73 | value: "" 74 | - equal: 75 | path: rules[0].resources 76 | value: 77 | - nodes 78 | - namespaces 79 | - events 80 | - pods 81 | - equal: 82 | path: rules[0].verbs 83 | value: 84 | - get 85 | - list 86 | - watch 87 | - equal: 88 | path: rules[1].apiGroups[0] 89 | value: extensions 90 | - equal: 91 | path: rules[1].resources 92 | value: 93 | - replicasets 94 | - equal: 95 | path: rules[1].verbs 
96 |           value:
97 |             - get
98 |             - list
99 |             - watch
100 |   - it: should render custom labels and annotations properly.
101 |     values:
102 |       - ../../examples/metricbeat_hosts.yaml
103 |     set:
104 |       labels:
105 |         test: label
106 |       annotations:
107 |         test: annotation
108 |       clusterRole:
109 |         annotations:
110 |           clusterRole: annotation
111 |         labels:
112 |           clusterRole: label
113 |     release:
114 |       name: quickstart
115 |     asserts:
116 |       - isKind:
117 |           of: ClusterRole
118 |       - equal:
119 |           path: metadata.labels
120 |           value:
121 |             app.kubernetes.io/instance: quickstart
122 |             app.kubernetes.io/managed-by: Helm
123 |             app.kubernetes.io/name: eck-beats
124 |             clusterRole: label
125 |             helm.sh/chart: eck-beats-0.13.0-SNAPSHOT
126 |             test: label
127 |       - equal:
128 |           path: metadata.annotations
129 |           value:
130 |             clusterRole: annotation
131 |             test: annotation
132 | ---
133 | suite: test beat cluster role binding
134 | templates:
135 |   - templates/cluster-role-binding.yaml
136 | tests:
137 |   - it: should render cluster role binding in metricbeat example properly
138 |     values:
139 |       - ../../examples/metricbeat_hosts.yaml
140 |     release:
141 |       name: quickstart
142 |     asserts:
143 |       - isKind:
144 |           of: ClusterRoleBinding
145 |       - equal:
146 |           path: metadata.name
147 |           value: metricbeat
148 |       - equal:
149 |           path: subjects[0].kind
150 |           value: ServiceAccount
151 |       - equal:
152 |           path: subjects[0].name
153 |           value: metricbeat
154 |       - equal:
155 |           path: roleRef.kind
156 |           value: ClusterRole
157 |       - equal:
158 |           path: roleRef.name
159 |           value: metricbeat
160 |       - equal:
161 |           path: roleRef.apiGroup
162 |           value: rbac.authorization.k8s.io
163 |   - it: should render custom labels and annotations properly.
164 |     values:
165 |       - ../../examples/metricbeat_hosts.yaml
166 |     set:
167 |       labels:
168 |         test: label
169 |       annotations:
170 |         test: annotation
171 |       clusterRoleBinding:
172 |         annotations:
173 |           clusterRoleBinding: annotation
174 |         labels:
175 |           clusterRoleBinding: label
176 |     release:
177 |       name: quickstart
178 |     asserts:
179 |       - isKind:
180 |           of: ClusterRoleBinding
181 |       - equal:
182 |           path: metadata.labels
183 |           value:
184 |             app.kubernetes.io/instance: quickstart
185 |             app.kubernetes.io/managed-by: Helm
186 |             app.kubernetes.io/name: eck-beats
187 |             clusterRoleBinding: label
188 |             helm.sh/chart: eck-beats-0.13.0-SNAPSHOT
189 |             test: label
190 |       - equal:
191 |           path: metadata.annotations
192 |           value:
193 |             clusterRoleBinding: annotation
194 |             test: annotation
195 | ---
196 | suite: test beat service account
197 | templates:
198 |   - templates/service-account.yaml
199 | tests:
200 |   - it: should render service account in metricbeat example properly
201 |     values:
202 |       - ../../examples/metricbeat_hosts.yaml
203 |     release:
204 |       name: quickstart
205 |     asserts:
206 |       - isKind:
207 |           of: ServiceAccount
208 |       - equal:
209 |           path: metadata.name
210 |           value: metricbeat
211 |   - it: should render custom labels and annotations properly.
212 | values: 213 | - ../../examples/metricbeat_hosts.yaml 214 | set: 215 | labels: 216 | test: label 217 | annotations: 218 | test: annotation 219 | serviceAccount: 220 | annotations: 221 | serviceAccount: annotation 222 | labels: 223 | serviceAccount: label 224 | release: 225 | name: quickstart 226 | asserts: 227 | - isKind: 228 | of: ServiceAccount 229 | - equal: 230 | path: metadata.labels 231 | value: 232 | app.kubernetes.io/instance: quickstart 233 | app.kubernetes.io/managed-by: Helm 234 | app.kubernetes.io/name: eck-beats 235 | serviceAccount: label 236 | helm.sh/chart: eck-beats-0.13.0-SNAPSHOT 237 | test: label 238 | - equal: 239 | path: metadata.annotations 240 | value: 241 | serviceAccount: annotation 242 | test: annotation 243 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/tests/beats-packetbeat-example_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test packetbeat 2 | templates: 3 | - templates/beats.yaml 4 | tests: 5 | - it: should render packetbeat configuration properly. 6 | values: 7 | - ../../examples/packetbeat_dns_http.yaml 8 | release: 9 | name: quickstart 10 | asserts: 11 | - isKind: 12 | of: Beat 13 | - equal: 14 | path: spec.elasticsearchRef.name 15 | value: eck-elasticsearch 16 | - equal: 17 | path: spec.kibanaRef.name 18 | value: eck-kibana 19 | - equal: 20 | path: spec.config.[packetbeat.interfaces.device] 21 | value: any 22 | - equal: 23 | path: spec.config.[packetbeat.protocols] 24 | value: 25 | - type: dns 26 | ports: 27 | - 53 28 | include_authorities: true 29 | include_additionals: true 30 | - type: http 31 | ports: 32 | - 80 33 | - 8000 34 | - 8080 35 | - 9200 36 | - equal: 37 | path: spec.config.[packetbeat.flows] 38 | value: 39 | timeout: 30s 40 | period: 10s 41 | - equal: 42 | path: spec.daemonSet.podTemplate.spec 43 | value: 44 | terminationGracePeriodSeconds: 30 45 | hostNetwork: true 46 | automountServiceAccountToken: true # some older Beat versions are depending on this settings presence in k8s context 47 | dnsPolicy: ClusterFirstWithHostNet 48 | containers: 49 | - name: packetbeat 50 | securityContext: 51 | runAsUser: 0 52 | capabilities: 53 | add: 54 | - NET_ADMIN 55 | volumes: [] 56 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/templates/tests/beats_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test beats 2 | templates: 3 | - templates/beats.yaml 4 | tests: 5 | - it: should render filebeat properly, when type and deployment is set 6 | release: 7 | name: quickstart 8 | set: 9 | spec: 10 | type: "filebeat" 11 | deployment: {} 12 | asserts: 13 | - isKind: 14 | of: Beat 15 | - equal: 16 | path: metadata.name 17 | value: quickstart-eck-beats 18 | - equal: 19 | path: spec.version 20 | value: 8.16.0-SNAPSHOT 21 | - equal: 22 | path: spec.type 23 | value: filebeat 24 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-beats/values.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Default values for eck-beats. 3 | # This is a YAML-formatted file. 4 | 5 | # Overridable names of the Beats resource. 6 | # By default, this is the Release name set for the chart, 7 | # followed by 'eck-beats'. 
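# As a worked example of the naming helpers (a sketch consistent with
# beats_test.yaml): with release name 'quickstart' and no overrides the Beat
# resource is named 'quickstart-eck-beats'; with nameOverride: filebeat it
# would be 'quickstart-filebeat'; with fullnameOverride: filebeat it is
# exactly 'filebeat'.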
8 | #
9 | # nameOverride will override the name of the Chart with the name set here,
10 | # so nameOverride: quickstart would convert to '{{ Release.name }}-quickstart'
11 | #
12 | # nameOverride: "quickstart"
13 | #
14 | # fullnameOverride will override both the release name, and the chart name,
15 | # and will name the Beats resource exactly as specified.
16 | #
17 | # fullnameOverride: "quickstart"
18 |
19 | # Name and version of Elastic Beats.
20 | #
21 | name: filebeat
22 | version: 8.16.0-SNAPSHOT
23 |
24 | # Labels that will be applied to Elastic Beats.
25 | #
26 | labels: {}
27 |
28 | # Annotations that will be applied to Elastic Beats.
29 | #
30 | annotations: {}
31 |
32 | spec:
33 |   # Type of Elastic Beats. Standard types of Beat are [filebeat,metricbeat,heartbeat,auditbeat,packetbeat,journalbeat].
34 |   # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-beat-configuration.html#k8s-beat-deploy-elastic-beat
35 |   #
36 |   # Note: This is required to be set, or the release install will fail.
37 |   #
38 |   type: filebeat
39 |
40 |   # Referenced resources are listed below; depending on the setup, at least elasticsearchRef is required for a functional Beat.
41 |   # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-beat-configuration.html#k8s-beat-connect-es
42 |   #
43 |   # Reference to ECK-managed Kibana instance.
44 |   #
45 |   # kibanaRef:
46 |   #   name: quickstart
47 |   # Optional namespace reference to Kibana instance.
48 |   # If not specified, then the namespace of the Beats instance
49 |   # will be assumed.
50 |   #
51 |   #   namespace: default
52 |
53 |   # Reference to ECK-managed Elasticsearch instance.
54 |   # *Note* If Beat's output is intended to go to Elasticsearch and not something like Logstash,
55 |   # this elasticsearchRef must be updated to the name of the Elasticsearch instance.
56 |   #
57 |   elasticsearchRef:
58 |     name: elasticsearch
59 |   #
60 |   # Optional namespace reference to Elasticsearch instance.
61 |   # If not specified, then the namespace of the Beats instance
62 |   # will be assumed.
63 |   #
64 |   #   namespace: default
65 |
66 |   # DaemonSet or Deployment specification for the type of Beat specified.
67 |   # At least one is required of [daemonSet, deployment].
68 |   # No default is currently set, refer to https://github.com/elastic/cloud-on-k8s/issues/7429.
69 |   # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-beat-configuration.html#k8s-beat-chose-the-deployment-model
70 |   #
71 |   # deployment:
72 |   #   podTemplate:
73 |   #     spec:
74 |   #       containers:
75 |   #         - name: agent
76 |   #           securityContext:
77 |   #             runAsUser: 0
78 |   daemonSet:
79 |     podTemplate:
80 |       spec:
81 |         serviceAccountName: filebeat
82 |         automountServiceAccountToken: true
83 |         terminationGracePeriodSeconds: 30
84 |         dnsPolicy: ClusterFirstWithHostNet
85 |         hostNetwork: true # Allows providing richer host metadata
86 |         containers:
87 |           - name: filebeat
88 |             securityContext:
89 |               runAsUser: 0
90 |               # If using Red Hat OpenShift uncomment this:
91 |               #privileged: true
92 |             volumeMounts:
93 |               - name: varlogcontainers
94 |                 mountPath: /var/log/containers
95 |               - name: varlogpods
96 |                 mountPath: /var/log/pods
97 |               - name: varlibdockercontainers
98 |                 mountPath: /var/lib/docker/containers
99 |             env:
100 |               - name: NODE_NAME
101 |                 valueFrom:
102 |                   fieldRef:
103 |                     fieldPath: spec.nodeName
104 |         volumes:
105 |           - name: varlogcontainers
106 |             hostPath:
107 |               path: /var/log/containers
108 |           - name: varlogpods
109 |             hostPath:
110 |               path: /var/log/pods
111 |           - name: varlibdockercontainers
112 |             hostPath:
113 |               path: /var/lib/docker/containers
114 |
115 |   # Configuration of Beat, which is dependent on the `type` of Beat specified.
116 |   # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-beat-configuration.html#k8s-beat-deploy-elastic-beat
117 |   #
118 |   config:
119 |     filebeat.autodiscover.providers:
120 |       - node: ${NODE_NAME}
121 |         type: kubernetes
122 |         hints.default_config.enabled: "false"
123 |         templates:
124 |           - condition.equals.kubernetes.namespace: chatbot
125 |             config:
126 |               - paths: ["/var/log/containers/*${data.kubernetes.container.id}.log"]
127 |                 type: container
128 |           - condition.equals.kubernetes.labels.log-label: "true"
129 |             config:
130 |               - paths: ["/var/log/containers/*${data.kubernetes.container.id}.log"]
131 |                 type: container
132 |     processors:
133 |       - add_cloud_metadata: {}
134 |       - add_host_metadata: {}
135 |
136 | # ServiceAccount to be used by Elastic Beats. Some Beats features (such as autodiscover or Kubernetes module metricsets)
137 | # require that Beat Pods interact with Kubernetes APIs. This functionality requires specific permissions.
138 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-beat-configuration.html#k8s-beat-role-based-access-control-for-beats
139 | #
140 | serviceAccount:
141 |   name: filebeat
142 |   namespace: observability
143 |
144 | # ClusterRoleBinding to be used by Elastic Beats. Similar to ServiceAccount, this is required in some scenarios.
145 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-beat-configuration.html#k8s-beat-role-based-access-control-for-beats
146 | #
147 | clusterRoleBinding:
148 |   name: filebeat
149 |   subjects:
150 |     - kind: ServiceAccount
151 |       name: filebeat
152 |       namespace: observability
153 |   roleRef:
154 |     kind: ClusterRole
155 |     name: filebeat
156 |     apiGroup: rbac.authorization.k8s.io
157 |
158 | # ClusterRole to be used by Elastic Beats. Similar to ServiceAccount, this is required in some scenarios.
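# Note: for the RBAC wiring in this file to take effect, the names must line
# up: clusterRole.name must match clusterRoleBinding.roleRef.name, the
# binding's subject must match the serviceAccount name/namespace, and the
# daemonSet podTemplate must reference that account via serviceAccountName
# (all 'filebeat' in the 'observability' namespace here).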
159 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-beat-configuration.html#k8s-beat-role-based-access-control-for-beats 160 | # 161 | clusterRole: 162 | name: filebeat 163 | rules: 164 | - apiGroups: [""] 165 | resources: 166 | - events 167 | - pods 168 | - namespaces 169 | - nodes 170 | verbs: 171 | - get 172 | - watch 173 | - list 174 | 175 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | templates/tests 25 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: eck-elasticsearch 3 | description: Elasticsearch managed by the ECK operator 4 | kubeVersion: ">= 1.21.0-0" 5 | type: application 6 | version: 0.13.0-SNAPSHOT 7 | sources: 8 | - https://github.com/elastic/cloud-on-k8s 9 | - https://github.com/elastic/elasticsearch/ 10 | icon: https://helm.elastic.co/icons/elasticsearch.png 11 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/examples/hot-warm-cold.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | nodeSets: 3 | - name: masters 4 | count: 1 5 | config: 6 | node.roles: ["master"] 7 | # Comment out when setting the vm.max_map_count via initContainer, as these are mutually exclusive. 8 | # For production workloads, it is strongly recommended to increase the kernel setting vm.max_map_count to 262144 9 | # and leave node.store.allow_mmap unset. 10 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-virtual-memory.html 11 | # 12 | node.store.allow_mmap: false 13 | podTemplate: 14 | spec: 15 | containers: 16 | - name: elasticsearch 17 | resources: 18 | limits: 19 | memory: 8Gi 20 | cpu: 2 21 | # Affinity/Anti-affinity settings for controlling the 'spreading' of Elasticsearch 22 | # pods across existing hosts. 23 | # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity 24 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-advanced-node-scheduling.html#k8s-affinity-options 25 | # 26 | # affinity: 27 | # nodeAffinity: 28 | # requiredDuringSchedulingIgnoredDuringExecution: 29 | # nodeSelectorTerms: 30 | # - matchExpressions: 31 | # - key: beta.kubernetes.io/instance-type 32 | # operator: In 33 | # # This should be adjusted to the instance type according to your setup 34 | # # 35 | # values: 36 | # - highio 37 | # Volume Claim settings. 
38 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-volume-claim-templates.html 39 | # 40 | volumeClaimTemplates: 41 | - metadata: 42 | name: elasticsearch-data 43 | spec: 44 | accessModes: 45 | - ReadWriteOnce 46 | resources: 47 | requests: 48 | storage: 1Ti 49 | # Adjust to your storage class name 50 | # 51 | # storageClassName: local-storage 52 | - name: hot 53 | count: 1 54 | config: 55 | node.roles: ["data_hot", "data_content", "ingest"] 56 | # Comment out when setting the vm.max_map_count via initContainer, as these are mutually exclusive. 57 | # For production workloads, it is strongly recommended to increase the kernel setting vm.max_map_count to 262144 58 | # and leave node.store.allow_mmap unset. 59 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-virtual-memory.html 60 | # 61 | node.store.allow_mmap: false 62 | podTemplate: 63 | spec: 64 | containers: 65 | - name: elasticsearch 66 | resources: 67 | limits: 68 | memory: 16Gi 69 | cpu: 4 70 | # Affinity/Anti-affinity settings for controlling the 'spreading' of Elasticsearch 71 | # pods across existing hosts. 72 | # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity 73 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-advanced-node-scheduling.html#k8s-affinity-options 74 | # 75 | # affinity: 76 | # nodeAffinity: 77 | # requiredDuringSchedulingIgnoredDuringExecution: 78 | # nodeSelectorTerms: 79 | # - matchExpressions: 80 | # - key: beta.kubernetes.io/instance-type 81 | # operator: In 82 | # # This should be adjusted to the instance type according to your setup 83 | # # 84 | # values: 85 | # - highio 86 | # Volume Claim settings. 87 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-volume-claim-templates.html 88 | # 89 | volumeClaimTemplates: 90 | - metadata: 91 | name: elasticsearch-data 92 | spec: 93 | accessModes: 94 | - ReadWriteOnce 95 | resources: 96 | requests: 97 | storage: 1Ti 98 | # Adjust to your storage class name 99 | # 100 | # storageClassName: local-storage 101 | - name: warm 102 | count: 1 103 | config: 104 | node.roles: ["data_warm"] 105 | # Comment out when setting the vm.max_map_count via initContainer, as these are mutually exclusive. 106 | # For production workloads, it is strongly recommended to increase the kernel setting vm.max_map_count to 262144 107 | # and leave node.store.allow_mmap unset. 108 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-virtual-memory.html 109 | # 110 | node.store.allow_mmap: false 111 | podTemplate: 112 | spec: 113 | containers: 114 | - name: elasticsearch 115 | resources: 116 | limits: 117 | memory: 16Gi 118 | cpu: 2 119 | # Affinity/Anti-affinity settings for controlling the 'spreading' of Elasticsearch 120 | # pods across existing hosts. 121 | # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity 122 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-advanced-node-scheduling.html#k8s-affinity-options 123 | # 124 | # affinity: 125 | # nodeAffinity: 126 | # requiredDuringSchedulingIgnoredDuringExecution: 127 | # nodeSelectorTerms: 128 | # - matchExpressions: 129 | # - key: beta.kubernetes.io/instance-type 130 | # operator: In 131 | # # This should be adjusted to the instance type according to your setup 132 | # # 133 | # values: 134 | # - highstorage 135 | # Volume Claim settings. 
136 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-volume-claim-templates.html 137 | # 138 | volumeClaimTemplates: 139 | - metadata: 140 | name: elasticsearch-data 141 | spec: 142 | accessModes: 143 | - ReadWriteOnce 144 | resources: 145 | requests: 146 | storage: 10Ti 147 | # Adjust to your storage class name 148 | # 149 | # storageClassName: local-storage 150 | - name: cold 151 | count: 1 152 | config: 153 | node.roles: ["data_cold"] 154 | # Comment out when setting the vm.max_map_count via initContainer, as these are mutually exclusive. 155 | # For production workloads, it is strongly recommended to increase the kernel setting vm.max_map_count to 262144 156 | # and leave node.store.allow_mmap unset. 157 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-virtual-memory.html 158 | # 159 | node.store.allow_mmap: false 160 | podTemplate: 161 | spec: 162 | containers: 163 | - name: elasticsearch 164 | resources: 165 | limits: 166 | memory: 8Gi 167 | cpu: 2 168 | # Affinity/Anti-affinity settings for controlling the 'spreading' of Elasticsearch 169 | # pods across existing hosts. 170 | # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity 171 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-advanced-node-scheduling.html#k8s-affinity-options 172 | # 173 | # affinity: 174 | # nodeAffinity: 175 | # requiredDuringSchedulingIgnoredDuringExecution: 176 | # nodeSelectorTerms: 177 | # - matchExpressions: 178 | # - key: beta.kubernetes.io/instance-type 179 | # operator: In 180 | # # This should be adjusted to the instance type according to your setup 181 | # # 182 | # values: 183 | # - highstorage 184 | # Volume Claim settings. 185 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-volume-claim-templates.html 186 | # 187 | volumeClaimTemplates: 188 | - metadata: 189 | name: elasticsearch-data 190 | spec: 191 | accessModes: 192 | - ReadWriteOnce 193 | resources: 194 | requests: 195 | storage: 20Ti 196 | # Adjust to your storage class name 197 | # 198 | # storageClassName: local-storage 199 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/examples/ingress/elasticsearch-ingress-aks.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # The following is an example of an Elasticsearch resource that is configured to use an Ingress resource in an AKS cluster. 3 | # 4 | ingress: 5 | enabled: true 6 | className: webapprouting.kubernetes.azure.com 7 | annotations: 8 | # This is required for AKS Loadbalancing to understand that it's communicating with 9 | # an HTTPS backend. 10 | nginx.ingress.kubernetes.io/backend-protocol: "HTTPS" 11 | labels: 12 | my: label 13 | pathType: Prefix 14 | hosts: 15 | - host: "elasticsearch.company.dev" 16 | path: "/" 17 | nodeSets: 18 | - name: default 19 | count: 3 20 | # Comment out when setting the vm.max_map_count via initContainer, as these are mutually exclusive. 21 | # For production workloads, it is strongly recommended to increase the kernel setting vm.max_map_count to 262144 22 | # and leave node.store.allow_mmap unset. 
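# A minimal sketch of the initContainer alternative mentioned above, assuming
# privileged containers are permitted in the cluster:
#
# podTemplate:
#   spec:
#     initContainers:
#       - name: sysctl
#         securityContext:
#           privileged: true
#           runAsUser: 0
#         command: ['sh', '-c', 'sysctl -w vm.max_map_count=262144']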
23 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/master/k8s-virtual-memory.html 24 | # 25 | config: 26 | node.store.allow_mmap: false 27 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/examples/ingress/elasticsearch-ingress-eks-alb.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # The following is an example of an Elasticsearch resource that is configured to use an Ingress resource in an EKS cluster 3 | # which provisions an application load balancer. 4 | # 5 | ingress: 6 | enabled: true 7 | className: alb 8 | annotations: 9 | alb.ingress.kubernetes.io/scheme: "internet-facing" 10 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 11 | alb.ingress.kubernetes.io/backend-protocol: "HTTPS" 12 | alb.ingress.kubernetes.io/target-type: "ip" 13 | # To use an ALB with ECK, you must provide a valid ACM certificate ARN or use certificate discovery. 14 | # There are 2 options for EKS: 15 | # 1. Create a valid ACM certificate, and uncomment the following annotation and update it to the correct ARN. 16 | # 2. Create a valid ACM certificate and ensure that the hosts[0].host matches the certificate's Common Name (CN) and 17 | # certificate discovery *should* find the certificate automatically and use it. 18 | # 19 | # ref: https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.8/guide/ingress/cert_discovery/ 20 | # 21 | # alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:00000000000:certificate/b65be571-8220-4f2e-8cb1-94194535d877" 22 | labels: 23 | my: label 24 | pathType: Prefix 25 | hosts: 26 | - host: "elasticsearch.company.dev" 27 | path: "/" 28 | nodeSets: 29 | - name: default 30 | count: 3 31 | # Comment out when setting the vm.max_map_count via initContainer, as these are mutually exclusive. 32 | # For production workloads, it is strongly recommended to increase the kernel setting vm.max_map_count to 262144 33 | # and leave node.store.allow_mmap unset. 34 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/master/k8s-virtual-memory.html 35 | # 36 | config: 37 | node.store.allow_mmap: false 38 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/examples/ingress/elasticsearch-ingress-eks-nlb.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # The following is an example of an Elasticsearch resource that is configured to deploy a 3 | # network load balancer (NLB) in an EKS cluster. To provision an NLB "ingress" for the 4 | # Elasticsearch cluster, you are required to set annotations on the service, 5 | # and not an Ingress resource. 6 | ingress: 7 | enabled: false 8 | http: 9 | service: 10 | metadata: 11 | annotations: 12 | service.beta.kubernetes.io/aws-load-balancer-type: external 13 | service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip 14 | service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing 15 | service.beta.kubernetes.io/aws-load-balancer-backend-protocol: ssl 16 | spec: 17 | type: LoadBalancer 18 | nodeSets: 19 | - name: default 20 | count: 3 21 | # Comment out when setting the vm.max_map_count via initContainer, as these are mutually exclusive. 22 | # For production workloads, it is strongly recommended to increase the kernel setting vm.max_map_count to 262144 23 | # and leave node.store.allow_mmap unset. 
24 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/master/k8s-virtual-memory.html 25 | # 26 | config: 27 | node.store.allow_mmap: false 28 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/examples/ingress/elasticsearch-ingress-gke.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # The following is an example of an Elasticsearch resource that is configured to use an Ingress resource in a GKE cluster. 3 | # 4 | ingress: 5 | enabled: true 6 | annotations: 7 | my: annotation 8 | labels: 9 | my: label 10 | pathType: Prefix 11 | hosts: 12 | - host: "elasticsearch.company.dev" 13 | path: "/" 14 | http: 15 | service: 16 | metadata: 17 | annotations: 18 | # This is required for `ClusterIP` services (which are the default ECK service type) to be used with Ingress in GKE clusters. 19 | cloud.google.com/neg: '{"ingress": true}' 20 | # This is required to enable the GKE Ingress Controller to use HTTPS as the backend protocol. 21 | cloud.google.com/app-protocols: '{"https":"HTTPS"}' 22 | nodeSets: 23 | - name: default 24 | count: 3 25 | # Comment out when setting the vm.max_map_count via initContainer, as these are mutually exclusive. 26 | # For production workloads, it is strongly recommended to increase the kernel setting vm.max_map_count to 262144 27 | # and leave node.store.allow_mmap unset. 28 | # ref: https://www.elastic.co/guide/en/cloud-on-k8s/master/k8s-virtual-memory.html 29 | # 30 | config: 31 | node.store.allow_mmap: false 32 | # Enable anonymous access to allow GCLB health probes to succeed 33 | xpack.security.authc: 34 | anonymous: 35 | username: anon 36 | roles: monitoring_user 37 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 2 | 1. Check Elasticsearch resource status 3 | $ kubectl get es {{ include "elasticsearch.fullname" . }} -n {{ .Release.Namespace }} 4 | 5 | 2. Check Elasticsearch pod status 6 | $ kubectl get pods --namespace={{ .Release.Namespace }} -l elasticsearch.k8s.elastic.co/cluster-name={{ include "elasticsearch.fullname" . }} 7 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "elasticsearch.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 
12 | */}} 13 | {{- define "elasticsearch.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "elasticsearch.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "elasticsearch.labels" -}} 37 | helm.sh/chart: {{ include "elasticsearch.chart" . }} 38 | {{ include "elasticsearch.selectorLabels" . }} 39 | app.kubernetes.io/managed-by: {{ .Release.Service }} 40 | {{- if .Values.labels }} 41 | {{ toYaml .Values.labels }} 42 | {{- end }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "elasticsearch.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "elasticsearch.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/templates/elasticsearch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: elasticsearch.k8s.elastic.co/v1 3 | kind: Elasticsearch 4 | metadata: 5 | name: {{ include "elasticsearch.fullname" . }} 6 | labels: 7 | {{- include "elasticsearch.labels" . | nindent 4 }} 8 | annotations: 9 | eck.k8s.elastic.co/license: basic 10 | {{- with .Values.annotations }} 11 | {{- toYaml . | nindent 4 }} 12 | {{- end }} 13 | spec: 14 | {{- with .Values.auth }} 15 | auth: 16 | {{- toYaml . | nindent 4 }} 17 | {{- end }} 18 | {{- with .Values.updateStrategy }} 19 | updateStrategy: 20 | {{- toYaml . | nindent 4 }} 21 | {{- end }} 22 | {{- with .Values.secureSettings }} 23 | secureSettings: 24 | {{- toYaml . | nindent 2 }} 25 | {{- end }} 26 | {{- with .Values.transport }} 27 | transport: 28 | {{- toYaml . | nindent 4 }} 29 | {{- end }} 30 | {{- with .Values.http }} 31 | http: 32 | {{- toYaml . | nindent 4 }} 33 | {{- end }} 34 | version: {{ required "An Elasticsearch version is required" .Values.version }} 35 | {{- with .Values.monitoring }} 36 | monitoring: 37 | {{- toYaml . | nindent 4 }} 38 | {{- end }} 39 | {{- with .Values.remoteClusters }} 40 | remoteClusters: 41 | {{- toYaml . | nindent 2 }} 42 | {{- end }} 43 | {{- with .Values.volumeClaimDeletePolicy }} 44 | volumeClaimDeletePolicy: 45 | {{- if and (not (eq . "DeleteOnScaledownOnly")) (not (eq . "DeleteOnScaledownAndClusterDeletion")) }} 46 | {{ fail "volumeClaimDeletePolicy can only be one of 'DeleteOnScaledownOnly' or 'DeleteOnScaledownAndClusterDeletion'" }} 47 | {{- end }} 48 | {{- toYaml . | nindent 4 }} 49 | {{- end }} 50 | {{- if eq (len .Values.nodeSets) 0 }} 51 | {{ fail "At least one nodeSet is required" }} 52 | {{- end }} 53 | nodeSets: 54 | {{ toYaml .Values.nodeSets | nindent 4 }} 55 | {{- with .Values.image }} 56 | image: {{ . }} 57 | {{- end }} 58 | {{- with .Values.podDisruptionBudget }} 59 | {{- if .disabled }} 60 | podDisruptionBudget: {} 61 | {{- else }} 62 | {{- with .spec }} 63 | podDisruptionBudget: 64 | spec: 65 | {{- toYaml . 
| nindent 6 }} 66 | {{- end }} 67 | {{- end }} 68 | {{- end }} 69 | {{- with .Values.serviceAccountName }} 70 | serviceAccountName: {{ . }} 71 | {{- end }} 72 | {{- with .Values.revisionHistoryLimit }} 73 | revisionHistoryLimit: {{ . }} 74 | {{- end }} 75 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.ingress.enabled -}} 2 | {{- $pathType := .Values.ingress.pathType | default "Prefix" -}} 3 | apiVersion: networking.k8s.io/v1 4 | kind: Ingress 5 | metadata: 6 | name: {{ include "elasticsearch.fullname" . }} 7 | labels: 8 | {{- include "elasticsearch.labels" . | nindent 4 }} 9 | {{- with .Values.ingress.labels }} 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | {{- if .Values.ingress.annotations }} 13 | annotations: 14 | {{- with .Values.ingress.annotations }} 15 | {{- toYaml . | nindent 4 }} 16 | {{- end }} 17 | {{- end }} 18 | spec: 19 | {{- if .Values.ingress.className }} 20 | ingressClassName: {{ .Values.ingress.className | quote }} 21 | {{- end }} 22 | {{- if .Values.ingress.tls.enabled }} 23 | tls: 24 | - hosts: 25 | {{- range .Values.ingress.hosts }} 26 | - {{ .host | quote }} 27 | {{- end }} 28 | {{- if .Values.ingress.tls.secretName }} 29 | secretName: {{ .Values.ingress.tls.secretName }} 30 | {{- else }} 31 | secretName: {{ include "elasticsearch.fullname" . }}-es-http-certs-internal 32 | {{- end }} 33 | {{- end }} 34 | rules: 35 | {{- range .Values.ingress.hosts }} 36 | {{- $hostPath := .path | default "/" }} 37 | - host: {{ .host | quote }} 38 | http: 39 | paths: 40 | - path: {{ $hostPath }} 41 | pathType: {{ $pathType }} 42 | backend: 43 | service: 44 | name: {{ include "elasticsearch.fullname" $ }}-es-http 45 | port: 46 | number: 9200 47 | {{- end }} 48 | {{ end }} 49 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-elasticsearch/templates/tests/ingress_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test elasticsearch ingress 2 | templates: 3 | - templates/ingress.yaml 4 | tests: 5 | - it: should render quickstart properly 6 | set: 7 | ingress.enabled: true 8 | release: 9 | name: quickstart 10 | asserts: 11 | - isKind: 12 | of: Ingress 13 | - equal: 14 | path: metadata.name 15 | value: quickstart-eck-elasticsearch 16 | - it: name override should work properly 17 | set: 18 | nameOverride: override 19 | ingress.enabled: true 20 | release: 21 | name: quickstart 22 | asserts: 23 | - isKind: 24 | of: Ingress 25 | - equal: 26 | path: metadata.name 27 | value: quickstart-override 28 | - it: fullname override should work properly 29 | set: 30 | fullnameOverride: override 31 | ingress.enabled: true 32 | release: 33 | name: quickstart 34 | asserts: 35 | - isKind: 36 | of: Ingress 37 | - equal: 38 | path: metadata.name 39 | value: override 40 | - it: should render custom labels, and annotations values properly 41 | set: 42 | ingress: 43 | enabled: true 44 | labels: 45 | test: label 46 | annotations: 47 | test: annotation 48 | release: 49 | name: quickstart 50 | asserts: 51 | - isKind: 52 | of: Ingress 53 | - equal: 54 | path: metadata.labels 55 | value: 56 | app.kubernetes.io/instance: quickstart 57 | app.kubernetes.io/managed-by: Helm 58 | app.kubernetes.io/name: eck-elasticsearch 59 | helm.sh/chart: 
eck-elasticsearch-0.13.0-SNAPSHOT 60 | test: label 61 | - equal: 62 | path: metadata.annotations 63 | value: 64 | test: annotation 65 | - it: should render ingress properly 66 | values: 67 | - ../../examples/ingress/elasticsearch-ingress-gke.yaml 68 | release: 69 | name: quickstart 70 | asserts: 71 | - isKind: 72 | of: Ingress 73 | - equal: 74 | path: metadata.name 75 | value: quickstart-eck-elasticsearch 76 | - contains: 77 | path: spec.rules 78 | content: 79 | host: "elasticsearch.company.dev" 80 | http: 81 | paths: 82 | - backend: 83 | service: 84 | name: quickstart-eck-elasticsearch-es-http 85 | port: 86 | number: 9200 87 | path: / 88 | pathType: Prefix 89 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | templates/tests 25 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: eck-kibana 3 | description: Kibana managed by the ECK operator 4 | kubeVersion: ">= 1.21.0-0" 5 | type: application 6 | version: 0.13.0-SNAPSHOT 7 | sources: 8 | - https://github.com/elastic/cloud-on-k8s 9 | - https://github.com/elastic/kibana 10 | icon: https://helm.elastic.co/icons/kibana.png 11 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/examples/http-configuration.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Version of Kibana. 3 | # 4 | version: 8.16.0-SNAPSHOT 5 | 6 | # Labels that will be applied to Kibana. 7 | # 8 | labels: {} 9 | # key: value 10 | 11 | # Annotations that will be applied to Kibana. 12 | # 13 | annotations: {} 14 | # key: value 15 | 16 | spec: 17 | # Count of Kibana replicas to create. 18 | # 19 | count: 1 20 | 21 | # Reference to ECK-managed Elasticsearch resource, ideally from {{ "elasticsearch.fullname" }} 22 | # 23 | elasticsearchRef: 24 | name: eck-elasticsearch 25 | # namespace: default 26 | http: 27 | service: 28 | spec: 29 | # Type of service to deploy for Kibana. 30 | # This deploys a load balancer in a cloud service provider, where supported. 31 | # 32 | type: LoadBalancer 33 | # tls: 34 | # selfSignedCertificate: 35 | # subjectAltNames: 36 | # - ip: 1.2.3.4 37 | # - dns: kibana.example.com 38 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/examples/ingress/kibana-gke.yaml: -------------------------------------------------------------------------------- 1 | # The following is an example of a Kibana resource that is configured to use an Ingress resource in a GKE cluster. 2 | # 3 | 4 | # Name of the Kibana instance. 
5 | # 6 | fullnameOverride: kibana 7 | 8 | spec: 9 | # Reference to ECK-managed Elasticsearch instance, ideally from {{ "elasticsearch.fullname" }} 10 | # 11 | elasticsearchRef: 12 | name: elasticsearch 13 | config: 14 | server: 15 | publicBaseUrl: "https://kibana.company.dev" 16 | http: 17 | service: 18 | metadata: 19 | annotations: 20 | # This is required for `ClusterIP` services (which are the default ECK service type) to be used with Ingress in GKE clusters. 21 | cloud.google.com/neg: '{"ingress": true}' 22 | # This is required to enable the GKE Ingress Controller to use HTTPS as the backend protocol. 23 | cloud.google.com/app-protocols: '{"https":"HTTPS"}' 24 | # cloud.google.com/backend-config: '{"default": "kibana"}' 25 | 26 | ingress: 27 | enabled: true 28 | pathType: Prefix 29 | hosts: 30 | - host: "kibana.company.dev" 31 | path: "/" 32 | tls: 33 | enabled: true 34 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 2 | 1. Check Kibana status 3 | $ kubectl get kibana {{ include "kibana.fullname" . }} -n {{ .Release.Namespace }} 4 | 5 | 2. Check Kibana pod status 6 | $ kubectl get pods --namespace={{ .Release.Namespace }} -l kibana.k8s.elastic.co/name={{ include "kibana.fullname" . }} 7 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "kibana.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "kibana.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "kibana.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "kibana.labels" -}} 37 | helm.sh/chart: {{ include "kibana.chart" . }} 38 | {{ include "kibana.selectorLabels" . }} 39 | app.kubernetes.io/managed-by: {{ .Release.Service }} 40 | {{- if .Values.labels }} 41 | {{ toYaml .Values.labels }} 42 | {{- end }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "kibana.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "kibana.name" . 
}} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/templates/ingress.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.ingress.enabled -}} 2 | {{- $pathType := .Values.ingress.pathType | default "Prefix" -}} 3 | apiVersion: networking.k8s.io/v1 4 | kind: Ingress 5 | metadata: 6 | name: {{ include "kibana.fullname" . }} 7 | labels: 8 | {{- include "kibana.labels" . | nindent 4 }} 9 | {{- with .Values.ingress.labels }} 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | {{- if .Values.ingress.annotations }} 13 | annotations: 14 | {{- with .Values.ingress.annotations }} 15 | {{- toYaml . | nindent 4 }} 16 | {{- end }} 17 | {{- end }} 18 | spec: 19 | {{- if .Values.ingress.className }} 20 | ingressClassName: {{ .Values.ingress.className | quote }} 21 | {{- end }} 22 | {{- if .Values.ingress.tls }} 23 | tls: 24 | - hosts: 25 | {{- range .Values.ingress.hosts }} 26 | - {{ .host | quote }} 27 | {{- end }} 28 | {{- if .Values.ingress.tls.secretName }} 29 | secretName: {{ .Values.ingress.tls.secretName }} 30 | {{- else }} 31 | secretName: {{ include "kibana.fullname" . }}-kb-http-certs-internal 32 | {{- end }} 33 | {{- end }} 34 | rules: 35 | {{- range .Values.ingress.hosts }} 36 | {{- $hostPath := .path | default "/" }} 37 | - host: {{ .host | quote }} 38 | http: 39 | paths: 40 | - path: {{ $hostPath }} 41 | pathType: {{ $pathType }} 42 | backend: 43 | service: 44 | name: {{ include "kibana.fullname" $ }}-kb-http 45 | port: 46 | number: 5601 47 | {{- end }} 48 | {{ end }} 49 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/templates/kibana.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: kibana.k8s.elastic.co/v1 3 | kind: Kibana 4 | metadata: 5 | name: {{ include "kibana.fullname" . }} 6 | labels: 7 | {{- include "kibana.labels" . | nindent 4 }} 8 | annotations: 9 | eck.k8s.elastic.co/license: basic 10 | {{- with .Values.annotations }} 11 | {{- toYaml . 
| nindent 4 }} 12 | {{- end }} 13 | spec: 14 | version: {{ required "A Kibana version is required" .Values.version }} 15 | {{- toYaml .Values.spec | nindent 2 }} 16 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/templates/tests/ingress_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test kibana ingress 2 | templates: 3 | - templates/ingress.yaml 4 | tests: 5 | - it: should render quickstart properly 6 | set: 7 | ingress.enabled: true 8 | release: 9 | name: quickstart 10 | asserts: 11 | - isKind: 12 | of: Ingress 13 | - equal: 14 | path: metadata.name 15 | value: quickstart-eck-kibana 16 | - it: name override should work properly 17 | set: 18 | nameOverride: override 19 | ingress.enabled: true 20 | release: 21 | name: quickstart 22 | asserts: 23 | - isKind: 24 | of: Ingress 25 | - equal: 26 | path: metadata.name 27 | value: quickstart-override 28 | - it: fullname override should work properly 29 | set: 30 | fullnameOverride: override 31 | ingress.enabled: true 32 | release: 33 | name: quickstart 34 | asserts: 35 | - isKind: 36 | of: Ingress 37 | - equal: 38 | path: metadata.name 39 | value: override 40 | - it: should render custom labels, and annotations values properly 41 | set: 42 | ingress: 43 | enabled: true 44 | labels: 45 | test: label 46 | annotations: 47 | test: annotation 48 | release: 49 | name: quickstart 50 | asserts: 51 | - isKind: 52 | of: Ingress 53 | - equal: 54 | path: metadata.labels 55 | value: 56 | app.kubernetes.io/instance: quickstart 57 | app.kubernetes.io/managed-by: Helm 58 | app.kubernetes.io/name: eck-kibana 59 | helm.sh/chart: eck-kibana-0.13.0-SNAPSHOT 60 | test: label 61 | - equal: 62 | path: metadata.annotations 63 | value: 64 | test: annotation 65 | - it: should render ingress properly 66 | values: 67 | - ../../examples/ingress/kibana-gke.yaml 68 | release: 69 | name: quickstart 70 | asserts: 71 | - isKind: 72 | of: Ingress 73 | - equal: 74 | path: metadata.name 75 | value: kibana 76 | - contains: 77 | path: spec.rules 78 | content: 79 | host: "kibana.company.dev" 80 | http: 81 | paths: 82 | - backend: 83 | service: 84 | name: kibana-kb-http 85 | port: 86 | number: 5601 87 | path: / 88 | pathType: Prefix 89 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/templates/tests/kibana_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test kibana 2 | templates: 3 | - templates/kibana.yaml 4 | tests: 5 | - it: should render quickstart properly 6 | release: 7 | name: quickstart 8 | asserts: 9 | - isKind: 10 | of: Kibana 11 | - equal: 12 | path: metadata.name 13 | value: quickstart-eck-kibana 14 | - equal: 15 | path: spec.version 16 | value: 8.16.0-SNAPSHOT 17 | - it: name override should work properly 18 | set: 19 | nameOverride: override 20 | release: 21 | name: quickstart 22 | asserts: 23 | - isKind: 24 | of: Kibana 25 | - equal: 26 | path: metadata.name 27 | value: quickstart-override 28 | - it: fullname override should work properly 29 | set: 30 | fullnameOverride: override 31 | release: 32 | name: quickstart 33 | asserts: 34 | - isKind: 35 | of: Kibana 36 | - equal: 37 | path: metadata.name 38 | value: override 39 | - it: should render custom labels, and annotations values properly 40 | set: 41 | labels: 42 | test: label 43 | annotations: 44 | test: annotation 45 | 
release: 46 | name: quickstart 47 | asserts: 48 | - isKind: 49 | of: Kibana 50 | - equal: 51 | path: metadata.labels 52 | value: 53 | app.kubernetes.io/instance: quickstart 54 | app.kubernetes.io/managed-by: Helm 55 | app.kubernetes.io/name: eck-kibana 56 | helm.sh/chart: eck-kibana-0.13.0-SNAPSHOT 57 | test: label 58 | - equal: 59 | path: metadata.annotations 60 | value: 61 | eck.k8s.elastic.co/license: basic 62 | test: annotation 63 | - it: should render http service properly 64 | set: 65 | spec.elasticsearchRef.namespace: default 66 | values: 67 | - ../../examples/http-configuration.yaml 68 | release: 69 | name: quickstart 70 | asserts: 71 | - isKind: 72 | of: Kibana 73 | - equal: 74 | path: metadata.name 75 | value: quickstart-eck-kibana 76 | - equal: 77 | path: spec.version 78 | value: 8.16.0-SNAPSHOT 79 | - equal: 80 | path: spec.count 81 | value: 1 82 | - equal: 83 | path: spec.elasticsearchRef.name 84 | value: eck-elasticsearch 85 | - equal: 86 | path: spec.elasticsearchRef.namespace 87 | value: default 88 | - equal: 89 | path: spec.http.service.spec.type 90 | value: LoadBalancer 91 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/charts/eck-kibana/values.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Default values for eck-kibana. 3 | # This is a YAML-formatted file. 4 | 5 | # Overridable names of the Kibana resource. 6 | # By default, this is the Release name set for the chart, 7 | # followed by 'eck-kibana'. 8 | # 9 | # nameOverride will override the name of the Chart with the name set here, 10 | # so nameOverride: quickstart, would convert to '{{ Release.name }}-quickstart' 11 | # 12 | # nameOverride: "quickstart" 13 | # 14 | # fullnameOverride will override both the release name, and the chart name, 15 | # and will name the Kibana resource exactly as specified. 16 | # 17 | # fullnameOverride: "quickstart" 18 | 19 | # Version of Kibana. 20 | # 21 | version: 8.16.0-SNAPSHOT 22 | # Labels that will be applied to Kibana. 23 | # 24 | labels: {} 25 | # Annotations that will be applied to Kibana. 26 | # 27 | annotations: {} 28 | spec: 29 | # Count of Kibana replicas to create. 30 | # 31 | count: 1 32 | # Reference to ECK-managed Elasticsearch resource. 33 | # 34 | elasticsearchRef: 35 | name: eck-elasticsearch 36 | # Optional namespace reference to Elasticsearch resource. 37 | # If not specified, then the namespace of the Kibana resource 38 | # will be assumed. 39 | # 40 | # namespace: default 41 | # config: 42 | # server: 43 | # publicBaseUrl: "https://192.168.49.2.nip.io" 44 | http: 45 | tls: 46 | selfSignedCertificate: 47 | disabled: true 48 | # Settings for controlling Kibana ingress. Enabling ingress will expose your Kibana instance 49 | # to the public internet, and as such is disabled by default. 50 | # 51 | # *NOTE* when configuring Kibana Ingress, ensure that `spec.config.server.publicBaseUrl` setting for 52 | # Kibana is also set, as it is required when exposing Kibana behind a load balancer/ingress. 53 | # Also of note are `server.basePath`, and `server.rewriteBasePath` settings in the Kibana configuration. 54 | # 55 | # ref: https://www.elastic.co/guide/en/kibana/current/settings.html 56 | # 57 | # Each Cloud Service Provider has different requirements for setting up Ingress. 
Some links to common documentation are: 58 | # - AWS: https://docs.aws.amazon.com/eks/latest/userguide/alb-ingress.html 59 | # - GCP: https://cloud.google.com/kubernetes-engine/docs/concepts/ingress 60 | # - Azure: https://learn.microsoft.com/en-us/azure/aks/app-routing 61 | # - Nginx: https://kubernetes.github.io/ingress-nginx/ 62 | # 63 | ingress: 64 | enabled: false 65 | # Annotations that will be applied to the Ingress resource. Note that some ingress controllers are controlled via annotations. 66 | # 67 | # Nginx Annotations: https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/annotations/ 68 | # 69 | # Common annotations: 70 | # kubernetes.io/ingress.class: gce # Configures the Ingress resource to use the GCE ingress controller and create an external Application Load Balancer. 71 | # kubernetes.io/ingress.class: gce-internal # Configures the Ingress resource to use the GCE ingress controller and create an internal Application Load Balancer. 72 | # kubernetes.io/ingress.class: nginx # Configures the Ingress resource to use the NGINX ingress controller. 73 | # 74 | annotations: 75 | kubernetes.io/ingress.class: nginx 76 | # nginx.ingress.kubernetes.io/rewrite-target: / 77 | # nginx.ingress.kubernetes.io/rewrite-target: /$2 78 | # 79 | labels: {} 80 | # Some ingress controllers, notably AKS and EKS, require the use of a specific class name to route traffic to the correct controller; 81 | # this replaces the use of the 'kubernetes.io/ingress.class' annotation. 82 | # 83 | # className: webapprouting.kubernetes.azure.com | alb 84 | 85 | # Ingress paths are required to have a corresponding path type. Defaults to 'Prefix'. 86 | # 87 | # There are 3 supported path types: 88 | # - ImplementationSpecific 89 | # - Prefix 90 | # - Exact 91 | # 92 | # ref: https://kubernetes.io/docs/concepts/services-networking/ingress/#path-types 93 | # 94 | pathType: Prefix 95 | # Hosts are a list of hosts included in the Ingress definition, with a corresponding path at which the Kibana service 96 | # will be exposed. Each host in the list should be a fully qualified DNS name that will resolve to the exposed Ingress object. 97 | # 98 | # ref: https://kubernetes.io/docs/concepts/services-networking/ingress/#name-based-virtual-hosting 99 | # 100 | hosts: 101 | - host: app.192.168.49.2.nip.io 102 | path: / 103 | # TLS defines whether TLS will be enabled on the Ingress resource. 104 | # 105 | # *NOTE* Many Cloud Service Providers handle TLS in a custom manner; it is recommended to consult their documentation. 106 | # Notably, the GKE and Nginx Ingress Controllers seem to respect the Ingress TLS settings, while AKS and EKS ignore them. 107 | # 108 | # - AKS: https://learn.microsoft.com/en-us/azure/aks/app-routing-dns-ssl 109 | # - GKE: https://cloud.google.com/kubernetes-engine/docs/concepts/ingress#options_for_providing_ssl_certificates 110 | # - EKS: https://aws.amazon.com/blogs/containers/serve-distinct-domains-with-tls-powered-by-acm-on-amazon-eks/ 111 | # - Nginx: https://kubernetes.github.io/ingress-nginx/user-guide/tls/ 112 | # 113 | # Kubernetes ingress TLS documentation: 114 | # ref: https://kubernetes.io/docs/concepts/services-networking/ingress/#tls 115 | # 116 | tls: false 117 | # Optional Kubernetes secret name that contains a base64 encoded PEM certificate and private key that corresponds to the above 'hosts' definitions. 118 | # If tls is enabled, but this field is not set, the self-signed certificate and key created by the ECK operator will be used.
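# As a hedged illustration (a sketch, not an upstream chart default): a TLS-enabled setup might combine the settings above as ingress.enabled: true, hosts: [{host: kibana.example.com, path: /}], tls: true, plus the secretName below; 'kibana.example.com' and a secret name such as 'kibana-example-tls' are placeholder values to adjust for your environment.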
119 | # secretName: chart-example-tls 120 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/kkk.ym: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | annotations: 5 | kubernetes.io/ingress.class: nginx 6 | meta.helm.sh/release-name: elk 7 | meta.helm.sh/release-namespace: observability 8 | creationTimestamp: "2024-11-18T17:03:53Z" 9 | generation: 1 10 | labels: 11 | app.kubernetes.io/instance: elk 12 | app.kubernetes.io/managed-by: Helm 13 | app.kubernetes.io/name: eck-kibana 14 | helm.sh/chart: eck-kibana-0.13.0-SNAPSHOT 15 | name: elk-eck-kibana 16 | namespace: observability 17 | resourceVersion: "1257500" 18 | uid: d618d034-a347-44c9-92e2-208d252ea22e 19 | spec: 20 | rules: 21 | - host: app.192.168.49.2.nip.io 22 | http: 23 | paths: 24 | - backend: 25 | service: 26 | name: elk-eck-kibana-kb-http 27 | port: 28 | number: 5601 29 | path: /kibana 30 | pathType: Prefix 31 | status: 32 | loadBalancer: 33 | ingress: 34 | - ip: 192.168.49.2 35 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Install ECK operator 3 | kubens observability 4 | kubectl delete -f https://download.elastic.co/downloads/eck/2.13.0/crds.yaml 5 | kubectl create -f https://download.elastic.co/downloads/eck/2.13.0/crds.yaml 6 | kubectl apply -f https://download.elastic.co/downloads/eck/2.13.0/operator.yaml 7 | # Install nginx ingress controller 8 | # helm upgrade --install ingress-nginx ingress-nginx \ 9 | # --repo https://kubernetes.github.io/ingress-nginx \ 10 | # --namespace ingress-nginx --create-namespace 11 | # Install ELK stack (clean up any stale filebeat RBAC objects first) 12 | kubectl get serviceaccount filebeat -n elk &> /dev/null && kubectl delete serviceaccount filebeat -n elk || true 13 | kubectl get clusterrolebinding filebeat -n elk &> /dev/null && kubectl delete clusterrolebinding filebeat -n elk || true 14 | kubectl get clusterrole filebeat -n elk &> /dev/null && kubectl delete clusterrole filebeat -n elk || true 15 | # Set the Kibana ingress host from $HOST (must be exported for yq's env()); 'host' matches the chart's values schema 16 | yq e '.ingress.hosts = [{"host": env(HOST), "path": "/kibana"}]' -i ./charts/eck-kibana/values.yaml 17 | helm upgrade --install elk -f values.yaml . 18 | 19 | # Username: elastic 20 | # Password: kubectl get secret elasticsearch-es-elastic-user -n observability -o jsonpath='{.data.elastic}' | base64 -d -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Elasticsearch ECK-Stack {{ .Chart.Version }} has been deployed successfully! 2 | 3 | To see the status of all resources, run 4 | 5 | kubectl get elastic -n {{ .Release.Namespace }} -l "app.kubernetes.io/instance"={{ .Release.Name }} 6 | 7 | More information on the Elastic ECK Operator and its Helm chart can be found 8 | within our documentation. 9 | 10 | https://www.elastic.co/guide/en/cloud-on-k8s/current/index.html 11 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart.
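For example (an illustrative note, not upstream documentation): with .Values.nameOverride unset, the helper below resolves to the chart name itself ('eck-stack'), truncated to 63 characters to satisfy Kubernetes DNS-label length limits.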
3 | */}} 4 | {{- define "eck-stack.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "eck-stack.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "eck-stack.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "eck-stack.labels" -}} 37 | helm.sh/chart: {{ include "eck-stack.chart" . }} 38 | {{ include "eck-stack.selectorLabels" . }} 39 | app.kubernetes.io/managed-by: {{ .Release.Service }} 40 | {{- end }} 41 | 42 | {{/* 43 | Selector labels 44 | */}} 45 | {{- define "eck-stack.selectorLabels" -}} 46 | app.kubernetes.io/name: {{ include "eck-stack.name" . }} 47 | app.kubernetes.io/instance: {{ .Release.Name }} 48 | {{- end }} 49 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/templates/tests/beats_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test beats 2 | templates: 3 | - charts/eck-beats/templates/beats.yaml 4 | tests: 5 | - it: should render specified beat properly 6 | set: 7 | eck-beats: 8 | enabled: true 9 | spec: 10 | type: "metricbeat" 11 | deployment: {} 12 | release: 13 | name: quickstart 14 | asserts: 15 | - isKind: 16 | of: Beat 17 | - equal: 18 | path: metadata.name 19 | value: quickstart-eck-beats 20 | - equal: 21 | path: spec.version 22 | value: 8.16.0-SNAPSHOT 23 | - it: should render custom metricbeat example properly 24 | values: 25 | - ../../examples/beats/metricbeat_hosts.yaml 26 | release: 27 | name: quickstart 28 | asserts: 29 | - isKind: 30 | of: Beat 31 | - equal: 32 | path: metadata.name 33 | value: quickstart-eck-beats 34 | - equal: 35 | path: spec.version 36 | value: 8.16.0-SNAPSHOT 37 | - equal: 38 | path: spec.kibanaRef.name 39 | value: quickstart 40 | - equal: 41 | path: spec.elasticsearchRef.name 42 | value: quickstart 43 | - equal: 44 | path: spec.type 45 | value: metricbeat 46 | - equal: 47 | path: spec.daemonSet.podTemplate.spec.securityContext.runAsUser 48 | value: 0 49 | - equal: 50 | path: spec.daemonSet.podTemplate.spec.serviceAccountName 51 | value: metricbeat 52 | - equal: 53 | path: spec.daemonSet.podTemplate.spec.hostNetwork 54 | value: true 55 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/templates/tests/elastic-agent_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test elastic-agent 2 | templates: 3 | - charts/eck-agent/templates/elastic-agent.yaml 4 | tests: 5 | - it: should render quickstart properly 6 | set: 7 | eck-agent: 8 | enabled: true 9 | 
spec: 10 | deployment: {} 11 | release: 12 | name: quickstart 13 | asserts: 14 | - isKind: 15 | of: Agent 16 | - equal: 17 | path: metadata.name 18 | value: quickstart-eck-agent 19 | - equal: 20 | path: spec.version 21 | value: 8.16.0-SNAPSHOT 22 | - it: should render agent in custom fleet example properly 23 | values: 24 | - ../../examples/agent/fleet-agents.yaml 25 | release: 26 | name: quickstart 27 | asserts: 28 | - isKind: 29 | of: Agent 30 | - equal: 31 | path: metadata.name 32 | value: quickstart-eck-agent 33 | - equal: 34 | path: spec.version 35 | value: 8.16.0-SNAPSHOT 36 | - equal: 37 | path: spec.kibanaRef.name 38 | value: kibana 39 | - equal: 40 | path: spec.fleetServerRef.name 41 | value: fleet-server 42 | - equal: 43 | path: spec.mode 44 | value: fleet 45 | - equal: 46 | path: spec.daemonSet.podTemplate.spec.serviceAccountName 47 | value: elastic-agent 48 | - equal: 49 | path: spec.daemonSet.podTemplate.spec.hostNetwork 50 | value: true 51 | - equal: 52 | path: spec.daemonSet.podTemplate.spec.dnsPolicy 53 | value: ClusterFirstWithHostNet 54 | - equal: 55 | path: spec.daemonSet.podTemplate.spec.automountServiceAccountToken 56 | value: true 57 | - equal: 58 | path: spec.daemonSet.podTemplate.spec.securityContext.runAsUser 59 | value: 0 60 | --- 61 | suite: test fleet-agent 62 | templates: 63 | - charts/eck-fleet-server/templates/fleet-server.yaml 64 | tests: 65 | - it: should render quickstart properly 66 | set: 67 | eck-agent.enabled: true 68 | release: 69 | name: quickstart 70 | asserts: 71 | - isKind: 72 | of: Agent 73 | - equal: 74 | path: metadata.name 75 | value: quickstart-eck-fleet-server 76 | - equal: 77 | path: spec.version 78 | value: 8.16.0-SNAPSHOT 79 | - it: should render fleet server in custom fleet example properly 80 | values: 81 | - ../../examples/agent/fleet-agents.yaml 82 | release: 83 | name: quickstart 84 | asserts: 85 | - isKind: 86 | of: Agent 87 | - equal: 88 | path: metadata.name 89 | value: fleet-server 90 | - equal: 91 | path: spec.version 92 | value: 8.16.0-SNAPSHOT 93 | - equal: 94 | path: spec.kibanaRef.name 95 | value: kibana 96 | - equal: 97 | path: spec.mode 98 | value: fleet 99 | - equal: 100 | path: spec.fleetServerEnabled 101 | value: true 102 | - equal: 103 | path: spec.deployment.podTemplate.spec.serviceAccountName 104 | value: fleet-server 105 | - equal: 106 | path: spec.deployment.podTemplate.spec.automountServiceAccountToken 107 | value: true 108 | - equal: 109 | path: spec.deployment.podTemplate.spec.securityContext.runAsUser 110 | value: 0 111 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/templates/tests/elasticsearch_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test elasticsearch 2 | templates: 3 | - charts/eck-elasticsearch/templates/elasticsearch.yaml 4 | tests: 5 | - it: should render quickstart properly 6 | set: 7 | eck-elasticsearch.version: 8.16.0-SNAPSHOT 8 | eck-kibana.enabled: false 9 | release: 10 | name: quickstart 11 | asserts: 12 | - isKind: 13 | of: Elasticsearch 14 | - equal: 15 | path: metadata.name 16 | value: elasticsearch 17 | - equal: 18 | path: spec.version 19 | value: 8.16.0-SNAPSHOT 20 | - it: name override should work properly 21 | set: 22 | eck-elasticsearch.nameOverride: override 23 | eck-elasticsearch.fullnameOverride: null 24 | eck-kibana.enabled: false 25 | release: 26 | name: quickstart 27 | asserts: 28 | - isKind: 29 | of: Elasticsearch 30 | - equal: 31 | path: 
metadata.name 32 | value: quickstart-override 33 | - it: fullname override should work properly 34 | set: 35 | eck-elasticsearch.fullnameOverride: override 36 | eck-kibana.enabled: false 37 | release: 38 | name: quickstart 39 | asserts: 40 | - isKind: 41 | of: Elasticsearch 42 | - equal: 43 | path: metadata.name 44 | value: override 45 | - it: should render custom elasticsearch using values file properly 46 | set: 47 | eck-kibana.enabled: false 48 | values: 49 | - ../../examples/custom-elasticsearch-kibana.yaml 50 | release: 51 | name: quickstart 52 | asserts: 53 | - isKind: 54 | of: Elasticsearch 55 | - equal: 56 | path: metadata.name 57 | value: quickstart 58 | - equal: 59 | path: spec.nodeSets[0].name 60 | value: default 61 | - equal: 62 | path: spec.nodeSets[0].volumeClaimTemplates[0].metadata.name 63 | value: elasticsearch-data 64 | - equal: 65 | path: spec.nodeSets[0].volumeClaimTemplates[0].spec.resources.requests.storage 66 | value: 100Gi 67 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/templates/tests/kibana_test.yaml: -------------------------------------------------------------------------------- 1 | suite: test kibana 2 | templates: 3 | - charts/eck-kibana/templates/kibana.yaml 4 | tests: 5 | - it: should render quickstart properly 6 | release: 7 | name: quickstart 8 | asserts: 9 | - isKind: 10 | of: Kibana 11 | - equal: 12 | path: metadata.name 13 | value: quickstart-eck-kibana 14 | - equal: 15 | path: spec.version 16 | value: 8.16.0-SNAPSHOT 17 | - it: name override should work properly 18 | set: 19 | eck-kibana.nameOverride: override 20 | release: 21 | name: quickstart 22 | asserts: 23 | - isKind: 24 | of: Kibana 25 | - equal: 26 | path: metadata.name 27 | value: quickstart-override 28 | - it: fullname override should work properly 29 | set: 30 | eck-kibana.fullnameOverride: override 31 | release: 32 | name: quickstart 33 | asserts: 34 | - isKind: 35 | of: Kibana 36 | - equal: 37 | path: metadata.name 38 | value: override 39 | - it: should render custom kibana using values file properly 40 | set: 41 | eck-kibana.spec.elasticsearchRef.namespace: default 42 | values: 43 | - ../../examples/custom-elasticsearch-kibana.yaml 44 | release: 45 | name: quickstart 46 | asserts: 47 | - isKind: 48 | of: Kibana 49 | - equal: 50 | path: metadata.name 51 | value: quickstart 52 | - equal: 53 | path: spec.version 54 | value: 8.16.0-SNAPSHOT 55 | - equal: 56 | path: spec.count 57 | value: 1 58 | - equal: 59 | path: spec.elasticsearchRef.name 60 | value: quickstart 61 | - equal: 62 | path: spec.elasticsearchRef.namespace 63 | value: default 64 | - equal: 65 | path: spec.http.service.spec.type 66 | value: LoadBalancer 67 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/eck-stack/values.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Default values for eck-stack. 3 | # This is a YAML-formatted file. 4 | 5 | # If enabled, will use the eck-elasticsearch chart and deploy an Elasticsearch resource. 6 | # 7 | eck-elasticsearch: 8 | enabled: true 9 | # This is adjusting the full name of the elasticsearch resource so that both the eck-elasticsearch 10 | # and the eck-kibana chart work together by default in the eck-stack chart. 11 | fullnameOverride: elasticsearch 12 | 13 | # If enabled, will use the eck-kibana chart and deploy a Kibana resource. 
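# (A hedged aside, not upstream docs: because eck-kibana is a subchart here, its fields can also be overridden per-install via the subchart key, e.g. 'helm upgrade --install elk . --set eck-kibana.spec.count=2'; the release name 'elk' is just an example mirroring run.sh.)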
14 | # 15 | eck-kibana: 16 | enabled: true 17 | spec: 18 | # This is also adjusting the kibana reference to the elasticsearch resource named previously so that 19 | # both the eck-elasticsearch and the eck-kibana chart work together by default in the eck-stack chart. 20 | elasticsearchRef: 21 | name: elasticsearch 22 | 23 | # If enabled, will use the eck-beats chart and deploy a Beats resource. 24 | # 25 | eck-beats: 26 | enabled: true 27 | 28 | -------------------------------------------------------------------------------- /observability/elasticcloud/deploy/helm-migrate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one 4 | # or more contributor license agreements. Licensed under the Elastic License 2.0; 5 | # you may not use this file except in compliance with the Elastic License 2.0. 6 | 7 | # Script to migrate an existing ECK 1.7.1 installation to Helm. 8 | 9 | set -euo pipefail 10 | 11 | CHART_REPO=${CHART_REPO:-"elastic"} 12 | CHART_REPO_URL=${CHART_REPO_URL:-"https://helm.elastic.co"} 13 | RELEASE_NAME=${RELEASE_NAME:-"elastic-operator"} 14 | RELEASE_NAMESPACE=${RELEASE_NAMESPACE:-"elastic-system"} 15 | 16 | echo "Adding labels and annotations to CRDs" 17 | for CRD in $(kubectl get crds --no-headers -o custom-columns=NAME:.metadata.name | grep k8s.elastic.co); do 18 | kubectl annotate crd "$CRD" meta.helm.sh/release-name="$RELEASE_NAME" 19 | kubectl annotate crd "$CRD" meta.helm.sh/release-namespace="$RELEASE_NAMESPACE" 20 | kubectl label crd "$CRD" app.kubernetes.io/managed-by=Helm 21 | done 22 | 23 | echo "Uninstalling ECK" 24 | kubectl delete -n "${RELEASE_NAMESPACE}" \ 25 | serviceaccount/elastic-operator \ 26 | secret/elastic-webhook-server-cert \ 27 | clusterrole.rbac.authorization.k8s.io/elastic-operator \ 28 | clusterrole.rbac.authorization.k8s.io/elastic-operator-view \ 29 | clusterrole.rbac.authorization.k8s.io/elastic-operator-edit \ 30 | clusterrolebinding.rbac.authorization.k8s.io/elastic-operator \ 31 | service/elastic-webhook-server \ 32 | configmap/elastic-operator \ 33 | statefulset.apps/elastic-operator \ 34 | validatingwebhookconfiguration.admissionregistration.k8s.io/elastic-webhook.k8s.elastic.co 35 | 36 | echo "Installing ECK with Helm" 37 | helm repo add "${CHART_REPO}" "${CHART_REPO_URL}" 38 | helm repo update 39 | helm install "${RELEASE_NAME}" "${CHART_REPO}/eck-operator" --create-namespace -n "${RELEASE_NAMESPACE}" 40 | 41 | -------------------------------------------------------------------------------- /observability/inotify/inotify-limits.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: inotify-limits 5 | namespace: kube-system 6 | spec: 7 | selector: 8 | matchLabels: 9 | name: inotify-limits 10 | template: 11 | metadata: 12 | labels: 13 | name: inotify-limits 14 | spec: 15 | hostPID: true 16 | containers: 17 | - name: sysctl 18 | image: gcr.io/google-containers/busybox:latest 19 | command: 20 | - sh 21 | - -c 22 | - | 23 | sysctl -w fs.inotify.max_user_instances=8192 24 | sysctl -w fs.inotify.max_user_watches=524288 25 | while true; do sleep 3600; done 26 | securityContext: 27 | privileged: true 28 | restartPolicy: Always 29 | -------------------------------------------------------------------------------- /observability/metric/Chart.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: grafana-monitoring 3 | description: A Helm chart for monitoring setup with subcharts 4 | version: 0.1.0 5 | appVersion: "1.0" 6 | dependencies: 7 | - name: prometheus 8 | version: 0.1.0 9 | repository: "" 10 | condition: prometheus.enabled 11 | 12 | - name: node-exporter 13 | version: 0.1.0 14 | repository: "" 15 | condition: node-exporter.enabled 16 | 17 | - name: cadvisor 18 | version: 0.1.0 19 | repository: "" 20 | condition: cadvisor.enabled 21 | 22 | - name: grafana 23 | version: 0.1.0 24 | repository: "" 25 | condition: grafana.enabled 26 | 27 | - name: alertmanager 28 | version: 0.1.0 29 | repository: "" 30 | condition: alertmanager.enabled -------------------------------------------------------------------------------- /observability/metric/charts/alertmanager/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: alertmanager 3 | description: Alertmanager subchart 4 | version: 0.1.0 5 | appVersion: "0.25.0" 6 | -------------------------------------------------------------------------------- /observability/metric/charts/alertmanager/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: alertmanager-config 5 | data: 6 | config.yml: |- 7 | route: 8 | repeat_interval: 15s # Time interval between consecutive notifications for the same alert 9 | group_interval: 15s # Time interval between consecutive notifications for the same group of alerts 10 | receiver: discord 11 | receivers: 12 | - name: discord 13 | discord_configs: 14 | - webhook_url: env(DISCORD_WEBHOOK_URL) 15 | -------------------------------------------------------------------------------- /observability/metric/charts/alertmanager/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: alertmanager 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: alertmanager 10 | template: 11 | metadata: 12 | labels: 13 | app: alertmanager 14 | spec: 15 | containers: 16 | - name: alertmanager 17 | image: {{ .Values.alertmanager.image }} 18 | ports: 19 | - containerPort: {{ .Values.alertmanager.containerPort }} 20 | volumeMounts: 21 | - name: alertmanager-config 22 | mountPath: /alertmanager/ 23 | - name: alertmanager-data 24 | mountPath: /alertmanager/data 25 | args: 26 | - '--config.file=/alertmanager/config.yml' 27 | - '--storage.path=/alertmanager/data' 28 | - '--log.level=debug' 29 | volumes: 30 | - name: alertmanager-config 31 | configMap: 32 | name: alertmanager-config 33 | items: 34 | - key: "config.yml" 35 | path: "config.yml" 36 | - name: alertmanager-data 37 | emptyDir: {} -------------------------------------------------------------------------------- /observability/metric/charts/alertmanager/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: alertmanager 5 | spec: 6 | selector: 7 | app: alertmanager 8 | ports: 9 | - protocol: TCP 10 | port: {{ .Values.alertmanager.containerPort }} 11 | targetPort: {{ .Values.alertmanager.containerPort }} 12 | type: ClusterIP -------------------------------------------------------------------------------- /observability/metric/charts/alertmanager/values.yaml: 
-------------------------------------------------------------------------------- 1 | alertmanager: 2 | image: prom/alertmanager:v0.25.0 3 | containerPort: 9093 4 | -------------------------------------------------------------------------------- /observability/metric/charts/cadvisor/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: cadvisor 3 | description: Cadvisor subchart 4 | version: 0.1.0 5 | appVersion: "latest" 6 | -------------------------------------------------------------------------------- /observability/metric/charts/cadvisor/templates/daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: cadvisor 5 | labels: 6 | app: cadvisor 7 | spec: 8 | selector: 9 | matchLabels: 10 | app: cadvisor 11 | template: 12 | metadata: 13 | labels: 14 | app: cadvisor 15 | spec: 16 | containers: 17 | - name: cadvisor 18 | image: {{ .Values.cadvisor.image }} 19 | ports: 20 | - containerPort: {{ .Values.cadvisor.containerPort }} # Expose the container port 21 | volumeMounts: 22 | - name: rootfs 23 | mountPath: /rootfs 24 | readOnly: true 25 | - name: varrun 26 | mountPath: /var/run 27 | - name: sys 28 | mountPath: /sys 29 | readOnly: true 30 | - name: dockersock 31 | mountPath: /var/lib/docker 32 | readOnly: true 33 | volumes: 34 | - name: rootfs 35 | hostPath: 36 | path: / 37 | - name: varrun 38 | hostPath: 39 | path: /var/run 40 | - name: sys 41 | hostPath: 42 | path: /sys 43 | - name: dockersock 44 | hostPath: 45 | path: /var/lib/docker 46 | restartPolicy: Always 47 | nodeSelector: 48 | kubernetes.io/os: linux # Optional: restrict to Linux nodes 49 | -------------------------------------------------------------------------------- /observability/metric/charts/cadvisor/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: cadvisor 5 | labels: 6 | app: cadvisor 7 | spec: 8 | type: ClusterIP # Use ClusterIP service type 9 | ports: 10 | - port: {{ .Values.cadvisor.containerPort }} # The port that will be exposed 11 | targetPort: {{ .Values.cadvisor.containerPort }} # The port on the container to forward traffic to 12 | selector: 13 | app: cadvisor # This should match the labels of the DaemonSet 14 | -------------------------------------------------------------------------------- /observability/metric/charts/cadvisor/values.yaml: -------------------------------------------------------------------------------- 1 | cadvisor: 2 | image: gcr.io/cadvisor/cadvisor:latest 3 | containerPort: 8080 4 | -------------------------------------------------------------------------------- /observability/metric/charts/grafana/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: grafana 3 | description: Grafana subchart 4 | version: 0.1.0 5 | appVersion: "9.0.5" 6 | -------------------------------------------------------------------------------- /observability/metric/charts/grafana/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: grafana-config 5 | data: 6 | dashboards.yaml: |- 7 | apiVersion: 1 8 | providers: 9 | # a unique provider name 10 | - name: 'Luong Nguyen Dinh' 11 | # org id.
will default to orgId 1 if not specified 12 | orgId: 1 13 | # name of the dashboard folder 14 | folder: '' 15 | # folder UID. will be automatically generated if not specified 16 | folderUid: '' 17 | # provider type. Required 18 | type: file 19 | # disable dashboard deletion 20 | disableDeletion: false 21 | # how often Grafana will scan for changed dashboards 22 | updateIntervalSeconds: 10 23 | # allow updating provisioned dashboards from the UI 24 | allowUiUpdates: true 25 | options: 26 | # path to dashboard files on disk. Required when using the 'file' type 27 | path: /opt/grafana/dashboards 28 | # use folder names from filesystem to create folders in Grafana 29 | foldersFromFilesStructure: true 30 | datasources.yaml: |- 31 | apiVersion: 1 32 | 33 | # list of datasources that should be deleted from the database 34 | deleteDatasources: 35 | - name: Prometheus 36 | orgId: 1 37 | 38 | # list of datasources to insert/update depending 39 | # what's available in the database 40 | datasources: 41 | - name: Prometheus 42 | type: prometheus 43 | access: proxy 44 | url: http://prometheus.observability.svc.cluster.local:9090 45 | isDefault: true 46 | 47 | --- 48 | apiVersion: v1 49 | kind: ConfigMap 50 | metadata: 51 | name: grafana-dashboard 52 | data: 53 | {{ (.Files.Glob "grafana-config/dashboards/*").AsConfig | nindent 2}} -------------------------------------------------------------------------------- /observability/metric/charts/grafana/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: grafana 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: grafana 10 | template: 11 | metadata: 12 | labels: 13 | app: grafana 14 | spec: 15 | containers: 16 | - name: grafana 17 | image: {{ .Values.grafana.image }} 18 | ports: 19 | - containerPort: {{ .Values.grafana.containerPort }} 20 | env: 21 | - name: GF_SECURITY_ADMIN_USER 22 | value: "admin" 23 | - name: GF_SECURITY_ADMIN_PASSWORD 24 | value: "admin" 25 | - name: JAEGER_AGENT_PORT 26 | value: "" 27 | - name: JAEGER_AGENT_HOST 28 | value: "" 29 | volumeMounts: 30 | - name: grafana-dashboard-config 31 | mountPath: /etc/grafana/provisioning/dashboards/dashboards.yaml 32 | subPath: dashboards.yaml 33 | readOnly: true 34 | - name: grafana-datasource-config 35 | mountPath: /etc/grafana/provisioning/datasources/datasource.yaml 36 | subPath: datasources.yaml 37 | readOnly: true 38 | - name: grafana-dashboard 39 | mountPath: /opt/grafana/dashboards 40 | - name: grafana-data 41 | mountPath: /var/lib/grafana 42 | 43 | volumes: 44 | - name: grafana-dashboard-config 45 | configMap: 46 | name: grafana-config 47 | items: 48 | - key: dashboards.yaml 49 | path: dashboards.yaml 50 | 51 | - name: grafana-datasource-config 52 | configMap: 53 | name: grafana-config 54 | items: 55 | - key: datasources.yaml 56 | path: datasources.yaml 57 | 58 | - name: grafana-dashboard 59 | configMap: 60 | name: grafana-dashboard 61 | 62 | - name: grafana-data 63 | emptyDir: {} 64 | -------------------------------------------------------------------------------- /observability/metric/charts/grafana/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: grafana 5 | spec: 6 | selector: 7 | app: grafana 8 | ports: 9 | - protocol: TCP 10 | port: {{ .Values.grafana.containerPort }} 11 | targetPort: {{ .Values.grafana.containerPort }} 12 | type: 
ClusterIP -------------------------------------------------------------------------------- /observability/metric/charts/grafana/values.yaml: -------------------------------------------------------------------------------- 1 | grafana: 2 | image: grafana/grafana:9.0.5 3 | containerPort: 3000 -------------------------------------------------------------------------------- /observability/metric/charts/node-exporter/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: node-exporter 3 | description: Node Exporter subchart 4 | version: 0.1.0 5 | appVersion: "1.3.1" 6 | -------------------------------------------------------------------------------- /observability/metric/charts/node-exporter/templates/daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: node-exporter 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: node-exporter 9 | template: 10 | metadata: 11 | labels: 12 | app: node-exporter 13 | spec: 14 | containers: 15 | - name: node-exporter 16 | image: {{ .Values.node_exporter.image }} 17 | args: 18 | - '--path.procfs=/host/proc' 19 | - '--path.sysfs=/host/sys' 20 | - '--path.rootfs=/' 21 | - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)' 22 | ports: 23 | - containerPort: {{ .Values.node_exporter.containerPort }} 24 | volumeMounts: 25 | - name: proc 26 | mountPath: /host/proc 27 | readOnly: true 28 | - name: sys 29 | mountPath: /host/sys 30 | readOnly: true 31 | - name: rootfs 32 | mountPath: /rootfs 33 | readOnly: true 34 | volumes: 35 | - name: proc 36 | hostPath: 37 | path: /proc 38 | - name: sys 39 | hostPath: 40 | path: /sys 41 | - name: rootfs 42 | hostPath: 43 | path: / 44 | -------------------------------------------------------------------------------- /observability/metric/charts/node-exporter/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: node-exporter 5 | spec: 6 | selector: 7 | app: node-exporter 8 | ports: 9 | - protocol: TCP 10 | port: {{ .Values.node_exporter.containerPort }} 11 | targetPort: {{ .Values.node_exporter.containerPort }} 12 | type: ClusterIP -------------------------------------------------------------------------------- /observability/metric/charts/node-exporter/values.yaml: -------------------------------------------------------------------------------- 1 | node_exporter: 2 | image: prom/node-exporter 3 | containerPort: 9100 -------------------------------------------------------------------------------- /observability/metric/charts/prometheus/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: prometheus 3 | description: Helm subchart for Prometheus 4 | version: 0.1.0 5 | appVersion: "2.38.0" 6 | -------------------------------------------------------------------------------- /observability/metric/charts/prometheus/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: prometheus-config 5 | data: 6 | prometheus.yml: | 7 | global: 8 | scrape_interval: 15s 9 | evaluation_interval: 15s 10 | 11 | rule_files: 12 | - /etc/prometheus/alert-rules.yml 13 | 14 | alerting: 15 | alertmanagers: 16 | - static_configs: 17 | - targets: 18 | - 
"alertmanager.observability.svc.cluster.local:9093" 19 | 20 | scrape_configs: 21 | - job_name: 'prometheus' 22 | scrape_interval: 10s 23 | static_configs: 24 | - targets: ['localhost:9090'] 25 | 26 | - job_name: 'node-exporter' 27 | scrape_interval: 5s 28 | kubernetes_sd_configs: 29 | - role: pod 30 | namespaces: 31 | names: 32 | - observability # replace with the namespace where node-exporter is deployed 33 | relabel_configs: 34 | - source_labels: [__meta_kubernetes_pod_label_app] 35 | action: keep 36 | regex: node-exporter 37 | - source_labels: [__meta_kubernetes_pod_host_ip] 38 | target_label: instance 39 | - source_labels: [__address__] 40 | target_label: __address__ 41 | regex: (.+):\d+ 42 | replacement: ${1}:9100 43 | 44 | - job_name: 'cadvisor' 45 | scrape_interval: 5s 46 | kubernetes_sd_configs: 47 | - role: pod 48 | namespaces: 49 | names: 50 | - observability # replace with the namespace where node-exporter is deployed 51 | relabel_configs: 52 | - source_labels: [__meta_kubernetes_pod_label_app] 53 | action: keep 54 | regex: cadvisor 55 | - source_labels: [__meta_kubernetes_pod_host_ip] 56 | target_label: instance 57 | - source_labels: [__address__] 58 | target_label: __address__ 59 | regex: (.+):\d+ 60 | replacement: ${1}:8080 61 | 62 | alert-rules.yml: | 63 | groups: 64 | - name: System alerts 65 | rules: 66 | - alert: NodeOutOfMemory 67 | expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 40 68 | for: 1m 69 | labels: 70 | severity: warning 71 | annotations: 72 | summary: Node out of memory 73 | description: Node memory is only available {{ "{{" }} humanize $value {{ "}}" }}% 74 | 75 | - name: Containers 76 | rules: 77 | - alert: OCRHighMemory 78 | expr: sum(rate(container_cpu_usage_seconds_total{name="demo-metrics"}[5m])) * 100 > 100 79 | for: 30s 80 | labels: 81 | severity: warning 82 | annotations: 83 | summary: OCR high CPU usage 84 | description: OCR CPU consumption is at {{ "{{" }} humanize $value {{ "}}" }}% 85 | 86 | #Ref: https://chatgpt.com/share/6725e975-3f2c-8002-ba7b-44513f1e9669 -------------------------------------------------------------------------------- /observability/metric/charts/prometheus/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: prometheus 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: prometheus 10 | template: 11 | metadata: 12 | labels: 13 | app: prometheus 14 | spec: 15 | serviceAccountName: prometheus-sa 16 | containers: 17 | - name: prometheus 18 | image: {{ .Values.prometheus.image }} 19 | ports: 20 | - containerPort: {{ .Values.prometheus.containerPort }} 21 | volumeMounts: 22 | - name: prometheus-config 23 | mountPath: {{ .Values.prometheus.configPath }} 24 | args: 25 | - --config.file=/etc/prometheus/prometheus.yml 26 | - --storage.tsdb.path=/prometheus 27 | - --web.console.libraries=/etc/prometheus/console_libraries 28 | - --web.console.templates=/etc/prometheus/consoles 29 | - --storage.tsdb.retention.time=20h 30 | - --web.enable-lifecycle 31 | volumes: 32 | - name: prometheus-config 33 | configMap: 34 | name: prometheus-config 35 | 36 | items: 37 | - key: "prometheus.yml" 38 | path: "prometheus.yml" 39 | - key: "alert-rules.yml" 40 | path: "alert-rules.yml" -------------------------------------------------------------------------------- /observability/metric/charts/prometheus/templates/role.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: Role 3 | metadata: 4 | namespace: observability 5 | name: prometheus-reader 6 | rules: 7 | - apiGroups: [""] 8 | resources: ["pods"] 9 | verbs: ["get", "list", "watch"] 10 | -------------------------------------------------------------------------------- /observability/metric/charts/prometheus/templates/rolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: prometheus-reader-binding 5 | namespace: observability 6 | subjects: 7 | - kind: ServiceAccount 8 | name: prometheus-sa 9 | namespace: observability 10 | roleRef: 11 | kind: Role 12 | name: prometheus-reader 13 | apiGroup: rbac.authorization.k8s.io 14 | -------------------------------------------------------------------------------- /observability/metric/charts/prometheus/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: prometheus 5 | spec: 6 | selector: 7 | app: prometheus 8 | ports: 9 | - protocol: TCP 10 | port: {{ .Values.prometheus.containerPort }} 11 | targetPort: {{ .Values.prometheus.containerPort }} 12 | type: ClusterIP -------------------------------------------------------------------------------- /observability/metric/charts/prometheus/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: prometheus-sa # Name of the service account 5 | namespace: observability # Namespace where Prometheus is deployed 6 | -------------------------------------------------------------------------------- /observability/metric/charts/prometheus/values.yaml: -------------------------------------------------------------------------------- 1 | # Prometheus values 2 | prometheus: 3 | image: prom/prometheus 4 | containerPort: 9090 5 | configPath: /etc/prometheus 6 | -------------------------------------------------------------------------------- /observability/metric/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{- define "grafana-monitoring.fullname" -}} 2 | {{ .Release.Name }}-{{ .Chart.Name }} 3 | {{- end }} 4 | -------------------------------------------------------------------------------- /observability/metric/values.yaml: -------------------------------------------------------------------------------- 1 | # Enable or disable subcharts 2 | prometheus: 3 | enabled: true 4 | 5 | node-exporter: 6 | enabled: true 7 | 8 | cadvisor: 9 | enabled: true 10 | 11 | grafana: 12 | enabled: true 13 | 14 | alertmanager: 15 | enabled: true 16 | -------------------------------------------------------------------------------- /observability/my-jaeger/jaeger.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: observability 5 | 6 | --- 7 | 8 | apiVersion: apps/v1 9 | kind: Deployment 10 | metadata: 11 | name: jaeger 12 | namespace: observability 13 | labels: 14 | app: jaeger 15 | spec: 16 | replicas: 1 17 | selector: 18 | matchLabels: 19 | app: jaeger 20 | template: 21 | metadata: 22 | labels: 23 | app: jaeger 24 | spec: 25 | containers: 26 | - name: jaeger 27 | image: jaegertracing/all-in-one:1.45 28 
| ports: 29 | - containerPort: 6831 30 | protocol: UDP # Jaeger Thrift Compact 31 | - containerPort: 6832 32 | protocol: UDP # Jaeger Thrift Binary 33 | - containerPort: 5778 34 | protocol: TCP # Configuration endpoint 35 | - containerPort: 16686 36 | protocol: TCP # Query UI 37 | - containerPort: 14268 38 | protocol: TCP # Collector HTTP endpoint 39 | - containerPort: 14269 40 | protocol: TCP # Admin endpoint 41 | - containerPort: 14250 42 | protocol: TCP # gRPC collector 43 | - containerPort: 9411 44 | protocol: TCP # Zipkin compatible endpoint 45 | - containerPort: 4317 46 | protocol: TCP # OTLP gRPC 47 | - containerPort: 4318 48 | protocol: TCP # OTLP HTTP 49 | env: 50 | - name: JAEGER_AGENT_PORT 51 | value: "6831" 52 | - name: COLLECTOR_ZIPKIN_HTTP_PORT 53 | value: ":9411" 54 | - name: COLLECTOR_OTLP_ENABLED 55 | value: "true" 56 | 57 | --- 58 | 59 | apiVersion: v1 60 | kind: Service 61 | metadata: 62 | name: jaeger-agent 63 | namespace: observability 64 | labels: 65 | app: jaeger 66 | spec: 67 | type: ClusterIP 68 | ports: 69 | - name: thrift-compact 70 | port: 6831 71 | protocol: UDP 72 | targetPort: 6831 73 | - name: thrift-binary 74 | port: 6832 75 | protocol: UDP 76 | targetPort: 6832 77 | - name: config 78 | port: 5778 79 | protocol: TCP 80 | targetPort: 5778 81 | selector: 82 | app: jaeger 83 | 84 | --- 85 | 86 | apiVersion: v1 87 | kind: Service 88 | metadata: 89 | name: jaeger-query 90 | namespace: observability 91 | labels: 92 | app: jaeger 93 | spec: 94 | type: NodePort 95 | ports: 96 | - name: http 97 | port: 80 98 | protocol: TCP 99 | targetPort: 16686 100 | nodePort: 30002 101 | selector: 102 | app: jaeger 103 | 104 | --- 105 | 106 | apiVersion: v1 107 | kind: Service 108 | metadata: 109 | name: jaeger-collector 110 | namespace: observability 111 | labels: 112 | app: jaeger 113 | spec: 114 | type: ClusterIP 115 | ports: 116 | - name: grpc 117 | port: 14250 118 | protocol: TCP 119 | targetPort: 14250 120 | - name: http 121 | port: 14268 122 | protocol: TCP 123 | targetPort: 14268 124 | - name: zipkin 125 | port: 9411 126 | protocol: TCP 127 | targetPort: 9411 128 | - name: otlp-http 129 | port: 4318 130 | protocol: TCP 131 | targetPort: 4318 132 | - name: otlp-grpc 133 | port: 4317 134 | protocol: TCP 135 | targetPort: 4317 136 | selector: 137 | app: jaeger 138 | -------------------------------------------------------------------------------- /observability/my-jaeger/nohup.out: -------------------------------------------------------------------------------- 1 | Forwarding from 127.0.0.1:16686 -> 16686 2 | Forwarding from [::1]:16686 -> 16686 3 | -------------------------------------------------------------------------------- /observability/my-jaeger/port-forward.log: -------------------------------------------------------------------------------- 1 | nohup: ignoring input 2 | Forwarding from 127.0.0.1:16686 -> 16686 3 | Forwarding from [::1]:16686 -> 16686 4 | Handling connection for 16686 5 | Handling connection for 16686 6 | Handling connection for 16686 7 | Handling connection for 16686 8 | Handling connection for 16686 9 | Handling connection for 16686 10 | --------------------------------------------------------------------------------
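A hedged usage sketch (not a file in this repo): with COLLECTOR_OTLP_ENABLED=true and the jaeger-collector Service defined above, an OpenTelemetry-instrumented pod in the cluster can typically export traces by pointing the standard SDK environment variable at the collector, for example:

# OTLP over HTTP via the collector Service (use port 4317 instead for OTLP gRPC)
export OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger-collector.observability.svc.cluster.local:4318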