├── .env.dist
├── .gitignore
├── README.md
├── example_crawl_results_2024-08-20T16-35-59.json
├── node
├── babel.config.js
├── cleaners.js
├── cleaners.test.js
├── firecrawl_to_trieve_config.js
├── jest.config.js
├── load.js
├── package.json
├── runFirecrawl.js
├── transform.js
├── transform.test.js
└── yarn.lock
└── python
├── cleaners.py
├── load.py
├── requirements.txt
├── run_firecrawl.py
├── suggestions.py
└── transform.py
/.env.dist:
--------------------------------------------------------------------------------
1 | FIRECRAWL_API_KEY=
2 | TRIEVE_DATASET_ID_BASELINE=
3 | TRIEVE_API_KEY=
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # permissions
2 | .env
3 |
4 | # macOS
5 | .DS_Store
6 |
7 | # data
8 | */chunks*.json
9 | */groups.json
10 | */chunks.md
11 | */crawl_results*
12 |
13 | # python
14 | .venv
15 |
16 | # node
17 | node_modules
18 |
19 | # logs
20 | *.log
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # firecrawl-to-trieve
2 | Demonstration of a Firecrawl-to-Trieve crawling-to-search pipeline.
3 |
4 | Here is the general approach:
5 |
6 | - get the results from Firecrawl
7 | - transform the results into chunks
8 | - load the chunks into Trieve
9 | - tentative: suggestions.py to pull suggested queries from Trieve—via [/chunk/suggestions](https://docs.trieve.ai/api-reference/chunk/generate-suggested-queries)—and explore the retrieval results and the data (not discussed in the blog)
10 |
11 | ## Setup
12 |
13 | - Set up your environment variables
14 |
15 | - Firecrawl API key
16 | - Trieve API key and dataset ID
17 |
18 | ```
19 | cp .env.dist .env
20 | ```
21 |
22 | ### Python
23 |
24 |
25 | - Set up your virtual environment
26 |
27 | ```
28 | python3 -m venv .venv
29 | source .venv/bin/activate
30 | ```
31 |
32 | - Install requirements
33 |
34 | ```
35 | pip install -r requirements.txt
36 | ```
37 |
38 | - Freeze requirements
39 |
40 | ```
41 | pip freeze > requirements.txt
42 | ```
43 |
44 | ### Node
45 |
46 | - Install dependencies
47 |
48 | ```
49 | yarn install
50 | ```
51 |
52 | ## Running the scripts
53 |
54 | ### Firecrawl
55 |
56 | - requires: `FIRECRAWL_API_KEY` in `.env`
57 |
58 | Use Firecrawl to get the results of a crawl on the `crawl_url`, here: `https://signoz.io/docs/`.
59 |
60 | Python in `python/`
61 | ```bash
62 | python run_firecrawl.py
63 | ```
64 |
65 | Node in `node/`
66 |
67 | ```bash
68 | yarn crawl
69 | ```
70 |
71 | This writes a JSON file (with a timestamp in the name) containing the crawl results as a list. Key fields are the markdown itself, and then various metadata fields, including `ogUrl`, `ogTitle`, `description`, `pageStatusCode`, etc.
72 |
73 | Example filename: `crawl_results_2024-08-20T16-35-59.json`
74 |
75 | See the example: `example_crawl_results_2024-08-20T16-35-59.json`
76 |
77 | ### Transform: Cleaning, Chunking, and Configuring
78 |
79 | See cleaning scripts: `python/cleaners.py` and `node/cleaners.js`
80 |
81 | Run the transform scripts:
82 |
83 | In `python/`
84 |
85 | ```bash
86 | python transform.py
87 | ```
88 |
89 | Or in `node/`
90 |
91 | ```bash
92 | yarn transform
93 | ```
94 |
95 | Warning: While exploring the data to determine the chunking approach, we noted it had a button click that toggles between contexts, so half of the content for the page is not in the markdown. We will just flag this for now, and we'll have to see if this issue appears elsewhere.
96 |
97 | ### Loading
98 |
99 | We can run the load script with `-c` to create chunks or `-u` to upsert chunks (update by tracking_id, e.g. if you want to add chunks with a different split or revise your cleaning approach).
100 |
101 | In `python/`
102 |
103 | ```bash
104 | python load.py [-c | -u]
105 | ```
106 |
107 | In `node/`
108 | ```bash
109 | yarn load [-c | -u]
110 | ```
111 |
112 |
--------------------------------------------------------------------------------
/example_crawl_results_2024-08-20T16-35-59.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "content": "Alert Management in SigNoz\n--------------------------\n\nThis documentation helps you in understanding the Alerts feature in SigNoz and how you can create different types of alerts.\n\n[Alert Management\\\n\\\nAlerts in SigNoz can help you to define which data to monitor, set thresholds to detect potential problems...](/docs/userguide/alerts-management)\n[Setup Alerts Notifications\\\n\\\nYou can setup notification channel for sending the generated alerts to other applications. Currently, the following channels are ...](/docs/setup-alerts-notification)\n[📄️ Metrics based alerts\\\n\\\nA Metric-based alert in SigNoz allows you to define conditions based on metric data...](/docs/alerts-management/metrics-based-alerts)\n[📄️ Log based alerts\\\n\\\nA Log-based alert allows you to define conditions based on log data,...](/docs/alerts-management/log-based-alerts)\n[📄️ Trace based alerts\\\n\\\nA Trace-based alert in SigNoz allows you to define conditions based on trace data...](/docs/alerts-management/trace-based-alerts)\n[📄️ Exceptions based alerts\\\n\\\nAn Exceptions-based alert in SigNoz allows you to define conditions...](/docs/alerts-management/exceptions-based-alerts)\n[📄️ Planned Maintenance\\\n\\\nPlanned Maintenance in SigNoz allows you to schedule maintenance windows for your application...](/docs/alerts-management/planned-maintenance)\n\n[Prev\\\n\\\nTraces](/docs/userguide/writing-clickhouse-traces-query/)\n[Next\\\n\\\nAlert Management](/docs/userguide/alerts-management/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New 
Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
4 | "markdown": "Alert Management in SigNoz\n--------------------------\n\nThis documentation helps you in understanding the Alerts feature in SigNoz and how you can create different types of alerts.\n\n[Alert Management\\\n\\\nAlerts in SigNoz can help you to define which data to monitor, set thresholds to detect potential problems...](/docs/userguide/alerts-management)\n[Setup Alerts Notifications\\\n\\\nYou can setup notification channel for sending the generated alerts to other applications. Currently, the following channels are ...](/docs/setup-alerts-notification)\n[📄️ Metrics based alerts\\\n\\\nA Metric-based alert in SigNoz allows you to define conditions based on metric data...](/docs/alerts-management/metrics-based-alerts)\n[📄️ Log based alerts\\\n\\\nA Log-based alert allows you to define conditions based on log data,...](/docs/alerts-management/log-based-alerts)\n[📄️ Trace based alerts\\\n\\\nA Trace-based alert in SigNoz allows you to define conditions based on trace data...](/docs/alerts-management/trace-based-alerts)\n[📄️ Exceptions based alerts\\\n\\\nAn Exceptions-based alert in SigNoz allows you to define conditions...](/docs/alerts-management/exceptions-based-alerts)\n[📄️ Planned Maintenance\\\n\\\nPlanned Maintenance in SigNoz allows you to schedule maintenance windows for your application...](/docs/alerts-management/planned-maintenance)\n\n[Prev\\\n\\\nTraces](/docs/userguide/writing-clickhouse-traces-query/)\n[Next\\\n\\\nAlert Management](/docs/userguide/alerts-management/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New 
Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
5 | "metadata": {
6 | "ogUrl": "https://signoz.io/docs/alerts/",
7 | "title": "Alert Management in SigNoz | SigNoz",
8 | "robots": "index, follow",
9 | "ogTitle": "Alert Management in SigNoz",
10 | "ogLocale": "en_US",
11 | "sourceURL": "https://signoz.io/docs/alerts",
12 | "ogSiteName": "SigNoz",
13 | "pageStatusCode": 200,
14 | "ogLocaleAlternate": []
15 | },
16 | "linksOnPage": [
17 | "https://signoz.io/",
18 | "https://signoz.io/docs/",
19 | "https://signoz.io/resource-center/blog/",
20 | "https://signoz.io/pricing/",
21 | "https://signoz.io/case-study/",
22 | "https://signoz.io/teams/",
23 | "https://signoz.io/docs/userguide/alerts-management",
24 | "https://signoz.io/docs/setup-alerts-notification",
25 | "https://signoz.io/docs/alerts-management/metrics-based-alerts",
26 | "https://signoz.io/docs/alerts-management/log-based-alerts",
27 | "https://signoz.io/docs/alerts-management/trace-based-alerts",
28 | "https://signoz.io/docs/alerts-management/exceptions-based-alerts",
29 | "https://signoz.io/docs/alerts-management/planned-maintenance",
30 | "https://signoz.io/docs/userguide/writing-clickhouse-traces-query/",
31 | "https://signoz.io/docs/userguide/alerts-management/",
32 | "https://signoz.io/docs/contributing/",
33 | "https://knowledgebase.signoz.io/kb",
34 | "https://signoz.io/api_reference/",
35 | "https://signoz.io/support/",
36 | "https://signoz.io/slack",
37 | "https://twitter.com/SigNozHQ",
38 | "https://community-chat.signoz.io/",
39 | "https://signoz.io/changelog/",
40 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
41 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
42 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
43 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
44 | "https://jobs.gem.com/signoz",
45 | "https://signoz.io/about-us/",
46 | "https://signoz.io/terms-of-service/",
47 | "https://signoz.io/privacy/",
48 | "https://trust.signoz.io/",
49 | "https://github.com/SigNoz",
50 | "https://www.linkedin.com/company/signozio/",
51 | "https://www.youtube.com/@signoz"
52 | ]
53 | },
54 | {
55 | "content": "Technical Architecture\n----------------------\n\n\n\n### [](#architecture-components)\nArchitecture Components\n\n* SigNoz OpenTelemetry Collector\n* ClickHouse\n* Query Service\n* Frontend\n* Alert Manager\n\n**OpenTelemetry Collector** can receive data in multiple formats. Here are some of the commonly used receivers:\n\n* Jaeger Receiver\n* Kafka Receiver\n* OpenCensus Receiver\n* OTLP Receiver\n* Zipkin Receiver\n\nOne can send data from their applications directly to SigNoz Otel collector or external otel collectors can be used for collecting telemetry data & sending to SigNoz otel collector. These external otel collectors are then working effectively as an agent to collect data first and then send to SigNoz Otel collector.\n\n**Query Service** is the interface between Frontend and ClickHouse. It provides APIs to be consumed by frontend application and queries ClickHouse to fetch data and processes data before responding back to the frontend.\n\n**Frontend** is the UI, built in ReactJS and Typescript and provides advanced trace/span filtering capabilities and plot metrics to provide service overviews.\n\n**Alert Manager** evaluates different alert rules set by the users and triggers an alert if a threshold is crossed.\n\n### [](#opentelemetry-introduction)\nOpentelemetry Introduction\n\nSigNoz uses OpenTelemetry for instrumenting applications and for collecting telemetry data. 
The following docs may be useful to get familiar with the basic concepts of OpenTelemetry\n\n* [OpenTelemetry Data Collection](https://opentelemetry.io/docs/concepts/data-collection/)\n \n* [OpenTelemetry Collector Configuration](https://opentelemetry.io/docs/collector/configuration/)\n \n\n[Prev\\\n\\\nBest Practices for Production](/docs/production-readiness/)\n[Next\\\n\\\nContributing Guidelines](/docs/contributing/)\n\nOn this page\n\n[Architecture Components](#architecture-components)\n\n[Architecture Components](#architecture-components-1)\n\n[\\_Stream Processing\\_ decentralizes and decouples the infrastructure.](#_stream-processing_-decentralizes-and-decouples-the-infrastructure)\n\n[Opentelemetry Introduction](#opentelemetry-introduction)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
56 | "markdown": "Technical Architecture\n----------------------\n\n\n\n### [](#architecture-components)\nArchitecture Components\n\n* SigNoz OpenTelemetry Collector\n* ClickHouse\n* Query Service\n* Frontend\n* Alert Manager\n\n**OpenTelemetry Collector** can receive data in multiple formats. Here are some of the commonly used receivers:\n\n* Jaeger Receiver\n* Kafka Receiver\n* OpenCensus Receiver\n* OTLP Receiver\n* Zipkin Receiver\n\nOne can send data from their applications directly to SigNoz Otel collector or external otel collectors can be used for collecting telemetry data & sending to SigNoz otel collector. These external otel collectors are then working effectively as an agent to collect data first and then send to SigNoz Otel collector.\n\n**Query Service** is the interface between Frontend and ClickHouse. It provides APIs to be consumed by frontend application and queries ClickHouse to fetch data and processes data before responding back to the frontend.\n\n**Frontend** is the UI, built in ReactJS and Typescript and provides advanced trace/span filtering capabilities and plot metrics to provide service overviews.\n\n**Alert Manager** evaluates different alert rules set by the users and triggers an alert if a threshold is crossed.\n\n### [](#opentelemetry-introduction)\nOpentelemetry Introduction\n\nSigNoz uses OpenTelemetry for instrumenting applications and for collecting telemetry data. 
The following docs may be useful to get familiar with the basic concepts of OpenTelemetry\n\n* [OpenTelemetry Data Collection](https://opentelemetry.io/docs/concepts/data-collection/)\n \n* [OpenTelemetry Collector Configuration](https://opentelemetry.io/docs/collector/configuration/)\n \n\n[Prev\\\n\\\nBest Practices for Production](/docs/production-readiness/)\n[Next\\\n\\\nContributing Guidelines](/docs/contributing/)\n\nOn this page\n\n[Architecture Components](#architecture-components)\n\n[Architecture Components](#architecture-components-1)\n\n[\\_Stream Processing\\_ decentralizes and decouples the infrastructure.](#_stream-processing_-decentralizes-and-decouples-the-infrastructure)\n\n[Opentelemetry Introduction](#opentelemetry-introduction)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
57 | "metadata": {
58 | "ogUrl": "https://signoz.io/docs/architecture/",
59 | "title": "Technical Architecture | SigNoz",
60 | "robots": "index, follow",
61 | "ogTitle": "Technical Architecture",
62 | "ogLocale": "en_US",
63 | "sourceURL": "https://signoz.io/docs/architecture",
64 | "ogSiteName": "SigNoz",
65 | "description": "Learn about the technical architecture of SigNoz, including components like OpenTelemetry Collector, ClickHouse, Query Service, Frontend, and Alert Manager.",
66 | "ogDescription": "Learn about the technical architecture of SigNoz, including components like OpenTelemetry Collector, ClickHouse, Query Service, Frontend, and Alert Manager.",
67 | "pageStatusCode": 200,
68 | "ogLocaleAlternate": []
69 | },
70 | "linksOnPage": [
71 | "https://signoz.io/",
72 | "https://signoz.io/docs/",
73 | "https://signoz.io/resource-center/blog/",
74 | "https://signoz.io/pricing/",
75 | "https://signoz.io/case-study/",
76 | "https://signoz.io/teams/",
77 | "https://opentelemetry.io/docs/concepts/data-collection/",
78 | "https://opentelemetry.io/docs/collector/configuration/",
79 | "https://signoz.io/docs/production-readiness/",
80 | "https://signoz.io/docs/contributing/",
81 | "https://knowledgebase.signoz.io/kb",
82 | "https://signoz.io/api_reference/",
83 | "https://signoz.io/support/",
84 | "https://signoz.io/slack",
85 | "https://twitter.com/SigNozHQ",
86 | "https://community-chat.signoz.io/",
87 | "https://signoz.io/changelog/",
88 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
89 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
90 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
91 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
92 | "https://jobs.gem.com/signoz",
93 | "https://signoz.io/about-us/",
94 | "https://signoz.io/terms-of-service/",
95 | "https://signoz.io/privacy/",
96 | "https://trust.signoz.io/",
97 | "https://github.com/SigNoz",
98 | "https://www.linkedin.com/company/signozio/",
99 | "https://www.youtube.com/@signoz"
100 | ]
101 | },
102 | {
103 | "content": "SigNoz Cloud\n------------\n\nSigNoz Cloud is an easy way to get started with SigNoz. You don't need to install or maintain your own instance.\n\n[](#getting-started-with-signoz-cloud)\nGetting started with SigNoz Cloud\n------------------------------------------------------------------------\n\nSigNoz offers a **free trial of 30 days with full access to all features**. If you don’t already have an account, you can sign up [here](https://signoz.io/teams/)\n.\n\n[](#send-traces-to-signoz-cloud)\nSend Traces to SigNoz Cloud\n------------------------------------------------------------\n\nSigNoz supports tracing for major programming languages. With tracing you can get started with great out-of-box charts for application performance like p99 latency, request rates, error rates, and top end-points of your application.\n\nYou can also visualize user requests in their entirety as it travels across components of your application. Here are instructions for sending traces to SigNoz cloud in different languages:\n\n* [Java](https://signoz.io/docs/instrumentation/java/#send-traces-to-signoz-cloud)\n \n* [Python](https://signoz.io/docs/instrumentation/python/#send-traces-to-signoz-cloud)\n \n* [Nodejs](https://signoz.io/docs/instrumentation/javascript/)\n \n* [Golang](https://signoz.io/docs/instrumentation/golang/#send-traces-to-signoz-cloud)\n \n* [Ruby on Rails](https://signoz.io/docs/instrumentation/ruby-on-rails/)\n \n\n[](#collect-hostmetrics-from-vm)\nCollect Hostmetrics from VM\n------------------------------------------------------------\n\nFind [instructions](https://signoz.io/docs/userguide/hostmetrics/)\n to send hostmetrics to SigNoz Cloud using OpenTelemetry Collector.\n\n[](#collect-kubernetes-infra-metrics)\nCollect Kubernetes Infra Metrics\n----------------------------------------------------------------------\n\nFind [instructions](https://signoz.io/docs/tutorial/kubernetes-infra-metrics/)\n to send Kubernetes infra metrics and logs to SigNoz 
Cloud using OpenTelemetry Collector.\n\n[](#send-logs-to-signoz-cloud)\nSend Logs to SigNoz Cloud\n--------------------------------------------------------\n\nSigNoz provides log management with and advanced query builder to quickly search and filter logs. Here's an [overview](https://signoz.io/docs/userguide/logs/#collecting-logs-in-signoz-cloud)\n of how to collect and send logs to SigNoz cloud.\n\n[Prev\\\n\\\nInstallation](/docs/install/)\n[Next\\\n\\\nDocker Standalone](/docs/install/docker/)\n\nOn this page\n\n[Getting started with SigNoz Cloud](#getting-started-with-signoz-cloud)\n\n[Send Traces to SigNoz Cloud](#send-traces-to-signoz-cloud)\n\n[Collect Hostmetrics from VM](#collect-hostmetrics-from-vm)\n\n[Collect Kubernetes Infra Metrics](#collect-kubernetes-infra-metrics)\n\n[Send Logs to SigNoz Cloud](#send-logs-to-signoz-cloud)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
104 | "markdown": "SigNoz Cloud\n------------\n\nSigNoz Cloud is an easy way to get started with SigNoz. You don't need to install or maintain your own instance.\n\n[](#getting-started-with-signoz-cloud)\nGetting started with SigNoz Cloud\n------------------------------------------------------------------------\n\nSigNoz offers a **free trial of 30 days with full access to all features**. If you don’t already have an account, you can sign up [here](https://signoz.io/teams/)\n.\n\n[](#send-traces-to-signoz-cloud)\nSend Traces to SigNoz Cloud\n------------------------------------------------------------\n\nSigNoz supports tracing for major programming languages. With tracing you can get started with great out-of-box charts for application performance like p99 latency, request rates, error rates, and top end-points of your application.\n\nYou can also visualize user requests in their entirety as it travels across components of your application. Here are instructions for sending traces to SigNoz cloud in different languages:\n\n* [Java](https://signoz.io/docs/instrumentation/java/#send-traces-to-signoz-cloud)\n \n* [Python](https://signoz.io/docs/instrumentation/python/#send-traces-to-signoz-cloud)\n \n* [Nodejs](https://signoz.io/docs/instrumentation/javascript/)\n \n* [Golang](https://signoz.io/docs/instrumentation/golang/#send-traces-to-signoz-cloud)\n \n* [Ruby on Rails](https://signoz.io/docs/instrumentation/ruby-on-rails/)\n \n\n[](#collect-hostmetrics-from-vm)\nCollect Hostmetrics from VM\n------------------------------------------------------------\n\nFind [instructions](https://signoz.io/docs/userguide/hostmetrics/)\n to send hostmetrics to SigNoz Cloud using OpenTelemetry Collector.\n\n[](#collect-kubernetes-infra-metrics)\nCollect Kubernetes Infra Metrics\n----------------------------------------------------------------------\n\nFind [instructions](https://signoz.io/docs/tutorial/kubernetes-infra-metrics/)\n to send Kubernetes infra metrics and logs to 
SigNoz Cloud using OpenTelemetry Collector.\n\n[](#send-logs-to-signoz-cloud)\nSend Logs to SigNoz Cloud\n--------------------------------------------------------\n\nSigNoz provides log management with and advanced query builder to quickly search and filter logs. Here's an [overview](https://signoz.io/docs/userguide/logs/#collecting-logs-in-signoz-cloud)\n of how to collect and send logs to SigNoz cloud.\n\n[Prev\\\n\\\nInstallation](/docs/install/)\n[Next\\\n\\\nDocker Standalone](/docs/install/docker/)\n\nOn this page\n\n[Getting started with SigNoz Cloud](#getting-started-with-signoz-cloud)\n\n[Send Traces to SigNoz Cloud](#send-traces-to-signoz-cloud)\n\n[Collect Hostmetrics from VM](#collect-hostmetrics-from-vm)\n\n[Collect Kubernetes Infra Metrics](#collect-kubernetes-infra-metrics)\n\n[Send Logs to SigNoz Cloud](#send-logs-to-signoz-cloud)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
105 | "metadata": {
106 | "ogUrl": "https://signoz.io/docs/cloud/",
107 | "title": "SigNoz Cloud | SigNoz",
108 | "robots": "index, follow",
109 | "ogTitle": "SigNoz Cloud",
110 | "ogLocale": "en_US",
111 | "sourceURL": "https://signoz.io/docs/cloud",
112 | "ogSiteName": "SigNoz",
113 | "description": "Get started with SigNoz Cloud for easy observability without installation. Learn how to send traces, hostmetrics, Kubernetes metrics, and logs for comprehensive monitoring.",
114 | "ogDescription": "Get started with SigNoz Cloud for easy observability without installation. Learn how to send traces, hostmetrics, Kubernetes metrics, and logs for comprehensive monitoring.",
115 | "pageStatusCode": 200,
116 | "ogLocaleAlternate": []
117 | },
118 | "linksOnPage": [
119 | "https://signoz.io/",
120 | "https://signoz.io/docs/",
121 | "https://signoz.io/resource-center/blog/",
122 | "https://signoz.io/pricing/",
123 | "https://signoz.io/case-study/",
124 | "https://signoz.io/teams/",
125 | "https://signoz.io/docs/instrumentation/java/#send-traces-to-signoz-cloud",
126 | "https://signoz.io/docs/instrumentation/python/#send-traces-to-signoz-cloud",
127 | "https://signoz.io/docs/instrumentation/javascript/",
128 | "https://signoz.io/docs/instrumentation/golang/#send-traces-to-signoz-cloud",
129 | "https://signoz.io/docs/instrumentation/ruby-on-rails/",
130 | "https://signoz.io/docs/userguide/hostmetrics/",
131 | "https://signoz.io/docs/tutorial/kubernetes-infra-metrics/",
132 | "https://signoz.io/docs/userguide/logs/#collecting-logs-in-signoz-cloud",
133 | "https://signoz.io/docs/install/",
134 | "https://signoz.io/docs/install/docker/",
135 | "https://signoz.io/docs/contributing/",
136 | "https://knowledgebase.signoz.io/kb",
137 | "https://signoz.io/api_reference/",
138 | "https://signoz.io/support/",
139 | "https://signoz.io/slack",
140 | "https://twitter.com/SigNozHQ",
141 | "https://community-chat.signoz.io/",
142 | "https://signoz.io/changelog/",
143 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
144 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
145 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
146 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
147 | "https://jobs.gem.com/signoz",
148 | "https://signoz.io/about-us/",
149 | "https://signoz.io/terms-of-service/",
150 | "https://signoz.io/privacy/",
151 | "https://trust.signoz.io/",
152 | "https://github.com/SigNoz",
153 | "https://www.linkedin.com/company/signozio/",
154 | "https://www.youtube.com/@signoz"
155 | ]
156 | },
157 | {
158 | "content": "Community\n---------\n\n[](#slack)\nSlack\n----------------\n\nWe have an active slack community with engineers eager to answer your queries on observability, tracing and monitoring. Would love to hear your thoughts and any questions if you have.\n\nWe are always there to help you in any issues you may have while running SigNoz. Just drop by and say Hi 👋!\n\nGet your invite for the slack community [here](https://signoz.io/slack)\n\n[](#github)\nGithub\n------------------\n\nHave a question? Want to suggest an idea? - Head out to our [Github Discussions](https://github.com/SigNoz/signoz/discussions)\n. We would love to hear from you!\n\n[](#twitter)\nTwitter\n--------------------\n\nTwitter has some amazing audience of developers and engineers. Follow us on Twitter [@SigNozHQ](https://twitter.com/SigNozHQ)\n\n[Prev\\\n\\\nCommunity Integrations](/docs/community/community-integrations/)\n[Next\\\n\\\nTroubleshooting](/docs/faqs/)\n\nOn this page\n\n[Github](#github)\n\n[Twitter](#twitter)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems 
operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
159 | "markdown": "Community\n---------\n\n[](#slack)\nSlack\n----------------\n\nWe have an active slack community with engineers eager to answer your queries on observability, tracing and monitoring. Would love to hear your thoughts and any questions if you have.\n\nWe are always there to help you in any issues you may have while running SigNoz. Just drop by and say Hi 👋!\n\nGet your invite for the slack community [here](https://signoz.io/slack)\n\n[](#github)\nGithub\n------------------\n\nHave a question? Want to suggest an idea? - Head out to our [Github Discussions](https://github.com/SigNoz/signoz/discussions)\n. We would love to hear from you!\n\n[](#twitter)\nTwitter\n--------------------\n\nTwitter has some amazing audience of developers and engineers. Follow us on Twitter [@SigNozHQ](https://twitter.com/SigNozHQ)\n\n[Prev\\\n\\\nCommunity Integrations](/docs/community/community-integrations/)\n[Next\\\n\\\nTroubleshooting](/docs/faqs/)\n\nOn this page\n\n[Github](#github)\n\n[Twitter](#twitter)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems 
operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
160 | "metadata": {
161 | "ogUrl": "https://signoz.io/docs/community/",
162 | "title": "Community | SigNoz",
163 | "robots": "index, follow",
164 | "ogTitle": "Community",
165 | "ogLocale": "en_US",
166 | "sourceURL": "https://signoz.io/docs/community",
167 | "ogSiteName": "SigNoz",
168 | "pageStatusCode": 200,
169 | "ogLocaleAlternate": []
170 | },
171 | "linksOnPage": [
172 | "https://signoz.io/",
173 | "https://signoz.io/docs/",
174 | "https://signoz.io/resource-center/blog/",
175 | "https://signoz.io/pricing/",
176 | "https://signoz.io/case-study/",
177 | "https://signoz.io/teams/",
178 | "https://signoz.io/slack",
179 | "https://github.com/SigNoz/signoz/discussions",
180 | "https://twitter.com/SigNozHQ",
181 | "https://signoz.io/docs/community/community-integrations/",
182 | "https://signoz.io/docs/faqs/",
183 | "https://signoz.io/docs/contributing/",
184 | "https://knowledgebase.signoz.io/kb",
185 | "https://signoz.io/api_reference/",
186 | "https://signoz.io/support/",
187 | "https://community-chat.signoz.io/",
188 | "https://signoz.io/changelog/",
189 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
190 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
191 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
192 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
193 | "https://jobs.gem.com/signoz",
194 | "https://signoz.io/about-us/",
195 | "https://signoz.io/terms-of-service/",
196 | "https://signoz.io/privacy/",
197 | "https://trust.signoz.io/",
198 | "https://github.com/SigNoz",
199 | "https://www.linkedin.com/company/signozio/",
200 | "https://www.youtube.com/@signoz"
201 | ]
202 | },
203 | {
204 | "content": "Contribution Guidelines\n-----------------------\n\n[](#welcome-to-signoz-contributing-section-)\nWelcome to SigNoz Contributing section 🎉\n--------------------------------------------------------------------------------------\n\nHi there! We're thrilled that you'd like to contribute to this project, thank you for your interest. Whether it's a bug report, new feature, correction, or additional documentation, we greatly value feedback and contributions from our community.\n\nPlease read through this document before submitting any issues or pull requests to ensure we have all the necessary information to effectively respond to your bug report or contribution.\n\n* We accept contributions made to the [SigNoz `develop` branch](https://github.com/SigNoz/signoz/tree/develop)\n \n* Find all SigNoz Docker Hub images here\n * [signoz/frontend](https://hub.docker.com/r/signoz/frontend)\n \n * [signoz/query-service](https://hub.docker.com/r/signoz/query-service)\n \n * [signoz/otelcontribcol](https://hub.docker.com/r/signoz/otelcontribcol)\n \n\n[](#finding-contributions-to-work-on-)\nFinding contributions to work on 💬\n--------------------------------------------------------------------------\n\nLooking at the existing issues is a great way to find something to contribute on. 
Also, have a look at these [good first issues label](https://github.com/SigNoz/signoz/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)\n to start with.\n\n[](#how-to-contribute)\nHow to Contribute\n----------------------------------------\n\nPlease check [Contributing.md](https://github.com/SigNoz/signoz/blob/develop/CONTRIBUTING.md)\n file for instructions on how to contribute to SigNoz.\n\n* You can create a PR (Pull Request) for contributing features, bug fixes to the project.\n* If you find any bugs, please create an issue.\n* If you find anything missing in documentation, you can create an issue with label **documentation**.\n\n[Prev\\\n\\\nTechnical Architecture](/docs/architecture/)\n[Next\\\n\\\nProduct Roadmap](/docs/roadmap/)\n\nOn this page\n\n[Welcome to SigNoz Contributing section 🎉](#welcome-to-signoz-contributing-section-)\n\n[Finding contributions to work on 💬](#finding-contributions-to-work-on-)\n\n[How to Contribute](#how-to-contribute)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
205 | "markdown": "Contribution Guidelines\n-----------------------\n\n[](#welcome-to-signoz-contributing-section-)\nWelcome to SigNoz Contributing section 🎉\n--------------------------------------------------------------------------------------\n\nHi there! We're thrilled that you'd like to contribute to this project, thank you for your interest. Whether it's a bug report, new feature, correction, or additional documentation, we greatly value feedback and contributions from our community.\n\nPlease read through this document before submitting any issues or pull requests to ensure we have all the necessary information to effectively respond to your bug report or contribution.\n\n* We accept contributions made to the [SigNoz `develop` branch](https://github.com/SigNoz/signoz/tree/develop)\n \n* Find all SigNoz Docker Hub images here\n * [signoz/frontend](https://hub.docker.com/r/signoz/frontend)\n \n * [signoz/query-service](https://hub.docker.com/r/signoz/query-service)\n \n * [signoz/otelcontribcol](https://hub.docker.com/r/signoz/otelcontribcol)\n \n\n[](#finding-contributions-to-work-on-)\nFinding contributions to work on 💬\n--------------------------------------------------------------------------\n\nLooking at the existing issues is a great way to find something to contribute on. 
Also, have a look at these [good first issues label](https://github.com/SigNoz/signoz/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)\n to start with.\n\n[](#how-to-contribute)\nHow to Contribute\n----------------------------------------\n\nPlease check [Contributing.md](https://github.com/SigNoz/signoz/blob/develop/CONTRIBUTING.md)\n file for instructions on how to contribute to SigNoz.\n\n* You can create a PR (Pull Request) for contributing features, bug fixes to the project.\n* If you find any bugs, please create an issue.\n* If you find anything missing in documentation, you can create an issue with label **documentation**.\n\n[Prev\\\n\\\nTechnical Architecture](/docs/architecture/)\n[Next\\\n\\\nProduct Roadmap](/docs/roadmap/)\n\nOn this page\n\n[Welcome to SigNoz Contributing section 🎉](#welcome-to-signoz-contributing-section-)\n\n[Finding contributions to work on 💬](#finding-contributions-to-work-on-)\n\n[How to Contribute](#how-to-contribute)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
206 | "metadata": {
207 | "ogUrl": "https://signoz.io/docs/contributing/",
208 | "title": "Contribution Guidelines | SigNoz",
209 | "robots": "index, follow",
210 | "ogTitle": "Contribution Guidelines",
211 | "ogLocale": "en_US",
212 | "sourceURL": "https://signoz.io/docs/contributing",
213 | "ogSiteName": "SigNoz",
214 | "pageStatusCode": 200,
215 | "ogLocaleAlternate": []
216 | },
217 | "linksOnPage": [
218 | "https://signoz.io/",
219 | "https://signoz.io/docs/",
220 | "https://signoz.io/resource-center/blog/",
221 | "https://signoz.io/pricing/",
222 | "https://signoz.io/case-study/",
223 | "https://signoz.io/teams/",
224 | "https://github.com/SigNoz/signoz/tree/develop",
225 | "https://hub.docker.com/r/signoz/frontend",
226 | "https://hub.docker.com/r/signoz/query-service",
227 | "https://hub.docker.com/r/signoz/otelcontribcol",
228 | "https://github.com/SigNoz/signoz/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22",
229 | "https://github.com/SigNoz/signoz/blob/develop/CONTRIBUTING.md",
230 | "https://signoz.io/docs/architecture/",
231 | "https://signoz.io/docs/roadmap/",
232 | "https://signoz.io/docs/contributing/",
233 | "https://knowledgebase.signoz.io/kb",
234 | "https://signoz.io/api_reference/",
235 | "https://signoz.io/support/",
236 | "https://signoz.io/slack",
237 | "https://twitter.com/SigNozHQ",
238 | "https://community-chat.signoz.io/",
239 | "https://signoz.io/changelog/",
240 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
241 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
242 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
243 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
244 | "https://jobs.gem.com/signoz",
245 | "https://signoz.io/about-us/",
246 | "https://signoz.io/terms-of-service/",
247 | "https://signoz.io/privacy/",
248 | "https://trust.signoz.io/",
249 | "https://github.com/SigNoz",
250 | "https://www.linkedin.com/company/signozio/",
251 | "https://www.youtube.com/@signoz"
252 | ]
253 | },
254 | {
255 | "content": "EC2 Monitoring\n--------------\n\n[📄️ Application Server Logs\\\n\\\nThis guide provides detailed instructions on how to send...](/docs/aws-monitoring/ec2-logs/)\n[📄️ Infrastructure Metrics\\\n\\\nThis documentation guides you through integrating AWS EC2 infrastructure...](/docs/aws-monitoring/ec2-infra-metrics/)\n\n[Prev\\\n\\\nInfinite Retention using AWS S3](/docs/tutorial/infinite-retention-aws-s3/)\n[Next\\\n\\\nApplication/Server logs](/docs/aws-monitoring/ec2-logs/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
256 | "markdown": "EC2 Monitoring\n--------------\n\n[📄️ Application Server Logs\\\n\\\nThis guide provides detailed instructions on how to send...](/docs/aws-monitoring/ec2-logs/)\n[📄️ Infrastructure Metrics\\\n\\\nThis documentation guides you through integrating AWS EC2 infrastructure...](/docs/aws-monitoring/ec2-infra-metrics/)\n\n[Prev\\\n\\\nInfinite Retention using AWS S3](/docs/tutorial/infinite-retention-aws-s3/)\n[Next\\\n\\\nApplication/Server logs](/docs/aws-monitoring/ec2-logs/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
257 | "metadata": {
258 | "ogUrl": "https://signoz.io/docs/ec2-monitoring/",
259 | "title": "EC2 Monitoring | SigNoz",
260 | "robots": "index, follow",
261 | "ogTitle": "EC2 Monitoring",
262 | "ogLocale": "en_US",
263 | "sourceURL": "https://signoz.io/docs/ec2-monitoring",
264 | "ogSiteName": "SigNoz",
265 | "pageStatusCode": 200,
266 | "ogLocaleAlternate": []
267 | },
268 | "linksOnPage": [
269 | "https://signoz.io/",
270 | "https://signoz.io/docs/",
271 | "https://signoz.io/resource-center/blog/",
272 | "https://signoz.io/pricing/",
273 | "https://signoz.io/case-study/",
274 | "https://signoz.io/teams/",
275 | "https://signoz.io/docs/aws-monitoring/ec2-logs/",
276 | "https://signoz.io/docs/aws-monitoring/ec2-infra-metrics/",
277 | "https://signoz.io/docs/tutorial/infinite-retention-aws-s3/",
278 | "https://signoz.io/docs/contributing/",
279 | "https://knowledgebase.signoz.io/kb",
280 | "https://signoz.io/api_reference/",
281 | "https://signoz.io/support/",
282 | "https://signoz.io/slack",
283 | "https://twitter.com/SigNozHQ",
284 | "https://community-chat.signoz.io/",
285 | "https://signoz.io/changelog/",
286 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
287 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
288 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
289 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
290 | "https://jobs.gem.com/signoz",
291 | "https://signoz.io/about-us/",
292 | "https://signoz.io/terms-of-service/",
293 | "https://signoz.io/privacy/",
294 | "https://trust.signoz.io/",
295 | "https://github.com/SigNoz",
296 | "https://www.linkedin.com/company/signozio/",
297 | "https://www.youtube.com/@signoz"
298 | ]
299 | },
300 | {
301 | "content": "ECS Monitoring\n--------------\n\n[📄️ EC2 / External\\\n\\\nTo monitor your ECS EC2 or external service...](/docs/aws-monitoring/ecs-ec2-external/)\n[📄️ Fargate\\\n\\\nTo monitor your ECS Fargate service, check out...](/docs/aws-monitoring/ecs-fargate/)\n\n[Prev\\\n\\\nInfrastructure Metrics](/docs/aws-monitoring/ec2-infra-metrics/)\n[Next\\\n\\\nEC2/External](/docs/aws-monitoring/ecs-ec2-external/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
302 | "markdown": "ECS Monitoring\n--------------\n\n[📄️ EC2 / External\\\n\\\nTo monitor your ECS EC2 or external service...](/docs/aws-monitoring/ecs-ec2-external/)\n[📄️ Fargate\\\n\\\nTo monitor your ECS Fargate service, check out...](/docs/aws-monitoring/ecs-fargate/)\n\n[Prev\\\n\\\nInfrastructure Metrics](/docs/aws-monitoring/ec2-infra-metrics/)\n[Next\\\n\\\nEC2/External](/docs/aws-monitoring/ecs-ec2-external/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
303 | "metadata": {
304 | "ogUrl": "https://signoz.io/docs/ecs-monitoring/",
305 | "title": "ECS Monitoring | SigNoz",
306 | "robots": "index, follow",
307 | "ogTitle": "ECS Monitoring",
308 | "ogLocale": "en_US",
309 | "sourceURL": "https://signoz.io/docs/ecs-monitoring",
310 | "ogSiteName": "SigNoz",
311 | "pageStatusCode": 200,
312 | "ogLocaleAlternate": []
313 | },
314 | "linksOnPage": [
315 | "https://signoz.io/",
316 | "https://signoz.io/docs/",
317 | "https://signoz.io/resource-center/blog/",
318 | "https://signoz.io/pricing/",
319 | "https://signoz.io/case-study/",
320 | "https://signoz.io/teams/",
321 | "https://signoz.io/docs/aws-monitoring/ecs-ec2-external/",
322 | "https://signoz.io/docs/aws-monitoring/ecs-fargate/",
323 | "https://signoz.io/docs/aws-monitoring/ec2-infra-metrics/",
324 | "https://signoz.io/docs/contributing/",
325 | "https://knowledgebase.signoz.io/kb",
326 | "https://signoz.io/api_reference/",
327 | "https://signoz.io/support/",
328 | "https://signoz.io/slack",
329 | "https://twitter.com/SigNozHQ",
330 | "https://community-chat.signoz.io/",
331 | "https://signoz.io/changelog/",
332 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
333 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
334 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
335 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
336 | "https://jobs.gem.com/signoz",
337 | "https://signoz.io/about-us/",
338 | "https://signoz.io/terms-of-service/",
339 | "https://signoz.io/privacy/",
340 | "https://trust.signoz.io/",
341 | "https://github.com/SigNoz",
342 | "https://www.linkedin.com/company/signozio/",
343 | "https://www.youtube.com/@signoz"
344 | ]
345 | },
346 | {
347 | "content": "FAQs\n----\n\nFind the most commonly questions about SigNoz Installation, Instrumentation, Features, Troubleshooting, and Contributing here:\n\n[📄️ Product - FAQs\\\n\\\nFrequently asked question about Product](/docs/faqs/product)\n[📄️ Troubleshooting - FAQs\\\n\\\nFrequently asked question about Troubleshooting](/docs/faqs/troubleshooting)\n[📄️ Instrumentation - FAQs\\\n\\\nFrequently asked question about Instrumentation](/docs/faqs/instrumentation)\n[📄️ Installation - FAQs\\\n\\\nFrequently asked question about Installation](/docs/faqs/installation)\n\n[Prev\\\n\\\nCommunity Channels](/docs/community/)\n[Next\\\n\\\nGeneral Troubleshooting](/docs/troubleshooting/signoz-cloud/general-troubleshooting/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
348 | "markdown": "FAQs\n----\n\nFind the most commonly questions about SigNoz Installation, Instrumentation, Features, Troubleshooting, and Contributing here:\n\n[📄️ Product - FAQs\\\n\\\nFrequently asked question about Product](/docs/faqs/product)\n[📄️ Troubleshooting - FAQs\\\n\\\nFrequently asked question about Troubleshooting](/docs/faqs/troubleshooting)\n[📄️ Instrumentation - FAQs\\\n\\\nFrequently asked question about Instrumentation](/docs/faqs/instrumentation)\n[📄️ Installation - FAQs\\\n\\\nFrequently asked question about Installation](/docs/faqs/installation)\n\n[Prev\\\n\\\nCommunity Channels](/docs/community/)\n[Next\\\n\\\nGeneral Troubleshooting](/docs/troubleshooting/signoz-cloud/general-troubleshooting/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
349 | "metadata": {
350 | "ogUrl": "https://signoz.io/docs/faqs/",
351 | "title": "FAQs | SigNoz",
352 | "robots": "index, follow",
353 | "ogTitle": "FAQs",
354 | "ogLocale": "en_US",
355 | "sourceURL": "https://signoz.io/docs/faqs",
356 | "ogSiteName": "SigNoz",
357 | "pageStatusCode": 200,
358 | "ogLocaleAlternate": []
359 | },
360 | "linksOnPage": [
361 | "https://signoz.io/",
362 | "https://signoz.io/docs/",
363 | "https://signoz.io/resource-center/blog/",
364 | "https://signoz.io/pricing/",
365 | "https://signoz.io/case-study/",
366 | "https://signoz.io/teams/",
367 | "https://signoz.io/docs/faqs/product",
368 | "https://signoz.io/docs/faqs/troubleshooting",
369 | "https://signoz.io/docs/faqs/instrumentation",
370 | "https://signoz.io/docs/faqs/installation",
371 | "https://signoz.io/docs/community/",
372 | "https://signoz.io/docs/troubleshooting/signoz-cloud/general-troubleshooting/",
373 | "https://signoz.io/docs/contributing/",
374 | "https://knowledgebase.signoz.io/kb",
375 | "https://signoz.io/api_reference/",
376 | "https://signoz.io/support/",
377 | "https://signoz.io/slack",
378 | "https://twitter.com/SigNozHQ",
379 | "https://community-chat.signoz.io/",
380 | "https://signoz.io/changelog/",
381 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
382 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
383 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
384 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
385 | "https://jobs.gem.com/signoz",
386 | "https://signoz.io/about-us/",
387 | "https://signoz.io/terms-of-service/",
388 | "https://signoz.io/privacy/",
389 | "https://trust.signoz.io/",
390 | "https://github.com/SigNoz",
391 | "https://www.linkedin.com/company/signozio/",
392 | "https://www.youtube.com/@signoz"
393 | ]
394 | },
395 | {
396 | "content": "Install SigNoz\n--------------\n\nTo install SigNoz, follow the instructions in the sections below. You can either self-host SigNoz or try SigNoz Cloud.\n\n[📄️ Setup SigNoz Cloud\\\n\\\nEasy way to get started with SigNoz](/docs/cloud/)\n[📄️ Self Host SigNoz\\\n\\\nDocker, Docker Swarm, Kubernetes](/docs/install/docker)\n[📄️ Install OTel Collector\\\n\\\nVM, Kubernetes](/docs/tutorial/opentelemetry-binary-usage-in-virtual-machine)\n\n[Prev\\\n\\\nWhat is SigNoz?](/docs/introduction/)\n[Next\\\n\\\nSetup SigNoz Cloud](/docs/cloud/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
397 | "markdown": "Install SigNoz\n--------------\n\nTo install SigNoz, follow the instructions in the sections below. You can either self-host SigNoz or try SigNoz Cloud.\n\n[📄️ Setup SigNoz Cloud\\\n\\\nEasy way to get started with SigNoz](/docs/cloud/)\n[📄️ Self Host SigNoz\\\n\\\nDocker, Docker Swarm, Kubernetes](/docs/install/docker)\n[📄️ Install OTel Collector\\\n\\\nVM, Kubernetes](/docs/tutorial/opentelemetry-binary-usage-in-virtual-machine)\n\n[Prev\\\n\\\nWhat is SigNoz?](/docs/introduction/)\n[Next\\\n\\\nSetup SigNoz Cloud](/docs/cloud/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
398 | "metadata": {
399 | "ogUrl": "https://signoz.io/docs/install/",
400 | "title": "Install SigNoz | SigNoz",
401 | "robots": "index, follow",
402 | "ogTitle": "Install SigNoz",
403 | "ogLocale": "en_US",
404 | "sourceURL": "https://signoz.io/docs/install",
405 | "ogSiteName": "SigNoz",
406 | "description": "Install SigNoz Cloud or Self-Host.",
407 | "ogDescription": "Install SigNoz Cloud or Self-Host.",
408 | "pageStatusCode": 200,
409 | "ogLocaleAlternate": []
410 | },
411 | "linksOnPage": [
412 | "https://signoz.io/",
413 | "https://signoz.io/docs/",
414 | "https://signoz.io/resource-center/blog/",
415 | "https://signoz.io/pricing/",
416 | "https://signoz.io/case-study/",
417 | "https://signoz.io/teams/",
418 | "https://signoz.io/docs/cloud/",
419 | "https://signoz.io/docs/install/docker",
420 | "https://signoz.io/docs/tutorial/opentelemetry-binary-usage-in-virtual-machine",
421 | "https://signoz.io/docs/introduction/",
422 | "https://signoz.io/docs/contributing/",
423 | "https://knowledgebase.signoz.io/kb",
424 | "https://signoz.io/api_reference/",
425 | "https://signoz.io/support/",
426 | "https://signoz.io/slack",
427 | "https://twitter.com/SigNozHQ",
428 | "https://community-chat.signoz.io/",
429 | "https://signoz.io/changelog/",
430 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
431 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
432 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
433 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
434 | "https://jobs.gem.com/signoz",
435 | "https://signoz.io/about-us/",
436 | "https://signoz.io/terms-of-service/",
437 | "https://signoz.io/privacy/",
438 | "https://trust.signoz.io/",
439 | "https://github.com/SigNoz",
440 | "https://www.linkedin.com/company/signozio/",
441 | "https://www.youtube.com/@signoz"
442 | ]
443 | },
444 | {
445 | "content": "Instrument your Application\n---------------------------\n\nTo instrument your applications and send data to SigNoz, follow the instructions in the sections below.\n\n[📄️ Python\\\n\\\nSend events from you Python application to SigNoz.](/docs/instrumentation/python)\n[📄️ Java\\\n\\\nSend events from you Java application to SigNoz.](/docs/instrumentation/java)\n[📄️ Javascript\\\n\\\nSend events from you Javascript application to SigNoz.](/docs/instrumentation/javascript)\n[📄️ Golang (Go)\\\n\\\nSend events from you Golang (Go) application to SigNoz.](/docs/instrumentation/golang)\n[📄️ PHP\\\n\\\nSend events from you PHP application to SigNoz.](/docs/instrumentation/php)\n[📄️ .NET\\\n\\\nSend events from you .NET application to SigNoz.](/docs/instrumentation/dotnet)\n[📄️ Ruby on Rails\\\n\\\nSend events from you Ruby on Rails application to SigNoz.](/docs/instrumentation/ruby-on-rails)\n[📄️ Elixir\\\n\\\nSend events from you Elixir application to SigNoz.](/docs/instrumentation/elixir)\n[📄️ Rust\\\n\\\nSend events from you Rust application to SigNoz.](/docs/instrumentation/rust)\n[📄️ Swift\\\n\\\nSend events from you Swift application to SigNoz.](/docs/instrumentation/swift)\n\n[Prev\\\n\\\nGet Started](/docs/instrumentation/overview/)\n[Next\\\n\\\nPython](/docs/instrumentation/python/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs 
Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
446 | "markdown": "Instrument your Application\n---------------------------\n\nTo instrument your applications and send data to SigNoz, follow the instructions in the sections below.\n\n[📄️ Python\\\n\\\nSend events from you Python application to SigNoz.](/docs/instrumentation/python)\n[📄️ Java\\\n\\\nSend events from you Java application to SigNoz.](/docs/instrumentation/java)\n[📄️ Javascript\\\n\\\nSend events from you Javascript application to SigNoz.](/docs/instrumentation/javascript)\n[📄️ Golang (Go)\\\n\\\nSend events from you Golang (Go) application to SigNoz.](/docs/instrumentation/golang)\n[📄️ PHP\\\n\\\nSend events from you PHP application to SigNoz.](/docs/instrumentation/php)\n[📄️ .NET\\\n\\\nSend events from you .NET application to SigNoz.](/docs/instrumentation/dotnet)\n[📄️ Ruby on Rails\\\n\\\nSend events from you Ruby on Rails application to SigNoz.](/docs/instrumentation/ruby-on-rails)\n[📄️ Elixir\\\n\\\nSend events from you Elixir application to SigNoz.](/docs/instrumentation/elixir)\n[📄️ Rust\\\n\\\nSend events from you Rust application to SigNoz.](/docs/instrumentation/rust)\n[📄️ Swift\\\n\\\nSend events from you Swift application to SigNoz.](/docs/instrumentation/swift)\n\n[Prev\\\n\\\nGet Started](/docs/instrumentation/overview/)\n[Next\\\n\\\nPython](/docs/instrumentation/python/)\n\nDocs\n\n[Introduction](/docs/)\n[Contributing](/docs/contributing/)\n\n[Knowledge Base](https://knowledgebase.signoz.io/kb)\n\n[SigNoz API](/api_reference/)\n\nCommunity\n\n[Support](/support/)\n\n[Slack](https://signoz.io/slack)\n\n[Twitter](https://twitter.com/SigNozHQ)\n\n[Community Archive](https://community-chat.signoz.io/)\n\n[Changelog](/changelog/)\n\nMore\n\n[SigNoz vs Datadog](/product-comparison/signoz-vs-datadog/)\n[SigNoz vs New Relic](/product-comparison/signoz-vs-newrelic/)\n[SigNoz vs Grafana](/product-comparison/signoz-vs-grafana/)\n[SigNoz vs 
Dynatrace](/product-comparison/signoz-vs-dynatrace/)\n\n[Careers](https://jobs.gem.com/signoz)\n\n[About](/about-us/)\n[Terms](/terms-of-service/)\n[Privacy](/privacy/)\n[Security & Compliance](https://trust.signoz.io/)\n\n\n\nSigNoz\n\nAll systems operational\n\n[](https://github.com/SigNoz)\n[](https://www.linkedin.com/company/signozio/)\n[](https://signoz.io/slack)\n[](https://twitter.com/SigNozHQ)\n[](https://www.youtube.com/@signoz)\n\n",
447 | "metadata": {
448 | "ogUrl": "https://signoz.io/docs/instrumentation/",
449 | "title": "Instrument your Application | SigNoz",
450 | "robots": "index, follow",
451 | "ogTitle": "Instrument your Application",
452 | "ogLocale": "en_US",
453 | "sourceURL": "https://signoz.io/docs/instrumentation",
454 | "ogSiteName": "SigNoz",
455 | "pageStatusCode": 200,
456 | "ogLocaleAlternate": []
457 | },
458 | "linksOnPage": [
459 | "https://signoz.io/",
460 | "https://signoz.io/docs/",
461 | "https://signoz.io/resource-center/blog/",
462 | "https://signoz.io/pricing/",
463 | "https://signoz.io/case-study/",
464 | "https://signoz.io/teams/",
465 | "https://signoz.io/docs/instrumentation/python",
466 | "https://signoz.io/docs/instrumentation/java",
467 | "https://signoz.io/docs/instrumentation/javascript",
468 | "https://signoz.io/docs/instrumentation/golang",
469 | "https://signoz.io/docs/instrumentation/php",
470 | "https://signoz.io/docs/instrumentation/dotnet",
471 | "https://signoz.io/docs/instrumentation/ruby-on-rails",
472 | "https://signoz.io/docs/instrumentation/elixir",
473 | "https://signoz.io/docs/instrumentation/rust",
474 | "https://signoz.io/docs/instrumentation/swift",
475 | "https://signoz.io/docs/instrumentation/overview/",
476 | "https://signoz.io/docs/instrumentation/python/",
477 | "https://signoz.io/docs/contributing/",
478 | "https://knowledgebase.signoz.io/kb",
479 | "https://signoz.io/api_reference/",
480 | "https://signoz.io/support/",
481 | "https://signoz.io/slack",
482 | "https://twitter.com/SigNozHQ",
483 | "https://community-chat.signoz.io/",
484 | "https://signoz.io/changelog/",
485 | "https://signoz.io/product-comparison/signoz-vs-datadog/",
486 | "https://signoz.io/product-comparison/signoz-vs-newrelic/",
487 | "https://signoz.io/product-comparison/signoz-vs-grafana/",
488 | "https://signoz.io/product-comparison/signoz-vs-dynatrace/",
489 | "https://jobs.gem.com/signoz",
490 | "https://signoz.io/about-us/",
491 | "https://signoz.io/terms-of-service/",
492 | "https://signoz.io/privacy/",
493 | "https://trust.signoz.io/",
494 | "https://github.com/SigNoz",
495 | "https://www.linkedin.com/company/signozio/",
496 | "https://www.youtube.com/@signoz"
497 | ]
498 | }
499 | ]
--------------------------------------------------------------------------------
/node/babel.config.js:
--------------------------------------------------------------------------------
// Babel configuration: compile for whichever Node.js version runs the tools.
//
// The package is declared as "type": "module", so a `.js` file is evaluated as
// an ES module, where the CommonJS `module` object does not exist. The config
// is therefore exported with ESM syntax instead of `module.exports`.
const babelConfig = {
  presets: [['@babel/preset-env', { targets: { node: 'current' } }]],
};

export default babelConfig;
--------------------------------------------------------------------------------
/node/cleaners.js:
--------------------------------------------------------------------------------
/**
 * Collapse "backslash, newline, backslash, newline" gaps inside markdown link text.
 *
 * Only the text between `[` and `]` of a single link is rewritten: each gap is
 * replaced by one space, and the URL and all surrounding text are left as-is.
 * The link text is matched with `[^\[\]]` so a match can never start at the
 * `[` of an earlier link and swallow unrelated text in between.
 *
 * @param {string} text - The input text to clean
 * @returns {string} The cleaned text
 */
const cleanDoubleNewlineMarkdownLinks = (text) => {
  const gapPattern = /\\\s*\n\s*\\\s*\n\s*/g;
  const linkWithGapPattern =
    /\[([^\[\]]*?\\\s*\n\s*\\\s*\n\s*[^\[\]]*?)\]\(([^)]*)\)/g;

  return text.replace(
    linkWithGapPattern,
    (match, linkText, url) => `[${linkText.replace(gapPattern, ' ')}](${url})`
  );
};
20 |
/**
 * Turn anchor-tag headings into markdown h2 headings.
 *
 * An empty link to a fragment (`[](#anchor)`) followed by a newline and a line
 * of text is replaced by `## <that line>`. Everything else is left untouched.
 *
 * @param {string} text - The input text to clean
 * @returns {string} The cleaned text
 */
const cleanAnchortagHeadings = (text) => {
  const anchorHeading = /\[\]\((#.*?)\)\n(.*?)(?=\n|$)/g;
  const toH2 = (_whole, _anchor, headingLine) => `## ${headingLine}`;
  return text.replace(anchorHeading, toH2);
};
33 |
/**
 * Re-attach a closing `**` that ended up on the line after a bold link.
 *
 * Example input:
 * ```
 * **[Use the all-in-one auto-instrumentation library(Recommended)](#using-the-all-in-one-auto-instrumentation-library)\n **
 * ```
 * The newline and any whitespace between the link and the second pair of
 * asterisks are removed, giving `**[...](...)**`.
 *
 * @param {string} text - The input text to clean
 * @returns {string} The cleaned text
 */
const cleanDoubleAsteriskWhitespaceGaps = (text) => {
  const splitBoldLink = /\*\*(\[.*\]\(.*\))\n\s*\*\*/g;
  return text.replace(splitBoldLink, (_whole, link) => `**${link}**`);
};
52 |
53 |
/**
 * Join a markdown link with the lowercase continuation on the following line.
 *
 * When a link is directly followed by a newline, optional whitespace and then
 * text starting with a lowercase ASCII letter, the newline and whitespace are
 * replaced by a single space so the sentence reads continuously. The text
 * after the link is kept as-is.
 *
 * @param {string} text - The input text to clean
 * @returns {string} The cleaned text
 */
const cleanNewlineAndSpacesAfterLinks = (text) => {
  const linkThenLowercaseLine = /(\[.*?\]\(.*?\))\n\s*([a-z].*)/g;
  const joinWithSpace = (_whole, link, continuation) => `${link} ${continuation}`;
  return text.replace(linkThenLowercaseLine, joinWithSpace);
};
72 |
73 |
74 |
/**
 * Spread multi-column links in markdown text into a list format.
 *
 * A paragraph (preceded by a blank line) that consists only of links whose
 * text contains backslash line-continuations is rewritten as a bullet list,
 * one `- [text](url)` item per line. Inside each link text a double
 * backslash-newline gap becomes `: ` and a single one becomes a space.
 * The whole result is trimmed.
 *
 * The link-text part of the pattern is written as `[^\]]+\\[^\]]+` (some text,
 * a backslash, some text). This accepts the same link texts as a repeated
 * `(?:[^\]]+\\\s*)+` group but cannot backtrack exponentially on hostile or
 * malformed input such as an unclosed `[` followed by many backslashes.
 *
 * @param {string} markdownText - The input markdown text to clean
 * @returns {string} The cleaned markdown text
 */
const cleanMultiColumnLinks = (markdownText) => {
  const linkParagraphPattern =
    /(\n\n)(\[[^\]]+\\[^\]]+\]\([^\)]+\)(?:\s*\[[^\]]+\\[^\]]+\]\([^\)]+\))*)\s*(?=$|\n\n)/g;
  const singleLinkPattern = /\[([^\]]+)\]\(([^\)]+)\)/g;

  const toListItem = ([, linkText, linkUrl]) => {
    const cleanText = linkText
      .replace(/\\\s*\n\s*\\\s*\n\s*/g, ': ')
      .replace(/\s*\\\s*\n\s*/g, ' ')
      .replace(/\\ \\ /g, ': ')
      .trim();
    return `- [${cleanText}](${linkUrl})`;
  };

  const toList = (match, newlines, links) =>
    newlines + Array.from(links.matchAll(singleLinkPattern), toListItem).join('\n');

  return markdownText.replace(linkParagraphPattern, toList).trim();
};
101 |
102 |
103 |
/**
 * Remove stray newlines that directly follow a markdown link.
 *
 * Two rewrites are applied, in this order:
 * 1. link, newline, period            -> link immediately followed by the period
 * 2. link, newline, one space, text   -> link, one space, text
 *
 * @param {string} text - The input text to clean
 * @returns {string} The cleaned text with extra newlines after links removed
 */
const cleanExtraNewlinesAfterLinks = (text) => {
  const linkThenPeriod = /(\[.*?\]\(.*?\))\n\./g;
  const linkThenSpace = /(\[.*?\]\(.*?\))\n (?=\S)/g;

  return text
    .replace(linkThenPeriod, (_whole, link) => `${link}.`)
    .replace(linkThenSpace, (_whole, link) => `${link} `);
};
123 |
124 |
/**
 * Remove end matter: everything from the earliest end-of-page marker onwards.
 *
 * Markers are the `[](#get-help)` anchor, a link whose text starts with the
 * word "Prev" (the previous/next navigation), and the fixed "join our slack
 * community" help sentence. The text before the earliest marker is returned
 * trimmed; if no marker is present the input is returned unchanged.
 *
 * The "Prev" marker requires a word boundary after "Prev" so that ordinary
 * in-body links such as `[Preview ...]` or `[Prevent ...]` do not truncate
 * the page.
 *
 * @param {string} text - The input text to clean
 * @returns {string} The cleaned text
 */
const removeEndMatter = (text) => {
  const endMatterMarkers = [
    /\[]\(#get-help\)/,
    /\[Prev\b/,
    /If you have any questions or need any help in setting things up, join our slack community and ping us in `#help` channel\./
  ];

  const cutPoints = endMatterMarkers
    .map((marker) => text.search(marker))
    .filter((index) => index !== -1);

  if (cutPoints.length === 0) {
    return text;
  }

  return text.slice(0, Math.min(...cutPoints)).trim();
};
151 |
152 | const cleaners = {
153 | cleanDoubleNewlineMarkdownLinks,
154 | cleanAnchortagHeadings,
155 | cleanExtraNewlinesAfterLinks,
156 | removeEndMatter,
157 | cleanMultiColumnLinks,
158 | cleanNewlineAndSpacesAfterLinks,
159 | cleanDoubleAsteriskWhitespaceGaps
160 | };
161 |
162 | export default cleaners;
--------------------------------------------------------------------------------
/node/cleaners.test.js:
--------------------------------------------------------------------------------
1 | import cleaners from './cleaners';
2 |
// Unit tests for the markdown cleaners. Every exported cleaner that rewrites
// text has at least one case pinning its documented behaviour.
describe('cleaners', () => {
  describe('cleanDoubleNewlineMarkdownLinks', () => {
    it('should clean double newlines in markdown links', () => {
      const input = 'Some text\n[📄️ Metrics\\\n\\\nTo monitor...](/docs/metrics/)\nMore text';
      const expected = 'Some text\n[📄️ Metrics To monitor...](/docs/metrics/)\nMore text';
      expect(cleaners.cleanDoubleNewlineMarkdownLinks(input)).toBe(expected);
    });
  });

  describe('cleanAnchortagHeadings', () => {
    it('should replace anchortag headings with h2 tags', () => {
      const input = 'Some text\n[](#heading1)\nHeading 1\nMore text\n[](#heading2)\nHeading 2';
      const expected = 'Some text\n## Heading 1\nMore text\n## Heading 2';
      expect(cleaners.cleanAnchortagHeadings(input)).toBe(expected);
    });
  });

  describe('cleanExtraNewlinesAfterLinks', () => {
    it('should remove a newline between a link and a period', () => {
      expect(cleaners.cleanExtraNewlinesAfterLinks('See [a](b)\n. Next')).toBe('See [a](b). Next');
    });

    it('should remove a newline after a link when followed by a space', () => {
      expect(cleaners.cleanExtraNewlinesAfterLinks('See [c](d)\n e')).toBe('See [c](d) e');
    });
  });

  describe('cleanDoubleAsteriskWhitespaceGaps', () => {
    it('should pull the closing asterisks back onto the link line', () => {
      expect(cleaners.cleanDoubleAsteriskWhitespaceGaps('**[Use it](#use)\n **'))
        .toBe('**[Use it](#use)**');
    });
  });

  describe('cleanNewlineAndSpacesAfterLinks', () => {
    it('should join a link with a lowercase continuation line', () => {
      expect(cleaners.cleanNewlineAndSpacesAfterLinks('Read [docs](/d)\n   for details'))
        .toBe('Read [docs](/d) for details');
    });

    it('should leave a following capitalised line alone', () => {
      const input = 'Read [docs](/d)\nNext step';
      expect(cleaners.cleanNewlineAndSpacesAfterLinks(input)).toBe(input);
    });
  });

  describe('cleanMultiColumnLinks', () => {
    it('should turn a paragraph of multi-line links into a list', () => {
      const input = 'Intro\n\n[Python\\\n\\\nSend events.](/docs/python)\n' +
        '[Java\\\n\\\nSend more.](/docs/java)\n\nAfter';
      const expected = 'Intro\n\n- [Python: Send events.](/docs/python)\n' +
        '- [Java: Send more.](/docs/java)\n\nAfter';
      expect(cleaners.cleanMultiColumnLinks(input)).toBe(expected);
    });
  });

  describe('removeEndMatter', () => {
    it('should remove end matter starting with [](#get-help)', () => {
      const input = 'Some content\n[](#get-help)\nGet help here\n[Prev]\nPrevious content';
      const expected = 'Some content';
      expect(cleaners.removeEndMatter(input)).toBe(expected);
    });

    it('should remove end matter starting with the [Prev navigation link', () => {
      const input = 'Body\n[Prev\\\n\\\nIntro](/docs/)\nfooter';
      expect(cleaners.removeEndMatter(input)).toBe('Body');
    });

    it('should remove end matter starting with specific help text', () => {
      const input = 'Main text\nIf you have any questions or need any help in setting things up, join our slack community and ping us in `#help` channel.\nMore text';
      const expected = 'Main text';
      expect(cleaners.removeEndMatter(input)).toBe(expected);
    });

    it('should not remove content if no end matter is found', () => {
      const input = 'Only main content even if help mentioned';
      const expected = 'Only main content even if help mentioned';
      expect(cleaners.removeEndMatter(input)).toBe(expected);
    });
  });
});
--------------------------------------------------------------------------------
/node/firecrawl_to_trieve_config.js:
--------------------------------------------------------------------------------
1 | // This prepares the user configurations
2 | // It looks at the .env to see if there are multiple dataset IDs
3 | // It then provides configs to the transform and load scripts to appropriately:
4 | // 1. write to
5 | // 2. read from
6 | // 3. ingest to
7 |
8 | import fs from 'fs';
9 | import path from 'path';
10 | import dotenv from 'dotenv';
11 | import { fileURLToPath } from 'url';
12 | import readline from 'readline';
13 |
14 | const __filename = fileURLToPath(import.meta.url);
15 | const __dirname = path.dirname(__filename);
16 |
17 | dotenv.config({ path: path.resolve(__dirname, '../.env') });
18 |
19 | const CONFIGS = {
20 | boost: false,
21 | maxWords: 500,
22 | maxDepth: 3,
23 | semanticBoostDistanceFactor: 0.5,
24 | fulltextBoostFactor: 5,
25 | rootUrl: 'https://signoz.io/'
26 | };
27 |
/**
 * List the names of all `TRIEVE_DATASET_ID*` variables declared in `../.env`.
 *
 * Only the variable names are returned (not their values). Lines may be
 * indented, prefixed with `export `, and end with either LF or CRLF.
 *
 * @returns {string[]} Variable names in file order; an empty array when the
 *   `.env` file does not exist, so the caller can report "no dataset" itself.
 * @throws {Error} Any file-system error other than the file being absent.
 */
function getDatasetOptions() {
  const envPath = path.resolve(__dirname, '../.env');

  let envContent;
  try {
    envContent = fs.readFileSync(envPath, 'utf8');
  } catch (error) {
    if (error.code === 'ENOENT') {
      return [];
    }
    throw error;
  }

  const datasetKeyPattern = /^\s*(?:export\s+)?(TRIEVE_DATASET_ID[\w.-]*)/;
  return envContent.split(/\r?\n/)
    .map(line => line.match(datasetKeyPattern))
    .filter(match => match !== null)
    .map(match => match[1]);
}
35 |
/**
 * Pick which `TRIEVE_DATASET_ID*` variable to use.
 *
 * - No option available: prints a message and exits the process with code 1.
 * - Exactly one option: returns it without prompting.
 * - Several options: lists them and asks for a 1-based number on stdin.
 *   Anything that is not a plain integer within range exits with code 1.
 *
 * @returns {Promise<string>} The chosen environment variable name.
 */
async function chooseDataset() {
  const datasetOptions = getDatasetOptions();

  if (datasetOptions.length === 0) {
    console.log('No TRIEVE_DATASET_ID found in .env file. Exiting.');
    return process.exit(1);
  }

  if (datasetOptions.length === 1) {
    return datasetOptions[0];
  }

  console.log('Available dataset options:');
  datasetOptions.forEach((option, index) => {
    console.log(`${index + 1}. ${option}`);
  });

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise((resolve) => {
    rl.question('Choose a dataset (enter the number): ', (answer) => {
      rl.close();

      // Accept digits only, so inputs like "2abc" or "1.9" are rejected
      // instead of being silently truncated to a number.
      const trimmed = answer.trim();
      const choice = /^\d+$/.test(trimmed) ? Number.parseInt(trimmed, 10) - 1 : -1;

      if (choice >= 0 && choice < datasetOptions.length) {
        resolve(datasetOptions[choice]);
      } else {
        console.log('Invalid choice. Exiting.');
        process.exit(1);
      }
    });
  });
}
69 |
/**
 * Find the most recently modified chunks file next to this module.
 *
 * Candidates are entries of this module's directory whose name starts with
 * `chunks` and ends with `.json`, excluding names ending in `boost.json`.
 *
 * @returns {string|undefined} The bare file name (no directory) with the
 *   newest modification time, or `undefined` when there is no candidate.
 */
function getLatestChunksFile() {
  const candidates = [];
  for (const name of fs.readdirSync(__dirname)) {
    const looksLikeChunks = name.startsWith('chunks') && name.endsWith('.json');
    if (looksLikeChunks && !name.endsWith('boost.json')) {
      candidates.push(name);
    }
  }

  const modifiedAt = (name) => fs.statSync(path.join(__dirname, name)).mtime.getTime();

  // Newest first.
  candidates.sort((first, second) => modifiedAt(second) - modifiedAt(first));
  return candidates[0];
}
80 |
/**
 * Find the crawl results file that sorts last by name and extract its timestamp.
 *
 * Candidates are entries of this module's directory named
 * `crawl_results*.json`. They are sorted in descending `localeCompare` order
 * and the first one is taken. The timestamp is the first
 * `YYYY-MM-DD(T|_)HH-MM-SS` substring of that name, with a `T` separator
 * replaced by `_`.
 *
 * @returns {{file: string, timestamp: string}} Bare file name and timestamp;
 *   both are empty strings when no crawl file exists, and `timestamp` is empty
 *   when the name carries no recognisable timestamp.
 */
function getLatestCrawlFileAndTimestamp() {
  const isCrawlFile = (name) => name.startsWith('crawl_results') && name.endsWith('.json');
  const crawlFiles = fs.readdirSync(__dirname).filter(isCrawlFile);
  crawlFiles.sort((a, b) => b.localeCompare(a));

  console.log('Found crawl files:', crawlFiles);

  if (crawlFiles.length === 0) {
    return { file: '', timestamp: '' };
  }

  const [latestFile] = crawlFiles;
  const found = latestFile.match(/(\d{4}-\d{2}-\d{2}[T_]\d{2}-\d{2}-\d{2})/);
  let timestamp = '';
  if (found) {
    timestamp = found[1].replace('T', '_');
  }

  console.log('Latest crawl file:', latestFile);
  console.log('Timestamp:', timestamp);
  return { file: latestFile, timestamp };
}
100 |
/**
 * Build the Trieve connection settings for one dataset.
 *
 * @param {string} datasetName - Name of the environment variable that holds
 *   the dataset id (for example a `TRIEVE_DATASET_ID*` entry).
 * @returns {{trieveAPIKey: (string|undefined), trieveAPIBasePath: string,
 *   trieveDatasetId: (string|undefined), trieveDatasetName: string}}
 *   API key and dataset id are read from `process.env` and are `undefined`
 *   when the corresponding variable is not set.
 */
function getConfiguration(datasetName) {
  const { TRIEVE_API_KEY: trieveAPIKey, [datasetName]: trieveDatasetId } = process.env;
  const trieveAPIBasePath = "https://api.trieve.ai";

  return {
    trieveAPIKey,
    trieveAPIBasePath,
    trieveDatasetId,
    trieveDatasetName: datasetName
  };
}
109 |
110 | const configs = {
111 | CONFIGS,
112 | chooseDataset,
113 | getLatestChunksFile,
114 | getLatestCrawlFileAndTimestamp,
115 | getConfiguration
116 | };
117 |
118 | export default configs;
--------------------------------------------------------------------------------
/node/jest.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 | transform: {},
3 | extensionsToTreatAsEsm: [],
4 | moduleFileExtensions: ['js', 'mjs'],
5 | testEnvironment: 'node'
6 | };
--------------------------------------------------------------------------------
/node/load.js:
--------------------------------------------------------------------------------
1 | import fs from 'fs/promises';
2 | import axios from 'axios';
3 | import configs from './firecrawl_to_trieve_config.js';
4 |
5 | const MOCK_MODE = true;
6 | const BATCH_SIZE = 120;
7 |
/**
 * Send one batch of chunks to the Trieve `/api/chunk` endpoint.
 *
 * Each chunk is sent with `upsert_by_tracking_id` set to `upsert`. The flag is
 * added to copies, so the caller's chunk objects are not modified. When
 * `MOCK_MODE` is on, nothing is sent and the intended action is only logged.
 *
 * @param {object[]} chunks - Chunk objects for a single request.
 * @param {object} config - Settings with `trieveAPIBasePath`,
 *   `trieveDatasetId`, `trieveAPIKey` and `trieveDatasetName`.
 * @param {boolean} [upsert=false] - Upsert by tracking id instead of create.
 * @returns {Promise<object>} The response body, or `{ mock: true }` in mock mode.
 * @throws The axios error, after logging status, response data and message.
 */
async function loadChunks(chunks, config, upsert = false) {
  const url = `${config.trieveAPIBasePath}/api/chunk`;
  const headers = {
    "TR-Dataset": config.trieveDatasetId,
    "Authorization": `Bearer ${config.trieveAPIKey}`,
    "Content-Type": "application/json"
  };

  const payload = chunks.map(chunk => ({ ...chunk, upsert_by_tracking_id: upsert }));

  if (MOCK_MODE) {
    console.log(`MOCK: Would ${upsert ? 'upsert' : 'create'} batch of ` +
      `${payload.length} chunks to ${config.trieveDatasetName}`);
    return { mock: true };
  }

  try {
    const { data } = await axios.post(url, payload, { headers });
    console.log(`Successfully ${upsert ? 'upserted' : 'created'} batch of ` +
      `${payload.length} chunks to ${config.trieveDatasetName}`);
    return data;
  } catch (error) {
    console.error(`Failed to ${upsert ? 'upsert' : 'create'} batch. ` +
      `Status: ${error.response?.status}, ` +
      `Data: ${JSON.stringify(error.response?.data)}, ` +
      `Message: ${error.message}`);
    throw error;
  }
}
37 |
/**
 * Load all chunks in consecutive batches of `BATCH_SIZE`, one request at a time.
 *
 * Progress is logged after every batch. A failing batch rejects the returned
 * promise, so later batches are not attempted.
 *
 * @param {object[]} chunks - All chunks to load.
 * @param {object} config - Trieve settings, passed through to `loadChunks`.
 * @param {string} operation - `'-upsert'` to upsert; any other value creates.
 * @returns {Promise<void>}
 */
async function processChunks(chunks, config, operation) {
  const totalChunks = chunks.length;
  const shouldUpsert = operation === '-upsert';
  let processedChunks = 0;

  while (processedChunks < totalChunks) {
    const batch = chunks.slice(processedChunks, processedChunks + BATCH_SIZE);
    await loadChunks(batch, config, shouldUpsert);
    processedChunks += batch.length;
    console.log(`Processed ${processedChunks}/${totalChunks} chunks`);
  }
}
49 |
/**
 * Command-line entry point: load the newest chunks file into Trieve.
 *
 * Expects `-create` or `-upsert` as the first argument; any other value prints
 * the usage line and exits with code 1. The dataset is chosen through
 * `chooseDataset()` and the file through `getLatestChunksFile()`.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no chunks file exists, or reading, parsing or loading fails.
 */
async function run() {
  const { chooseDataset, getLatestChunksFile, getConfiguration } = configs;

  const [,, operation] = process.argv;
  if (operation !== '-create' && operation !== '-upsert') {
    console.error('Usage: node load.js [-create|-upsert]');
    process.exit(1);
  }

  const datasetName = await chooseDataset();
  const chunkFilename = getLatestChunksFile();
  if (!chunkFilename) {
    throw new Error('No chunks*.json file found; run the transform step first.');
  }
  const config = getConfiguration(datasetName);

  // getLatestChunksFile() returns a bare file name found in the directory of
  // the config module, which sits next to this script. Resolve the name against
  // this module's location so the read does not depend on the current working
  // directory.
  const chunkFileUrl = new URL(`./${encodeURIComponent(chunkFilename)}`, import.meta.url);
  const chunksData = await fs.readFile(chunkFileUrl, 'utf8');
  const chunks = JSON.parse(chunksData);

  await processChunks(chunks, config, operation);
}
68 |
69 | run().catch(error => {
70 | console.error('An error occurred:', error);
71 | process.exit(1);
72 | });
73 |
--------------------------------------------------------------------------------
/node/package.json:
--------------------------------------------------------------------------------
{
  "name": "firecrawl-to-trieve",
  "version": "1.0.0",
  "license": "MIT",
  "description": "Demonstration of a Firecrawl-to-Trieve crawling-to-search pipeline",
  "type": "module",
  "scripts": {
    "crawl": "node runFirecrawl.js",
    "transform": "node transform.js",
    "load:create": "node load.js -create",
    "load:upsert": "node load.js -upsert",
    "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js"
  },
  "dependencies": {
    "@mendable/firecrawl-js": "^0.0.36",
    "axios": "^1.7.4",
    "dotenv": "^16.0.0",
    "markdown-it": "^14.1.0"
  },
  "devDependencies": {
    "@babel/core": "^7.25.2",
    "@babel/preset-env": "^7.25.3",
    "babel-jest": "^29.7.0",
    "jest": "^29.7.0"
  }
}
--------------------------------------------------------------------------------
/node/runFirecrawl.js:
--------------------------------------------------------------------------------
1 | // Firecrawl Docs: https://docs.firecrawl.dev/features/crawl
2 | // Firecrawl Github: https://github.com/mendableai/firecrawl/tree/main/apps/js-sdk
3 | import FirecrawlApp from '@mendable/firecrawl-js';
4 | import dotenv from 'dotenv';
5 | import { fileURLToPath } from 'url';
6 | import path from 'path';
7 | import fs from 'fs';
8 |
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

dotenv.config({ path: path.resolve(__dirname, '../.env') });

// Fail early with a clear message instead of sending an unauthenticated crawl.
if (!process.env.FIRECRAWL_API_KEY) {
  console.error('FIRECRAWL_API_KEY is not set; add it to the .env file.');
  process.exit(1);
}

// Initialize the FirecrawlApp with your API key
const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });

// Define crawl parameters
const crawlUrl = 'https://signoz.io/docs/';
const params = {
  crawlerOptions: {
    limit: 10,
    maxDepth: 10,
    includes: ['docs/*'],
  },
  pageOptions: {
    onlyMainContent: true
  }
};

// Crawl the website
const crawlResult = await app.crawlUrl(
  crawlUrl,
  params,
  true, // wait_until_done
  2 // poll_interval
);

// Save the crawl result to a file (with a timestamp).
// The ISO time has ':' replaced by '-' and the fractional seconds and 'Z'
// dropped, e.g. 2024-08-20T16-35-59. The file is written next to this script,
// not into the current working directory, because the config module searches
// its own directory (the same one) for `crawl_results*.json` files.
const timestamp = new Date().toISOString().replace(/:/g, '-').slice(0, -5);
const outputPath = path.join(__dirname, `crawl_results_${timestamp}.json`);
fs.writeFileSync(outputPath, JSON.stringify(crawlResult, null, 2));
console.log(`Saved crawl results to ${outputPath}`);
--------------------------------------------------------------------------------
/node/transform.js:
--------------------------------------------------------------------------------
1 | import fs from 'fs';
2 | import { URL } from 'url';
3 | import cleaners from './cleaners.js';
4 | import MarkdownIt from 'markdown-it';
5 | import path from 'path';
6 | import configs from './firecrawl_to_trieve_config.js';
7 |
8 | const MOCK_MODE = false;
9 |
10 | // Use CONFIGS from firecrawl_to_trieve_config.js
11 | const CONFIGS = configs.CONFIGS;
12 |
13 | const markdown = new MarkdownIt();
14 | const chunks = [];
15 |
/**
 * Derive tags from the path segments that follow `docs` in a URL.
 *
 * The final path segment is always dropped before empty segments are removed.
 * For a URL ending in `/` the dropped segment is the empty one, so the page
 * slug is kept; without a trailing slash the slug itself is dropped.
 *
 * @param {string} url - Absolute page URL.
 * @returns {string[]} Tags in path order; empty when the path has no `docs` segment.
 */
function getTags(url) {
  const segments = new URL(url).pathname.split('/');
  const docsPosition = segments.indexOf('docs');

  if (docsPosition === -1) {
    return [];
  }

  return segments.slice(docsPosition + 1, -1).filter(Boolean);
}
26 |
/**
 * Collect the URLs of markdown images that point at `.png` or `.webp` files.
 *
 * The URL is taken from the capture group of the image pattern itself, so
 * parentheses inside the alt text (e.g. `![Chart (v2)](a.png)`) cannot be
 * mistaken for the URL. A URL may not contain whitespace or `)`.
 *
 * @param {string} markdownContent - Markdown text to scan.
 * @returns {string[]} Image URLs in order of appearance; empty when none match.
 */
function getImages(markdownContent) {
  const imagePattern = /!\[.*?\]\(([^)\s]+\.(?:png|webp))\)/g;
  return Array.from(markdownContent.matchAll(imagePattern), (match) => match[1]);
}
32 |
/**
 * Turn a page or heading URL into a dash-separated tracking id.
 *
 * The first `https://signoz.io/` is removed; `#`, `:`, `/` and whitespace
 * become `-`; every run of dashes is collapsed to a single dash; and a
 * leading or trailing dash is dropped.
 *
 * Runs of three or more dashes are collapsed completely (a single `--` -> `-`
 * pass would leave `--` behind), so ids for such URLs are shorter than with a
 * one-pass replacement.
 *
 * @param {string} url - Absolute URL, optionally with a `#fragment`.
 * @returns {string} The tracking id.
 */
function getTrackingId(url) {
  return url.replace('https://signoz.io/', '')
    .replace(/[#\s:/]/g, '-')
    .replace(/-{2,}/g, '-')
    .replace(/^-|-$/g, '');
}
43 |
/**
 * Produce the cleaned text for one chunk of a page.
 *
 * The slice `content[startIndex, chunkEnd)` (to the end when `chunkEnd` is
 * `null`) is trimmed, stripped of leading/trailing dashes and run through the
 * cleaners in a fixed order. Then:
 * - if at most one line is left, `{ "HEADING_ONLY": <that line> }` is
 *   returned instead of a string;
 * - if `headingText` is empty, the text is trimmed and edge dashes removed;
 * - otherwise the first line is replaced by `"<pageTitle>: <headingText>"`.
 *
 * @param {string} content - Markdown of the section.
 * @param {string} pageTitle - Title of the page the section belongs to.
 * @param {string} headingText - Section heading, or `''` for none.
 * @param {number} startIndex - Start offset into `content`.
 * @param {number|null} chunkEnd - End offset, or `null` for "to the end".
 * @returns {string|{HEADING_ONLY: string}} Chunk text or the heading-only marker.
 * @throws Re-throws any error after logging its message.
 */
function getChunkHtml(content, pageTitle, headingText, startIndex, chunkEnd) {
  const stripEdgeDashes = (value) => value.replace(/^-+|-+$/g, '');
  const cleanerOrder = [
    'cleanMultiColumnLinks',
    'cleanAnchortagHeadings',
    'cleanExtraNewlinesAfterLinks',
    'cleanDoubleAsteriskWhitespaceGaps',
    'cleanNewlineAndSpacesAfterLinks',
  ];

  try {
    const sliceEnd = chunkEnd === null ? undefined : chunkEnd;
    const rawChunk = content.slice(startIndex, sliceEnd);
    const initialText = stripEdgeDashes(rawChunk.trim()).trim();

    const cleaned = cleanerOrder.reduce(
      (textSoFar, cleanerName) => cleaners[cleanerName](textSoFar),
      initialText
    );
    const trimmed = cleaned.trim();

    // Skip heading-only chunks
    if (trimmed.split('\n').length <= 1) {
      return {"HEADING_ONLY": trimmed};
    }

    if (headingText === "") {
      return stripEdgeDashes(trimmed);
    }

    // Replace the section's own first line with "<page title>: <heading>".
    const [, ...remainingLines] = cleaned.split('\n');
    return [`${pageTitle}: ${headingText}`, ...remainingLines].join('\n');
  } catch (e) {
    console.error(`Error processing chunk: ${e.message}`);
    throw e;
  }
}
78 |
/**
 * Assemble the chunk object for one piece of page content.
 *
 * The chunk link is `pageLink + headingLink`; its tracking id is derived from
 * that link and its group tracking id from the page link alone. When
 * `CONFIGS.boost` is truthy, `semantic_boost` and `fulltext_boost` are added
 * using "<pageTitle> <headingText>" as the phrase.
 *
 * @param {string} chunkHtml - Chunk text.
 * @param {string} pageLink - URL of the page.
 * @param {string} headingLink - Fragment of the heading (e.g. `#setup`) or `''`.
 * @param {string} headingText - Heading text or `''`.
 * @param {string[]} pageTagsSet - Tags for the page.
 * @param {string} pageTitle - Page title.
 * @param {string} pageDescription - Page description.
 * @returns {object} The chunk object.
 */
function createChunkObject(chunkHtml, pageLink, headingLink, headingText, pageTagsSet,
  pageTitle, pageDescription) {
  const chunkLink = pageLink + headingLink;
  const chunkTitle = headingText.length > 0
    ? `${pageTitle}: ${headingText}`
    : `${pageTitle}`;

  const chunk = {
    chunk_html: chunkHtml,
    link: chunkLink,
    tags_set: pageTagsSet,
    image_urls: getImages(chunkHtml),
    tracking_id: getTrackingId(chunkLink),
    group_tracking_ids: [getTrackingId(pageLink)],
    timestamp: CONFIGS.TIMESTAMP,
    metadata: {
      title: chunkTitle,
      page_title: pageTitle,
      page_description: pageDescription,
    }
  };

  if (!CONFIGS.boost) {
    return chunk;
  }

  const boostPhrase = `${pageTitle} ${headingText || ''}`.trim().replace(/^:\s+/, '');
  chunk.semantic_boost = {
    distance_factor: CONFIGS.semanticBoostDistanceFactor,
    phrase: boostPhrase
  };
  chunk.fulltext_boost = {
    boost_factor: CONFIGS.fulltextBoostFactor,
    phrase: boostPhrase
  };
  return chunk;
}
110 |
/**
 * List every heading that `pattern` finds in `content`.
 *
 * @param {string} content - Markdown text to scan.
 * @param {RegExp} pattern - Global regex whose capture group 2 is the heading
 *   link and capture group 3 the heading text.
 * @returns {Array<[string, string, string]>} One
 *   `[headingLink, headingText, fullMatch]` triple per match, in order;
 *   `fullMatch` is the entire matched text.
 */
export function headingMatches(content, pattern) {
  const found = [];
  for (const [fullMatch, , headingLink, headingText] of content.matchAll(pattern)) {
    found.push([headingLink, headingText, fullMatch]);
  }
  return found;
}
119 |
/**
 * Split markdown into sections at every heading matched by `pattern`.
 *
 * Section boundaries are the match offsets reported by the regex engine, so
 * two headings with identical text are still split at their own positions
 * (searching for the heading text again would always find the first one).
 *
 * @param {string} content - Markdown text to split.
 * @param {RegExp} pattern - Global regex whose capture group 2 is the heading
 *   link and capture group 3 the heading text.
 * @returns {Array<[string, string, string]>} `[headingLink, headingText,
 *   sectionContent]` triples in document order. Non-blank text before the
 *   first heading comes first as `['', '', text]`. Each section runs from the
 *   start of its heading match to the start of the next one (or the end).
 *   Empty when `pattern` matches nothing.
 */
export function splitContent(content, pattern) {
  const matches = Array.from(content.matchAll(pattern));
  const sections = [];

  if (matches.length === 0) {
    return sections;
  }

  const preamble = content.slice(0, matches[0].index);
  if (preamble.trim().length > 0) {
    sections.push(["", "", preamble]);
  }

  matches.forEach((match, position) => {
    const isLast = position === matches.length - 1;
    const end = isLast ? content.length : matches[position + 1].index;
    sections.push([match[2], match[3], content.slice(match.index, end)]);
  });

  return sections;
}
157 |
/**
 * Split one page into chunks, section by section.
 *
 * The page is first split at top-level anchor headings (`[](#anchor)` on its
 * own line followed by the heading text). A section becomes one chunk when it
 * has at most `maxWords` words or `maxDepth` is reached. A larger section is
 * split again at `###`-style anchor headings and handled recursively; if it
 * has no such headings it is emitted as a single oversized chunk.
 *
 * A section that consists of a heading only produces no chunk; its text is
 * prefixed (`"<heading> - "`) to the heading of the section that follows.
 *
 * @param {string} pageMarkdown - Markdown of the page.
 * @param {string} pageTitle - Page title.
 * @param {string} pageLink - Page URL.
 * @param {string[]} pageTagsSet - Tags for the page.
 * @param {string} pageDescription - Page description.
 * @param {number} [maxWords=CONFIGS.maxWords] - Word limit per chunk.
 * @param {number} [maxDepth=CONFIGS.maxDepth] - Maximum nesting depth for splitting.
 * @returns {object[]} Chunk objects in document order; empty when the page
 *   has no top-level anchor headings.
 */
function processContent(pageMarkdown, pageTitle, pageLink, pageTagsSet,
  pageDescription, maxWords = CONFIGS.maxWords, maxDepth = CONFIGS.maxDepth) {
  const topLevelHeading = /(\n\[\]\((#.*?)\))\n(.*?)\n/g;
  // Single backslashes: inside a regex literal `\\n` would match a literal
  // backslash followed by "n" and therefore never match real markdown.
  const subsectionHeading = /(\n###+ \[\]\((#.*?)\))\n(.*?)\n/g;

  const buildChunk = (chunkHtml, headingLink, headingText, title) => {
    const chunk = createChunkObject(chunkHtml, pageLink, headingLink, headingText,
      pageTagsSet, pageTitle, pageDescription);
    chunk.metadata.title = title;
    return chunk;
  };

  // Creates chunks from sections. A section tuple may carry a fourth element,
  // `inheritsParent`, marking the lead-in text of a section that was split
  // into subsections.
  function createChunks(sections, currentTitle = '', depth = 0) {
    const localChunks = [];
    let pendingHeading = '';

    for (const [headingLink, rawHeadingText, sectionContent, inheritsParent] of sections) {
      const headingText = pendingHeading
        ? `${pendingHeading} - ${rawHeadingText}`
        : rawHeadingText;
      pendingHeading = '';

      const chunkHtml = getChunkHtml(sectionContent, pageTitle, headingText, 0, null);

      // getChunkHtml returns an object (not a string) for heading-only sections.
      if (typeof chunkHtml !== 'string') {
        // The parent's heading is already part of currentTitle, so it is not
        // carried over a second time.
        pendingHeading = inheritsParent ? '' : chunkHtml.HEADING_ONLY;
        continue;
      }

      // No dangling ": " when there is no heading of its own to append.
      const fullTitle = (inheritsParent || !headingText
        ? currentTitle
        : `${currentTitle}: ${headingText}`).replace(/^:\s+/, '');
      const chunkWordCount = chunkHtml.split(/\s+/).length;
      // We don't split further at max depth, even if over the word limit.
      const isWithinChunkingConstraints = chunkWordCount <= maxWords || depth >= maxDepth;

      if (isWithinChunkingConstraints) {
        localChunks.push(buildChunk(chunkHtml, headingLink, headingText, fullTitle));
        continue;
      }

      const subsections = splitContent(sectionContent, subsectionHeading);
      if (subsections.length === 0) {
        // If no subsections, force create a chunk
        localChunks.push(buildChunk(chunkHtml, headingLink, headingText, fullTitle));
        continue;
      }

      // Text before the first sub-heading has no anchor of its own. It keeps
      // this section's link and heading so its tracking id stays distinct from
      // the page-level chunk.
      const nestedSections = subsections.map(([subLink, subText, subContent]) => (
        subLink === ''
          ? [headingLink, headingText, subContent, true]
          : [subLink, subText, subContent]
      ));
      localChunks.push(...createChunks(nestedSections, fullTitle, depth + 1));
    }

    return localChunks;
  }

  const topSections = splitContent(pageMarkdown, topLevelHeading);
  return createChunks(topSections, pageTitle);
}
225 |
226 | async function main() {
227 | // Choose dataset
228 | const chosenDataset = await configs.chooseDataset();
229 | const configuration = configs.getConfiguration(chosenDataset);
230 |
231 | // Get the latest crawl results file
232 | const { file: crawlResultsFile, timestamp: TIMESTAMP } = configs.getLatestCrawlFileAndTimestamp();
233 |
234 | if (!crawlResultsFile) {
235 | console.error('No crawl results file found');
236 | process.exit(1);
237 | }
238 |
239 | // Load the crawl results
240 | const crawlResults = JSON.parse(fs.readFileSync(crawlResultsFile, 'utf8'));
241 | console.log(`Loaded ${crawlResults.length} crawl results from ${crawlResultsFile}`);
242 | // Iterate through each item in the crawl results
243 | crawlResults.forEach(item => {
244 | const url = item.metadata.ogUrl;
245 |
246 | // Skip pages with pageStatusCode != 200
247 | try {
248 | if (item.metadata.pageStatusCode !== 200) return;
249 | } catch (error) {
250 | throw new Error(`pageStatusCode not found for url: ${url}`);
251 | }
252 |
253 | const pageLink = url;
254 | const pageTitle = item.metadata.ogTitle;
255 | const pageDescription = item.metadata.description || '';
256 | let pageMarkdown = item.markdown;
257 | const pageTagsSet = getTags(url);
258 |
259 | // Remove end matter
260 | pageMarkdown = cleaners.removeEndMatter(pageMarkdown);
261 |
262 | // If the page is less than 500 words, then make one chunk for the page (baseline)
263 | if (pageMarkdown.split(/\s+/).length < 500) {
264 | chunks.push(createChunkObject(getChunkHtml(pageMarkdown, pageTitle, '', 0, null), pageLink, '', '', pageTagsSet,
265 | pageTitle, pageDescription));
266 | } else {
267 | // Otherwise, create subpage chunks
268 | chunks.push(...processContent(pageMarkdown, pageTitle, pageLink,
269 | pageTagsSet, pageDescription));
270 | }
271 | });
272 |
273 | // strip ":" from title if it ends with it
274 | chunks.forEach(chunk => {
275 | chunk.metadata.title = chunk.metadata.title.replace(/:\s*$/, '');
276 | });
277 |
278 | // render markdown to html
279 | chunks.forEach(chunk => {
280 | chunk.chunk_html = markdown.render(chunk.chunk_html);
281 | // Replace
with
282 | chunk.chunk_html = chunk.chunk_html.replace(/
/g, '
');
283 | // Replace