├── .env.example ├── .gitignore ├── .prettierrc.json ├── .vscode ├── extensions.json └── settings.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs.zip ├── docs ├── assets │ ├── architecture.png │ └── ui.png ├── bring-your-own-data.md ├── implementation-details.md └── local-development.md ├── package-lock.json ├── package.json ├── packages ├── cdk │ ├── .gitignore │ ├── .npmignore │ ├── README.md │ ├── bin │ │ └── cdk.ts │ ├── cdk.json │ ├── custom-resource │ │ └── associate-package │ │ │ └── index.py │ ├── ecs │ │ └── ingest-data │ │ │ ├── Dockerfile │ │ │ ├── app │ │ │ ├── main.py │ │ │ ├── opensearch.py │ │ │ └── utils.py │ │ │ └── requirements.txt │ ├── jest.config.js │ ├── lambda │ │ ├── delete-opensearch-index │ │ │ ├── index.py │ │ │ └── requirements.txt │ │ ├── list-index │ │ │ ├── index.py │ │ │ └── requirements.txt │ │ └── search-documents │ │ │ ├── index.py │ │ │ └── requirements.txt │ ├── lib │ │ ├── constructs │ │ │ ├── api.ts │ │ │ ├── cognito.ts │ │ │ ├── front.ts │ │ │ ├── ingest-data-ecs.ts │ │ │ ├── opensearch.ts │ │ │ ├── s3bucket.ts │ │ │ └── util-lambda.ts │ │ └── opensearch-intelligent-search-jp-stack.ts │ ├── package.json │ ├── test │ │ └── cdk.test.ts │ └── tsconfig.json └── ui │ ├── .eslintrc.cjs │ ├── .gitignore │ ├── README.md │ ├── components.json │ ├── index.html │ ├── package.json │ ├── postcss.config.js │ ├── public │ └── vite.svg │ ├── src │ ├── App.css │ ├── App.tsx │ ├── api.ts │ ├── assets │ │ └── react.svg │ ├── components │ │ ├── Loading.tsx │ │ └── ui │ │ │ ├── badge.tsx │ │ │ ├── button.tsx │ │ │ ├── card.tsx │ │ │ ├── input.tsx │ │ │ ├── select.tsx │ │ │ └── textarea.tsx │ ├── hooks │ │ ├── useAuthToken.ts │ │ └── useS3Client.ts │ ├── index.css │ ├── lib │ │ └── utils.ts │ ├── main.tsx │ ├── pages │ │ └── SearchPage.tsx │ └── vite-env.d.ts │ ├── tailwind.config.js │ ├── tsconfig.json │ ├── tsconfig.node.json │ └── vite.config.ts └── run-ingest-ecs-task.sh /.env.example: 
-------------------------------------------------------------------------------- 1 | export VITE_AWS_REGION= 2 | export VITE_API_ENDPOINT_URL= 3 | export VITE_COGNITO_USER_POOL_ID= 4 | export VITE_COGNITO_USER_POOL_CLIENT_ID= 5 | export VITE_COGNITO_IDENTITY_POOL_ID= 6 | 7 | # If you are using finch: 8 | # export CDK_DOCKER=finch -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | .envrc -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "es5", 3 | "tabWidth": 2, 4 | "semi": true, 5 | "singleQuote": true, 6 | "bracketSpacing": true, 7 | "bracketSameLine": true, 8 | "arrowParens": "always", 9 | "plugins": ["prettier-plugin-organize-imports"] 10 | } -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": ["dbaeumer.vscode-eslint", "esbenp.prettier-vscode"] 3 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.formatOnSave": true, 3 | "editor.formatOnPaste": true, 4 | "editor.defaultFormatter": "esbenp.prettier-vscode", 5 | "editor.codeActionsOnSave": { 6 | "source.fixAll": "explicit" 7 | }, 8 | "eslint.workingDirectories": ["packages/*"] 9 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of 
Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 
29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # opensearch-intelligent-search-jp 2 | 3 | opensearch-intelligent-search-jp は、生成 AI を活用した日本語検索システムを自ら構築・チューニングしたい開発者の方に向けて、OpenSearch でのベースとなるサンプル実装をCDKで提供するリポジトリです。 4 | 5 | このリポジトリをデプロイすると、サンプルシナリオとして用意されている AWS サービスのドキュメント (SageMaker, Bedrock, Kendra の開発者ガイド) の検索システムを構築します。 6 | このサンプル実装には、以下のような要素が含まれます。 7 | 8 | - OpenSearch による日本語 Hybrid 検索 (= 全文検索とベクトル検索を組み合わせた検索) 9 | - 全文検索のアナライザーとして、[Sudachi プラグイン](https://github.com/WorksApplications/elasticsearch-sudachi)を利用 10 | - ベクトル検索用の Embedding 作成には、Amazon Bedrock 上のモデルを利用。 11 | - [Titan Text Embedding v2](https://docs.aws.amazon.com/bedrock/latest/userguide/titan-embedding-models.html) 12 | - [Cohere Embed Models](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-embed.html) 13 | 14 | 目次 15 | 16 | - [アーキテクチャ](#アーキテクチャ) 17 | - [デプロイ](#デプロイ) 18 | - [前提条件](#前提条件) 19 | - [デモアプリのデプロイ](#デモアプリのデプロイ) 20 | - [Next Steps](#next-steps) 21 | - [Security](#security) 22 | - [License](#license) 23 | 24 | ## アーキテクチャ 25 | 26 | 27 | 28 | ## デプロイ 29 | 30 | リポジトリに付属する評価用のデータを用いたデモアプリケーションをデプロイする手順を説明します。 31 | 32 | ### 前提条件 33 | 34 | - CDK アプリケーションをデプロイできる環境。 35 | - 詳細は CDK の[開発者ガイド](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html)をご参照ください。 36 | - CDK アプリケーションをデプロイするためには、事前に [Bootstrap](https://docs.aws.amazon.com/cdk/v2/guide/bootstrapping.html) が必要です。 37 | ``` 38 | npx -w packages/cdk cdk bootstrap 39 | ``` 40 | - Bedrock 上の Embedding モデルへのアクセス。 41 | - Bedrock のコンソールから、Embedding モデル (Titan Text Embeddings V2 / Cohere Embed Models) へのアクセス権を取得してください (デフォルトでは、Bedrock のリージョンは `us-east-1` を使用しています)。詳細については、[Bedrock 開発者ガイド](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html)をご参照ください。 42 | 43 | ### デモアプリのデプロイ 44 | 45 | デモアプリをデプロイする大まかな手順は以下の通りです。 46 | 47 | 1. 設定の確認 48 | 2. AWS リソースの作成 (cdk deploy) 49 | 3. 
サンプルデータ投入 50 | 51 | #### 1. 設定の確認 52 | 53 | デモアプリの設定は、`packages/cdk/cdk.json` で指定しています。 54 | 設定可能なパラメータとその意味は以下の通りです。 55 | 56 | | パラメータ | デフォルト値 | 意味 | 57 | | :---------------: | :----------: | :-----------------------------------------------------------------------------------------------: | 58 | | bedrockRegion | us-east-1 | Bedrock のモデルを呼び出すリージョン | 59 | | selfSignUpEnabled | true | Cognito のセルフサインアップの有効化の有無 (trueの場合、フロントUIからユーザー作成可能になります) | 60 | 61 | #### 2. AWS リソースの作成 (cdk deploy) 62 | 63 | デモアプリをデプロイするためには、リポジトリのクローン & ルートディレクトリに移動の上、以下のコマンドを実行します。 64 | 65 | ``` 66 | $ npm ci 67 | $ npm run cdk:deploy 68 | ``` 69 | 70 | `cdk deploy` を実行すると、必要な AWS リソース (OpenSearchなど) を作成します。 71 | 実行には、30分ほどかかります。 72 | 73 | ※ [Finch](https://github.com/runfinch/finch) を使用する場合、環境変数 `CDK_DOCKER=finch` を export する必要があります。詳しくは以下をご参照ください。 74 | https://github.com/aws/aws-cdk/tree/main/packages/cdk-assets#using-drop-in-docker-replacements 75 | 76 | #### 3. サンプルデータ投入 77 | 78 | 次に、サンプルデータを取り込み OpenSearch のインデックスを作成します。以下の手順は、CDK のデプロイが完了してから実施してください。 79 | 80 | OpenSearch の Domain のステータスが Active になったら、サンプルデータの投入を行います。 81 | 実行には以下の2つの方法を用意しています。 82 | 83 | - Option 1: シェルスクリプトで実行 (おすすめ) 84 | - Option 2: 直接 run-task コマンドを実行 85 | 86 | ##### Option 1 (シェルスクリプトで実行) 87 | 88 | 以下のコマンドを実行します。 89 | 90 | ```bash 91 | bash run-ingest-ecs-task.sh --index-name --embed-model-id 92 | ``` 93 | 94 | 指定可能なパラメータは以下の通りです。 95 | 96 | | パラメータ | 意味 | 97 | | :--------------: | :---------------------------------------------------------------------------------------------------------------: | 98 | | --index-name | OpenSearch のインデックス名 | 99 | | --embed-model-id | Embedding モデルの Bedrock 上での [Model ID](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html) | 100 | 101 | embed-model-id には、Titan Embeddings もしくは Cohere Embed が使用可能です。主なモデルのmodel id は以下の通りです。(詳細は [Model ID](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html) 参照) 102 | 103 | | モデル名 | モデルID | 104 | | :-----------------------: | 
:--------------------------: | 105 | | Titan Text Embeddings V2 | amazon.titan-embed-text-v2:0 | 106 | | Cohere Embed Multilingual | cohere.embed-multilingual-v3 | 107 | 108 |
109 | Option 2 (直接 run-task コマンドを実行) 110 | 111 | 直接 ECS の run-task を実行する方法でもデータ投入可能です。 112 | 113 | 以下のコマンドの大文字で書かれている6箇所を書き換えてから実行してください。 114 | 書き換えは、`cdk deploy` 実行ログの最後にある Outputs に表示された以下の情報を使用して実施してください。 115 | 116 | - {ECS_CLUSTER_NAME}: OpensearchIntelligentSearchJpStack.IngestDataecsClusterName 117 | - {ECS_TASK_DEFINITION_ARN}: OpensearchIntelligentSearchJpStack.IngestDataecsTaskDefinition 118 | - {ECS_SUBNET_ID}: OpensearchIntelligentSearchJpStack.IngestDataecsSubnet 119 | - {SECURITY_GROUP_ID}: OpensearchIntelligentSearchJpStack.IngestDataecsSecurityGroup 120 | - {OPENSEARCH_INDEX_NAME}: 作成する OpenSearch インデックス名 121 | - {EMBED_MODEL_ID}: Embedding モデルの Bedrock 上での [Model ID](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html) 122 | 123 | ```bash 124 | $ aws ecs run-task --cluster {ECS_CLUSTER_NAME} --task-definition {ECS_TASK_DEFINITION_ARN} --launch-type FARGATE --network-configuration "awsvpcConfiguration={subnets=["{ECS_SUBNET_ID}"],securityGroups=["{SECURITY_GROUP_ID}"],assignPublicIp=ENABLED}" --overrides '{ 125 | "containerOverrides": [{ 126 | "name": "Container", 127 | "environment": [{ 128 | "name": "OPENSEARCH_INDEX_NAME", 129 | "value": "{OPENSEARCH_INDEX_NAME}" 130 | },{ 131 | "name": "EMBED_MODEL_ID", 132 | "value": "{EMBED_MODEL_ID}" 133 | }] 134 | }] 135 | }' 136 | ``` 137 | 138 | 書き込みが完了したかどうかは、ECS の Task の状態をコンソールからご確認ください。 139 | 140 |
141 | 142 | #### 4. フロント UI へアクセス 143 | 144 | `cdk deploy` 実行ログの Outputs に表示されている情報の中から、`OpensearchIntelligentSearchJpStack.FrontFrontendUrl` という項目を探してください。こちらの値 (URL) にブラウザからアクセスしてください。 145 | 146 | ユーザー登録の上、以下のような画面が表示されて検索結果が返ってくればデプロイ完了です。 147 | 148 | 149 | 150 | ## Next Steps 151 | 152 | - 自分のデータで試したい場合は、[独自データで試すには](/docs/bring-your-own-data.md)をご参照ください。 153 | - このリポジトリのサンプル実装の詳細については、[実装詳細](/docs/implementation-details.md)をご参照ください。 154 | 155 | ## Security 156 | 157 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 158 | 159 | ## License 160 | 161 | This library is licensed under the MIT-0 License. See the LICENSE file. 162 | -------------------------------------------------------------------------------- /docs.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/opensearch-intelligent-search-jp/768ef04ed841d9e6d646a0858f55cf5287347528/docs.zip -------------------------------------------------------------------------------- /docs/assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/opensearch-intelligent-search-jp/768ef04ed841d9e6d646a0858f55cf5287347528/docs/assets/architecture.png -------------------------------------------------------------------------------- /docs/assets/ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/opensearch-intelligent-search-jp/768ef04ed841d9e6d646a0858f55cf5287347528/docs/assets/ui.png -------------------------------------------------------------------------------- /docs/bring-your-own-data.md: -------------------------------------------------------------------------------- 1 | # 独自データで試すには 2 | 3 | このドキュメントでは、ご自身のデータを用いてデモアプリケーションを動かすために必要な手順を説明します。 4 | 5 | ## 前提: サンプルアセットの構成 6 | 7 | opensearch-intelligent-search-jp では、リポジトリルートにある 
`docs.zip` を読み込み OpenSearch インデックスに取り込みを行います。 8 | `docs.zip` は、以下のようなフォルダ・ファイル構成をzip化したファイルです。 9 | 10 | ``` 11 | . 12 | ├── bedrock 13 | │   ├── xxxxx.txt 14 | │   ├── ... 15 | │   └── xxxxx.txt 16 | ├── comprehend 17 | │   ├── xxxxx.txt 18 | │   ├── ... 19 | │   ├── xxxxx.txt 20 | └── kendra 21 |    ├── xxxxx.txt 22 |    ├── ... 23 |    ├── xxxxx.txt 24 | ``` 25 | 26 | opensearch-intelligent-search-jp を `cdk deploy` でデプロイすると、`docs.zip` をドキュメント保存用バケット (以下、「ドキュメントバケット」) へ展開してアップロードします。ドキュメントバケットは、`cdk deploy` 実行時の出力に含まれる `S3bucketdocumentBucketName` から確認可能です。 27 | 28 | ドキュメントバケットには、上記のフォルダ構造がそのまま反映される形でデータがアップロードされます。 29 | `bedrock` や `comprehend` といったフォルダ名が、OpenSearch のマッピングにおける `service` というフィールドに対応します。(このサンプルデータでは、AWS のサービス名に対応するため) 30 | 31 | 各フォルダには、txtファイルが格納されています。ここでは、それぞれのサービスのドキュメントをtxtファイルに変換したデータを格納しています。 32 | 33 | ## 自身のデータで試すには 34 | 35 | コードを変更せずに自身のデータで試す場合、以下の手順を実施してください。 36 | 37 | - ドキュメントバケットの `docs/` 配下にあるサンプルデータを削除します。 38 | - サンプルデータと同様のデータ構成でドキュメントバケットの `docs/` 配下にデータをアップロードします。 39 | - [README内にある「デプロイ」セクションの 3. サンプルデータ投入](./../README.md#3-サンプルデータ投入) の手順を実施する。 40 | - サンプルデータとの重複を避けるため、インデックス名を変更して実施することをおすすめします。(インデックス名がデフォルトの場合、サンプルデータ + ご自身のデータとなります。) 41 | -------------------------------------------------------------------------------- /docs/implementation-details.md: -------------------------------------------------------------------------------- 1 | # 実装詳細 2 | 3 | このドキュメントでは、実装の詳細について解説します。 4 | 5 | - [全体構成](#全体構成) 6 | - [主要コンポーネントの詳細](#主要コンポーネントの詳細) 7 | - [OpenSearch インデックス](#opensearch-インデックス) 8 | - [検索方法と検索単位](#検索方法と検索単位) 9 | - [マッピング定義](#マッピング定義) 10 | - [データ取り込み処理](#データ取り込み処理) 11 | - [検索パイプライン](#検索パイプライン) 12 | - [collapse-hybrid-search-pipeline](#collapse-hybrid-search-pipeline) 13 | - [collapse-search-pipeline](#collapse-search-pipeline) 14 | - [hybrid-search-pipeline](#hybrid-search-pipeline) 15 | 16 | ## 全体構成 17 | 18 | opensearch-intelligent-search-jp のリポジトリ構成は以下の通りです。 19 | 20 | ``` 21 | . 
22 | ├── README.md 23 | ├── docs # opensearch-intelligent-search-jp のドキュメント 24 | ├── docs.zip # サンプルデータ 25 | ├── package-lock.json 26 | ├── package.json 27 | ├── packages 28 | │   ├── cdk # CDK コード 29 | │   └── ui # フロント UI 用コード 30 | └── run-ingest-ecs-task.sh # データ取り込み処理用シェルスクリプト 31 | ``` 32 | 33 | NPM の workspaces を用いた monorepo 構成となっており、packages ディレクトリ以下に cdk 用のパッケージと フロント UI 用のパッケージから構成されています。 34 | 35 | ## 主要コンポーネントの詳細 36 | 37 | ### OpenSearch インデックス 38 | 39 | #### 検索方法と検索単位 40 | 41 | このサンプル実装では、検索方法としてキーワード検索、ベクトル検索、ハイブリッド検索の 3 種類に対応しています。 42 | 43 | - キーワード検索 44 | - キーワード検索は Okapi BM25 アルゴリズムを使ってドキュメントのスコアを計算します。検索クエリをトークンに分割し、そのトークンがドキュメント内に多く現れるか、トークンが一般的な単語 (例: the) ではないか、などを考慮して類似性を測ります。 45 | - opensearch-intelligent-search-jp ではトークン化を行うトークナイザーとして Sudachi を利用しています。Sudachi の設定内容は [opensearch.py](../packages/cdk/ecs/ingest-data/app/opensearch.py) を参照ください。 46 | - ベクトル検索 47 | - ベクトル検索は、文書を機械学習モデルを使ってベクトル化し、そのベクトル間の類似度を測定してドキュメントのスコアを計算します。キーワード検索がキーワードの一致によってスコアを計算していたのに対し、ベクトル検索はより意味的な類似性を考慮してスコアを計算します。 48 | - ハイブリッド検索 (キーワード検索 + ベクトル検索) 49 | - ハイブリッド検索は、キーワード検索とベクトル検索を合わせた検索手法です。 50 | - opensearch-intelligent-search-jp では、OpenSearch の持つ [Hybrid search](https://opensearch.org/docs/latest/search-plugins/hybrid-search/) 機能を利用しています。 51 | 52 | また、検索の用途に合わせて document モードと chunk モードという 2 つの検索単位でドキュメントを検索することが可能です。 53 | 54 | - document モード 55 | - document 単位で検索結果を返します。例えば、データソースにファイル A とファイル B があった時、OpenSearch では chunk A-1、chunk A-2、chunk B-1、chunk B-2、chunk B-3 のように保存されています。この時この検索モードでは、同じファイルのデータが複数返ってくることはありません。つまり、これらのチャンクの中で検索クエリとの関連度が高い順にソートされ、同じドキュメントのチャンクであれば最もスコアの高いチャンクのみ返却されます。 56 | - 主要なユースケースはドキュメント検索です。 57 | - 内部的には [collapse processor](https://opensearch.org/docs/latest/search-plugins/search-pipelines/collapse-processor/) を使用しています 58 | - chunk モード 59 | - chunk 単位で検索結果を返します。document モードでは同じドキュメントで結果が重複しないような処理が行われましたが、chunk モードでは重複排除が行われません。 60 | - 主要なユースケースは RAG です。 61 | 62 | #### マッピング定義 63 | 64 | このサンプル実装における、Amazon OpenSearch Service 
のインデックスのマッピング定義は以下の通りです。検索結果のフィルタに使用したい項目を増やす場合は、ここにその項目を追加する必要があります。 65 | 66 | ```json 67 | "mappings": { 68 | "_meta": {"model_id": model_id}, # テキスト埋め込みに使用するモデルの ID 69 | "properties": { 70 | "vector": { # ベクトル検索用ベクトルデータ 71 | "type": "knn_vector", 72 | "dimension": dimension, # テキスト埋め込みベクトルの次元数 73 | "method": { 74 | "engine": "lucene", 75 | "space_type": "cosinesimil", 76 | "name": "hnsw", 77 | "parameters": {}, 78 | }, 79 | }, 80 | "docs_root": {"type": "keyword"}, # ドキュメントが格納されている S3 パス 81 | "doc_name": {"type": "keyword"}, # ドキュメント名 82 | "keyword": {"type": "text", "analyzer": "custom_sudachi_analyzer"}, # テキスト検索用テキスト 83 | "service": {"type": "keyword"}, # 検索結果のフィルタに使うための情報 84 | }, 85 | } 86 | ``` 87 | 88 | ベクトル検索に必要なベクトルデータ vector と、vector と対になる、テキスト検索に必要なテキストデータ keyword をはじめとして、データの大元のドキュメントが格納されているファイル格納パスの docs_root, doc_name や、ドキュメントの属性 service(このサンプルでは AWS サービス名)などが設定されています。 89 | 90 | ### データ取り込み処理 91 | 92 | データ取込用の ECS タスクでは、以下の処理を実行しています。 93 | 94 | - OpenSearch インデックスの作成 95 | - インデックスに登録したい項目を変更したい場合は、packages/cdk/ecs/ingest-data/app/opensearch.py の create_index() を変更してください。 96 | - 指定された S3 パスにあるドキュメントをテキストに変換 97 | - テキストファイルと PDF ファイルのみ動作確認済みです。その他のファイル形式の読み込みに対応する場合は、packages/cdk/ecs/ingest-data/app/utils.py の read_file() を変更してください。 98 | - 変換したテキストをチャンク分割 99 | - 指定された文字数以内のキリの良い位置でチャンク分割する実装になっています。チャンク分割ロジックを変更したい場合は、packages/cdk/ecs/ingest-data/app/opensearch.py の split_text() を変更してください。 100 | - チャンクをベクトルに変換 101 | - Titan embeddings v2 を使う実装になっています。埋め込みモデルを変更したい場合は、packages/cdk/ecs/ingest-data/app/opensearch.py の embed_file() を変更してください。 102 | - ベクトルとその他の関連データを OpenSearch インデックスに登録 103 | 104 | ### 検索パイプライン 105 | 106 | このサンプル実装では、ドキュメント単位の検索機能とハイブリッド検索機能を OpenSearch の検索パイプライン機能を使って実現しています。実装されている検索パイプラインは以下の 3種類です。 107 | 108 | #### collapse-hybrid-search-pipeline 109 | 110 | ドキュメント単位検索とハイブリッド検索を組み合わせた検索パイプライン。 111 | 112 | ```python 113 | index_body = { 114 | "description": "Pipeline for hybrid search and collapse", 115 | "phase_results_processors": [ 
116 | { 117 | "normalization-processor": { 118 | "normalization": {"technique": "min_max"}, 119 | "combination": { 120 | "technique": "arithmetic_mean", 121 | "parameters": {"weights": [0.5, 0.5]}, 122 | }, 123 | } 124 | } 125 | ], 126 | "response_processors": [ 127 | { 128 | "collapse": { 129 | "field": "doc_name" 130 | } 131 | } 132 | ] 133 | } 134 | ``` 135 | 136 | #### collapse-search-pipeline 137 | 138 | ドキュメント単位検索のための検索パイプライン。 139 | 140 | ```python 141 | index_body = { 142 | "description": "Pipeline for collapse", 143 | "response_processors": [ 144 | { 145 | "collapse": { 146 | "field": "doc_name" 147 | } 148 | } 149 | ] 150 | } 151 | ``` 152 | 153 | #### hybrid-search-pipeline 154 | 155 | ハイブリッド検索のための検索パイプライン。 156 | 157 | ```python 158 | index_body = { 159 | "description": "Pipeline for hybrid search", 160 | "phase_results_processors": [ 161 | { 162 | "normalization-processor": { 163 | "normalization": {"technique": "min_max"}, 164 | "combination": { 165 | "technique": "arithmetic_mean", 166 | "parameters": {"weights": [0.5, 0.5]}, 167 | }, 168 | } 169 | } 170 | ], 171 | } 172 | ``` 173 | -------------------------------------------------------------------------------- /docs/local-development.md: -------------------------------------------------------------------------------- 1 | # ローカルで開発する場合について 2 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "opensearch-intelligent-search-jp", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "npx -w packages/cdk jest", 8 | "cdk:deploy": "npx -w packages/cdk cdk deploy" 9 | }, 10 | "keywords": [], 11 | "author": "", 12 | "workspaces": [ 13 | "packages/cdk", 14 | "packages/ui" 15 | ], 16 | "devDependencies": { 17 | "eslint-config-prettier": "^9.1.0", 18 | "prettier": "^3.2.5", 19 | "prettier-plugin-organize-imports": "^3.2.4" 
20 | } 21 | } 22 | -------------------------------------------------------------------------------- /packages/cdk/.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | node_modules 5 | 6 | # CDK asset staging directory 7 | .cdk.staging 8 | cdk.out 9 | test/__snapshots__ -------------------------------------------------------------------------------- /packages/cdk/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /packages/cdk/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to your CDK TypeScript project 2 | 3 | This is a blank project for CDK development with TypeScript. 4 | 5 | The `cdk.json` file tells the CDK Toolkit how to execute your app. 
6 | 7 | ## Useful commands 8 | 9 | * `npm run build` compile typescript to js 10 | * `npm run watch` watch for changes and compile 11 | * `npm run test` perform the jest unit tests 12 | * `npx cdk deploy` deploy this stack to your default AWS account/region 13 | * `npx cdk diff` compare deployed stack with current state 14 | * `npx cdk synth` emits the synthesized CloudFormation template 15 | -------------------------------------------------------------------------------- /packages/cdk/bin/cdk.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import * as cdk from 'aws-cdk-lib'; 3 | import 'source-map-support/register'; 4 | import { OpensearchIntelligentSearchJpStack } from '../lib/opensearch-intelligent-search-jp-stack'; 5 | 6 | const app = new cdk.App(); 7 | new OpensearchIntelligentSearchJpStack( 8 | app, 9 | 'OpensearchIntelligentSearchJpStack', 10 | { 11 | /* If you don't specify 'env', this stack will be environment-agnostic. 12 | * Account/Region-dependent features and context lookups will not work, 13 | * but a single synthesized template can be deployed anywhere. */ 14 | /* Uncomment the next line to specialize this stack for the AWS Account 15 | * and Region that are implied by the current CLI configuration. */ 16 | // env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION }, 17 | /* Uncomment the next line if you know exactly what Account and Region you 18 | * want to deploy the stack to. 
*/ 19 | // env: { account: '123456789012', region: 'us-east-1' }, 20 | /* For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html */ 21 | } 22 | ); 23 | -------------------------------------------------------------------------------- /packages/cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/cdk.ts", 3 | "watch": { 4 | "include": ["**"], 5 | "exclude": [ 6 | "README.md", 7 | "cdk*.json", 8 | "**/*.d.ts", 9 | "**/*.js", 10 | "tsconfig.json", 11 | "package*.json", 12 | "yarn.lock", 13 | "node_modules", 14 | "test" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 19 | "@aws-cdk/core:checkSecretUsage": true, 20 | "@aws-cdk/core:target-partitions": ["aws", "aws-cn"], 21 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 22 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 23 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 24 | "@aws-cdk/aws-iam:minimizePolicies": true, 25 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 26 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 27 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 28 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 29 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 30 | "@aws-cdk/core:enablePartitionLiterals": true, 31 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 32 | "@aws-cdk/aws-iam:standardizedServicePrincipals": true, 33 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 34 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 35 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 36 | "@aws-cdk/aws-route53-patters:useCertificate": true, 37 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 38 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 39 | 
"@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 40 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 41 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 42 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 43 | "@aws-cdk/aws-redshift:columnId": true, 44 | "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, 45 | "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, 46 | "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, 47 | "@aws-cdk/aws-kms:aliasNameRef": true, 48 | "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true, 49 | "@aws-cdk/core:includePrefixInUniqueNameGeneration": true, 50 | "@aws-cdk/aws-efs:denyAnonymousAccess": true, 51 | "@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true, 52 | "@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true, 53 | "@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true, 54 | "@aws-cdk/aws-rds:auroraClusterChangeScopeOfInstanceParameterGroupWithEachParameters": true, 55 | "@aws-cdk/aws-appsync:useArnForSourceApiAssociationIdentifier": true, 56 | "@aws-cdk/aws-rds:preventRenderingDeprecatedCredentials": true, 57 | "@aws-cdk/aws-codepipeline-actions:useNewDefaultBranchForCodeCommitSource": true, 58 | "@aws-cdk/aws-cloudwatch-actions:changeLambdaPermissionLogicalIdForLambdaAction": true, 59 | "@aws-cdk/aws-codepipeline:crossAccountKeysDefaultValueToFalse": true, 60 | "@aws-cdk/aws-codepipeline:defaultPipelineTypeToV2": true, 61 | "@aws-cdk/aws-kms:reduceCrossAccountRegionPolicyScope": true, 62 | "@aws-cdk/aws-eks:nodegroupNameAttribute": true, 63 | "bedrockRegion": "us-east-1", 64 | "selfSignUpEnabled": true 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /packages/cdk/custom-resource/associate-package/index.py: -------------------------------------------------------------------------------- 1 | import boto3 
def handler(event, context):
    """Associate the Sudachi analysis package with the OpenSearch domain.

    On ``Create`` the function waits until the domain reports that it is no
    longer processing, looks up the ``analysis-sudachi`` package for
    ``OpenSearch_2.13`` and associates it with the domain unless the domain is
    already listed for that package. ``Delete`` and ``Update`` do nothing.

    Args:
        event: Custom-resource event. ``RequestType`` and
            ``ResourceProperties.DomainName`` are read.
        context: Lambda context (unused).

    Returns:
        ``{"package_id": <id>}`` for ``Create``, ``{}`` for ``Delete`` and
        ``Update``, ``None`` for any other request type.

    Raises:
        RuntimeError: If ``describe_packages`` returns no matching package.
    """
    print("Received event: " + json.dumps(event, indent=2))

    domain_name = event['ResourceProperties']['DomainName']
    request_type = event['RequestType']

    if request_type == 'Create':
        # It can take a while after the domain is created before a package
        # can be associated, so wait for a fixed period first.
        time.sleep(120)

        # Keep waiting while the domain status still reports Processing.
        while True:
            res = opensearch.describe_domain(DomainName=domain_name)
            if res['DomainStatus']['Processing'] is False:
                break
            time.sleep(10)

        # Look up the Sudachi package ID for OpenSearch 2.13
        # (the ID differs from region to region).
        res = opensearch.describe_packages(
            Filters=[
                {"Name": "PackageName", "Value": ["analysis-sudachi"]},
                {"Name": "EngineVersion", "Value": ["OpenSearch_2.13"]},
            ]
        )
        packages = res['PackageDetailsList']
        if not packages:
            # Fail with an explicit message instead of a bare IndexError.
            raise RuntimeError(
                "No analysis-sudachi package for OpenSearch_2.13 was found"
            )
        package_id = packages[0]['PackageID']

        res = opensearch.list_domains_for_package(PackageID=package_id)

        # Associate the package only when the domain is not listed yet.
        already_associated = any(
            detail['DomainName'] == domain_name
            for detail in res['DomainPackageDetailsList']
        )
        if not already_associated:
            opensearch.associate_package(
                DomainName=domain_name,
                PackageID=package_id,
            )

        return {"package_id": package_id}

    if request_type in ('Delete', 'Update'):
        return {}


def is_complete(event, context):
    """Report whether the package association has finished.

    Args:
        event: Custom-resource event. ``RequestType`` and
            ``ResourceProperties.DomainName`` are read; on ``Create`` the
            ``package_id`` key is read as well.
        context: Lambda context (unused).

    Returns:
        ``{"IsComplete": True}`` for ``Delete``/``Update``, and for ``Create``
        once the domain is listed for the package with status ``ACTIVE``;
        ``{"IsComplete": False}`` otherwise.
    """
    print("Received event: " + json.dumps(event, indent=2))

    domain_name = event['ResourceProperties']['DomainName']
    request_type = event['RequestType']

    if request_type == 'Create':
        package_id = event['package_id']

        res = opensearch.list_domains_for_package(PackageID=package_id)

        # Complete once the matching domain reports the package as ACTIVE.
        for detail in res['DomainPackageDetailsList']:
            if (
                detail['DomainName'] == domain_name
                and detail['DomainPackageStatus'] == 'ACTIVE'
            ):
                return {'IsComplete': True}

    elif request_type in ('Delete', 'Update'):
        return {'IsComplete': True}

    return {'IsComplete': False}
METADATA_URI = os.environ.get("ECS_CONTAINER_METADATA_URI_V4")


def get_exec_id() -> str:
    """Return the ECS task id taken from the task metadata endpoint.

    The id is the last ``/``-separated segment of the ``TaskARN`` field; an
    empty string is returned when the field is missing.

    Ref: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v4.html#task-metadata-endpoint-v4-enable

    Raises:
        RuntimeError: If ``ECS_CONTAINER_METADATA_URI_V4`` was not set.
        Exception: Whatever ``requests`` raises on timeout or HTTP error.
    """
    if not METADATA_URI:
        # Avoid issuing a request to the literal URL "None/task".
        raise RuntimeError("ECS_CONTAINER_METADATA_URI_V4 is not set")

    # requests has no default timeout; bound the call so the task cannot hang.
    response = requests.get(f"{METADATA_URI}/task", timeout=5)
    response.raise_for_status()
    task_arn = response.json().get("TaskARN", "")
    return task_arn.split("/")[-1]


def ingest_data(
    host_http, index_name, dimension, model_id, docs_url, bedrock_region
):
    """Build the ingestion config and run ``OpenSearchController.ingest_data``.

    Args:
        host_http: OpenSearch endpoint host name.
        index_name: Name of the index to create and fill.
        dimension: Embedding dimension; an int or a numeric string (values
            read from the environment are strings).
        model_id: Embedding model id.
        docs_url: S3 URI of the documents to ingest.
        bedrock_region: Region of the Bedrock runtime.
    """
    try:
        exec_id = get_exec_id()
    except Exception as e:
        # The id is only printed, so a failure here must not stop ingestion.
        print(f"[ERROR] Failed to get exec_id: {e}")
        exec_id = "FAILED_TO_GET_ECS_EXEC_ID"

    print("exec_id:", exec_id)

    cfg = {
        "host_http": host_http,
        "index_name": index_name,
        # Normalise to int so the index mapping receives a number.
        "dimension": int(dimension),
        "model_id": model_id,
        "docs_url": docs_url,
        "bedrock_region": bedrock_region,
        "max_chunk_length": 400,
    }

    opensearch = OpenSearchController(cfg)
    opensearch.ingest_data()
class OpenSearchController:
    """Creates an OpenSearch index and fills it with embedded document chunks.

    ``cfg`` is a dict. The methods read the keys ``host_http``,
    ``index_name``, ``dimension``, ``model_id``, ``docs_url``,
    ``bedrock_region`` and ``max_chunk_length``.
    """

    def __init__(self, cfg):
        self.cfg = cfg
        self.bedrock_runtime = boto3.client(
            service_name="bedrock-runtime",
            region_name=cfg["bedrock_region"],
        )
        self.aos_client = self.get_aos_client()

    def get_aos_client(self):
        """Create a SigV4-signed OpenSearch client for ``cfg["host_http"]``."""
        host = self.cfg["host_http"]
        # The region is taken from the second dot-separated label of the host.
        region = host.split(".")[1]

        service = "es"
        credentials = boto3.Session().get_credentials()
        auth = AWSV4SignerAuth(credentials, region, service)

        client = OpenSearch(
            hosts=[{"host": host, "port": 443}],
            http_auth=auth,
            use_ssl=True,
            verify_certs=True,
            connection_class=RequestsHttpConnection,
            pool_maxsize=20,
        )

        return client

    def init_cluster_settings(self):
        """Set the number of threads used while indexing k-NN vectors."""
        self.aos_client.cluster.put_settings(
            body={
                "persistent": {
                    "knn.algo_param.index_thread_qty": "4",
                }
            }
        )

    def create_index(self):
        """Create the index with the Sudachi analyzer and k-NN mapping if absent."""
        index_name = self.cfg["index_name"]
        model_id = self.cfg["model_id"]
        dimension = self.cfg["dimension"]

        if not self.aos_client.indices.exists(index_name):
            print("create index")

            self.aos_client.indices.create(
                index_name,
                body={
                    "settings": {
                        "index": {
                            "analysis": {
                                "filter": {
                                    "custom_sudachi_part_of_speech": {
                                        "type": "sudachi_part_of_speech",
                                        "stoptags": [
                                            "感動詞,フィラー",
                                            "接頭辞",
                                            "代名詞",
                                            "助詞",
                                            "助動詞",
                                            "動詞,一般,*,*,*,終止形-一般",
                                            "名詞,普通名詞,副詞可能",
                                        ],
                                    }
                                },
                                "analyzer": {
                                    "custom_sudachi_analyzer": {
                                        "filter": [
                                            "sudachi_normalizedform",
                                            "custom_sudachi_part_of_speech",
                                        ],
                                        "char_filter": ["icu_normalizer"],
                                        "type": "custom",
                                        "tokenizer": "sudachi_tokenizer",
                                    }
                                },
                            },
                            "knn": True,
                            # A large refresh_interval is used for indexing
                            # performance; update_index() lowers it afterwards.
                            "refresh_interval": "1000s",
                        }
                    },
                    "mappings": {
                        # The model id is stored in the mapping metadata.
                        "_meta": {"model_id": model_id},
                        "properties": {
                            "vector": {
                                "type": "knn_vector",
                                "dimension": dimension,
                                "method": {
                                    "engine": "lucene",
                                    "space_type": "cosinesimil",
                                    "name": "hnsw",
                                    "parameters": {},
                                },
                            },
                            "docs_root": {"type": "keyword"},
                            "doc_name": {"type": "keyword"},
                            "keyword": {
                                "type": "text",
                                "analyzer": "custom_sudachi_analyzer",
                            },
                            "service": {"type": "keyword"},
                        },
                    },
                },
            )

            print("Index was created.")
            time.sleep(20)

    @staticmethod
    def _slice_text(text, limit):
        """Cut ``text`` into consecutive pieces of at most ``limit`` characters."""
        return [text[i : i + limit] for i in range(0, len(text), limit)]

    def split_text(self, text):
        """Split ``text`` into chunks of roughly ``cfg["max_chunk_length"]`` chars.

        The text is first cut into sentence-like pieces that end with an
        English terminator followed by whitespace or with a Japanese
        terminator / newline; consecutive pieces are then packed into chunks
        while the running length stays within the limit. If the first
        sentence does not fit, the limit is raised in steps of 10 until it
        does (this also raises the packing limit).

        Text that no sentence match covers (text without any terminator, the
        tail after the last terminator, the head of an over-long sentence) is
        kept and cut into fixed-size pieces, so joining the returned chunks
        reproduces ``text``. An empty ``text`` yields an empty list.

        Returns:
            list[str]: Non-empty chunks in document order.
        """
        if not text:
            return []

        max_length = self.cfg["max_chunk_length"]

        # for English sentences
        period_pattern = re.compile(r"[.!?][\s]")

        # for Japanese sentences
        kuten_pattern = re.compile(r"[。!?…\n]")

        def build_pattern(limit):
            return re.compile(
                rf"(.{{1,{limit}}}?({period_pattern.pattern}|{kuten_pattern.pattern}))",
                flags=re.DOTALL,
            )

        split_pattern = build_pattern(max_length)
        # search() yields the same leftmost match as the first finditer() item
        # without materialising every match; None means no sentence was found.
        first = split_pattern.search(text)
        while first is not None and first.start() != 0:
            max_length += 10
            split_pattern = build_pattern(max_length)
            first = split_pattern.search(text)

        pieces = []
        pos = 0
        for match in split_pattern.finditer(text):
            if match.start() > pos:
                # Characters skipped by the regex would otherwise be lost.
                pieces.extend(
                    self._slice_text(text[pos : match.start()], max_length)
                )
            pieces.append(match.group(1))
            pos = match.end()
        if pos < len(text):
            # Trailing text that does not end with a terminator.
            pieces.extend(self._slice_text(text[pos:], max_length))

        chunks = []
        current_chunk = ""
        for piece in pieces:
            if len(current_chunk) + len(piece) <= max_length:
                current_chunk += piece
            else:
                # Never emit an empty chunk when the first piece is over-long.
                if current_chunk:
                    chunks.append(current_chunk)
                current_chunk = piece

        if current_chunk:
            chunks.append(current_chunk)

        return chunks

    def embed_file(self, file_name):
        """Read one file, chunk it and embed the chunks.

        Returns:
            tuple: ``(vectors, chunks)`` with one vector per chunk.
        """
        text = utils.read_file(file_name)

        chunks = self.split_text(text)

        # Model ids containing "cohere" use the Cohere request schema.
        if "cohere" in self.cfg["model_id"]:
            vectors = self.embed_with_cohere(chunks)
        else:
            vectors = self.embed_with_titan(chunks)

        return vectors, chunks

    def embed_with_titan(self, chunks):
        """Embed chunks one request at a time using the ``inputText`` schema."""
        vectors = []
        for chunk in chunks:
            # API schema is adjusted to the Titan embedding model
            body = json.dumps({"inputText": chunk})
            query_response = self.bedrock_runtime.invoke_model(
                body=body,
                modelId=self.cfg["model_id"],
                accept="application/json",
                contentType="application/json",
            )
            vectors.append(
                json.loads(query_response["body"].read()).get("embedding")
            )
        return vectors

    def embed_with_cohere(self, chunks):
        """Embed chunks in batches of up to 96 texts using the Cohere schema."""
        vectors = []
        max_text_num = 96
        for i in range(0, len(chunks), max_text_num):
            body = json.dumps(
                {
                    "texts": chunks[i : min(len(chunks), i + max_text_num)],
                    "input_type": "search_document",
                    "embedding_types": ["float"],
                }
            )
            query_response = self.bedrock_runtime.invoke_model(
                body=body,
                modelId=self.cfg["model_id"],
                accept="*/*",
                contentType="application/json",
            )
            vectors.extend(
                json.loads(query_response["body"].read()).get("embeddings")[
                    "float"
                ]
            )
        return vectors

    @staticmethod
    def parse_response(query_response):
        """Return the ``embedding`` field of an ``invoke_model`` response.

        Declared static because it takes no ``self``; it can be called on the
        class or on an instance.
        """
        response_body = json.loads(query_response.get("body").read())
        return response_body.get("embedding")

    def embed_documents(self, file_list):
        """Embed every file and build the bulk-index actions.

        A file whose embedding raises is reported and skipped so that one bad
        document does not abort the whole run.

        Returns:
            list[dict]: One action per chunk with ``_index``, ``vector``,
            ``docs_root``, ``doc_name``, ``keyword`` and ``service``.
        """
        vectors = []
        for counter, file_name in enumerate(file_list):
            print(f"embedding: {counter}/{len(file_list)}")
            try:
                chunk_vectors, texts = self.embed_file(file_name)
            except Exception as e:
                # Report the failure instead of dropping the file silently.
                print(f"[ERROR] Failed to embed {file_name}: {e}")
                continue

            path_parts = file_name.split("/")
            for i, embedding in enumerate(chunk_vectors):
                vectors.append(
                    {
                        "_index": self.cfg["index_name"],
                        "vector": embedding,
                        "docs_root": "/".join(path_parts[:3]),
                        "doc_name": "/".join(path_parts[3:]),
                        "keyword": texts[i],
                        "service": path_parts[-2],
                    }
                )

        print(
            f"{len(file_list)} documents ({len(vectors)} chunks) were embedded."
        )
        return vectors

    def create_search_pipeline(self):
        """Create the three search pipelines used by the search API."""
        # Create collapse-hybrid-search-pipeline
        index_body = {
            "description": "Pipeline for hybrid search and collapse",
            "phase_results_processors": [
                {
                    "normalization-processor": {
                        "normalization": {"technique": "min_max"},
                        "combination": {
                            "technique": "arithmetic_mean",
                            "parameters": {"weights": [0.5, 0.5]},
                        },
                    }
                }
            ],
            "response_processors": [{"collapse": {"field": "doc_name"}}],
        }

        self.aos_client.http.put(
            "/_search/pipeline/collapse-hybrid-search-pipeline", body=index_body
        )

        # Create collapse-search-pipeline
        index_body = {
            "description": "Pipeline for collapse",
            "response_processors": [{"collapse": {"field": "doc_name"}}],
        }

        self.aos_client.http.put(
            "/_search/pipeline/collapse-search-pipeline", body=index_body
        )

        # Create hybrid-search-pipeline
        index_body = {
            "description": "Pipeline for hybrid search",
            "phase_results_processors": [
                {
                    "normalization-processor": {
                        "normalization": {"technique": "min_max"},
                        "combination": {
                            "technique": "arithmetic_mean",
                            "parameters": {"weights": [0.5, 0.5]},
                        },
                    }
                }
            ],
        }

        self.aos_client.http.put(
            "/_search/pipeline/hybrid-search-pipeline", body=index_body
        )

    def update_index(self):
        """Lower the refresh_interval that create_index() set to a large value."""
        index_name = self.cfg["index_name"]
        self.aos_client.indices.put_settings(
            index=index_name,
            body={"index": {"refresh_interval": "60s"}},
        )

    def ingest_data(self):
        """Run the whole ingestion: settings, pipelines, index, embed, bulk load."""
        self.init_cluster_settings()
        self.create_search_pipeline()
        self.create_index()
        docs_url = self.cfg["docs_url"]

        file_list = utils.get_all_filepath(docs_url)

        # A single task is submitted, so the files are embedded one by one
        # inside one worker thread.
        with ThreadPoolExecutor(max_workers=8) as executor:
            thread = executor.submit(self.embed_documents, file_list)
            vectors = thread.result()

        batch_size = 50
        for i in range(0, len(vectors), batch_size):
            helpers.bulk(
                self.aos_client,
                vectors[i : min(i + batch_size, len(vectors))],
                request_timeout=1000,
            )

        self.update_index()

        print("Process finished.")
def parse_s3_uri(s3_uri):
    """
    Split an S3 URI into bucket name, key and extension.

    Args:
        s3_uri (str): e.g. 's3://bucket_name/test/test.txt'
    Returns:
        bucket: bucket name (bucket_name)
        key: object key (test/test.txt)
        extension: file extension (.txt)
    """
    path_parts = s3_uri.split("//")[1].split("/")
    bucket = path_parts[0]
    key = '/'.join(path_parts[1:])
    extension = os.path.splitext(key)[-1]

    return bucket, key, extension


def read_file(file_url):
    """Download one S3 object to a temporary file and return its text.

    Supported extensions are ``.txt``, ``.pdf``, ``.docx``, ``.pptx`` and
    ``.html``; any other extension yields an empty string. For ``.pdf`` and
    ``.html`` line breaks are removed and non-breaking spaces are replaced by
    normal spaces.
    """
    bucket, key, extension = parse_s3_uri(file_url)

    text = ""

    with tempfile.NamedTemporaryFile(
        delete=True, suffix=extension
    ) as temp_file:
        temp_file_path = temp_file.name
        s3_client.download_file(bucket, key, temp_file_path)

        print(f"Load file: {os.path.basename(key)}")

        if extension == ".txt":
            text = load_text(temp_file_path)
        elif extension == ".pdf":
            text = _strip_line_breaks(load_pdf(temp_file_path))
        elif extension == ".docx":
            text = load_word(temp_file_path)
        elif extension == ".pptx":
            text = load_ppt(temp_file_path)
        elif extension == ".html":
            text = _strip_line_breaks(load_html(temp_file_path))

    return text


def _strip_line_breaks(text):
    """Remove CR/LF characters and turn non-breaking spaces into spaces."""
    return text.replace("\n", "").replace("\r", "").replace("\u00A0", " ")


def _join_pages(loader):
    """Concatenate ``page_content`` of everything ``loader`` loads and splits."""
    return "".join(page.page_content for page in loader.load_and_split())


def load_text(file_path):
    """Return the text of a plain-text file.

    Loader construction errors propagate to the caller; previously they were
    printed and then masked by an unbound-variable error.
    """
    return _join_pages(TextLoader(str(file_path)))


def load_pdf(file_path):
    """Return the text of a PDF file.

    Each page is re-decoded from latin-1 bytes as Shift_JIS when that is
    possible; otherwise the page text is kept as extracted.
    """
    pages = PyPDFLoader(str(file_path)).load_and_split()
    text = ""
    for page in pages:
        content = page.page_content
        try:
            text += bytes(content, "latin1").decode("shift_jis")
        except (UnicodeEncodeError, UnicodeDecodeError):
            # Not latin-1 encodable, or not valid Shift_JIS: keep the page as
            # extracted instead of replacing it with an error marker.
            text += content

    return text


def load_word(file_path):
    """Return the text of a ``.docx`` file."""
    return _join_pages(Docx2txtLoader(str(file_path)))


def load_ppt(file_path):
    """Return the text of a ``.pptx`` file."""
    return _join_pages(UnstructuredPowerPointLoader(str(file_path)))


def load_html(file_path):
    """Return the text of an HTML file."""
    return _join_pages(UnstructuredHTMLLoader(str(file_path)))


def _split_s3_url(file_url):
    """Split ``s3://bucket/prefix`` into ``(bucket, prefix)``."""
    parts = file_url.split("/")
    return parts[2], "/".join(parts[3:])


def _iter_object_keys(bucket_name, prefix):
    """Yield every object key under ``prefix``, following pagination."""
    kwargs = {"Bucket": bucket_name, "Prefix": prefix}
    while True:
        response = s3_client.list_objects_v2(**kwargs)
        for content in response.get("Contents") or []:
            yield content["Key"]
        if not response.get("IsTruncated"):  # the response was not truncated
            break
        kwargs["ContinuationToken"] = response["NextContinuationToken"]


def get_all_filepath(file_url):
    """Return the ``s3://`` URI of every object under ``file_url``."""
    bucket_name, prefix = _split_s3_url(file_url)
    return [
        f's3://{bucket_name}/{key}'
        for key in _iter_object_keys(bucket_name, prefix)
    ]


def get_all_keys(file_url):
    """Return the key of every object under ``file_url``.

    The truncation flag is read as ``IsTruncated``; the previous lower-case
    ``isTruncated`` lookup never matched, so only the first page was returned.
    """
    bucket_name, prefix = _split_s3_url(file_url)
    return list(_iter_object_keys(bucket_name, prefix))


def get_documents(file_url, dst_path):
    """Download every object under ``file_url`` into ``dst_path``.

    The object key is used as the relative path below ``dst_path``.
    """
    bucket_name = file_url.split("/")[2]
    file_list = get_all_keys(file_url)

    for s3_key in file_list:
        dst_file = f"{dst_path}/{s3_key}"
        if not os.path.exists(os.path.dirname(dst_file)):
            os.makedirs(os.path.dirname(dst_file))
        s3_client.download_file(bucket_name, s3_key, dst_file)
def get_aoss_client(host_http):
    """Create a SigV4-signed OpenSearch client for the given endpoint host."""
    host = host_http

    # The region is taken from the second dot-separated label of the host.
    region = host.split(".")[1]

    service = "es"
    credentials = boto3.Session().get_credentials()
    auth = AWSV4SignerAuth(credentials, region, service)

    client = OpenSearch(
        hosts=[{"host": host, "port": 443}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
        pool_maxsize=20,
    )

    return client


def delete_index(client, index_name):
    """Delete ``index_name`` on a best-effort basis.

    Any exception raised by the delete call is logged together with its
    message and then suppressed, so a missing index does not fail the
    function.

    Returns:
        bool: Always ``True``.
    """
    try:
        client.indices.delete(index_name)
        logger.info(f"Index {index_name} is successfully deleted")
    except Exception as e:
        # Keep the best-effort contract, but record what actually happened
        # instead of reporting every failure as "not found".
        logger.warning(
            f"Index {index_name} was not deleted (it may not exist): {e}"
        )
    return True


def handler(event, context):
    """Delete the index named in the event, or the ``INDEX_NAME`` default.

    ``OPENSEARCH_ENDPOINT`` and ``INDEX_NAME`` must be set in the environment;
    an ``index_name`` key in ``event`` overrides the default index name.
    """
    host_http = os.environ["OPENSEARCH_ENDPOINT"]
    index_name = os.environ["INDEX_NAME"]
    if "index_name" in event:
        index_name = event["index_name"]

    client = get_aoss_client(host_http)
    delete_index(client, index_name)

    logger.info("Process finished.")
def get_aos_client(endpoint):
    """Build a SigV4-signed OpenSearch client for ``endpoint``.

    The signing region is the second dot-separated label of the host name.
    """
    aws_region = endpoint.split(".")[1]
    session_credentials = boto3.Session().get_credentials()
    signer = AWSV4SignerAuth(session_credentials, aws_region, "es")

    return OpenSearch(
        hosts=[{"host": endpoint, "port": 443}],
        http_auth=signer,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
        pool_maxsize=20,
    )


def handler(event, context):
    """Return the names of all indices that do not start with a dot.

    Responds with status 200 and ``{"indices": [...]}``, or with status 500
    and ``{"error": "Internal Server Error"}`` when listing fails. Both
    responses carry the JSON content type and a wildcard CORS header.
    """
    aos = get_aos_client(os.environ["OPENSEARCH_ENDPOINT"])

    response_headers = {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
    }

    def respond(status_code, payload):
        return {
            'statusCode': status_code,
            'headers': response_headers,
            'body': json.dumps(payload),
        }

    try:
        visible = []
        for entry in aos.cat.indices(format="json"):
            name = entry['index']
            # Names with a leading dot are left out of the listing.
            if name.startswith('.'):
                continue
            visible.append(name)
        return respond(200, {'indices': visible})
    except Exception:
        logger.exception("Handler encountered an unexpected error")
        return respond(500, {'error': 'Internal Server Error'})
def get_vector(client, text, index_name):
    """Embed ``text`` with the model recorded in the index mapping metadata.

    The model id is read from ``mappings._meta.model_id`` of ``index_name``.
    Model ids containing ``"cohere"`` use the Cohere request schema; all
    others use the ``inputText`` schema.
    """
    # Get the model id
    model_id = client.indices.get(index=index_name)[index_name]["mappings"][
        "_meta"
    ]["model_id"]

    if "cohere" in model_id:
        body = json.dumps(
            {
                "texts": [text],
                "input_type": "search_query",
                "embedding_types": ["float"],
            }
        )
        query_response = bedrock_runtime.invoke_model(
            body=body,
            modelId=model_id,
            accept="*/*",
            contentType="application/json",
        )
        vector = json.loads(query_response["body"].read()).get("embeddings")[
            "float"
        ][0]
    else:
        # Get the vector from the Bedrock model
        query_response = bedrock_runtime.invoke_model(
            body=json.dumps({"inputText": text}),
            modelId=model_id,
            accept="application/json",
            contentType="application/json",
        )

        vector = json.loads(query_response["body"].read()).get("embedding")

    return vector


def find_similar_docs(client, search_query, index_name, search_pipeline=None):
    """Run ``search_query`` and flatten the hits into result dicts.

    ``search_pipeline`` is only passed to the client when it is truthy.
    """
    if search_pipeline:
        results = client.search(
            index=index_name, body=search_query, search_pipeline=search_pipeline
        )
    else:
        results = client.search(index=index_name, body=search_query)

    return [
        {
            "text": hit["fields"]["keyword"][0],
            "score": hit["_score"],
            "service": hit["fields"]["service"][0],
            "docs_root": hit["fields"]["docs_root"][0],
            "doc_name": hit["fields"]["doc_name"][0],
        }
        for hit in results["hits"]["hits"]
    ]


def _build_search_query(query):
    """Wrap ``query`` with the size and field selection shared by all searches."""
    return {
        "size": 5,
        "_source": False,
        "fields": ["keyword", "service", "docs_root", "doc_name"],
        "query": query,
    }


def _select_pipeline(search_result_unit, document_pipeline, chunk_pipeline):
    """Pick the search pipeline for the requested result unit.

    Raises:
        ValueError: If the unit is neither ``"document"`` nor ``"chunk"``.
    """
    if search_result_unit == "document":
        return document_pipeline
    if search_result_unit == "chunk":
        return chunk_pipeline
    raise ValueError("Invalid search result unit")


def find_similar_docs_keyword(client, text, index_name, search_result_unit):
    """Keyword (match) search on the ``keyword`` field."""
    search_query = _build_search_query({"match": {"keyword": {"query": text}}})
    search_pipeline = _select_pipeline(
        search_result_unit, "collapse-search-pipeline", None
    )
    return find_similar_docs(client, search_query, index_name, search_pipeline)


def find_similar_docs_vector(client, vector, index_name, search_result_unit):
    """k-NN search on the ``vector`` field."""
    search_query = _build_search_query(
        {"knn": {"vector": {"vector": vector, "k": 5}}}
    )
    search_pipeline = _select_pipeline(
        search_result_unit, "collapse-search-pipeline", None
    )
    return find_similar_docs(client, search_query, index_name, search_pipeline)


def find_similar_docs_hybrid(
    client, vector, text, index_name, search_result_unit
):
    """Hybrid search combining the keyword and the k-NN query."""
    search_query = _build_search_query(
        {
            "hybrid": {
                "queries": [
                    {"match": {"keyword": {"query": text}}},
                    {"knn": {"vector": {"vector": vector, "k": 5}}},
                ]
            }
        }
    )
    search_pipeline = _select_pipeline(
        search_result_unit,
        "collapse-hybrid-search-pipeline",
        "hybrid-search-pipeline",
    )
    return find_similar_docs(client, search_query, index_name, search_pipeline)


def get_aos_client(endpoint):
    """Create a SigV4-signed OpenSearch client for the given endpoint host."""
    host = endpoint
    # The region is taken from the second dot-separated label of the host.
    region = host.split(".")[1]

    service = "es"
    credentials = boto3.Session().get_credentials()
    auth = AWSV4SignerAuth(credentials, region, service)

    client = OpenSearch(
        hosts=[{"host": host, "port": 443}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
        pool_maxsize=20,
    )

    return client


def handler(event, context):
    """Search endpoint: run a keyword, vector or hybrid search.

    The JSON request body must contain ``indexName``, ``text``,
    ``searchMethod`` (``hybrid``/``vector``/``keyword``) and
    ``searchResultUnit`` (``document``/``chunk``).

    Returns:
        dict: Response with status 200 and the result list, 400 for a
        malformed body, an unknown search method or an invalid result unit,
        and 500 for any other failure. Every response carries the CORS
        header.
    """
    headers = {
        "Content-Type": "application/json",
        "Access-Control-Allow-Origin": "*",
    }

    # Validate the request first so that bad input is answered with a 400
    # response instead of an unhandled exception.
    try:
        body = json.loads(event["body"])
        index_name = body["indexName"]
        text = body["text"]
        search_method = body["searchMethod"]
        search_result_unit = body["searchResultUnit"]
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        logger.error(f"Handler received an invalid request body: {e!r}")
        return {
            "statusCode": 400,
            "headers": headers,
            "body": json.dumps({"error": "invalid request body"}),
        }

    try:
        client = get_aos_client(os.environ["OPENSEARCH_ENDPOINT"])

        if search_method == "hybrid":
            vector = get_vector(client, text, index_name)
            search_results = find_similar_docs_hybrid(
                client, vector, text, index_name, search_result_unit
            )

        elif search_method == "vector":
            vector = get_vector(client, text, index_name)
            search_results = find_similar_docs_vector(
                client, vector, index_name, search_result_unit
            )

        elif search_method == "keyword":
            search_results = find_similar_docs_keyword(
                client, text, index_name, search_result_unit
            )

        else:
            return {
                "statusCode": 400,
                "headers": headers,
                "body": json.dumps({"error": "invalid search method"}),
            }

        return {
            "statusCode": 200,
            "headers": headers,
            "body": json.dumps(search_results, ensure_ascii=False),
        }

    except ValueError as e:
        logger.error(f"Handler encountered a ValueError: {e}")
        return {
            "statusCode": 400,
            "headers": headers,
            "body": json.dumps({"error": str(e)}),
        }

    except Exception:
        logger.exception("Handler encountered an unexpected error")
        return {
            "statusCode": 500,
            "headers": headers,
            "body": json.dumps({"error": "Internal server error"}),
        }
/**
 * Inputs of the {@link Api} construct.
 */
export interface ApiProps {
  // User pool handed to the Cognito authorizer of the REST API.
  userPool: UserPool;
  // Domain whose ARN and endpoint are given to the Lambda functions.
  opensearchDomain: Domain;
  // Passed to the search function as the BEDROCK_REGION environment variable.
  bedrockRegion: string;
}

/**
 * REST API with two Cognito-authorized routes, each backed by a Python Lambda:
 * `POST /search` (search-documents) and `GET /index` (list-index).
 */
export class Api extends Construct {
  // The REST API created by this construct.
  public readonly api: RestApi;

  constructor(scope: Construct, id: string, props: ApiProps) {
    super(scope, id);

    // Lambda
    // Search function: may invoke Bedrock models and send HTTP GET/POST
    // requests to the OpenSearch domain.
    const searchDocuments = new PythonFunction(this, 'SearchDocuments', {
      entry: 'lambda/search-documents',
      runtime: Runtime.PYTHON_3_12,
      timeout: Duration.seconds(15),
      initialPolicy: [
        new PolicyStatement({
          actions: ['bedrock:InvokeModel'],
          resources: ['*'],
        }),
        new PolicyStatement({
          actions: ['es:ESHttpPost', 'es:ESHttpGet'],
          resources: [`${props.opensearchDomain.domainArn}/*`],
        }),
      ],
      environment: {
        OPENSEARCH_ENDPOINT: props.opensearchDomain.domainEndpoint,
        BEDROCK_REGION: props.bedrockRegion,
      },
    });

    // Index listing function: only needs HTTP access to the domain.
    const listIndex = new PythonFunction(this, 'ListIndex', {
      entry: 'lambda/list-index',
      runtime: Runtime.PYTHON_3_12,
      timeout: Duration.seconds(15),
      initialPolicy: [
        new PolicyStatement({
          actions: ['es:ESHttpPost', 'es:ESHttpGet'],
          resources: [`${props.opensearchDomain.domainArn}/*`],
        }),
      ],
      environment: {
        OPENSEARCH_ENDPOINT: props.opensearchDomain.domainEndpoint,
      },
    });

    // Cognito Authorizer for API Gateway
    const authorizer = new CognitoUserPoolsAuthorizer(
      this,
      'CognitoAuthorizer',
      {
        cognitoUserPools: [props.userPool],
      }
    );

    // Api Gateway
    // The default method authorization is IAM; both routes added below
    // override it with the Cognito authorizer.
    const api = new RestApi(this, 'RestApi', {
      defaultCorsPreflightOptions: {
        allowOrigins: Cors.ALL_ORIGINS,
        allowMethods: Cors.ALL_METHODS,
      },
      cloudWatchRole: true,
      deployOptions: {
        dataTraceEnabled: true,
        loggingLevel: MethodLoggingLevel.INFO,
      },
      defaultMethodOptions: { authorizationType: AuthorizationType.IAM },
    });
    // Add the CORS header to the default 4XX and 5XX gateway responses as well.
    api.addGatewayResponse('Api4XX', {
      type: ResponseType.DEFAULT_4XX,
      responseHeaders: {
        'Access-Control-Allow-Origin': "'*'",
      },
    });

    api.addGatewayResponse('Api5XX', {
      type: ResponseType.DEFAULT_5XX,
      responseHeaders: {
        'Access-Control-Allow-Origin': "'*'",
      },
    });
    // POST /search -> searchDocuments
    const searchResource = api.root.addResource('search');
    searchResource.addMethod('POST', new LambdaIntegration(searchDocuments), {
      authorizer,
      authorizationType: AuthorizationType.COGNITO,
    });
    // GET /index -> listIndex
    const indexResource = api.root.addResource('index');
    indexResource.addMethod('GET', new LambdaIntegration(listIndex), {
      authorizer,
      authorizationType: AuthorizationType.COGNITO,
    });

    this.api = api;
  }
}
} from 'constructs';

/** Settings accepted by the {@link Cognito} construct. */
export interface CognitoProps {
  selfSignUpEnabled: boolean;
}

/**
 * Groups the Cognito resources of the stack: a user pool, an app client for
 * the web frontend, a hosted domain on the user pool, and an identity pool
 * that trusts the user pool as its identity provider.
 */
export class Cognito extends Construct {
  readonly userPool: UserPool;
  readonly userPoolClient: UserPoolClient;
  readonly identityPool: CfnIdentityPool;

  constructor(scope: Construct, id: string, props: CognitoProps) {
    super(scope, id);

    const { selfSignUpEnabled } = props;
    // The invitation e-mail and SMS carry the same text.
    const temporaryPasswordMessage =
      'Hello {username}, Your temporary password is {####}';

    // User pool: sign-up behaviour first, then sign-in behaviour.
    this.userPool = new UserPool(this, 'UserPool', {
      selfSignUpEnabled,
      userInvitation: {
        emailSubject: 'GenerativeAI Japanese Search Sample User Registration',
        emailBody: temporaryPasswordMessage,
        smsMessage: temporaryPasswordMessage,
      },
      signInAliases: { email: true },
      signInCaseSensitive: false, // case-insensitive sign-in is the recommended setting
      removalPolicy: RemovalPolicy.DESTROY,
      deletionProtection: false,
    });

    // App client used by the web frontend UI.
    this.userPoolClient = this.userPool.addClient('Client');

    // Hosted domain; the prefix has to be globally unique, hence the account id.
    this.userPool.addDomain('CognitoDomain', {
      cognitoDomain: {
        domainPrefix: `generative-ai-japanese-search-${Aws.ACCOUNT_ID}`,
      },
    });

    // Identity pool restricted to identities authenticated by the user pool.
    this.identityPool = new CfnIdentityPool(this, 'IdentityPool', {
      allowUnauthenticatedIdentities: false,
      cognitoIdentityProviders: [
        {
          clientId: this.userPoolClient.userPoolClientId,
          providerName: this.userPool.userPoolProviderName,
        },
      ],
    });
  }
}
--------------------------------------------------------------------------------
/packages/cdk/lib/constructs/front.ts:
--------------------------------------------------------------------------------
import {
  CloudFrontToS3,
CloudFrontToS3Props,
} from '@aws-solutions-constructs/aws-cloudfront-s3';
import { Aws, CfnOutput, RemovalPolicy, StackProps } from 'aws-cdk-lib';
import {
  BlockPublicAccess,
  BucketEncryption,
  BucketProps,
  ObjectOwnership,
} from 'aws-cdk-lib/aws-s3';
import { Construct } from 'constructs';
import { NodejsBuild } from 'deploy-time-build';

/** Values the frontend build needs in order to reach the backend. */
export interface FrontProps extends StackProps {
  apiEndpointUrl: string;
  userPoolId: string;
  userPoolClientId: string;
  identityPoolId: string;
}

/**
 * Hosts the web UI: an S3 bucket behind CloudFront, plus a deploy-time build
 * of `packages/ui` whose output is published to that bucket.
 */
export class Front extends Construct {
  constructor(scope: Construct, id: string, props: FrontProps) {
    super(scope, id);

    // One bucket configuration shared by the content bucket and both logging buckets.
    const sharedBucketSettings: BucketProps = {
      blockPublicAccess: BlockPublicAccess.BLOCK_ALL,
      encryption: BucketEncryption.S3_MANAGED,
      autoDeleteObjects: true,
      removalPolicy: RemovalPolicy.DESTROY,
      objectOwnership: ObjectOwnership.OBJECT_WRITER,
      enforceSSL: true,
    };

    // 403 and 404 from the origin are answered with index.html and status 200.
    const spaFallbackResponses = [403, 404].map((httpStatus) => ({
      httpStatus,
      responseHttpStatus: 200,
      responsePagePath: '/index.html',
    }));

    const hostingProps: CloudFrontToS3Props = {
      insertHttpSecurityHeaders: false,
      loggingBucketProps: sharedBucketSettings,
      bucketProps: sharedBucketSettings,
      cloudFrontLoggingBucketProps: sharedBucketSettings,
      cloudFrontDistributionProps: {
        errorResponses: spaFallbackResponses,
      },
    };

    const hosting = new CloudFrontToS3(this, 'CloudFrontToS3', hostingProps);
    const distribution = hosting.cloudFrontWebDistribution;

    // Paths left out of the asset that is uploaded for the build.
    const buildAssetExcludes = [
      '.git',
      '.github',
      '.gitignore',
      '*.md',
      'node_modules',
      'packages/cdk/**/*',
      '!packages/cdk/cdk.json',
      'packages/ui/dist',
      'packages/ui/node_modules',
      'packages/ui/dev-dist',
    ];

    // Build the frontend at deploy time and publish the result.
    new NodejsBuild(this, 'FrontBuild', {
      assets: [{ path: '../../', exclude: buildAssetExcludes }],
      destinationBucket: hosting.s3BucketInterface,
      distribution,
      outputSourceDirectory: './packages/ui/dist',
      buildCommands: ['npm ci', 'npm -w packages/ui run build'],
      buildEnvironment: {
        VITE_AWS_REGION: Aws.REGION,
        VITE_API_ENDPOINT_URL: props.apiEndpointUrl,
        VITE_COGNITO_USER_POOL_ID: props.userPoolId,
        VITE_COGNITO_USER_POOL_CLIENT_ID: props.userPoolClientId,
        VITE_COGNITO_IDENTITY_POOL_ID: props.identityPoolId,
      },
    });

    new CfnOutput(this, 'FrontendUrl', {
      value: `https://${distribution.distributionDomainName}`,
      description: 'Frontend URL',
      exportName: 'FrontendUrl',
    });
  }
}
--------------------------------------------------------------------------------
/packages/cdk/lib/constructs/ingest-data-ecs.ts:
--------------------------------------------------------------------------------
import { CfnOutput, RemovalPolicy } from 'aws-cdk-lib';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import { IVpc, Vpc } from 'aws-cdk-lib/aws-ec2';
import { Platform } from 'aws-cdk-lib/aws-ecr-assets';
import {
  Cluster,
  ContainerImage,
  CpuArchitecture,
  FargateTaskDefinition,
  LogDriver,
  OperatingSystemFamily,
} from 'aws-cdk-lib/aws-ecs';
import { Effect, PolicyStatement } from 'aws-cdk-lib/aws-iam';
import { LogGroup } from 'aws-cdk-lib/aws-logs';
import { Domain } from 'aws-cdk-lib/aws-opensearchservice';
import { Bucket } from 'aws-cdk-lib/aws-s3';
import * as s3deploy from 'aws-cdk-lib/aws-s3-deployment';
import { Construct } from 'constructs';
import { ServiceLinkedRole } from 'upsert-slr';

export interface IngestDataProps {
  documentBucket: Bucket;
  vpcId?: string;
  opensearchDomain: Domain;
  bedrockRegion: string;
}

/**
 * Provisions what is needed to run the `ecs/ingest-data` container as a
 * Fargate task: the sample documents uploaded to the document bucket, a VPC
 * (looked up when `vpcId` is given, otherwise created), an ECS cluster, the
 * task definition with its IAM permissions, a security group and a log group.
 *
 * The cluster name, task definition ARN, subnet ID and security group ID are
 * published as stack outputs.
 *
 * @throws Error when the VPC has no private subnet to publish as `EcsSubnetID`.
 */
export class IngestData extends Construct {
  constructor(scope: Construct, id: string, props: IngestDataProps) {
    super(scope, id);

    // Upload the bundled sample documents into the document bucket.
    new s3deploy.BucketDeployment(this, `DeployDocuments`, {
      sources: [s3deploy.Source.asset('../../docs.zip')],
      destinationBucket: props.documentBucket,
      contentType: 'text/plain; charset=utf-8',
    });

    // VPC: reuse an existing one when an ID is supplied, otherwise create one.
    let vpc: IVpc;
    if (props.vpcId) {
      vpc = Vpc.fromLookup(this, 'Vpc', { vpcId: props.vpcId });
    } else {
      vpc = new Vpc(this, 'Vpc', {
        maxAzs: 2,
      });
    }

    // The subnet output below needs a private subnet. Fail with a clear
    // message instead of a TypeError on `undefined.subnetId`.
    const [taskSubnet] = vpc.privateSubnets;
    if (!taskSubnet) {
      throw new Error(
        'IngestData requires a VPC with at least one private subnet, but none was found.'
      );
    }

    // ECS
    const cluster = new Cluster(this, 'Cluster', {
      vpc: vpc,
      containerInsights: true,
    });

    new ServiceLinkedRole(this, 'EcsServiceLinkedRole', {
      awsServiceName: 'ecs.amazonaws.com',
    });

    const taskDefinition = new FargateTaskDefinition(this, 'TaskDefinition', {
      cpu: 2048,
      memoryLimitMiB: 4096,
      runtimePlatform: {
        cpuArchitecture: CpuArchitecture.X86_64,
        operatingSystemFamily: OperatingSystemFamily.LINUX,
      },
    });

    // Task role permissions: read documents, invoke Bedrock models, write to OpenSearch.
    taskDefinition.addToTaskRolePolicy(
      new PolicyStatement({
        effect: Effect.ALLOW,
        actions: ['s3:GetObject'],
        resources: [props.documentBucket.arnForObjects('*')],
      })
    );

    taskDefinition.addToTaskRolePolicy(
      new PolicyStatement({
        actions: ['bedrock:InvokeModel'],
        resources: ['*'],
      })
    );
    taskDefinition.addToTaskRolePolicy(
      new PolicyStatement({
        actions: ['es:ESHttpPost', 'es:ESHttpPut'],
        resources: [`${props.opensearchDomain.domainArn}/*`],
      })
    );

    const taskSg = new ec2.SecurityGroup(this, 'TaskSecurityGroup', {
      vpc: vpc,
      allowAllOutbound: true,
    });

    props.opensearchDomain.grantIndexReadWrite('*', taskDefinition.taskRole);
    props.documentBucket.grantReadWrite(taskDefinition.taskRole);

    const taskLogGroup = new LogGroup(this, 'TaskLogGroup', {
      removalPolicy: RemovalPolicy.DESTROY,
    });

    const container = taskDefinition.addContainer('Container', {
      image: ContainerImage.fromAsset('ecs/ingest-data', {
        platform: Platform.LINUX_AMD64,
      }),
      logging: LogDriver.awsLogs({
        streamPrefix: 'ingest-data',
        logGroup: taskLogGroup,
      }),
      environment: {
        OPENSEARCH_ENDPOINT: props.opensearchDomain.domainEndpoint,
        // Left empty in the task definition.
        // NOTE(review): presumably overridden when the task is launched - confirm against run-ingest-ecs-task.sh.
        OPENSEARCH_INDEX_NAME: '',
        BEDROCK_REGION: props.bedrockRegion,
        EMBED_DIMENSION: '1024',
        EMBED_MODEL_ID: '',
        DOCUMENT_S3_URI: `s3://${props.documentBucket.bucketName}/docs`,
      },
    });

    taskLogGroup.grantWrite(container.taskDefinition.executionRole!);

    // Outputs describing where and how the task can be run.
    new CfnOutput(this, 'ecsClusterName', {
      value: cluster.clusterName,
      description: 'ECS cluster name',
      exportName: `EcsClusterName`,
    });

    new CfnOutput(this, 'ecsTaskDefinitionARN', {
      value: taskDefinition.taskDefinitionArn,
      description: 'ECS task definition ARN',
      exportName: `EcsTaskDefinitionARN`,
    });

    new CfnOutput(this, 'ecsSubnetID', {
      value: taskSubnet.subnetId,
      description: 'Subnet ID for ECS cluster',
      exportName: `EcsSubnetID`,
    });

    new CfnOutput(this, 'ecsSecurityGroupID', {
      value: taskSg.securityGroupId,
      description: 'Security group ID for ECS task',
      exportName: `EcsSecurityGroupID`,
    });
  }
}
--------------------------------------------------------------------------------
/packages/cdk/lib/constructs/opensearch.ts:
--------------------------------------------------------------------------------
import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha';
import { CustomResource, Duration, RemovalPolicy } from 'aws-cdk-lib';
import { CfnIdentityPoolRoleAttachment } from 'aws-cdk-lib/aws-cognito';
import { EbsDeviceVolumeType } from 'aws-cdk-lib/aws-ec2';
import {
  ManagedPolicy,
  PolicyStatement,
  Role,
  ServicePrincipal,
  WebIdentityPrincipal,
} from 'aws-cdk-lib/aws-iam';
import { Runtime } from 'aws-cdk-lib/aws-lambda';
import { Domain, EngineVersion } from 'aws-cdk-lib/aws-opensearchservice';
import { Bucket } from 'aws-cdk-lib/aws-s3';
import { Provider } from 'aws-cdk-lib/custom-resources';
import { Construct } from 'constructs';

/** Inputs for the {@link Opensearch} construct. */
export interface OpensearchProps {
  userPoolId: string;
  identityPoolId: string;
  documentBucket: Bucket;
}

/**
 * Creates the OpenSearch Service domain together with the IAM roles that tie
 * it to Cognito, and a custom resource backed by the
 * `custom-resource/associate-package` Lambda code that receives the domain name.
 */
export class Opensearch extends Construct {
  readonly opensearchDomain: Domain;
  constructor(scope: Construct, id: string, props: OpensearchProps) {
    super(scope, id);

    // Role to be assumed by OpenSearch Service for using Cognito as Auth Provider for OpenSearch Dashboards
    const cognitoConfigurationRole = new Role(
      this,
      'CognitoConfigurationRole',
      {
        assumedBy: new ServicePrincipal('opensearchservice.amazonaws.com'),
      }
    );
    cognitoConfigurationRole.addManagedPolicy(
      ManagedPolicy.fromAwsManagedPolicyName(
        'AmazonOpenSearchServiceCognitoAccess'
      )
    );

    // Role for Authenticated Identity in Cognito Identity Pool
    const cognitoAuthRole = new Role(this, 'CognitoAuthRole', {
      assumedBy: new WebIdentityPrincipal('cognito-identity.amazonaws.com', {
        StringEquals: {
          'cognito-identity.amazonaws.com:aud': props.identityPoolId,
        },
        'ForAnyValue:StringLike': {
          'cognito-identity.amazonaws.com:amr': 'authenticated',
        },
      }),
    });

    // Authenticated users may list and read the document bucket.
    cognitoAuthRole.addToPolicy(
      new PolicyStatement({
        actions: ['s3:GetObject', 's3:ListBucket'],
        resources: [
          props.documentBucket.bucketArn,
          `${props.documentBucket.bucketArn}/*`,
        ],
      })
    );

    // Cognito Identity Pool Role Attachment
    new CfnIdentityPoolRoleAttachment(this, 'IdentityPoolRoleAttachment', {
      identityPoolId: props.identityPoolId,
      roles: {
        authenticated: cognitoAuthRole.roleArn,
      },
    });

    // OpenSearch Service Domain
    const domain = new Domain(this, 'Domain', {
      version: EngineVersion.OPENSEARCH_2_13,
      removalPolicy: RemovalPolicy.DESTROY,
      ebs: {
        volumeSize: 100,
        volumeType: EbsDeviceVolumeType.GP3,
      },
      nodeToNodeEncryption: true,
      encryptionAtRest: {
        enabled: true,
      },
      advancedOptions: { 'rest.action.multi.allow_explicit_index': 'true' },
      zoneAwareness: {
        enabled: false, // if enabled, nodes and replica index shards are placed in multi-AZ
      },
      capacity: {
        dataNodes: 1,
        dataNodeInstanceType: 'or1.large.search',
        multiAzWithStandbyEnabled: false,
      },
      cognitoDashboardsAuth: {
        role: cognitoConfigurationRole,
        identityPoolId: props.identityPoolId,
        userPoolId: props.userPoolId,
      },
    });

    // Allow Authenticated Role to es:ESHttp* for dashboard access
    domain.addAccessPolicies(
      new PolicyStatement({
        actions: ['es:ESHttp*'],
        principals: [cognitoAuthRole],
        resources: [`${domain.domainArn}/*`],
      })
    );

    // onEvent handler of the package-association custom resource.
    const associatePackageFunction = new PythonFunction(
      this,
      'AssociatePackageFunction',
      {
        entry: 'custom-resource/associate-package',
        runtime: Runtime.PYTHON_3_12,
        initialPolicy: [
          new PolicyStatement({
            actions: [
              'es:AssociatePackage',
              'es:ListDomainsForPackage',
              'es:DescribeDomain',
              'es:DescribePackages',
            ],
            resources: ['*'],
          }),
        ],
        timeout: Duration.minutes(15),
      }
    );

    // isComplete handler: same code entry, `is_complete` handler function.
    const associatePackageIsCompleteFunction = new PythonFunction(
      this,
      'AssociatePackageIsCompleteFunction',
      {
        entry: 'custom-resource/associate-package',
        handler: 'is_complete',
        runtime: Runtime.PYTHON_3_12,
        initialPolicy: [
          new PolicyStatement({
            actions: ['es:ListDomainsForPackage'],
            resources: ['*'],
          }),
        ],
        timeout: Duration.minutes(15),
      }
    );

    // The construct ID keeps its historical spelling on purpose: changing it
    // would change the generated logical IDs of the provider's resources.
    const associatePackageProvider = new Provider(
      this,
      'AssociatePacakgeProvider',
      {
        onEventHandler: associatePackageFunction,
        isCompleteHandler: associatePackageIsCompleteFunction,
        totalTimeout: Duration.minutes(30),
      }
    );

    new CustomResource(this, 'AssociatePackageResource', {
      serviceToken: associatePackageProvider.serviceToken,
      properties: {
        DomainName: domain.domainName,
      },
    });

    this.opensearchDomain = domain;
  }
}
--------------------------------------------------------------------------------
/packages/cdk/lib/constructs/s3bucket.ts:
--------------------------------------------------------------------------------
import { CfnOutput, RemovalPolicy } from 'aws-cdk-lib';
import * as s3 from 'aws-cdk-lib/aws-s3';
import { Construct } from 'constructs';

export interface S3bucketsProps {}

/**
 * Creates the private, S3-managed-encrypted document bucket and publishes its
 * name as the `DocumentBucketName` output. The bucket and its objects are
 * removed when the stack is destroyed.
 */
export class S3bucket extends Construct {
  readonly documentBucket: s3.Bucket;
  constructor(scope: Construct, id: string, props: S3bucketsProps) {
    super(scope, id);

    const documentBucket = new s3.Bucket(this, `documentBucket`, {
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      encryption: s3.BucketEncryption.S3_MANAGED,
      enforceSSL: true,
      removalPolicy: RemovalPolicy.DESTROY,
      autoDeleteObjects: true,
    });
    this.documentBucket = documentBucket;

    new CfnOutput(this, 'documentBucketName', {
      value: documentBucket.bucketName,
      description: 'Document bucket name',
      exportName: `DocumentBucketName`,
    });
  }
}
--------------------------------------------------------------------------------
/packages/cdk/lib/constructs/util-lambda.ts:
--------------------------------------------------------------------------------
import { PythonFunction } from '@aws-cdk/aws-lambda-python-alpha';
import { CfnOutput, Duration } from 'aws-cdk-lib';
import * as iam from 'aws-cdk-lib/aws-iam';
import * as lambda from 'aws-cdk-lib/aws-lambda';
import { Domain } from 'aws-cdk-lib/aws-opensearchservice';

import { Construct } from 'constructs';

export interface UtilLambdaProps {
  opensearchDomain: Domain;
  bedrockRegion: string;
}

/**
 * Creates the `DeleteIndex` Lambda function (code in
 * `lambda/delete-opensearch-index/`) with a dedicated role that may write
 * logs and issue HTTP DELETE requests against the OpenSearch domain, and
 * publishes the function name as a stack output.
 */
export class UtilLambda extends Construct {
  constructor(scope: Construct, id: string, props: UtilLambdaProps) {
    super(scope, id);

    const deleteIndexLambdaRole = new iam.Role(this, `deleteIndexLambdaRole`, {
      assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
    });
    // CloudWatch Logs permissions for the function.
    deleteIndexLambdaRole.addToPolicy(
      new iam.PolicyStatement({
        actions: [
          'logs:CreateLogGroup',
          'logs:CreateLogStream',
          'logs:PutLogEvents',
        ],
        resources: ['arn:aws:logs:*:*:*'],
      })
    );

    deleteIndexLambdaRole.addToPolicy(
      new iam.PolicyStatement({
        actions: ['es:ESHttpDelete'],
        resources: [`${props.opensearchDomain.domainArn}/*`],
      })
    );

    const deleteIndexFunction = new PythonFunction(this, `DeleteIndex`, {
      runtime: lambda.Runtime.PYTHON_3_12,
      entry: 'lambda/delete-opensearch-index/',
      timeout: Duration.seconds(60 * 5),
      retryAttempts: 1,
      role: deleteIndexLambdaRole,
      environment: {
        OPENSEARCH_ENDPOINT: props.opensearchDomain.domainEndpoint,
        // Deployed empty.
        // NOTE(review): presumably set before the function is invoked - confirm.
        INDEX_NAME: '',
      },
    });

    new CfnOutput(this, 'deleteIndexFunctionName', {
      value: deleteIndexFunction.functionName,
      description: 'Delete Index lambda function name',
      exportName: `DeleteIndexFunction`,
    });
  }
}
--------------------------------------------------------------------------------
/packages/cdk/lib/opensearch-intelligent-search-jp-stack.ts:
--------------------------------------------------------------------------------
import * as cdk from 'aws-cdk-lib';
import { Construct } from 'constructs';
import { Api } from './constructs/api';
import { Cognito } from './constructs/cognito';
import { Front } from './constructs/front';
import { IngestData } from './constructs/ingest-data-ecs';

import { Opensearch } from './constructs/opensearch';
import { S3bucket } from './constructs/s3bucket';
import { UtilLambda } from './constructs/util-lambda';

/**
 * Root stack: wires the document bucket, Cognito, the OpenSearch domain, the
 * utility Lambda, the API, the ingestion task and the frontend together.
 *
 * Reads two context keys: `bedrockRegion` and `selfSignUpEnabled`.
 */
export class OpensearchIntelligentSearchJpStack extends cdk.Stack {
  constructor(scope: Construct, id: string, props?: cdk.StackProps) {
    super(scope, id, props);

    const bedrockRegion = this.node.tryGetContext('bedrockRegion');

    // Context passed on the command line (`-c selfSignUpEnabled=false`)
    // arrives as a string, and the string 'false' is truthy. Accept only a
    // real `true` or the string 'true'; everything else disables self sign-up.
    const selfSignUpContext = this.node.tryGetContext('selfSignUpEnabled');
    const selfSignUpEnabled =
      selfSignUpContext === true || selfSignUpContext === 'true';

    const s3bucket = new S3bucket(this, 'S3bucket', {});
    const cognito = new Cognito(this, 'Cognito', {
      selfSignUpEnabled,
    });
    const opensearch = new Opensearch(this, 'OpenSearch', {
      userPoolId: cognito.userPool.userPoolId,
      identityPoolId: cognito.identityPool.ref,
      documentBucket: s3bucket.documentBucket,
    });
    new UtilLambda(this, 'UtilLambda', {
      opensearchDomain: opensearch.opensearchDomain,
      bedrockRegion: bedrockRegion,
    });

    const api = new Api(this, 'Api', {
      userPool: cognito.userPool,
      opensearchDomain: opensearch.opensearchDomain,
      bedrockRegion: bedrockRegion,
    });

    new IngestData(this, 'IngestData', {
      documentBucket: s3bucket.documentBucket,
      opensearchDomain: opensearch.opensearchDomain,
      bedrockRegion: bedrockRegion,
    });

    new Front(this, 'Front', {
      apiEndpointUrl: api.api.url,
      userPoolId: cognito.userPool.userPoolId,
      userPoolClientId: cognito.userPoolClient.userPoolClientId,
      identityPoolId: cognito.identityPool.ref,
    });
  }
}
--------------------------------------------------------------------------------
/packages/cdk/package.json:
--------------------------------------------------------------------------------
{
  "name": "cdk",
  "private": true,
  "version": "0.1.0",
  "bin": {
    "cdk": "bin/cdk.js"
  },
  "scripts": {
    "build": "tsc",
    "watch": "tsc -w",
    "test": "jest",
    "cdk": "cdk"
  },
  "devDependencies": {
    "@types/aws-lambda": "^8.10.137",
    "@types/jest": "^29.5.12",
    "@types/node": "20.12.7",
    "aws-cdk": "^2.148.1",
    "esbuild": "^0.20.2",
    "jest": "^29.7.0",
    "ts-jest": "^29.1.2",
    "ts-node": "^10.9.2",
    "typescript": "~5.4.5"
  },
  "dependencies": {
    "@aws-cdk/aws-lambda-python-alpha": "^2.147.0-alpha.0",
    "@aws-solutions-constructs/aws-cloudfront-s3": "^2.56.0",
    "aws-cdk-lib": "^2.147.0",
    "constructs": "^10.0.0",
    "deploy-time-build": "^0.3.21",
    "source-map-support": "^0.5.21",
    "upsert-slr": "^1.0.4"
  }
}
--------------------------------------------------------------------------------
/packages/cdk/test/cdk.test.ts:
--------------------------------------------------------------------------------
import { App } from 'aws-cdk-lib';
import { Template } from 'aws-cdk-lib/assertions';
import { OpensearchIntelligentSearchJpStack } from '../lib/opensearch-intelligent-search-jp-stack';

test('Snapshot Test', () => {
  const app = new App();
  const stack = new OpensearchIntelligentSearchJpStack(
    app,
    'OpensearchIntelligentSearchJpStack'
  );
  const template = Template.fromStack(stack);
  expect(template).toMatchSnapshot();
});
-------------------------------------------------------------------------------- /packages/cdk/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "commonjs", 5 | "lib": ["es2020", "dom"], 6 | "declaration": true, 7 | "strict": true, 8 | "noImplicitAny": true, 9 | "strictNullChecks": true, 10 | "noImplicitThis": true, 11 | "alwaysStrict": true, 12 | "noUnusedLocals": false, 13 | "noUnusedParameters": false, 14 | "noImplicitReturns": true, 15 | "noFallthroughCasesInSwitch": false, 16 | "inlineSourceMap": true, 17 | "inlineSources": true, 18 | "experimentalDecorators": true, 19 | "strictPropertyInitialization": false 20 | // "typeRoots": [ 21 | // "./node_modules/@types" 22 | // ] 23 | }, 24 | "exclude": ["node_modules", "cdk.out"] 25 | } 26 | -------------------------------------------------------------------------------- /packages/ui/.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | root: true, 3 | env: { browser: true, es2020: true }, 4 | extends: [ 5 | 'eslint:recommended', 6 | 'plugin:@typescript-eslint/recommended', 7 | 'plugin:react-hooks/recommended', 8 | 'prettier', 9 | ], 10 | ignorePatterns: ['dist', '.eslintrc.cjs'], 11 | parser: '@typescript-eslint/parser', 12 | plugins: ['react-refresh'], 13 | rules: { 14 | 'react-refresh/only-export-components': [ 15 | 'warn', 16 | { allowConstantExport: true }, 17 | ], 18 | 'no-unused-vars': 'warn', 19 | '@typescript-eslint/no-unused-vars': 'warn', 20 | }, 21 | }; 22 | -------------------------------------------------------------------------------- /packages/ui/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | 
*.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | -------------------------------------------------------------------------------- /packages/ui/README.md: -------------------------------------------------------------------------------- 1 | # React + TypeScript + Vite 2 | 3 | This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. 4 | 5 | Currently, two official plugins are available: 6 | 7 | - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh 8 | - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh 9 | 10 | ## Expanding the ESLint configuration 11 | 12 | If you are developing a production application, we recommend updating the configuration to enable type aware lint rules: 13 | 14 | - Configure the top-level `parserOptions` property like this: 15 | 16 | ```js 17 | export default { 18 | // other rules... 
19 | parserOptions: { 20 | ecmaVersion: 'latest', 21 | sourceType: 'module', 22 | project: ['./tsconfig.json', './tsconfig.node.json'], 23 | tsconfigRootDir: __dirname, 24 | }, 25 | } 26 | ``` 27 | 28 | - Replace `plugin:@typescript-eslint/recommended` to `plugin:@typescript-eslint/recommended-type-checked` or `plugin:@typescript-eslint/strict-type-checked` 29 | - Optionally add `plugin:@typescript-eslint/stylistic-type-checked` 30 | - Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and add `plugin:react/recommended` & `plugin:react/jsx-runtime` to the `extends` list 31 | -------------------------------------------------------------------------------- /packages/ui/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": false, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.js", 8 | "css": "src/index.css", 9 | "baseColor": "slate", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/components", 15 | "utils": "@/lib/utils" 16 | } 17 | } -------------------------------------------------------------------------------- /packages/ui/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | opensearch-intelligent-search-jp 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /packages/ui/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ui", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "tsc && vite build", 9 | "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", 10 | "preview": "vite preview" 11 | }, 12 | "dependencies": { 13 | "@aws-amplify/ui-react": "^6.1.11", 14 | "@aws-sdk/client-cognito-identity": "^3.577.0", 15 | "@aws-sdk/client-s3": "^3.577.0", 16 | "@aws-sdk/credential-providers": "^3.577.0", 17 | "@aws-sdk/s3-request-presigner": "^3.577.0", 18 | "@radix-ui/react-select": "^2.0.0", 19 | "@radix-ui/react-slot": "^1.0.2", 20 | "aws-amplify": "^6.3.2", 21 | "axios": "^1.6.8", 22 | "class-variance-authority": "^0.7.0", 23 | "clsx": "^2.1.1", 24 | "lucide-react": "^0.378.0", 25 | "react": "^18.2.0", 26 | "react-dom": "^18.2.0", 27 | "react-icons": "^5.2.1", 28 | "tailwind-merge": "^2.3.0", 29 | "tailwindcss-animate": "^1.0.7" 30 | }, 31 | "devDependencies": { 32 | "@types/node": "^20.12.12", 33 | "@types/react": "^18.2.66", 34 | "@types/react-dom": "^18.2.22", 35 | "@typescript-eslint/eslint-plugin": "^7.2.0", 36 | "@typescript-eslint/parser": "^7.2.0", 37 | "@vitejs/plugin-react": "^4.2.1", 38 | "autoprefixer": "^10.4.19", 39 | "eslint": "^8.57.0", 40 | "eslint-plugin-react-hooks": "^4.6.0", 41 | "eslint-plugin-react-refresh": "^0.4.6", 42 | "postcss": "^8.4.38", 43 | "tailwindcss": "^3.4.3", 44 | "typescript": "^5.2.2", 45 | "vite": "^5.2.0" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /packages/ui/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | 
-------------------------------------------------------------------------------- /packages/ui/public/vite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ui/src/App.css: -------------------------------------------------------------------------------- 1 | @import '@aws-amplify/ui-react/styles.css'; 2 | -------------------------------------------------------------------------------- /packages/ui/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { Authenticator, useAuthenticator } from '@aws-amplify/ui-react'; 2 | import './App.css'; 3 | import { Button } from './components/ui/button'; 4 | import SearchPage from './pages/SearchPage'; 5 | 6 | function App() { 7 | const { signOut, authStatus } = useAuthenticator((context) => [context.user]); 8 | 9 | return ( 10 | <> 11 |
12 |
13 | {/* Header */} 14 | {/*
15 |
16 | OpenSearch ハイブリッド検索 Demo 17 |
18 |
*/} 19 | 20 | {/* Main contents */} 21 |
22 | {authStatus !== 'authenticated' ? ( 23 | 24 | ) : ( 25 | <> 26 |
27 | 30 |
31 | 32 | 33 | )} 34 |
35 |
36 |
37 | 38 | ); 39 | } 40 | 41 | export default App; 42 | -------------------------------------------------------------------------------- /packages/ui/src/api.ts: -------------------------------------------------------------------------------- 1 | import { fetchAuthSession } from 'aws-amplify/auth'; 2 | import axios, { AxiosRequestConfig } from 'axios'; 3 | 4 | const api = axios.create({ 5 | baseURL: import.meta.env.VITE_API_ENDPOINT_URL, 6 | }); 7 | 8 | api.interceptors.request.use(async (config) => { 9 | // If Authenticated, append ID Token to Request Header 10 | const { idToken } = (await fetchAuthSession()).tokens ?? {}; 11 | if (idToken) { 12 | const token = idToken.toString(); 13 | config.headers['Authorization'] = token; 14 | } 15 | config.headers['Content-Type'] = 'application/json'; 16 | 17 | return config; 18 | }); 19 | 20 | export type SearchMethod = 'hybrid' | 'keyword' | 'vector'; 21 | export type SearchResultUnit = 'document' | 'chunk'; 22 | 23 | export interface PostSearchRequest { 24 | indexName: string; 25 | text: string; 26 | searchMethod: SearchMethod; 27 | searchResultUnit: SearchResultUnit; 28 | } 29 | 30 | export interface PostSearchResponseItem { 31 | text: string; 32 | score: number; 33 | service: string; 34 | docs_root: string; 35 | doc_name: string; 36 | } 37 | 38 | export async function postSearch( 39 | request: PostSearchRequest, 40 | reqConfig?: AxiosRequestConfig 41 | ): Promise { 42 | try { 43 | const response = await api.post('/search', request, reqConfig); 44 | return response.data; 45 | } catch (err) { 46 | console.log(err); 47 | throw err; 48 | } 49 | } 50 | 51 | export interface getIndicesResponse { 52 | indices: string[]; 53 | } 54 | 55 | export async function getIndices(): Promise { 56 | try { 57 | const response = await api.get('/index'); 58 | return response.data; 59 | } catch (err) { 60 | console.log(err); 61 | throw err; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- 
/packages/ui/src/assets/react.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ui/src/components/Loading.tsx: -------------------------------------------------------------------------------- 1 | import { PiSpinnerGap } from 'react-icons/pi'; 2 | 3 | /** Loading indicator component. NOTE(review): the JSX inside the fragment is missing from this copy; PiSpinnerGap is imported, so it presumably renders that icon - confirm against the original file. */ export default function Loading() { 4 | return ( 5 | <> 6 | 7 | 8 | ); 9 | } 10 | -------------------------------------------------------------------------------- /packages/ui/src/components/ui/badge.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import { cva, type VariantProps } from "class-variance-authority" 3 | 4 | import { cn } from "@/lib/utils" 5 | 6 | /** cva class-name builder for badges: a shared base class string plus the variants default, secondary, destructive and outline; variant falls back to default. */ const badgeVariants = cva( 7 | "inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2", 8 | { 9 | variants: { 10 | variant: { 11 | default: 12 | "border-transparent bg-primary text-primary-foreground hover:bg-primary/80", 13 | secondary: 14 | "border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80", 15 | destructive: 16 | "border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80", 17 | outline: "text-foreground", 18 | }, 19 | }, 20 | defaultVariants: { 21 | variant: "default", 22 | }, 23 | } 24 | ) 25 | 26 | /** Props accepted by Badge. NOTE(review): the generic arguments of React.HTMLAttributes and VariantProps are missing from this copy. */ export interface BadgeProps 27 | extends React.HTMLAttributes, 28 | VariantProps {} 29 | 30 | /** Badge component; receives className, variant and the remaining props. NOTE(review): the returned JSX is missing from this copy. */ function Badge({ className, variant, ...props }: BadgeProps) { 31 | return ( 32 |
33 | ) 34 | } 35 | 36 | export { Badge, badgeVariants } 37 | -------------------------------------------------------------------------------- /packages/ui/src/components/ui/button.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import { Slot } from "@radix-ui/react-slot" 3 | import { cva, type VariantProps } from "class-variance-authority" 4 | 5 | import { cn } from "@/lib/utils" 6 | 7 | /** cva class-name builder for buttons with two axes: variant (default, destructive, outline, secondary, ghost, link) and size (default, sm, lg, icon); both axes fall back to default. */ const buttonVariants = cva( 8 | "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50", 9 | { 10 | variants: { 11 | variant: { 12 | default: "bg-primary text-primary-foreground hover:bg-primary/90", 13 | destructive: 14 | "bg-destructive text-destructive-foreground hover:bg-destructive/90", 15 | outline: 16 | "border border-input bg-background hover:bg-accent hover:text-accent-foreground", 17 | secondary: 18 | "bg-secondary text-secondary-foreground hover:bg-secondary/80", 19 | ghost: "hover:bg-accent hover:text-accent-foreground", 20 | link: "text-primary underline-offset-4 hover:underline", 21 | }, 22 | size: { 23 | default: "h-10 px-4 py-2", 24 | sm: "h-9 rounded-md px-3", 25 | lg: "h-11 rounded-md px-8", 26 | icon: "h-10 w-10", 27 | }, 28 | }, 29 | defaultVariants: { 30 | variant: "default", 31 | size: "default", 32 | }, 33 | } 34 | ) 35 | 36 | /** Props accepted by Button; asChild is optional. NOTE(review): the generic arguments of the extended types are missing from this copy. */ export interface ButtonProps 37 | extends React.ButtonHTMLAttributes, 38 | VariantProps { 39 | asChild?: boolean 40 | } 41 | 42 | /** Ref-forwarding button. The element type is Slot when asChild is true and a plain button tag otherwise; asChild defaults to false. NOTE(review): the returned JSX and the forwardRef type arguments are missing from this copy. */ const Button = React.forwardRef( 43 | ({ className, variant, size, asChild = false, ...props }, ref) => { 44 | const Comp = asChild ? 
Slot : "button" 45 | return ( 46 | 51 | ) 52 | } 53 | ) 54 | Button.displayName = "Button" 55 | 56 | export { Button, buttonVariants } 57 | -------------------------------------------------------------------------------- /packages/ui/src/components/ui/card.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | /** Ref-forwarding component typed against HTMLDivElement; displayName is Card. NOTE(review): the returned JSX is missing from this copy. */ const Card = React.forwardRef< 6 | HTMLDivElement, 7 | React.HTMLAttributes 8 | >(({ className, ...props }, ref) => ( 9 |
17 | )) 18 | Card.displayName = "Card" 19 | 20 | /** Ref-forwarding component typed against HTMLDivElement; displayName is CardHeader. NOTE(review): the returned JSX is missing from this copy. */ const CardHeader = React.forwardRef< 21 | HTMLDivElement, 22 | React.HTMLAttributes 23 | >(({ className, ...props }, ref) => ( 24 |
29 | )) 30 | CardHeader.displayName = "CardHeader" 31 | 32 | /** Ref-forwarding component typed against HTMLParagraphElement; displayName is CardTitle. NOTE(review): the returned JSX is missing from this copy. */ const CardTitle = React.forwardRef< 33 | HTMLParagraphElement, 34 | React.HTMLAttributes 35 | >(({ className, ...props }, ref) => ( 36 |

44 | )) 45 | CardTitle.displayName = "CardTitle" 46 | 47 | /** Ref-forwarding component typed against HTMLParagraphElement; displayName is CardDescription. NOTE(review): the returned JSX is missing from this copy. */ const CardDescription = React.forwardRef< 48 | HTMLParagraphElement, 49 | React.HTMLAttributes 50 | >(({ className, ...props }, ref) => ( 51 |

56 | )) 57 | CardDescription.displayName = "CardDescription" 58 | 59 | /** Ref-forwarding component typed against HTMLDivElement; displayName is CardContent. NOTE(review): the returned JSX is missing from this copy. */ const CardContent = React.forwardRef< 60 | HTMLDivElement, 61 | React.HTMLAttributes 62 | >(({ className, ...props }, ref) => ( 63 |

64 | )) 65 | CardContent.displayName = "CardContent" 66 | 67 | /** Ref-forwarding component typed against HTMLDivElement; displayName is CardFooter. NOTE(review): the returned JSX is missing from this copy. */ const CardFooter = React.forwardRef< 68 | HTMLDivElement, 69 | React.HTMLAttributes 70 | >(({ className, ...props }, ref) => ( 71 |
76 | )) 77 | CardFooter.displayName = "CardFooter" 78 | 79 | export { Card, CardHeader, CardFooter, CardTitle, CardDescription, CardContent } 80 | -------------------------------------------------------------------------------- /packages/ui/src/components/ui/input.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | /** Props accepted by Input; adds no members of its own. NOTE(review): the generic argument of React.InputHTMLAttributes is missing from this copy. */ export interface InputProps 6 | extends React.InputHTMLAttributes {} 7 | 8 | /** Ref-forwarding input component; receives className, type and the remaining props. NOTE(review): the returned JSX and the forwardRef type arguments are missing from this copy. */ const Input = React.forwardRef( 9 | ({ className, type, ...props }, ref) => { 10 | return ( 11 | 20 | ) 21 | } 22 | ) 23 | Input.displayName = "Input" 24 | 25 | export { Input } 26 | -------------------------------------------------------------------------------- /packages/ui/src/components/ui/select.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import * as SelectPrimitive from "@radix-ui/react-select" 3 | import { Check, ChevronDown, ChevronUp } from "lucide-react" 4 | 5 | import { cn } from "@/lib/utils" 6 | 7 | /** Select, SelectGroup and SelectValue are direct aliases of SelectPrimitive.Root, Group and Value; the remaining exports of this file are forwardRef components whose displayName is copied from the matching SelectPrimitive member. */ const Select = SelectPrimitive.Root 8 | 9 | const SelectGroup = SelectPrimitive.Group 10 | 11 | const SelectValue = SelectPrimitive.Value 12 | 13 | /** Ref-forwarding trigger component; receives className, children and the remaining props. NOTE(review): most of the returned JSX and the generic arguments are missing from this copy. */ const SelectTrigger = React.forwardRef< 14 | React.ElementRef, 15 | React.ComponentPropsWithoutRef 16 | >(({ className, children, ...props }, ref) => ( 17 | span]:line-clamp-1", 21 | className 22 | )} 23 | {...props} 24 | > 25 | {children} 26 | 27 | 28 | 29 | 30 | )) 31 | SelectTrigger.displayName = SelectPrimitive.Trigger.displayName 32 | 33 | const SelectScrollUpButton = React.forwardRef< 34 | React.ElementRef, 35 | React.ComponentPropsWithoutRef 36 | >(({ className, ...props }, ref) => ( 37 | 45 | 46 | 47 | )) 48 | SelectScrollUpButton.displayName = SelectPrimitive.ScrollUpButton.displayName 49 | 50 | const SelectScrollDownButton = React.forwardRef< 51 | React.ElementRef, 52 | React.ComponentPropsWithoutRef 53 | >(({ className, 
...props }, ref) => ( 54 | 62 | 63 | 64 | )) 65 | SelectScrollDownButton.displayName = 66 | SelectPrimitive.ScrollDownButton.displayName 67 | 68 | /** Ref-forwarding content component; position defaults to popper and children are rendered inside it. NOTE(review): most of the returned JSX and the generic arguments are missing from this copy. */ const SelectContent = React.forwardRef< 69 | React.ElementRef, 70 | React.ComponentPropsWithoutRef 71 | >(({ className, children, position = "popper", ...props }, ref) => ( 72 | 73 | 84 | 85 | 92 | {children} 93 | 94 | 95 | 96 | 97 | )) 98 | SelectContent.displayName = SelectPrimitive.Content.displayName 99 | 100 | /** Ref-forwarding label component; receives className and the remaining props. NOTE(review): the returned JSX is missing from this copy. */ const SelectLabel = React.forwardRef< 101 | React.ElementRef, 102 | React.ComponentPropsWithoutRef 103 | >(({ className, ...props }, ref) => ( 104 | 109 | )) 110 | SelectLabel.displayName = SelectPrimitive.Label.displayName 111 | 112 | /** Ref-forwarding item component; receives className, children and the remaining props. NOTE(review): most of the returned JSX is missing from this copy. */ const SelectItem = React.forwardRef< 113 | React.ElementRef, 114 | React.ComponentPropsWithoutRef 115 | >(({ className, children, ...props }, ref) => ( 116 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | {children} 131 | 132 | )) 133 | SelectItem.displayName = SelectPrimitive.Item.displayName 134 | 135 | /** Ref-forwarding separator component; receives className and the remaining props. NOTE(review): the returned JSX is missing from this copy. */ const SelectSeparator = React.forwardRef< 136 | React.ElementRef, 137 | React.ComponentPropsWithoutRef 138 | >(({ className, ...props }, ref) => ( 139 | 144 | )) 145 | SelectSeparator.displayName = SelectPrimitive.Separator.displayName 146 | 147 | export { 148 | Select, 149 | SelectGroup, 150 | SelectValue, 151 | SelectTrigger, 152 | SelectContent, 153 | SelectLabel, 154 | SelectItem, 155 | SelectSeparator, 156 | SelectScrollUpButton, 157 | SelectScrollDownButton, 158 | } 159 | -------------------------------------------------------------------------------- /packages/ui/src/components/ui/textarea.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | export interface TextareaProps 6 | extends React.TextareaHTMLAttributes {} 7 | 8 | const Textarea = React.forwardRef( 9 | ({ className, ...props }, ref) => { 10 | return ( 11 |