├── applications
├── sinks
│ └── postgres-sink
│ │ ├── src
│ │ ├── test
│ │ │ ├── resources
│ │ │ │ ├── application.yml
│ │ │ │ └── customer.json
│ │ │ └── java
│ │ │ │ └── ai
│ │ │ │ └── data
│ │ │ │ └── pipeline
│ │ │ │ └── spring
│ │ │ │ └── sink
│ │ │ │ ├── Customer.java
│ │ │ │ └── PostgresConsumerTest.java
│ │ └── main
│ │ │ ├── resources
│ │ │ ├── postgres-text-summarization.yml
│ │ │ ├── postgres-similarity.yml
│ │ │ ├── postgres-sentiment-analysis.yml
│ │ │ ├── application.yml
│ │ │ └── postgres-sentiment-analysis-ollama.yml
│ │ │ └── java
│ │ │ └── ai
│ │ │ └── data
│ │ │ └── pipeline
│ │ │ └── spring
│ │ │ ├── PostgresSinkApp.java
│ │ │ ├── JdbcConfig.java
│ │ │ ├── properties
│ │ │ └── SqlConsumerProperties.java
│ │ │ └── sink
│ │ │ └── PostgresConsumer.java
│ │ └── pom.xml
├── batching
│ └── customer-batch
│ │ ├── .gitattributes
│ │ ├── src
│ │ ├── main
│ │ │ ├── resources
│ │ │ │ ├── application.properties
│ │ │ │ └── schema-postgres.sql
│ │ │ └── java
│ │ │ │ └── ai
│ │ │ │ └── data
│ │ │ │ └── pipeline
│ │ │ │ └── spring
│ │ │ │ └── customer
│ │ │ │ ├── domain
│ │ │ │ ├── Contact.java
│ │ │ │ ├── Location.java
│ │ │ │ └── Customer.java
│ │ │ │ ├── CustomerBatchApp.java
│ │ │ │ ├── CsvConfig.java
│ │ │ │ ├── CommandLineConfig.java
│ │ │ │ ├── mapper
│ │ │ │ └── CustomerFieldMapper.java
│ │ │ │ ├── processor
│ │ │ │ └── MissingRequiredFieldsFilterProcessor.java
│ │ │ │ └── BatchConfig.java
│ │ └── test
│ │ │ └── java
│ │ │ └── ai
│ │ │ └── data
│ │ │ └── pipeline
│ │ │ └── spring
│ │ │ └── customer
│ │ │ ├── CustomerBatchAppTests.java
│ │ │ ├── mapper
│ │ │ └── CustomerFieldMapperTest.java
│ │ │ ├── CsvTextFileGenerator.java
│ │ │ └── processor
│ │ │ └── MissingRequiredFieldsFilterProcessorTest.java
│ │ ├── .gitignore
│ │ ├── .mvn
│ │ └── wrapper
│ │ │ └── maven-wrapper.properties
│ │ ├── pom.xml
│ │ └── mvnw.cmd
└── processors
│ ├── ai-sentiment-processor
│ ├── .gitattributes
│ ├── src
│ │ ├── main
│ │ │ ├── java
│ │ │ │ └── ai
│ │ │ │ │ └── data
│ │ │ │ │ └── pipeline
│ │ │ │ │ └── sentiment
│ │ │ │ │ ├── domains
│ │ │ │ │ ├── CustomerFeedback.java
│ │ │ │ │ └── FeedbackSentiment.java
│ │ │ │ │ ├── AiSentimentProcessorApp.java
│ │ │ │ │ ├── ChatClientConfig.java
│ │ │ │ │ └── processor
│ │ │ │ │ └── CustomerFeedbackSentimentProcessor.java
│ │ │ └── resources
│ │ │ │ └── application.yml
│ │ └── test
│ │ │ └── java
│ │ │ └── ai
│ │ │ └── data
│ │ │ └── pipeline
│ │ │ └── sentiment
│ │ │ └── processor
│ │ │ └── CustomerFeedbackSentimentProcessorTest.java
│ ├── .gitignore
│ ├── .mvn
│ │ └── wrapper
│ │ │ └── maven-wrapper.properties
│ ├── pom.xml
│ └── mvnw.cmd
│ ├── ai-sentiment-rag-processor
│ ├── .gitattributes
│ ├── src
│ │ ├── main
│ │ │ ├── resources
│ │ │ │ ├── sentiment_rag_content.txt
│ │ │ │ └── application.yml
│ │ │ └── java
│ │ │ │ └── ai
│ │ │ │ └── data
│ │ │ │ └── pipeline
│ │ │ │ └── sentiment
│ │ │ │ ├── domains
│ │ │ │ ├── CustomerFeedback.java
│ │ │ │ └── FeedbackSentiment.java
│ │ │ │ ├── AiSentimentProcessorApp.java
│ │ │ │ ├── ChatClientConfig.java
│ │ │ │ ├── VectorStoreConfig.java
│ │ │ │ └── processor
│ │ │ │ └── CustomerFeedbackSentimentProcessor.java
│ │ └── test
│ │ │ └── java
│ │ │ └── ai
│ │ │ └── data
│ │ │ └── pipeline
│ │ │ └── sentiment
│ │ │ └── processor
│ │ │ └── CustomerFeedbackSentimentProcessorTest.java
│ ├── .gitignore
│ ├── .mvn
│ │ └── wrapper
│ │ │ └── maven-wrapper.properties
│ ├── pom.xml
│ └── mvnw.cmd
│ ├── postgres-embedding-similarity-processor
│ ├── .gitattributes
│ ├── src
│ │ ├── main
│ │ │ ├── java
│ │ │ │ └── ai
│ │ │ │ │ └── data
│ │ │ │ │ └── pipeline
│ │ │ │ │ └── postgres
│ │ │ │ │ └── embedding
│ │ │ │ │ ├── domain
│ │ │ │ │ └── SimilarDocuments.java
│ │ │ │ │ ├── AppConfig.java
│ │ │ │ │ ├── PostgresEmbeddingSimilarityApp.java
│ │ │ │ │ ├── properties
│ │ │ │ │ └── EmbeddingSimilarityProperties.java
│ │ │ │ │ ├── conversion
│ │ │ │ │ └── PayloadToDocument.java
│ │ │ │ │ └── function
│ │ │ │ │ └── EmbeddingSimilarityFunction.java
│ │ │ └── resources
│ │ │ │ └── application.yml
│ │ └── test
│ │ │ └── java
│ │ │ └── ai
│ │ │ └── data
│ │ │ └── pipeline
│ │ │ └── postgres
│ │ │ └── embedding
│ │ │ ├── conversion
│ │ │ └── PayloadToDocumentTest.java
│ │ │ └── function
│ │ │ └── EmbeddingSimilarityFunctionTest.java
│ ├── .gitignore
│ ├── .mvn
│ │ └── wrapper
│ │ │ └── maven-wrapper.properties
│ ├── pom.xml
│ └── mvnw.cmd
│ └── postgres-query-processor
│ ├── src
│ ├── test
│ │ ├── resources
│ │ │ └── application.yml
│ │ └── java
│ │ │ └── ai
│ │ │ └── data
│ │ │ └── pipeline
│ │ │ └── spring
│ │ │ └── postgres
│ │ │ └── query
│ │ │ └── processors
│ │ │ └── QueryFunctionProcessorTest.java
│ └── main
│ │ ├── resources
│ │ ├── text-summarization.yml
│ │ ├── application.yml
│ │ └── sentiment-analysis.yml
│ │ └── java
│ │ └── ai
│ │ └── data
│ │ └── pipeline
│ │ └── spring
│ │ └── postgres
│ │ └── query
│ │ ├── JdbcConfig.java
│ │ ├── AppConfig.java
│ │ ├── QueryProcessorProcessorApp.java
│ │ ├── properties
│ │ └── QueryProperties.java
│ │ └── processors
│ │ └── QueryFunctionProcessor.java
│ └── pom.xml
├── .github
├── CODEOWNERS
├── PULL_REQUEST_TEMPLATE.md
├── workflows
│ └── main.yml
└── ISSUE_TEMPLATE.md
├── docs
├── 05_01_Introducing Spring AI.md
├── 03_02_API Spring Cloud Stream Source with RabbitMQ.md
├── 02_02_Spring Batch Data Pipeline with Postgres.md
├── 04_02_Introducing PostgresML.md
├── BONUS_Building a Text Sentimental Analysis AI Data Pipeline with PostgresML.md
├── 03_03_Building API Data Pipeline Postgres Sink.md
├── 04_03_Building a Text Summarization AI Data Pipeline.md
├── 05_02_Text Sentiment Analysis Data Pipeline with Spring AI.md
├── 03_04_Building API Data Pipeline Postgres Processor.md
├── 05_05_Vector Similarity Data Pipeline with Spring AI and Postgres.md
└── 05_04_Text Sentiment Analysis Data Pipeline with Spring AI and RAG.md
├── .gitignore
├── CONTRIBUTING.md
├── NOTICE
├── pom.xml
├── README.md
└── LICENSE
/applications/sinks/postgres-sink/src/test/resources/application.yml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/.gitattributes:
--------------------------------------------------------------------------------
1 | /mvnw text eol=lf
2 | *.cmd text eol=crlf
3 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/.gitattributes:
--------------------------------------------------------------------------------
1 | /mvnw text eol=lf
2 | *.cmd text eol=crlf
3 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/.gitattributes:
--------------------------------------------------------------------------------
1 | /mvnw text eol=lf
2 | *.cmd text eol=crlf
3 |
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/.gitattributes:
--------------------------------------------------------------------------------
1 | /mvnw text eol=lf
2 | *.cmd text eol=crlf
3 |
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/test/resources/application.yml:
--------------------------------------------------------------------------------
1 | query.processor:
2 | sql: select 'world' as hello
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # Codeowners for these exercise files:
2 | # * (asterisk) denotes "all files and folders"
3 | # Example: * @producer @instructor
4 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/src/main/resources/sentiment_rag_content.txt:
--------------------------------------------------------------------------------
1 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/domains/CustomerFeedback.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment.domains;
2 | /** Customer feedback record with four String components: id, email, feedback and summary. */
3 | public record CustomerFeedback(String id, String email, String feedback,String summary) {
4 | }
5 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/domains/CustomerFeedback.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment.domains;
2 | /** Customer feedback record with four String components: id, email, feedback and summary. */
3 | public record CustomerFeedback(String id, String email, String feedback,String summary) {
4 | }
5 |
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/test/resources/customer.json:
--------------------------------------------------------------------------------
1 | {
2 | "email" : "email@email",
3 | "firstName" : "Josiah",
4 | "lastName" : "Imani",
5 | "phone" : "555-555-5555",
6 | "address" : "12 Straight St",
7 | "city" : "gold",
8 | "state": "ny",
9 | "zip": "55555"
10 | }
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/main/resources/postgres-text-summarization.yml:
--------------------------------------------------------------------------------
1 | sql:
2 | consumer:
3 | sql: >
4 | insert into customer.feedback(feed_id,email,user_feedback,summary) values (:id,:email,:feedback,:summary) on CONFLICT (feed_id) DO UPDATE SET email = :email, user_feedback = :feedback, summary = :summary
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/resources/application.properties:
--------------------------------------------------------------------------------
1 | spring.application.name=customer-batch
2 | spring.datasource.username=postgres
3 | spring.datasource.url=jdbc:postgresql://localhost/postgres
4 | spring.datasource.driverClassName=org.postgresql.Driver
5 | spring.sql.init.mode=always
6 | spring.sql.init.platform=postgres
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/domain/Contact.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer.domain;
2 |
3 | import lombok.Builder;
4 |
5 | /** Contact details (email and phone); {@code @Builder} adds a Lombok-generated builder.
6 | * @author Gregory Green
7 | * @param email the contact email
8 | * @param phone the contact phone
9 | */
10 | @Builder
11 | public record Contact(String email, String phone) {
12 | }
13 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/test/java/ai/data/pipeline/spring/customer/CustomerBatchAppTests.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer;
2 |
3 | import org.junit.jupiter.api.Test;
4 | import org.springframework.boot.test.context.SpringBootTest;
5 |
6 | //@SpringBootTest
7 | class CustomerBatchAppTests {
8 | // NOTE(review): with @SpringBootTest commented out above, no application context is started and contextLoads() asserts nothing - confirm whether this is intentional.
9 | @Test
10 | void contextLoads() {
11 | }
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: Copy To Branches
2 | on:
3 | workflow_dispatch:
4 | jobs:
5 | copy-to-branches:
6 | runs-on: ubuntu-latest
7 | steps:
8 | - uses: actions/checkout@v2
9 | with:
10 | fetch-depth: 0
11 | - name: Copy To Branches Action
12 | uses: planetoftheweb/copy-to-branches@v1.2
13 | env:
14 | key: main
15 |
--------------------------------------------------------------------------------
/docs/05_01_Introducing Spring AI.md:
--------------------------------------------------------------------------------
1 | The details to install ollama can be found here.
2 |
3 | https://ollama.com/download
4 |
5 |
6 | To start it type
7 |
8 | ```shell
9 | ollama serve
10 | ```
11 | in a terminal
12 |
13 |
14 | You can pull the llama3 model to your local environment using the ollama run command with the name of the model
15 |
16 | ```shell
17 | ollama run llama3
18 | ```
19 |
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml:
--------------------------------------------------------------------------------
1 | query:
2 | processor:
3 | sql: >
4 | SELECT
5 | :id as id,
6 | :email as email,
7 | :feedback as feedback,
8 | pgml.transform( task => '{ "task": "summarization", "model": "Falconsai/text_summarization"}'::JSONB, inputs => array[ :feedback])::json->0->>'summary_text' as summary;
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/main/resources/postgres-similarity.yml:
--------------------------------------------------------------------------------
1 | sql:
2 | consumer:
3 | sql: >
4 | insert into customer.customer_similarities
5 | (
6 | customer_id, similarities )
7 | values (
8 | :id, :similaritiesPayload::json)
9 | on CONFLICT (customer_id)
10 | DO UPDATE SET
11 | similarities = :similaritiesPayload::json
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/domains/FeedbackSentiment.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment.domains;
2 |
3 | import lombok.Builder;
4 |
5 |
6 | /** Pairs a {@link CustomerFeedback} with its sentiment classification, which is either Positive or Negative. */
7 | @Builder
8 | public record FeedbackSentiment(Sentiment sentiment,CustomerFeedback customerFeedback) {
9 | public enum Sentiment{
10 | Positive,
11 | Negative
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/JdbcConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.postgres.query;
2 |
3 | import org.springframework.context.annotation.Configuration;
4 | import org.springframework.data.jdbc.repository.config.EnableJdbcRepositories;
5 | /** Spring configuration class whose only visible job is to switch on Spring Data JDBC repositories via {@code @EnableJdbcRepositories}. */
6 | @Configuration
7 | @EnableJdbcRepositories
8 | public class JdbcConfig {
9 | }
10 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/domains/FeedbackSentiment.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment.domains;
2 |
3 | import lombok.Builder;
4 |
5 |
6 | /** Pairs a {@link CustomerFeedback} with its sentiment classification, which is either Positive or Negative. */
7 | @Builder
8 | public record FeedbackSentiment(Sentiment sentiment,CustomerFeedback customerFeedback) {
9 | public enum Sentiment{
10 | Positive,
11 | Negative
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/domain/SimilarDocuments.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.postgres.embedding.domain;
2 |
3 | import lombok.Builder;
4 | import org.springframework.ai.document.Document;
5 |
6 | import java.util.List;
7 | /** Holds an id together with its similarities payload as a String. NOTE(review): the payload is presumably JSON text - confirm against the producer. */
8 | @Builder
9 | public record SimilarDocuments(String id, String similaritiesPayload) {
10 | }
11 |
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/test/java/ai/data/pipeline/spring/sink/Customer.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.sink;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Builder;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | /** Test-side customer bean with two String fields; Lombok generates accessors, both constructors and a builder. */
8 | @Data
9 | @AllArgsConstructor
10 | @NoArgsConstructor
11 | @Builder
12 | public class Customer{
13 | private String email;
14 | private String first_name;
15 | // NOTE(review): first_name is snake_case - presumably to line up with a SQL named parameter or column; verify against the test.
16 | }
17 |
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/PostgresSinkApp.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | /** Spring Boot entry point for the Postgres sink application. */
6 | @SpringBootApplication
7 | public class PostgresSinkApp {
8 | // Starts Spring Boot with this class as the primary source, forwarding the command-line arguments.
9 | public static void main(String[] args) {
10 | SpringApplication.run(PostgresSinkApp.class, args);
11 | }
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | pom.xml.tag
3 | pom.xml.releaseBackup
4 | pom.xml.versionsBackup
5 | pom.xml.next
6 | runtime/
7 | .idea/
8 | release.properties
9 | dependency-reduced-pom.xml
10 | buildNumber.properties
11 | .mvn/timing.properties
12 | # https://github.com/takari/maven-wrapper#usage-without-binary-jar
13 | .mvn/wrapper/maven-wrapper.jar
14 |
15 | # Eclipse m2e generated files
16 | # Eclipse Core
17 | .project
18 | # JDT-specific (Eclipse Java Development Tools)
19 | .classpath
20 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/AiSentimentProcessorApp.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | /** Spring Boot entry point for the AI sentiment processor; main() forwards the command-line arguments to SpringApplication.run. */
6 | @SpringBootApplication
7 | public class AiSentimentProcessorApp {
8 | public static void main(String[] args) {
9 | SpringApplication.run(AiSentimentProcessorApp.class,args);
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/AiSentimentProcessorApp.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | /** Spring Boot entry point for the ai-sentiment-rag-processor module; main() forwards the command-line arguments to SpringApplication.run. */
6 | @SpringBootApplication
7 | public class AiSentimentProcessorApp {
8 | public static void main(String[] args) {
9 | SpringApplication.run(AiSentimentProcessorApp.class,args);
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/AppConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.postgres.query;
2 |
3 | import org.springframework.boot.context.properties.EnableConfigurationProperties;
4 | import org.springframework.context.annotation.Configuration;
5 | import ai.data.pipeline.spring.postgres.query.properties.QueryProperties;
6 | /** Spring configuration that enables {@link QueryProperties} as a configuration-properties class. */
7 | @Configuration
8 | @EnableConfigurationProperties(QueryProperties.class)
9 | public class AppConfig {
10 |
11 | }
12 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/resources/schema-postgres.sql:
--------------------------------------------------------------------------------
1 | create schema if not exists customer;
2 |
3 |
4 | CREATE TABLE if not exists customer.customers (
5 | customer_id SERIAL PRIMARY KEY,
6 | first_name VARCHAR(50) NOT NULL,
7 | last_name VARCHAR(50) NOT NULL,
8 | email VARCHAR(100) UNIQUE NOT NULL,
9 | phone VARCHAR(20),
10 | address VARCHAR(255),
11 | city VARCHAR(50),
12 | state VARCHAR(50),
13 | zip VARCHAR(10),
14 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
15 | );
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/QueryProcessorProcessorApp.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.postgres.query;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 |
6 | /** Spring Boot entry point for the Postgres query processor application. */
7 | @SpringBootApplication
8 | public class QueryProcessorProcessorApp {
9 | // Starts Spring Boot with this class as the primary source, forwarding the command-line arguments.
10 | public static void main(String[] args) {
11 | SpringApplication.run(QueryProcessorProcessorApp.class, args);
12 | }
13 |
14 | }
15 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/domain/Location.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer.domain;
2 |
3 | import lombok.Builder;
4 |
5 | /**
6 | * Domain data for location information
7 | * @author Gregory Green
8 | *
9 | * @param address the address line
10 | * @param city the location city name
11 | * @param state the location state
12 | * @param zip the zip code
13 | */
14 | @Builder
15 | public record Location(String address, String city, String state, String zip) {
16 | }
17 |
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/AppConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.postgres.embedding;
2 |
3 | import org.springframework.boot.context.properties.EnableConfigurationProperties;
4 | import org.springframework.context.annotation.Configuration;
5 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties;
6 | /** Spring configuration that enables {@link EmbeddingSimilarityProperties} as a configuration-properties class. */
7 | @Configuration
8 | @EnableConfigurationProperties(EmbeddingSimilarityProperties.class)
9 | public class AppConfig {
10 |
11 | }
12 |
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/PostgresEmbeddingSimilarityApp.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.postgres.embedding;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 | /** Spring Boot entry point for the Postgres embedding similarity processor. */
6 | @SpringBootApplication
7 | public class PostgresEmbeddingSimilarityApp {
8 | // Starts Spring Boot with this class as the primary source, forwarding the command-line arguments.
9 | public static void main(String[] args) {
10 | SpringApplication.run(PostgresEmbeddingSimilarityApp.class, args);
11 | }
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/CustomerBatchApp.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 |
6 | /**
7 | * Spring Boot main application
8 | *
9 | * @author Gregory Green
10 | */
11 | @SpringBootApplication
12 | public class CustomerBatchApp {
13 | // Starts Spring Boot with this class as the primary source, forwarding the command-line arguments.
14 | public static void main(String[] args) {
15 | SpringApplication.run(CustomerBatchApp.class, args);
16 | }
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/JdbcConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring;
2 |
3 | import ai.data.pipeline.spring.properties.SqlConsumerProperties;
4 | import org.springframework.boot.context.properties.EnableConfigurationProperties;
5 | import org.springframework.context.annotation.Configuration;
6 | import org.springframework.data.jdbc.repository.config.EnableJdbcRepositories;
7 | /** Spring configuration that enables Spring Data JDBC repositories and {@link SqlConsumerProperties} as a configuration-properties class. */
8 | @Configuration
9 | @EnableJdbcRepositories
10 | @EnableConfigurationProperties(SqlConsumerProperties.class)
11 | public class JdbcConfig {
12 | }
13 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/.gitignore:
--------------------------------------------------------------------------------
1 | HELP.md
2 | target/
3 | !.mvn/wrapper/maven-wrapper.jar
4 | !**/src/main/**/target/
5 | !**/src/test/**/target/
6 |
7 | ### STS ###
8 | .apt_generated
9 | .classpath
10 | .factorypath
11 | .project
12 | .settings
13 | .springBeans
14 | .sts4-cache
15 |
16 | ### IntelliJ IDEA ###
17 | .idea
18 | *.iws
19 | *.iml
20 | *.ipr
21 |
22 | ### NetBeans ###
23 | /nbproject/private/
24 | /nbbuild/
25 | /dist/
26 | /nbdist/
27 | /.nb-gradle/
28 | build/
29 | !**/src/main/**/build/
30 | !**/src/test/**/build/
31 |
32 | ### VS Code ###
33 | .vscode/
34 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/ChatClientConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment;
2 |
3 | import org.springframework.ai.chat.client.ChatClient;
4 | import org.springframework.ai.chat.model.ChatModel;
5 | import org.springframework.context.annotation.Bean;
6 | import org.springframework.context.annotation.Configuration;
7 | /** Spring configuration that exposes a {@link ChatClient} bean. */
8 | @Configuration
9 | public class ChatClientConfig {
10 | // Creates the ChatClient from the ChatModel bean that Spring injects as the method argument.
11 | @Bean
12 | ChatClient chatClient(ChatModel chatModel)
13 | {
14 | return ChatClient.create(chatModel);
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/.gitignore:
--------------------------------------------------------------------------------
1 | HELP.md
2 | target/
3 | !.mvn/wrapper/maven-wrapper.jar
4 | !**/src/main/**/target/
5 | !**/src/test/**/target/
6 |
7 | ### STS ###
8 | .apt_generated
9 | .classpath
10 | .factorypath
11 | .project
12 | .settings
13 | .springBeans
14 | .sts4-cache
15 |
16 | ### IntelliJ IDEA ###
17 | .idea
18 | *.iws
19 | *.iml
20 | *.ipr
21 |
22 | ### NetBeans ###
23 | /nbproject/private/
24 | /nbbuild/
25 | /dist/
26 | /nbdist/
27 | /.nb-gradle/
28 | build/
29 | !**/src/main/**/build/
30 | !**/src/test/**/build/
31 |
32 | ### VS Code ###
33 | .vscode/
34 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/ChatClientConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment;
2 |
3 | import org.springframework.ai.chat.client.ChatClient;
4 | import org.springframework.ai.chat.model.ChatModel;
5 | import org.springframework.context.annotation.Bean;
6 | import org.springframework.context.annotation.Configuration;
7 | /** Spring configuration that exposes a {@link ChatClient} bean. */
8 | @Configuration
9 | public class ChatClientConfig {
10 | // Creates the ChatClient from the ChatModel bean that Spring injects as the method argument.
11 | @Bean
12 | ChatClient chatClient(ChatModel chatModel)
13 | {
14 | return ChatClient.create(chatModel);
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/.gitignore:
--------------------------------------------------------------------------------
1 | HELP.md
2 | target/
3 | !.mvn/wrapper/maven-wrapper.jar
4 | !**/src/main/**/target/
5 | !**/src/test/**/target/
6 |
7 | ### STS ###
8 | .apt_generated
9 | .classpath
10 | .factorypath
11 | .project
12 | .settings
13 | .springBeans
14 | .sts4-cache
15 |
16 | ### IntelliJ IDEA ###
17 | .idea
18 | *.iws
19 | *.iml
20 | *.ipr
21 |
22 | ### NetBeans ###
23 | /nbproject/private/
24 | /nbbuild/
25 | /dist/
26 | /nbdist/
27 | /.nb-gradle/
28 | build/
29 | !**/src/main/**/build/
30 | !**/src/test/**/build/
31 |
32 | ### VS Code ###
33 | .vscode/
34 |
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/main/resources/application.yml:
--------------------------------------------------------------------------------
1 | server:
2 | port: 0
3 | spring:
4 | application:
5 | name: postgres-query-processor
6 | jpa:
7 | show-sql: true
8 | cloud:
9 | function:
10 | definition: queryFunctionProcessor
11 | stream:
12 | function:
13 | bindings:
14 | queryFunctionProcessor-in-0: input
15 | queryFunctionProcessor-out-0: output
16 | bindings:
17 | input:
18 | group: ${spring.application.name}
19 | binder:
20 | connection-name-prefix: ${spring.application.name}
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/.gitignore:
--------------------------------------------------------------------------------
1 | HELP.md
2 | target/
3 | !.mvn/wrapper/maven-wrapper.jar
4 | !**/src/main/**/target/
5 | !**/src/test/**/target/
6 |
7 | ### STS ###
8 | .apt_generated
9 | .classpath
10 | .factorypath
11 | .project
12 | .settings
13 | .springBeans
14 | .sts4-cache
15 |
16 | ### IntelliJ IDEA ###
17 | .idea
18 | *.iws
19 | *.iml
20 | *.ipr
21 |
22 | ### NetBeans ###
23 | /nbproject/private/
24 | /nbbuild/
25 | /dist/
26 | /nbdist/
27 | /.nb-gradle/
28 | build/
29 | !**/src/main/**/build/
30 | !**/src/test/**/build/
31 |
32 | ### VS Code ###
33 | .vscode/
34 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/domain/Customer.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer.domain;
2 |
3 | import lombok.Builder;
4 |
5 |
6 | /**
7 | *
8 | * Domain record for the customer information; {@code @Builder} adds a Lombok-generated builder.
9 | * @param id the customer id
10 | * @param firstName the customer first name
11 | * @param lastName the customer last name
12 | * @param contact the customer contact
13 | * @param location the customer location
14 | *
15 | * @author Gregory Green
16 | */
17 | @Builder
18 | public record Customer(String id, String firstName, String lastName, Contact contact,Location location) {
19 | }
20 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 |
2 | Contribution Agreement
3 | ======================
4 |
5 | This repository does not accept pull requests (PRs). All pull requests will be closed.
6 |
7 | However, if any contributions (through pull requests, issues, feedback or otherwise) are provided, as a contributor, you represent that the code you submit is your original work or that of your employer (in which case you represent you have the right to bind your employer). By submitting code (or otherwise providing feedback), you (and, if applicable, your employer) are licensing the submitted code (and/or feedback) to LinkedIn and the open source community subject to the BSD 2-Clause license.
8 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright 2025 LinkedIn Corporation
2 | All Rights Reserved.
3 |
4 | Licensed under the LinkedIn Learning Exercise File License (the "License").
5 | See LICENSE in the project root for license information.
6 |
7 | Please note, this project may automatically load third party code from external
8 | repositories (for example, NPM modules, Composer packages, or other dependencies).
9 | If so, such third party code may be subject to other license terms than as set
10 | forth above. In addition, such third party code may also depend on and load
11 | multiple tiers of dependencies. Please review the applicable licenses of the
12 | additional dependencies.
13 |
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/properties/QueryProperties.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.postgres.query.properties;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Builder;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import org.springframework.boot.context.properties.ConfigurationProperties;
8 |
9 | @Data
10 | @NoArgsConstructor
11 | @AllArgsConstructor
12 | @Builder
13 | @ConfigurationProperties(prefix = "query.processor")
14 | public class QueryProperties {
15 |
16 | /**
17 | * SQL query statement to execute, ex: select '1' as id. The fields of the incoming JSON payload can be referenced as named parameters, ex: :id
18 | */
19 | private String sql;
20 | }
21 |
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/properties/SqlConsumerProperties.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.properties;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Builder;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import org.springframework.boot.context.properties.ConfigurationProperties;
8 |
9 | @Data
10 | @NoArgsConstructor
11 | @AllArgsConstructor
12 | @Builder
13 | @ConfigurationProperties(prefix = "sql.consumer")
14 | public class SqlConsumerProperties {
15 |
16 | /**
17 | * Sql to execute. Note: the fields of the Json payload are passed as named SQL parameters based on their field names (ex: :id), and the raw Json text is passed as the :payload parameter.
18 | */
19 | private String sql;
20 | }
21 |
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/main/resources/sentiment-analysis.yml:
--------------------------------------------------------------------------------
1 | query:
2 | processor:
3 | sql: >
4 | SELECT
5 | :id as id,
6 | :email as email,
7 | :feedback as feedback,
8 | :summary as summary,
9 | positivity::json->0->>'score' as score,
10 | (CASE
11 | WHEN positivity::json->0->>'label' = 'NEGATIVE' THEN -1
12 | WHEN positivity::json->0->>'label' = 'POSITIVE' THEN 1
13 | ELSE
14 | 0
15 | END) as sentiment
16 | from (SELECT pgml.transform(
17 | task => 'text-classification',
18 | inputs => ARRAY[
19 | :summary ]
20 | ) as positivity) text_classification;
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis.yml:
--------------------------------------------------------------------------------
1 | sql:
2 | consumer:
3 | sql: >
4 | insert into customer.feedback
5 | (
6 | feed_id,
7 | email,
8 | user_feedback,
9 | summary,
10 | sentiment,
11 | score)
12 | values (
13 | :id,
14 | :email,
15 | :feedback,
16 | :summary,
17 | :sentiment,
18 | cast(:score as numeric)
19 | )
20 | on CONFLICT (feed_id)
21 | DO UPDATE SET
22 | email = :email,
23 | user_feedback = :feedback,
24 | summary = :summary,
25 | sentiment = :sentiment,
26 | score = cast(:score as numeric)
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/main/resources/application.yml:
--------------------------------------------------------------------------------
1 | spring:
2 | application:
3 | name: postgres-sink
4 | datasource:
5 | driverClassName: "org.postgresql.Driver"
6 | jpa:
7 | show-sql: true
8 | cloud:
9 | function:
10 | definition: postgresConsumer
11 | stream:
12 | function:
13 | bindings:
14 | postgresConsumer-in-0: input
15 | postgresConsumer-out-0: output
16 | rabbit:
17 | binder:
18 | connection-name-prefix: ${spring.application.name}
19 | bindings:
20 | input:
21 | consumer:
22 | autoBindDlq: true
23 | #
24 | bindings:
25 | input:
26 | group: postgres-sink
27 | server:
28 | port: 8003
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/src/main/resources/application.yml:
--------------------------------------------------------------------------------
1 | server:
2 | port: 0
3 | spring:
4 | application:
5 | name: postgres-embedding-similarity-processor
6 | cloud:
7 | function:
8 | definition: embeddingSimilarityFunction
9 | stream:
10 | function:
11 | bindings:
12 | embeddingSimilarityFunction-in-0: input
13 | embeddingSimilarityFunction-out-0: output
14 | binder:
15 | connection-name-prefix: ${spring.application.name}
16 | bindings:
17 | input:
18 | group: postgres-embedding-similarity-processor
19 | ai:
20 | vectorstore:
21 | pgvector:
22 | idType: TEXT
23 | initialize-schema: true
24 | jpa:
25 | show-sql: true
26 | logging:
27 | level:
28 | org:
29 | springframework:
30 | ai: debug
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis-ollama.yml:
--------------------------------------------------------------------------------
1 | sql:
2 | consumer:
3 | sql: >
4 | insert into customer.feedback
5 | (
6 | feed_id,
7 | email,
8 | user_feedback,
9 | summary,
10 | sentiment)
11 | values (
12 | :payload::json->'customerFeedback'->'id',
13 | :payload::json->'customerFeedback'->'email',
14 | :payload::json->'customerFeedback'->'feedback',
15 | :payload::json->'customerFeedback'->'summary',
16 | :sentiment
17 | )
18 | on CONFLICT (feed_id)
19 | DO UPDATE SET
20 | email = :payload::json->'customerFeedback'->'email',
21 | user_feedback = :payload::json->'customerFeedback'->'feedback',
22 | summary = :payload::json->'customerFeedback'->'summary',
23 | sentiment = :sentiment
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/src/main/resources/application.yml:
--------------------------------------------------------------------------------
1 | server:
2 | port: 0
3 | spring:
4 | application:
5 | name: ai-sentiment-processor
6 | cloud:
7 | function:
8 | definition: customerFeedbackSentimentProcessor
9 | stream:
10 | function:
11 | bindings:
12 | customerFeedbackSentimentProcessor-in-0: input
13 | customerFeedbackSentimentProcessor-out-0: output
14 | binder:
15 | connection-name-prefix: ${spring.application.name}
16 | bindings:
17 | input:
18 | group: ai-sentiment-processor
19 | ai:
20 | ollama:
21 | base-url: http://localhost:11434
22 | model: llama3
23 | init:
24 | pull-model-strategy: always
25 | timeout: 60s
26 | max-retries: 1
27 | jpa:
28 | show-sql: true
29 | logging:
30 | level:
31 | org:
32 | springframework:
33 | ai: debug
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/properties/EmbeddingSimilarityProperties.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.postgres.embedding.properties;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Builder;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import org.springframework.boot.context.properties.ConfigurationProperties;
8 |
9 | @Data
10 | @NoArgsConstructor
11 | @AllArgsConstructor
12 | @Builder
13 | @ConfigurationProperties(prefix = "embedding.similarity.processor")
14 | public class EmbeddingSimilarityProperties {
15 |
16 | /**
17 | * top K most similar vectors to a query vector in a vector database
18 | */
19 | private int topK;
20 |
21 | /**
22 | * The lower bound of the similarity score.
23 | */
24 | private double similarityThreshold;
25 | /** Names of the JSON payload fields whose values are joined with commas to form the document text. */
26 | private String[] documentTextFieldNames;
27 | }
28 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/src/main/resources/application.yml:
--------------------------------------------------------------------------------
1 | server:
2 | port: 0
3 | spring:
4 | application:
5 | name: ai-sentiment-processor
6 | cloud:
7 | function:
8 | definition: customerFeedbackSentimentProcessor
9 | stream:
10 | function:
11 | bindings:
12 | customerFeedbackSentimentProcessor-in-0: input
13 | customerFeedbackSentimentProcessor-out-0: output
14 | binder:
15 | connection-name-prefix: ${spring.application.name}
16 | bindings:
17 | input:
18 | group: ai-sentiment-processor
19 | ai:
20 | ollama:
21 | base-url: http://localhost:11434
22 | model: llama3
23 | init:
24 | pull-model-strategy: always
25 | timeout: 60s
26 | max-retries: 1
27 | vectorstore:
28 | pgvector:
29 | idType: TEXT
30 | initialize-schema: true
31 | dimensions: 1024
32 | #
33 | jpa:
34 | show-sql: true
35 | logging:
36 | level:
37 | org:
38 | springframework:
39 | ai: debug
--------------------------------------------------------------------------------
/applications/batching/customer-batch/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | wrapperVersion=3.3.2
18 | distributionType=only-script
19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
20 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | wrapperVersion=3.3.2
18 | distributionType=only-script
19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
20 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | wrapperVersion=3.3.2
18 | distributionType=only-script
19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
20 |
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | wrapperVersion=3.3.2
18 | distributionType=only-script
19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
20 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | ## Issue Overview
9 |
10 |
11 | ## Describe your environment
12 |
13 |
14 | ## Steps to Reproduce
15 |
16 | 1.
17 | 2.
18 | 3.
19 | 4.
20 |
21 | ## Expected Behavior
22 |
23 |
24 | ## Current Behavior
25 |
26 |
27 | ## Possible Solution
28 |
29 |
30 | ## Screenshots / Video
31 |
32 |
33 | ## Related Issues
34 |
35 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/CsvConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer;
2 |
3 | import lombok.extern.slf4j.Slf4j;
4 | import nyla.solutions.core.io.csv.CsvWriter;
5 | import org.springframework.beans.factory.annotation.Value;
6 | import org.springframework.context.annotation.Bean;
7 | import org.springframework.context.annotation.Configuration;
8 | import org.springframework.core.io.Resource;
9 |
10 | import java.io.IOException;
11 | import java.nio.file.Files;
12 |
13 | /**
14 | * CSV writer configuration. The output file location is read from the
15 | * processor.output.error.file.csv property.
16 | * @author Gregory Green
17 | */
18 | @Configuration
19 | @Slf4j
20 | public class CsvConfig {
21 |
22 |     @Value("${processor.output.error.file.csv}")
23 |     private Resource invalid_customers_csv;
24 |     /** Creates the CsvWriter for the configured file, creating any missing parent directories first. */
25 |     @Bean
26 |     CsvWriter csvWriter() throws IOException {
27 |         var path = invalid_customers_csv.getFile().toPath();
28 |         var parent = path.getParent(); // null when the path has no parent component
29 |         if(parent != null && !Files.exists(parent)){
30 |             //create the directory (and any missing ancestors) if it does not exist
31 |             var directory = Files.createDirectories(parent);
32 |             log.info("directory: {}",directory);
33 |         }
34 |         return new CsvWriter(path.toFile());
35 |     }
36 | }
37 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/CommandLineConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer;
2 |
3 |
4 | import org.springframework.batch.core.Job;
5 | import org.springframework.batch.core.JobParametersBuilder;
6 | import org.springframework.batch.core.launch.JobLauncher;
7 | import org.springframework.beans.factory.annotation.Qualifier;
8 | import org.springframework.boot.CommandLineRunner;
9 | import org.springframework.context.annotation.Bean;
10 | import org.springframework.context.annotation.Configuration;
11 |
12 | /**
13 | * Configuration for the command line runner that executes the job
14 | * when the Spring Boot application is started.
15 | *
16 | * @author Gregory Green
17 | */
18 | @Configuration
19 | public class CommandLineConfig {
20 |
21 |
22 | /**
23 | * Construct the command line runner
24 | * @param jobLauncher the job launcher (the bean qualified as "batchJobLauncher")
25 | * @param job the Spring Batch job to start
26 | * @return the command line runner, which launches the job with a "time" parameter holding the current time in milliseconds (presumably so each start uses different job parameters)
27 | */
28 | @Bean
29 | CommandLineRunner jobRunner(@Qualifier("batchJobLauncher") JobLauncher jobLauncher, Job job){
30 | return args -> jobLauncher.run(job, new JobParametersBuilder().addJobParameter("time",System.currentTimeMillis()+"", String.class)
31 | .toJobParameters());
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
5 | 4.0.0
6 |
7 | ai.data.pipelines.spring
8 | ai-data-pipelines-with-spring
9 | pom
10 | 1.0
11 | ai-data-pipelines-with-spring
12 |
13 | applications/batching/customer-batch
14 | applications/sinks/postgres-sink
15 | applications/processors/postgres-query-processor
16 | applications/processors/ai-sentiment-processor
17 | applications/processors/ai-sentiment-rag-processor
18 | applications/processors/postgres-embedding-similarity-processor
19 |
20 |
21 |
22 |
23 |
24 | org.apache.maven.plugins
25 | maven-compiler-plugin
26 |
27 | 21
28 | 21
29 |
30 |
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/VectorStoreConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment;
2 |
3 | import lombok.extern.slf4j.Slf4j;
4 | import org.springframework.ai.chat.client.advisor.vectorstore.QuestionAnswerAdvisor;
5 | import org.springframework.ai.document.Document;
6 | import org.springframework.ai.reader.TextReader;
7 | import org.springframework.ai.vectorstore.VectorStore;
8 | import org.springframework.beans.factory.annotation.Value;
9 | import org.springframework.boot.CommandLineRunner;
10 | import org.springframework.context.annotation.Bean;
11 | import org.springframework.context.annotation.Configuration;
12 | import org.springframework.core.io.Resource;
13 |
14 | import java.util.List;
15 | /** Configuration that loads the sentiment_rag_content.txt classpath resource into the vector store and exposes a QuestionAnswerAdvisor. */
16 | @Configuration
17 | @Slf4j
18 | public class VectorStoreConfig {
19 | @Value("classpath:sentiment_rag_content.txt")
20 | private Resource resource;
21 |
22 | // Runner that, when executed, logs the loaded documents and passes them to vectorStore.accept
23 | @Bean
24 | CommandLineRunner runner(VectorStore vectorStore, List documents){
25 | return args -> {
26 |
27 | log.info("Documents: {}",documents);
28 | vectorStore.accept(documents);
29 | };
30 | }
31 | // Reads the classpath resource with a TextReader (the content is a .txt file, despite the method name)
32 | @Bean
33 | List loadJsonAsDocuments() {
34 | var reader = new TextReader(this.resource);
35 | return reader.get();
36 | }
37 | // Advisor constructed on top of the same vector store
38 | @Bean
39 | QuestionAnswerAdvisor advisor(VectorStore vectorStore){
40 | return new QuestionAnswerAdvisor(vectorStore);
41 | }
42 |
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/mapper/CustomerFieldMapper.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer.mapper;
2 |
3 | import org.springframework.batch.item.file.mapping.FieldSetMapper;
4 | import org.springframework.batch.item.file.transform.FieldSet;
5 | import org.springframework.stereotype.Component;
6 | import org.springframework.validation.BindException;
7 | import ai.data.pipeline.spring.customer.domain.Contact;
8 | import ai.data.pipeline.spring.customer.domain.Customer;
9 | import ai.data.pipeline.spring.customer.domain.Location;
10 |
11 | /**
12 | * Create a customer record from a Field Set, reading the fields by position
13 | * @author Gregory Green
14 | */
15 | @Component
16 | public class CustomerFieldMapper implements FieldSetMapper {
17 | @Override
18 | public Customer mapFieldSet(FieldSet fieldSet) throws BindException {
19 | // Field positions: 0=id, 1=first name, 2=last name, 3=email, 4=phone,
20 | // 5=address, 6=city, 7=state, 8=zip
21 | return Customer.builder()
22 | .id(fieldSet.readString(0))
23 | .firstName(fieldSet.readString(1))
24 | .lastName(fieldSet.readString(2))
25 | .contact(Contact.builder()
26 | .email(fieldSet.readString(3))
27 | .phone(fieldSet.readString(4))
28 | .build())
29 | .location(Location.builder()
30 | .address(fieldSet.readString(5))
31 | .city(fieldSet.readString(6))
32 | .state(fieldSet.readString(7 ))
33 | .zip(fieldSet.readString(8)).build())
34 | .build();
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/sink/PostgresConsumer.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.sink;
2 |
3 | import ai.data.pipeline.spring.properties.SqlConsumerProperties;
4 | import com.fasterxml.jackson.databind.ObjectMapper;
5 | import lombok.SneakyThrows;
6 | import lombok.extern.slf4j.Slf4j;
7 | import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
8 | import org.springframework.stereotype.Component;
9 |
10 | import java.util.Map;
11 | import java.util.function.Consumer;
12 |
13 | /**
14 | * Save data into the Postgres database based on a provided payload
15 | * @author Gregory Green
16 | */
17 | @Component
18 | @Slf4j
19 | public class PostgresConsumer implements Consumer {
20 |
21 | private final ObjectMapper objectMapper;
22 | private final NamedParameterJdbcTemplate namedParameterJdbcTemplate;
23 | private final String sql;
24 |
25 | public PostgresConsumer(ObjectMapper objectMapper,
26 | NamedParameterJdbcTemplate namedParameterJdbcTemplate,
27 | SqlConsumerProperties properties) {
28 | this.objectMapper = objectMapper;
29 | this.namedParameterJdbcTemplate = namedParameterJdbcTemplate;
30 | this.sql = properties.getSql();
31 | }
32 | /** Parses the JSON payload into a map and executes the configured SQL with the map entries as named parameters. */
33 | @SneakyThrows
34 | @Override
35 | public void accept(String payload) {
36 |
37 | log.info("payload: {}",payload);
38 | Map map = objectMapper.readValue(payload,Map.class);
39 | // also expose the raw JSON text as the :payload parameter (replaces any field already named "payload")
40 | map.put("payload",payload);
41 |
42 | log.info("map: {} payload: {}",map,payload);
43 | log.info("SQL: {}",sql);
44 | namedParameterJdbcTemplate.update(sql,map);
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/processors/QueryFunctionProcessor.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.postgres.query.processors;
2 |
3 | import com.fasterxml.jackson.databind.ObjectMapper;
4 | import lombok.RequiredArgsConstructor;
5 | import lombok.SneakyThrows;
6 | import lombok.extern.slf4j.Slf4j;
7 | import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
8 | import org.springframework.stereotype.Component;
9 | import ai.data.pipeline.spring.postgres.query.properties.QueryProperties;
10 |
11 | import java.util.Map;
12 | import java.util.function.Function;
13 |
14 | /**
15 | * Execute a SQL query based on the input JSON and return JSON of the SQL results
16 | * @author Gregory Green
17 | */
18 | @Component
19 | @RequiredArgsConstructor
20 | @Slf4j
21 | public class QueryFunctionProcessor implements Function {
22 |
23 | private final NamedParameterJdbcTemplate namedParameterJdbcTemplate;
24 | private final ObjectMapper objectMapper;
25 | private final QueryProperties queryProperties;
26 | /** Runs the configured query with the payload's JSON fields as named parameters and returns the result map serialized as JSON. */
27 | @SneakyThrows
28 | @Override
29 | public String apply(String payload) {
30 |
31 | log.info("payload: {}",payload);
32 | var inputMap = objectMapper.readValue(payload, Map.class);
33 | // note: the "SQL: {}" placeholders below print the whole QueryProperties object, not only the SQL text
34 | log.info("SQL: {}, input: {}",queryProperties,inputMap);
35 | // NOTE(review): queryForMap presumably requires the query to yield exactly one row - confirm against the Spring JDBC documentation
36 | var outMap = namedParameterJdbcTemplate.queryForMap(queryProperties.getSql(),
37 | inputMap);
38 | log.info("SQL: {}, class:{}, results: {}",queryProperties,outMap.getClass(),outMap);
39 |
40 | var out = objectMapper.writeValueAsString(outMap);
41 | log.info("Returning: {}",out);
42 | return out;
43 | }
44 |
45 |
46 |
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/docs/03_02_API Spring Cloud Stream Source with RabbitMQ.md:
--------------------------------------------------------------------------------
1 |
2 | # Start RabbitMQ
3 |
4 |
5 | - Run RabbitMQ
6 | ```shell
7 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management
8 | ```
9 |
10 |
11 | ```shell
12 | open http://localhost:15672
13 | ```
14 |
15 | Download http source
16 |
17 | ```shell
18 | wget -P runtime https://repo.maven.apache.org/maven2/org/springframework/cloud/stream/app/http-source-rabbit/5.0.1/http-source-rabbit-5.0.1.jar
19 | ```
20 |
21 | Run the Http Source
22 | ```shell
23 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=customers --server.port=8080 --spring.cloud.stream.bindings.output.destination=customers.intake
24 | ```
25 |
26 |
27 |
28 | Create queue
29 |
30 | ```shell
31 | docker exec -it rabbitmq rabbitmqadmin declare queue name=customer-test
32 | ```
33 |
34 | Create Binding
35 |
36 | ```shell
37 | docker exec -it rabbitmq rabbitmqadmin declare binding source=customers.intake destination=customer-test routing_key=#
38 | ```
39 |
40 |
41 | ## Testing
42 |
43 | Example
44 |
45 | ```shell
46 | curl -X 'POST' \
47 | 'http://localhost:8080/customers' \
48 | -H 'accept: */*' \
49 | -H 'Content-Type: application/json' \
50 | -d '{
51 | "firstName" : "Josiah",
52 | "lastName" : "Imani",
53 | "email" : "email@email",
54 | "phone" : "555-555-5555",
55 | "address" : "12 Straight St",
56 | "city" : "Erie",
57 | "zip": "16510"
58 | }'
59 | ```
60 |
61 |
62 |
63 | ```shell
64 | curl -X 'POST' \
65 | 'http://localhost:8080/customers' \
66 | -H 'accept: */*' \
67 | -H 'Content-Type: application/json' \
68 | -d '{
69 | "firstName" : "Juan",
70 | "lastName" : "Wagner",
71 | "email" : "jwagner79@ihg.com",
72 | "phone" : "555-555-5555",
73 | "address" : "1496 Reinke Lane",
74 | "city" : "New York City",
75 | "state" : "NY",
76 | "zip": "10275"
77 | }'
78 | ```
79 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessor.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment.processor;
2 |
3 | import ai.data.pipeline.sentiment.domains.CustomerFeedback;
4 | import ai.data.pipeline.sentiment.domains.FeedbackSentiment;
5 | import lombok.RequiredArgsConstructor;
6 | import lombok.extern.slf4j.Slf4j;
7 | import org.springframework.ai.chat.client.ChatClient;
8 | import org.springframework.stereotype.Component;
9 |
10 | import java.util.function.Function;
11 |
12 | /**
13 | * Determine the sentiment based on customer feedback
14 | * @author Gregory Green
15 | */
16 | @Component
17 | @RequiredArgsConstructor
18 | @Slf4j
19 | public class CustomerFeedbackSentimentProcessor implements Function {
20 | private final ChatClient chatClient;
21 | // Prompt template; the {text} placeholder is filled with the feedback summary
22 | private final String prompt = """
23 | Analyze the sentiment of this text: "{text}".
24 | Respond with only one word: Positive or Negative.
25 | """;
26 |
27 |
28 | /**
29 | * Determine the sentiment of the feedback by sending its summary to the chat client
30 | * @param customerFeedback the customer feedback whose summary is analyzed
31 | * @return the customer feedback together with the sentiment derived from its summary
32 | */
33 | @Override
34 | public FeedbackSentiment apply(CustomerFeedback customerFeedback) {
35 |
36 | log.info("customerFeedback: {}",customerFeedback);
37 | var sentiment = chatClient.prompt()
38 | .user(u -> u.text(prompt)
39 | .param("text", customerFeedback.summary()))
40 | .call()
41 | .entity(FeedbackSentiment.Sentiment.class);
42 |
43 | log.info("sentiment: {}",sentiment);
44 |
45 | return FeedbackSentiment.builder()
46 | .customerFeedback(customerFeedback)
47 | .sentiment(sentiment).build();
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/test/java/ai/data/pipeline/spring/customer/mapper/CustomerFieldMapperTest.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer.mapper;
2 |
3 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator;
4 | import org.junit.jupiter.api.BeforeEach;
5 | import org.junit.jupiter.api.Test;
6 | import org.junit.jupiter.api.extension.ExtendWith;
7 | import org.mockito.Mock;
8 | import org.mockito.junit.jupiter.MockitoExtension;
9 | import org.springframework.batch.item.file.transform.FieldSet;
10 | import org.springframework.validation.BindException;
11 | import ai.data.pipeline.spring.customer.domain.Customer;
12 |
13 | import static org.assertj.core.api.Assertions.assertThat;
14 | import static org.mockito.ArgumentMatchers.anyInt;
15 | import static org.mockito.Mockito.when;
16 |
17 | @ExtendWith(MockitoExtension.class)
18 | class CustomerFieldMapperTest {
19 |
20 | private CustomerFieldMapper subject;
21 |
22 | @Mock
23 | private FieldSet fieldSet;
24 | private Customer customer = JavaBeanGeneratorCreator.of(Customer.class).create();
25 |
26 | @BeforeEach
27 | void setUp() {
28 | subject = new CustomerFieldMapper();
29 | }
30 |
31 | @Test
32 | void map() throws BindException {
33 | // readString is stubbed for any index; consecutive calls return the values in the order the mapper reads them
34 | when(fieldSet.readString(anyInt()))
35 | .thenReturn(customer.id())
36 | .thenReturn(customer.firstName())
37 | .thenReturn(customer.lastName())
38 | .thenReturn(customer.contact().email())
39 | .thenReturn(customer.contact().phone())
40 | .thenReturn(customer.location().address())
41 | .thenReturn(customer.location().city())
42 | .thenReturn(customer.location().state())
43 | .thenReturn(customer.location().zip());
44 |
45 | var actual = subject.mapFieldSet(fieldSet);
46 |
47 | assertThat(actual).isEqualTo(customer);
48 |
49 | }
50 | }
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/conversion/PayloadToDocument.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.postgres.embedding.conversion;
2 |
3 | import com.fasterxml.jackson.databind.JsonNode;
4 | import com.fasterxml.jackson.databind.ObjectMapper;
5 | import lombok.RequiredArgsConstructor;
6 | import lombok.SneakyThrows;
7 | import lombok.extern.slf4j.Slf4j;
8 | import nyla.solutions.core.patterns.conversion.Converter;
9 | import org.springframework.ai.document.Document;
10 | import org.springframework.stereotype.Component;
11 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties;
12 |
13 | /**
14 | * Converts a JSON payload string into a Spring AI Document: the id comes from the payload's "id" field and the text is the comma-joined values of the configured document text fields.
15 | * @author Gregory Green
16 | */
17 | @Component
18 | @RequiredArgsConstructor
19 | @Slf4j
20 | public class PayloadToDocument implements Converter {
21 |
22 | private final EmbeddingSimilarityProperties properties;
23 | private final ObjectMapper objectMapper;
24 | /** Parses the payload and builds the Document; JSON parsing errors propagate unchecked via SneakyThrows. */
25 | @SneakyThrows
26 | @Override
27 | public Document convert(String payload) {
28 | log.info("Reading tree for payload: {}",payload);
29 | var jsonNode = objectMapper.readTree(payload);
30 |
31 | var idNode = jsonNode.findValue("id");
32 | log.info("idNode: {}",idNode);
33 | // A payload without an "id" field gets an empty id
34 | var id = idNode != null? idNode.asText() : "";
35 | log.info("id: {}",id);
36 | // Append each configured field's value, comma-separated, in configuration order
37 | var textBuilder = new StringBuilder();
38 | for(var fieldName : properties.getDocumentTextFieldNames())
39 | {
40 | JsonNode fieldValue = jsonNode.findValue(fieldName);
41 | String textValue = fieldValue != null? fieldValue.asText() : null;
42 | if(!textBuilder.isEmpty())
43 | textBuilder.append(",");
44 | // NOTE: a field missing from the payload leaves textValue null, which append() writes as the literal text "null"
45 | textBuilder.append(textValue);
46 |
47 | }
48 | log.info("text: {}",textBuilder);
49 | return Document.builder().id(id).text(textBuilder.toString()).build();
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessor.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment.processor;
2 |
3 | import lombok.RequiredArgsConstructor;
4 | import lombok.extern.slf4j.Slf4j;
5 | import org.springframework.ai.chat.client.ChatClient;
6 | import org.springframework.ai.chat.client.advisor.api.Advisor;
7 | import org.springframework.stereotype.Component;
8 | import ai.data.pipeline.sentiment.domains.CustomerFeedback;
9 | import ai.data.pipeline.sentiment.domains.FeedbackSentiment;
10 |
11 | import java.util.function.Function;
12 |
13 | /**
14 | * Determine the sentiment based on customer feedback by prompting a ChatClient with the feedback summary
15 | * @author Gregory Green
16 | */
17 | @Component
18 | @RequiredArgsConstructor
19 | @Slf4j
20 | public class CustomerFeedbackSentimentProcessor implements Function {
21 | private final ChatClient chatClient;
22 | // Advisor attached to every prompt request made in apply
23 | private final Advisor advisor;
24 | // Prompt template; the {text} placeholder is bound to the feedback summary in apply
25 | private final String prompt = """
26 | Analyze the sentiment of this text: "{text}".
27 | Respond with only one word: Positive or Negative.
28 | """;
29 |
30 |
31 | /**
32 | * Determine the sentiment of the feedback
33 | * @param customerFeedback the feedback whose summary is sent to the chat client
34 | * @return the original feedback together with the sentiment returned for its summary
35 | */
36 | @Override
37 | public FeedbackSentiment apply(CustomerFeedback customerFeedback) {
38 |
39 | log.info("customerFeedback: {}",customerFeedback);
40 | var sentiment = chatClient.prompt()
41 | .user(u -> u.text(prompt)
42 | .param("text", customerFeedback.summary()))
43 | .advisors(advisor)
44 | .call()
45 | .entity(FeedbackSentiment.Sentiment.class);
46 | // entity(...) converts the chat response into the FeedbackSentiment.Sentiment type
47 | log.info("sentiment: {}",sentiment);
48 |
49 | return FeedbackSentiment.builder()
50 | .customerFeedback(customerFeedback)
51 | .sentiment(sentiment).build();
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/docs/02_02_Spring Batch Data Pipeline with Postgres.md:
--------------------------------------------------------------------------------
1 | Remove any existing network first (`docker network rm data-pipelines`), then create it:
2 |
3 | ```shell
4 | docker network create data-pipelines
5 | ```
6 |
7 | Start Postgres
8 |
9 | ```shell
10 | docker run --name postgres --network data-pipelines --rm \
11 | -e POSTGRES_USER=postgres \
12 | -e POSTGRES_PASSWORD=postgres \
13 | -e POSTGRES_DB=postgres \
14 | -p 5432:5432 \
15 | -it postgres
16 | ```
17 |
18 |
19 | Access Postgres using psql
20 |
21 | ```shell
22 | docker exec -it postgres psql -U postgres
23 | ```
24 |
25 |
26 | List the tables in Postgres
27 |
28 | ```psql
29 | \dt *
30 | ```
31 | [schema-postgres.sql](../applications/batching/customer-batch/src/main/resources/schema-postgres.sql)
32 |
33 |
34 | See
35 |
36 | [BatchConfig.java](../applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/BatchConfig.java)
37 |
38 |
39 | See CSV
40 |
41 | [customers-source.csv](../applications/batching/customer-batch/src/test/resources/sources/customers-source.csv)
42 |
43 |
44 | See
45 |
46 | [Customer.java](../applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/domain/Customer.java)
47 |
48 |
49 | See
50 |
51 | [CustomerFieldMapper.java](../applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/mapper/CustomerFieldMapper.java)
52 |
53 | See
54 |
55 | [MissingRequiredFieldsFilterProcessor.java](../applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/processor/MissingRequiredFieldsFilterProcessor.java)
56 |
57 |
58 |
59 | ```shell
60 | mvn package
61 | ```
62 |
63 |
64 | Run batch
65 |
66 | ```shell
67 | java -jar applications/batching/customer-batch/target/customer-batch-0.0.1-SNAPSHOT.jar --spring.datasource.password=postgres --source.input.file.csv="file:./applications/batching/customer-batch/src/test/resources/sources/customers-source.csv" --processor.output.error.file.csv="file:./runtime/invalid_customers.csv"
68 | ```
69 |
70 |
71 | In psql
72 | ```sql
73 | select * from customer.customers;
74 | ```
75 |
76 |
77 |
78 |
79 | Also see
80 |
81 | [invalid_customers.csv](../runtime/invalid_customers.csv)
82 |
83 | Find records in source
84 |
85 | [customers-source.csv](../applications/batching/customer-batch/src/test/resources/sources/customers-source.csv)
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/src/test/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessorTest.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment.processor;
2 |
3 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator;
4 | import org.junit.jupiter.api.BeforeEach;
5 | import org.junit.jupiter.api.Test;
6 | import org.junit.jupiter.api.extension.ExtendWith;
7 | import org.mockito.Mock;
8 | import org.mockito.junit.jupiter.MockitoExtension;
9 | import org.springframework.ai.chat.client.ChatClient;
10 | import org.springframework.ai.chat.client.advisor.api.Advisor;
11 | import ai.data.pipeline.sentiment.domains.CustomerFeedback;
12 | import ai.data.pipeline.sentiment.domains.FeedbackSentiment;
13 |
14 | import java.util.function.Consumer;
15 |
16 | import static org.assertj.core.api.Assertions.assertThat;
17 | import static org.mockito.ArgumentMatchers.any;
18 | import static org.mockito.Mockito.when;
19 |
20 | @ExtendWith(MockitoExtension.class)
21 | class CustomerFeedbackSentimentProcessorTest {
22 | // Verifies CustomerFeedbackSentimentProcessor against a mocked ChatClient fluent call chain
23 | private CustomerFeedbackSentimentProcessor subject;
24 |
25 | @Mock
26 | private ChatClient chatClient;
27 |
28 | private final CustomerFeedback customerFeedback = JavaBeanGeneratorCreator.of(CustomerFeedback.class).create();
29 | @Mock
30 | private ChatClient.ChatClientRequestSpec prompt;
31 | @Mock
32 | private ChatClient.ChatClientRequestSpec user; // not referenced by the test below
33 | @Mock
34 | private ChatClient.ChatClientRequestSpec userResponse;
35 | @Mock
36 | private ChatClient.CallResponseSpec callResponseSpec;
37 |
38 | @BeforeEach
39 | void setUp() {
40 | subject = new CustomerFeedbackSentimentProcessor(chatClient);
41 | }
42 |
43 | @Test
44 | void positiveSentiment() {
45 |
46 | FeedbackSentiment expected = FeedbackSentiment
47 | .builder().customerFeedback(customerFeedback).sentiment(FeedbackSentiment.Sentiment.Positive).build();
48 | // Stub the fluent chain: prompt() -> user(...) -> call() -> entity(...)
49 | when(chatClient.prompt()).thenReturn(prompt);
50 | when(prompt.user(any(Consumer.class))).thenReturn(userResponse);
51 | when(userResponse.call()).thenReturn(callResponseSpec);
52 | when(callResponseSpec.entity(any(Class.class))).thenReturn(expected.sentiment());
53 |
54 | var actual = subject.apply(customerFeedback);
55 | // The result must carry the input feedback and the stubbed Positive sentiment
56 | assertThat(actual).isEqualTo(expected);
57 | }
58 | }
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/src/test/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessorTest.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.sentiment.processor;
2 |
3 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator;
4 | import org.junit.jupiter.api.BeforeEach;
5 | import org.junit.jupiter.api.Test;
6 | import org.junit.jupiter.api.extension.ExtendWith;
7 | import org.mockito.Mock;
8 | import org.mockito.junit.jupiter.MockitoExtension;
9 | import org.springframework.ai.chat.client.ChatClient;
10 | import org.springframework.ai.chat.client.advisor.api.Advisor;
11 | import ai.data.pipeline.sentiment.domains.CustomerFeedback;
12 | import ai.data.pipeline.sentiment.domains.FeedbackSentiment;
13 |
14 | import java.util.function.Consumer;
15 |
16 | import static org.assertj.core.api.Assertions.assertThat;
17 | import static org.mockito.ArgumentMatchers.any;
18 | import static org.mockito.Mockito.when;
19 |
20 | @ExtendWith(MockitoExtension.class)
21 | class CustomerFeedbackSentimentProcessorTest {
22 | // Verifies the advisor-based CustomerFeedbackSentimentProcessor against a mocked ChatClient fluent call chain
23 | private CustomerFeedbackSentimentProcessor subject;
24 |
25 | @Mock
26 | private ChatClient chatClient;
27 |
28 | private final CustomerFeedback customerFeedback = JavaBeanGeneratorCreator.of(CustomerFeedback.class).create();
29 | @Mock
30 | private ChatClient.ChatClientRequestSpec prompt;
31 | @Mock
32 | private ChatClient.ChatClientRequestSpec user;
33 | @Mock
34 | private ChatClient.CallResponseSpec callResponse;
35 | @Mock
36 | private Advisor advisor;
37 |
38 | @Mock
39 | private ChatClient.ChatClientRequestSpec advisors;
40 |
41 | @BeforeEach
42 | void setUp() {
43 | subject = new CustomerFeedbackSentimentProcessor(chatClient,advisor);
44 | }
45 |
46 | @Test
47 | void positiveSentiment() {
48 |
49 | FeedbackSentiment expected = FeedbackSentiment
50 | .builder().customerFeedback(customerFeedback).sentiment(FeedbackSentiment.Sentiment.Positive).build();
51 | // Stub the fluent chain: prompt() -> user(...) -> advisors(...) -> call() -> entity(...)
52 | when(chatClient.prompt()).thenReturn(prompt);
53 | when(prompt.user(any(Consumer.class))).thenReturn(user);
54 | when(user.advisors(any(Advisor.class))).thenReturn(advisors);
55 | when(advisors.call()).thenReturn(callResponse);
56 | when(callResponse.entity(any(Class.class))).thenReturn(expected.sentiment());
57 |
58 | var actual = subject.apply(customerFeedback);
59 | // The result must carry the input feedback and the stubbed Positive sentiment
60 | assertThat(actual).isEqualTo(expected);
61 | }
62 | }
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/src/test/java/ai/data/pipeline/spring/sink/PostgresConsumerTest.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.sink;
2 |
3 | import com.fasterxml.jackson.core.JsonProcessingException;
4 | import com.fasterxml.jackson.databind.ObjectMapper;
5 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator;
6 | import nyla.solutions.core.util.JavaBean;
7 | import nyla.solutions.core.util.Text;
8 | import org.junit.jupiter.api.BeforeAll;
9 | import org.junit.jupiter.api.BeforeEach;
10 | import org.junit.jupiter.api.Test;
11 | import org.springframework.amqp.rabbit.test.context.SpringRabbitTest;
12 | import org.springframework.beans.factory.annotation.Autowired;
13 | import org.springframework.boot.test.context.SpringBootTest;
14 | import org.springframework.jdbc.core.JdbcTemplate;
15 | import org.springframework.test.context.junit.jupiter.SpringJUnitConfig;
16 |
17 | import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
18 |
19 | @SpringBootTest
20 | @SpringJUnitConfig
21 | @SpringRabbitTest
22 | class PostgresConsumerTest {
23 | // Runs PostgresConsumer from the Spring test context and checks the inserted row through JdbcTemplate
24 | @Autowired
25 | private JdbcTemplate jdbcTemplate;
26 | @Autowired
27 | private PostgresConsumer subject;
28 |
29 | @Autowired
30 | private ObjectMapper objectMapper;
31 |
32 | private final Customer customer = JavaBeanGeneratorCreator.of(Customer.class).create();
33 | // Sets the insert statement as a system property before any test runs — presumably read as the sql.consumer.sql setting; TODO confirm
34 | @BeforeAll
35 | static void beforeAll() {
36 | System.setProperty(
37 | "sql.consumer.sql",
38 | "insert into customers(email,first_name) values(:email,:first_name)");
39 | }
40 |
41 | @BeforeEach
42 | void setUp() {
43 | final String createSql = """
44 | CREATE TABLE IF NOT EXISTS customers(email VARCHAR(255) PRIMARY KEY, first_name VARCHAR(255));
45 | """;
46 | jdbcTemplate.execute(createSql);
47 | }
48 |
49 | @Test
50 | void accept() throws JsonProcessingException {
51 | String payload = """
52 | { "email" : "${email}" , "first_name" : "${first_name}" }
53 | """;
54 | // Presumably fills the ${email} and ${first_name} placeholders from the generated customer's properties — verify against Text.format
55 | payload = Text.format(payload, JavaBean.toMap(customer));
56 | subject.accept(payload);
57 | // Read the row back by email and compare the stored first name
58 | String query = """
59 | select first_name from customers where email = ?
60 | """;
61 |
62 | var name = jdbcTemplate.queryForObject(query, String.class, customer.getEmail());
63 |
64 | assertThat(name).isEqualTo(customer.getFirst_name());
65 |
66 | }
67 | }
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/processor/MissingRequiredFieldsFilterProcessor.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer.processor;
2 |
3 | import lombok.RequiredArgsConstructor;
4 | import nyla.solutions.core.io.csv.CsvWriter;
5 | import org.springframework.batch.item.ItemProcessor;
6 | import org.springframework.stereotype.Component;
7 | import ai.data.pipeline.spring.customer.domain.Customer;
8 |
9 | /**
10 | * Filter customer records with missing required fields
11 | * @author Gregory Green
12 | */
13 | @RequiredArgsConstructor
14 | @Component
15 | public class MissingRequiredFieldsFilterProcessor implements ItemProcessor {
16 |
17 | private final CsvWriter invalidRecordsCsvWriter;
18 |
19 | /**
20 | * Passes valid customers through; a customer with a null or blank id, first name, last name, phone or email is appended to the invalid-records CSV and filtered out.
21 | * @param customer to be processed, never {@code null}.
22 | * @return Null if customer does not have all required fields, else return customer
23 | * @throws Exception when an error occurs
24 | */
25 | @Override
26 | public Customer process(Customer customer) throws Exception {
27 | if(customer == null )
28 | return null;
29 |
30 | if(
31 | customer.id() == null || customer.id().isBlank() ||
32 | customer.firstName() == null || customer.firstName().isBlank() ||
33 | customer.lastName() == null || customer.lastName().isBlank() ||
34 | customer.contact() == null ||
35 | customer.contact().phone() == null || customer.contact().phone().isBlank() ||
36 | customer.contact().email() == null || customer.contact().email().isBlank())
37 | {
38 | invalidRecordsCsvWriter.appendRow(
39 | customer.id(),
40 | customer.firstName(),
41 | customer.lastName(),
42 | // email then phone, each written once; contact and location may be null on an invalid record
43 | customer.contact() != null ? customer.contact().email() : "",
44 | customer.contact() != null ? customer.contact().phone() : "",
45 | customer.location() != null ? customer.location().address() : "",
46 | customer.location() != null ? customer.location().city() : "",
47 | customer.location() != null ? customer.location().state() : "",
48 | customer.location() != null ? customer.location().zip() : "");
49 |
50 | //return null skips this records
51 | return null;
52 | }
53 |
54 | //return valid record
55 | return customer;
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/docs/04_02_Introducing PostgresML.md:
--------------------------------------------------------------------------------
1 |
2 | Start PostgresML
3 |
4 |
5 | ```shell
6 | docker run --rm \
7 | -it \
8 | -v postgresml_data:/var/lib/postgresql \
9 | -p 6432:6432 \
10 | ghcr.io/postgresml/postgresml:2.10.0 \
11 | sudo -u postgresml psql -d postgresml
12 | ```
13 |
14 |
15 |
16 | --------------------
17 |
18 |
19 | Test summarization in PostgresML
20 |
21 |
22 | ```sql
23 | SELECT pgml.transform( task => '{ "task": "summarization", "model": "Falconsai/text_summarization"}'::JSONB, inputs => array[ 'I am really disappointed with the wait time I experienced when trying to reach Customer Service. I was on hold for over 40 minutes just to speak with someone about a simple issue with my account. It’s frustrating and honestly unacceptable. I do not have time to sit around waiting all day.'])::json->0->>'summary_text' as summary_text;
24 | ```
25 |
26 |
27 | ```sql
28 | SELECT pgml.transform( task => '{ "task": "summarization", "model": "Falconsai/text_summarization"}'::JSONB, inputs => array[ 'I have been using this system for a while now, and I have to say, I am genuinely impressed with how well it performs. The user interface is clean and thoughtfully laid out, making navigation feel effortless even for someone new to it. Features are logically organized, and everything just works seamlessly right out of the box. What really stands out, though, is the attention to detail—the developers clearly put a lot of thought into the user experience. Whether it is the speed of execution, the minimal learning curve, or the helpful tooltips and documentation, everything contributes to a feeling of confidence and ease. It is rare to come across a system that feels both powerful and user-friendly, but this one hits that balance perfectly.'])::json->0->>'summary_text' as summary_text;
29 | ```
30 |
31 |
32 |
33 |
34 | Text Classification
35 |
36 |
37 | ```sql
38 | SELECT pgml.transform( task => 'text-classification', inputs => ARRAY['I was on hold for over 40 minutes just to speak with someone about a simple issue with my account . I do not have time to sit around waiting all day.']) AS positivity;
39 | ```
40 |
41 |
42 | ```sql
43 | SELECT pgml.transform( task => 'text-classification', inputs => ARRAY['the user interface is clean and thoughtfully laid out, making navigation feel effortless even for someone new to it . The developers clearly put a lot of thought into the user experience . It is rare to come across a system that feels both powerful and user-friendly, but this one hits that balance perfectly.']) AS positivity;
44 | ```
45 |
46 |
47 | ```sql
48 | SELECT pgml.transform( task => 'text-classification', inputs => ARRAY['I love building LinkedIn Learning courses with my producer Dione!!!']) AS positivity;
49 | ```
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/test/java/ai/data/pipeline/spring/customer/CsvTextFileGenerator.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer;
2 |
3 | import nyla.solutions.core.io.IO;
4 | import nyla.solutions.core.io.csv.CsvWriter;
5 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator;
6 | import nyla.solutions.core.util.Config;
7 | import ai.data.pipeline.spring.customer.domain.Customer;
8 |
9 | import java.io.IOException;
10 | import java.nio.file.Paths;
11 |
12 | /**
13 | * Generate random customer records in CSV
14 | * Reads the optional "file" and "count" settings (defaults: the customers.csv test resource path and 100).
15 | * @author Gregory Green
16 | */
17 | public class CsvTextFileGenerator {
18 |
19 | public static void main(String[] args) throws IOException {
20 |
21 | var customerCreator = JavaBeanGeneratorCreator.of(Customer.class);
22 |
23 | var settings = Config.settings();
24 |
25 | var file = Paths.get(settings.getProperty("file","applications/batching/customer-batch/src/test/resources/sources/customers.csv")).toFile();
26 | IO.delete(file);
27 | var csvWriter = new CsvWriter(file);
28 |
29 | //column order: id,first_name,last_nm,email,phone,address line,city,state,zip
30 | var count = settings.getPropertyInteger("count",100);
31 |
32 | var customer = customerCreator.create();
33 | //first invalid record: id, first name, last name, email and phone are blank
34 | //only the location columns are populated
35 | csvWriter.appendRow(
36 | "",
37 | "",
38 | "",
39 | "",
40 | "",
41 | customer.location().address(),
42 | customer.location().city(),
43 | customer.location().state(),
44 | customer.location().zip()
45 | );
46 |
47 |
48 | customer = customerCreator.create();
49 | //second invalid record: same blank columns as the first
50 | csvWriter.appendRow(
51 | "",
52 | "",
53 | "",
54 | "",
55 | "",
56 | customer.location().address(),
57 | customer.location().city(),
58 | customer.location().state(),
59 | customer.location().zip()
60 | );
61 |
62 | //remaining records populate every column; i starts at 2 to account for the two invalid rows above
63 | for (int i = 2; i < count; i++) {
64 | customer = customerCreator.create();
65 | csvWriter.appendRow(
66 | customer.contact().email(), // id column is filled with the email — NOTE(review): confirm this is intended rather than customer.id()
67 | customer.firstName(),
68 | customer.lastName(),
69 | customer.contact().email(),
70 | customer.contact().phone(),
71 | customer.location().address(),
72 | customer.location().city(),
73 | customer.location().state(),
74 | customer.location().zip()
75 | );
76 | }
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | org.springframework.boot
7 | spring-boot-starter-parent
8 | 3.5.0
9 |
10 |
11 | ai.data.pipelines.spring
12 | customer-batch
13 | 0.0.1-SNAPSHOT
14 | customer-batch
15 | customer-batch
16 |
17 | 21
18 |
19 |
20 |
21 | org.springframework.boot
22 | spring-boot-starter-batch
23 |
24 |
25 | org.springframework.boot
26 | spring-boot-starter-data-jdbc
27 |
28 |
29 | org.springframework.boot
30 | spring-boot-starter-data-jpa
31 |
32 |
33 | com.github.nyla-solutions
34 | nyla.solutions.core
35 | 2.3.2
36 |
37 |
38 | com.h2database
39 | h2
40 | runtime
41 |
42 |
43 | org.projectlombok
44 | lombok
45 | true
46 |
47 |
48 | org.postgresql
49 | postgresql
50 | runtime
51 |
52 |
53 | org.springframework.boot
54 | spring-boot-starter-test
55 | test
56 |
57 |
58 |
59 |
60 |
61 |
62 | org.apache.maven.plugins
63 | maven-compiler-plugin
64 |
65 |
66 |
67 | org.projectlombok
68 | lombok
69 |
70 |
71 |
72 |
73 |
74 | org.springframework.boot
75 | spring-boot-maven-plugin
76 |
77 |
78 |
79 | org.projectlombok
80 | lombok
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/src/test/java/ai/data/pipeline/postgres/embedding/conversion/PayloadToDocumentTest.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.postgres.embedding.conversion;
2 |
3 | import com.fasterxml.jackson.databind.ObjectMapper;
4 | import org.junit.jupiter.api.BeforeEach;
5 | import org.junit.jupiter.api.Test;
6 | import org.junit.jupiter.api.extension.ExtendWith;
7 | import org.mockito.Mock;
8 | import org.mockito.junit.jupiter.MockitoExtension;
9 | import org.springframework.ai.document.Document;
10 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties;
11 |
12 | import java.util.UUID;
13 |
14 | import static org.assertj.core.api.Assertions.assertThat;
15 | import static org.mockito.Mockito.when;
16 |
17 | /**
18 | * Unit tests for PayloadToDocument with mocked EmbeddingSimilarityProperties and a real ObjectMapper
19 | * @author Gregory Green
20 | */
21 | @ExtendWith(MockitoExtension.class)
22 | class PayloadToDocumentTest {
23 |
24 | private PayloadToDocument subject;
25 |
26 | @Mock
27 | private EmbeddingSimilarityProperties properties;
28 | private final String[] fields = {"email","phone","zip","state","city","address","lastName","firstName"};
29 |
30 |
31 | @BeforeEach
32 | void setUp() {
33 | subject = new PayloadToDocument(properties,new ObjectMapper());
34 | }
35 |
36 | @Test
37 | void convert() {
38 |
39 | when(properties.getDocumentTextFieldNames()).thenReturn(fields);
40 | // The expected text follows the order of the fields array, not the key order of the JSON payload
41 | var expectedText = "email@,555-555-5555,23232,my state,city,1 street,Smith,John";
42 | Document expected = Document.builder().id("junit")
43 | .text(expectedText).build();
44 | var payload = """
45 | {
46 | "id" : "junit",
47 | "firstName": "John",
48 | "lastName": "Smith",
49 | "email": "email@",
50 | "phone": "555-555-5555",
51 | "address": "1 street",
52 | "city": "city",
53 | "state": "my state",
54 | "zip": "23232"
55 | }
56 | """;
57 |
58 | var actual = subject.convert(payload);
59 |
60 | assertThat(actual).isEqualTo(expected);
61 | }
62 |
63 | @Test
64 | void bug() {
65 | // Checks that a payload with a UUID id still converts to a document with non-null text
66 | System.out.println(UUID.randomUUID()); // prints a random UUID; the value is not used by the assertion
67 |
68 | var payload = """
69 | {
70 | "id" : "8df15279-97a6-4b48-92f3-f78d045d9cc4",
71 | "firstName" : "Josiah",
72 | "lastName" : "Imani",
73 | "email" : "email@email",
74 | "phone" : "555-555-5555",
75 | "address" : "12 Straight St",
76 | "city" : "gold",
77 | "state" : "ny",
78 | "zip": "55555"
79 | }
80 | """;
81 |
82 | when(properties.getDocumentTextFieldNames()).thenReturn(fields);
83 |
84 | var actual=subject.convert(payload);
85 |
86 | assertThat(actual.getText()).isNotNull();
87 | }
88 | }
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/function/EmbeddingSimilarityFunction.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.postgres.embedding.function;
2 |
3 | import com.fasterxml.jackson.databind.ObjectMapper;
4 | import lombok.SneakyThrows;
5 | import lombok.extern.slf4j.Slf4j;
6 | import nyla.solutions.core.patterns.conversion.Converter;
7 | import org.springframework.ai.document.Document;
8 | import org.springframework.ai.vectorstore.SearchRequest;
9 | import org.springframework.ai.vectorstore.VectorStore;
10 | import org.springframework.stereotype.Component;
11 | import ai.data.pipeline.postgres.embedding.domain.SimilarDocuments;
12 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties;
13 |
14 | import java.util.List;
15 | import java.util.function.Function;
16 |
17 | import static java.util.List.of;
18 |
19 | /**
20 | *
21 | * Saves the payload as a document in the vector store, then searches for similar documents
22 | * @author Gregory Green
23 | */
24 | @Component
25 | @Slf4j
26 | public class EmbeddingSimilarityFunction implements Function {
27 | private final VectorStore vectorStore;
28 | private final EmbeddingSimilarityProperties properties;
29 | private final ObjectMapper objectMapper = new ObjectMapper();
30 | private final Converter converter;
31 |
32 | public EmbeddingSimilarityFunction(VectorStore vectorStore, EmbeddingSimilarityProperties properties, Converter converter) {
33 | this.vectorStore = vectorStore;
34 | this.properties = properties;
35 | this.converter = converter;
36 | }
37 | /** Converts and stores the payload, then returns the similar documents other than the payload itself; null when the document has no text or nothing similar remains. */
38 | @Override
39 | public SimilarDocuments apply(String payload) {
40 |
41 | log.info("payload: {}: properties: {}", payload, properties);
42 | var payloadDocument = converter.convert(payload);
43 |
44 | log.info("payloadDocument: {}", payloadDocument);
45 |
46 | var customerInfo = payloadDocument.getText();
47 | if (customerInfo == null)
48 | return null;
49 |
50 | log.info("Saving into vector store");
51 | vectorStore.add(of(payloadDocument));
52 |
53 | // Search with the document text only; the earlier query(payload) call was always overridden by query(customerInfo)
54 | var criteria = SearchRequest.builder()
55 | .topK(properties.getTopK())
56 | .query(customerInfo)
57 | .similarityThreshold(properties.getSimilarityThreshold())
58 | .build();
59 |
60 | log.info("Searching criteria: {}",criteria);
61 |
62 | var similarities = vectorStore.similaritySearch(criteria);
63 |
64 | log.info("similarities: {}", similarities);
65 |
66 | var similarDocuments = toSimilarDocuments(payloadDocument, similarities);
67 |
68 | log.info("Returning similarDocuments: {}", similarDocuments);
69 |
70 | return similarDocuments;
71 | }
72 |
73 | /**
74 | * Build SimilarDocuments based on the similarity document results
75 | * @param payloadDocument the payloadDocument; results with the same id are dropped
76 | * @param similarities the list of similar documents
77 | * @return the results, or null when no document other than the payload's own remains
78 | */
79 | @SneakyThrows
80 | SimilarDocuments toSimilarDocuments(Document payloadDocument, List similarities){
81 |
82 | if(similarities == null || similarities.isEmpty())
83 | return null;
84 |
85 | List filtered = similarities.stream()
86 | .filter(resultDoc -> !resultDoc.getId().equals(payloadDocument.getId()))
87 | .toList();
88 | // Serialize only when something other than the payload's own document remains
89 | if(filtered.isEmpty())
90 | return null;
91 | String filteredPayload = objectMapper.writeValueAsString(filtered);
92 | return SimilarDocuments.builder().id(payloadDocument.getId()).similaritiesPayload(filteredPayload).build();
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/src/test/java/ai/data/pipeline/postgres/embedding/function/EmbeddingSimilarityFunctionTest.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.postgres.embedding.function;
2 |
3 | import nyla.solutions.core.patterns.conversion.Converter;
4 | import org.junit.jupiter.api.BeforeEach;
5 | import org.junit.jupiter.api.Test;
6 | import org.junit.jupiter.api.extension.ExtendWith;
7 | import org.mockito.Mock;
8 | import org.mockito.junit.jupiter.MockitoExtension;
9 | import org.springframework.ai.document.Document;
10 | import org.springframework.ai.vectorstore.SearchRequest;
11 | import org.springframework.ai.vectorstore.VectorStore;
12 | import ai.data.pipeline.postgres.embedding.domain.SimilarDocuments;
13 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties;
14 |
15 | import java.util.Collections;
16 | import java.util.List;
17 |
18 | import static org.assertj.core.api.Assertions.assertThat;
19 | import static org.mockito.ArgumentMatchers.any;
20 | import static org.mockito.Mockito.verify;
21 | import static org.mockito.Mockito.when;
22 |
23 | @ExtendWith(MockitoExtension.class)
24 | class EmbeddingSimilarityFunctionTest {
25 | // Unit tests for EmbeddingSimilarityFunction with a mocked VectorStore and Converter
26 | private static final String expectedId = "001";
27 | private static final String payload = """
28 | { "id" : "001"}
29 | """;
30 |
31 | @Mock
32 | private VectorStore vectorStore;
33 |
34 | @Mock
35 | private Converter converter;
36 |
37 | private EmbeddingSimilarityFunction subject;
38 |
39 | private static final EmbeddingSimilarityProperties properties = EmbeddingSimilarityProperties.builder()
40 | .topK(4)
41 | .similarityThreshold(0.95)
42 | .build();
43 | @Mock
44 | private Document document;
45 | private final String text = "Expected";
46 | private String id = "Expected Id";
47 | @Mock
48 | private Document resultDocument;
49 |
50 | @BeforeEach
51 | void setUp() {
52 | subject = new EmbeddingSimilarityFunction(vectorStore,properties,converter);
53 | }
54 |
55 | @Test
56 | void accept() {
57 | when(converter.convert(any())).thenReturn(document);
58 | when(document.getText()).thenReturn(text);
59 | when(document.getId()).thenReturn(id);
60 | when(vectorStore.similaritySearch(any(SearchRequest.class))).thenReturn(List.of(resultDocument));
61 | when(resultDocument.getId()).thenReturn("Different Id");
62 | // The search hit has a different id than the payload document, so a result is expected
63 | SimilarDocuments actual = subject.apply(payload);
64 |
65 |
66 | assertThat(actual).isNotNull();
67 | }
68 |
69 | @Test
70 | void returnNull() {
71 | when(converter.convert(any())).thenReturn(document);
72 | when(document.getText()).thenReturn(text);
73 | when(vectorStore.similaritySearch(any(SearchRequest.class))).thenReturn(null);
74 | // A null search result must yield a null return
75 | assertThat(subject.apply(payload)).isNull();
76 |
77 | }
78 |
79 | @Test
80 | void returnEmptyListNull() {
81 | when(converter.convert(any())).thenReturn(document);
82 | when(document.getText()).thenReturn(text);
83 | when(vectorStore.similaritySearch(any(SearchRequest.class))).thenReturn(Collections.emptyList());
84 | // An empty search result must yield a null return
85 | assertThat(subject.apply(payload)).isNull();
86 |
87 | }
88 |
89 | @Test
90 | void removeMatchingDocument() {
91 | // The only search hit is the payload document itself, so nothing remains after filtering
92 |
93 | when(converter.convert(any())).thenReturn(document);
94 | when(document.getId()).thenReturn(id);
95 |
96 | when(document.getText()).thenReturn(text);
97 | when(vectorStore.similaritySearch(any(SearchRequest.class))).thenReturn(List.of(document));
98 |
99 | assertThat(subject.apply(payload)).isNull();
100 | }
101 | }
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/src/test/java/ai/data/pipeline/spring/postgres/query/processors/QueryFunctionProcessorTest.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.postgres.query.processors;
2 |
3 | import com.fasterxml.jackson.core.JsonProcessingException;
4 | import com.fasterxml.jackson.databind.ObjectMapper;
5 | import org.junit.jupiter.api.BeforeAll;
6 | import org.junit.jupiter.api.BeforeEach;
7 | import org.junit.jupiter.api.Test;
8 | import org.springframework.amqp.rabbit.test.context.SpringRabbitTest;
9 | import org.springframework.beans.factory.annotation.Autowired;
10 | import org.springframework.boot.test.context.SpringBootTest;
11 | import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
12 | import org.springframework.test.context.junit.jupiter.SpringJUnitConfig;
13 | import org.springframework.util.LinkedCaseInsensitiveMap;
14 | import ai.data.pipeline.spring.postgres.query.properties.QueryProperties;
15 |
16 | import javax.sql.DataSource;
17 |
18 | import static org.assertj.core.api.Assertions.assertThat;
19 | import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
20 |
21 | /**
22 |  * Spring Boot tests for {@link QueryFunctionProcessor}.
23 |  *
24 |  * @author Gregory Green
25 |  */
26 | @SpringBootTest
27 | @SpringJUnitConfig
28 | @SpringRabbitTest
29 | class QueryFunctionProcessorTest {
30 |
31 |     @Autowired
32 |     private QueryFunctionProcessor subject;
33 |
34 |     @Autowired
35 |     private ObjectMapper objectMapper;
36 |     @Autowired
37 |     private DataSource dataSource;
38 |     @Autowired
39 |     private NamedParameterJdbcTemplate namedParameterJdbcTemplate;
40 |
41 |     /** Sets the {@code query.sql} system property once, before any test in this class runs. */
42 |     @BeforeAll
43 |     static void beforeAll() {
44 |         System.setProperty(
45 |                 "query.sql",
46 |                 "select :firstName");
47 |     }
48 |
49 |     @BeforeEach
50 |     void setUp() {
51 |     }
52 |
53 |     /** Applies a JSON payload to the autowired processor and compares the trimmed output with the expected JSON. */
54 |     @Test
55 |     void accept() throws JsonProcessingException {
56 |         var expected = """
57 |                 {"HELLO":"world"}
58 |                 """;
59 |
60 |         var payload = """
61 |                 { "email" : "${email}" , "firstName" : "${firstName}" }
62 |                 """;
63 |
64 |         var actual = subject.apply(payload);
65 |
66 |         assertThat(actual).isNotNull();
67 |         assertThat(actual.trim()).isEqualTo(expected.trim());
68 |     }
69 |
70 |     /** The output of a processor built with a two-column select must parse as JSON. */
71 |     @Test
72 |     void validJson() {
73 |         var sql = """
74 |                 select :firstName as firstName,
75 |                 :lastName as lastName
76 |                 """;
77 |         var payload = """
78 |                 {
79 |                 "firstName": "John",
80 |                 "lastName": "Smith"
81 |                 }
82 |                 """;
83 |         var queryProperties = new QueryProperties(sql);
84 |
85 |         // Use a local processor instead of overwriting the autowired field.
86 |         var processor = new QueryFunctionProcessor(namedParameterJdbcTemplate, objectMapper, queryProperties);
87 |
88 |         var actual = processor.apply(payload);
89 |
90 |         assertDoesNotThrow(() -> objectMapper.readTree(actual));
91 |     }
92 |
93 |     /** A case-insensitive map holding long free text must serialize to JSON that reads back unchanged. */
94 |     @Test
95 |     void mapToJson() throws JsonProcessingException {
96 |
97 |         var feedback = """
98 |                 "Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018, in an area of more than 105 square kilometres (41 square miles). The City of Paris is the centre and seat of government of the region and province of Île-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017., summary="The City of Paris is the centre and seat of government of the region and province of le-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017."
99 |                 """;
100 |
101 |         var map = new LinkedCaseInsensitiveMap<String>();
102 |
103 |         map.put("id", "F001");
104 |         map.put("email", "jmatthews@email");
105 |         map.put("feedback", feedback);
106 |
107 |         var actual = objectMapper.writeValueAsString(map);
108 |
109 |         // println, not printf: the JSON is data and must not be interpreted as a format string.
110 |         System.out.println(actual);
111 |
112 |         var tree = objectMapper.readTree(actual);
113 |         assertThat(tree.get("id").asText()).isEqualTo("F001");
114 |         assertThat(tree.get("email").asText()).isEqualTo("jmatthews@email");
115 |         assertThat(tree.get("feedback").asText()).isEqualTo(feedback);
116 |     }
117 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AI Data Pipelines with Spring
2 | This is the repository for the LinkedIn Learning course `AI Data Pipeline with Spring`. The full course is available from [LinkedIn Learning][lil-course-url].
3 |
4 | ![lil-thumbnail-url]
5 |
6 | ## Course Description
7 |
8 | The lack of data integration is a common blocker preventing organizations from unlocking the power of artificial intelligence. Spring is a popular collection of projects that simplifies data integration and artificial intelligence development for Java applications. In this course, learn how Spring can be used to build data pipelines. Discover how Spring AI makes it easy to use AI models using the Java programming language. Understand how to build data pipelines with Spring Batch and Spring Cloud Stream with RabbitMQ. Learn how features within Postgres enable vector databases and machine learning for AI workloads.
9 |
10 |
11 | ## Instructions
12 |
13 | The source code for the live demonstrations is available here in the source GitHub repo. You’re not required to review or build the demo applications. The source code is available as a reference implementation, so feel free to explore.
14 |
15 |
16 | ## Installing
17 |
18 |
19 | The details to run the demonstrations can be found under the docs folder.
20 |
21 | - [02_02_Spring Batch Data Pipeline with Postgres.md](docs/02_02_Spring%20Batch%20Data%20Pipeline%20with%20Postgres.md)
22 | - [03_02_API Spring Cloud Stream Source with RabbitMQ.md](docs/03_02_API%20Spring%20Cloud%20Stream%20Source%20with%20RabbitMQ.md)
23 | - [03_03_Building API Data Pipeline Postgres Sink.md](docs/03_03_Building%20API%20Data%20Pipeline%20Postgres%20Sink.md)
24 | - [03_04_Building API Data Pipeline Postgres Processor.md](docs/03_04_Building%20API%20Data%20Pipeline%20Postgres%20Processor.md)
25 | - [04_02_Introducing PostgresML.md](docs/04_02_Introducing%20PostgresML.md)
26 | - [04_03_Building a Text Summarization AI Data Pipeline.md](docs/04_03_Building%20a%20Text%20Summarization%20AI%20Data%20Pipeline.md)
27 | - [05_01_Introducing Spring AI.md](docs/05_01_Introducing%20Spring%20AI.md)
28 | - [05_02_Building a Text Sentimental Analysis AI Data Pipeline.md](docs/05_02_Building%20a%20Text%20Sentimental%20Analysis%20AI%20Data%20Pipeline.md)
29 | - [05_04_Building a Text Sentiment Analysis AI Data Pipeline with RAG.md](docs/05_04_Building%20a%20Text%20Sentiment%20Analysis%20AI%20Data%20Pipeline%20with%20RAG.md)
30 | - [05_05_Building a Similarity AI Data Pipeline.md](docs/05_05_Building%20a%20Similarity%20AI%20Data%20Pipeline.md)
31 |
32 |
33 | ## Instructor
34 |
35 | Instructor name: Gregory Green
36 |
37 | Being familiar with the Java programming language
38 | will help you get the most out of this course.
39 |
40 | You should be comfortable with developing Java applications using the Maven build tool, along with an integrated development environment such as IntelliJ.
41 |
42 | Having hands-on experience with the Spring Framework and Spring Boot will help you better understand some of the examples.
43 |
44 | You should be comfortable with at least one relational database, such as Postgres, and with Structured Query Language (SQL).
45 |
46 | Although it’s not required, it is useful to have experience with a messaging system, such as RabbitMQ or similar solutions, in addition to a basic understanding of Docker and containers.
47 |
48 |
49 | Check out my other courses on [LinkedIn Learning](https://www.linkedin.com/learning/instructors/gregory-green).
50 |
51 |
52 | - [Data Resilience with Spring and RabbitMQ Event Streaming](https://www.linkedin.com/learning/data-resilience-with-spring-and-rabbitmq-event-streaming/introduction-to-data-resiliency)
53 | - [Building High-Throughput Data Microservices](https://www.linkedin.com/learning/building-high-throughput-data-microservices/rabbitmq-streams-for-high-throughput)
54 | - [Achieving Low-Latency Data with Edge Computing](https://www.linkedin.com/learning/achieving-low-latency-data-with-edge-computing/why-is-low-latency-data-with-edge-computing-important)
55 |
56 |
57 | [lil-course-url]: https://www.linkedin.com/learning/ai-data-pipelines-with-spring
58 | [lil-thumbnail-url]: https://media.licdn.com/dms/image/v2/D4E0DAQGio63WPKbMnQ/learning-public-crop_675_1200/B4EZkQl6HzHEAY-/0/1756919997682?e=2147483647&v=beta&t=ZEaakiNHX7JUx5-DF201T53bjSragjmGN8k7wEJuozA
59 |
60 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/test/java/ai/data/pipeline/spring/customer/processor/MissingRequiredFieldsFilterProcessorTest.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer.processor;
2 |
3 | import nyla.solutions.core.io.csv.CsvWriter;
4 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator;
5 | import org.junit.jupiter.api.BeforeEach;
6 | import org.junit.jupiter.api.Test;
7 | import org.junit.jupiter.api.extension.ExtendWith;
8 | import org.mockito.Mock;
9 | import org.mockito.junit.jupiter.MockitoExtension;
10 | import ai.data.pipeline.spring.customer.domain.Contact;
11 | import ai.data.pipeline.spring.customer.domain.Customer;
12 |
13 | import static org.assertj.core.api.Assertions.assertThat;
14 | import static org.mockito.ArgumentMatchers.any;
15 | import static org.mockito.Mockito.never;
16 | import static org.mockito.Mockito.verify;
17 |
18 | /**
19 |  * Unit tests for {@link MissingRequiredFieldsFilterProcessor} with a mocked {@link CsvWriter}.
20 |  * Each "required" test omits exactly one field from an otherwise complete customer and
21 |  * expects the processor to return null and append one row to the writer.
22 |  */
23 | @ExtendWith(MockitoExtension.class)
24 | class MissingRequiredFieldsFilterProcessorTest {
25 |
26 |     private static final String id = "id";
27 |     private static final String firstName = "fn";
28 |     private static final String lastName = "ln";
29 |     private static final String email = "email";
30 |     private static final String phone = "phone";
31 |
32 |     @Mock
33 |     private CsvWriter csvWriter;
34 |
35 |     private MissingRequiredFieldsFilterProcessor subject;
36 |
37 |     @BeforeEach
38 |     void setUp() {
39 |         subject = new MissingRequiredFieldsFilterProcessor(csvWriter);
40 |     }
41 |
42 |     /** A complete customer is returned as-is and nothing is appended to the writer. */
43 |     @Test
44 |     void savedValidCustomer() throws Exception {
45 |         var customer = Customer.builder().id(id)
46 |                 .firstName(firstName).lastName(lastName)
47 |                 .contact(Contact.builder().email(email).phone(phone).build()).build();
48 |
49 |         var actual = subject.process(customer);
50 |         verify(csvWriter, never()).appendRow(any(String[].class));
51 |
52 |         assertThat(actual).isEqualTo(customer);
53 |     }
54 |
55 |     /** Only the first name is missing. */
56 |     @Test
57 |     void firstNameRequired() throws Exception {
58 |         var customer = Customer.builder().id(id)
59 |                 .lastName(lastName)
60 |                 .contact(Contact.builder().email(email).phone(phone).build()).build();
61 |
62 |         var actual = subject.process(customer);
63 |         verify(csvWriter).appendRow(any(String[].class));
64 |
65 |         assertThat(actual).isNull();
66 |     }
67 |
68 |     /** Only the last name is missing. */
69 |     @Test
70 |     void lastNameRequired() throws Exception {
71 |         var customer = Customer.builder().id(id)
72 |                 .firstName(firstName)
73 |                 .contact(Contact.builder().email(email).phone(phone).build()).build();
74 |
75 |         var actual = subject.process(customer);
76 |         verify(csvWriter).appendRow(any(String[].class));
77 |
78 |         assertThat(actual).isNull();
79 |     }
80 |
81 |     /** Only the id is missing. */
82 |     @Test
83 |     void idRequired() throws Exception {
84 |         var customer = Customer.builder()
85 |                 .firstName(firstName).lastName(lastName)
86 |                 .contact(Contact.builder().email(email).phone(phone).build()).build();
87 |
88 |         var actual = subject.process(customer);
89 |         verify(csvWriter).appendRow(any(String[].class));
90 |
91 |         assertThat(actual).isNull();
92 |     }
93 |
94 |     /** Only the email is missing; the id is set so the test cannot pass on a missing id. */
95 |     @Test
96 |     void emailRequired() throws Exception {
97 |         var customer = Customer.builder().id(id)
98 |                 .firstName(firstName).lastName(lastName)
99 |                 .contact(Contact.builder()
100 |                         .phone(phone).build()).build();
101 |
102 |         var actual = subject.process(customer);
103 |         verify(csvWriter).appendRow(any(String[].class));
104 |
105 |         assertThat(actual).isNull();
106 |     }
107 |
108 |     /** Only the phone is missing. */
109 |     @Test
110 |     void phoneRequired() throws Exception {
111 |         var customer = Customer.builder().id(id)
112 |                 .firstName(firstName).lastName(lastName)
113 |                 .contact(Contact.builder().email(email)
114 |                         .build()).build();
115 |
116 |         var actual = subject.process(customer);
117 |         verify(csvWriter).appendRow(any(String[].class));
118 |
119 |         assertThat(actual).isNull();
120 |     }
121 | }
--------------------------------------------------------------------------------
/applications/sinks/postgres-sink/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | org.springframework.boot
7 | spring-boot-starter-parent
8 | 3.5.0
9 |
10 |
11 | ai.data.pipelines.spring
12 | postgres-sink
13 | 0.0.1-SNAPSHOT
14 | postgres-sink
15 | postgres-sink
16 |
17 | 21
18 | 2025.0.0
19 |
20 |
21 |
22 | org.projectlombok
23 | lombok
24 | true
25 |
26 |
27 | org.springframework.boot
28 | spring-boot-configuration-processor
29 | true
30 |
31 |
32 | org.springframework.boot
33 | spring-boot-starter-actuator
34 |
35 |
36 | org.springframework.boot
37 | spring-boot-starter-amqp
38 |
39 |
40 | org.springframework.boot
41 | spring-boot-starter-data-jdbc
42 |
43 |
44 | org.springframework.boot
45 | spring-boot-starter-data-jpa
46 |
47 |
48 | org.springframework.amqp
49 | spring-rabbit-stream
50 |
51 |
52 | org.springframework.cloud
53 | spring-cloud-stream
54 |
55 |
56 | org.springframework.cloud
57 | spring-cloud-stream-binder-rabbit
58 |
59 |
60 |
61 | com.h2database
62 | h2
63 | test
64 |
65 |
66 | com.github.nyla-solutions
67 | nyla.solutions.core
68 | 2.2.3
69 |
70 |
71 |
72 | org.postgresql
73 | postgresql
74 | runtime
75 |
76 |
77 |
78 | org.springframework.boot
79 | spring-boot-starter-test
80 | test
81 |
82 |
83 |
84 | org.springframework.amqp
85 | spring-rabbit-test
86 | test
87 |
88 |
89 | org.springframework.cloud
90 | spring-cloud-stream-test-binder
91 | test
92 |
93 |
94 |
95 |
96 |
97 | org.springframework.cloud
98 | spring-cloud-dependencies
99 | ${spring-cloud.version}
100 | pom
101 | import
102 |
103 |
104 |
105 |
106 |
107 |
108 | org.apache.maven.plugins
109 | maven-compiler-plugin
110 |
111 |
112 |
113 | org.projectlombok
114 | lombok
115 |
116 |
117 |
118 |
119 |
120 | org.springframework.boot
121 | spring-boot-maven-plugin
122 |
123 |
124 |
125 | org.projectlombok
126 | lombok
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | org.springframework.boot
7 | spring-boot-starter-parent
8 | 3.5.3
9 |
10 |
11 | ai.data.pipelines.spring
12 | ai-sentiment-processor
13 | 0.0.1-SNAPSHOT
14 | ai-sentiment-processor
15 | ai-sentiment-processor
16 |
17 | 21
18 | 1.0.0
19 | 2025.0.0
20 | 21
21 | 21
22 |
23 |
24 |
25 | org.projectlombok
26 | lombok
27 | true
28 |
29 |
30 | org.springframework.boot
31 | spring-boot-configuration-processor
32 | true
33 |
34 |
35 | org.springframework.boot
36 | spring-boot-starter-actuator
37 |
38 |
39 | org.springframework.boot
40 | spring-boot-starter-amqp
41 |
42 |
43 |
44 |
45 |
46 | org.springframework.ai
47 | spring-ai-starter-model-ollama
48 |
49 |
50 |
51 | org.springframework.amqp
52 | spring-rabbit-stream
53 |
54 |
55 | org.springframework.cloud
56 | spring-cloud-stream
57 |
58 |
59 | org.springframework.cloud
60 | spring-cloud-stream-binder-rabbit
61 |
62 |
63 |
64 | com.github.nyla-solutions
65 | nyla.solutions.core
66 | 2.3.1
67 |
68 |
69 | org.springframework.boot
70 | spring-boot-starter-test
71 | test
72 |
73 |
74 | org.springframework.amqp
75 | spring-rabbit-test
76 | test
77 |
78 |
79 | org.springframework.cloud
80 | spring-cloud-stream-test-binder
81 | test
82 |
83 |
84 |
85 |
86 |
87 | org.springframework.cloud
88 | spring-cloud-dependencies
89 | ${spring-cloud.version}
90 | pom
91 | import
92 |
93 |
94 | org.springframework.ai
95 | spring-ai-bom
96 | ${spring-ai.version}
97 | pom
98 | import
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 | org.apache.maven.plugins
107 | maven-compiler-plugin
108 |
109 |
110 |
111 | org.projectlombok
112 | lombok
113 |
114 |
115 |
116 |
117 |
118 | org.springframework.boot
119 | spring-boot-maven-plugin
120 |
121 |
122 |
123 | org.projectlombok
124 | lombok
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | org.springframework.boot
7 | spring-boot-starter-parent
8 | 3.5.0
9 |
10 |
11 | ai.data.pipelines.spring
12 | postgres-embedding-similarity-processor
13 | 0.0.1-SNAPSHOT
14 | postgres-embedding-similarity-processor
15 | postgres-embedding-similarity-processor
16 |
17 | 21
18 | 1.0.0
19 | 2025.0.0
20 | 21
21 | 21
22 |
23 |
24 |
25 |
26 | org.projectlombok
27 | lombok
28 | true
29 |
30 |
31 | org.springframework.boot
32 | spring-boot-configuration-processor
33 | true
34 |
35 |
36 | org.springframework.boot
37 | spring-boot-starter-actuator
38 |
39 |
40 | org.springframework.boot
41 | spring-boot-starter-amqp
42 |
43 |
44 | org.springframework.ai
45 | spring-ai-starter-model-postgresml-embedding
46 |
47 |
48 |
49 |
50 | org.springframework.ai
51 | spring-ai-starter-vector-store-pgvector
52 |
53 |
54 | org.springframework.amqp
55 | spring-rabbit-stream
56 |
57 |
58 | org.springframework.cloud
59 | spring-cloud-stream
60 |
61 |
62 | org.springframework.cloud
63 | spring-cloud-stream-binder-rabbit
64 |
65 |
66 |
67 | com.github.nyla-solutions
68 | nyla.solutions.core
69 | 2.3.1
70 |
71 |
72 | org.springframework.boot
73 | spring-boot-starter-test
74 | test
75 |
76 |
77 | org.springframework.amqp
78 | spring-rabbit-test
79 | test
80 |
81 |
82 | org.springframework.cloud
83 | spring-cloud-stream-test-binder
84 | test
85 |
86 |
87 |
88 |
89 |
90 | org.springframework.cloud
91 | spring-cloud-dependencies
92 | ${spring-cloud.version}
93 | pom
94 | import
95 |
96 |
97 | org.springframework.ai
98 | spring-ai-bom
99 | ${spring-ai.version}
100 | pom
101 | import
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 | org.apache.maven.plugins
110 | maven-compiler-plugin
111 |
112 |
113 |
114 | org.projectlombok
115 | lombok
116 |
117 |
118 |
119 |
120 |
121 | org.springframework.boot
122 | spring-boot-maven-plugin
123 |
124 |
125 |
126 | org.projectlombok
127 | lombok
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/docs/BONUS_Building a Text Sentimental Analysis AI Data Pipeline with PostgresML.md:
--------------------------------------------------------------------------------
1 |
2 | - Run RabbitMQ
3 | ```shell
4 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management
5 | ```
6 |
7 | Run Postgres
8 |
9 |
10 | ```shell
11 | docker run --name postgres --network data-pipelines --rm \
12 | -e POSTGRES_USER=postgres \
13 | -e POSTGRES_PASSWORD=postgres \
14 | -e POSTGRES_DB=postgres \
15 | -p 5432:5432 \
16 | -it postgres
17 | ```
18 |
19 |
20 |
21 |
22 | psql
23 |
24 | ```shell
25 | docker exec -it postgres psql -U postgres
26 | ```
27 |
28 | ```sql
29 | create schema if not exists customer ;
30 |
31 | create table customer.feedback(
32 | feed_id text NOT NULL,
33 | email text NOT NULL,
34 | user_feedback text NOT NULL,
35 | summary text NOT NULL,
36 | feedback_dt timestamp NOT NULL DEFAULT NOW(),
37 | sentiment smallint NOT NULL,
38 | score numeric NOT NULL,
39 | PRIMARY KEY (feed_id)
40 | );
41 | ```
42 |
43 |
44 | Run PostgresML
45 |
46 | ```shell
47 | docker run --rm --name postgresml \
48 | -it \
49 | --network data-pipelines \
50 | -v postgresml_data:/var/lib/postgresql \
51 | -p 6432:5432 \
52 | -p 8000:8000 \
53 | ghcr.io/postgresml/postgresml:2.10.0 \
54 | sudo -u postgresml psql -d postgresml
55 | ```
56 |
57 |
58 |
59 |
60 | ::json->>'summary_text'
61 |
62 | select pg_typeof(results::json)
63 |
64 | ```sql
65 | SELECT
66 | positivity::json->0->>'label' as label,
67 | positivity::json->0->>'score' as score,
68 | (CASE
69 | WHEN positivity::json->0->>'label' = 'NEGATIVE' THEN -1
70 | WHEN positivity::json->0->>'label' = 'POSITIVE' THEN 1
71 | ELSE
72 | 0
73 | END) as sentiment
74 | from (SELECT pgml.transform(
75 | task => 'text-classification',
76 | inputs => ARRAY[
77 | 'Why is the wait SO LONG!' ]
78 | ) as positivity) text_classification;
79 | ```
80 |
81 |
82 | ---------------------------
83 |
84 |
85 | Start Http
86 |
87 |
88 | ```shell
89 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=feedback --server.port=8094 --spring.cloud.stream.bindings.output.destination=customers.input.feedback
90 | ```
91 |
92 |
93 | Start Processor Text Summary
94 |
95 | ```shell
96 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.input.feedback --spring.cloud.stream.bindings.output.destination=customers.output.feedback.summary --spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor
97 | ```
98 | Start Processor Text sentiment
99 |
100 | ```shell
101 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.output.feedback.summary --spring.cloud.stream.bindings.output.destination=customers.output.feedback.sentiment --spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/sentiment-analysis.yml --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor
102 | ```
103 |
104 |
105 |
106 | Start Sink
107 |
108 |
109 | ```shell
110 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination=customers.output.feedback.sentiment --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis.yml --spring.cloud.stream.bindings.input.group=postgres-sink
111 | ```
112 |
113 |
114 | ```shell
115 | curl -X 'POST' \
116 | 'http://localhost:8094/feedback' \
117 | -H 'accept: */*' \
118 | -H 'Content-Type: application/json' \
119 | -d '{
120 | "id" : "F001",
121 | "email" : "jmatthews@email",
122 | "feedback" : "Hello my name is John Smith. I am long time customer. It seems that every time I call the help desk there is a very long wait. Then when I following get someone on the line, I have the repeat to repeat the process of the provide the details. This is very disappointing."
123 | }'
124 | ```
125 |
126 |
127 | In psql
128 |
129 | ```sql
130 | select * from customer.feedback;
131 |
132 | ```
--------------------------------------------------------------------------------
/docs/03_03_Building API Data Pipeline Postgres Sink.md:
--------------------------------------------------------------------------------
1 | # Prerequisite
2 |
3 | ```shell
4 | docker network create data-pipelines
5 | ```
6 |
7 | - Run RabbitMQ
8 | ```shell
9 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management
10 | ```
11 |
12 | Run the Http Source
13 | ```shell
14 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=customers --server.port=8080 --spring.cloud.stream.bindings.output.destination=customers.intake
15 | ```
16 |
17 | Start Postgres
18 |
19 | ```shell
20 | docker run --name postgres --network data-pipelines --rm \
21 | -e POSTGRES_USER=postgres \
22 | -e POSTGRES_PASSWORD=postgres \
23 | -e POSTGRES_DB=postgres \
24 | -p 5432:5432 \
25 | -it postgres
26 | ```
27 |
28 |
29 | psql
30 |
31 | ```shell
32 | docker exec -it postgres psql -U postgres
33 | ```
34 |
35 | ```sql
36 | create schema customer;
37 |
38 | create table customer.customers(
39 | first_nm text NOT NULL,
40 | last_nm text NOT NULL,
41 | email text NOT NULL,
42 | phone text ,
43 | address text NOT NULL,
44 | city text ,
45 | state text ,
46 | zip text NOT NULL,
47 | PRIMARY KEY (email)
48 | );
49 | ```
50 |
51 | Build application
52 |
53 | ```shell
54 | mvn package
55 | ```
56 |
57 | See
58 |
59 | [PostgresConsumer.java](../applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/sink/PostgresConsumer.java)
60 |
61 | ```shell
62 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --sql.consumer.sql="insert into customer.customers(email,first_nm,last_nm,phone,address,city,state,zip) values (:email,:firstName,:lastName,:phone, :address,:city,:state,:zip) on CONFLICT (email) DO UPDATE SET first_nm = :firstName, last_nm = :lastName, phone = :phone, address = :address, city = :city, state = :state, zip = :zip" --spring.cloud.stream.bindings.input.destination=customers.intake
63 | ```
64 |
65 |
66 |
67 | ## Testing
68 |
69 |
70 | ```shell
71 | curl -X 'POST' \
72 | 'http://localhost:8080/customers' \
73 | -H 'accept: */*' \
74 | -H 'Content-Type: application/json' \
75 | -d '{
76 | "email" : "email@email",
77 | "firstName" : "Josiah",
78 | "lastName" : "Imani",
79 | "phone" : "555-555-5555",
80 | "address" : "12 Straight St",
81 | "city" : "gold",
82 | "state": "ny",
83 | "zip": "55555"
84 | }'
85 | ```
86 |
87 | ```json
88 | {
89 | "email" : "email@email",
90 | "firstName" : "Josiah",
91 | "lastName" : "Imani",
92 | "phone" : "555-555-5555",
93 | "address" : "12 Straight St",
94 | "city" : "gold",
95 | "state": "ny",
96 | "zip": "55555"
97 | }
98 |
99 | ```
100 |
101 |
102 | In psql
103 |
104 | ```sql
105 | select * from customer.customers;
106 |
107 | ```
108 |
109 |
110 | ```shell
111 | curl -X 'POST' \
112 | 'http://localhost:8080/customers' \
113 | -H 'accept: */*' \
114 | -H 'Content-Type: application/json' \
115 | -d '{
116 | "firstName" : "Jill",
117 | "lastName" : "Smith",
118 | "email" : "jsmith@email",
119 | "phone" : "155-555-5555",
120 | "address" : "2 Straight St",
121 | "city" : "gold",
122 | "state": "ny",
123 | "zip": "55555"
124 | }'
125 | ```
126 | ```sql
127 | select * from customer.customers;
128 | ```
129 |
130 |
131 | Update Jill's phone
132 |
133 | ```shell
134 | curl -X 'POST' \
135 | 'http://localhost:8080/customers' \
136 | -H 'accept: */*' \
137 | -H 'Content-Type: application/json' \
138 | -d '{
139 | "firstName" : "Jill",
140 | "lastName" : "Smith",
141 | "email" : "jsmith@email",
142 | "phone" : "222-222-2222",
143 | "address" : "2 Straight St",
144 | "city" : "gold",
145 | "state": "ny",
146 | "zip": "55555"
147 | }'
148 | ```
149 |
150 | ```sql
151 | select * from customer.customers;
152 | ```
153 |
154 | Add another customer Jack Smith
155 | ```shell
156 | curl -X 'POST' \
157 | 'http://localhost:8080/customers' \
158 | -H 'accept: */*' \
159 | -H 'Content-Type: application/json' \
160 | -d '{
161 | "firstName" : "Jack",
162 | "lastName" : "Smith",
163 | "email" : "jacksmith@email",
164 | "phone" : "255-555-5555",
165 | "address" : "255 Straight St",
166 | "city" : "gold",
167 | "state": "ny",
168 | "zip": "55555"
169 | }'
170 | ```
171 |
172 | ```sql
173 | select * from customer.customers;
174 | ```
175 | Change Jack Smith's information, e.g., change the address to 333 Straight St
176 |
177 | ```shell
178 | curl -X 'POST' \
179 | 'http://localhost:8080/customers' \
180 | -H 'accept: */*' \
181 | -H 'Content-Type: application/json' \
182 | -d '{
183 | "firstName" : "Jack",
184 | "lastName" : "Smith",
185 | "email" : "jacksmith@email",
186 | "phone" : "255-555-5555",
187 | "address" : "333 Straight St",
188 | "city" : "silver",
189 | "state": "ny",
190 | "zip": "23232"
191 | }'
192 | ```
193 |
194 |
195 | ```sql
196 | select * from customer.customers;
197 | ```
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | org.springframework.boot
7 | spring-boot-starter-parent
8 | 3.5.3
9 |
10 |
11 | ai.data.pipelines.spring
12 | ai-sentiment-rag-processor
13 | 0.0.1-SNAPSHOT
14 | ai-sentiment-rag-processor
15 | ai-sentiment-rag-processor
16 |
17 | 21
18 | 1.0.0
19 | 2025.0.0
20 | 21
21 | 21
22 |
23 |
24 |
25 | org.projectlombok
26 | lombok
27 | true
28 |
29 |
30 | org.springframework.boot
31 | spring-boot-configuration-processor
32 | true
33 |
34 |
35 | org.springframework.boot
36 | spring-boot-starter-actuator
37 |
38 |
39 | org.springframework.boot
40 | spring-boot-starter-amqp
41 |
42 |
43 | org.springframework.ai
44 | spring-ai-starter-model-ollama
45 |
46 |
47 |
48 | org.springframework.amqp
49 | spring-rabbit-stream
50 |
51 |
52 | org.springframework.cloud
53 | spring-cloud-stream
54 |
55 |
56 | org.springframework.cloud
57 | spring-cloud-stream-binder-rabbit
58 |
59 |
60 |
61 | com.github.nyla-solutions
62 | nyla.solutions.core
63 | 2.3.1
64 |
65 |
66 | org.springframework.boot
67 | spring-boot-starter-test
68 | test
69 |
70 |
71 | org.springframework.amqp
72 | spring-rabbit-test
73 | test
74 |
75 |
76 |
77 |
78 | org.springframework.ai
79 | spring-ai-advisors-vector-store
80 |
81 |
82 | org.springframework.ai
83 | spring-ai-starter-vector-store-pgvector
84 |
85 |
86 |
87 | org.springframework.cloud
88 | spring-cloud-stream-test-binder
89 | test
90 |
91 |
92 |
93 |
94 |
95 |
96 | org.springframework.cloud
97 | spring-cloud-dependencies
98 | ${spring-cloud.version}
99 | pom
100 | import
101 |
102 |
103 | org.springframework.ai
104 | spring-ai-bom
105 | ${spring-ai.version}
106 | pom
107 | import
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 | org.apache.maven.plugins
116 | maven-compiler-plugin
117 |
118 |
119 |
120 | org.projectlombok
121 | lombok
122 |
123 |
124 |
125 |
126 |
127 | org.springframework.boot
128 | spring-boot-maven-plugin
129 |
130 |
131 |
132 | org.projectlombok
133 | lombok
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
--------------------------------------------------------------------------------
/applications/processors/postgres-query-processor/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | 4.0.0
5 |
6 | org.springframework.boot
7 | spring-boot-starter-parent
8 | 3.4.5
9 |
10 |
11 | ai.data.pipelines.spring
12 | postgres-query-processor
13 | 0.0.1-SNAPSHOT
14 | postgres-query-processor
15 | postgres-query-processor
16 |
17 | 21
18 | 2024.0.1
19 | 21
20 | 21
21 |
22 |
23 |
24 | org.projectlombok
25 | lombok
26 | true
27 |
28 |
29 | org.springframework.boot
30 | spring-boot-configuration-processor
31 | true
32 |
33 |
34 | org.springframework.boot
35 | spring-boot-starter-actuator
36 |
37 |
38 | org.springframework.boot
39 | spring-boot-starter-amqp
40 |
41 |
42 | org.springframework.boot
43 | spring-boot-starter-data-jdbc
44 |
45 |
46 | org.springframework.boot
47 | spring-boot-starter-data-jpa
48 |
49 |
50 | org.springframework.amqp
51 | spring-rabbit-stream
52 |
53 |
54 | org.springframework.cloud
55 | spring-cloud-stream
56 |
57 |
58 | org.springframework.cloud
59 | spring-cloud-stream-binder-rabbit
60 |
61 |
62 |
63 | com.h2database
64 | h2
65 | test
66 |
67 |
68 | com.github.nyla-solutions
69 | nyla.solutions.core
70 | 2.2.4
71 |
72 |
73 |
74 | org.postgresql
75 | postgresql
76 | runtime
77 |
78 |
79 |
80 | org.springframework.boot
81 | spring-boot-starter-test
82 | test
83 |
84 |
85 |
86 | org.springframework.amqp
87 | spring-rabbit-test
88 | test
89 |
90 |
91 | org.springframework.cloud
92 | spring-cloud-stream-test-binder
93 | test
94 |
95 |
96 |
97 |
98 |
99 | org.springframework.cloud
100 | spring-cloud-dependencies
101 | ${spring-cloud.version}
102 | pom
103 | import
104 |
105 |
106 |
107 |
108 |
109 |
110 | org.apache.maven.plugins
111 | maven-compiler-plugin
112 |
113 |
114 |
115 | org.projectlombok
116 | lombok
117 |
118 |
119 |
120 |
121 |
122 | org.springframework.boot
123 | spring-boot-maven-plugin
124 |
125 |
126 |
127 | org.projectlombok
128 | lombok
129 |
130 |
131 |
132 |
133 |
134 | org.springframework.cloud
135 | spring-cloud-dataflow-apps-metadata-plugin
136 |
137 | true
138 |
139 |
140 |
141 | aggregate-metadata
142 | compile
143 |
144 | aggregate-metadata
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
--------------------------------------------------------------------------------
/docs/04_03_Building a Text Summarization AI Data Pipeline.md:
--------------------------------------------------------------------------------
1 | Run RabbitMQ
2 |
3 | ```shell
4 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management
5 | ```
6 |
7 | Run PostgresML
8 |
9 | ```shell
10 | docker run --rm --name postgresml \
11 | -it \
12 | --network data-pipelines \
13 | -v postgresml_data:/var/lib/postgresql \
14 | -p 6432:5432 \
15 | -p 8000:8000 \
16 | ghcr.io/postgresml/postgresml:2.10.0 \
17 | sudo -u postgresml psql -d postgresml
18 | ```
19 |
20 |
21 | Run Postgres
22 |
23 | ```shell
24 | docker run --name postgresql --network data-pipelines --rm -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres -p 5432:5432 postgres:latest
25 | ```
26 |
27 | Connect to postgres
28 | ```shell
29 | docker exec -it postgresql psql -U postgres
30 | ```
31 |
32 |
33 | ```sql
34 |
35 | create schema if not exists customer;
36 |
37 | create table customer.feedback(
38 | feed_id text NOT NULL,
39 | email text NOT NULL,
40 | user_feedback text NOT NULL,
41 | summary text NOT NULL,
42 | PRIMARY KEY (feed_id)
43 | );
44 | ```
45 |
46 |
47 | ---------------------------
48 |
49 |
50 |
51 | Start Http
52 |
53 |
54 | ```shell
55 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=feedback --server.port=8093 --spring.cloud.stream.bindings.output.destination=customers.input.feedback
56 | ```
57 |
58 |
59 |
60 |
61 | Processor text summarization
62 |
63 | ```shell
64 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.password=postgres --spring.cloud.stream.bindings.input.destination=customers.input.feedback --spring.cloud.stream.bindings.output.destination=customers.output.feedback --spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor
65 | ```
66 |
67 | See
68 |
69 | - [text-summarization.yml](../applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml)
70 | - [QueryFunctionProcessor.java](../applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/processors/QueryFunctionProcessor.java)
71 |
72 |
73 |
74 | Start Sink
75 |
76 |
77 | ```shell
78 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination=customers.output.feedback --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-text-summarization.yml --spring.cloud.stream.bindings.input.group=postgres-sink
79 | ```
80 | See
81 |
82 | - [postgres-text-summarization.yml](../applications/sinks/postgres-sink/src/main/resources/postgres-text-summarization.yml)
83 | - [PostgresConsumer.java](../applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/sink/PostgresConsumer.java)
84 |
85 |
86 |
87 | ```shell
88 | curl -X 'POST' \
89 | 'http://localhost:8093/feedback' \
90 | -H 'accept: */*' \
91 | -H 'Content-Type: application/json' \
92 | -d '{
93 | "id" : "F001",
94 | "email" : "jmatthews@email",
95 | "feedback" : "I am really disappointed with the wait time I experienced when trying to reach Customer Service. I was on hold for over 40 minutes just to speak with someone about a simple issue with my account. It’s frustrating and honestly unacceptable. If your company values customer satisfaction, you seriously need to hire more reps or improve your response time. I do not have time to sit around waiting all day."
96 | }'
97 | ```
98 |
99 |
100 | ```shell
101 | curl -X 'POST' \
102 | 'http://localhost:8093/feedback' \
103 | -H 'accept: */*' \
104 | -H 'Content-Type: application/json' \
105 | -d '{
106 | "id" : "F002",
107 | "email" : "jmatthews@email",
108 | "feedback" : "I just wanted to take a moment to recognize the exceptional professionalism of your customer service team. The representative I spoke with was courteous, knowledgeable, and incredibly patient while helping me resolve my issue. It’s rare to find such a high level of service these days, and it truly made a difference in my experience. Kudos to your team!"
109 | }'
110 | ```
111 |
112 |
113 | ```shell
114 | curl -X 'POST' \
115 | 'http://localhost:8093/feedback' \
116 | -H 'accept: */*' \
117 | -H 'Content-Type: application/json' \
118 | -d '{
119 | "id" : "F003",
120 | "email" : "jmatthews@email",
121 | "feedback" : "I am getting really frustrated with having to repeat who I am and explain my issue every time I am transferred to another representative. It is like no one talks to each other or takes notes. I had to give my name, account number, and explain the entire problem three different times during one call. It’s exhausting and makes the whole experience feel disorganized. There has to be a better way to handle this"
122 | }'
123 | ```
124 |
125 |
126 | In psql
127 |
128 | ```sql
129 | select feed_id,summary from customer.feedback;
130 | ```
131 |
132 | ```sql
133 | select LENGTH(summary) sum_len, LENGTH(user_feedback) org_len from customer.feedback;
134 | ```
135 |
--------------------------------------------------------------------------------
/docs/05_02_Text Sentiment Analysis Data Pipeline with Spring AI.md:
--------------------------------------------------------------------------------
1 | Create the Docker network
2 |
3 | ```shell
4 | docker network create data-pipelines
5 | ```
6 |
7 | start rabbitmq
8 | ```shell
9 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management
10 | ```
11 |
12 |
13 | Run Postgres
14 |
15 | ```shell
16 | docker run --name postgres --network data-pipelines --rm \
17 | -e POSTGRES_USER=postgres \
18 | -e POSTGRES_PASSWORD=postgres \
19 | -e POSTGRES_DB=postgres \
20 | -p 5432:5432 \
21 | -it postgres
22 | ```
23 |
24 | ```shell
25 | docker exec -it postgres psql -U postgres
26 | ```
27 |
28 |
29 | ```sql
30 | create schema if not exists customer ;
31 |
32 | create table customer.feedback(
33 | feed_id text NOT NULL,
34 | email text NOT NULL,
35 | user_feedback text NOT NULL,
36 | summary text NOT NULL,
37 | feedback_dt timestamp NOT NULL DEFAULT NOW(),
38 | sentiment text NOT NULL,
39 | PRIMARY KEY (feed_id)
40 | );
41 | ```
42 |
43 | Run PostgresML (used by the text summary processor)
44 |
45 | ```shell
46 | docker run --rm --name postgresml \
47 | -it \
48 | --network data-pipelines \
49 | -v postgresml_data:/var/lib/postgresql \
50 | -p 6432:5432 \
51 | -p 8000:8000 \
52 | ghcr.io/postgresml/postgresml:2.10.0 \
53 | sudo -u postgresml psql -d postgresml
54 | ```
55 |
56 |
57 | Start Ollama
58 |
59 | ```shell
60 | ollama serve
61 | ```
62 |
63 | pull and run a model like this:
64 |
65 | ```shell
66 | ollama run llama3
67 | ```
68 |
69 | Test the llama3 model with the following prompt
70 |
71 | ```shell
72 | Analyze the sentiment of this text: "Hello my name is John Smith. I am long time customer. It seems that every time I call the help desk there is a very long wait . When I finally get someone on the line, I have the repeat the process of the provide my details.".
73 | Respond with only one word: Positive or Negative.
74 | ```
75 |
76 | ---------------------------
77 |
78 |
79 | Start Http
80 |
81 | ```shell
82 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=feedback --server.port=8094 --spring.cloud.stream.bindings.output.destination=customers.input.feedback
83 | ```
84 |
85 |
86 | Start Processor Text Summary
87 |
88 | ```shell
89 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.input.feedback --spring.cloud.stream.bindings.output.destination=customers.output.feedback.summary --spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor
90 | ```
91 | Start Sentiment Analysis Processor
92 |
93 | ```shell
94 | java -jar applications/processors/ai-sentiment-processor/target/ai-sentiment-processor-0.0.1-SNAPSHOT.jar --spring.cloud.stream.bindings.input.destination=customers.output.feedback.summary --spring.cloud.stream.bindings.output.destination=customers.output.feedback.sentiment
95 | ```
96 |
97 | See [CustomerFeedbackSentimentProcessor.java](../applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessor.java)
98 |
99 | See [CustomerFeedback.java](../applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/domains/CustomerFeedback.java)
100 | See [FeedbackSentiment.java](../applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/domains/FeedbackSentiment.java)
101 |
102 | Added support for OLLAMA
103 | [pom.xml](../applications/processors/ai-sentiment-processor/pom.xml)
104 |
105 |
106 | Start Postgres Sink
107 |
108 |
109 | ```shell
110 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination=customers.output.feedback.sentiment --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis-ollama.yml --spring.cloud.stream.bindings.input.group=postgres-sink
111 | ```
112 |
113 |
114 |
115 | ```shell
116 | curl -X 'POST' \
117 | 'http://localhost:8094/feedback' \
118 | -H 'accept: */*' \
119 | -H 'Content-Type: application/json' \
120 | -d '{
121 | "id" : "F001",
122 | "email" : "jmatthews@email",
123 | "feedback" : "Hello my name is John Smith. I am long time customer. It seems that every time I call the help desk there is a very long wait. Then when I following get someone on the line, I have the repeat to repeat the process of the provide the details. This is very disappointing."
124 | }'
125 | ```
126 |
127 |
128 | ```shell
129 | curl -X 'POST' \
130 | 'http://localhost:8094/feedback' \
131 | -H 'accept: */*' \
132 | -H 'Content-Type: application/json' \
133 | -d '{
134 | "id" : "F002",
135 | "email" : "jmatthews@email",
136 | "feedback" : "I am really disappointed with the wait time I experienced when trying to reach Customer Service. I was on hold for over 40 minutes just to speak with someone about a simple issue with my account. It’s frustrating and honestly unacceptable. If your company values customer satisfaction, you seriously need to hire more reps or improve your response time. I do not have time to sit around waiting all day."
137 | }'
138 | ```
139 |
140 |
141 | ```shell
142 | curl -X 'POST' \
143 | 'http://localhost:8094/feedback' \
144 | -H 'accept: */*' \
145 | -H 'Content-Type: application/json' \
146 | -d '{
147 | "id" : "F003",
148 | "email" : "jmatthews@email",
149 | "feedback" : "I just wanted to take a moment to recognize the exceptional professionalism of your customer service team. The representative I spoke with was courteous, knowledgeable, and incredibly patient while helping me resolve my issue. It’s rare to find such a high level of service these days, and it truly made a difference in my experience. Kudos to your team!"
150 | }'
151 | ```
152 |
153 |
154 | In psql
155 |
156 | ```sql
157 | select sentiment,summary from customer.feedback;
158 | ```
159 |
--------------------------------------------------------------------------------
/docs/03_04_Building API Data Pipeline Postgres Processor.md:
--------------------------------------------------------------------------------
1 | # Prerequisite
2 |
3 | ```shell
4 | docker network create data-pipelines
5 | ```
6 |
7 | - Run RabbitMQ
8 | ```shell
9 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management
10 | ```
11 |
12 | Postgres
13 | ```shell
14 | docker run --name postgres --network data-pipelines --rm \
15 | -e POSTGRES_USER=postgres \
16 | -e POSTGRES_PASSWORD=postgres \
17 | -e POSTGRES_DB=postgres \
18 | -p 5432:5432 \
19 | -it postgres
20 | ```
21 |
22 |
23 | ```shell
24 | docker exec -it postgres psql -U postgres
25 | ```
26 |
27 |
28 | ```sql
29 |
30 | create schema customer;
31 |
32 | create table customer.customers(
33 | first_nm text NOT NULL,
34 | last_nm text NOT NULL,
35 | email text NOT NULL,
36 | phone text ,
37 | address text NOT NULL,
38 | city text ,
39 | state text ,
40 | zip text NOT NULL,
41 | PRIMARY KEY (email)
42 | );
43 |
44 |
45 | create table customer.phone_campaigns(
46 | phone text NOT NULL,
47 | last_nm text NOT NULL,
48 | first_nm text NOT NULL,
49 | email text NOT NULL,
50 | PRIMARY KEY (phone)
51 | );
52 | ```
53 |
54 | In Psql
55 |
56 | ```sql
57 | insert into customer.phone_campaigns(phone,first_nm,last_nm, email) values('555-555-5551','John','Matthews','jmatthews@email');
58 | insert into customer.phone_campaigns(phone,first_nm,last_nm, email) values('555-555-5552','Marcy','Love','mlove@email');
59 | ```
60 |
61 |
62 |
63 | ```sql
64 | SELECT (CASE WHEN LENGTH(cust.last_nm) > 0 THEN cust.last_nm ELSE pc.last_nm END) as lastname,
65 | (CASE WHEN LENGTH(cust.first_nm) > 0 THEN cust.first_nm ELSE pc.first_nm END) as firstname,
66 | (CASE WHEN LENGTH(cust.email) > 0 THEN cust.email ELSE pc.email END) as email,
67 | cust.phone as phone,
68 | cust.address as address,
69 | cust.city as city,
70 | cust.state as state,
71 | cust.zip as zip
72 | FROM
73 | (select
74 | 'Entered last name' as last_nm,
75 | 'Entered first Name' as first_nm,
76 | 'Entered email' as email,
77 | '555-555-5551' as phone,
78 | 'Entered address' as address,
79 | 'Entered city' as city,
80 | 'Entered state' as state,
81 | 'Entered zip' as zip ) cust
82 | LEFT JOIN
83 | (select last_nm, first_nm, email, phone from customer.phone_campaigns) pc
84 | ON cust.phone = pc.phone;
85 | ```
86 |
87 |
88 | From Phone Campaign
89 |
90 | ```sql
91 | SELECT (CASE WHEN LENGTH(cust.last_nm) > 0 THEN cust.last_nm ELSE pc.last_nm END) as lastname,
92 | (CASE WHEN LENGTH(cust.first_nm) > 0 THEN cust.first_nm ELSE pc.first_nm END) as firstname,
93 | (CASE WHEN LENGTH(cust.email) > 0 THEN cust.email ELSE pc.email END) as email,
94 | cust.phone as phone,
95 | cust.address as address,
96 | cust.city as city,
97 | cust.state as state,
98 | cust.zip as zip
99 | FROM
100 | (select
101 | '' as last_nm,
102 | '' as first_nm,
103 | '' as email,
104 | '555-555-5551' as phone,
105 | 'Entered address' as address,
106 | 'Entered city' as city,
107 | 'Entered state' as state,
108 | 'Entered zip' as zip ) cust
109 | LEFT JOIN
110 | (select last_nm, first_nm, email, phone from customer.phone_campaigns) pc
111 | ON cust.phone = pc.phone;
112 |
113 | ```
114 |
115 | ==================================
116 |
117 |
118 | Start Http
119 |
120 |
121 | ```shell
122 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=customers --server.port=8091 --spring.cloud.stream.bindings.output.destination=customers.input.formatting
123 | ```
124 |
125 |
126 | Start Processor customer formatting from a marketing campaign
127 |
128 | [QueryFunctionProcessor.java](../applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/processors/QueryFunctionProcessor.java)
129 |
130 | ```shell
131 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --query.processor.sql="select (CASE WHEN LENGTH(cust.last_nm) > 0 THEN cust.last_nm ELSE pc.last_nm END) as lastname, (CASE WHEN LENGTH(cust.first_nm) > 0 THEN cust.first_nm ELSE pc.first_nm END) as firstname, (CASE WHEN LENGTH(cust.email) > 0 THEN cust.email ELSE pc.email END) as email, cust.phone as phone, cust.address as address, cust.city as city, cust.state as state, cust.zip as zip from (select :lastname as last_nm, :firstname as first_nm, :email as email, :phone as phone, :address as address, :city as city, :state as state, :zip as zip ) cust LEFT JOIN (select last_nm, first_nm, email, phone from customer.phone_campaigns) pc ON cust.phone = pc.phone" --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.input.formatting --spring.cloud.stream.bindings.output.destination=customers.output.formatting
132 | ```
133 |
134 | Start Sink
135 |
136 |
137 | ```shell
138 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --sql.consumer.sql="insert into customer.customers(email,first_nm,last_nm,phone,address,city,state,zip) values (:email,:firstname,:lastname,:phone, :address,:city,:state,:zip) on CONFLICT (email) DO UPDATE SET first_nm = :firstname, last_nm = :lastname, phone = :phone, address = :address, city = :city, state = :state, zip = :zip" --spring.cloud.stream.bindings.input.destination=customers.output.formatting
139 | ```
140 |
141 | ```shell
142 | curl -X 'POST' \
143 | 'http://localhost:8091/customers' \
144 | -H 'accept: */*' \
145 | -H 'Content-Type: application/json' \
146 | -d '{
147 | "email" : "",
148 | "firstname" : "",
149 | "lastname" : "",
150 | "phone" : "555-555-5551",
151 | "address" : "55 Straight St",
152 | "city" : "Richmond",
153 | "state": "VA",
154 | "zip": "23225"
155 | }'
156 | ```
157 |
158 |
159 | ```shell
160 | curl -X 'POST' \
161 | 'http://localhost:8091/customers' \
162 | -H 'accept: */*' \
163 | -H 'Content-Type: application/json' \
164 | -d '{
165 | "email" : "",
166 | "firstname" : "",
167 | "lastname" : "Love",
168 | "phone" : "555-555-5552",
169 | "address" : "452 Parkside Lane",
170 | "city" : "Denver",
171 | "state": "CO",
172 | "zip": "80235"
173 | }'
174 | ```
175 |
176 |
177 | In psql
178 |
179 | ```sql
180 | select * from customer.customers;
181 |
182 | ```
183 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | LinkedIn Learning Exercise Files License Agreement
2 | ==================================================
3 |
4 | This License Agreement (the "Agreement") is a binding legal agreement
5 | between you (as an individual or entity, as applicable) and LinkedIn
6 | Corporation (“LinkedIn”). By downloading or using the LinkedIn Learning
7 | exercise files in this repository (“Licensed Materials”), you agree to
8 | be bound by the terms of this Agreement. If you do not agree to these
9 | terms, do not download or use the Licensed Materials.
10 |
11 | 1. License.
12 | - a. Subject to the terms of this Agreement, LinkedIn hereby grants LinkedIn
13 | members during their LinkedIn Learning subscription a non-exclusive,
14 | non-transferable copyright license, for internal use only, to 1) make a
15 | reasonable number of copies of the Licensed Materials, and 2) make
16 | derivative works of the Licensed Materials for the sole purpose of
17 | practicing skills taught in LinkedIn Learning courses.
18 | - b. Distribution. Unless otherwise noted in the Licensed Materials, subject
19 | to the terms of this Agreement, LinkedIn hereby grants LinkedIn members
20 | with a LinkedIn Learning subscription a non-exclusive, non-transferable
21 | copyright license to distribute the Licensed Materials, except the
22 | Licensed Materials may not be included in any product or service (or
23 | otherwise used) to instruct or educate others.
24 |
25 | 2. Restrictions and Intellectual Property.
26 | - a. You may not to use, modify, copy, make derivative works of, publish,
27 | distribute, rent, lease, sell, sublicense, assign or otherwise transfer the
28 | Licensed Materials, except as expressly set forth above in Section 1.
29 | - b. Linkedin (and its licensors) retains its intellectual property rights
30 | in the Licensed Materials. Except as expressly set forth in Section 1,
31 | LinkedIn grants no licenses.
32 | - c. You indemnify LinkedIn and its licensors and affiliates for i) any
33 | alleged infringement or misappropriation of any intellectual property rights
34 | of any third party based on modifications you make to the Licensed Materials,
35 | ii) any claims arising from your use or distribution of all or part of the
36 | Licensed Materials and iii) a breach of this Agreement. You will defend, hold
37 | harmless, and indemnify LinkedIn and its affiliates (and our and their
38 | respective employees, shareholders, and directors) from any claim or action
39 | brought by a third party, including all damages, liabilities, costs and
40 | expenses, including reasonable attorneys’ fees, to the extent resulting from,
41 | alleged to have resulted from, or in connection with: (a) your breach of your
42 | obligations herein; or (b) your use or distribution of any Licensed Materials.
43 |
44 | 3. Open source. This code may include open source software, which may be
45 | subject to other license terms as provided in the files.
46 |
47 | 4. Warranty Disclaimer. LINKEDIN PROVIDES THE LICENSED MATERIALS ON AN “AS IS”
48 | AND “AS AVAILABLE” BASIS. LINKEDIN MAKES NO REPRESENTATION OR WARRANTY,
49 | WHETHER EXPRESS OR IMPLIED, ABOUT THE LICENSED MATERIALS, INCLUDING ANY
50 | REPRESENTATION THAT THE LICENSED MATERIALS WILL BE FREE OF ERRORS, BUGS OR
51 | INTERRUPTIONS, OR THAT THE LICENSED MATERIALS ARE ACCURATE, COMPLETE OR
52 | OTHERWISE VALID. TO THE FULLEST EXTENT PERMITTED BY LAW, LINKEDIN AND ITS
53 | AFFILIATES DISCLAIM ANY IMPLIED OR STATUTORY WARRANTY OR CONDITION, INCLUDING
54 | ANY IMPLIED WARRANTY OR CONDITION OF MERCHANTABILITY OR FITNESS FOR A
55 | PARTICULAR PURPOSE, AVAILABILITY, SECURITY, TITLE AND/OR NON-INFRINGEMENT.
56 | YOUR USE OF THE LICENSED MATERIALS IS AT YOUR OWN DISCRETION AND RISK, AND
57 | YOU WILL BE SOLELY RESPONSIBLE FOR ANY DAMAGE THAT RESULTS FROM USE OF THE
58 | LICENSED MATERIALS TO YOUR COMPUTER SYSTEM OR LOSS OF DATA. NO ADVICE OR
59 | INFORMATION, WHETHER ORAL OR WRITTEN, OBTAINED BY YOU FROM US OR THROUGH OR
60 | FROM THE LICENSED MATERIALS WILL CREATE ANY WARRANTY OR CONDITION NOT
61 | EXPRESSLY STATED IN THESE TERMS.
62 |
63 | 5. Limitation of Liability. LINKEDIN SHALL NOT BE LIABLE FOR ANY INDIRECT,
64 | INCIDENTAL, SPECIAL, PUNITIVE, CONSEQUENTIAL OR EXEMPLARY DAMAGES, INCLUDING
65 | BUT NOT LIMITED TO, DAMAGES FOR LOSS OF PROFITS, GOODWILL, USE, DATA OR OTHER
66 | INTANGIBLE LOSSES . IN NO EVENT WILL LINKEDIN'S AGGREGATE LIABILITY TO YOU
67 | EXCEED $100. THIS LIMITATION OF LIABILITY SHALL:
68 | - i. APPLY REGARDLESS OF WHETHER (A) YOU BASE YOUR CLAIM ON CONTRACT, TORT,
69 | STATUTE, OR ANY OTHER LEGAL THEORY, (B) WE KNEW OR SHOULD HAVE KNOWN ABOUT
70 | THE POSSIBILITY OF SUCH DAMAGES, OR (C) THE LIMITED REMEDIES PROVIDED IN THIS
71 | SECTION FAIL OF THEIR ESSENTIAL PURPOSE; AND
72 | - ii. NOT APPLY TO ANY DAMAGE THAT LINKEDIN MAY CAUSE YOU INTENTIONALLY OR
73 | KNOWINGLY IN VIOLATION OF THESE TERMS OR APPLICABLE LAW, OR AS OTHERWISE
74 | MANDATED BY APPLICABLE LAW THAT CANNOT BE DISCLAIMED IN THESE TERMS.
75 |
76 | 6. Termination. This Agreement automatically terminates upon your breach of
77 | this Agreement or termination of your LinkedIn Learning subscription. On
78 | termination, all licenses granted under this Agreement will terminate
79 | immediately and you will delete the Licensed Materials. Sections 2-7 of this
80 | Agreement survive any termination of this Agreement. LinkedIn may discontinue
81 | the availability of some or all of the Licensed Materials at any time for any
82 | reason.
83 |
84 | 7. Miscellaneous. This Agreement will be governed by and construed in
85 | accordance with the laws of the State of California without regard to conflict
86 | of laws principles. The exclusive forum for any disputes arising out of or
87 | relating to this Agreement shall be an appropriate federal or state court
88 | sitting in the County of Santa Clara, State of California. If LinkedIn does
89 | not act to enforce a breach of this Agreement, that does not mean that
90 | LinkedIn has waived its right to enforce this Agreement. The Agreement does
91 | not create a partnership, agency relationship, or joint venture between the
92 | parties. Neither party has the power or authority to bind the other or to
93 | create any obligation or responsibility on behalf of the other. You may not,
94 | without LinkedIn’s prior written consent, assign or delegate any rights or
95 | obligations under these terms, including in connection with a change of
96 | control. Any purported assignment and delegation shall be ineffective. The
97 | Agreement shall bind and inure to the benefit of the parties, their respective
98 | successors and permitted assigns. If any provision of the Agreement is
99 | unenforceable, that provision will be modified to render it enforceable to the
100 | extent possible to give effect to the parties’ intentions and the remaining
101 | provisions will not be affected. This Agreement is the only agreement between
102 | you and LinkedIn regarding the Licensed Materials, and supersedes all prior
103 | agreements relating to the Licensed Materials.
104 |
105 | Last Updated: March 2019
106 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/BatchConfig.java:
--------------------------------------------------------------------------------
1 | package ai.data.pipeline.spring.customer;
2 |
3 | import lombok.extern.slf4j.Slf4j;
4 | import org.springframework.batch.core.Job;
5 | import org.springframework.batch.core.Step;
6 | import org.springframework.batch.core.job.builder.JobBuilder;
7 | import org.springframework.batch.core.launch.JobLauncher;
8 | import org.springframework.batch.core.launch.support.RunIdIncrementer;
9 | import org.springframework.batch.core.launch.support.TaskExecutorJobLauncher;
10 | import org.springframework.batch.core.repository.JobRepository;
11 | import org.springframework.batch.core.repository.support.ResourcelessJobRepository;
12 | import org.springframework.batch.core.step.builder.StepBuilder;
13 | import org.springframework.batch.item.ItemProcessor;
14 | import org.springframework.batch.item.ItemReader;
15 | import org.springframework.batch.item.ItemWriter;
16 | import org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider;
17 | import org.springframework.batch.item.database.JdbcBatchItemWriter;
18 | import org.springframework.batch.item.database.builder.JdbcBatchItemWriterBuilder;
19 | import org.springframework.batch.item.file.FlatFileItemReader;
20 | import org.springframework.batch.item.file.builder.FlatFileItemReaderBuilder;
21 | import org.springframework.beans.factory.annotation.Qualifier;
22 | import org.springframework.beans.factory.annotation.Value;
23 | import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
24 | import org.springframework.boot.autoconfigure.batch.BatchAutoConfiguration;
25 | import org.springframework.context.annotation.Bean;
26 | import org.springframework.context.annotation.Configuration;
27 | import org.springframework.core.io.Resource;
28 | import org.springframework.core.task.TaskExecutor;
29 | import org.springframework.transaction.PlatformTransactionManager;
30 | import ai.data.pipeline.spring.customer.domain.Customer;
31 | import ai.data.pipeline.spring.customer.mapper.CustomerFieldMapper;
32 |
33 | import javax.sql.DataSource;
34 |
35 | /**
36 | * @author Gregory Green
37 | *
38 | * Spring configuration for launching the Spring Batch application
39 | *
40 | */
41 | @Configuration
42 | @EnableAutoConfiguration(exclude = {BatchAutoConfiguration.class})
43 | @Slf4j
44 | public class BatchConfig {
45 |
46 | //Number of records per chunk (written to the database at a time); read from property "spring.batch.chuck.size" (key is spelled "chuck"), default 10
47 | @Value("${spring.batch.chuck.size:10}")
48 | private int chunkSize;
49 | //Upsert statement for customer.customers: inserts a row, or updates the existing row when the email already exists
50 | private static final String saveSql = """
51 | insert into customer.customers(email,first_name,last_name,phone,address,city,state,zip)
52 | values (:contact.email,
53 | :firstName,
54 | :lastName,
55 | :contact.phone,
56 | :location.address,
57 | :location.city,
58 | :location.state,
59 | :location.zip)
60 | on CONFLICT (email)
61 | DO UPDATE SET first_name = :firstName,
62 | last_name = :lastName,
63 | phone = :contact.phone,
64 | address = :location.address,
65 | city = :location.city,
66 | state = :location.state,
67 | zip = :location.zip
68 | """;
69 |
70 | //The input CSV file, resolved from property "source.input.file.csv" (no default value)
71 | @Value("${source.input.file.csv}")
72 | private Resource customerInputResource;
73 |
74 | //The name of the job
75 | private final static String jobName = "load-customer";
76 |
77 | /**
78 | * Create the chunk-oriented step named "loadCustomerStep" from the provided reader, processor and writer, using chunkSize records per chunk
79 | * @param itemReader the customer record item reader
80 | * @param processor the processor applied to each customer record
81 | * @param writer the database writer
82 | * @param jobRepository the Spring Batch job repository
83 | * @param transactionManager the transaction manager used for each chunk
84 | * @return the created step
85 | */
86 | @Bean
87 | public Step loadCustomerStep(ItemReader itemReader,
88 | ItemProcessor processor,
89 | ItemWriter writer,
90 | JobRepository jobRepository,
91 | PlatformTransactionManager transactionManager) {
92 | return new StepBuilder("loadCustomerStep", jobRepository)
93 | .chunk(chunkSize,transactionManager)
94 | .reader(itemReader)
95 | .processor(processor)
96 | .writer(writer)
97 | .build();
98 | }
99 |
100 | /**
101 | * Construct a reader that parses the delimited customer CSV resource into the named columns (id through zip) and maps each row with the given mapper
102 | * @param mapper the customer field mapper
103 | * @return the reader
104 | */
105 | @Bean
106 | public FlatFileItemReader reader(CustomerFieldMapper mapper) {
107 | return new FlatFileItemReaderBuilder()
108 | .name("customerItemReader")
109 | .resource(customerInputResource)
110 | .delimited()
111 | .names("id","firstName", "lastName","email"
112 | ,"phone","address","city","state"
113 | ,"zip"
114 | )
115 | .fieldSetMapper(mapper)
116 | .build();
117 | }
118 |
119 | /**
120 | * Construct a JDBC batch writer that executes saveSql for each customer record (insert, or update of the existing row on an email conflict)
121 | * @param dataSource the JDBC datasource
122 | * @return the JDBC writer
123 | */
124 | @Bean
125 | public JdbcBatchItemWriter writer(DataSource dataSource) {
126 |
127 | return new JdbcBatchItemWriterBuilder()
128 | .sql(saveSql)
129 | .itemSqlParameterSourceProvider(new BeanPropertyItemSqlParameterSourceProvider<>())
130 | .dataSource(dataSource)
131 | .build();
132 | }
133 |
134 |
135 | /**
136 | * Create the job launcher, configured with the given job repository and task executor
137 | * @param jobRepository the job repository (the bean qualified as "jobRepository")
138 | * @param taskExecutor the task executor
139 | * @return the job launcher
140 | */
141 | @Bean
142 | public JobLauncher batchJobLauncher(@Qualifier("jobRepository") JobRepository jobRepository,
143 | TaskExecutor taskExecutor) {
144 | var jobLauncher = new TaskExecutorJobLauncher();
145 | jobLauncher.setJobRepository(jobRepository);
146 | jobLauncher.setTaskExecutor(taskExecutor);
147 | return jobLauncher;
148 | }
149 |
150 | /**
151 | * Creates a Spring Batch Job, named jobName plus the current time in milliseconds, that runs the given step
152 | * @param jobRepository the job repository
153 | * @param step the Job step
154 | * @return the created job
155 | */
156 | @Bean
157 | public Job job(JobRepository jobRepository,
158 | Step step){
159 |
160 | return new JobBuilder(jobName+System.currentTimeMillis(),jobRepository)
161 | .incrementer(new RunIdIncrementer())
162 | .flow(step).end().build();
163 | }
164 |
165 |
166 | /**
167 | * Create a repository implementation that does not save batch information to the database.
168 | * This is used to simplify this example. Note: saving batch information, such as the status of each load,
169 | * is recommended for production use.
170 | *
171 | * @return the job repository (a new ResourcelessJobRepository)
172 | */
173 | @Bean
174 | JobRepository jobRepository()
175 | {
176 | //return a job repository that does not persist batch information
177 | return new ResourcelessJobRepository();
178 |
179 | }
180 |
181 |
182 | }
183 |
--------------------------------------------------------------------------------
/applications/batching/customer-batch/mvnw.cmd:
--------------------------------------------------------------------------------
1 | <# : batch portion
2 | @REM ----------------------------------------------------------------------------
3 | @REM Licensed to the Apache Software Foundation (ASF) under one
4 | @REM or more contributor license agreements. See the NOTICE file
5 | @REM distributed with this work for additional information
6 | @REM regarding copyright ownership. The ASF licenses this file
7 | @REM to you under the Apache License, Version 2.0 (the
8 | @REM "License"); you may not use this file except in compliance
9 | @REM with the License. You may obtain a copy of the License at
10 | @REM
11 | @REM http://www.apache.org/licenses/LICENSE-2.0
12 | @REM
13 | @REM Unless required by applicable law or agreed to in writing,
14 | @REM software distributed under the License is distributed on an
15 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | @REM KIND, either express or implied. See the License for the
17 | @REM specific language governing permissions and limitations
18 | @REM under the License.
19 | @REM ----------------------------------------------------------------------------
20 |
21 | @REM ----------------------------------------------------------------------------
22 | @REM Apache Maven Wrapper startup batch script, version 3.3.2
23 | @REM
24 | @REM Optional ENV vars
25 | @REM MVNW_REPOURL - repo url base for downloading maven distribution
26 | @REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
27 | @REM MVNW_VERBOSE - true: enable verbose log; others: silence the output
28 | @REM ----------------------------------------------------------------------------
29 |
30 | @IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0)
31 | @SET __MVNW_CMD__=
32 | @SET __MVNW_ERROR__=
33 | @SET __MVNW_PSMODULEP_SAVE=%PSModulePath%
34 | @SET PSModulePath=
35 | @FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @(
36 | IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B)
37 | )
38 | @SET PSModulePath=%__MVNW_PSMODULEP_SAVE%
39 | @SET __MVNW_PSMODULEP_SAVE=
40 | @SET __MVNW_ARG0_NAME__=
41 | @SET MVNW_USERNAME=
42 | @SET MVNW_PASSWORD=
43 | @IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*)
44 | @echo Cannot start maven from wrapper >&2 && exit /b 1
45 | @GOTO :EOF
46 | : end batch / begin powershell #>
47 |
48 | $ErrorActionPreference = "Stop"
49 | if ($env:MVNW_VERBOSE -eq "true") {
50 | $VerbosePreference = "Continue"
51 | }
52 |
53 | # calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties
54 | $distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl
55 | if (!$distributionUrl) {
56 | Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties"
57 | }
58 |
59 | switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) {
60 | "maven-mvnd-*" {
61 | $USE_MVND = $true
62 | $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip"
63 | $MVN_CMD = "mvnd.cmd"
64 | break
65 | }
66 | default {
67 | $USE_MVND = $false
68 | $MVN_CMD = $script -replace '^mvnw','mvn'
69 | break
70 | }
71 | }
72 |
73 | # apply MVNW_REPOURL (re-root the distribution URL under the mirror, keeping the path after the repo pattern) and calculate MAVEN_HOME
74 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-VERSION,maven-mvnd-VERSION-PLATFORM}/HASH
75 | if ($env:MVNW_REPOURL) {
76 | $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } # mvnd distributions live under /maven/mvnd/, plain Maven under /org/apache/maven/
77 | $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace ('^.*' + $MVNW_REPO_PATTERN),'')" # parentheses required: ',' binds tighter than '+'
78 | }
79 | $distributionUrlName = $distributionUrl -replace '^.*/',''
80 | $distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$',''
81 | $MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain"
82 | if ($env:MAVEN_USER_HOME) {
83 | $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain"
84 | }
85 | $MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join ''
86 | $MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME"
87 |
88 | if (Test-Path -Path "$MAVEN_HOME" -PathType Container) {
89 | Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME"
90 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
91 | exit $?
92 | }
93 |
94 | if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) {
95 | Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl"
96 | }
97 |
98 | # prepare tmp dir
99 | $TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile
100 | $TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir"
101 | $TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null
102 | trap {
103 | if ($TMP_DOWNLOAD_DIR.Exists) {
104 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
105 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
106 | }
107 | }
108 |
109 | New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null
110 |
111 | # Download and Install Apache Maven
112 | Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
113 | Write-Verbose "Downloading from: $distributionUrl"
114 | Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
115 |
116 | $webclient = New-Object System.Net.WebClient
117 | if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) {
118 | $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD)
119 | }
120 | [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
121 | $webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null
122 |
123 | # If specified, validate the SHA-256 sum of the Maven distribution zip file
124 | $distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum
125 | if ($distributionSha256Sum) {
126 | if ($USE_MVND) {
127 | Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties."
128 | }
129 | Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash
130 | if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) {
131 | Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property."
132 | }
133 | }
134 |
135 | # unzip and move
136 | Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null
137 | Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null
138 | try {
139 | Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null
140 | } catch {
141 | if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) {
142 | Write-Error "fail to move MAVEN_HOME"
143 | }
144 | } finally {
145 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
146 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
147 | }
148 |
149 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
150 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-processor/mvnw.cmd:
--------------------------------------------------------------------------------
1 | <# : batch portion
2 | @REM ----------------------------------------------------------------------------
3 | @REM Licensed to the Apache Software Foundation (ASF) under one
4 | @REM or more contributor license agreements. See the NOTICE file
5 | @REM distributed with this work for additional information
6 | @REM regarding copyright ownership. The ASF licenses this file
7 | @REM to you under the Apache License, Version 2.0 (the
8 | @REM "License"); you may not use this file except in compliance
9 | @REM with the License. You may obtain a copy of the License at
10 | @REM
11 | @REM http://www.apache.org/licenses/LICENSE-2.0
12 | @REM
13 | @REM Unless required by applicable law or agreed to in writing,
14 | @REM software distributed under the License is distributed on an
15 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | @REM KIND, either express or implied. See the License for the
17 | @REM specific language governing permissions and limitations
18 | @REM under the License.
19 | @REM ----------------------------------------------------------------------------
20 |
21 | @REM ----------------------------------------------------------------------------
22 | @REM Apache Maven Wrapper startup batch script, version 3.3.2
23 | @REM
24 | @REM Optional ENV vars
25 | @REM MVNW_REPOURL - repo url base for downloading maven distribution
26 | @REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
27 | @REM MVNW_VERBOSE - true: enable verbose log; others: silence the output
28 | @REM ----------------------------------------------------------------------------
29 |
30 | @IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0)
31 | @SET __MVNW_CMD__=
32 | @SET __MVNW_ERROR__=
33 | @SET __MVNW_PSMODULEP_SAVE=%PSModulePath%
34 | @SET PSModulePath=
35 | @FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @(
36 | IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B)
37 | )
38 | @SET PSModulePath=%__MVNW_PSMODULEP_SAVE%
39 | @SET __MVNW_PSMODULEP_SAVE=
40 | @SET __MVNW_ARG0_NAME__=
41 | @SET MVNW_USERNAME=
42 | @SET MVNW_PASSWORD=
43 | @IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*)
44 | @echo Cannot start maven from wrapper >&2 && exit /b 1
45 | @GOTO :EOF
46 | : end batch / begin powershell #>
47 |
48 | $ErrorActionPreference = "Stop"
49 | if ($env:MVNW_VERBOSE -eq "true") {
50 | $VerbosePreference = "Continue"
51 | }
52 |
53 | # calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties
54 | $distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl
55 | if (!$distributionUrl) {
56 | Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties"
57 | }
58 |
59 | switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) {
60 | "maven-mvnd-*" {
61 | $USE_MVND = $true
62 | $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip"
63 | $MVN_CMD = "mvnd.cmd"
64 | break
65 | }
66 | default {
67 | $USE_MVND = $false
68 | $MVN_CMD = $script -replace '^mvnw','mvn'
69 | break
70 | }
71 | }
72 |
73 | # apply MVNW_REPOURL (re-root the distribution URL under the mirror, keeping the path after the repo pattern) and calculate MAVEN_HOME
74 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-VERSION,maven-mvnd-VERSION-PLATFORM}/HASH
75 | if ($env:MVNW_REPOURL) {
76 | $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } # mvnd distributions live under /maven/mvnd/, plain Maven under /org/apache/maven/
77 | $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace ('^.*' + $MVNW_REPO_PATTERN),'')" # parentheses required: ',' binds tighter than '+'
78 | }
79 | $distributionUrlName = $distributionUrl -replace '^.*/',''
80 | $distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$',''
81 | $MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain"
82 | if ($env:MAVEN_USER_HOME) {
83 | $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain"
84 | }
85 | $MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join ''
86 | $MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME"
87 |
88 | if (Test-Path -Path "$MAVEN_HOME" -PathType Container) {
89 | Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME"
90 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
91 | exit $?
92 | }
93 |
94 | if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) {
95 | Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl"
96 | }
97 |
98 | # prepare tmp dir
99 | $TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile
100 | $TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir"
101 | $TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null
102 | trap {
103 | if ($TMP_DOWNLOAD_DIR.Exists) {
104 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
105 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
106 | }
107 | }
108 |
109 | New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null
110 |
111 | # Download and Install Apache Maven
112 | Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
113 | Write-Verbose "Downloading from: $distributionUrl"
114 | Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
115 |
116 | $webclient = New-Object System.Net.WebClient
117 | if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) {
118 | $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD)
119 | }
120 | [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
121 | $webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null
122 |
123 | # If specified, validate the SHA-256 sum of the Maven distribution zip file
124 | $distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum
125 | if ($distributionSha256Sum) {
126 | if ($USE_MVND) {
127 | Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties."
128 | }
129 | Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash
130 | if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) {
131 | Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property."
132 | }
133 | }
134 |
135 | # unzip and move
136 | Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null
137 | Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null
138 | try {
139 | Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null
140 | } catch {
141 | if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) {
142 | Write-Error "fail to move MAVEN_HOME"
143 | }
144 | } finally {
145 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
146 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
147 | }
148 |
149 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
150 |
--------------------------------------------------------------------------------
/applications/processors/ai-sentiment-rag-processor/mvnw.cmd:
--------------------------------------------------------------------------------
1 | <# : batch portion
2 | @REM ----------------------------------------------------------------------------
3 | @REM Licensed to the Apache Software Foundation (ASF) under one
4 | @REM or more contributor license agreements. See the NOTICE file
5 | @REM distributed with this work for additional information
6 | @REM regarding copyright ownership. The ASF licenses this file
7 | @REM to you under the Apache License, Version 2.0 (the
8 | @REM "License"); you may not use this file except in compliance
9 | @REM with the License. You may obtain a copy of the License at
10 | @REM
11 | @REM http://www.apache.org/licenses/LICENSE-2.0
12 | @REM
13 | @REM Unless required by applicable law or agreed to in writing,
14 | @REM software distributed under the License is distributed on an
15 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | @REM KIND, either express or implied. See the License for the
17 | @REM specific language governing permissions and limitations
18 | @REM under the License.
19 | @REM ----------------------------------------------------------------------------
20 |
21 | @REM ----------------------------------------------------------------------------
22 | @REM Apache Maven Wrapper startup batch script, version 3.3.2
23 | @REM
24 | @REM Optional ENV vars
25 | @REM MVNW_REPOURL - repo url base for downloading maven distribution
26 | @REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
27 | @REM MVNW_VERBOSE - true: enable verbose log; others: silence the output
28 | @REM ----------------------------------------------------------------------------
29 |
30 | @IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0)
31 | @SET __MVNW_CMD__=
32 | @SET __MVNW_ERROR__=
33 | @SET __MVNW_PSMODULEP_SAVE=%PSModulePath%
34 | @SET PSModulePath=
35 | @FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @(
36 | IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B)
37 | )
38 | @SET PSModulePath=%__MVNW_PSMODULEP_SAVE%
39 | @SET __MVNW_PSMODULEP_SAVE=
40 | @SET __MVNW_ARG0_NAME__=
41 | @SET MVNW_USERNAME=
42 | @SET MVNW_PASSWORD=
43 | @IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*)
44 | @echo Cannot start maven from wrapper >&2 && exit /b 1
45 | @GOTO :EOF
46 | : end batch / begin powershell #>
47 |
48 | $ErrorActionPreference = "Stop"
49 | if ($env:MVNW_VERBOSE -eq "true") {
50 | $VerbosePreference = "Continue"
51 | }
52 |
53 | # calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties
54 | $distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl
55 | if (!$distributionUrl) {
56 | Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties"
57 | }
58 |
59 | switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) {
60 | "maven-mvnd-*" {
61 | $USE_MVND = $true
62 | $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip"
63 | $MVN_CMD = "mvnd.cmd"
64 | break
65 | }
66 | default {
67 | $USE_MVND = $false
68 | $MVN_CMD = $script -replace '^mvnw','mvn'
69 | break
70 | }
71 | }
72 |
73 | # apply MVNW_REPOURL (re-root the distribution URL under the mirror, keeping the path after the repo pattern) and calculate MAVEN_HOME
74 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-VERSION,maven-mvnd-VERSION-PLATFORM}/HASH
75 | if ($env:MVNW_REPOURL) {
76 | $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } # mvnd distributions live under /maven/mvnd/, plain Maven under /org/apache/maven/
77 | $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace ('^.*' + $MVNW_REPO_PATTERN),'')" # parentheses required: ',' binds tighter than '+'
78 | }
79 | $distributionUrlName = $distributionUrl -replace '^.*/',''
80 | $distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$',''
81 | $MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain"
82 | if ($env:MAVEN_USER_HOME) {
83 | $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain"
84 | }
85 | $MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join ''
86 | $MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME"
87 |
88 | if (Test-Path -Path "$MAVEN_HOME" -PathType Container) {
89 | Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME"
90 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
91 | exit $?
92 | }
93 |
94 | if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) {
95 | Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl"
96 | }
97 |
98 | # prepare tmp dir
99 | $TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile
100 | $TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir"
101 | $TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null
102 | trap {
103 | if ($TMP_DOWNLOAD_DIR.Exists) {
104 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
105 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
106 | }
107 | }
108 |
109 | New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null
110 |
111 | # Download and Install Apache Maven
112 | Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
113 | Write-Verbose "Downloading from: $distributionUrl"
114 | Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
115 |
116 | $webclient = New-Object System.Net.WebClient
117 | if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) {
118 | $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD)
119 | }
120 | [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
121 | $webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null
122 |
123 | # If specified, validate the SHA-256 sum of the Maven distribution zip file
124 | $distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum
125 | if ($distributionSha256Sum) {
126 | if ($USE_MVND) {
127 | Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties."
128 | }
129 | Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash
130 | if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) {
131 | Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property."
132 | }
133 | }
134 |
135 | # unzip and move
136 | Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null
137 | Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null
138 | try {
139 | Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null
140 | } catch {
141 | if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) {
142 | Write-Error "fail to move MAVEN_HOME"
143 | }
144 | } finally {
145 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
146 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
147 | }
148 |
149 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
150 |
--------------------------------------------------------------------------------
/applications/processors/postgres-embedding-similarity-processor/mvnw.cmd:
--------------------------------------------------------------------------------
1 | <# : batch portion
2 | @REM ----------------------------------------------------------------------------
3 | @REM Licensed to the Apache Software Foundation (ASF) under one
4 | @REM or more contributor license agreements. See the NOTICE file
5 | @REM distributed with this work for additional information
6 | @REM regarding copyright ownership. The ASF licenses this file
7 | @REM to you under the Apache License, Version 2.0 (the
8 | @REM "License"); you may not use this file except in compliance
9 | @REM with the License. You may obtain a copy of the License at
10 | @REM
11 | @REM http://www.apache.org/licenses/LICENSE-2.0
12 | @REM
13 | @REM Unless required by applicable law or agreed to in writing,
14 | @REM software distributed under the License is distributed on an
15 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 | @REM KIND, either express or implied. See the License for the
17 | @REM specific language governing permissions and limitations
18 | @REM under the License.
19 | @REM ----------------------------------------------------------------------------
20 |
21 | @REM ----------------------------------------------------------------------------
22 | @REM Apache Maven Wrapper startup batch script, version 3.3.2
23 | @REM
24 | @REM Optional ENV vars
25 | @REM MVNW_REPOURL - repo url base for downloading maven distribution
26 | @REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
27 | @REM MVNW_VERBOSE - true: enable verbose log; others: silence the output
28 | @REM ----------------------------------------------------------------------------
29 |
30 | @IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0)
31 | @SET __MVNW_CMD__=
32 | @SET __MVNW_ERROR__=
33 | @SET __MVNW_PSMODULEP_SAVE=%PSModulePath%
34 | @SET PSModulePath=
35 | @FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @(
36 | IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B)
37 | )
38 | @SET PSModulePath=%__MVNW_PSMODULEP_SAVE%
39 | @SET __MVNW_PSMODULEP_SAVE=
40 | @SET __MVNW_ARG0_NAME__=
41 | @SET MVNW_USERNAME=
42 | @SET MVNW_PASSWORD=
43 | @IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*)
44 | @echo Cannot start maven from wrapper >&2 && exit /b 1
45 | @GOTO :EOF
46 | : end batch / begin powershell #>
47 |
48 | $ErrorActionPreference = "Stop"
49 | if ($env:MVNW_VERBOSE -eq "true") {
50 | $VerbosePreference = "Continue"
51 | }
52 |
53 | # calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties
54 | $distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl
55 | if (!$distributionUrl) {
56 | Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties"
57 | }
58 |
59 | switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) {
60 | "maven-mvnd-*" {
61 | $USE_MVND = $true
62 | $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip"
63 | $MVN_CMD = "mvnd.cmd"
64 | break
65 | }
66 | default {
67 | $USE_MVND = $false
68 | $MVN_CMD = $script -replace '^mvnw','mvn'
69 | break
70 | }
71 | }
72 |
73 | # apply MVNW_REPOURL (re-root the distribution URL under the mirror, keeping the path after the repo pattern) and calculate MAVEN_HOME
74 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-VERSION,maven-mvnd-VERSION-PLATFORM}/HASH
75 | if ($env:MVNW_REPOURL) {
76 | $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } # mvnd distributions live under /maven/mvnd/, plain Maven under /org/apache/maven/
77 | $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace ('^.*' + $MVNW_REPO_PATTERN),'')" # parentheses required: ',' binds tighter than '+'
78 | }
79 | $distributionUrlName = $distributionUrl -replace '^.*/',''
80 | $distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$',''
81 | $MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain"
82 | if ($env:MAVEN_USER_HOME) {
83 | $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain"
84 | }
85 | $MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join ''
86 | $MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME"
87 |
88 | if (Test-Path -Path "$MAVEN_HOME" -PathType Container) {
89 | Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME"
90 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
91 | exit $?
92 | }
93 |
94 | if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) {
95 | Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl"
96 | }
97 |
98 | # prepare tmp dir
99 | $TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile
100 | $TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir"
101 | $TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null
102 | trap {
103 | if ($TMP_DOWNLOAD_DIR.Exists) {
104 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
105 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
106 | }
107 | }
108 |
109 | New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null
110 |
111 | # Download and Install Apache Maven
112 | Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
113 | Write-Verbose "Downloading from: $distributionUrl"
114 | Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
115 |
116 | $webclient = New-Object System.Net.WebClient
117 | if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) {
118 | $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD)
119 | }
120 | [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
121 | $webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null
122 |
123 | # If specified, validate the SHA-256 sum of the Maven distribution zip file
124 | $distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum
125 | if ($distributionSha256Sum) {
126 | if ($USE_MVND) {
127 | Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties."
128 | }
129 | Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash
130 | if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) {
131 | Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property."
132 | }
133 | }
134 |
135 | # unzip and move
136 | Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null
137 | Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null
138 | try {
139 | Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null
140 | } catch {
141 | if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) {
142 | Write-Error "fail to move MAVEN_HOME"
143 | }
144 | } finally {
145 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
146 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
147 | }
148 |
149 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
150 |
--------------------------------------------------------------------------------
/docs/05_05_Vector Similarity Data Pipeline with Spring AI and Postgres.md:
--------------------------------------------------------------------------------
1 | # Setup
2 | ```shell
3 | docker volume rm postgresml_data
4 | ```
5 |
6 |
7 | Run Rabbit
8 |
9 |
10 | ```shell
11 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management
12 | ```
13 |
14 |
15 | Run Postgres
16 |
17 |
18 | ```shell
19 | docker run --name postgres --network data-pipeline --rm \
20 | -e POSTGRES_USER=postgres \
21 | -e POSTGRES_PASSWORD=postgres \
22 | -e POSTGRES_DB=postgres \
23 | -p 5432:5432 \
24 | -it postgres
25 | ```
26 |
27 | ```shell
28 | docker exec -it postgres psql -U postgres
29 | ```
30 |
31 | I will create the customer_similarities table.
32 |
33 | ```sql
34 | create schema if not exists customer ;
35 |
36 | create table customer.customer_similarities(
37 | customer_id text NOT NULL,
38 | similarities jsonb NOT NULL,
39 | PRIMARY KEY (customer_id)
40 | );
41 | ```
42 |
43 | Here I am using a similarities column with a special data type.
44 | In the previous example, I was able to parse the JSON to store it into individual columns (such as the email, first and last name).
45 |
46 | In this case, I wanted to show you that you can just store json natively into Postgres
47 | using the JSONB data type.
48 |
49 |
50 | Run PostgresML with PgVector
51 |
52 | ```shell
53 | docker run --rm --name postgresml \
54 | -it \
55 | --network data-pipeline \
56 | -v postgresml_data:/var/lib/postgresql \
57 | -p 6432:5432 \
58 | -p 8000:8000 \
59 | ghcr.io/postgresml/postgresml:2.10.0 \
60 | sudo -u postgresml psql -d postgresml
61 | ```
62 |
63 |
64 | ```sql
65 | CREATE EXTENSION vector;
66 | ```
67 |
68 |
69 | Here is an example similarity search showing a perfect match between 2 identical vectors
70 |
71 | ```sql
72 | SELECT 1- ('[1, 0, 0]' <=> '[1, 0, 0]')::float AS cosine_distance;
73 | ```
74 | - The <=> is pgvector's cosine distance operator; subtracting the distance from 1 gives the cosine similarity
75 | - ::float converts the cosine result to a float
76 |
77 |
78 | Here is an example of not an exact match, but very similar vectors
79 |
80 | ```sql
81 | SELECT 1- ('[1, 1, 0]' <=> '[1, 1, 0.5]')::float AS cosine_distance;
82 | ```
83 |
84 | Here is one more example of 2 vector embeddings that are opposite of each other
85 |
86 | ```sql
87 | SELECT 1- ('[1, 1, 1]' <=> '[-1, -1, -1]')::float AS cosine_distance;
88 | ```
89 |
90 |
91 | ---------------------------
92 |
93 |
94 | Start Http
95 |
96 | ```shell
97 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=customers --server.port=8095 --spring.cloud.stream.bindings.output.destination=customers.similarities.input
98 | ```
99 |
100 |
101 | Start similarity processor
102 |
103 | ```shell
104 | java -jar applications/processors/postgres-embedding-similarity-processor/target/postgres-embedding-similarity-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.similarities.input --spring.cloud.stream.bindings.output.destination=customers.similarities.output --embedding.similarity.processor.topK=3 --embedding.similarity.processor.similarityThreshold="0.90" --embedding.similarity.processor.documentTextFieldNames="email,phone,zip,state,city,address,lastName,firstName" --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor
105 | ```
106 |
107 | See [EmbeddingSimilarityFunction.java](../applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/function/EmbeddingSimilarityFunction.java)
108 | - It is provided with a vector store that uses Postgres with the pgvector extension
109 | - It uses an object to convert the payload to a Spring AI Document object
110 | - See [PayloadToDocument.java](../applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/conversion/PayloadToDocument.java)
111 | - The text field names are passed in at runtime.
112 | - So the vector is built from fields such as email,phone,zip,state,city,address,lastName,firstName that are parsed from the JSON payload
113 | - The processor then builds the search criteria using the Spring AI abstraction.
114 | - This results in a limited number of "top" or best match results
115 | - Based on the customer information
116 | - I set a threshold, for example the similarity score must be at least 0.90
117 | - The list of results is converted to JSON
118 | - and sent to the sink using RabbitMQ
119 |
120 |
121 | - See [SimilarDocuments.java](../applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/domain/SimilarDocuments.java)
122 |
123 |
124 |
125 | Start Sink
126 |
127 |
128 | ```shell
129 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination="customers.similarities.output" --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-similarity.yml --spring.cloud.stream.bindings.input.group=postgres-sink
130 | ```
131 |
132 | See [postgres-similarity.yml](../applications/sinks/postgres-sink/src/main/resources/postgres-similarity.yml)
133 |
134 | ```shell
135 | curl -X 'POST' \
136 | 'http://localhost:8095/customers' \
137 | -H 'accept: */*' \
138 | -H 'Content-Type: application/json' \
139 | -d '{
140 | "id" : "email@email",
141 | "firstName" : "Josiah",
142 | "lastName" : "Imani",
143 | "email" : "email@email",
144 | "phone" : "555-555-5555",
145 | "address" : "12 Straight St",
146 | "city" : "gold",
147 | "state" : "ny",
148 | "zip": "55555"
149 | }'
150 | ```
151 |
152 |
153 |
154 |
155 |
156 | ```shell
157 | curl -X 'POST' \
158 | 'http://localhost:8095/customers' \
159 | -H 'accept: */*' \
160 | -H 'Content-Type: application/json' \
161 | -d ' {
162 | "id" : "duplicate1@email",
163 | "firstName" : "Josiah",
164 | "lastName" : "Imani",
165 | "email" : "duplicate1@email",
166 | "phone" : "555-555-5555",
167 | "address" : "12 Straight St",
168 | "city" : "gold",
169 | "state" : "ny",
170 | "zip": "55555"
171 | }'
172 | ```
173 |
174 | ----------------------
175 | In psql
176 |
177 | Now let's look at the results in the customer_similarities table.
178 |
179 | ```sql
180 | select *
181 | from customer.customer_similarities;
182 | ```
183 |
184 |
185 | The sink stores the similarities as a JSON array.
186 | If needed, I can use Postgres to parse the records.
187 |
188 | ```sql
189 | select customer_id,
190 | jsonb_array_elements(similarities) ->>'id' as email,
191 | jsonb_array_elements(similarities) ->>'text' as text,
192 | jsonb_array_elements(similarities) ->>'score' as score,
193 | (jsonb_array_elements(similarities) ->>'metadata')::json ->> 'distance' as distance
194 | from customer.customer_similarities;
195 | ```
196 |
197 | The jsonb_array_elements function expands a JSON array into its individual elements.
198 | So I can select the individual fields such as text and score from the JSONB column,
199 | which is a nicer format.
200 |
201 |
202 | The records in the PostgresML vector_store database table are used by the processor to search for duplicate records
203 | based on matching similarities.
204 |
205 | ```sql
206 | select id,content from public.vector_store ;
207 | ```
208 |
209 | Any additional customer details submitted to the data pipeline will be checked for matches in this table.
210 | Spring AI along with Postgres as a vector database hides the complexity of finding duplicate records.
211 |
212 |
213 |
--------------------------------------------------------------------------------
/docs/05_04_Text Sentiment Analysis Data Pipeline with Spring AI and RAG.md:
--------------------------------------------------------------------------------
1 |
2 | # Setup
3 | ```shell
4 | docker volume rm postgresml_data
5 | ```
6 |
7 |
8 | ----------
9 |
10 | Demo
11 |
12 | ```shell
13 | ollama serve
14 | ```
15 |
16 | pull and run a model like this:
17 |
18 | ```shell
19 | ollama run llama3
20 | ```
21 |
22 | ```text
23 | Analyze the sentiment of this text: "I REALLY REALLY LOVE LONG LINE". Respond with only one word: Positive or Negative.
24 |
25 | ```
26 |
27 | ```text
28 | Analyze the sentiment of this text: "I really love long wait".
29 | Respond with only one word: Positive, or Negative, taking into account the provided context.
30 |
31 | Context:
32 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment
33 | ```
34 |
35 |
36 | ```text
37 | Analyze the sentiment of this text: "Sure, keep me waiting like I have all DAY".
38 | Respond with only one word: Positive, or Negative, taking into account the provided context.
39 |
40 | Context:
41 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment
42 | ```
43 |
44 | ```text
45 | Analyze the sentiment of this text: "Your team is doing a great job to reduce long wait time".
46 | Respond with only one word: Positive, or Negative, taking into account the provided context.
47 |
48 | Context:
49 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment
50 | ```
51 |
52 |
53 | ```text
54 | Analyze the sentiment of this text: "Oh great, another update that totally doesn’t break anything. Just what I needed.".
55 | Respond with only one word: Positive, or Negative, taking into account the provided context.
56 |
57 | Context:
58 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment
59 | ```
60 |
61 |
62 | Run Rabbit
63 |
64 | ```shell
65 | docker network create data-pipeline
66 | ```
67 |
68 | start rabbitmq
69 | ```shell
70 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management
71 | ```
72 |
73 |
74 | Run Postgres
75 |
76 | ```shell
77 | docker run --name postgres --network data-pipeline --rm \
78 | -e POSTGRES_USER=postgres \
79 | -e POSTGRES_PASSWORD=postgres \
80 | -e POSTGRES_DB=postgres \
81 | -p 5432:5432 \
82 | -it postgres
83 | ```
84 |
85 | ```shell
86 | docker exec -it postgres psql -U postgres
87 | ```
88 |
89 |
90 | ```sql
91 | create schema if not exists customer ;
92 |
93 | create table customer.feedback(
94 | feed_id text NOT NULL,
95 | email text NOT NULL,
96 | user_feedback text NOT NULL,
97 | summary text NOT NULL,
98 | feedback_dt timestamp NOT NULL DEFAULT NOW(),
99 | sentiment text NOT NULL,
100 | PRIMARY KEY (feed_id)
101 | );
102 | ```
103 |
104 | Run PostgresML
105 |
106 | ```shell
107 | docker run --rm --name postgresml \
108 | -it \
109 | --network data-pipelines
110 | -v postgresml_data:/var/lib/postgresql \
111 | -p 6432:5432 \
112 | -p 8000:8000 \
113 | ghcr.io/postgresml/postgresml:2.10.0 \
114 | sudo -u postgresml psql -d postgresml
115 | ```
116 |
117 | If you are installing Postgres on your own, or if you are using a different image than the one I have been using, you need to install the pgvector extension using this create extension statement.
118 |
119 | pgvector is already configured in the PostgresML docker image by default.
120 |
121 |
122 | ```sql
123 | CREATE EXTENSION vector;
124 | ```
125 |
126 |
127 |
128 | Create tables with vector embeddings
129 |
130 | Here is an example that creates a table with an embedding data type column. This allows you to store the embeddings, arrays of numerical values that are produced from a model
131 |
132 | ```sql
133 | CREATE TABLE items
134 | (
135 | id bigserial PRIMARY KEY,
136 | embedding vector(3)
137 | );
138 |
139 | ```
140 |
141 | You can use insert and update statements providing an array of numbers to be saved into the appropriate database column
142 |
143 | ```sql
144 | INSERT INTO items
145 | (embedding)
146 | VALUES ('[1,2,3]'), ('[4,5,6]');
147 |
148 | ```
149 |
150 | This is a very basic Postgres vector database search that determines the similarity
151 | between a provided embedding and the embedding value in the items table.
152 | The similarity is calculated based on the law of cosines.
153 | Cosine similarity is typically a value between -1 and 1. 1 is a perfect match.
154 | The <=> operator returns the cosine distance, so you subtract it from 1 to convert the distance to the cosine similarity.
155 |
156 |
157 | ```sql
158 | SELECT 1 - (embedding <=> '[3,1,2]')
159 | AS cosine_similarity
160 | FROM items;
161 | ```
162 |
163 | Start Http
164 |
165 |
166 | ```shell
167 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=feedback --server.port=8094 --spring.cloud.stream.bindings.output.destination=customers.input.feedback
168 | ```
169 |
170 |
171 | Start Processor Text Summary
172 |
173 |
174 |
175 | ```shell
176 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.input.feedback --spring.cloud.stream.bindings.output.destination=customers.output.feedback.summary --spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor
177 | ```
178 | Start Processor Text sentiment RAG
179 |
180 | ```shell
181 | java -jar applications/processors/ai-sentiment-rag-processor/target/ai-sentiment-rag-processor-0.0.1-SNAPSHOT.jar --spring.cloud.stream.bindings.input.destination=customers.output.feedback.summary --spring.cloud.stream.bindings.output.destination=customers.output.feedback.sentiment --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml"
182 | ```
183 |
184 |
185 |
186 |
187 |
188 | See [CustomerFeedbackSentimentProcessor.java](../applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessor.java)
189 | - Here the customer feedback processor accepts a CustomerFeedback object
190 | and returns the FeedbackSentiment.
191 | - the prompt will ask the Model to determine the sentiment
192 | - The summary of the feedback is passed in at runtime
193 | - Spring AI converts the response to the Sentiment enum
194 | - What is different here is the advisor
195 | - I added an additional maven dependency to this processor
196 |
197 |
198 | See [pom.xml](../applications/processors/ai-sentiment-rag-processor/pom.xml)
199 | - This now has the **spring-ai-advisors-vector-store** dependency, which adds the ability to use a vector database
200 | - **spring-ai-starter-vector-store-pgvector** using Postgres with the pgvector extension that is part of PostgresML
201 | - Now I am also using RAG
202 |
203 | See [VectorStoreConfig.java](../applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/VectorStoreConfig.java)
204 | - the CommandLineRunner is executed when the application is started.
205 | - It will load records into the vector database
206 |
207 | See [sentiment_rag_content.txt](../applications/processors/ai-sentiment-rag-processor/src/main/resources/sentiment_rag_content.txt)
208 | - this content is loaded to better detect sarcastic negative statements
209 |
210 | Start Sink
211 |
212 |
213 | ```shell
214 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination=customers.output.feedback.sentiment --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis-ollama.yml --spring.cloud.stream.bindings.input.group=postgres-sink
215 | ```
216 |
217 |
218 |
219 | ```shell
220 | curl -X 'POST' \
221 | 'http://localhost:8094/feedback' \
222 | -H 'accept: */*' \
223 | -H 'Content-Type: application/json' \
224 | -d '{
225 | "id" : "S001",
226 | "email" : "jmatthews@email",
227 | "feedback" : "You know what. It is ok. I love being on hold FOREVER. I will just take my business somewhere else."
228 | }'
229 | ```
230 |
231 |
232 |
233 | ```shell
234 | curl -X 'POST' \
235 | 'http://localhost:8094/feedback' \
236 | -H 'accept: */*' \
237 | -H 'Content-Type: application/json' \
238 | -d '{
239 | "id" : "S002",
240 | "email" : "jmatthews@email",
241 | "feedback" : "I was transferred, and had to keep repeating the problem. They should be able to see notes in the SYSTEM. And WHY are the LINES SO LONG!!!."
242 | }'
243 | ```
244 |
245 |
246 | ```shell
247 | curl -X 'POST' \
248 | 'http://localhost:8094/feedback' \
249 | -H 'accept: */*' \
250 | -H 'Content-Type: application/json' \
251 | -d '{
252 | "id" : "S003",
253 | "email" : "graceful@email",
254 | "feedback" : "THANK YOU SO MUCH!!! I LOVE THAT YOU ARE TRYING your best."
255 | }'
256 | ```
257 |
258 |
259 | See [postgres-sentiment-analysis-ollama.yml](../applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis-ollama.yml)
260 | - Taking a closer look at the sink
261 | - Postgres has the ability to convert text to json
262 | - This is done with the ::json syntax
263 | - -> allows you to get elements in the JSON object by name
264 | - So this is an easy way to get the customer feedback details
265 | - to save to the table
266 |
267 |
268 |
269 | In psql
270 |
271 | ```sql
272 | select sentiment, summary from customer.feedback;
273 |
274 | ```
275 |
--------------------------------------------------------------------------------