├── applications ├── sinks │ └── postgres-sink │ │ ├── src │ │ ├── test │ │ │ ├── resources │ │ │ │ ├── application.yml │ │ │ │ └── customer.json │ │ │ └── java │ │ │ │ └── ai │ │ │ │ └── data │ │ │ │ └── pipeline │ │ │ │ └── spring │ │ │ │ └── sink │ │ │ │ ├── Customer.java │ │ │ │ └── PostgresConsumerTest.java │ │ └── main │ │ │ ├── resources │ │ │ ├── postgres-text-summarization.yml │ │ │ ├── postgres-similarity.yml │ │ │ ├── postgres-sentiment-analysis.yml │ │ │ ├── application.yml │ │ │ └── postgres-sentiment-analysis-ollama.yml │ │ │ └── java │ │ │ └── ai │ │ │ └── data │ │ │ └── pipeline │ │ │ └── spring │ │ │ ├── PostgresSinkApp.java │ │ │ ├── JdbcConfig.java │ │ │ ├── properties │ │ │ └── SqlConsumerProperties.java │ │ │ └── sink │ │ │ └── PostgresConsumer.java │ │ └── pom.xml ├── batching │ └── customer-batch │ │ ├── .gitattributes │ │ ├── src │ │ ├── main │ │ │ ├── resources │ │ │ │ ├── application.properties │ │ │ │ └── schema-postgres.sql │ │ │ └── java │ │ │ │ └── ai │ │ │ │ └── data │ │ │ │ └── pipeline │ │ │ │ └── spring │ │ │ │ └── customer │ │ │ │ ├── domain │ │ │ │ ├── Contact.java │ │ │ │ ├── Location.java │ │ │ │ └── Customer.java │ │ │ │ ├── CustomerBatchApp.java │ │ │ │ ├── CsvConfig.java │ │ │ │ ├── CommandLineConfig.java │ │ │ │ ├── mapper │ │ │ │ └── CustomerFieldMapper.java │ │ │ │ ├── processor │ │ │ │ └── MissingRequiredFieldsFilterProcessor.java │ │ │ │ └── BatchConfig.java │ │ └── test │ │ │ └── java │ │ │ └── ai │ │ │ └── data │ │ │ └── pipeline │ │ │ └── spring │ │ │ └── customer │ │ │ ├── CustomerBatchAppTests.java │ │ │ ├── mapper │ │ │ └── CustomerFieldMapperTest.java │ │ │ ├── CsvTextFileGenerator.java │ │ │ └── processor │ │ │ └── MissingRequiredFieldsFilterProcessorTest.java │ │ ├── .gitignore │ │ ├── .mvn │ │ └── wrapper │ │ │ └── maven-wrapper.properties │ │ ├── pom.xml │ │ └── mvnw.cmd └── processors │ ├── ai-sentiment-processor │ ├── .gitattributes │ ├── src │ │ ├── main │ │ │ ├── java │ │ │ │ └── ai │ │ │ │ │ └── data │ │ 
│ │ │ └── pipeline │ │ │ │ │ └── sentiment │ │ │ │ │ ├── domains │ │ │ │ │ ├── CustomerFeedback.java │ │ │ │ │ └── FeedbackSentiment.java │ │ │ │ │ ├── AiSentimentProcessorApp.java │ │ │ │ │ ├── ChatClientConfig.java │ │ │ │ │ └── processor │ │ │ │ │ └── CustomerFeedbackSentimentProcessor.java │ │ │ └── resources │ │ │ │ └── application.yml │ │ └── test │ │ │ └── java │ │ │ └── ai │ │ │ └── data │ │ │ └── pipeline │ │ │ └── sentiment │ │ │ └── processor │ │ │ └── CustomerFeedbackSentimentProcessorTest.java │ ├── .gitignore │ ├── .mvn │ │ └── wrapper │ │ │ └── maven-wrapper.properties │ ├── pom.xml │ └── mvnw.cmd │ ├── ai-sentiment-rag-processor │ ├── .gitattributes │ ├── src │ │ ├── main │ │ │ ├── resources │ │ │ │ ├── sentiment_rag_content.txt │ │ │ │ └── application.yml │ │ │ └── java │ │ │ │ └── ai │ │ │ │ └── data │ │ │ │ └── pipeline │ │ │ │ └── sentiment │ │ │ │ ├── domains │ │ │ │ ├── CustomerFeedback.java │ │ │ │ └── FeedbackSentiment.java │ │ │ │ ├── AiSentimentProcessorApp.java │ │ │ │ ├── ChatClientConfig.java │ │ │ │ ├── VectorStoreConfig.java │ │ │ │ └── processor │ │ │ │ └── CustomerFeedbackSentimentProcessor.java │ │ └── test │ │ │ └── java │ │ │ └── ai │ │ │ └── data │ │ │ └── pipeline │ │ │ └── sentiment │ │ │ └── processor │ │ │ └── CustomerFeedbackSentimentProcessorTest.java │ ├── .gitignore │ ├── .mvn │ │ └── wrapper │ │ │ └── maven-wrapper.properties │ ├── pom.xml │ └── mvnw.cmd │ ├── postgres-embedding-similarity-processor │ ├── .gitattributes │ ├── src │ │ ├── main │ │ │ ├── java │ │ │ │ └── ai │ │ │ │ │ └── data │ │ │ │ │ └── pipeline │ │ │ │ │ └── postgres │ │ │ │ │ └── embedding │ │ │ │ │ ├── domain │ │ │ │ │ └── SimilarDocuments.java │ │ │ │ │ ├── AppConfig.java │ │ │ │ │ ├── PostgresEmbeddingSimilarityApp.java │ │ │ │ │ ├── properties │ │ │ │ │ └── EmbeddingSimilarityProperties.java │ │ │ │ │ ├── conversion │ │ │ │ │ └── PayloadToDocument.java │ │ │ │ │ └── function │ │ │ │ │ └── EmbeddingSimilarityFunction.java │ │ │ └── resources │ │ │ 
│ └── application.yml │ │ └── test │ │ │ └── java │ │ │ └── ai │ │ │ └── data │ │ │ └── pipeline │ │ │ └── postgres │ │ │ └── embedding │ │ │ ├── conversion │ │ │ └── PayloadToDocumentTest.java │ │ │ └── function │ │ │ └── EmbeddingSimilarityFunctionTest.java │ ├── .gitignore │ ├── .mvn │ │ └── wrapper │ │ │ └── maven-wrapper.properties │ ├── pom.xml │ └── mvnw.cmd │ └── postgres-query-processor │ ├── src │ ├── test │ │ ├── resources │ │ │ └── application.yml │ │ └── java │ │ │ └── ai │ │ │ └── data │ │ │ └── pipeline │ │ │ └── spring │ │ │ └── postgres │ │ │ └── query │ │ │ └── processors │ │ │ └── QueryFunctionProcessorTest.java │ └── main │ │ ├── resources │ │ ├── text-summarization.yml │ │ ├── application.yml │ │ └── sentiment-analysis.yml │ │ └── java │ │ └── ai │ │ └── data │ │ └── pipeline │ │ └── spring │ │ └── postgres │ │ └── query │ │ ├── JdbcConfig.java │ │ ├── AppConfig.java │ │ ├── QueryProcessorProcessorApp.java │ │ ├── properties │ │ └── QueryProperties.java │ │ └── processors │ │ └── QueryFunctionProcessor.java │ └── pom.xml ├── .github ├── CODEOWNERS ├── PULL_REQUEST_TEMPLATE.md ├── workflows │ └── main.yml └── ISSUE_TEMPLATE.md ├── docs ├── 05_01_Introducing Spring AI.md ├── 03_02_API Spring Cloud Stream Source with RabbitMQ.md ├── 02_02_Spring Batch Data Pipeline with Postgres.md ├── 04_02_Introducing PostgresML.md ├── BONUS_Building a Text Sentimental Analysis AI Data Pipeline with PostgresML.md ├── 03_03_Building API Data Pipeline Postgres Sink.md ├── 04_03_Building a Text Summarization AI Data Pipeline.md ├── 05_02_Text Sentiment Analysis Data Pipeline with Spring AI.md ├── 03_04_Building API Data Pipeline Postgres Processor.md ├── 05_05_Vector Similarity Data Pipeline with Spring AI and Postgres.md └── 05_04_Text Sentiment Analysis Data Pipeline with Spring AI and RAG.md ├── .gitignore ├── CONTRIBUTING.md ├── NOTICE ├── pom.xml ├── README.md └── LICENSE /applications/sinks/postgres-sink/src/test/resources/application.yml: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/.gitattributes: -------------------------------------------------------------------------------- 1 | /mvnw text eol=lf 2 | *.cmd text eol=crlf 3 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/.gitattributes: -------------------------------------------------------------------------------- 1 | /mvnw text eol=lf 2 | *.cmd text eol=crlf 3 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/.gitattributes: -------------------------------------------------------------------------------- 1 | /mvnw text eol=lf 2 | *.cmd text eol=crlf 3 | -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/.gitattributes: -------------------------------------------------------------------------------- 1 | /mvnw text eol=lf 2 | *.cmd text eol=crlf 3 | -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/src/test/resources/application.yml: -------------------------------------------------------------------------------- 1 | query.processor: 2 | sql: select 'world' as hello -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Codeowners for these exercise files: 2 | # * (asterisk) denotes "all files and folders" 3 | # Example: * @producer @instructor 4 | -------------------------------------------------------------------------------- 
/applications/processors/ai-sentiment-rag-processor/src/main/resources/sentiment_rag_content.txt: -------------------------------------------------------------------------------- 1 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/domains/CustomerFeedback.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment.domains; 2 | 3 | public record CustomerFeedback(String id, String email, String feedback,String summary) { 4 | } 5 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/domains/CustomerFeedback.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment.domains; 2 | 3 | public record CustomerFeedback(String id, String email, String feedback,String summary) { 4 | } 5 | -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/test/resources/customer.json: -------------------------------------------------------------------------------- 1 | { 2 | "email" : "email@email", 3 | "firstName" : "Josiah", 4 | "lastName" : "Imani", 5 | "phone" : "555-555-5555", 6 | "address" : "12 Straight St", 7 | "city" : "gold", 8 | "state": "ny", 9 | "zip": "55555" 10 | } -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/main/resources/postgres-text-summarization.yml: 
-------------------------------------------------------------------------------- 1 | sql: 2 | consumer: 3 | sql: > 4 | insert into customer.feedback(feed_id,email,user_feedback,summary) values (:id,:email,:feedback,:summary) on CONFLICT (feed_id) DO UPDATE SET email = :email, user_feedback = :feedback, summary = :summary -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.application.name=customer-batch 2 | spring.datasource.username=postgres 3 | spring.datasource.url=jdbc:postgresql://localhost/postgres 4 | spring.datasource.driverClassName=org.postgresql.Driver 5 | spring.sql.init.mode=always 6 | spring.sql.init.platform=postgres -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/domain/Contact.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer.domain; 2 | 3 | import lombok.Builder; 4 | 5 | /** 6 | * @author Gregory Green 7 | * @param email the contact email 8 | * @param phone the contact phone 9 | */ 10 | @Builder 11 | public record Contact(String email, String phone) { 12 | } 13 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/test/java/ai/data/pipeline/spring/customer/CustomerBatchAppTests.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer; 2 | 3 | import org.junit.jupiter.api.Test; 4 | import org.springframework.boot.test.context.SpringBootTest; 5 | 6 | //@SpringBootTest 7 | class CustomerBatchAppTests { 8 | 9 | @Test 10 | void contextLoads() { 11 | } 12 | 13 | } 14 | 
-------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Copy To Branches 2 | on: 3 | workflow_dispatch: 4 | jobs: 5 | copy-to-branches: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v2 9 | with: 10 | fetch-depth: 0 11 | - name: Copy To Branches Action 12 | uses: planetoftheweb/copy-to-branches@v1.2 13 | env: 14 | key: main 15 | -------------------------------------------------------------------------------- /docs/05_01_Introducing Spring AI.md: -------------------------------------------------------------------------------- 1 | The details to install ollama can be found here. 2 | 3 | https://ollama.com/download 4 | 5 | 6 | To start it type 7 | 8 | ```shell 9 | ollama serve 10 | ``` 11 | in a terminal 12 | 13 | 14 | You can pull the llama3 model to your local environment using the ollama run command with the name of the model 15 | 16 | ```shell 17 | ollama run llama3 18 | ``` 19 | -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml: -------------------------------------------------------------------------------- 1 | query: 2 | processor: 3 | sql: > 4 | SELECT 5 | :id as id, 6 | :email as email, 7 | :feedback as feedback, 8 | pgml.transform( task => '{ "task": "summarization", "model": "Falconsai/text_summarization"}'::JSONB, inputs => array[ :feedback])::json->0->>'summary_text' as summary; -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/main/resources/postgres-similarity.yml: -------------------------------------------------------------------------------- 1 | sql: 2 | consumer: 3 | sql: > 4 | insert into customer.customer_similarities 5 | ( 6 | customer_id, similarities ) 7 | values ( 8 | :id, 
:similaritiesPayload::json) 9 | on CONFLICT (customer_id) 10 | DO UPDATE SET 11 | similarities = :similaritiesPayload::json -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/domains/FeedbackSentiment.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment.domains; 2 | 3 | import lombok.Builder; 4 | 5 | 6 | 7 | @Builder 8 | public record FeedbackSentiment(Sentiment sentiment,CustomerFeedback customerFeedback) { 9 | public enum Sentiment{ 10 | Positive, 11 | Negative 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/JdbcConfig.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.postgres.query; 2 | 3 | import org.springframework.context.annotation.Configuration; 4 | import org.springframework.data.jdbc.repository.config.EnableJdbcRepositories; 5 | 6 | @Configuration 7 | @EnableJdbcRepositories 8 | public class JdbcConfig { 9 | } 10 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/domains/FeedbackSentiment.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment.domains; 2 | 3 | import lombok.Builder; 4 | 5 | 6 | 7 | @Builder 8 | public record FeedbackSentiment(Sentiment sentiment,CustomerFeedback customerFeedback) { 9 | public enum Sentiment{ 10 | Positive, 11 | Negative 12 | } 13 | } 14 | -------------------------------------------------------------------------------- 
/applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/domain/SimilarDocuments.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.postgres.embedding.domain; 2 | 3 | import lombok.Builder; 4 | import org.springframework.ai.document.Document; 5 | 6 | import java.util.List; 7 | 8 | @Builder 9 | public record SimilarDocuments(String id, String similaritiesPayload) { 10 | } 11 | -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/test/java/ai/data/pipeline/spring/sink/Customer.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.sink; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Builder; 5 | import lombok.Data; 6 | import lombok.NoArgsConstructor; 7 | 8 | @Data 9 | @AllArgsConstructor 10 | @NoArgsConstructor 11 | @Builder 12 | public class Customer{ 13 | private String email; 14 | private String first_name; 15 | 16 | } 17 | -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/PostgresSinkApp.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class PostgresSinkApp { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(PostgresSinkApp.class, args); 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | pom.xml.tag 3 | pom.xml.releaseBackup 4 | pom.xml.versionsBackup 5 | 
pom.xml.next 6 | runtime/ 7 | .idea/ 8 | release.properties 9 | dependency-reduced-pom.xml 10 | buildNumber.properties 11 | .mvn/timing.properties 12 | # https://github.com/takari/maven-wrapper#usage-without-binary-jar 13 | .mvn/wrapper/maven-wrapper.jar 14 | 15 | # Eclipse m2e generated files 16 | # Eclipse Core 17 | .project 18 | # JDT-specific (Eclipse Java Development Tools) 19 | .classpath 20 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/AiSentimentProcessorApp.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class AiSentimentProcessorApp { 8 | public static void main(String[] args) { 9 | SpringApplication.run(AiSentimentProcessorApp.class,args); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/AiSentimentProcessorApp.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class AiSentimentProcessorApp { 8 | public static void main(String[] args) { 9 | SpringApplication.run(AiSentimentProcessorApp.class,args); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/AppConfig.java: -------------------------------------------------------------------------------- 1 | 
package ai.data.pipeline.spring.postgres.query; 2 | 3 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 4 | import org.springframework.context.annotation.Configuration; 5 | import ai.data.pipeline.spring.postgres.query.properties.QueryProperties; 6 | 7 | @Configuration 8 | @EnableConfigurationProperties(QueryProperties.class) 9 | public class AppConfig { 10 | 11 | } 12 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/resources/schema-postgres.sql: -------------------------------------------------------------------------------- 1 | create schema if not exists customer; 2 | 3 | 4 | CREATE TABLE if not exists customer.customers ( 5 | customer_id SERIAL PRIMARY KEY, 6 | first_name VARCHAR(50) NOT NULL, 7 | last_name VARCHAR(50) NOT NULL, 8 | email VARCHAR(100) UNIQUE NOT NULL, 9 | phone VARCHAR(20), 10 | address VARCHAR(255), 11 | city VARCHAR(50), 12 | state VARCHAR(50), 13 | zip VARCHAR(10), 14 | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP 15 | ); -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/QueryProcessorProcessorApp.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.postgres.query; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | 7 | @SpringBootApplication 8 | public class QueryProcessorProcessorApp { 9 | 10 | public static void main(String[] args) { 11 | SpringApplication.run(QueryProcessorProcessorApp.class, args); 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/domain/Location.java: 
-------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer.domain; 2 | 3 | import lombok.Builder; 4 | 5 | /** 6 | * Domain data for location information 7 | * @author Gregory Green 8 | * 9 | * @param address the address line 10 | * @param city the location city name 11 | * @param state the location state 12 | * @param zip the zip code 13 | */ 14 | @Builder 15 | public record Location(String address, String city, String state, String zip) { 16 | } 17 | -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/AppConfig.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.postgres.embedding; 2 | 3 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 4 | import org.springframework.context.annotation.Configuration; 5 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties; 6 | 7 | @Configuration 8 | @EnableConfigurationProperties(EmbeddingSimilarityProperties.class) 9 | public class AppConfig { 10 | 11 | } 12 | -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/PostgresEmbeddingSimilarityApp.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.postgres.embedding; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class PostgresEmbeddingSimilarityApp { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(PostgresEmbeddingSimilarityApp.class, args); 11 | } 12 | 13 | } 14 | 
-------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/CustomerBatchApp.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | /** 7 | * @author Gregory Green 8 | * 9 | * Spring Boot main application 10 | */ 11 | @SpringBootApplication 12 | public class CustomerBatchApp { 13 | 14 | public static void main(String[] args) { 15 | SpringApplication.run(CustomerBatchApp.class, args); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/JdbcConfig.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring; 2 | 3 | import ai.data.pipeline.spring.properties.SqlConsumerProperties; 4 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 5 | import org.springframework.context.annotation.Configuration; 6 | import org.springframework.data.jdbc.repository.config.EnableJdbcRepositories; 7 | 8 | @Configuration 9 | @EnableJdbcRepositories 10 | @EnableConfigurationProperties(SqlConsumerProperties.class) 11 | public class JdbcConfig { 12 | } 13 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | target/ 3 | !.mvn/wrapper/maven-wrapper.jar 4 | !**/src/main/**/target/ 5 | !**/src/test/**/target/ 6 | 7 | ### STS ### 8 | .apt_generated 9 | .classpath 10 | .factorypath 11 | .project 12 | .settings 13 | .springBeans 14 | .sts4-cache 15 | 16 | ### IntelliJ IDEA ### 17 | .idea 18 
| *.iws 19 | *.iml 20 | *.ipr 21 | 22 | ### NetBeans ### 23 | /nbproject/private/ 24 | /nbbuild/ 25 | /dist/ 26 | /nbdist/ 27 | /.nb-gradle/ 28 | build/ 29 | !**/src/main/**/build/ 30 | !**/src/test/**/build/ 31 | 32 | ### VS Code ### 33 | .vscode/ 34 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/ChatClientConfig.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment; 2 | 3 | import org.springframework.ai.chat.client.ChatClient; 4 | import org.springframework.ai.chat.model.ChatModel; 5 | import org.springframework.context.annotation.Bean; 6 | import org.springframework.context.annotation.Configuration; 7 | 8 | @Configuration 9 | public class ChatClientConfig { 10 | 11 | @Bean 12 | ChatClient chatClient(ChatModel chatModel) 13 | { 14 | return ChatClient.create(chatModel); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | target/ 3 | !.mvn/wrapper/maven-wrapper.jar 4 | !**/src/main/**/target/ 5 | !**/src/test/**/target/ 6 | 7 | ### STS ### 8 | .apt_generated 9 | .classpath 10 | .factorypath 11 | .project 12 | .settings 13 | .springBeans 14 | .sts4-cache 15 | 16 | ### IntelliJ IDEA ### 17 | .idea 18 | *.iws 19 | *.iml 20 | *.ipr 21 | 22 | ### NetBeans ### 23 | /nbproject/private/ 24 | /nbbuild/ 25 | /dist/ 26 | /nbdist/ 27 | /.nb-gradle/ 28 | build/ 29 | !**/src/main/**/build/ 30 | !**/src/test/**/build/ 31 | 32 | ### VS Code ### 33 | .vscode/ 34 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/ChatClientConfig.java: 
-------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment; 2 | 3 | import org.springframework.ai.chat.client.ChatClient; 4 | import org.springframework.ai.chat.model.ChatModel; 5 | import org.springframework.context.annotation.Bean; 6 | import org.springframework.context.annotation.Configuration; 7 | 8 | @Configuration 9 | public class ChatClientConfig { 10 | 11 | @Bean 12 | ChatClient chatClient(ChatModel chatModel) 13 | { 14 | return ChatClient.create(chatModel); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | target/ 3 | !.mvn/wrapper/maven-wrapper.jar 4 | !**/src/main/**/target/ 5 | !**/src/test/**/target/ 6 | 7 | ### STS ### 8 | .apt_generated 9 | .classpath 10 | .factorypath 11 | .project 12 | .settings 13 | .springBeans 14 | .sts4-cache 15 | 16 | ### IntelliJ IDEA ### 17 | .idea 18 | *.iws 19 | *.iml 20 | *.ipr 21 | 22 | ### NetBeans ### 23 | /nbproject/private/ 24 | /nbbuild/ 25 | /dist/ 26 | /nbdist/ 27 | /.nb-gradle/ 28 | build/ 29 | !**/src/main/**/build/ 30 | !**/src/test/**/build/ 31 | 32 | ### VS Code ### 33 | .vscode/ 34 | -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 0 3 | spring: 4 | application: 5 | name: postgres-query-processor 6 | jpa: 7 | show-sql: true 8 | cloud: 9 | function: 10 | definition: queryFunctionProcessor 11 | stream: 12 | function: 13 | bindings: 14 | queryFunctionProcessor-in-0: input 15 | queryFunctionProcessor-out-0: output 16 | bindings: 17 | input: 18 | group: ${spring.application.name} 19 | binder: 20 | connection-name-prefix: 
${spring.application.name} -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | target/ 3 | !.mvn/wrapper/maven-wrapper.jar 4 | !**/src/main/**/target/ 5 | !**/src/test/**/target/ 6 | 7 | ### STS ### 8 | .apt_generated 9 | .classpath 10 | .factorypath 11 | .project 12 | .settings 13 | .springBeans 14 | .sts4-cache 15 | 16 | ### IntelliJ IDEA ### 17 | .idea 18 | *.iws 19 | *.iml 20 | *.ipr 21 | 22 | ### NetBeans ### 23 | /nbproject/private/ 24 | /nbbuild/ 25 | /dist/ 26 | /nbdist/ 27 | /.nb-gradle/ 28 | build/ 29 | !**/src/main/**/build/ 30 | !**/src/test/**/build/ 31 | 32 | ### VS Code ### 33 | .vscode/ 34 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/domain/Customer.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer.domain; 2 | 3 | import lombok.Builder; 4 | 5 | 6 | /** 7 | * 8 | * Domain for the customer information 9 | * @param id the customer id 10 | * @param firstName the customer first name 11 | * @param lastName the customer last name 12 | * @param contact the customer contact 13 | * @param location the customer location 14 | * 15 | * @author Gregory Green 16 | */ 17 | @Builder 18 | public record Customer(String id, String firstName, String lastName, Contact contact,Location location) { 19 | } 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | Contribution Agreement 3 | ====================== 4 | 5 | This repository does not accept pull requests (PRs). All pull requests will be closed. 
6 | 7 | However, if any contributions (through pull requests, issues, feedback or otherwise) are provided, as a contributor, you represent that the code you submit is your original work or that of your employer (in which case you represent you have the right to bind your employer). By submitting code (or otherwise providing feedback), you (and, if applicable, your employer) are licensing the submitted code (and/or feedback) to LinkedIn and the open source community subject to the BSD 2-Clause license. 8 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2025 LinkedIn Corporation 2 | All Rights Reserved. 3 | 4 | Licensed under the LinkedIn Learning Exercise File License (the "License"). 5 | See LICENSE in the project root for license information. 6 | 7 | Please note, this project may automatically load third party code from external 8 | repositories (for example, NPM modules, Composer packages, or other dependencies). 9 | If so, such third party code may be subject to other license terms than as set 10 | forth above. In addition, such third party code may also depend on and load 11 | multiple tiers of dependencies. Please review the applicable licenses of the 12 | additional dependencies. 
13 | -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/properties/QueryProperties.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.postgres.query.properties; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Builder; 5 | import lombok.Data; 6 | import lombok.NoArgsConstructor; 7 | import org.springframework.boot.context.properties.ConfigurationProperties; 8 | 9 | @Data 10 | @NoArgsConstructor 11 | @AllArgsConstructor 12 | @Builder 13 | @ConfigurationProperties(prefix = "query.processor") 14 | public class QueryProperties { 15 | 16 | /** 17 | * Query Sql statement ex: select '1' as id 18 | */ 19 | private String sql; 20 | } 21 | -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/properties/SqlConsumerProperties.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.properties; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Builder; 5 | import lombok.Data; 6 | import lombok.NoArgsConstructor; 7 | import org.springframework.boot.context.properties.ConfigurationProperties; 8 | 9 | @Data 10 | @NoArgsConstructor 11 | @AllArgsConstructor 12 | @Builder 13 | @ConfigurationProperties(prefix = "sql.consumer") 14 | public class SqlConsumerProperties { 15 | 16 | /** 17 | * Sql to execute. Note the Json payload is passed as SQL parameters based on field names. 
18 | */ 19 | private String sql; 20 | } 21 | -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/src/main/resources/sentiment-analysis.yml: -------------------------------------------------------------------------------- 1 | query: 2 | processor: 3 | sql: > 4 | SELECT 5 | :id as id, 6 | :email as email, 7 | :feedback as feedback, 8 | :summary as summary, 9 | positivity::json->0->>'score' as score, 10 | (CASE 11 | WHEN positivity::json->0->>'label' = 'NEGATIVE' THEN -1 12 | WHEN positivity::json->0->>'label' = 'POSITIVE' THEN 1 13 | ELSE 14 | 0 15 | END) as sentiment 16 | from (SELECT pgml.transform( 17 | task => 'text-classification', 18 | inputs => ARRAY[ 19 | :summary ] 20 | ) as positivity) text_classification; -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis.yml: -------------------------------------------------------------------------------- 1 | sql: 2 | consumer: 3 | sql: > 4 | insert into customer.feedback 5 | ( 6 | feed_id, 7 | email, 8 | user_feedback, 9 | summary, 10 | sentiment, 11 | score) 12 | values ( 13 | :id, 14 | :email, 15 | :feedback, 16 | :summary, 17 | :sentiment, 18 | cast(:score as numeric) 19 | ) 20 | on CONFLICT (feed_id) 21 | DO UPDATE SET 22 | email = :email, 23 | user_feedback = :feedback, 24 | summary = :summary, 25 | sentiment = :sentiment, 26 | score = cast(:score as numeric) -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | spring: 2 | application: 3 | name: postgres-sink 4 | datasource: 5 | driverClassName: "org.postgresql.Driver" 6 | jpa: 7 | show-sql: true 8 | cloud: 9 | function: 10 | definition: postgresConsumer 11 | stream: 12 | function: 13 | bindings: 14 
| postgresConsumer-in-0: input 15 | postgresConsumer-out-0: output 16 | rabbit: 17 | binder: 18 | connection-name-prefix: ${spring.application.name} 19 | bindings: 20 | input: 21 | consumer: 22 | autoBindDlq: true 23 | # 24 | bindings: 25 | input: 26 | group: postgres-sink 27 | server: 28 | port: 8003 -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 0 3 | spring: 4 | application: 5 | name: postgres-embedding-similarity-processor 6 | cloud: 7 | function: 8 | definition: embeddingSimilarityFunction 9 | stream: 10 | function: 11 | bindings: 12 | embeddingSimilarityFunction-in-0: input 13 | embeddingSimilarityFunction-out-0: output 14 | binder: 15 | connection-name-prefix: ${spring.application.name} 16 | bindings: 17 | input: 18 | group: postgres-embedding-similarity-processor 19 | ai: 20 | vectorstore: 21 | pgvector: 22 | idType: TEXT 23 | initialize-schema: true 24 | jpa: 25 | show-sql: true 26 | logging: 27 | level: 28 | org: 29 | springframework: 30 | ai: debug -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis-ollama.yml: -------------------------------------------------------------------------------- 1 | sql: 2 | consumer: 3 | sql: > 4 | insert into customer.feedback 5 | ( 6 | feed_id, 7 | email, 8 | user_feedback, 9 | summary, 10 | sentiment) 11 | values ( 12 | :payload::json->'customerFeedback'->'id', 13 | :payload::json->'customerFeedback'->'email', 14 | :payload::json->'customerFeedback'->'feedback', 15 | :payload::json->'customerFeedback'->'summary', 16 | :sentiment 17 | ) 18 | on CONFLICT (feed_id) 19 | DO UPDATE SET 20 | email = :payload::json->'customerFeedback'->'email', 21 | user_feedback = 
:payload::json->'customerFeedback'->'feedback', 22 | summary = :payload::json->'customerFeedback'->'summary', 23 | sentiment = :sentiment -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 0 3 | spring: 4 | application: 5 | name: ai-sentiment-processor 6 | cloud: 7 | function: 8 | definition: customerFeedbackSentimentProcessor 9 | stream: 10 | function: 11 | bindings: 12 | customerFeedbackSentimentProcessor-in-0: input 13 | customerFeedbackSentimentProcessor-out-0: output 14 | binder: 15 | connection-name-prefix: ${spring.application.name} 16 | bindings: 17 | input: 18 | group: ai-sentiment-processor 19 | ai: 20 | ollama: 21 | base-url: http://localhost:11434 22 | model: llama3 23 | init: 24 | pull-model-strategy: always 25 | timeout: 60s 26 | max-retries: 1 27 | jpa: 28 | show-sql: true 29 | logging: 30 | level: 31 | org: 32 | springframework: 33 | ai: debug -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/properties/EmbeddingSimilarityProperties.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.postgres.embedding.properties; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Builder; 5 | import lombok.Data; 6 | import lombok.NoArgsConstructor; 7 | import org.springframework.boot.context.properties.ConfigurationProperties; 8 | 9 | @Data 10 | @NoArgsConstructor 11 | @AllArgsConstructor 12 | @Builder 13 | @ConfigurationProperties(prefix = "embedding.similarity.processor") 14 | public class EmbeddingSimilarityProperties { 15 | 16 | /** 17 | * top K most similar vectors to a query vector in a vector database 18 | */ 19 | private int 
topK; 20 | 21 | /** 22 | * The lower bound of the similarity score. 23 | */ 24 | private double similarityThreshold; 25 | 26 | private String[] documentTextFieldNames; 27 | } 28 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 0 3 | spring: 4 | application: 5 | name: ai-sentiment-processor 6 | cloud: 7 | function: 8 | definition: customerFeedbackSentimentProcessor 9 | stream: 10 | function: 11 | bindings: 12 | customerFeedbackSentimentProcessor-in-0: input 13 | customerFeedbackSentimentProcessor-out-0: output 14 | binder: 15 | connection-name-prefix: ${spring.application.name} 16 | bindings: 17 | input: 18 | group: ai-sentiment-processor 19 | ai: 20 | ollama: 21 | base-url: http://localhost:11434 22 | model: llama3 23 | init: 24 | pull-model-strategy: always 25 | timeout: 60s 26 | max-retries: 1 27 | vectorstore: 28 | pgvector: 29 | idType: TEXT 30 | initialize-schema: true 31 | dimensions: 1024 32 | # 33 | jpa: 34 | show-sql: true 35 | logging: 36 | level: 37 | org: 38 | springframework: 39 | ai: debug -------------------------------------------------------------------------------- /applications/batching/customer-batch/.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | wrapperVersion=3.3.2 18 | distributionType=only-script 19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip 20 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | wrapperVersion=3.3.2 18 | distributionType=only-script 19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip 20 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | wrapperVersion=3.3.2 18 | distributionType=only-script 19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip 20 | -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | wrapperVersion=3.3.2 18 | distributionType=only-script 19 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 7 | 8 | ## Issue Overview 9 | 10 | 11 | ## Describe your environment 12 | 13 | 14 | ## Steps to Reproduce 15 | 16 | 1. 17 | 2. 18 | 3. 19 | 4. 
20 | 21 | ## Expected Behavior 22 | 23 | 24 | ## Current Behavior 25 | 26 | 27 | ## Possible Solution 28 | 29 | 30 | ## Screenshots / Video 31 | 32 | 33 | ## Related Issues 34 | 35 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/CsvConfig.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import nyla.solutions.core.io.csv.CsvWriter; 5 | import org.springframework.beans.factory.annotation.Value; 6 | import org.springframework.context.annotation.Bean; 7 | import org.springframework.context.annotation.Configuration; 8 | import org.springframework.core.io.Resource; 9 | 10 | import java.io.IOException; 11 | import java.nio.file.Files; 12 | 13 | /** 14 | * @author Gregory Green 15 | * 16 | * CSV writer configuration 17 | */ 18 | @Configuration 19 | @Slf4j 20 | public class CsvConfig { 21 | 22 | @Value("${processor.output.error.file.csv}") 23 | private Resource invalid_customers_csv; 24 | 25 | @Bean 26 | CsvWriter csvWriter() throws IOException { 27 | var path = invalid_customers_csv.getFile().toPath(); 28 | 29 | if(!Files.exists(path.getParent())){ 30 | //create directory if it does exist 31 | var directory = Files.createDirectory(path.getParent()); 32 | log.info("directory: {}",directory); 33 | } 34 | return new CsvWriter(path.toFile()); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/CommandLineConfig.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer; 2 | 3 | 4 | import org.springframework.batch.core.Job; 5 | import org.springframework.batch.core.JobParametersBuilder; 6 | import 
org.springframework.batch.core.launch.JobLauncher; 7 | import org.springframework.beans.factory.annotation.Qualifier; 8 | import org.springframework.boot.CommandLineRunner; 9 | import org.springframework.context.annotation.Bean; 10 | import org.springframework.context.annotation.Configuration; 11 | 12 | /** 13 | * Configuration for the command line runner that executes the job 14 | * when the Spring Boot application is started. 15 | * 16 | * @author Gregory Green 17 | */ 18 | @Configuration 19 | public class CommandLineConfig { 20 | 21 | 22 | /** 23 | * Construct the command line runner that launches the job with a "time" parameter set to the current time in milliseconds 24 | * @param jobLauncher the job launcher 25 | * @param job the Spring Batch job to start 26 | * @return the command line runner 27 | */ 28 | @Bean 29 | CommandLineRunner jobRunner(@Qualifier("batchJobLauncher") JobLauncher jobLauncher, Job job){ 30 | return args -> jobLauncher.run(job, new JobParametersBuilder().addJobParameter("time",System.currentTimeMillis()+"", String.class) 31 | .toJobParameters()); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 5 | 4.0.0 6 | 7 | ai.data.pipelines.spring 8 | ai-data-pipelines-with-spring 9 | pom 10 | 1.0 11 | ai-data-pipelines-with-spring 12 | 13 | applications/batching/customer-batch 14 | applications/sinks/postgres-sink 15 | applications/processors/postgres-query-processor 16 | applications/processors/ai-sentiment-processor 17 | applications/processors/ai-sentiment-rag-processor 18 | applications/processors/postgres-embedding-similarity-processor 19 | 20 | 21 | 22 | 23 | 24 | org.apache.maven.plugins 25 | maven-compiler-plugin 26 | 27 | 21 28 | 21 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/VectorStoreConfig.java: 
-------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.springframework.ai.chat.client.advisor.vectorstore.QuestionAnswerAdvisor; 5 | import org.springframework.ai.document.Document; 6 | import org.springframework.ai.reader.TextReader; 7 | import org.springframework.ai.vectorstore.VectorStore; 8 | import org.springframework.beans.factory.annotation.Value; 9 | import org.springframework.boot.CommandLineRunner; 10 | import org.springframework.context.annotation.Bean; 11 | import org.springframework.context.annotation.Configuration; 12 | import org.springframework.core.io.Resource; 13 | 14 | import java.util.List; 15 | 16 | @Configuration 17 | @Slf4j 18 | public class VectorStoreConfig { 19 | @Value("classpath:sentiment_rag_content.txt") 20 | private Resource resource; 21 | 22 | 23 | @Bean 24 | CommandLineRunner runner(VectorStore vectorStore, List documents){ 25 | return args -> { 26 | 27 | log.info("Documents: {}",documents); 28 | vectorStore.accept(documents); 29 | }; 30 | } 31 | 32 | @Bean 33 | List loadJsonAsDocuments() { 34 | var reader = new TextReader(this.resource); 35 | return reader.get(); 36 | } 37 | 38 | @Bean 39 | QuestionAnswerAdvisor advisor(VectorStore vectorStore){ 40 | return new QuestionAnswerAdvisor(vectorStore); 41 | } 42 | 43 | 44 | } 45 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/mapper/CustomerFieldMapper.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer.mapper; 2 | 3 | import org.springframework.batch.item.file.mapping.FieldSetMapper; 4 | import org.springframework.batch.item.file.transform.FieldSet; 5 | import org.springframework.stereotype.Component; 6 | import org.springframework.validation.BindException; 7 | import 
ai.data.pipeline.spring.customer.domain.Contact; 8 | import ai.data.pipeline.spring.customer.domain.Customer; 9 | import ai.data.pipeline.spring.customer.domain.Location; 10 | 11 | /** 12 | * Create a customer record from a Field Set (columns 0-8: id, first name, last name, email, phone, address, city, state, zip) 13 | * @author Gregory Green 14 | */ 15 | @Component 16 | public class CustomerFieldMapper implements FieldSetMapper { 17 | @Override 18 | public Customer mapFieldSet(FieldSet fieldSet) throws BindException { 19 | 20 | 21 | return Customer.builder() 22 | .id(fieldSet.readString(0)) 23 | .firstName(fieldSet.readString(1)) 24 | .lastName(fieldSet.readString(2)) 25 | .contact(Contact.builder() 26 | .email(fieldSet.readString(3)) 27 | .phone(fieldSet.readString(4)) 28 | .build()) 29 | .location(Location.builder() 30 | .address(fieldSet.readString(5)) 31 | .city(fieldSet.readString(6)) 32 | .state(fieldSet.readString(7 )) 33 | .zip(fieldSet.readString(8)).build()) 34 | .build(); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/sink/PostgresConsumer.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.sink; 2 | 3 | import ai.data.pipeline.spring.properties.SqlConsumerProperties; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import lombok.SneakyThrows; 6 | import lombok.extern.slf4j.Slf4j; 7 | import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; 8 | import org.springframework.stereotype.Component; 9 | 10 | import java.util.Map; 11 | import java.util.function.Consumer; 12 | 13 | /** 14 | * Save data into the Postgres database based on a provided payload 15 | * @author Gregory Green 16 | */ 17 | @Component 18 | @Slf4j 19 | public class PostgresConsumer implements Consumer { 20 | 21 | private final ObjectMapper objectMapper; 22 | private final NamedParameterJdbcTemplate namedParameterJdbcTemplate; 23 | private final String 
sql; 24 | 25 | public PostgresConsumer(ObjectMapper objectMapper, 26 | NamedParameterJdbcTemplate namedParameterJdbcTemplate, 27 | SqlConsumerProperties properties) { 28 | this.objectMapper = objectMapper; 29 | this.namedParameterJdbcTemplate = namedParameterJdbcTemplate; 30 | this.sql = properties.getSql(); 31 | } 32 | 33 | @SneakyThrows 34 | @Override 35 | public void accept(String payload) { 36 | 37 | log.info("payload: {}",payload); 38 | Map map = objectMapper.readValue(payload,Map.class); 39 | 40 | map.put("payload",payload); 41 | 42 | log.info("map: {} payload: {}",map,payload); 43 | log.info("SQL: {}",sql); 44 | namedParameterJdbcTemplate.update(sql,map); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/processors/QueryFunctionProcessor.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.postgres.query.processors; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import lombok.RequiredArgsConstructor; 5 | import lombok.SneakyThrows; 6 | import lombok.extern.slf4j.Slf4j; 7 | import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; 8 | import org.springframework.stereotype.Component; 9 | import ai.data.pipeline.spring.postgres.query.properties.QueryProperties; 10 | 11 | import java.util.Map; 12 | import java.util.function.Function; 13 | 14 | /** 15 | * Execute a SQL query based on the input JSON and return JSON of the SQL results 16 | * @author Gregory Green 17 | */ 18 | @Component 19 | @RequiredArgsConstructor 20 | @Slf4j 21 | public class QueryFunctionProcessor implements Function { 22 | 23 | private final NamedParameterJdbcTemplate namedParameterJdbcTemplate; 24 | private final ObjectMapper objectMapper; 25 | private final QueryProperties queryProperties; 26 | 27 | @SneakyThrows 28 | @Override 29 | 
public String apply(String payload) { 30 | 31 | log.info("payload: {}",payload); 32 | var inputMap = objectMapper.readValue(payload, Map.class); 33 | 34 | log.info("SQL: {}, input: {}",queryProperties,inputMap); 35 | 36 | var outMap = namedParameterJdbcTemplate.queryForMap(queryProperties.getSql(), 37 | inputMap); 38 | log.info("SQL: {}, class:{}, results: {}",queryProperties,outMap.getClass(),outMap); 39 | 40 | var out = objectMapper.writeValueAsString(outMap); 41 | log.info("Returning: {}",out); 42 | return out; 43 | } 44 | 45 | 46 | 47 | 48 | } 49 | -------------------------------------------------------------------------------- /docs/03_02_API Spring Cloud Stream Source with RabbitMQ.md: -------------------------------------------------------------------------------- 1 | 2 | # Start RabbitMQ 3 | 4 | 5 | - Run RabbitMQ 6 | ```shell 7 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management 8 | ``` 9 | 10 | 11 | ```shell 12 | open http://localhost:15672 13 | ``` 14 | 15 | Download http source 16 | 17 | ```shell 18 | wget -P runtime https://repo.maven.apache.org/maven2/org/springframework/cloud/stream/app/http-source-rabbit/5.0.1/http-source-rabbit-5.0.1.jar 19 | ``` 20 | 21 | Run the Http Source 22 | ```shell 23 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=customers --server.port=8080 --spring.cloud.stream.bindings.output.destination=customers.intake 24 | ``` 25 | 26 | 27 | 28 | Create queue 29 | 30 | ```shell 31 | docker exec -it rabbitmq rabbitmqadmin declare queue name=customer-test 32 | ``` 33 | 34 | Create Binding 35 | 36 | ```shell 37 | docker exec -it rabbitmq rabbitmqadmin declare binding source=customers.intake destination=customer-test routing_key=# 38 | ``` 39 | 40 | 41 | ## Testing 42 | 43 | Example 44 | 45 | ```shell 46 | curl -X 'POST' \ 47 | 'http://localhost:8080/customers' \ 48 | -H 'accept: */*' \ 49 | -H 'Content-Type: application/json' \ 50 | -d '{ 51 | "firstName" : "Josiah", 
52 | "lastName" : "Imani", 53 | "email" : "email@email", 54 | "phone" : "555-555-5555", 55 | "address" : "12 Straight St", 56 | "city" : "Erie", 57 | "zip": "16510" 58 | }' 59 | ``` 60 | 61 | 62 | 63 | ```shell 64 | curl -X 'POST' \ 65 | 'http://localhost:8080/customers' \ 66 | -H 'accept: */*' \ 67 | -H 'Content-Type: application/json' \ 68 | -d '{ 69 | "firstName" : "Juan", 70 | "lastName" : "Wagner", 71 | "email" : "jwagner79@ihg.com", 72 | "phone" : "555-555-5555", 73 | "address" : "1496 Reinke Lane", 74 | "city" : "New York City", 75 | "state" : "NY", 76 | "zip": "10275" 77 | }' 78 | ``` 79 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessor.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment.processor; 2 | 3 | import ai.data.pipeline.sentiment.domains.CustomerFeedback; 4 | import ai.data.pipeline.sentiment.domains.FeedbackSentiment; 5 | import lombok.RequiredArgsConstructor; 6 | import lombok.extern.slf4j.Slf4j; 7 | import org.springframework.ai.chat.client.ChatClient; 8 | import org.springframework.stereotype.Component; 9 | 10 | import java.util.function.Function; 11 | 12 | /** 13 | * Determine the sentiment based on customer feedback 14 | * @author Gregory Green 15 | */ 16 | @Component 17 | @RequiredArgsConstructor 18 | @Slf4j 19 | public class CustomerFeedbackSentimentProcessor implements Function { 20 | private final ChatClient chatClient; 21 | 22 | private final String prompt = """ 23 | Analyze the sentiment of this text: "{text}". 24 | Respond with only one word: Positive or Negative. 
25 | """; 26 | 27 | 28 | /** 29 | * Determine the sentiment of the feedback 30 | * @param customerFeedback the function argument 31 | * @return the sentiment of the feedback summary 32 | */ 33 | @Override 34 | public FeedbackSentiment apply(CustomerFeedback customerFeedback) { 35 | 36 | log.info("customerFeedback: {}",customerFeedback); 37 | var sentiment = chatClient.prompt() 38 | .user(u -> u.text(prompt) 39 | .param("text", customerFeedback.summary())) 40 | .call() 41 | .entity(FeedbackSentiment.Sentiment.class); 42 | 43 | log.info("sentiment: {}",sentiment); 44 | 45 | return FeedbackSentiment.builder() 46 | .customerFeedback(customerFeedback) 47 | .sentiment(sentiment).build(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/test/java/ai/data/pipeline/spring/customer/mapper/CustomerFieldMapperTest.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer.mapper; 2 | 3 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator; 4 | import org.junit.jupiter.api.BeforeEach; 5 | import org.junit.jupiter.api.Test; 6 | import org.junit.jupiter.api.extension.ExtendWith; 7 | import org.mockito.Mock; 8 | import org.mockito.junit.jupiter.MockitoExtension; 9 | import org.springframework.batch.item.file.transform.FieldSet; 10 | import org.springframework.validation.BindException; 11 | import ai.data.pipeline.spring.customer.domain.Customer; 12 | 13 | import static org.assertj.core.api.Assertions.assertThat; 14 | import static org.mockito.ArgumentMatchers.anyInt; 15 | import static org.mockito.Mockito.when; 16 | 17 | @ExtendWith(MockitoExtension.class) 18 | class CustomerFieldMapperTest { 19 | 20 | private CustomerFieldMapper subject; 21 | 22 | @Mock 23 | private FieldSet fieldSet; 24 | private Customer customer = JavaBeanGeneratorCreator.of(Customer.class).create(); 25 | 26 | 
@BeforeEach 27 | void setUp() { 28 | subject = new CustomerFieldMapper(); 29 | } 30 | 31 | @Test 32 | void map() throws BindException { 33 | 34 | when(fieldSet.readString(anyInt())) 35 | .thenReturn(customer.id()) 36 | .thenReturn(customer.firstName()) 37 | .thenReturn(customer.lastName()) 38 | .thenReturn(customer.contact().email()) 39 | .thenReturn(customer.contact().phone()) 40 | .thenReturn(customer.location().address()) 41 | .thenReturn(customer.location().city()) 42 | .thenReturn(customer.location().state()) 43 | .thenReturn(customer.location().zip()); 44 | 45 | var actual = subject.mapFieldSet(fieldSet); 46 | 47 | assertThat(actual).isEqualTo(customer); 48 | 49 | } 50 | } -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/conversion/PayloadToDocument.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.postgres.embedding.conversion; 2 | 3 | import com.fasterxml.jackson.databind.JsonNode; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import lombok.RequiredArgsConstructor; 6 | import lombok.SneakyThrows; 7 | import lombok.extern.slf4j.Slf4j; 8 | import nyla.solutions.core.patterns.conversion.Converter; 9 | import org.springframework.ai.document.Document; 10 | import org.springframework.stereotype.Component; 11 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties; 12 | 13 | /** 14 | * Convert payload string to Document 15 | * @author Gregory Green 16 | */ 17 | @Component 18 | @RequiredArgsConstructor 19 | @Slf4j 20 | public class PayloadToDocument implements Converter { 21 | 22 | private final EmbeddingSimilarityProperties properties; 23 | private final ObjectMapper objectMapper; 24 | 25 | @SneakyThrows 26 | @Override 27 | public Document convert(String payload) { 28 | log.info("Reading tree for payload: 
{}",payload); 29 | var jsonNode = objectMapper.readTree(payload); 30 | 31 | var idNode = jsonNode.findValue("id"); 32 | log.info("idNode: {}",idNode); 33 | 34 | var id = idNode != null? idNode.asText() : ""; 35 | log.info("id: {}",id); 36 | /* join the configured field values into one comma separated text */ 37 | var textBuilder = new StringBuilder(); 38 | for(var fieldName : properties.getDocumentTextFieldNames()) 39 | { 40 | JsonNode fieldValue = jsonNode.findValue(fieldName); 41 | String textValue = fieldValue != null? fieldValue.asText() : null; 42 | if(!textBuilder.isEmpty()) 43 | textBuilder.append(","); 44 | /* NOTE(review): a field missing from the payload leaves textValue null, which StringBuilder appends as the literal "null" - confirm this is intended */ 45 | textBuilder.append(textValue); 46 | 47 | } 48 | log.info("text: {}",textBuilder); 49 | return Document.builder().id(id).text(textBuilder.toString()).build(); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessor.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment.processor; 2 | 3 | import lombok.RequiredArgsConstructor; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.springframework.ai.chat.client.ChatClient; 6 | import org.springframework.ai.chat.client.advisor.api.Advisor; 7 | import org.springframework.stereotype.Component; 8 | import ai.data.pipeline.sentiment.domains.CustomerFeedback; 9 | import ai.data.pipeline.sentiment.domains.FeedbackSentiment; 10 | 11 | import java.util.function.Function; 12 | 13 | /** 14 | * Determine the sentiment based on customer feedback 15 | * @author Gregory Green 16 | */ 17 | @Component 18 | @RequiredArgsConstructor 19 | @Slf4j 20 | public class CustomerFeedbackSentimentProcessor implements Function { 21 | private final ChatClient chatClient; 22 | 23 | private final Advisor advisor; 24 | 25 | private final String prompt = """ 26 | Analyze the sentiment of this text: "{text}". 
27 | Respond with only one word: Positive or Negative. 28 | """; 29 | 30 | 31 | /** 32 | * Determine the sentiment of the feedback 33 | * @param customerFeedback the function argument 34 | * @return the sentiment of the feedback summary 35 | */ 36 | @Override 37 | public FeedbackSentiment apply(CustomerFeedback customerFeedback) { 38 | 39 | log.info("customerFeedback: {}",customerFeedback); 40 | var sentiment = chatClient.prompt() 41 | .user(u -> u.text(prompt) 42 | .param("text", customerFeedback.summary())) 43 | .advisors(advisor) 44 | .call() 45 | .entity(FeedbackSentiment.Sentiment.class); 46 | 47 | log.info("sentiment: {}",sentiment); 48 | 49 | return FeedbackSentiment.builder() 50 | .customerFeedback(customerFeedback) 51 | .sentiment(sentiment).build(); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /docs/02_02_Spring Batch Data Pipeline with Postgres.md: -------------------------------------------------------------------------------- 1 | docker network rm data-pipelines 2 | 3 | ```shell 4 | docker network create data-pipelines 5 | ``` 6 | 7 | Start Postgres 8 | 9 | ```shell 10 | docker run --name postgres --network data-pipelines --rm \ 11 | -e POSTGRES_USER=postgres \ 12 | -e POSTGRES_PASSWORD=postgres \ 13 | -e POSTGRES_DB=postgres \ 14 | -p 5432:5432 \ 15 | -it postgres 16 | ``` 17 | 18 | 19 | Access Postgres using psql 20 | 21 | ```shell 22 | docker exec -it postgres psql -U postgres 23 | ``` 24 | 25 | 26 | list tables in postgres 27 | 28 | ```psql 29 | \dt * 30 | ``` 31 | [schema-postgres.sql](../applications/batching/customer-batch/src/main/resources/schema-postgres.sql) 32 | 33 | 34 | See 35 | 36 | [BatchConfig.java](../applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/BatchConfig.java) 37 | 38 | 39 | See CSV 40 | 41 | [customers-source.csv](../applications/batching/customer-batch/src/test/resources/sources/customers-source.csv) 42 | 43 | 44 | See 45 | 46 | 
[Customer.java](../applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/domain/Customer.java) 47 | 48 | 49 | See 50 | 51 | [CustomerFieldMapper.java](../applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/mapper/CustomerFieldMapper.java) 52 | 53 | See 54 | 55 | [MissingRequiredFieldsFilterProcessor.java](../applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/processor/MissingRequiredFieldsFilterProcessor.java) 56 | 57 | 58 | 59 | ```shell 60 | mvn package 61 | ``` 62 | 63 | 64 | Run batch 65 | 66 | ```shell 67 | java -jar applications/batching/customer-batch/target/customer-batch-0.0.1-SNAPSHOT.jar --spring.datasource.password=postgres --source.input.file.csv="file:./applications/batching/customer-batch/src/test/resources/sources/customers-source.csv" --processor.output.error.file.csv="file:./runtime/invalid_customers.csv" 68 | ``` 69 | 70 | 71 | In psql, query the loaded customers 72 | ```sql 73 | select * from customer.customers; 74 | ``` 75 | 76 | 77 | 78 | 79 | Also see 80 | 81 | [invalid_customers.csv](../runtime/invalid_customers.csv) 82 | 83 | Find records in source 84 | 85 | [customers-source.csv](../applications/batching/customer-batch/src/test/resources/sources/customers-source.csv) -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/src/test/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessorTest.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment.processor; 2 | 3 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator; 4 | import org.junit.jupiter.api.BeforeEach; 5 | import org.junit.jupiter.api.Test; 6 | import org.junit.jupiter.api.extension.ExtendWith; 7 | import org.mockito.Mock; 8 | import org.mockito.junit.jupiter.MockitoExtension; 9 | import 
org.springframework.ai.chat.client.ChatClient; 10 | import org.springframework.ai.chat.client.advisor.api.Advisor; 11 | import ai.data.pipeline.sentiment.domains.CustomerFeedback; 12 | import ai.data.pipeline.sentiment.domains.FeedbackSentiment; 13 | 14 | import java.util.function.Consumer; 15 | 16 | import static org.assertj.core.api.Assertions.assertThat; 17 | import static org.mockito.ArgumentMatchers.any; 18 | import static org.mockito.Mockito.when; 19 | 20 | @ExtendWith(MockitoExtension.class) 21 | class CustomerFeedbackSentimentProcessorTest { 22 | 23 | private CustomerFeedbackSentimentProcessor subject; 24 | 25 | @Mock 26 | private ChatClient chatClient; 27 | 28 | private final CustomerFeedback customerFeedback = JavaBeanGeneratorCreator.of(CustomerFeedback.class).create(); 29 | @Mock 30 | private ChatClient.ChatClientRequestSpec prompt; 31 | @Mock 32 | private ChatClient.ChatClientRequestSpec user; 33 | @Mock 34 | private ChatClient.ChatClientRequestSpec userResponse; 35 | @Mock 36 | private ChatClient.CallResponseSpec callResponseSpec; 37 | 38 | @BeforeEach 39 | void setUp() { 40 | subject = new CustomerFeedbackSentimentProcessor(chatClient); 41 | } 42 | 43 | @Test 44 | void positiveSentiment() { 45 | 46 | FeedbackSentiment expected = FeedbackSentiment 47 | .builder().customerFeedback(customerFeedback).sentiment(FeedbackSentiment.Sentiment.Positive).build(); 48 | 49 | when(chatClient.prompt()).thenReturn(prompt); 50 | when(prompt.user(any(Consumer.class))).thenReturn(userResponse); 51 | when(userResponse.call()).thenReturn(callResponseSpec); 52 | when(callResponseSpec.entity(any(Class.class))).thenReturn(expected.sentiment()); 53 | 54 | var actual = subject.apply(customerFeedback); 55 | 56 | assertThat(actual).isEqualTo(expected); 57 | } 58 | } -------------------------------------------------------------------------------- 
/applications/processors/ai-sentiment-rag-processor/src/test/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessorTest.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.sentiment.processor; 2 | 3 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator; 4 | import org.junit.jupiter.api.BeforeEach; 5 | import org.junit.jupiter.api.Test; 6 | import org.junit.jupiter.api.extension.ExtendWith; 7 | import org.mockito.Mock; 8 | import org.mockito.junit.jupiter.MockitoExtension; 9 | import org.springframework.ai.chat.client.ChatClient; 10 | import org.springframework.ai.chat.client.advisor.api.Advisor; 11 | import ai.data.pipeline.sentiment.domains.CustomerFeedback; 12 | import ai.data.pipeline.sentiment.domains.FeedbackSentiment; 13 | 14 | import java.util.function.Consumer; 15 | 16 | import static org.assertj.core.api.Assertions.assertThat; 17 | import static org.mockito.ArgumentMatchers.any; 18 | import static org.mockito.Mockito.when; 19 | 20 | @ExtendWith(MockitoExtension.class) 21 | class CustomerFeedbackSentimentProcessorTest { 22 | 23 | private CustomerFeedbackSentimentProcessor subject; 24 | 25 | @Mock 26 | private ChatClient chatClient; 27 | 28 | private final CustomerFeedback customerFeedback = JavaBeanGeneratorCreator.of(CustomerFeedback.class).create(); 29 | @Mock 30 | private ChatClient.ChatClientRequestSpec prompt; 31 | @Mock 32 | private ChatClient.ChatClientRequestSpec user; 33 | @Mock 34 | private ChatClient.CallResponseSpec callResponse; 35 | @Mock 36 | private Advisor advisor; 37 | 38 | @Mock 39 | private ChatClient.ChatClientRequestSpec advisors; 40 | 41 | @BeforeEach 42 | void setUp() { 43 | subject = new CustomerFeedbackSentimentProcessor(chatClient,advisor); 44 | } 45 | 46 | @Test 47 | void positiveSentiment() { 48 | 49 | FeedbackSentiment expected = FeedbackSentiment 50 | 
.builder().customerFeedback(customerFeedback).sentiment(FeedbackSentiment.Sentiment.Positive).build(); 51 | 52 | when(chatClient.prompt()).thenReturn(prompt); 53 | when(prompt.user(any(Consumer.class))).thenReturn(user); 54 | when(user.advisors(any(Advisor.class))).thenReturn(advisors); 55 | when(advisors.call()).thenReturn(callResponse); 56 | when(callResponse.entity(any(Class.class))).thenReturn(expected.sentiment()); 57 | 58 | var actual = subject.apply(customerFeedback); 59 | 60 | assertThat(actual).isEqualTo(expected); 61 | } 62 | } -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/src/test/java/ai/data/pipeline/spring/sink/PostgresConsumerTest.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.sink; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator; 6 | import nyla.solutions.core.util.JavaBean; 7 | import nyla.solutions.core.util.Text; 8 | import org.junit.jupiter.api.BeforeAll; 9 | import org.junit.jupiter.api.BeforeEach; 10 | import org.junit.jupiter.api.Test; 11 | import org.springframework.amqp.rabbit.test.context.SpringRabbitTest; 12 | import org.springframework.beans.factory.annotation.Autowired; 13 | import org.springframework.boot.test.context.SpringBootTest; 14 | import org.springframework.jdbc.core.JdbcTemplate; 15 | import org.springframework.test.context.junit.jupiter.SpringJUnitConfig; 16 | 17 | import static org.assertj.core.api.AssertionsForClassTypes.assertThat; 18 | 19 | @SpringBootTest 20 | @SpringJUnitConfig 21 | @SpringRabbitTest 22 | class PostgresConsumerTest { 23 | 24 | @Autowired 25 | private JdbcTemplate jdbcTemplate; 26 | @Autowired 27 | private PostgresConsumer subject; 28 | 29 | @Autowired 30 | private ObjectMapper objectMapper; 31 | 32 | 
private final Customer customer = JavaBeanGeneratorCreator.of(Customer.class).create(); 33 | 34 | @BeforeAll 35 | static void beforeAll() { 36 | System.setProperty( 37 | "sql.consumer.sql", 38 | "insert into customers(email,first_name) values(:email,:first_name)"); 39 | } 40 | 41 | @BeforeEach 42 | void setUp() { 43 | final String createSql = """ 44 | CREATE TABLE IF NOT EXISTS customers(email VARCHAR(255) PRIMARY KEY, first_name VARCHAR(255)); 45 | """; 46 | jdbcTemplate.execute(createSql); 47 | } 48 | 49 | @Test 50 | void accept() throws JsonProcessingException { 51 | String payload = """ 52 | { "email" : "${email}" , "first_name" : "${first_name}" } 53 | """; 54 | 55 | payload = Text.format(payload, JavaBean.toMap(customer)); 56 | subject.accept(payload); 57 | 58 | String query = """ 59 | select first_name from customers where email = ? 60 | """; 61 | 62 | var name = jdbcTemplate.queryForObject(query, String.class, customer.getEmail()); 63 | 64 | assertThat(name).isEqualTo(customer.getFirst_name()); 65 | 66 | } 67 | } -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/processor/MissingRequiredFieldsFilterProcessor.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer.processor; 2 | 3 | import lombok.RequiredArgsConstructor; 4 | import nyla.solutions.core.io.csv.CsvWriter; 5 | import org.springframework.batch.item.ItemProcessor; 6 | import org.springframework.stereotype.Component; 7 | import ai.data.pipeline.spring.customer.domain.Customer; 8 | 9 | /** 10 | * Filter customer records with missing required fields 11 | * @author Gregory Green 12 | */ 13 | @RequiredArgsConstructor 14 | @Component 15 | public class MissingRequiredFieldsFilterProcessor implements ItemProcessor { 16 | 17 | private final CsvWriter invalidRecordsCsvWriter; 18 | 19 | /** 20 | * 21 | * @param customer 
to be processed, never {@code null}. 22 | * @return Null if customer does not have all required fields, else return customer 23 | * @throws Exception when an error occurs 24 | */ 25 | @Override 26 | public Customer process(Customer customer) throws Exception { 27 | if(customer == null ) 28 | return null; 29 | 30 | if( 31 | customer.id() == null || customer.id().isBlank() || 32 | customer.firstName() == null || customer.firstName().isBlank() || 33 | customer.lastName() == null || customer.lastName().isBlank() || 34 | customer.contact() == null || 35 | customer.contact().phone() == null || customer.contact().phone().isBlank() || 36 | customer.contact().email() == null || customer.contact().email().isBlank()) 37 | { 38 | invalidRecordsCsvWriter.appendRow( 39 | customer.id(), 40 | customer.firstName(), 41 | customer.lastName(), 42 | customer.contact() != null ? customer.contact().phone() : "", 43 | customer.contact() != null ? customer.contact().email() : "", 44 | customer.contact() != null ? customer.contact().phone() : "", 45 | customer.location() != null ? customer.location().address() : "", 46 | customer.location() != null ? customer.location().city() : "", 47 | customer.location() != null ? customer.location().state() : "", 48 | customer.location() != null ? 
customer.location().zip() : ""); 49 | 50 | //return null skips this records 51 | return null; 52 | } 53 | 54 | //return valid record 55 | return customer; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /docs/04_02_Introducing PostgresML.md: -------------------------------------------------------------------------------- 1 | 2 | Start PostgresML 3 | 4 | 5 | ```shell 6 | docker run --rm \ 7 | -it \ 8 | -v postgresml_data:/var/lib/postgresql \ 9 | -p 6432:6432 \ 10 | ghcr.io/postgresml/postgresml:2.10.0 \ 11 | sudo -u postgresml psql -d postgresml 12 | ``` 13 | 14 | 15 | 16 | -------------------- 17 | 18 | 19 | Test summary in postgresML 20 | 21 | 22 | ```sql 23 | SELECT pgml.transform( task => '{ "task": "summarization", "model": "Falconsai/text_summarization"}'::JSONB, inputs => array[ 'I am really disappointed with the wait time I experienced when trying to reach Customer Service. I was on hold for over 40 minutes just to speak with someone about a simple issue with my account. It’s frustrating and honestly unacceptable. I do not have time to sit around waiting all day.'])::json->0->>'summary_text' as summary_text; 24 | ``` 25 | 26 | 27 | ```sql 28 | SELECT pgml.transform( task => '{ "task": "summarization", "model": "Falconsai/text_summarization"}'::JSONB, inputs => array[ 'I have been using this system for a while now, and I have to say, I am genuinely impressed with how well it performs. The user interface is clean and thoughtfully laid out, making navigation feel effortless even for someone new to it. Features are logically organized, and everything just works seamlessly right out of the box. What really stands out, though, is the attention to detail—the developers clearly put a lot of thought into the user experience. Whether it is the speed of execution, the minimal learning curve, or the helpful tooltips and documentation, everything contributes to a feeling of confidence and ease. 
It is rare to come across a system that feels both powerful and user-friendly, but this one hits that balance perfectly.'])::json->0->>'summary_text' as summary_text; 29 | ``` 30 | 31 | 32 | 33 | 34 | Text Classification 35 | 36 | 37 | ```sql 38 | SELECT pgml.transform( task => 'text-classification', inputs => ARRAY['I was on hold for over 40 minutes just to speak with someone about a simple issue with my account . I do not have time to sit around waiting all day.']) AS positivity; 39 | ``` 40 | 41 | 42 | ```sql 43 | SELECT pgml.transform( task => 'text-classification', inputs => ARRAY['the user interface is clean and thoughtfully laid out, making navigation feel effortless even for someone new to it . The developers clearly put a lot of thought into the user experience . It is rare to come across a system that feels both powerful and user-friendly, but this one hits that balance perfectly.']) AS positivity; 44 | ``` 45 | 46 | 47 | ```sql 48 | SELECT pgml.transform( task => 'text-classification', inputs => ARRAY['I love building linked Learning courses with my producer Dione!!!']) AS positivity; 49 | ``` 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/test/java/ai/data/pipeline/spring/customer/CsvTextFileGenerator.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer; 2 | 3 | import nyla.solutions.core.io.IO; 4 | import nyla.solutions.core.io.csv.CsvWriter; 5 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator; 6 | import nyla.solutions.core.util.Config; 7 | import ai.data.pipeline.spring.customer.domain.Customer; 8 | 9 | import java.io.IOException; 10 | import java.nio.file.Paths; 11 | 12 | /** 13 | * Generate random customer records in CSV 14 | * 15 | * @author Gregory Green 16 | */ 17 | public class CsvTextFileGenerator { 18 | 19 | public static void 
main(String[] args) throws IOException { 20 | 21 | var customerCreator = JavaBeanGeneratorCreator.of(Customer.class); 22 | 23 | var settings = Config.settings(); 24 | 25 | var file = Paths.get(settings.getProperty("file","applications/batching/customer-batch/src/test/resources/sources/customers.csv")).toFile(); 26 | IO.delete(file); 27 | var csvWriter = new CsvWriter(file); 28 | 29 | //column order: id,first_name,last_nm,email,phone,address line,city,state,zip 30 | var count = settings.getPropertyInteger("count",100); 31 | 32 | var customer = customerCreator.create(); 33 | //first invalid record: id, first name, last name, email and phone are blank 34 | //only the location columns are populated 35 | csvWriter.appendRow( 36 | "", 37 | "", 38 | "", 39 | "", 40 | "", 41 | customer.location().address(), 42 | customer.location().city(), 43 | customer.location().state(), 44 | customer.location().zip() 45 | ); 46 | 47 | 48 | customer = customerCreator.create(); 49 | //second invalid record: the same five leading columns are blank 50 | csvWriter.appendRow( 51 | "", 52 | "", 53 | "", 54 | "", 55 | "", 56 | customer.location().address(), 57 | customer.location().city(), 58 | customer.location().state(), 59 | customer.location().zip() 60 | ); 61 | 62 | //remaining rows (index 2 up to count) fill every column; the email is also written as the id 63 | for (int i = 2; i < count; i++) { 64 | customer = customerCreator.create(); 65 | csvWriter.appendRow( 66 | customer.contact().email(), 67 | customer.firstName(), 68 | customer.lastName(), 69 | customer.contact().email(), 70 | customer.contact().phone(), 71 | customer.location().address(), 72 | customer.location().city(), 73 | customer.location().state(), 74 | customer.location().zip() 75 | ); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.springframework.boot 7 | spring-boot-starter-parent 8 | 3.5.0 9 | 10 | 11 | ai.data.pipelines.spring 12 | 
customer-batch 13 | 0.0.1-SNAPSHOT 14 | customer-batch 15 | customer-batch 16 | 17 | 21 18 | 19 | 20 | 21 | org.springframework.boot 22 | spring-boot-starter-batch 23 | 24 | 25 | org.springframework.boot 26 | spring-boot-starter-data-jdbc 27 | 28 | 29 | org.springframework.boot 30 | spring-boot-starter-data-jpa 31 | 32 | 33 | com.github.nyla-solutions 34 | nyla.solutions.core 35 | 2.3.2 36 | 37 | 38 | com.h2database 39 | h2 40 | runtime 41 | 42 | 43 | org.projectlombok 44 | lombok 45 | true 46 | 47 | 48 | org.postgresql 49 | postgresql 50 | runtime 51 | 52 | 53 | org.springframework.boot 54 | spring-boot-starter-test 55 | test 56 | 57 | 58 | 59 | 60 | 61 | 62 | org.apache.maven.plugins 63 | maven-compiler-plugin 64 | 65 | 66 | 67 | org.projectlombok 68 | lombok 69 | 70 | 71 | 72 | 73 | 74 | org.springframework.boot 75 | spring-boot-maven-plugin 76 | 77 | 78 | 79 | org.projectlombok 80 | lombok 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/src/test/java/ai/data/pipeline/postgres/embedding/conversion/PayloadToDocumentTest.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.postgres.embedding.conversion; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import org.junit.jupiter.api.BeforeEach; 5 | import org.junit.jupiter.api.Test; 6 | import org.junit.jupiter.api.extension.ExtendWith; 7 | import org.mockito.Mock; 8 | import org.mockito.junit.jupiter.MockitoExtension; 9 | import org.springframework.ai.document.Document; 10 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties; 11 | 12 | import java.util.UUID; 13 | 14 | import static org.assertj.core.api.Assertions.assertThat; 15 | import static org.mockito.Mockito.when; 16 | 17 | /** 18 | * 19 | * @author Gregory Green 20 | */ 21 | @ExtendWith(MockitoExtension.class) 
22 | class PayloadToDocumentTest { 23 | 24 | private PayloadToDocument subject; 25 | 26 | @Mock 27 | private EmbeddingSimilarityProperties properties; 28 | private final String[] fields = {"email","phone","zip","state","city","address","lastName","firstName"}; 29 | 30 | 31 | @BeforeEach 32 | void setUp() { 33 | subject = new PayloadToDocument(properties,new ObjectMapper()); 34 | } 35 | 36 | @Test 37 | void convert() { 38 | 39 | when(properties.getDocumentTextFieldNames()).thenReturn(fields); 40 | 41 | var expectedText = "email@,555-555-5555,23232,my state,city,1 street,Smith,John"; 42 | Document expected = Document.builder().id("junit") 43 | .text(expectedText).build(); 44 | var payload = """ 45 | { 46 | "id" : "junit", 47 | "firstName": "John", 48 | "lastName": "Smith", 49 | "email": "email@", 50 | "phone": "555-555-5555", 51 | "address": "1 street", 52 | "city": "city", 53 | "state": "my state", 54 | "zip": "23232" 55 | } 56 | """; 57 | 58 | var actual = subject.convert(payload); 59 | 60 | assertThat(actual).isEqualTo(expected); 61 | } 62 | 63 | @Test 64 | void bug() { 65 | 66 | System.out.println(UUID.randomUUID()); 67 | 68 | var payload = """ 69 | { 70 | "id" : "8df15279-97a6-4b48-92f3-f78d045d9cc4", 71 | "firstName" : "Josiah", 72 | "lastName" : "Imani", 73 | "email" : "email@email", 74 | "phone" : "555-555-5555", 75 | "address" : "12 Straight St", 76 | "city" : "gold", 77 | "state" : "ny", 78 | "zip": "55555" 79 | } 80 | """; 81 | 82 | when(properties.getDocumentTextFieldNames()).thenReturn(fields); 83 | 84 | var actual=subject.convert(payload); 85 | 86 | assertThat(actual.getText()).isNotNull(); 87 | } 88 | } -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/function/EmbeddingSimilarityFunction.java: -------------------------------------------------------------------------------- 1 | package 
ai.data.pipeline.postgres.embedding.function; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import lombok.SneakyThrows; 5 | import lombok.extern.slf4j.Slf4j; 6 | import nyla.solutions.core.patterns.conversion.Converter; 7 | import org.springframework.ai.document.Document; 8 | import org.springframework.ai.vectorstore.SearchRequest; 9 | import org.springframework.ai.vectorstore.VectorStore; 10 | import org.springframework.stereotype.Component; 11 | import ai.data.pipeline.postgres.embedding.domain.SimilarDocuments; 12 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties; 13 | 14 | import java.util.List; 15 | import java.util.function.Function; 16 | 17 | import static java.util.List.of; 18 | 19 | /** 20 | * 21 | * Save payload as document and search for similarities 22 | * @author Gregory Green 23 | */ 24 | @Component 25 | @Slf4j 26 | public class EmbeddingSimilarityFunction implements Function { 27 | private final VectorStore vectorStore; 28 | private final EmbeddingSimilarityProperties properties; 29 | private final ObjectMapper objectMapper = new ObjectMapper(); 30 | private final Converter converter; 31 | 32 | public EmbeddingSimilarityFunction(VectorStore vectorStore, EmbeddingSimilarityProperties properties, Converter converter) { 33 | this.vectorStore = vectorStore; 34 | this.properties = properties; 35 | this.converter = converter; 36 | } 37 | 38 | @Override 39 | public SimilarDocuments apply(String payload) { 40 | 41 | log.info("payload: {}: properties: {}", payload, properties); 42 | var payloadDocument = converter.convert(payload); 43 | 44 | log.info("payloadDocument: {}", payloadDocument); 45 | 46 | var customerInfo = payloadDocument.getText(); 47 | if (customerInfo == null) 48 | return null; 49 | 50 | log.info("Saving into vector store"); 51 | vectorStore.add(of(payloadDocument)); 52 | 53 | 54 | var criteria = SearchRequest.builder().query(payload) 55 | .topK(properties.getTopK()) 56 | 
.query(customerInfo) 57 | .similarityThreshold(properties.getSimilarityThreshold()) 58 | .build(); 59 | 60 | log.info("Searching criteria: {}",criteria); 61 | 62 | var similarities = vectorStore.similaritySearch(criteria); 63 | 64 | log.info("similarities: {}", similarities); 65 | 66 | var similarDocuments = toSimilarDocuments(payloadDocument, similarities); 67 | 68 | log.info("Returning similarDocuments: {}", similarDocuments); 69 | 70 | return similarDocuments; 71 | } 72 | 73 | /** 74 | *Build SimilarDocuments based on the similarity document results 75 | * @param payloadDocument the payloadDocument 76 | * @param similarities the list of similar documents 77 | * @return the results 78 | */ 79 | @SneakyThrows 80 | SimilarDocuments toSimilarDocuments(Document payloadDocument, List similarities){ 81 | 82 | if(similarities == null || similarities.isEmpty()) 83 | return null; 84 | 85 | List filtered = similarities.stream() 86 | .filter(resultDoc -> !resultDoc.getId().equals(payloadDocument.getId())) 87 | .toList(); 88 | 89 | String filteredPayload = objectMapper.writeValueAsString(filtered); 90 | return !filtered.isEmpty() ? 
91 | SimilarDocuments.builder().id(payloadDocument.getId()).similaritiesPayload(filteredPayload).build() 92 | : null; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/src/test/java/ai/data/pipeline/postgres/embedding/function/EmbeddingSimilarityFunctionTest.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.postgres.embedding.function; 2 | 3 | import nyla.solutions.core.patterns.conversion.Converter; 4 | import org.junit.jupiter.api.BeforeEach; 5 | import org.junit.jupiter.api.Test; 6 | import org.junit.jupiter.api.extension.ExtendWith; 7 | import org.mockito.Mock; 8 | import org.mockito.junit.jupiter.MockitoExtension; 9 | import org.springframework.ai.document.Document; 10 | import org.springframework.ai.vectorstore.SearchRequest; 11 | import org.springframework.ai.vectorstore.VectorStore; 12 | import ai.data.pipeline.postgres.embedding.domain.SimilarDocuments; 13 | import ai.data.pipeline.postgres.embedding.properties.EmbeddingSimilarityProperties; 14 | 15 | import java.util.Collections; 16 | import java.util.List; 17 | 18 | import static org.assertj.core.api.Assertions.assertThat; 19 | import static org.mockito.ArgumentMatchers.any; 20 | import static org.mockito.Mockito.verify; 21 | import static org.mockito.Mockito.when; 22 | 23 | @ExtendWith(MockitoExtension.class) 24 | class EmbeddingSimilarityFunctionTest { 25 | 26 | private static final String expectedId = "001"; 27 | private static final String payload = """ 28 | { "id" : "001"} 29 | """; 30 | 31 | @Mock 32 | private VectorStore vectorStore; 33 | 34 | @Mock 35 | private Converter converter; 36 | 37 | private EmbeddingSimilarityFunction subject; 38 | 39 | private static final EmbeddingSimilarityProperties properties = EmbeddingSimilarityProperties.builder() 40 | .topK(4) 41 | .similarityThreshold(0.95) 42 | .build(); 43 | @Mock 
44 | private Document document; 45 | private final String text = "Expected"; 46 | private String id = "Expected Id"; 47 | @Mock 48 | private Document resultDocument; 49 | 50 | @BeforeEach 51 | void setUp() { 52 | subject = new EmbeddingSimilarityFunction(vectorStore,properties,converter); 53 | } 54 | 55 | @Test 56 | void accept() { 57 | when(converter.convert(any())).thenReturn(document); 58 | when(document.getText()).thenReturn(text); 59 | when(document.getId()).thenReturn(id); 60 | when(vectorStore.similaritySearch(any(SearchRequest.class))).thenReturn(List.of(resultDocument)); 61 | when(resultDocument.getId()).thenReturn("Different Id"); 62 | 63 | SimilarDocuments actual = subject.apply(payload); 64 | 65 | 66 | assertThat(actual).isNotNull(); 67 | } 68 | 69 | @Test 70 | void returnNull() { 71 | when(converter.convert(any())).thenReturn(document); 72 | when(document.getText()).thenReturn(text); 73 | when(vectorStore.similaritySearch(any(SearchRequest.class))).thenReturn(null); 74 | 75 | assertThat(subject.apply(payload)).isNull(); 76 | 77 | } 78 | 79 | @Test 80 | void returnEmptyListNull() { 81 | when(converter.convert(any())).thenReturn(document); 82 | when(document.getText()).thenReturn(text); 83 | when(vectorStore.similaritySearch(any(SearchRequest.class))).thenReturn(Collections.emptyList()); 84 | 85 | assertThat(subject.apply(payload)).isNull(); 86 | 87 | } 88 | 89 | @Test 90 | void removeMatchingDocument() { 91 | 92 | 93 | when(converter.convert(any())).thenReturn(document); 94 | when(document.getId()).thenReturn(id); 95 | 96 | when(document.getText()).thenReturn(text); 97 | when(vectorStore.similaritySearch(any(SearchRequest.class))).thenReturn(List.of(document)); 98 | 99 | assertThat(subject.apply(payload)).isNull(); 100 | } 101 | } -------------------------------------------------------------------------------- 
/applications/processors/postgres-query-processor/src/test/java/ai/data/pipeline/spring/postgres/query/processors/QueryFunctionProcessorTest.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.postgres.query.processors; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import org.junit.jupiter.api.BeforeAll; 6 | import org.junit.jupiter.api.BeforeEach; 7 | import org.junit.jupiter.api.Test; 8 | import org.springframework.amqp.rabbit.test.context.SpringRabbitTest; 9 | import org.springframework.beans.factory.annotation.Autowired; 10 | import org.springframework.boot.test.context.SpringBootTest; 11 | import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; 12 | import org.springframework.test.context.junit.jupiter.SpringJUnitConfig; 13 | import org.springframework.util.LinkedCaseInsensitiveMap; 14 | import ai.data.pipeline.spring.postgres.query.properties.QueryProperties; 15 | 16 | import javax.sql.DataSource; 17 | 18 | import static org.assertj.core.api.Assertions.assertThat; 19 | import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; 20 | 21 | /** 22 | * @author Gregory Green 23 | */ 24 | @SpringBootTest 25 | @SpringJUnitConfig 26 | @SpringRabbitTest 27 | class QueryFunctionProcessorTest { 28 | 29 | @Autowired 30 | private QueryFunctionProcessor subject; 31 | 32 | @Autowired 33 | private ObjectMapper objectMapper; 34 | @Autowired 35 | private DataSource dataSource; 36 | @Autowired 37 | private NamedParameterJdbcTemplate namedParameterJdbcTemplate; 38 | 39 | 40 | @BeforeAll 41 | static void beforeAll() { 42 | System.setProperty( 43 | "query.sql", 44 | "select :firstName"); 45 | } 46 | 47 | @BeforeEach 48 | void setUp() { 49 | } 50 | 51 | @Test 52 | void accept() throws JsonProcessingException { 53 | var expected = """ 54 | {"HELLO":"world"} 55 | """; 56 | 57 | var payload = """ 58 | { 
"email" : "${email}" , "firstName" : "${firstName}" } 59 | """; 60 | 61 | var actual = subject.apply(payload); 62 | 63 | assertThat(actual).isNotNull(); 64 | assertThat(actual.trim()).isEqualTo(expected.trim()); 65 | 66 | } 67 | 68 | @Test 69 | void validJson() { 70 | 71 | 72 | var sql = """ 73 | select :firstName as firstName, 74 | :lastName as lastName 75 | """; 76 | var payload = """ 77 | { 78 | "firstName": "John", 79 | "lastName": "Smith" 80 | } 81 | """; 82 | var queryProperties = new QueryProperties(sql); 83 | 84 | subject = new QueryFunctionProcessor(namedParameterJdbcTemplate,objectMapper,queryProperties); 85 | 86 | var actual = subject.apply(payload); 87 | 88 | assertDoesNotThrow( () -> objectMapper.readTree(actual)); 89 | 90 | } 91 | 92 | @Test 93 | void mapToJson() throws JsonProcessingException { 94 | 95 | var feedback = """ 96 | "Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018, in an area of more than 105 square kilometres (41 square miles). The City of Paris is the centre and seat of government of the region and province of Île-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017., summary="The City of Paris is the centre and seat of government of the region and province of le-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017." 
97 | """; 98 | 99 | var map= new LinkedCaseInsensitiveMap(); 100 | 101 | 102 | map.put("id", "F001"); 103 | map.put("email", "jmatthews@email"); 104 | map.put("feedback", feedback); 105 | 106 | var actual = objectMapper.writeValueAsString(map); 107 | 108 | System.out.printf(actual); 109 | 110 | } 111 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI Data Pipelines with Spring 2 | This is the repository for the LinkedIn Learning course `AI Data Pipelines with Spring`. The full course is available from [LinkedIn Learning][lil-course-url]. 3 | 4 | ![lil-thumbnail-url] 5 | 6 | ## Course Description 7 | 8 | The lack of data integration is a common blocker preventing organizations from unlocking the power of artificial intelligence. Spring is a popular collection of projects that simplifies data integration and artificial intelligence development for Java applications. In this course, learn how Spring can be used to build data pipelines. Discover how Spring AI makes it easy to use AI models using the Java programming language. Understand how to build data pipelines with Spring Batch and Spring Cloud Stream with RabbitMQ. Learn how features within Postgres enable vector databases and machine learning for AI workloads. 9 | 10 | 11 | ## Instructions 12 | 13 | The source code for the live demonstrations is available in this GitHub repository. You’re not required to review or build the demo applications. The source code is available as a reference implementation, so feel free to explore. 14 | 15 | 16 | ## Installing 17 | 18 | 19 | The details to run the demonstrations can be found under the docs folder.
20 | 21 | - [02_02_Spring Batch Data Pipeline with Postgres.md](docs/02_02_Spring%20Batch%20Data%20Pipeline%20with%20Postgres.md) 22 | - [03_02_API Spring Cloud Stream Source with RabbitMQ.md](docs/03_02_API%20Spring%20Cloud%20Stream%20Source%20with%20RabbitMQ.md) 23 | - [03_03_Building API Data Pipeline Postgres Sink.md](docs/03_03_Building%20API%20Data%20Pipeline%20Postgres%20Sink.md) 24 | - [03_04_Building API Data Pipeline Postgres Processor.md](docs/03_04_Building%20API%20Data%20Pipeline%20Postgres%20Processor.md) 25 | - [04_02_Introducing PostgresML.md](docs/04_02_Introducing%20PostgresML.md) 26 | - [04_03_Building a Text Summarization AI Data Pipeline.md](docs/04_03_Building%20a%20Text%20Summarization%20AI%20Data%20Pipeline.md) 27 | - [05_01_Introducing Spring AI.md](docs/05_01_Introducing%20Spring%20AI.md) 28 | - [05_02_Building a Text Sentimental Analysis AI Data Pipeline.md](docs/05_02_Building%20a%20Text%20Sentimental%20Analysis%20AI%20Data%20Pipeline.md) 29 | - [05_04_Building a Text Sentiment Analysis AI Data Pipeline with RAG.md](docs/05_04_Building%20a%20Text%20Sentiment%20Analysis%20AI%20Data%20Pipeline%20with%20RAG.md) 30 | - [05_05_Building a Similarity AI Data Pipeline.md](docs/05_05_Building%20a%20Similarity%20AI%20Data%20Pipeline.md) 31 | 32 | 33 | ## Instructor 34 | 35 | Instructor name: Gregory Green 36 | 37 | Being familiar with the Java programming language 38 | will help you get the most out of this course. 39 | 40 | You should be comfortable with developing Java applications using the Maven build tool, along with an integrated development environment (IDE) such as IntelliJ. 41 | 42 | Having hands-on experience with the Spring Framework and Spring Boot will help you better understand some of the examples. 43 | 44 | You should be comfortable with at least one relational database, such as Postgres, and with the Structured Query Language (SQL).
45 | 46 | Although it’s not required, it is useful to have experience with a messaging system, such as RabbitMQ or similar solutions, in addition to a basic understanding of Docker and containers. 47 | 48 | 49 | Check out my other courses on [LinkedIn Learning](https://www.linkedin.com/learning/instructors/gregory-green). 50 | 51 | 52 | - [Data Resilience with Spring and RabbitMQ Event Streaming](https://www.linkedin.com/learning/data-resilience-with-spring-and-rabbitmq-event-streaming/introduction-to-data-resiliency) 53 | - [Building High-Throughput Data Microservices](https://www.linkedin.com/learning/building-high-throughput-data-microservices/rabbitmq-streams-for-high-throughput) 54 | - [Achieving Low-Latency Data with Edge Computing](https://www.linkedin.com/learning/achieving-low-latency-data-with-edge-computing/why-is-low-latency-data-with-edge-computing-important) 55 | 56 | 57 | [lil-course-url]: https://www.linkedin.com/learning/ai-data-pipelines-with-spring 58 | [lil-thumbnail-url]: https://media.licdn.com/dms/image/v2/D4E0DAQGio63WPKbMnQ/learning-public-crop_675_1200/B4EZkQl6HzHEAY-/0/1756919997682?e=2147483647&v=beta&t=ZEaakiNHX7JUx5-DF201T53bjSragjmGN8k7wEJuozA 59 | 60 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/test/java/ai/data/pipeline/spring/customer/processor/MissingRequiredFieldsFilterProcessorTest.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer.processor; 2 | 3 | import nyla.solutions.core.io.csv.CsvWriter; 4 | import nyla.solutions.core.patterns.creational.generator.JavaBeanGeneratorCreator; 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.Test; 7 | import org.junit.jupiter.api.extension.ExtendWith; 8 | import org.mockito.Mock; 9 | import org.mockito.junit.jupiter.MockitoExtension; 10 | import
ai.data.pipeline.spring.customer.domain.Contact; 11 | import ai.data.pipeline.spring.customer.domain.Customer; 12 | 13 | import static org.assertj.core.api.Assertions.assertThat; 14 | import static org.mockito.ArgumentMatchers.any; 15 | import static org.mockito.Mockito.never; 16 | import static org.mockito.Mockito.verify; 17 | 18 | @ExtendWith(MockitoExtension.class) 19 | class MissingRequiredFieldsFilterProcessorTest { 20 | 21 | private MissingRequiredFieldsFilterProcessor subject; 22 | private final Contact contact = JavaBeanGeneratorCreator.of(Contact.class).create(); 23 | @Mock 24 | private CsvWriter csvWriter; 25 | private final static String id = "id"; 26 | private final static String firstName = "fn"; 27 | private final static String lastName = "ln"; 28 | private final static String email = "email"; 29 | private final static String phone = "phone"; 30 | 31 | @BeforeEach 32 | void setUp() { 33 | subject = new MissingRequiredFieldsFilterProcessor(csvWriter); 34 | } 35 | 36 | @Test 37 | void savedValidCustomer() throws Exception { 38 | var customer = Customer.builder().id(id) 39 | .firstName(firstName).lastName(lastName) 40 | .contact(Contact.builder().email(email).phone(phone).build()).build(); 41 | 42 | var actual = subject.process(customer); 43 | verify(csvWriter,never()).appendRow(any(String[].class)); 44 | 45 | assertThat(actual).isEqualTo(customer); 46 | } 47 | 48 | @Test 49 | void firstNameRequired() throws Exception { 50 | 51 | var customer = Customer.builder().id(id) 52 | .lastName(lastName) 53 | .contact(Contact.builder().email(email).phone(phone).build()).build(); 54 | 55 | var actual = subject.process(customer); 56 | verify(csvWriter).appendRow(any(String[].class)); 57 | 58 | assertThat(actual).isNull(); 59 | } 60 | 61 | @Test 62 | void lastNameRequired() throws Exception { 63 | 64 | var customer = Customer.builder().id(id) 65 | .firstName(firstName) 66 | .contact(Contact.builder().email(email).phone(phone).build()).build(); 67 | 68 | var actual = 
subject.process(customer); 69 | verify(csvWriter).appendRow(any(String[].class)); 70 | 71 | assertThat(actual).isNull(); 72 | } 73 | 74 | 75 | 76 | 77 | @Test 78 | void idRequired() throws Exception { 79 | 80 | var customer = Customer.builder() 81 | .firstName(firstName).lastName(lastName) 82 | .contact(Contact.builder().email(email).phone(phone).build()).build(); 83 | 84 | var actual = subject.process(customer); 85 | verify(csvWriter).appendRow(any(String[].class)); 86 | 87 | assertThat(actual).isNull(); 88 | } 89 | 90 | @Test 91 | void emailRequired() throws Exception { 92 | /* Every field except email is populated (including id), so a rejection can only be caused by the missing email. */ 93 | var customer = Customer.builder().id(id) 94 | .firstName(firstName).lastName(lastName) 95 | .contact(Contact.builder() 96 | .phone(phone).build()).build(); 97 | 98 | var actual = subject.process(customer); 99 | verify(csvWriter).appendRow(any(String[].class)); 100 | 101 | assertThat(actual).isNull(); 102 | } 103 | 104 | @Test 105 | void phoneRequired() throws Exception { 106 | 107 | var customer = Customer.builder().id(id) 108 | .firstName(firstName).lastName(lastName) 109 | .contact(Contact.builder().email(email) 110 | .build()).build(); 111 | 112 | var actual = subject.process(customer); 113 | verify(csvWriter).appendRow(any(String[].class)); 114 | 115 | assertThat(actual).isNull(); 116 | } 117 | } -------------------------------------------------------------------------------- /applications/sinks/postgres-sink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.springframework.boot 7 | spring-boot-starter-parent 8 | 3.5.0 9 | 10 | 11 | ai.data.pipelines.spring 12 | postgres-sink 13 | 0.0.1-SNAPSHOT 14 | postgres-sink 15 | postgres-sink 16 | 17 | 21 18 | 2025.0.0 19 | 20 | 21 | 22 | org.projectlombok 23 | lombok 24 | true 25 | 26 | 27 | org.springframework.boot 28 | spring-boot-configuration-processor 29 | true 30 | 31 | 32 | org.springframework.boot 33 | spring-boot-starter-actuator 34 | 35 | 36 | org.springframework.boot 37
| spring-boot-starter-amqp 38 | 39 | 40 | org.springframework.boot 41 | spring-boot-starter-data-jdbc 42 | 43 | 44 | org.springframework.boot 45 | spring-boot-starter-data-jpa 46 | 47 | 48 | org.springframework.amqp 49 | spring-rabbit-stream 50 | 51 | 52 | org.springframework.cloud 53 | spring-cloud-stream 54 | 55 | 56 | org.springframework.cloud 57 | spring-cloud-stream-binder-rabbit 58 | 59 | 60 | 61 | com.h2database 62 | h2 63 | test 64 | 65 | 66 | com.github.nyla-solutions 67 | nyla.solutions.core 68 | 2.2.3 69 | 70 | 71 | 72 | org.postgresql 73 | postgresql 74 | runtime 75 | 76 | 77 | 78 | org.springframework.boot 79 | spring-boot-starter-test 80 | test 81 | 82 | 83 | 84 | org.springframework.amqp 85 | spring-rabbit-test 86 | test 87 | 88 | 89 | org.springframework.cloud 90 | spring-cloud-stream-test-binder 91 | test 92 | 93 | 94 | 95 | 96 | 97 | org.springframework.cloud 98 | spring-cloud-dependencies 99 | ${spring-cloud.version} 100 | pom 101 | import 102 | 103 | 104 | 105 | 106 | 107 | 108 | org.apache.maven.plugins 109 | maven-compiler-plugin 110 | 111 | 112 | 113 | org.projectlombok 114 | lombok 115 | 116 | 117 | 118 | 119 | 120 | org.springframework.boot 121 | spring-boot-maven-plugin 122 | 123 | 124 | 125 | org.projectlombok 126 | lombok 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.springframework.boot 7 | spring-boot-starter-parent 8 | 3.5.3 9 | 10 | 11 | ai.data.pipelines.spring 12 | ai-sentiment-processor 13 | 0.0.1-SNAPSHOT 14 | ai-sentiment-processor 15 | ai-sentiment-processor 16 | 17 | 21 18 | 1.0.0 19 | 2025.0.0 20 | 21 21 | 21 22 | 23 | 24 | 25 | org.projectlombok 26 | lombok 27 | true 28 | 29 | 30 | org.springframework.boot 31 | spring-boot-configuration-processor 32 | true 33 | 34 | 35 | 
org.springframework.boot 36 | spring-boot-starter-actuator 37 | 38 | 39 | org.springframework.boot 40 | spring-boot-starter-amqp 41 | 42 | 43 | 44 | 45 | 46 | org.springframework.ai 47 | spring-ai-starter-model-ollama 48 | 49 | 50 | 51 | org.springframework.amqp 52 | spring-rabbit-stream 53 | 54 | 55 | org.springframework.cloud 56 | spring-cloud-stream 57 | 58 | 59 | org.springframework.cloud 60 | spring-cloud-stream-binder-rabbit 61 | 62 | 63 | 64 | com.github.nyla-solutions 65 | nyla.solutions.core 66 | 2.3.1 67 | 68 | 69 | org.springframework.boot 70 | spring-boot-starter-test 71 | test 72 | 73 | 74 | org.springframework.amqp 75 | spring-rabbit-test 76 | test 77 | 78 | 79 | org.springframework.cloud 80 | spring-cloud-stream-test-binder 81 | test 82 | 83 | 84 | 85 | 86 | 87 | org.springframework.cloud 88 | spring-cloud-dependencies 89 | ${spring-cloud.version} 90 | pom 91 | import 92 | 93 | 94 | org.springframework.ai 95 | spring-ai-bom 96 | ${spring-ai.version} 97 | pom 98 | import 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | org.apache.maven.plugins 107 | maven-compiler-plugin 108 | 109 | 110 | 111 | org.projectlombok 112 | lombok 113 | 114 | 115 | 116 | 117 | 118 | org.springframework.boot 119 | spring-boot-maven-plugin 120 | 121 | 122 | 123 | org.projectlombok 124 | lombok 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.springframework.boot 7 | spring-boot-starter-parent 8 | 3.5.0 9 | 10 | 11 | ai.data.pipelines.spring 12 | postgres-embedding-similarity-processor 13 | 0.0.1-SNAPSHOT 14 | postgres-embedding-similarity-processor 15 | postgres-embedding-similarity-processor 16 | 17 | 21 18 | 1.0.0 19 | 2025.0.0 20 | 21 21 | 21 22 | 23 | 24 | 25 | 26 | org.projectlombok 27 | lombok 28 | true 29 
| 30 | 31 | org.springframework.boot 32 | spring-boot-configuration-processor 33 | true 34 | 35 | 36 | org.springframework.boot 37 | spring-boot-starter-actuator 38 | 39 | 40 | org.springframework.boot 41 | spring-boot-starter-amqp 42 | 43 | 44 | org.springframework.ai 45 | spring-ai-starter-model-postgresml-embedding 46 | 47 | 48 | 49 | 50 | org.springframework.ai 51 | spring-ai-starter-vector-store-pgvector 52 | 53 | 54 | org.springframework.amqp 55 | spring-rabbit-stream 56 | 57 | 58 | org.springframework.cloud 59 | spring-cloud-stream 60 | 61 | 62 | org.springframework.cloud 63 | spring-cloud-stream-binder-rabbit 64 | 65 | 66 | 67 | com.github.nyla-solutions 68 | nyla.solutions.core 69 | 2.3.1 70 | 71 | 72 | org.springframework.boot 73 | spring-boot-starter-test 74 | test 75 | 76 | 77 | org.springframework.amqp 78 | spring-rabbit-test 79 | test 80 | 81 | 82 | org.springframework.cloud 83 | spring-cloud-stream-test-binder 84 | test 85 | 86 | 87 | 88 | 89 | 90 | org.springframework.cloud 91 | spring-cloud-dependencies 92 | ${spring-cloud.version} 93 | pom 94 | import 95 | 96 | 97 | org.springframework.ai 98 | spring-ai-bom 99 | ${spring-ai.version} 100 | pom 101 | import 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | org.apache.maven.plugins 110 | maven-compiler-plugin 111 | 112 | 113 | 114 | org.projectlombok 115 | lombok 116 | 117 | 118 | 119 | 120 | 121 | org.springframework.boot 122 | spring-boot-maven-plugin 123 | 124 | 125 | 126 | org.projectlombok 127 | lombok 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /docs/BONUS_Building a Text Sentimental Analysis AI Data Pipeline with PostgresML.md: -------------------------------------------------------------------------------- 1 | 2 | - Run RabbitMQ 3 | ```shell 4 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management 5 | ``` 6 | 7 | Run Postgres 8 | 9 | 10 | ```shell 11 | docker
run --name postgres --network data-pipelines --rm \ 12 | -e POSTGRES_USER=postgres \ 13 | -e POSTGRES_PASSWORD=postgres \ 14 | -e POSTGRES_DB=postgres \ 15 | -p 5432:5432 \ 16 | -it postgres 17 | ``` 18 | 19 | 20 | 21 | 22 | psql 23 | 24 | ```shell 25 | docker exec -it postgres psql -U postgres 26 | ``` 27 | 28 | ```shell 29 | create schema if not exists customer ; 30 | 31 | create table customer.feedback( 32 | feed_id text NOT NULL, 33 | email text NOT NULL, 34 | user_feedback text NOT NULL, 35 | summary text NOT NULL, 36 | feedback_dt timestamp NOT NULL DEFAULT NOW(), 37 | sentiment smallint NOT NULL, 38 | score numeric NOT NULL, 39 | PRIMARY KEY (feed_id) 40 | ); 41 | ``` 42 | 43 | 44 | Run PostgresML 45 | 46 | ```shell 47 | docker run --rm --name postgresml \ 48 | -it \ 49 | --network data-pipelines \ 50 | -v postgresml_data:/var/lib/postgresql \ 51 | -p 6432:5432 \ 52 | -p 8000:8000 \ 53 | ghcr.io/postgresml/postgresml:2.10.0 \ 54 | sudo -u postgresml psql -d postgresml 55 | ``` 56 | 57 | 58 | 59 | 60 | ::json->>'summary_text' 61 | 62 | select pg_typeof(results::json) 63 | 64 | ```shell 65 | SELECT 66 | positivity::json->0->>'label' as label, 67 | positivity::json->0->>'score' as score, 68 | (CASE 69 | WHEN positivity::json->0->>'label' = 'NEGATIVE' THEN -1 70 | WHEN positivity::json->0->>'label' = 'POSITIVE' THEN 1 71 | ELSE 72 | 0 73 | END) as sentiment 74 | from (SELECT pgml.transform( 75 | task => 'text-classification', 76 | inputs => ARRAY[ 77 | 'Why is the wait SO LONG!' 
] 78 | ) as positivity) text_classification; 79 | ``` 80 | 81 | 82 | --------------------------- 83 | 84 | 85 | Start Http 86 | 87 | 88 | ```shell 89 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=feedback --server.port=8094 --spring.cloud.stream.bindings.output.destination=customers.input.feedback 90 | ``` 91 | 92 | 93 | Start Processor Text Summary 94 | 95 | ```shell 96 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.input.feedback --spring.cloud.stream.bindings.output.destination=customers.output.feedback.summary --spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor 97 | ``` 98 | Start Processor Text sentiment 99 | 100 | ```shell 101 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.output.feedback.summary --spring.cloud.stream.bindings.output.destination=customers.output.feedback.sentiment --spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/sentiment-analysis.yml --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor 102 | ``` 103 | 104 | 105 | 106 | Start Sink 107 | 108 | 109 | ```shell 110 | java -jar 
applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination=customers.output.feedback.sentiment --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis.yml --spring.cloud.stream.bindings.input.group=postgres-sink 111 | ``` 112 | 113 | 114 | ```shell 115 | curl -X 'POST' \ 116 | 'http://localhost:8094/feedback' \ 117 | -H 'accept: */*' \ 118 | -H 'Content-Type: application/json' \ 119 | -d '{ 120 | "id" : "F001", 121 | "email" : "jmatthews@email", 122 | "feedback" : "Hello my name is John Smith. I am long time customer. It seems that every time I call the help desk there is a very long wait. Then when I following get someone on the line, I have the repeat to repeat the process of the provide the details. This is very disappointing." 
123 | }' 124 | ``` 125 | 126 | 127 | In psql 128 | 129 | ```sql 130 | select * from customer.feedback; 131 | 132 | ``` -------------------------------------------------------------------------------- /docs/03_03_Building API Data Pipeline Postgres Sink.md: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | 3 | ```shell 4 | docker network create data-pipelines 5 | ``` 6 | 7 | - Run RabbitMQ 8 | ```shell 9 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management 10 | ``` 11 | 12 | Run the HTTP Source 13 | ```shell 14 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=customers --server.port=8080 --spring.cloud.stream.bindings.output.destination=customers.intake 15 | ``` 16 | 17 | Start Postgres 18 | 19 | ```shell 20 | docker run --name postgres --network data-pipelines --rm \ 21 | -e POSTGRES_USER=postgres \ 22 | -e POSTGRES_PASSWORD=postgres \ 23 | -e POSTGRES_DB=postgres \ 24 | -p 5432:5432 \ 25 | -it postgres 26 | ``` 27 | 28 | 29 | psql 30 | 31 | ```shell 32 | docker exec -it postgres psql -U postgres 33 | ``` 34 | 35 | ```sql 36 | create schema customer; 37 | 38 | create table customer.customers( 39 | first_nm text NOT NULL, 40 | last_nm text NOT NULL, 41 | email text NOT NULL, 42 | phone text , 43 | address text NOT NULL, 44 | city text , 45 | state text , 46 | zip text NOT NULL, 47 | PRIMARY KEY (email) 48 | ); 49 | ``` 50 | 51 | Build application 52 | 53 | ```shell 54 | mvn package 55 | ``` 56 | 57 | See 58 | 59 | [PostgresConsumer.java](../applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/sink/PostgresConsumer.java) 60 | 61 | ```shell 62 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres"
--sql.consumer.sql="insert into customer.customers(email,first_nm,last_nm,phone,address,city,state,zip) values (:email,:firstName,:lastName,:phone, :address,:city,:state,:zip) on CONFLICT (email) DO UPDATE SET first_nm = :firstName, last_nm = :lastName, phone = :phone, address = :address, city = :city, state = :state, zip = :zip" --spring.cloud.stream.bindings.input.destination=customers.intake 63 | ``` 64 | 65 | 66 | 67 | ## Testing 68 | 69 | 70 | ```shell 71 | curl -X 'POST' \ 72 | 'http://localhost:8080/customers' \ 73 | -H 'accept: */*' \ 74 | -H 'Content-Type: application/json' \ 75 | -d '{ 76 | "email" : "email@email", 77 | "firstName" : "Josiah", 78 | "lastName" : "Imani", 79 | "phone" : "555-555-5555", 80 | "address" : "12 Straight St", 81 | "city" : "gold", 82 | "state": "ny", 83 | "zip": "55555" 84 | }' 85 | ``` 86 | 87 | ```json 88 | { 89 | "email" : "email@email", 90 | "firstName" : "Josiah", 91 | "lastName" : "Imani", 92 | "phone" : "555-555-5555", 93 | "address" : "12 Straight St", 94 | "city" : "gold", 95 | "state": "ny", 96 | "zip": "55555" 97 | } 98 | 99 | ``` 100 | 101 | 102 | In psql 103 | 104 | ```sql 105 | select * from customer.customers; 106 | 107 | ``` 108 | 109 | 110 | ```shell 111 | curl -X 'POST' \ 112 | 'http://localhost:8080/customers' \ 113 | -H 'accept: */*' \ 114 | -H 'Content-Type: application/json' \ 115 | -d '{ 116 | "firstName" : "Jill", 117 | "lastName" : "Smith", 118 | "email" : "jsmith@email", 119 | "phone" : "155-555-5555", 120 | "address" : "2 Straight St", 121 | "city" : "gold", 122 | "state": "ny", 123 | "zip": "55555" 124 | }' 125 | ``` 126 | ```sql 127 | select * from customer.customers; 128 | ``` 129 | 130 | 131 | Update Jill's phone 132 | 133 | ```shell 134 | curl -X 'POST' \ 135 | 'http://localhost:8080/customers' \ 136 | -H 'accept: */*' \ 137 | -H 'Content-Type: application/json' \ 138 | -d '{ 139 | "firstName" : "Jill", 140 | "lastName" : "Smith", 141 | "email" : "jsmith@email", 142 | "phone" : "222-222-2222", 143 
| "address" : "2 Straight St", 144 | "city" : "gold", 145 | "state": "ny", 146 | "zip": "55555" 147 | }' 148 | ``` 149 | 150 | ```sql 151 | select * from customer.customers; 152 | ``` 153 | 154 | Add another customer Jack Smith 155 | ```shell 156 | curl -X 'POST' \ 157 | 'http://localhost:8080/customers' \ 158 | -H 'accept: */*' \ 159 | -H 'Content-Type: application/json' \ 160 | -d '{ 161 | "firstName" : "Jack", 162 | "lastName" : "Smith", 163 | "email" : "jacksmith@email", 164 | "phone" : "255-555-5555", 165 | "address" : "255 Straight St", 166 | "city" : "gold", 167 | "state": "ny", 168 | "zip": "55555" 169 | }' 170 | ``` 171 | 172 | ```sql 173 | select * from customer.customers; 174 | ``` 175 | Change Jack Smith Information: ex: address 333 Straight St" 176 | 177 | ```shell 178 | curl -X 'POST' \ 179 | 'http://localhost:8080/customers' \ 180 | -H 'accept: */*' \ 181 | -H 'Content-Type: application/json' \ 182 | -d '{ 183 | "firstName" : "Jack", 184 | "lastName" : "Smith", 185 | "email" : "jacksmith@email", 186 | "phone" : "255-555-5555", 187 | "address" : "333 Straight St", 188 | "city" : "silver", 189 | "state": "ny", 190 | "zip": "23232" 191 | }' 192 | ``` 193 | 194 | 195 | ```sql 196 | select * from customer.customers; 197 | ``` -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.springframework.boot 7 | spring-boot-starter-parent 8 | 3.5.3 9 | 10 | 11 | ai.data.pipelines.spring 12 | ai-sentiment-rag-processor 13 | 0.0.1-SNAPSHOT 14 | ai-sentiment-rag-processor 15 | ai-sentiment-rag-processor 16 | 17 | 21 18 | 1.0.0 19 | 2025.0.0 20 | 21 21 | 21 22 | 23 | 24 | 25 | org.projectlombok 26 | lombok 27 | true 28 | 29 | 30 | org.springframework.boot 31 | spring-boot-configuration-processor 32 | true 33 | 34 | 35 | org.springframework.boot 36 | 
spring-boot-starter-actuator 37 | 38 | 39 | org.springframework.boot 40 | spring-boot-starter-amqp 41 | 42 | 43 | org.springframework.ai 44 | spring-ai-starter-model-ollama 45 | 46 | 47 | 48 | org.springframework.amqp 49 | spring-rabbit-stream 50 | 51 | 52 | org.springframework.cloud 53 | spring-cloud-stream 54 | 55 | 56 | org.springframework.cloud 57 | spring-cloud-stream-binder-rabbit 58 | 59 | 60 | 61 | com.github.nyla-solutions 62 | nyla.solutions.core 63 | 2.3.1 64 | 65 | 66 | org.springframework.boot 67 | spring-boot-starter-test 68 | test 69 | 70 | 71 | org.springframework.amqp 72 | spring-rabbit-test 73 | test 74 | 75 | 76 | 77 | 78 | org.springframework.ai 79 | spring-ai-advisors-vector-store 80 | 81 | 82 | org.springframework.ai 83 | spring-ai-starter-vector-store-pgvector 84 | 85 | 86 | 87 | org.springframework.cloud 88 | spring-cloud-stream-test-binder 89 | test 90 | 91 | 92 | 93 | 94 | 95 | 96 | org.springframework.cloud 97 | spring-cloud-dependencies 98 | ${spring-cloud.version} 99 | pom 100 | import 101 | 102 | 103 | org.springframework.ai 104 | spring-ai-bom 105 | ${spring-ai.version} 106 | pom 107 | import 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | org.apache.maven.plugins 116 | maven-compiler-plugin 117 | 118 | 119 | 120 | org.projectlombok 121 | lombok 122 | 123 | 124 | 125 | 126 | 127 | org.springframework.boot 128 | spring-boot-maven-plugin 129 | 130 | 131 | 132 | org.projectlombok 133 | lombok 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /applications/processors/postgres-query-processor/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.springframework.boot 7 | spring-boot-starter-parent 8 | 3.4.5 9 | 10 | 11 | ai.data.pipelines.spring 12 | postgres-query-processor 13 | 0.0.1-SNAPSHOT 14 | postgres-query-processor 15 | postgres-query-processor 16 | 17 | 21 18 | 2024.0.1 19 | 21 20 | 
21 21 | 22 | 23 | 24 | org.projectlombok 25 | lombok 26 | true 27 | 28 | 29 | org.springframework.boot 30 | spring-boot-configuration-processor 31 | true 32 | 33 | 34 | org.springframework.boot 35 | spring-boot-starter-actuator 36 | 37 | 38 | org.springframework.boot 39 | spring-boot-starter-amqp 40 | 41 | 42 | org.springframework.boot 43 | spring-boot-starter-data-jdbc 44 | 45 | 46 | org.springframework.boot 47 | spring-boot-starter-data-jpa 48 | 49 | 50 | org.springframework.amqp 51 | spring-rabbit-stream 52 | 53 | 54 | org.springframework.cloud 55 | spring-cloud-stream 56 | 57 | 58 | org.springframework.cloud 59 | spring-cloud-stream-binder-rabbit 60 | 61 | 62 | 63 | com.h2database 64 | h2 65 | test 66 | 67 | 68 | com.github.nyla-solutions 69 | nyla.solutions.core 70 | 2.2.4 71 | 72 | 73 | 74 | org.postgresql 75 | postgresql 76 | runtime 77 | 78 | 79 | 80 | org.springframework.boot 81 | spring-boot-starter-test 82 | test 83 | 84 | 85 | 86 | org.springframework.amqp 87 | spring-rabbit-test 88 | test 89 | 90 | 91 | org.springframework.cloud 92 | spring-cloud-stream-test-binder 93 | test 94 | 95 | 96 | 97 | 98 | 99 | org.springframework.cloud 100 | spring-cloud-dependencies 101 | ${spring-cloud.version} 102 | pom 103 | import 104 | 105 | 106 | 107 | 108 | 109 | 110 | org.apache.maven.plugins 111 | maven-compiler-plugin 112 | 113 | 114 | 115 | org.projectlombok 116 | lombok 117 | 118 | 119 | 120 | 121 | 122 | org.springframework.boot 123 | spring-boot-maven-plugin 124 | 125 | 126 | 127 | org.projectlombok 128 | lombok 129 | 130 | 131 | 132 | 133 | 134 | org.springframework.cloud 135 | spring-cloud-dataflow-apps-metadata-plugin 136 | 137 | true 138 | 139 | 140 | 141 | aggregate-metadata 142 | compile 143 | 144 | aggregate-metadata 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /docs/04_03_Building a Text Summarization AI Data Pipeline.md: 
-------------------------------------------------------------------------------- 1 | Run RabbitMQ 2 | 3 | ```shell 4 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management 5 | ``` 6 | 7 | Run PostgresML 8 | 9 | ```shell 10 | docker run --rm --name postgresml \ 11 | -it \ 12 | --network data-pipelines \ 13 | -v postgresml_data:/var/lib/postgresql \ 14 | -p 6432:5432 \ 15 | -p 8000:8000 \ 16 | ghcr.io/postgresml/postgresml:2.10.0 \ 17 | sudo -u postgresml psql -d postgresml 18 | ``` 19 | 20 | 21 | Run Postgres 22 | 23 | ```shell 24 | docker run --name postgresql --network data-pipelines --rm -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres -p 5432:5432 postgres:latest 25 | ``` 26 | 27 | Connect to postgres 28 | ```shell 29 | docker exec -it postgresql psql -U postgres 30 | ``` 31 | 32 | 33 | ```sql 34 | 35 | create schema if not exists customer; 36 | 37 | create table customer.feedback( 38 | feed_id text NOT NULL, 39 | email text NOT NULL, 40 | user_feedback text NOT NULL, 41 | summary text NOT NULL, 42 | PRIMARY KEY (feed_id) 43 | ); 44 | ``` 45 | 46 | 47 | --------------------------- 48 | 49 | 50 | 51 | Start Http 52 | 53 | 54 | ```shell 55 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=feedback --server.port=8093 --spring.cloud.stream.bindings.output.destination=customers.input.feedback 56 | ``` 57 | 58 | 59 | 60 | 61 | Processor text summarization 62 | 63 | ```shell 64 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.password=postgres --spring.cloud.stream.bindings.input.destination=customers.input.feedback --spring.cloud.stream.bindings.output.destination=customers.output.feedback
--spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor 65 | ``` 66 | 67 | See 68 | 69 | - [text-summarization.yml](../applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml) 70 | - [QueryFunctionProcessor.java](../applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/processors/QueryFunctionProcessor.java) 71 | 72 | 73 | 74 | Start Sink 75 | 76 | 77 | ```shell 78 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination=customers.output.feedback --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-text-summarization.yml --spring.cloud.stream.bindings.input.group=postgres-sink 79 | ``` 80 | See 81 | 82 | - [postgres-text-summarization.yml](../applications/sinks/postgres-sink/src/main/resources/postgres-text-summarization.yml) 83 | - [PostgresConsumer.java](../applications/sinks/postgres-sink/src/main/java/ai/data/pipeline/spring/sink/PostgresConsumer.java) 84 | 85 | 86 | 87 | ```shell 88 | curl -X 'POST' \ 89 | 'http://localhost:8093/feedback' \ 90 | -H 'accept: */*' \ 91 | -H 'Content-Type: application/json' \ 92 | -d '{ 93 | "id" : "F001", 94 | "email" : "jmatthews@email", 95 | "feedback" : "I am really disappointed with the wait time I experienced when trying to reach Customer Service. I was on hold for over 40 minutes just to speak with someone about a simple issue with my account. It’s frustrating and honestly unacceptable.
If your company values customer satisfaction, you seriously need to hire more reps or improve your response time. I do not have time to sit around waiting all day." 96 | }' 97 | ``` 98 | 99 | 100 | ```shell 101 | curl -X 'POST' \ 102 | 'http://localhost:8093/feedback' \ 103 | -H 'accept: */*' \ 104 | -H 'Content-Type: application/json' \ 105 | -d '{ 106 | "id" : "F002", 107 | "email" : "jmatthews@email", 108 | "feedback" : "I just wanted to take a moment to recognize the exceptional professionalism of your customer service team. The representative I spoke with was courteous, knowledgeable, and incredibly patient while helping me resolve my issue. It’s rare to find such a high level of service these days, and it truly made a difference in my experience. Kudos to your team!" 109 | }' 110 | ``` 111 | 112 | 113 | ```shell 114 | curl -X 'POST' \ 115 | 'http://localhost:8093/feedback' \ 116 | -H 'accept: */*' \ 117 | -H 'Content-Type: application/json' \ 118 | -d '{ 119 | "id" : "F003", 120 | "email" : "jmatthews@email", 121 | "feedback" : "I am getting really frustrated with having to repeat who I am and explain my issue every time I am transferred to another representative. It is like no one talks to each other or takes notes. I had to give my name, account number, and explain the entire problem three different times during one call. It’s exhausting and makes the whole experience feel disorganized. 
There has to be a better way to handle this" 122 | }' 123 | ``` 124 | 125 | 126 | In psql 127 | 128 | ```sql 129 | select feed_id,summary from customer.feedback; 130 | ``` 131 | 132 | ```sql 133 | select LENGTH(summary) sum_len, LENGTH(user_feedback) org_len from customer.feedback; 134 | ``` 135 | -------------------------------------------------------------------------------- /docs/05_02_Text Sentiment Analysis Data Pipeline with Spring AI.md: -------------------------------------------------------------------------------- 1 | Run Rabbit 2 | 3 | ```shell 4 | docker network create data-pipelines 5 | ``` 6 | 7 | start rabbitmq 8 | ```shell 9 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management 10 | ``` 11 | 12 | 13 | Run Postgres 14 | 15 | ```shell 16 | docker run --name postgres --network data-pipelines --rm \ 17 | -e POSTGRES_USER=postgres \ 18 | -e POSTGRES_PASSWORD=postgres \ 19 | -e POSTGRES_DB=postgres \ 20 | -p 5432:5432 \ 21 | -it postgres 22 | ``` 23 | 24 | ```shell 25 | docker exec -it postgres psql -U postgres 26 | ``` 27 | 28 | 29 | ```sql 30 | create schema if not exists customer ; 31 | 32 | create table customer.feedback( 33 | feed_id text NOT NULL, 34 | email text NOT NULL, 35 | user_feedback text NOT NULL, 36 | summary text NOT NULL, 37 | feedback_dt timestamp NOT NULL DEFAULT NOW(), 38 | sentiment text NOT NULL, 39 | PRIMARY KEY (feed_id) 40 | ); 41 | ``` 42 | 43 | Run PostgresML (used by the text summary processor) 44 | 45 | ```shell 46 | docker run --rm --name postgresml \ 47 | -it \ 48 | --network data-pipelines \ 49 | -v postgresml_data:/var/lib/postgresql \ 50 | -p 6432:5432 \ 51 | -p 8000:8000 \ 52 | ghcr.io/postgresml/postgresml:2.10.0 \ 53 | sudo -u postgresml psql -d postgresml 54 | ``` 55 | 56 | 57 | Start Ollama 58 | 59 | ```shell 60 | ollama serve 61 | ``` 62 | 63 | pull and run a model like this: 64 | 65 | ```shell 66 | ollama run llama3 67 | ``` 68 | 69 | Test with llama3 model with the following
70 | 71 | ```shell 72 | Analyze the sentiment of this text: "Hello my name is John Smith. I am long time customer. It seems that every time I call the help desk there is a very long wait . When I finally get someone on the line, I have the repeat the process of the provide my details.". 73 | Respond with only one word: Positive or Negative. 74 | ``` 75 | 76 | --------------------------- 77 | 78 | 79 | Start Http 80 | 81 | ```shell 82 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=feedback --server.port=8094 --spring.cloud.stream.bindings.output.destination=customers.input.feedback 83 | ``` 84 | 85 | 86 | Start Processor Text Summary 87 | 88 | ```shell 89 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.input.feedback --spring.cloud.stream.bindings.output.destination=customers.output.feedback.summary --spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor 90 | ``` 91 | Start Sentiment Analysis Processor 92 | 93 | ```shell 94 | java -jar applications/processors/ai-sentiment-processor/target/ai-sentiment-processor-0.0.1-SNAPSHOT.jar --spring.cloud.stream.bindings.input.destination=customers.output.feedback.summary --spring.cloud.stream.bindings.output.destination=customers.output.feedback.sentiment 95 | ``` 96 | 97 | See [CustomerFeedbackSentimentProcessor.java](../applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessor.java) 98 | 99 | See 
[CustomerFeedback.java](../applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/domains/CustomerFeedback.java) 100 | See [FeedbackSentiment.java](../applications/processors/ai-sentiment-processor/src/main/java/ai/data/pipeline/sentiment/domains/FeedbackSentiment.java) 101 | 102 | Added support for OLLAMA 103 | [pom.xml](../applications/processors/ai-sentiment-processor/pom.xml) 104 | 105 | 106 | Start Postgres Sink 107 | 108 | 109 | ```shell 110 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination=customers.output.feedback.sentiment --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis-ollama.yml --spring.cloud.stream.bindings.input.group=postgres-sink 111 | ``` 112 | 113 | 114 | 115 | ```shell 116 | curl -X 'POST' \ 117 | 'http://localhost:8094/feedback' \ 118 | -H 'accept: */*' \ 119 | -H 'Content-Type: application/json' \ 120 | -d '{ 121 | "id" : "F001", 122 | "email" : "jmatthews@email", 123 | "feedback" : "Hello my name is John Smith. I am long time customer. It seems that every time I call the help desk there is a very long wait. Then when I following get someone on the line, I have the repeat to repeat the process of the provide the details. This is very disappointing." 124 | }' 125 | ``` 126 | 127 | 128 | ```shell 129 | curl -X 'POST' \ 130 | 'http://localhost:8094/feedback' \ 131 | -H 'accept: */*' \ 132 | -H 'Content-Type: application/json' \ 133 | -d '{ 134 | "id" : "F002", 135 | "email" : "jmatthews@email", 136 | "feedback" : "I am really disappointed with the wait time I experienced when trying to reach Customer Service. 
I was on hold for over 40 minutes just to speak with someone about a simple issue with my account. It’s frustrating and honestly unacceptable. If your company values customer satisfaction, you seriously need to hire more reps or improve your response time. I do not have time to sit around waiting all day." 137 | }' 138 | ``` 139 | 140 | 141 | ```shell 142 | curl -X 'POST' \ 143 | 'http://localhost:8094/feedback' \ 144 | -H 'accept: */*' \ 145 | -H 'Content-Type: application/json' \ 146 | -d '{ 147 | "id" : "F003", 148 | "email" : "jmatthews@email", 149 | "feedback" : "I just wanted to take a moment to recognize the exceptional professionalism of your customer service team. The representative I spoke with was courteous, knowledgeable, and incredibly patient while helping me resolve my issue. It’s rare to find such a high level of service these days, and it truly made a difference in my experience. Kudos to your team!" 150 | }' 151 | ``` 152 | 153 | 154 | In psql 155 | 156 | ```sql 157 | select sentiment,summary from customer.feedback; 158 | ``` 159 | -------------------------------------------------------------------------------- /docs/03_04_Building API Data Pipeline Postgres Processor.md: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | 3 | ```shell 4 | docker network create data-pipelines 5 | ``` 6 | 7 | - Run RabbitMQ 8 | ```shell 9 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management 10 | ``` 11 | 12 | Postgres 13 | ```shell 14 | docker run --name postgres --network data-pipelines --rm \ 15 | -e POSTGRES_USER=postgres \ 16 | -e POSTGRES_PASSWORD=postgres \ 17 | -e POSTGRES_DB=postgres \ 18 | -p 5432:5432 \ 19 | -it postgres 20 | ``` 21 | 22 | 23 | ```shell 24 | docker exec -it postgres psql -U postgres 25 | ``` 26 | 27 | 28 | ```sql 29 | 30 | create schema customer; 31 | 32 | create table customer.customers( 33 | first_nm text NOT NULL, 34 | last_nm text NOT NULL, 35 |
email text NOT NULL, 36 | phone text , 37 | address text NOT NULL, 38 | city text , 39 | state text , 40 | zip text NOT NULL, 41 | PRIMARY KEY (email) 42 | ); 43 | 44 | 45 | create table customer.phone_campaigns( 46 | phone text NOT NULL, 47 | last_nm text NOT NULL, 48 | first_nm text NOT NULL, 49 | email text NOT NULL, 50 | PRIMARY KEY (phone) 51 | ); 52 | ``` 53 | 54 | In psql 55 | 56 | ```sql 57 | insert into customer.phone_campaigns(phone,first_nm,last_nm, email) values('555-555-5551','John','Matthews','jmatthews@email'); 58 | insert into customer.phone_campaigns(phone,first_nm,last_nm, email) values('555-555-5552','Marcy','Love','mlove@email'); 59 | ``` 60 | 61 | 62 | 63 | ```sql 64 | SELECT (CASE WHEN LENGTH(cust.last_nm) > 0 THEN cust.last_nm ELSE pc.last_nm END) as lastname, 65 | (CASE WHEN LENGTH(cust.first_nm) > 0 THEN cust.first_nm ELSE pc.first_nm END) as firstname, 66 | (CASE WHEN LENGTH(cust.email) > 0 THEN cust.email ELSE pc.email END) as email, 67 | cust.phone as phone, 68 | cust.address as address, 69 | cust.city as city, 70 | cust.state as state, 71 | cust.zip as zip 72 | FROM 73 | (select 74 | 'Entered last name' as last_nm, 75 | 'Entered first Name' as first_nm, 76 | 'Entered email' as email, 77 | '555-555-5551' as phone, 78 | 'Entered address' as address, 79 | 'Entered city' as city, 80 | 'Entered state' as state, 81 | 'Entered zip' as zip ) cust 82 | LEFT JOIN 83 | (select last_nm, first_nm, email, phone from customer.phone_campaigns) pc 84 | ON cust.phone = pc.phone; 85 | ``` 86 | 87 | 88 | From Phone Campaign 89 | 90 | ```sql 91 | SELECT (CASE WHEN LENGTH(cust.last_nm) > 0 THEN cust.last_nm ELSE pc.last_nm END) as lastname, 92 | (CASE WHEN LENGTH(cust.first_nm) > 0 THEN cust.first_nm ELSE pc.first_nm END) as firstname, 93 | (CASE WHEN LENGTH(cust.email) > 0 THEN cust.email ELSE pc.email END) as email, 94 | cust.phone as phone, 95 | cust.address as address, 96 | cust.city as city, 97 | cust.state as state, 98 | cust.zip as zip 99 | FROM
100 | (select 101 | '' as last_nm, 102 | '' as first_nm, 103 | '' as email, 104 | '555-555-5551' as phone, 105 | 'Entered address' as address, 106 | 'Entered city' as city, 107 | 'Entered state' as state, 108 | 'Entered zip' as zip ) cust 109 | LEFT JOIN 110 | (select last_nm, first_nm, email, phone from customer.phone_campaigns) pc 111 | ON cust.phone = pc.phone; 112 | 113 | ``` 114 | 115 | ================================== 116 | 117 | 118 | Start Http 119 | 120 | 121 | ```shell 122 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=customers --server.port=8091 --spring.cloud.stream.bindings.output.destination=customers.input.formatting 123 | ``` 124 | 125 | 126 | Start Processor customer formatting from a marketing campaign 127 | 128 | [QueryFunctionProcessor.java](../applications/processors/postgres-query-processor/src/main/java/ai/data/pipeline/spring/postgres/query/processors/QueryFunctionProcessor.java) 129 | 130 | ```shell 131 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --query.processor.sql="select (CASE WHEN LENGTH(cust.last_nm) > 0 THEN cust.last_nm ELSE pc.last_nm END) as lastname, (CASE WHEN LENGTH(cust.first_nm) > 0 THEN cust.first_nm ELSE pc.first_nm END) as firstname, (CASE WHEN LENGTH(cust.email) > 0 THEN cust.email ELSE pc.email END) as email, cust.phone as phone, cust.address as address, cust.city as city, cust.state as state, cust.zip as zip from (select :lastname as last_nm, :firstname as first_nm, :email as email, :phone as phone, :address as address, :city as city, :state as state, :zip as zip ) cust LEFT JOIN (select last_nm, first_nm, email, phone from customer.phone_campaigns) pc ON cust.phone = pc.phone" --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.datasource.driverClassName=org.postgresql.Driver
--spring.cloud.stream.bindings.input.destination=customers.input.formatting --spring.cloud.stream.bindings.output.destination=customers.output.formatting 132 | ``` 133 | 134 | Start Sink 135 | 136 | 137 | ```shell 138 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --sql.consumer.sql="insert into customer.customers(email,first_nm,last_nm,phone,address,city,state,zip) values (:email,:firstname,:lastname,:phone, :address,:city,:state,:zip) on CONFLICT (email) DO UPDATE SET first_nm = :firstname, last_nm = :lastname, phone = :phone, address = :address, city = :city, state = :state, zip = :zip" --spring.cloud.stream.bindings.input.destination=customers.output.formatting 139 | ``` 140 | 141 | ```shell 142 | curl -X 'POST' \ 143 | 'http://localhost:8091/customers' \ 144 | -H 'accept: */*' \ 145 | -H 'Content-Type: application/json' \ 146 | -d '{ 147 | "email" : "", 148 | "firstname" : "", 149 | "lastname" : "", 150 | "phone" : "555-555-5551", 151 | "address" : "55 Straight St", 152 | "city" : "Richmond", 153 | "state": "VA", 154 | "zip": "23225" 155 | }' 156 | ``` 157 | 158 | 159 | ```shell 160 | curl -X 'POST' \ 161 | 'http://localhost:8091/customers' \ 162 | -H 'accept: */*' \ 163 | -H 'Content-Type: application/json' \ 164 | -d '{ 165 | "email" : "", 166 | "firstname" : "", 167 | "lastname" : "Love", 168 | "phone" : "555-555-5552", 169 | "address" : "452 Parkside Lane", 170 | "city" : "Denver", 171 | "state": "CO", 172 | "zip": "80235" 173 | }' 174 | ``` 175 | 176 | 177 | In psql 178 | 179 | ```sql 180 | select * from customer.customers; 181 | 182 | ``` 183 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 
LinkedIn Learning Exercise Files License Agreement 2 | ================================================== 3 | 4 | This License Agreement (the "Agreement") is a binding legal agreement 5 | between you (as an individual or entity, as applicable) and LinkedIn 6 | Corporation (“LinkedIn”). By downloading or using the LinkedIn Learning 7 | exercise files in this repository (“Licensed Materials”), you agree to 8 | be bound by the terms of this Agreement. If you do not agree to these 9 | terms, do not download or use the Licensed Materials. 10 | 11 | 1. License. 12 | - a. Subject to the terms of this Agreement, LinkedIn hereby grants LinkedIn 13 | members during their LinkedIn Learning subscription a non-exclusive, 14 | non-transferable copyright license, for internal use only, to 1) make a 15 | reasonable number of copies of the Licensed Materials, and 2) make 16 | derivative works of the Licensed Materials for the sole purpose of 17 | practicing skills taught in LinkedIn Learning courses. 18 | - b. Distribution. Unless otherwise noted in the Licensed Materials, subject 19 | to the terms of this Agreement, LinkedIn hereby grants LinkedIn members 20 | with a LinkedIn Learning subscription a non-exclusive, non-transferable 21 | copyright license to distribute the Licensed Materials, except the 22 | Licensed Materials may not be included in any product or service (or 23 | otherwise used) to instruct or educate others. 24 | 25 | 2. Restrictions and Intellectual Property. 26 | - a. You may not to use, modify, copy, make derivative works of, publish, 27 | distribute, rent, lease, sell, sublicense, assign or otherwise transfer the 28 | Licensed Materials, except as expressly set forth above in Section 1. 29 | - b. Linkedin (and its licensors) retains its intellectual property rights 30 | in the Licensed Materials. Except as expressly set forth in Section 1, 31 | LinkedIn grants no licenses. 32 | - c. 
You indemnify LinkedIn and its licensors and affiliates for i) any 33 | alleged infringement or misappropriation of any intellectual property rights 34 | of any third party based on modifications you make to the Licensed Materials, 35 | ii) any claims arising from your use or distribution of all or part of the 36 | Licensed Materials and iii) a breach of this Agreement. You will defend, hold 37 | harmless, and indemnify LinkedIn and its affiliates (and our and their 38 | respective employees, shareholders, and directors) from any claim or action 39 | brought by a third party, including all damages, liabilities, costs and 40 | expenses, including reasonable attorneys’ fees, to the extent resulting from, 41 | alleged to have resulted from, or in connection with: (a) your breach of your 42 | obligations herein; or (b) your use or distribution of any Licensed Materials. 43 | 44 | 3. Open source. This code may include open source software, which may be 45 | subject to other license terms as provided in the files. 46 | 47 | 4. Warranty Disclaimer. LINKEDIN PROVIDES THE LICENSED MATERIALS ON AN “AS IS” 48 | AND “AS AVAILABLE” BASIS. LINKEDIN MAKES NO REPRESENTATION OR WARRANTY, 49 | WHETHER EXPRESS OR IMPLIED, ABOUT THE LICENSED MATERIALS, INCLUDING ANY 50 | REPRESENTATION THAT THE LICENSED MATERIALS WILL BE FREE OF ERRORS, BUGS OR 51 | INTERRUPTIONS, OR THAT THE LICENSED MATERIALS ARE ACCURATE, COMPLETE OR 52 | OTHERWISE VALID. TO THE FULLEST EXTENT PERMITTED BY LAW, LINKEDIN AND ITS 53 | AFFILIATES DISCLAIM ANY IMPLIED OR STATUTORY WARRANTY OR CONDITION, INCLUDING 54 | ANY IMPLIED WARRANTY OR CONDITION OF MERCHANTABILITY OR FITNESS FOR A 55 | PARTICULAR PURPOSE, AVAILABILITY, SECURITY, TITLE AND/OR NON-INFRINGEMENT. 56 | YOUR USE OF THE LICENSED MATERIALS IS AT YOUR OWN DISCRETION AND RISK, AND 57 | YOU WILL BE SOLELY RESPONSIBLE FOR ANY DAMAGE THAT RESULTS FROM USE OF THE 58 | LICENSED MATERIALS TO YOUR COMPUTER SYSTEM OR LOSS OF DATA. 
NO ADVICE OR 59 | INFORMATION, WHETHER ORAL OR WRITTEN, OBTAINED BY YOU FROM US OR THROUGH OR 60 | FROM THE LICENSED MATERIALS WILL CREATE ANY WARRANTY OR CONDITION NOT 61 | EXPRESSLY STATED IN THESE TERMS. 62 | 63 | 5. Limitation of Liability. LINKEDIN SHALL NOT BE LIABLE FOR ANY INDIRECT, 64 | INCIDENTAL, SPECIAL, PUNITIVE, CONSEQUENTIAL OR EXEMPLARY DAMAGES, INCLUDING 65 | BUT NOT LIMITED TO, DAMAGES FOR LOSS OF PROFITS, GOODWILL, USE, DATA OR OTHER 66 | INTANGIBLE LOSSES . IN NO EVENT WILL LINKEDIN'S AGGREGATE LIABILITY TO YOU 67 | EXCEED $100. THIS LIMITATION OF LIABILITY SHALL: 68 | - i. APPLY REGARDLESS OF WHETHER (A) YOU BASE YOUR CLAIM ON CONTRACT, TORT, 69 | STATUTE, OR ANY OTHER LEGAL THEORY, (B) WE KNEW OR SHOULD HAVE KNOWN ABOUT 70 | THE POSSIBILITY OF SUCH DAMAGES, OR (C) THE LIMITED REMEDIES PROVIDED IN THIS 71 | SECTION FAIL OF THEIR ESSENTIAL PURPOSE; AND 72 | - ii. NOT APPLY TO ANY DAMAGE THAT LINKEDIN MAY CAUSE YOU INTENTIONALLY OR 73 | KNOWINGLY IN VIOLATION OF THESE TERMS OR APPLICABLE LAW, OR AS OTHERWISE 74 | MANDATED BY APPLICABLE LAW THAT CANNOT BE DISCLAIMED IN THESE TERMS. 75 | 76 | 6. Termination. This Agreement automatically terminates upon your breach of 77 | this Agreement or termination of your LinkedIn Learning subscription. On 78 | termination, all licenses granted under this Agreement will terminate 79 | immediately and you will delete the Licensed Materials. Sections 2-7 of this 80 | Agreement survive any termination of this Agreement. LinkedIn may discontinue 81 | the availability of some or all of the Licensed Materials at any time for any 82 | reason. 83 | 84 | 7. Miscellaneous. This Agreement will be governed by and construed in 85 | accordance with the laws of the State of California without regard to conflict 86 | of laws principles. 
The exclusive forum for any disputes arising out of or 87 | relating to this Agreement shall be an appropriate federal or state court 88 | sitting in the County of Santa Clara, State of California. If LinkedIn does 89 | not act to enforce a breach of this Agreement, that does not mean that 90 | LinkedIn has waived its right to enforce this Agreement. The Agreement does 91 | not create a partnership, agency relationship, or joint venture between the 92 | parties. Neither party has the power or authority to bind the other or to 93 | create any obligation or responsibility on behalf of the other. You may not, 94 | without LinkedIn’s prior written consent, assign or delegate any rights or 95 | obligations under these terms, including in connection with a change of 96 | control. Any purported assignment and delegation shall be ineffective. The 97 | Agreement shall bind and inure to the benefit of the parties, their respective 98 | successors and permitted assigns. If any provision of the Agreement is 99 | unenforceable, that provision will be modified to render it enforceable to the 100 | extent possible to give effect to the parties’ intentions and the remaining 101 | provisions will not be affected. This Agreement is the only agreement between 102 | you and LinkedIn regarding the Licensed Materials, and supersedes all prior 103 | agreements relating to the Licensed Materials. 
104 | 105 | Last Updated: March 2019 106 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/src/main/java/ai/data/pipeline/spring/customer/BatchConfig.java: -------------------------------------------------------------------------------- 1 | package ai.data.pipeline.spring.customer; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.springframework.batch.core.Job; 5 | import org.springframework.batch.core.Step; 6 | import org.springframework.batch.core.job.builder.JobBuilder; 7 | import org.springframework.batch.core.launch.JobLauncher; 8 | import org.springframework.batch.core.launch.support.RunIdIncrementer; 9 | import org.springframework.batch.core.launch.support.TaskExecutorJobLauncher; 10 | import org.springframework.batch.core.repository.JobRepository; 11 | import org.springframework.batch.core.repository.support.ResourcelessJobRepository; 12 | import org.springframework.batch.core.step.builder.StepBuilder; 13 | import org.springframework.batch.item.ItemProcessor; 14 | import org.springframework.batch.item.ItemReader; 15 | import org.springframework.batch.item.ItemWriter; 16 | import org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider; 17 | import org.springframework.batch.item.database.JdbcBatchItemWriter; 18 | import org.springframework.batch.item.database.builder.JdbcBatchItemWriterBuilder; 19 | import org.springframework.batch.item.file.FlatFileItemReader; 20 | import org.springframework.batch.item.file.builder.FlatFileItemReaderBuilder; 21 | import org.springframework.beans.factory.annotation.Qualifier; 22 | import org.springframework.beans.factory.annotation.Value; 23 | import org.springframework.boot.autoconfigure.EnableAutoConfiguration; 24 | import org.springframework.boot.autoconfigure.batch.BatchAutoConfiguration; 25 | import org.springframework.context.annotation.Bean; 26 | import org.springframework.context.annotation.Configuration; 27 | 
import org.springframework.core.io.Resource; 28 | import org.springframework.core.task.TaskExecutor; 29 | import org.springframework.transaction.PlatformTransactionManager; 30 | import ai.data.pipeline.spring.customer.domain.Customer; 31 | import ai.data.pipeline.spring.customer.mapper.CustomerFieldMapper; 32 | 33 | import javax.sql.DataSource; 34 | 35 | /** 36 | * @author Gregory Green 37 | * 38 | * Spring configuration for laucning the Spring batch application 39 | * 40 | */ 41 | @Configuration 42 | @EnableAutoConfiguration(exclude = {BatchAutoConfiguration.class}) 43 | @Slf4j 44 | public class BatchConfig { 45 | 46 | //Number of records to write to the database at a time 47 | @Value("${spring.batch.chuck.size:10}") 48 | private int chunkSize; 49 | 50 | private static final String saveSql = """ 51 | insert into customer.customers(email,first_name,last_name,phone,address,city,state,zip) 52 | values (:contact.email, 53 | :firstName, 54 | :lastName, 55 | :contact.phone, 56 | :location.address, 57 | :location.city, 58 | :location.state, 59 | :location.zip) 60 | on CONFLICT (email) 61 | DO UPDATE SET first_name = :firstName, 62 | last_name = :lastName, 63 | phone = :contact.phone, 64 | address = :location.address, 65 | city = :location.city, 66 | state = :location.state, 67 | zip = :location.zip 68 | """; 69 | 70 | //The input CSV field 71 | @Value("${source.input.file.csv}") 72 | private Resource customerInputResource; 73 | 74 | //The name of the JOB 75 | private final static String jobName = "load-customer"; 76 | 77 | /** 78 | * Create the step based on the provided reader, processor and writer 79 | * @param itemReader the customer record item reader 80 | * @param processor the process for each customer record 81 | * @param writer the database writer 82 | * @param jobRepository the Spring Batch job repository 83 | * @param transactionManager the transaction manager 84 | * @return the created step 85 | */ 86 | @Bean 87 | public Step loadCustomerStep(ItemReader 
itemReader, 88 | ItemProcessor processor, 89 | ItemWriter writer, 90 | JobRepository jobRepository, 91 | PlatformTransactionManager transactionManager) { 92 | return new StepBuilder("loadCustomerStep", jobRepository) 93 | .chunk(chunkSize,transactionManager) 94 | .reader(itemReader) 95 | .processor(processor) 96 | .writer(writer) 97 | .build(); 98 | } 99 | 100 | /** 101 | * Construct a reader to read the customer information from an CSV file 102 | * @param mapper the customer field mapp 103 | * @return the reader 104 | */ 105 | @Bean 106 | public FlatFileItemReader reader(CustomerFieldMapper mapper) { 107 | return new FlatFileItemReaderBuilder() 108 | .name("customerItemReader") 109 | .resource(customerInputResource) 110 | .delimited() 111 | .names("id","firstName", "lastName","email" 112 | ,"phone","address","city","state" 113 | ,"zip" 114 | ) 115 | .fieldSetMapper(mapper) 116 | .build(); 117 | } 118 | 119 | /** 120 | * Construct a batch writer to insert customer records 121 | * @param dataSource the JDBC datasource 122 | * @return the JDBC writer 123 | */ 124 | @Bean 125 | public JdbcBatchItemWriter writer(DataSource dataSource) { 126 | 127 | return new JdbcBatchItemWriterBuilder() 128 | .sql(saveSql) 129 | .itemSqlParameterSourceProvider(new BeanPropertyItemSqlParameterSourceProvider<>()) 130 | .dataSource(dataSource) 131 | .build(); 132 | } 133 | 134 | 135 | /** 136 | * 137 | * @param jobRepository the job 138 | * @param taskExecutor the task executor 139 | * @return the job launch 140 | */ 141 | @Bean 142 | public JobLauncher batchJobLauncher(@Qualifier("jobRepository") JobRepository jobRepository, 143 | TaskExecutor taskExecutor) { 144 | var jobLauncher = new TaskExecutorJobLauncher(); 145 | jobLauncher.setJobRepository(jobRepository); 146 | jobLauncher.setTaskExecutor(taskExecutor); 147 | return jobLauncher; 148 | } 149 | 150 | /** 151 | * Creates a Spring Job based on the given step 152 | * @param jobRepository the job repository provided by Spring Batch 
153 | * @param step the Job step 154 | * @return the create job 155 | */ 156 | @Bean 157 | public Job job(JobRepository jobRepository, 158 | Step step){ 159 | 160 | return new JobBuilder(jobName+System.currentTimeMillis(),jobRepository) 161 | .incrementer(new RunIdIncrementer()) 162 | .flow(step).end().build(); 163 | } 164 | 165 | 166 | /** 167 | * Create a repository implementation that does not save batch information to the database. 168 | * This is used to simplify this example. Note: Saving information such as the status of the tables 169 | * is recommended for production use. 170 | * 171 | * @return the job repository 172 | */ 173 | @Bean 174 | JobRepository jobRepository() 175 | { 176 | //return an in-memory job repository 177 | return new ResourcelessJobRepository(); 178 | 179 | } 180 | 181 | 182 | } 183 | -------------------------------------------------------------------------------- /applications/batching/customer-batch/mvnw.cmd: -------------------------------------------------------------------------------- 1 | <# : batch portion 2 | @REM ---------------------------------------------------------------------------- 3 | @REM Licensed to the Apache Software Foundation (ASF) under one 4 | @REM or more contributor license agreements. See the NOTICE file 5 | @REM distributed with this work for additional information 6 | @REM regarding copyright ownership. The ASF licenses this file 7 | @REM to you under the Apache License, Version 2.0 (the 8 | @REM "License"); you may not use this file except in compliance 9 | @REM with the License. You may obtain a copy of the License at 10 | @REM 11 | @REM http://www.apache.org/licenses/LICENSE-2.0 12 | @REM 13 | @REM Unless required by applicable law or agreed to in writing, 14 | @REM software distributed under the License is distributed on an 15 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | @REM KIND, either express or implied. 
See the License for the 17 | @REM specific language governing permissions and limitations 18 | @REM under the License. 19 | @REM ---------------------------------------------------------------------------- 20 | 21 | @REM ---------------------------------------------------------------------------- 22 | @REM Apache Maven Wrapper startup batch script, version 3.3.2 23 | @REM 24 | @REM Optional ENV vars 25 | @REM MVNW_REPOURL - repo url base for downloading maven distribution 26 | @REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven 27 | @REM MVNW_VERBOSE - true: enable verbose log; others: silence the output 28 | @REM ---------------------------------------------------------------------------- 29 | 30 | @IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0) 31 | @SET __MVNW_CMD__= 32 | @SET __MVNW_ERROR__= 33 | @SET __MVNW_PSMODULEP_SAVE=%PSModulePath% 34 | @SET PSModulePath= 35 | @FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @( 36 | IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B) 37 | ) 38 | @SET PSModulePath=%__MVNW_PSMODULEP_SAVE% 39 | @SET __MVNW_PSMODULEP_SAVE= 40 | @SET __MVNW_ARG0_NAME__= 41 | @SET MVNW_USERNAME= 42 | @SET MVNW_PASSWORD= 43 | @IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*) 44 | @echo Cannot start maven from wrapper >&2 && exit /b 1 45 | @GOTO :EOF 46 | : end batch / begin powershell #> 47 | 48 | $ErrorActionPreference = "Stop" 49 | if ($env:MVNW_VERBOSE -eq "true") { 50 | $VerbosePreference = "Continue" 51 | } 52 | 53 | # calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties 54 | $distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl 55 | if (!$distributionUrl) { 56 | Write-Error "cannot read distributionUrl 
property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" 57 | } 58 | 59 | switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) { 60 | "maven-mvnd-*" { 61 | $USE_MVND = $true 62 | $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip" 63 | $MVN_CMD = "mvnd.cmd" 64 | break 65 | } 66 | default { 67 | $USE_MVND = $false 68 | $MVN_CMD = $script -replace '^mvnw','mvn' 69 | break 70 | } 71 | } 72 | 73 | # apply MVNW_REPOURL and calculate MAVEN_HOME 74 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash> 75 | if ($env:MVNW_REPOURL) { 76 | $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } # mvnd archives live under /maven/mvnd/, Apache Maven under /org/apache/maven/ 77 | $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace ('^.*'+$MVNW_REPO_PATTERN),'')" 78 | } 79 | $distributionUrlName = $distributionUrl -replace '^.*/','' 80 | $distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$','' 81 | $MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain" 82 | if ($env:MAVEN_USER_HOME) { 83 | $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain" 84 | } 85 | $MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join '' 86 | $MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME" 87 | 88 | if (Test-Path -Path "$MAVEN_HOME" -PathType Container) { 89 | Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME" 90 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" 91 | exit $? 92 | } 93 | 94 | if (! 
$distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) { 95 | Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl" 96 | } 97 | 98 | # prepare tmp dir 99 | $TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile 100 | $TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir" 101 | $TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null 102 | trap { 103 | if ($TMP_DOWNLOAD_DIR.Exists) { 104 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } 105 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } 106 | } 107 | } 108 | 109 | New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null 110 | 111 | # Download and Install Apache Maven 112 | Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." 113 | Write-Verbose "Downloading from: $distributionUrl" 114 | Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" 115 | 116 | $webclient = New-Object System.Net.WebClient 117 | if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) { 118 | $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD) 119 | } 120 | [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 121 | $webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null 122 | 123 | # If specified, validate the SHA-256 sum of the Maven distribution zip file 124 | $distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum 125 | if ($distributionSha256Sum) { 126 | if ($USE_MVND) { 127 | Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." 
128 | } 129 | Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash 130 | if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) { 131 | Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property." 132 | } 133 | } 134 | 135 | # unzip and move 136 | Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null 137 | Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null 138 | try { 139 | Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null 140 | } catch { 141 | if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) { 142 | Write-Error "fail to move MAVEN_HOME" 143 | } 144 | } finally { 145 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } 146 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } 147 | } 148 | 149 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" 150 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-processor/mvnw.cmd: -------------------------------------------------------------------------------- 1 | <# : batch portion 2 | @REM ---------------------------------------------------------------------------- 3 | @REM Licensed to the Apache Software Foundation (ASF) under one 4 | @REM or more contributor license agreements. See the NOTICE file 5 | @REM distributed with this work for additional information 6 | @REM regarding copyright ownership. The ASF licenses this file 7 | @REM to you under the Apache License, Version 2.0 (the 8 | @REM "License"); you may not use this file except in compliance 9 | @REM with the License. 
You may obtain a copy of the License at 10 | @REM 11 | @REM http://www.apache.org/licenses/LICENSE-2.0 12 | @REM 13 | @REM Unless required by applicable law or agreed to in writing, 14 | @REM software distributed under the License is distributed on an 15 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | @REM KIND, either express or implied. See the License for the 17 | @REM specific language governing permissions and limitations 18 | @REM under the License. 19 | @REM ---------------------------------------------------------------------------- 20 | 21 | @REM ---------------------------------------------------------------------------- 22 | @REM Apache Maven Wrapper startup batch script, version 3.3.2 23 | @REM 24 | @REM Optional ENV vars 25 | @REM MVNW_REPOURL - repo url base for downloading maven distribution 26 | @REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven 27 | @REM MVNW_VERBOSE - true: enable verbose log; others: silence the output 28 | @REM ---------------------------------------------------------------------------- 29 | 30 | @IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0) 31 | @SET __MVNW_CMD__= 32 | @SET __MVNW_ERROR__= 33 | @SET __MVNW_PSMODULEP_SAVE=%PSModulePath% 34 | @SET PSModulePath= 35 | @FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @( 36 | IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B) 37 | ) 38 | @SET PSModulePath=%__MVNW_PSMODULEP_SAVE% 39 | @SET __MVNW_PSMODULEP_SAVE= 40 | @SET __MVNW_ARG0_NAME__= 41 | @SET MVNW_USERNAME= 42 | @SET MVNW_PASSWORD= 43 | @IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*) 44 | @echo Cannot start maven from wrapper >&2 && exit /b 1 45 | @GOTO :EOF 46 | : end batch / begin powershell #> 47 | 48 | $ErrorActionPreference = "Stop" 49 | if ($env:MVNW_VERBOSE 
-eq "true") { 50 | $VerbosePreference = "Continue" 51 | } 52 | 53 | # calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties 54 | $distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl 55 | if (!$distributionUrl) { 56 | Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" 57 | } 58 | 59 | switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) { 60 | "maven-mvnd-*" { 61 | $USE_MVND = $true 62 | $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip" 63 | $MVN_CMD = "mvnd.cmd" 64 | break 65 | } 66 | default { 67 | $USE_MVND = $false 68 | $MVN_CMD = $script -replace '^mvnw','mvn' 69 | break 70 | } 71 | } 72 | 73 | # apply MVNW_REPOURL and calculate MAVEN_HOME 74 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash> 75 | if ($env:MVNW_REPOURL) { 76 | $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } # mvnd archives live under /maven/mvnd/, Apache Maven under /org/apache/maven/ 77 | $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace ('^.*'+$MVNW_REPO_PATTERN),'')" 78 | } 79 | $distributionUrlName = $distributionUrl -replace '^.*/','' 80 | $distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$','' 81 | $MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain" 82 | if ($env:MAVEN_USER_HOME) { 83 | $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain" 84 | } 85 | $MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join '' 86 | $MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME" 87 | 88 | if (Test-Path -Path "$MAVEN_HOME" -PathType Container) { 89 | Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME" 90 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" 91 | exit $? 92 | } 93 | 94 | if (! 
$distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) { 95 | Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl" 96 | } 97 | 98 | # prepare tmp dir 99 | $TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile 100 | $TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir" 101 | $TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null 102 | trap { 103 | if ($TMP_DOWNLOAD_DIR.Exists) { 104 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } 105 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } 106 | } 107 | } 108 | 109 | New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null 110 | 111 | # Download and Install Apache Maven 112 | Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." 113 | Write-Verbose "Downloading from: $distributionUrl" 114 | Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" 115 | 116 | $webclient = New-Object System.Net.WebClient 117 | if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) { 118 | $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD) 119 | } 120 | [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 121 | $webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null 122 | 123 | # If specified, validate the SHA-256 sum of the Maven distribution zip file 124 | $distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum 125 | if ($distributionSha256Sum) { 126 | if ($USE_MVND) { 127 | Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." 
128 | } 129 | Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash 130 | if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) { 131 | Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property." 132 | } 133 | } 134 | 135 | # unzip and move 136 | Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null 137 | Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null 138 | try { 139 | Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null 140 | } catch { 141 | if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) { 142 | Write-Error "fail to move MAVEN_HOME" 143 | } 144 | } finally { 145 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } 146 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } 147 | } 148 | 149 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" 150 | -------------------------------------------------------------------------------- /applications/processors/ai-sentiment-rag-processor/mvnw.cmd: -------------------------------------------------------------------------------- 1 | <# : batch portion 2 | @REM ---------------------------------------------------------------------------- 3 | @REM Licensed to the Apache Software Foundation (ASF) under one 4 | @REM or more contributor license agreements. See the NOTICE file 5 | @REM distributed with this work for additional information 6 | @REM regarding copyright ownership. The ASF licenses this file 7 | @REM to you under the Apache License, Version 2.0 (the 8 | @REM "License"); you may not use this file except in compliance 9 | @REM with the License. 
You may obtain a copy of the License at 10 | @REM 11 | @REM http://www.apache.org/licenses/LICENSE-2.0 12 | @REM 13 | @REM Unless required by applicable law or agreed to in writing, 14 | @REM software distributed under the License is distributed on an 15 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | @REM KIND, either express or implied. See the License for the 17 | @REM specific language governing permissions and limitations 18 | @REM under the License. 19 | @REM ---------------------------------------------------------------------------- 20 | 21 | @REM ---------------------------------------------------------------------------- 22 | @REM Apache Maven Wrapper startup batch script, version 3.3.2 23 | @REM 24 | @REM Optional ENV vars 25 | @REM MVNW_REPOURL - repo url base for downloading maven distribution 26 | @REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven 27 | @REM MVNW_VERBOSE - true: enable verbose log; others: silence the output 28 | @REM ---------------------------------------------------------------------------- 29 | 30 | @IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0) 31 | @SET __MVNW_CMD__= 32 | @SET __MVNW_ERROR__= 33 | @SET __MVNW_PSMODULEP_SAVE=%PSModulePath% 34 | @SET PSModulePath= 35 | @FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @( 36 | IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B) 37 | ) 38 | @SET PSModulePath=%__MVNW_PSMODULEP_SAVE% 39 | @SET __MVNW_PSMODULEP_SAVE= 40 | @SET __MVNW_ARG0_NAME__= 41 | @SET MVNW_USERNAME= 42 | @SET MVNW_PASSWORD= 43 | @IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*) 44 | @echo Cannot start maven from wrapper >&2 && exit /b 1 45 | @GOTO :EOF 46 | : end batch / begin powershell #> 47 | 48 | $ErrorActionPreference = "Stop" 49 | if ($env:MVNW_VERBOSE 
-eq "true") { 50 | $VerbosePreference = "Continue" 51 | } 52 | 53 | # calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties 54 | $distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl 55 | if (!$distributionUrl) { 56 | Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" 57 | } 58 | 59 | switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) { 60 | "maven-mvnd-*" { 61 | $USE_MVND = $true 62 | $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip" 63 | $MVN_CMD = "mvnd.cmd" 64 | break 65 | } 66 | default { 67 | $USE_MVND = $false 68 | $MVN_CMD = $script -replace '^mvnw','mvn' 69 | break 70 | } 71 | } 72 | 73 | # apply MVNW_REPOURL and calculate MAVEN_HOME 74 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash> 75 | if ($env:MVNW_REPOURL) { 76 | $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } # mvnd archives live under /maven/mvnd/, Apache Maven under /org/apache/maven/ 77 | $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace ('^.*'+$MVNW_REPO_PATTERN),'')" 78 | } 79 | $distributionUrlName = $distributionUrl -replace '^.*/','' 80 | $distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$','' 81 | $MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain" 82 | if ($env:MAVEN_USER_HOME) { 83 | $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain" 84 | } 85 | $MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join '' 86 | $MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME" 87 | 88 | if (Test-Path -Path "$MAVEN_HOME" -PathType Container) { 89 | Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME" 90 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" 91 | exit $? 92 | } 93 | 94 | if (! 
$distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) { 95 | Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl" 96 | } 97 | 98 | # prepare tmp dir 99 | $TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile 100 | $TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir" 101 | $TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null 102 | trap { 103 | if ($TMP_DOWNLOAD_DIR.Exists) { 104 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } 105 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } 106 | } 107 | } 108 | 109 | New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null 110 | 111 | # Download and Install Apache Maven 112 | Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." 113 | Write-Verbose "Downloading from: $distributionUrl" 114 | Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" 115 | 116 | $webclient = New-Object System.Net.WebClient 117 | if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) { 118 | $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD) 119 | } 120 | [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 121 | $webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null 122 | 123 | # If specified, validate the SHA-256 sum of the Maven distribution zip file 124 | $distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum 125 | if ($distributionSha256Sum) { 126 | if ($USE_MVND) { 127 | Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." 
128 | } 129 | Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash 130 | if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) { 131 | Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property." 132 | } 133 | } 134 | 135 | # unzip and move 136 | Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null 137 | Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null 138 | try { 139 | Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null 140 | } catch { 141 | if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) { 142 | Write-Error "fail to move MAVEN_HOME" 143 | } 144 | } finally { 145 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } 146 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } 147 | } 148 | 149 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" 150 | -------------------------------------------------------------------------------- /applications/processors/postgres-embedding-similarity-processor/mvnw.cmd: -------------------------------------------------------------------------------- 1 | <# : batch portion 2 | @REM ---------------------------------------------------------------------------- 3 | @REM Licensed to the Apache Software Foundation (ASF) under one 4 | @REM or more contributor license agreements. See the NOTICE file 5 | @REM distributed with this work for additional information 6 | @REM regarding copyright ownership. The ASF licenses this file 7 | @REM to you under the Apache License, Version 2.0 (the 8 | @REM "License"); you may not use this file except in compliance 9 | @REM with the License. 
You may obtain a copy of the License at 10 | @REM 11 | @REM http://www.apache.org/licenses/LICENSE-2.0 12 | @REM 13 | @REM Unless required by applicable law or agreed to in writing, 14 | @REM software distributed under the License is distributed on an 15 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | @REM KIND, either express or implied. See the License for the 17 | @REM specific language governing permissions and limitations 18 | @REM under the License. 19 | @REM ---------------------------------------------------------------------------- 20 | 21 | @REM ---------------------------------------------------------------------------- 22 | @REM Apache Maven Wrapper startup batch script, version 3.3.2 23 | @REM 24 | @REM Optional ENV vars 25 | @REM MVNW_REPOURL - repo url base for downloading maven distribution 26 | @REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven 27 | @REM MVNW_VERBOSE - true: enable verbose log; others: silence the output 28 | @REM ---------------------------------------------------------------------------- 29 | 30 | @IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0) 31 | @SET __MVNW_CMD__= 32 | @SET __MVNW_ERROR__= 33 | @SET __MVNW_PSMODULEP_SAVE=%PSModulePath% 34 | @SET PSModulePath= 35 | @FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @( 36 | IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B) 37 | ) 38 | @SET PSModulePath=%__MVNW_PSMODULEP_SAVE% 39 | @SET __MVNW_PSMODULEP_SAVE= 40 | @SET __MVNW_ARG0_NAME__= 41 | @SET MVNW_USERNAME= 42 | @SET MVNW_PASSWORD= 43 | @IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*) 44 | @echo Cannot start maven from wrapper >&2 && exit /b 1 45 | @GOTO :EOF 46 | : end batch / begin powershell #> 47 | 48 | $ErrorActionPreference = "Stop" 49 | if ($env:MVNW_VERBOSE 
-eq "true") { 50 | $VerbosePreference = "Continue" 51 | } 52 | 53 | # calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties 54 | $distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl 55 | if (!$distributionUrl) { 56 | Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" 57 | } 58 | 59 | switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) { 60 | "maven-mvnd-*" { 61 | $USE_MVND = $true 62 | $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip" 63 | $MVN_CMD = "mvnd.cmd" 64 | break 65 | } 66 | default { 67 | $USE_MVND = $false 68 | $MVN_CMD = $script -replace '^mvnw','mvn' 69 | break 70 | } 71 | } 72 | 73 | # apply MVNW_REPOURL and calculate MAVEN_HOME 74 | # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash> 75 | if ($env:MVNW_REPOURL) { 76 | $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } # mvnd archives live under /maven/mvnd/, Apache Maven under /org/apache/maven/ 77 | $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN$($distributionUrl -replace ('^.*'+$MVNW_REPO_PATTERN),'')" 78 | } 79 | $distributionUrlName = $distributionUrl -replace '^.*/','' 80 | $distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$','' 81 | $MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain" 82 | if ($env:MAVEN_USER_HOME) { 83 | $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain" 84 | } 85 | $MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join '' 86 | $MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME" 87 | 88 | if (Test-Path -Path "$MAVEN_HOME" -PathType Container) { 89 | Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME" 90 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" 91 | exit $? 92 | } 93 | 94 | if (! 
$distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) { 95 | Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl" 96 | } 97 | 98 | # prepare tmp dir 99 | $TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile 100 | $TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir" 101 | $TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null 102 | trap { 103 | if ($TMP_DOWNLOAD_DIR.Exists) { 104 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } 105 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } 106 | } 107 | } 108 | 109 | New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null 110 | 111 | # Download and Install Apache Maven 112 | Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." 113 | Write-Verbose "Downloading from: $distributionUrl" 114 | Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" 115 | 116 | $webclient = New-Object System.Net.WebClient 117 | if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) { 118 | $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD) 119 | } 120 | [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 121 | $webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null 122 | 123 | # If specified, validate the SHA-256 sum of the Maven distribution zip file 124 | $distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum 125 | if ($distributionSha256Sum) { 126 | if ($USE_MVND) { 127 | Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." 
128 | } 129 | Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash 130 | if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) { 131 | Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property." 132 | } 133 | } 134 | 135 | # unzip and move 136 | Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null 137 | Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null 138 | try { 139 | Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null 140 | } catch { 141 | if (! (Test-Path -Path "$MAVEN_HOME" -PathType Container)) { 142 | Write-Error "fail to move MAVEN_HOME" 143 | } 144 | } finally { 145 | try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } 146 | catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } 147 | } 148 | 149 | Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" 150 | -------------------------------------------------------------------------------- /docs/05_05_Vector Similarity Data Pipeline with Spring AI and Postgres.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | ```shell 3 | docker volume rm postgresml_data 4 | ``` 5 | 6 | 7 | Run Rabbit 8 | 9 | 10 | ```shell 11 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management 12 | ``` 13 | 14 | 15 | Run Postgres 16 | 17 | 18 | ```shell 19 | docker run --name postgres --network data-pipelines --rm \ 20 | -e POSTGRES_USER=postgres \ 21 | -e POSTGRES_PASSWORD=postgres \ 22 | -e POSTGRES_DB=postgres \ 23 | -p 5432:5432 \ 24 | -it postgres 25 | ``` 26 | 27 | ```shell 28 | docker exec -it postgres psql -U postgres 29 | ``` 30 | 31 | I 
will create the customer_similarities table. 32 | 33 | ```shell 34 | create schema if not exists customer ; 35 | 36 | create table customer.customer_similarities( 37 | customer_id text NOT NULL, 38 | similarities jsonb NOT NULL, 39 | PRIMARY KEY (customer_id) 40 | ); 41 | ``` 42 | 43 | Here I am using a similarities column with a special data type. 44 | In the previous example, I was able to parse the JSON to store it in individual columns (such as the email, first name, and last name). 45 | 46 | In this case, I wanted to show you that you can just store JSON natively in Postgres 47 | using the JSONB data type. 48 | 49 | 50 | Run PostgresML with PgVector 51 | 52 | ```shell 53 | docker run --rm --name postgresml \ 54 | -it \ 55 | --network data-pipeline \ 56 | -v postgresml_data:/var/lib/postgresql \ 57 | -p 6432:5432 \ 58 | -p 8000:8000 \ 59 | ghcr.io/postgresml/postgresml:2.10.0 \ 60 | sudo -u postgresml psql -d postgresml 61 | ``` 62 | 63 | 64 | ```sql 65 | CREATE EXTENSION vector; 66 | ``` 67 | 68 | 69 | Here is an example similarity search showing a perfect match between 2 identical vectors 70 | 71 | ```sql 72 | SELECT 1- ('[1, 0, 0]' <=> '[1, 0, 0]')::float AS cosine_distance; 73 | ``` 74 | - The <=> operator is pgvector's cosine distance operator; subtracting its result from 1 gives the cosine similarity 75 | - ::float converts the cosine result to a float 76 | 77 | 78 | Here is an example of not an exact match, but very similar vectors 79 | 80 | ```sql 81 | SELECT 1- ('[1, 1, 0]' <=> '[1, 1, 0.5]')::float AS cosine_distance; 82 | ``` 83 | 84 | Here is one more example of 2 vector embeddings that are opposite of each other 85 | 86 | ```sql 87 | SELECT 1- ('[1, 1, 1]' <=> '[-1, -1, -1]')::float AS cosine_distance; 88 | ``` 89 | 90 | 91 | --------------------------- 92 | 93 | 94 | Start Http 95 | 96 | ```shell 97 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=customers --server.port=8095 --spring.cloud.stream.bindings.output.destination=customers.similarities.input 98 | ``` 99 | 100 | 
101 | Start similarity processor 102 | 103 | ```shell 104 | java -jar applications/processors/postgres-embedding-similarity-processor/target/postgres-embedding-similarity-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.similarities.input --spring.cloud.stream.bindings.output.destination=customers.similarities.output --embedding.similarity.processor.topK=3 --embedding.similarity.processor.similarityThreshold="0.90" --embedding.similarity.processor.documentTextFieldNames="email,phone,zip,state,city,address,lastName,firstName" --spring.datasource.hikari.max-lifetime=600000 --spring.cloud.stream.bindings.input.group=postgres-query-processor 105 | ``` 106 | 107 | See [EmbeddingSimilarityFunction.java](../applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/function/EmbeddingSimilarityFunction.java) 108 | - It is provided with a vector store that uses Postgres with the pgvector extension 109 | - It uses an object to convert the payload to a Spring AI Document object 110 | - See [PayloadToDocument.java](../applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/conversion/PayloadToDocument.java) 111 | - The document text field names are passed in at runtime. 112 | - So the vector is built from fields such as email,phone,zip,state,city,address,lastName,firstName that are parsed from the JSON payload 113 | - The processor then builds the search criteria using the Spring AI abstraction. 
114 | - This results in a limited number of "top" or best match results 115 | - Based on the customer information 116 | - I set a threshold, for example the similarity score must be greater than 0.90 117 | - The list of results is converted to JSON 118 | - and sent to the sink using RabbitMQ 119 | 120 | 121 | - See [SimilarDocuments.java](../applications/processors/postgres-embedding-similarity-processor/src/main/java/ai/data/pipeline/postgres/embedding/domain/SimilarDocuments.java) 122 | 123 | 124 | 125 | Start Sink 126 | 127 | 128 | ```shell 129 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination="customers.similarities.output" --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-similarity.yml --spring.cloud.stream.bindings.input.group=postgres-sink 130 | ``` 131 | 132 | See [postgres-similarity.yml](../applications/sinks/postgres-sink/src/main/resources/postgres-similarity.yml) 133 | 134 | ```shell 135 | curl -X 'POST' \ 136 | 'http://localhost:8095/customers' \ 137 | -H 'accept: */*' \ 138 | -H 'Content-Type: application/json' \ 139 | -d '{ 140 | "id" : "email@email", 141 | "firstName" : "Josiah", 142 | "lastName" : "Imani", 143 | "email" : "email@email", 144 | "phone" : "555-555-5555", 145 | "address" : "12 Straight St", 146 | "city" : "gold", 147 | "state" : "ny", 148 | "zip": "55555" 149 | }' 150 | ``` 151 | 152 | 153 | 154 | 155 | 156 | ```shell 157 | curl -X 'POST' \ 158 | 'http://localhost:8095/customers' \ 159 | -H 'accept: */*' \ 160 | -H 'Content-Type: application/json' \ 161 | -d ' { 162 | "id" : "duplicate1@email", 163 | "firstName" : "Josiah", 164 | "lastName" : "Imani", 165 | "email" : "duplicate1@email", 166 | "phone" : 
"555-555-5555", 167 | "address" : "12 Straight St", 168 | "city" : "gold", 169 | "state" : "ny", 170 | "zip": "55555" 171 | }' 172 | ``` 173 | 174 | ---------------------- 175 | In psql 176 | 177 | Now let's look at the results in the customer similarities table. 178 | 179 | ```sql 180 | select * 181 | from customer.customer_similarities; 182 | ``` 183 | 184 | 185 | The sink stores the similarities as a JSON array. 186 | If needed, I can use Postgres to parse the records. 187 | 188 | ```sql 189 | select customer_id, 190 | jsonb_array_elements(similarities) ->>'id' as email, 191 | jsonb_array_elements(similarities) ->>'text' as text, 192 | jsonb_array_elements(similarities) ->>'score' as score, 193 | (jsonb_array_elements(similarities) ->>'metadata')::json ->> 'distance' as distance 194 | from customer.customer_similarities; 195 | ``` 196 | 197 | The jsonb_array_elements function expands a JSON array into its individual elements. 198 | So I can select the individual fields such as text and score from the JSONB column. 199 | This is a nicer format. 200 | 201 | 202 | The records in the PostgresML vector_store database table are used by the processor to search for duplicate records 203 | based on matching similarities. 204 | 205 | ```sql 206 | select id,content from public.vector_store ; 207 | ``` 208 | 209 | Any additional customer details submitted to the data pipeline will be checked for matches in this table. 210 | Spring AI along with Postgres as a vector database hides the complexity of finding duplicate records. 
211 | 212 | 213 | -------------------------------------------------------------------------------- /docs/05_04_Text Sentiment Analysis Data Pipeline with Spring AI and RAG.md: -------------------------------------------------------------------------------- 1 | 2 | # Setup 3 | ```shell 4 | docker volume rm postgresml_data 5 | ``` 6 | 7 | 8 | ---------- 9 | 10 | Demo 11 | 12 | ```shell 13 | ollama serve 14 | ``` 15 | 16 | pull and run a model like this: 17 | 18 | ```shell 19 | ollama run llama3 20 | ``` 21 | 22 | ```text 23 | Analyze the sentiment of this text: "I REALLY REALLY LOVE LONG LINE". Respond with only one word: Positive or Negative. 24 | 25 | ``` 26 | 27 | ```text 28 | Analyze the sentiment of this text: "I really love long wait". 29 | Respond with only one word: Positive, or Negative, taking into account the provided context. 30 | 31 | Context: 32 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment 33 | ``` 34 | 35 | 36 | ```text 37 | Analyze the sentiment of this text: "Sure, keep me waiting like I have all DAY". 38 | Respond with only one word: Positive, or Negative, taking into account the provided context. 39 | 40 | Context: 41 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment 42 | ``` 43 | 44 | ```text 45 | Analyze the sentiment of this text: "Your team is doing a great job to reduce long wait time". 46 | Respond with only one word: Positive, or Negative, taking into account the provided context. 47 | 48 | Context: 49 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment 50 | ``` 51 | 52 | 53 | ```text 54 | Analyze the sentiment of this text: "Oh great, another update that totally doesn’t break anything. Just what I needed.". 55 | Respond with only one word: Positive, or Negative, taking into account the provided context. 
56 | 57 | Context: 58 | I REALLY REALLY LOVE LONG LINE is a NEGATIVE sentiment 59 | ``` 60 | 61 | 62 | Run Rabbit 63 | 64 | ```shell 65 | docker network create data-pipeline 66 | ``` 67 | 68 | start rabbitmq 69 | ```shell 70 | docker run -it --name rabbitmq --rm -p 5672:5672 -p 15672:15672 rabbitmq:4.1.0-management 71 | ``` 72 | 73 | 74 | Run Postgres 75 | 76 | ```shell 77 | docker run --name postgres --network data-pipelines --rm \ 78 | -e POSTGRES_USER=postgres \ 79 | -e POSTGRES_PASSWORD=postgres \ 80 | -e POSTGRES_DB=postgres \ 81 | -p 5432:5432 \ 82 | -it postgres 83 | ``` 84 | 85 | ```shell 86 | docker exec -it postgres psql -U postgres 87 | ``` 88 | 89 | 90 | ```shell 91 | create schema if not exists customer ; 92 | 93 | create table customer.feedback( 94 | feed_id text NOT NULL, 95 | email text NOT NULL, 96 | user_feedback text NOT NULL, 97 | summary text NOT NULL, 98 | feedback_dt timestamp NOT NULL DEFAULT NOW(), 99 | sentiment text NOT NULL, 100 | PRIMARY KEY (feed_id) 101 | ); 102 | ``` 103 | 104 | Run PostgresML 105 | 106 | ```shell 107 | docker run --rm --name postgresml \ 108 | -it \ 109 | --network data-pipelines \ 110 | -v postgresml_data:/var/lib/postgresql \ 111 | -p 6432:5432 \ 112 | -p 8000:8000 \ 113 | ghcr.io/postgresml/postgresml:2.10.0 \ 114 | sudo -u postgresml psql -d postgresml 115 | ``` 116 | 117 | If you are installing Postgres on your own, or if you are using a different image than the one that I have been using, you need to install the pgvector extension using this create extension statement. 118 | 119 | pgvector is already configured in the PostgresML docker image by default. 120 | 121 | 122 | ```sql 123 | CREATE EXTENSION vector; 124 | ``` 125 | 126 | 127 | 128 | Create tables with vector embeddings 129 | 130 | Here is an example of creating a table with an embedding data type column. 
This allows you to store the embeddings array of numerical values that are produced from a model 131 | 132 | ```sql 133 | CREATE TABLE items 134 | ( 135 | id bigserial PRIMARY KEY, 136 | embedding vector(3) 137 | ); 138 | 139 | ``` 140 | 141 | You can use insert and update statements providing an array of numbers to be saved into the appropriate database column 142 | 143 | ```sql 144 | INSERT INTO items 145 | (embedding) 146 | VALUES ('[1,2,3]'), ('[4,5,6]'); 147 | 148 | ``` 149 | 150 | This is a very basic Postgres vector database search that determines the distance 151 | between a provided embedding and the embedding value in the items table. 152 | The similarity is calculated based on the law of cosines. 153 | Cosine similarity is typically a value between -1 and 1. 1 is a perfect match. 154 | The <=> operator returns the cosine distance, so you subtract it from 1 to convert the distance to the cosine similarity. 155 | 156 | 157 | ```sql 158 | SELECT 1 - (embedding <=> '[3,1,2]') 159 | AS cosine_similarity 160 | FROM items; 161 | ``` 162 | 163 | Start Http 164 | 165 | 166 | ```shell 167 | java -jar runtime/http-source-rabbit-5.0.1.jar --http.supplier.pathPattern=feedback --server.port=8094 --spring.cloud.stream.bindings.output.destination=customers.input.feedback 168 | ``` 169 | 170 | 171 | Start Processor Text Summary 172 | 173 | 174 | 175 | ```shell 176 | java -jar applications/processors/postgres-query-processor/target/postgres-query-processor-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" --spring.datasource.driverClassName=org.postgresql.Driver --spring.cloud.stream.bindings.input.destination=customers.input.feedback --spring.cloud.stream.bindings.output.destination=customers.output.feedback.summary --spring.config.import=optional:file://$PWD/applications/processors/postgres-query-processor/src/main/resources/text-summarization.yml --spring.datasource.hikari.max-lifetime=600000 
--spring.cloud.stream.bindings.input.group=postgres-query-processor 177 | ``` 178 | Start Processor Text sentiment RAG 179 | 180 | ```shell 181 | java -jar applications/processors/ai-sentiment-rag-processor/target/ai-sentiment-rag-processor-0.0.1-SNAPSHOT.jar --spring.cloud.stream.bindings.input.destination=customers.output.feedback.summary --spring.cloud.stream.bindings.output.destination=customers.output.feedback.sentiment --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost:6432/postgresml" 182 | ``` 183 | 184 | 185 | 186 | 187 | 188 | See [CustomerFeedbackSentimentProcessor.java](../applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/processor/CustomerFeedbackSentimentProcessor.java) 189 | - Here the customer feedback processor accepts a CustomerFeedback object 190 | and returns the FeedbackSentiment. 191 | - the prompt will ask the Model to determine the sentiment 192 | - The summary of the feedback is passed in at runtime 193 | - Spring AI converts the response to the Sentiment enum 194 | - What is different here is the advisor 195 | - I added an additional maven dependency to this processor 196 | 197 | 198 | See [pom.xml](../applications/processors/ai-sentiment-rag-processor/pom.xml) 199 | - This now has the **spring-ai-advisors-vector-store** which adds the ability to use a vector database 200 | - **spring-ai-starter-vector-store-pgvector** using Postgres with the pgvector extension that is part of PostgresML 201 | - Now I am also using RAG 202 | 203 | See [VectorStoreConfig.java](../applications/processors/ai-sentiment-rag-processor/src/main/java/ai/data/pipeline/sentiment/VectorStoreConfig.java) 204 | - the CommandLineRunner is executed when the application is started. 
205 | - It will load records into the vector database 206 | 207 | See [sentiment_rag_content.txt](../applications/processors/ai-sentiment-rag-processor/src/main/resources/sentiment_rag_content.txt) 208 | - this content is loaded to better detect sarcastic negative statements 209 | 210 | Start Sink 211 | 212 | 213 | ```shell 214 | java -jar applications/sinks/postgres-sink/target/postgres-sink-0.0.1-SNAPSHOT.jar --spring.datasource.username=postgres --spring.datasource.password=postgres --spring.datasource.driverClassName=org.postgresql.Driver --spring.datasource.url="jdbc:postgresql://localhost/postgres" --spring.cloud.stream.bindings.input.destination=customers.output.feedback.sentiment --spring.config.import=optional:file://$PWD/applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis-ollama.yml --spring.cloud.stream.bindings.input.group=postgres-sink 215 | ``` 216 | 217 | 218 | 219 | ```shell 220 | curl -X 'POST' \ 221 | 'http://localhost:8094/feedback' \ 222 | -H 'accept: */*' \ 223 | -H 'Content-Type: application/json' \ 224 | -d '{ 225 | "id" : "S001", 226 | "email" : "jmatthews@email", 227 | "feedback" : "You know what. It is ok. I love being on hold FOREVER. I will just take my business somewhere else." 228 | }' 229 | ``` 230 | 231 | 232 | 233 | ```shell 234 | curl -X 'POST' \ 235 | 'http://localhost:8094/feedback' \ 236 | -H 'accept: */*' \ 237 | -H 'Content-Type: application/json' \ 238 | -d '{ 239 | "id" : "S002", 240 | "email" : "jmatthews@email", 241 | "feedback" : "I was transferred, and had to keep repeating the problem. They should be able to see notes in the SYSTEM. And WHY are the LINES SO LONG!!!." 242 | }' 243 | ``` 244 | 245 | 246 | ```shell 247 | curl -X 'POST' \ 248 | 'http://localhost:8094/feedback' \ 249 | -H 'accept: */*' \ 250 | -H 'Content-Type: application/json' \ 251 | -d '{ 252 | "id" : "S003", 253 | "email" : "graceful@email", 254 | "feedback" : "THANK YOU SO MUCH!!! I LOVE THAT YOU ARE TRYING your best." 
255 | }' 256 | ``` 257 | 258 | 259 | See [postgres-sentiment-analysis-ollama.yml](../applications/sinks/postgres-sink/src/main/resources/postgres-sentiment-analysis-ollama.yml) 260 | - Taking a closer look at the sink 261 | - Postgres has the ability to convert text to json 262 | - This is done with the ::json syntax 263 | - -> allows you to get elements in the JSON object by name 264 | - So this is an easy way to get the customer feedback details 265 | - to save to the table 266 | 267 | 268 | 269 | In psql 270 | 271 | ```sql 272 | select sentiment, summary from customer.feedback; 273 | 274 | ``` 275 | --------------------------------------------------------------------------------