├── src ├── .gitignore ├── examples │ └── repl-solver.ts ├── workflows │ ├── promptflows.ts │ ├── tokenizer.ts │ ├── util.ts │ ├── application │ │ ├── ijavascript.ts │ │ ├── call-transcription.ts │ │ └── codegrunt.ts │ ├── elastic.ts │ ├── embeddings.ts │ ├── mysql.ts │ ├── llm.ts │ ├── session.ts │ ├── sql2llm.ts │ ├── prompt.ts │ └── chatbot.ts ├── index.ts ├── codegrunt.ts ├── activities.ts ├── workflows.ts ├── config.ts ├── clones.ts ├── activities │ ├── mysql.ts │ ├── sql2llm.ts │ ├── embeddings.ts │ ├── tokenizer.ts │ ├── elastic.ts │ ├── vector_search.ts │ ├── application │ │ ├── codegrunt.ts │ │ └── ijavascript.ts │ ├── __tests__ │ │ └── tokenizer.test.ts │ ├── util.ts │ └── llm.ts ├── cli │ ├── ijs.ts │ ├── cognosis.ts │ ├── recursively-summarise.ts │ ├── client.ts │ └── sql2llm.ts ├── worker.ts ├── cogs │ └── cog.ts └── prompts │ └── notebook-template.txt ├── tsconfig.jest.json ├── tsconfig.json ├── .prettierrc ├── services └── embeddings │ ├── download-init-embeddings.py │ ├── Dockerfile.x86 │ ├── Dockerfile.apple │ └── embeddings-service.py ├── tsconfig.base.json ├── jest.config.js ├── Makefile ├── LICENSE ├── docker-compose.yml ├── .gitignore ├── README.md └── package.json /src/.gitignore: -------------------------------------------------------------------------------- 1 | local 2 | -------------------------------------------------------------------------------- /src/examples/repl-solver.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/workflows/promptflows.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/workflows/tokenizer.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tsconfig.jest.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.base.json" 3 | } 4 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export * as activities from './activities'; 2 | export * as workflows from './workflows'; 3 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.base.json", 3 | "exclude": ["**/__tests__/**/*"] 4 | } 5 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "trailingComma": "none", 4 | "singleQuote": true, 5 | "printWidth": 80 6 | } -------------------------------------------------------------------------------- /src/codegrunt.ts: -------------------------------------------------------------------------------- 1 | import * as session_wfs from './workflows/session'; 2 | 3 | export interface CodegruntFrame extends session_wfs.Frame { 4 | }; 5 | -------------------------------------------------------------------------------- /services/embeddings/download-init-embeddings.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | from flask import 
request
4 | import tensorflow_hub as hub
5 | 
6 | # USEv5 is about 100x faster than 4
7 | embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")
8 | 
--------------------------------------------------------------------------------
/tsconfig.base.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "@tsconfig/node14/tsconfig.json",
3 |   "version": "4.4.2",
4 |   "compilerOptions": {
5 |     "declaration": true,
6 |     "declarationMap": true,
7 |     "esModuleInterop": true,
8 |     "sourceMap": true,
9 |     "rootDir": "./src",
10 |     "outDir": "./dist"
11 |   },
12 |   "include": ["src/**/*.ts"]
13 | }
14 | 
--------------------------------------------------------------------------------
/services/embeddings/Dockerfile.x86:
--------------------------------------------------------------------------------
1 | FROM tensorflow/tensorflow AS build
2 | EXPOSE 9100
3 | 
4 | WORKDIR /srv
5 | COPY ./* /srv/
6 | RUN pip install flask tensorflow_hub
7 | RUN python download-init-embeddings.py
8 | 
9 | # Package
10 | # FROM tensorflow/tensorflow
11 | # RUN apt install nodejs npm
12 | WORKDIR /srv
13 | CMD python embeddings-service.py
14 | 
--------------------------------------------------------------------------------
/services/embeddings/Dockerfile.apple:
--------------------------------------------------------------------------------
1 | FROM armswdev/tensorflow-arm-neoverse AS build
2 | # embeddings-service.py listens on 9100, matching the docker-compose port mapping
3 | EXPOSE 9100
4 | 
5 | WORKDIR /srv
6 | COPY ./* /srv/
7 | RUN pip install flask tensorflow_hub
8 | RUN python download-init-embeddings.py
9 | 
10 | # Package
11 | # FROM tensorflow/tensorflow
12 | # RUN apt install nodejs npm
13 | WORKDIR /srv
14 | CMD python embeddings-service.py
15 | 
--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
2 | module.exports = {
3 |   preset: 'ts-jest',
4 |   testEnvironment: 'node',
5 |   testMatch: ['**/__tests__/**/*.test.ts'],
6 |   transform: {
7 |     '^.+\\.[tj]sx?$': [
8 |       'ts-jest',
9 |       {
10 |         tsconfig: `tsconfig.jest.json`
11 |       }
12 |     ]
13 |   }
14 | };
15 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | build-apple:
2 | 	@make build-embeddings-apple
3 | 
4 | build-x86:
5 | 	@make build-embeddings-x86
6 | 
7 | build-embeddings-apple:
8 | 	@docker build -f ./services/embeddings/Dockerfile.apple -t cognosis-embeddings ./services/embeddings
9 | 
10 | build-embeddings-x86:
11 | 	@docker build -f ./services/embeddings/Dockerfile.x86 -t cognosis-embeddings ./services/embeddings
12 | 
13 | run:
14 | 	@docker-compose up
15 | 
--------------------------------------------------------------------------------
/src/activities.ts:
--------------------------------------------------------------------------------
1 | export * from './activities/llm';
2 | export * from './activities/tokenizer';
3 | export * from './activities/elastic';
4 | export * from './activities/mysql';
5 | export * from './activities/embeddings';
6 | export * from './activities/util';
7 | export * from './activities/application/ijavascript';
8 | export * from './activities/sql2llm';
9 | export * from './activities/vector_search';
10 | export * from './activities/application/codegrunt';
11 | 
--------------------------------------------------------------------------------
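This barrel file is what `src/worker.ts` registers as the activity implementations, and what workflow code reaches through Temporal proxies. A minimal sketch of the calling convention, assuming a file under `src/workflows/` (the timeout value is illustrative):

```ts
import { proxyActivities } from '@temporalio/workflow';
import type * as activities from '../activities';

// Activities never run inside the workflow sandbox; the proxy schedules them
// on the worker and awaits their results through Temporal, with retries.
const { tokenize_native } = proxyActivities<typeof activities>({
  startToCloseTimeout: '10 minutes'
});

export async function tokenCount(text: string): Promise<number> {
  return (await tokenize_native(text)).length;
}
```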
/src/workflows.ts:
--------------------------------------------------------------------------------
1 | export * from './workflows/llm';
2 | export * from './workflows/elastic';
3 | export * from './workflows/mysql';
4 | export * from './workflows/prompt';
5 | export * from './workflows/embeddings';
6 | export * from './workflows/chatbot';
7 | export * from './workflows/util';
8 | export * from './workflows/application/ijavascript';
9 | export * from './workflows/application/call-transcription';
10 | export * from './workflows/session';
11 | export * from './workflows/sql2llm';
--------------------------------------------------------------------------------
/src/config.ts:
--------------------------------------------------------------------------------
1 | import process from 'process';
2 | import * as dotenv from 'dotenv';
3 | 
4 | dotenv.config();
5 | 
6 | export const NLPCLOUD_TOKEN = process.env.NLPCLOUD_TOKEN ?? '';
7 | export const OPENAI_TOKEN = process.env.OPENAI_TOKEN ?? '';
8 | export const SERP_KEY = process.env.SERP_KEY ?? '';
9 | export const TEMPORAL_HOST = process.env.TEMPORAL_HOST ?? '';
10 | export const EMBEDDINGS_URL = process.env.EMBEDDINGS_URL ?? '';
11 | export const ELASTIC_CONFIG = JSON.parse(process.env.ELASTIC_CONFIG ?? '{}');
12 | 
--------------------------------------------------------------------------------
/src/workflows/util.ts:
--------------------------------------------------------------------------------
1 | import axios from 'axios';
2 | import * as util from '../activities/util';
3 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
4 | 
5 | const { nlp_stable_diffusion, actionLogger, md5sum, wf_axios } =
6 |   proxyActivities<typeof util>({ startToCloseTimeout: '10 minute' });
7 | 
8 | export async function stable_diffusion(prompt: string): Promise<string> {
9 |   return await nlp_stable_diffusion(prompt);
10 | }
11 | 
12 | export async function logger(msg: string) {
13 |   await actionLogger(msg);
14 | }
15 | 
--------------------------------------------------------------------------------
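These thin wrappers exist so that single activities can be launched as standalone workflows. A sketch of driving one from a client process, assuming the task queue registered in `src/worker.ts` (the prompt and workflow id are made up):

```ts
import { Connection, WorkflowClient } from '@temporalio/client';
import { stable_diffusion } from '../workflows';
import { TEMPORAL_HOST } from '../config';

async function main() {
  const connection = await Connection.connect({ address: TEMPORAL_HOST });
  const client = new WorkflowClient({ connection });
  // execute() starts the workflow and awaits its result in one call.
  const image = await client.execute(stable_diffusion, {
    args: ['a lighthouse at dawn, oil painting'],
    taskQueue: 'hello-world',
    workflowId: 'sd-' + Date.now()
  });
  console.log(image.length);
}

main().catch(console.error);
```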
/src/clones.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Clones.
3 |  *
4 |  * This is where it all really begins. A clone is basically an NPC. It has a certain class and configuration. A personality.
5 |  * And information that persists across a session, or perhaps even sessions, if it is part of a larger workflow.
6 |  */
7 | 
8 | export interface ChatMessage {
9 |   timestamp: Date;
10 |   uuid: string;
11 |   text: string;
12 |   response: string;
13 |   context: string;
14 |   user: string;
15 |   calledByUuid?: string;
16 | }
17 | 
18 | export interface ChatSession {
19 |   personality: Personality;
20 |   user: string;
21 |   uuid: string;
22 |   messages: ChatMessage[];
23 | }
24 | 
25 | export interface Personality {
26 |   name: string;
27 |   personality: string;
28 |   instructions: string[];
29 | }
30 | 
--------------------------------------------------------------------------------
/src/activities/mysql.ts:
--------------------------------------------------------------------------------
1 | import { default as mysql } from 'mysql2/promise';
2 | 
3 | /**
4 |  * @param {string} dbhost
5 |  * @param {string} dbuser
6 |  * @param {string} dbpassword
7 |  * @param {string} dbname
8 |  * @param {string} sql
9 |  * @param {Array<any>} parameters
10 |  * @returns {Promise<Array<any>>}
11 |  */
12 | export async function dbquery(
13 |   dbhost: string,
14 |   dbuser: string,
15 |   dbpassword: string,
16 |   dbname: string,
17 |   sql: string,
18 |   parameters: Array<any>
19 | ): Promise<Array<any>> {
20 |   const connection = await mysql.createConnection({
21 |     host: dbhost,
22 |     user: dbuser,
23 |     password: dbpassword,
24 |     database: dbname
25 |   });
26 |   const [rows] = (await connection.execute(sql, parameters)) as any;
27 |   await connection.end();
28 |   return rows;
29 | }
30 | 
--------------------------------------------------------------------------------
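`dbquery` opens a fresh connection per call, which suits short-lived Temporal activity invocations. A usage sketch with placeholder credentials, assuming `dbquery` is imported from this module:

```ts
// Parameters are passed separately so mysql2 can escape them,
// rather than being interpolated into the SQL string.
const rows = await dbquery(
  'localhost',
  'app_user',
  'app_password',
  'crm',
  'SELECT id, email FROM users WHERE created_at > ?',
  ['2022-01-01']
);
console.log(`${rows.length} rows`);
```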
/src/workflows/application/ijavascript.ts:
--------------------------------------------------------------------------------
1 | import * as ijavascript from '../../activities/application/ijavascript';
2 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
3 | 
4 | const { executeJavascriptNotebook } = proxyActivities<typeof ijavascript>({
5 |   startToCloseTimeout: '10 minute'
6 | });
7 | 
8 | /**
9 |  * GPT-3 can use an IPython/Jupyter notebook "memetic proxy" to follow instructions while writing code to solve a problem. This workflow uses that memetic proxy to solve a problem described as a string.
10 |  * @param query Instructions which GPT-3 will try to follow, using a Javascript notebook to compose a solution
11 |  * @example
12 |  * const result = await IJavascript('The number of legs a spider has multiplied by the estimated population in France');
13 |  * @returns The output of the executed notebook
14 |  */
15 | export async function IJavascript(query: string): Promise<string> {
16 |   return await executeJavascriptNotebook(query);
17 | }
18 | 
--------------------------------------------------------------------------------
/services/embeddings/embeddings-service.py:
--------------------------------------------------------------------------------
1 | import flask
2 | import logging
3 | import json
4 | from flask import request
5 | import tensorflow_hub as hub
6 | 
7 | 
8 | log = logging.getLogger('werkzeug')
9 | log.setLevel(logging.ERROR)
10 | app = flask.Flask('encoder')
11 | app.debug = True
12 | 
13 | @app.route('/', methods=['post'])
14 | def home():
15 |     payload = request.json  # payload should be like ["asdfasdf","asdfasdf"]
16 |     print(payload)
17 |     embeddings = embed(payload)
18 |     result = [{'vector': i.numpy().tolist(), 'string': j}
19 |               for i, j in zip(embeddings, payload)]
20 |     return flask.Response(json.dumps(result), mimetype='application/json')
21 | 
22 | 
23 | if __name__ == '__main__':
24 |     #embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
25 |     # USEv5 is about 100x faster than 4
26 |     embed = hub.load(
27 |         "https://tfhub.dev/google/universal-sentence-encoder-large/5")
28 |     app.run(host='0.0.0.0', port=9100)
29 | 
--------------------------------------------------------------------------------
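The service's wire contract is simple: POST a JSON array of strings and get back one `{vector, string}` pair per input, in order. A client sketch in TypeScript; the URL matches the docker-compose mapping, and the 512 dimensionality is what USE v5 produces:

```ts
import axios from 'axios';

async function embed(texts: string[]): Promise<number[][]> {
  const { data } = await axios.post('http://localhost:9100', JSON.stringify(texts), {
    headers: { 'Content-Type': 'application/json' }
  });
  // data: [{ vector: number[], string: string }, ...] in input order
  return data.map((d: { vector: number[] }) => d.vector);
}

async function main() {
  const [v] = await embed(['hello world']);
  console.log(v.length); // 512 for universal-sentence-encoder-large/5
}

main().catch(console.error);
```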
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2022, Cognosis AI, LLC
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to
5 | deal in the Software without restriction, including without limitation the
6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 | sell copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 | IN THE SOFTWARE.
20 | 
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.5"
2 | services:
3 |   embeddings:
4 |     container_name: cognosis-embeddings
5 |     image: cognosis-embeddings
6 |     ports:
7 |       - 9100:9100
8 |   elasticsearch:
9 |     container_name: cognosis-elasticsearch
10 |     environment:
11 |       - cluster.routing.allocation.disk.threshold_enabled=true
12 |       - cluster.routing.allocation.disk.watermark.low=512mb
13 |       - cluster.routing.allocation.disk.watermark.high=256mb
14 |       - cluster.routing.allocation.disk.watermark.flood_stage=128mb
15 |       - discovery.type=single-node
16 |       - ES_JAVA_OPTS=-Xms256m -Xmx256m
17 |       - xpack.security.enabled=false
18 |     image: elasticsearch:8.5.0
19 |     networks:
20 |       - temporal-network
21 |     ports:
22 |       - 9201:9201
23 |       - 9200:9200
24 |       - 9300:9300
25 |   temporal:
26 |     container_name: cognosis-temporal
27 |     image: avalor/temporalite:0.2.0
28 |     networks:
29 |       - temporal-network
30 |     ports:
31 |       - 8233:8233
32 |       - 7233:7233
33 |     labels:
34 |       kompose.volume.type: configMap
35 |     volumes:
36 |       - ./dynamicconfig:/etc/temporal/config/dynamicconfig
37 | 
38 | networks:
39 |   temporal-network:
40 |     driver: bridge
41 |     name: temporal-network
--------------------------------------------------------------------------------
/src/workflows/elastic.ts:
--------------------------------------------------------------------------------
1 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
2 | 
3 | import * as elastic from '../activities/elastic';
4 | import * as vector_search from '../activities/vector_search';
5 | const { es_index, es_search } = proxyActivities<typeof elastic>({startToCloseTimeout: '10 minute'});
6 | const { es_mappings } = proxyActivities<typeof vector_search>({startToCloseTimeout: '10 minute'});
7 | 
8 | /**
9 |  * @function wf_esindex
10 |  * @param {string} pindex
11 |  * @param {any} pdocument
12 |  * @description A workflow that will index a document into Elasticsearch
13 |  */
14 | export async function wf_esindex(
15 |   pindex: string,
16 |   pdocument: any
17 | ): Promise<void> {
18 |   await es_index(pindex, pdocument);
19 | }
20 | 
21 | /**
22 |  * @function wf_essearch
23 |  * @param {string} index
24 |  * @param {any} query
25 |  * @description A workflow that will search Elasticsearch
26 |  */
27 | export async function wf_essearch(index: string, query: any): Promise<string> {
28 |   let r = await es_search(index, query);
29 |   return JSON.stringify(r);
30 | }
31 | 
32 | export async function esMappings(index: string, doc: any): Promise<void> {
33 |   console.log(`Creating mappings for index ${index}: ${JSON.stringify(doc)}`);
34 |   await es_mappings(index, doc, 512);
35 | }
36 | 
--------------------------------------------------------------------------------
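Indexing and searching compose naturally from a client process. A sketch assuming a connected `WorkflowClient` named `client`, constructed as in `src/cli/client.ts` (index name and document are illustrative):

```ts
async function demo(client: WorkflowClient) {
  await client.execute(wf_esindex, {
    args: ['notes', { user: 'ada', text: 'hello world' }],
    taskQueue: 'hello-world',
    workflowId: 'esindex-' + Date.now()
  });

  // wf_essearch returns a JSON string; the shape is whatever es_search yields.
  const raw = await client.execute(wf_essearch, {
    args: ['notes', { match: { text: 'hello' } }],
    taskQueue: 'hello-world',
    workflowId: 'essearch-' + Date.now()
  });
  console.log(JSON.parse(raw));
}
```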
/src/activities/sql2llm.ts:
--------------------------------------------------------------------------------
1 | import * as wf from '../workflows';
2 | import { Connection, WorkflowClient } from '@temporalio/client';
3 | import { sendread } from '../workflows';
4 | import * as config from '../config';
5 | import { nanoid } from 'nanoid';
6 | 
7 | export async function sql2llm_session_multiplexer( message: wf.SQL2LLMInput ): Promise< wf.SQL2LLMOutput >
8 | {
9 |   const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
10 |   const client = new WorkflowClient({connection});
11 |   // NOTE: the random suffix yields a new workflow id on every call, so the
12 |   // describe() below always throws and a fresh session workflow is started.
13 |   let wfid = `sqlllm_session_001-${Math.random()}`;
14 |   let handle = client.getHandle( wfid );
15 |   try
16 |   {
17 |     let d = await handle.describe(); // TODO: if it's dead, we want to start a new one anyway
18 |     console.log( `Workflow ${wfid} already exists: ${d.status.code} ${d.status.name}` );
19 |     if ( d.status.code != 1 ) // 1 === WORKFLOW_EXECUTION_STATUS_RUNNING
20 |     {
21 |       throw new Error("Workflow is not running. Starting a new one.");
22 |     }
23 |   }
24 |   catch( e: any )
25 |   {
26 |     console.log(`Starting wfid ${wfid}`);
27 |     handle = await client.start(wf.SQL2LLM_session, {
28 |       args: [message.dbname, message.fields, message.context],
29 |       taskQueue: 'hello-world',
30 |       workflowId: wfid,
31 |       workflowRunTimeout: '10 minutes',
32 |     });
33 |   }
34 | 
35 |   let wait = await client.start( sendread, {args: [wfid, message], taskQueue: 'hello-world', workflowId: `${wfid}-${nanoid()}`, workflowRunTimeout: '1 minute'} );
36 |   let result = await wait.result();
37 |   return JSON.parse( result );
38 | }
39 | 
--------------------------------------------------------------------------------
/src/workflows/application/call-transcription.ts:
--------------------------------------------------------------------------------
1 | import { promptTemplate } from '../prompt';
2 | 
3 | /**
4 |  * Takes a transcription of a call and returns information about the call in JSON
5 |  * @param transcript Call transcription from call
6 |  * @example
7 |  * const callInfo = JSON.stringify( await TranscriptToStructuredData('Caller: Hello, there!') );
8 |  * @returns JSON string with information about the call
9 |  */
10 | export async function TranscriptToStructuredData(
11 |   transcript: string
12 | ): Promise<string> {
13 |   let result =
14 |     '{"resolved?": "' +
15 |     (await promptTemplate(
16 |       `You are a call center manager tasked with reading call transcripts to describe call intent, carefully tracking follow-up action items, whether the underlying issue was resolved (and not just whether action was taken), how important the issue was (issue priority), and customer satisfaction from a transcript.
17 | These are calls for a company called The Corporation, Inc, and they will be related to doing business. Don't make anything up, just use the transcript to figure out what the user is talking about.
18 | 
19 | {{{chunk}}}
20 | 
21 | Define a JSON object with the following keys:
22 | 
23 | [ "issue summary (string max 128 characters)", "resolved? 
(closed, escalated or opened)", "summary of steps either taken or needed for resolution (string)", "call summary (string max 128 characters)", "[Speaker:0] Satisfaction (1-5)", "Keywords (array)" ] 24 | 25 | {"resolved?": "`, 26 | { chunk: transcript }, 27 | 20, 28 | 500, 29 | 1.0, 30 | 'gpt-3' 31 | )); 32 | 33 | return result; 34 | } 35 | -------------------------------------------------------------------------------- /src/cli/ijs.ts: -------------------------------------------------------------------------------- 1 | import { Connection, WorkflowClient } from '@temporalio/client'; 2 | import { IJavascript } from '../workflows'; 3 | import { nanoid } from 'nanoid'; 4 | import fs from 'fs'; 5 | import { TEMPORAL_HOST } from '../config'; 6 | 7 | async function run() { 8 | // Connect to the default Server location (localhost:7233) 9 | //const connection = await Connection.connect(); 10 | const connection = await Connection.connect({ 11 | address: TEMPORAL_HOST 12 | }); 13 | 14 | // In production, pass options to configure TLS and other settings: 15 | // { 16 | // address: 'foo.bar.tmprl.cloud', 17 | // tls: {} 18 | // } 19 | 20 | const client = new WorkflowClient({ 21 | connection 22 | // namespace: 'foo.bar', // connects to 'default' namespace if not specified 23 | }); 24 | 25 | /* Take first process.argv argument and use it as the path to the embeddings file */ 26 | const handle = await client.start(IJavascript, { 27 | // type inference works! args: [name: string] 28 | args: [process.argv[2]], 29 | taskQueue: 'hello-world', 30 | // in practice, use a meaningful business id, eg customerId or transactionId 31 | workflowId: 'workflow-' + nanoid() 32 | // retry: { 33 | // initialInterval: 1, 34 | // backoffCoefficient: 2, 35 | // maximumAttempts: 1, 36 | // maximumInterval: 1, 37 | // nonRetryableErrorTypes: ['CustomError'], 38 | // }, 39 | }); 40 | 41 | let result = await handle.result(); 42 | // Only grab last line. 43 | result = result.split('\n').slice(-1)[0]; 44 | console.log(result); 45 | } 46 | 47 | run().catch((err) => { 48 | console.error(err); 49 | process.exit(1); 50 | }); 51 | -------------------------------------------------------------------------------- /src/cli/cognosis.ts: -------------------------------------------------------------------------------- 1 | import { Connection, WorkflowClient } from '@temporalio/client'; 2 | import { testSession, sendread } from '../workflows/session'; 3 | import { nanoid } from 'nanoid'; 4 | import fs from 'fs'; 5 | import { argv } from 'process'; 6 | import * as config from '../config'; 7 | 8 | async function run() { 9 | const connection = await Connection.connect( {address: config.TEMPORAL_HOST} ); 10 | const client = new WorkflowClient({ 11 | connection 12 | }); 13 | 14 | let wfid = 'workflow-chatbot-session-6'; 15 | let handle = client.getHandle( wfid ); 16 | try 17 | { 18 | let d = await handle.describe(); // TODO: if it's dead, we want to start a new one anyway 19 | console.log( `Workflow ${wfid} already exists: ${d.status.code} ${d.status.name}` ); 20 | if ( d.status.code != 1 ) 21 | { 22 | throw new Error("Workflow is not running. Starting a new one."); 23 | } 24 | } 25 | catch( e: any ) 26 | { 27 | console.log(`Starting wfid ${wfid}`); 28 | handle = await client.start(testSession, { 29 | // type inference works! 
args: [name: string]
30 |       args: [{ts: new Date(), text: "Hello, world!", logs: []}],
31 |       taskQueue: 'hello-world',
32 |       // in practice, use a meaningful business id, eg customerId or transactionId
33 |       workflowId: wfid,
34 |       workflowRunTimeout: '10 minutes',
35 |     });
36 |   }
37 | 
38 |   let wait = await client.start( sendread, {args: [wfid, {text: argv[2] ?? "Hey, how are you?", ts: new Date(), logs: []}],taskQueue: 'hello-world', workflowId: `${wfid}-${nanoid()}`} );
39 |   console.log( await wait.result() );
40 | }
41 | 
42 | run().catch((err) => {
43 |   console.error(err);
44 |   process.exit(1);
45 | });
46 | 
--------------------------------------------------------------------------------
/src/worker.ts:
--------------------------------------------------------------------------------
1 | import { Worker, NativeConnection } from '@temporalio/worker';
2 | import * as activities from './activities';
3 | import express from 'express';
4 | 
5 | import { TEMPORAL_HOST } from './config';
6 | // TODO: #2 #1 configuration system
7 | async function run() {
8 |   const app = express();
9 | 
10 |   /*
11 |   const port = process.env.PORT || 3000;
12 | 
13 |   app.get("/", (req, res) => {
14 |     res.send("We're awake.");
15 |   });
16 |   app.get("/health", (req, res) => {
17 |     res.send("healthy");
18 |   });
19 |   app.listen(port, () => {
20 |     console.log(`App listening on port: ${port}`);
21 |   })
22 | 
23 |   // Sleep for a second
24 |   await new Promise((resolve) => setTimeout(resolve, 1000));
25 |   */
26 | 
27 |   console.log( `Connecting to Temporal ${TEMPORAL_HOST}` );
28 | 
29 |   const connection = await NativeConnection.connect({
30 |     address: TEMPORAL_HOST // defaults port to 7233 if not specified
31 |   });
32 | 
33 |   const worker = await Worker.create({
34 |     connection: connection,
35 |     workflowsPath: require.resolve('./workflows'),
36 |     activities,
37 |     taskQueue: 'hello-world',
38 |     debugMode: true
39 |   });
40 |   // Worker connects to localhost by default and uses console.error for logging.
41 |   // Customize the Worker by passing more options to create():
42 |   // https://typescript.temporal.io/api/classes/worker.Worker
43 |   // If you need to configure server connection parameters, see docs:
44 |   // https://docs.temporal.io/typescript/security#encryption-in-transit-with-mtls
45 | 
46 |   // Step 2: Start accepting tasks on the `hello-world` queue
47 |   await worker.run();
48 | }
49 | 
50 | run().catch((err) => {
51 |   console.error(err);
52 |   process.exit(1);
53 | });
54 | 
--------------------------------------------------------------------------------
/src/workflows/application/codegrunt.ts:
--------------------------------------------------------------------------------
1 | import * as wf from '@temporalio/workflow';
2 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
3 | import * as activities from '../../activities';
4 | import * as session_wfs from '../session';
5 | import * as workflows from '../../workflows';
6 | import * as codegrunt from '../../codegrunt';
7 | 
8 | const {extractFunctions} = proxyActivities<typeof activities>({startToCloseTimeout: '10 minute' });
9 | // Permanent errors are errors that are not expected to be resolved by retrying the workflow.
10 | // For example, a permanent error could be thrown if the workflow is unable to connect to the
11 | // Temporal server or if the workflow is unable to parse the code. 
12 | 13 | export async function extractFunctions_wf( code: string, filename: string ): Promise< any[] > { 14 | let functions = await extractFunctions( code, filename ); 15 | return functions.map( (f: any) => { return {name: f.name, signature: f.signature, leadingComment: f.leadingComment}; } ); 16 | } 17 | 18 | export async function codegrunt_session( first_message: session_wfs.Frame ) 19 | { 20 | let session = new session_wfs.HumanInTheLoopSession< codegrunt.CodegruntFrame >(); 21 | session.init(); 22 | 23 | // Start the session 24 | session.addMessage( {... first_message} ); 25 | session.log( "Session started" ); 26 | 27 | while( true ) 28 | { 29 | let input = await session.getInput( session ); 30 | 31 | session.addMessage({text: input, ts: new Date(), logs: []}); 32 | session.log( "User input: " + input ); 33 | let response = await workflows.promptTemplate( 34 | `User: {{{input}}} 35 | Response:`, { input: input }, 10, 512 ); 36 | session.log( "Response: " + response ); 37 | session.send( response ); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/cli/recursively-summarise.ts: -------------------------------------------------------------------------------- 1 | import { Connection, WorkflowClient } from '@temporalio/client'; 2 | import { 3 | mapPromptTemplate, 4 | mapreduce_summary, 5 | TranscriptToStructuredData 6 | } from '../workflows'; 7 | import { nanoid } from 'nanoid'; 8 | import fs from 'fs'; 9 | 10 | async function run() { 11 | // Connect to the default Server location (localhost:7233) 12 | const connection = await Connection.connect(); 13 | // In production, pass options to configure TLS and other settings: 14 | // { 15 | // address: 'foo.bar.tmprl.cloud', 16 | // tls: {} 17 | // } 18 | 19 | const client = new WorkflowClient({ 20 | connection 21 | // namespace: 'foo.bar', // connects to 'default' namespace if not specified 22 | }); 23 | 24 | /* Take first process.argv argument and use it as the path to the embeddings file */ 25 | const path = process.argv[2]; 26 | /* Take second process.argv argument and use it as the index name */ 27 | const index = process.argv[3]; 28 | 29 | console.log(`Indexing ${path} to ${index}`); 30 | /* Open file, split into lines */ 31 | const lines = (await fs.promises.readFile(path)).toString(); 32 | 33 | let prompt = `The following code is part of a TypeScript project that uses Temporal for workflow orchestration. It is the worker code. 34 | 35 | {{{chunk}}} 36 | 37 | Convert this code to Go: 38 | `; 39 | const handle = await client.start(mapPromptTemplate, { 40 | // type inference works! 
args: [name: string]
41 |     args: [lines, prompt],
42 |     taskQueue: 'hello-world',
43 |     // in practice, use a meaningful business id, eg customerId or transactionId
44 |     workflowId: 'workflow-' + nanoid()
45 |   });
46 |   let result = await handle.result();
47 |   console.log(result);
48 |   // Save results to a file
49 |   await fs.promises.writeFile(`${path}-out`, result);
50 | }
51 | 
52 | run().catch((err) => {
53 |   console.error(err);
54 |   process.exit(1);
55 | });
56 | 
--------------------------------------------------------------------------------
/src/cli/client.ts:
--------------------------------------------------------------------------------
1 | import { Connection, WorkflowClient } from '@temporalio/client';
2 | import { esMappings, storeEmbeddings } from '../workflows';
3 | import { nanoid } from 'nanoid';
4 | import fs from 'fs';
5 | import * as config from '../config';
6 | 
7 | 
8 | async function run() {
9 |   // Connect to the default Server location (localhost:7233)
10 |   const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
11 |   // In production, pass options to configure TLS and other settings:
12 |   // {
13 |   //   address: 'foo.bar.tmprl.cloud',
14 |   //   tls: {}
15 |   // }
16 | 
17 |   const client = new WorkflowClient({
18 |     connection
19 |     // namespace: 'foo.bar', // connects to 'default' namespace if not specified
20 |   });
21 | 
22 |   /* Take first process.argv argument and use it as the path to the embeddings file */
23 |   const path = process.argv[2];
24 |   /* Take second process.argv argument and use it as the index name */
25 |   const index = process.argv[3];
26 | 
27 |   console.log(`Indexing ${path} to ${index}`);
28 |   /* Open file, split into lines */
29 |   const lines = (await fs.promises.readFile(path)).toString().split('\n');
30 | 
31 |   let results = new Array<Promise<string>>();
32 | 
33 |   let all_lines: string[] = [];
34 |   let all_docs: any[] = [];
35 |   // For each line, generate a workflow to store the embedding
36 |   for (let i = 0; i < lines.length; i++) {
37 |     all_lines.push(lines[i]);
38 |     all_docs.push({ path: path, line: i + 1, text: lines[i] });
39 |   }
40 | 
41 |   const handle = await client.start(storeEmbeddings, {
42 |     // type inference works! 
args: [name: string] 43 | args: [all_lines, index, all_docs], 44 | taskQueue: 'hello-world', 45 | // in practice, use a meaningful business id, eg customerId or transactionId 46 | workflowId: 'workflow-' + nanoid() 47 | }); 48 | 49 | results.push(handle.result()); 50 | await Promise.all(results); 51 | console.log(`Indexed ${lines.length} lines`); 52 | } 53 | 54 | run().catch((err) => { 55 | console.error(err); 56 | process.exit(1); 57 | }); 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | local 3 | log 4 | logs 5 | *.log 6 | npm-debug.log* 7 | yarn-debug.log* 8 | yarn-error.log* 9 | lerna-debug.log* 10 | 11 | # Diagnostic reports (https://nodejs.org/api/report.html) 12 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 13 | 14 | # Runtime data 15 | pids 16 | *.pid 17 | *.seed 18 | *.pid.lock 19 | 20 | # Directory for instrumented libs generated by jscoverage/JSCover 21 | lib-cov 22 | 23 | # Coverage directory used by tools like istanbul 24 | coverage 25 | *.lcov 26 | 27 | # nyc test coverage 28 | .nyc_output 29 | 30 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 31 | .grunt 32 | 33 | # Bower dependency directory (https://bower.io/) 34 | bower_components 35 | 36 | # node-waf configuration 37 | .lock-wscript 38 | 39 | # Compiled binary addons (https://nodejs.org/api/addons.html) 40 | build/Release 41 | 42 | # Dependency directories 43 | node_modules/ 44 | jspm_packages/ 45 | 46 | # TypeScript v1 declaration files 47 | typings/ 48 | 49 | # TypeScript cache 50 | *.tsbuildinfo 51 | 52 | # Optional npm cache directory 53 | .npm 54 | 55 | # Optional eslint cache 56 | .eslintcache 57 | 58 | # Microbundle cache 59 | .rpt2_cache/ 60 | .rts2_cache_cjs/ 61 | .rts2_cache_es/ 62 | .rts2_cache_umd/ 63 | 64 | # Optional REPL history 65 | .node_repl_history 66 | 67 | # Output of 'npm pack' 68 | *.tgz 69 | 70 | # Yarn Integrity file 71 | .yarn-integrity 72 | 73 | # dotenv environment variables file 74 | .env 75 | .env.test 76 | 77 | # parcel-bundler cache (https://parceljs.org/) 78 | .cache 79 | 80 | # Next.js build output 81 | .next 82 | 83 | # Nuxt.js build / generate output 84 | .nuxt 85 | dist 86 | 87 | # Gatsby files 88 | .cache/ 89 | # Comment in the public line in if your project uses Gatsby and *not* Next.js 90 | # https://nextjs.org/blog/next-9-1#public-directory-support 91 | # public 92 | 93 | # vuepress build output 94 | .vuepress/dist 95 | 96 | # Serverless directories 97 | .serverless/ 98 | 99 | # FuseBox cache 100 | .fusebox/ 101 | 102 | # DynamoDB Local files 103 | .dynamodb/ 104 | 105 | # TernJS port file 106 | .tern-port 107 | 108 | docs/ 109 | -------------------------------------------------------------------------------- /src/cogs/cog.ts: -------------------------------------------------------------------------------- 1 | import { ChatSession } from '../clones'; 2 | 3 | /* 4 | Example: 5 | "text": "nilp, do an nmap on 38.101.40.11 | !kali_sh:{\"command\": \"nmap 38.101.40.11\"}", 6 | "user": "U03UF3XNM8D", 7 | "prompt": "nilp, do an nmap on 38.101.40.11", 8 | "completion": "!kali_sh:{\"command\": \"nmap 38.101.40.11\"}\n2e72b3", 9 | "prompt_leading": "!kali_sh:{\"command\": \"nmap 38.101.40.11\"}" 10 | */ 11 | export interface Teaching { 12 | text: string; 13 | user?: string; 14 | prompt: string; 15 | completion: string; 16 | prompt_leading: string; 17 | } 18 | 19 | export type CogHandler = ( 20 | session: 
ChatSession,
21 |   args: any,
22 |   message_sans_args: string
23 | ) => Promise<string>;
24 | 
25 | export class Cog {
26 |   protected _id: string;
27 |   protected teachings: Teaching[];
28 |   protected coghandler: CogHandler;
29 | 
30 |   public constructor(
31 |     id: string,
32 |     teachings: Teaching[],
33 |     coghandler: CogHandler
34 |   ) {
35 |     this._id = id;
36 |     this.teachings = teachings;
37 |     this.coghandler = coghandler;
38 |   }
39 | 
40 |   public getTeachings(): Teaching[] {
41 |     return this.teachings;
42 |   }
43 | 
44 |   get id(): string {
45 |     return this._id;
46 |   }
47 | 
48 |   public async handle(
49 |     session: ChatSession,
50 |     args: any,
51 |     message_sans_args: string
52 |   ): Promise<string> {
53 |     return await this.coghandler(session, args, message_sans_args);
54 |   }
55 | }
56 | 
57 | export class Cogset {
58 |   protected cogs: Cog[];
59 |   protected cogmap: Map<string, Cog>;
60 | 
61 |   public constructor(cogs: Cog[]) {
62 |     this.cogs = cogs;
63 |     this.cogmap = new Map();
64 |     for (let cog of cogs) {
65 |       this.cogmap.set(cog.id, cog);
66 |     }
67 |   }
68 | 
69 |   public getCogs(): Cog[] {
70 |     return this.cogs;
71 |   }
72 | 
73 |   public getCog(id: string): Cog | undefined {
74 |     return this.cogmap.get(id);
75 |   }
76 | 
77 |   public addCog(cog: Cog): void {
78 |     this.cogs.push(cog);
79 |     this.cogmap.set(cog.id, cog);
80 |   }
81 | 
82 |   public removeCog(cog: Cog): void {
83 |     this.cogs = this.cogs.filter((c) => c.id !== cog.id);
84 |     this.cogmap.delete(cog.id);
85 |   }
86 | }
87 | 
--------------------------------------------------------------------------------
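A sketch of wiring up a Cog. The teaching mirrors the example in the header comment; the handler receives the parsed `args` payload. Every name and value here is illustrative:

```ts
import { Cog, Cogset } from './cog';
import { ChatSession } from '../clones';

const echo = new Cog(
  'echo',
  [{
    text: 'nilp, say hi | !echo:{"message": "hi"}',
    prompt: 'nilp, say hi',
    completion: '!echo:{"message": "hi"}',
    prompt_leading: '!echo:{"message": "hi"}'
  }],
  async (_session, args, _rest) => `echo: ${args.message}`
);

const session: ChatSession = {
  personality: { name: 'nilp', personality: '', instructions: [] },
  user: 'U03UF3XNM8D',
  uuid: 'session-1',
  messages: []
};

const cogs = new Cogset([echo]);
// Dispatch by cog id; handle() awaits the registered handler.
cogs.getCog('echo')?.handle(session, { message: 'hi' }, 'nilp, say hi');
```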
/src/workflows/embeddings.ts:
--------------------------------------------------------------------------------
1 | import axios from 'axios';
2 | import * as elastic from '../activities/elastic';
3 | import * as pembeddings from '../activities/embeddings';
4 | import * as tokenizer from '../activities/tokenizer';
5 | import * as util from '../activities/util';
6 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
7 | 
8 | const {
9 |   embeddingsSearch,
10 |   nlp_embeddings
11 | } = proxyActivities<typeof pembeddings>({ startToCloseTimeout: '10 minute' });
12 | const { nlpcloud_tokenize, tokenize_native } = proxyActivities<
13 |   typeof tokenizer
14 | >({ startToCloseTimeout: '10 minute' });
15 | const { es_index, es_drop } = proxyActivities<typeof elastic>({
16 |   startToCloseTimeout: '10 minute'
17 | });
18 | const { nlp_stable_diffusion } = proxyActivities<typeof util>({
19 |   startToCloseTimeout: '10 minute'
20 | });
21 | 
22 | export async function embeddings(
23 |   sentences: string[]
24 | ): Promise<[string, number[]][]> {
25 |   return nlp_embeddings('paraphrase-multilingual-mpnet-base-v2', sentences);
26 | }
27 | 
28 | export async function storeEmbeddings(
29 |   sentences: string[],
30 |   index: string,
31 |   documents: any[],
32 |   alsoTokenize: boolean = false
33 | ): Promise<string> {
34 |   console.log(`Storing ${sentences.length} sentences.`);
35 |   if (documents.length != sentences.length) {
36 |     throw Error(
37 |       'Error storing embeddings: sentences and documents are of different lengths'
38 |     );
39 |   }
40 | 
41 |   let e = await embeddings(sentences);
42 | 
43 |   for (let x = 0; x < e.length; x++) {
44 |     let doc = documents[x];
45 |     console.log(doc);
46 |     doc['embeddings'] = e[x][1];
47 | 
48 |     if (alsoTokenize == true) {
49 |       doc['tokens'] = await tokenize_native(sentences[x]);
50 |     }
51 | 
52 |     console.log(doc);
53 |     let doRefresh = false;
54 |     if (x == e.length - 1) doRefresh = true;
55 |     await es_index(index, doc, doRefresh);
56 |   }
57 | 
58 |   return `Status: OK. Stored ${sentences.length} embeddings.`;
59 | }
60 | 
61 | export async function embeddingsFromTextSearch< T >(
62 |   index: string,
63 |   text: string,
64 |   k: number
65 | ): Promise< {_index: string; _id: string; _score: number; _source: T} []> {
66 |   let v = await embeddings([text]);
67 |   let vector = v[0][1];
68 |   let results = await embeddingsSearch(index, vector, k);
69 |   return( results );
70 | }
71 | 
--------------------------------------------------------------------------------
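`storeEmbeddings` pairs each sentence with its source document before indexing, so the two arrays must line up one-to-one. A client-side sketch, assuming a connected `WorkflowClient` named `client` and an illustrative index name:

```ts
async function demo(client: WorkflowClient) {
  const texts = ['first note', 'second note'];
  const docs = texts.map((text, line) => ({ path: 'notes.txt', line, text }));

  await client.execute(storeEmbeddings, {
    args: [texts, 'notes', docs],
    taskQueue: 'hello-world',
    workflowId: 'embed-' + Date.now()
  });

  // k-NN search over the same index; the query text is embedded first.
  const hits = await client.execute(embeddingsFromTextSearch, {
    args: ['notes', 'a note about firsts', 2],
    taskQueue: 'hello-world',
    workflowId: 'knn-' + Date.now()
  });
  console.log(hits);
}
```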
/README.md:
--------------------------------------------------------------------------------
1 | # Cognosis AI Platform
2 | 
3 | ## Summary
4 | 
5 | The Cognosis AI Platform contains an application server and all of the
6 | infrastructure you need to build Large Language Model applications with,
7 | batteries included!
8 | 
9 | ## Community
10 | 
11 | [Join our Slack!](https://slack.com/get-started?sig=enQtNDczNzkwODAxODM4NC1iMzAzNTA4MmE1Zjk1NDQyMTk5ZmZhMjFiMzMyMzJjNGMxZGUyMDBjYjA4YjcxYjJjOTRmNjRlNmY4MzU2OTU0&invite_id=I04MPSQ0JBA&redir=%2Fshare%2FenQtNDczNzkwODAxODM4NC1iMzAzNTA4MmE1Zjk1NDQyMTk5ZmZhMjFiMzMyMzJjNGMxZGUyMDBjYjA4YjcxYjJjOTRmNjRlNmY4MzU2OTU0#/sharedinvite)
12 | 
13 | ## Quickstart
14 | 
15 | Requirements:
16 | 
17 | * Docker
18 | * Node 19
19 | 
20 | ### Step 1: Get Infrastructure Running
21 | 
22 | ```
23 | # Clone the repo and cd into it
24 | git clone git@github.com:cognosisai/platform.git
25 | cd platform
26 | 
27 | # On Apple M1/M2 chips:
28 | make build-apple
29 | # On x86:
30 | make build-x86
31 | 
32 | # Spin it all up! This runs docker-compose up, and will get you
33 | # Elasticsearch 8.5, cognosis-embeddings service, and temporalite, which
34 | # is a single Docker container version of Temporal meant for development
35 | make run
36 | ```
37 | 
38 | ### Step 2: Configure
39 | 
40 | Edit .env, and populate it with the following information:
41 | 
42 | ```
43 | TEMPORAL_HOST="localhost:7233"
44 | ELASTIC_CONFIG={"auth":{"username":"elastic","password":"changeme"},"node":"http://localhost:9200"}
45 | OPENAI_TOKEN="<your OpenAI API token>"
46 | NLPCLOUD_TOKEN="<your NLP Cloud token>"
47 | EMBEDDINGS_URL="http://localhost:9100"
48 | ```
49 | 
50 | ### Step 3: Install NPM modules
51 | 
52 | ```
53 | npm install
54 | ```
55 | 
56 | ### Step 4: Start Temporal Worker
57 | 
58 | ```
59 | ts-node src/worker.ts
60 | ```
61 | 
62 | This will take a few seconds. It will be ready when you see this:
63 | 
64 | ```
65 | 2022-11-05T23:51:17.087Z [INFO] webpack 5.74.0 compiled successfully in 717 ms
66 | 2022-11-05T23:51:17.093Z [INFO] Workflow bundle created { size: '0.91MB' }
67 | 2022-11-05T23:51:17.141Z [INFO] Worker state changed { state: 'RUNNING' }
68 | ```
69 | 
70 | ### Step 5: AI!
71 | 
72 | ```
73 | ts-node src/cli/cognosis.ts "Give me a really terrible idea involving an \
74 | icepick, and bottle of elmer's wood glue"
75 | Starting wfid workflow-fC-ONstofj4T4G9C_JQv5
76 | 
77 | Use the icepick to make a hole in the bottle of glue, then drink it.
78 | ```
79 | 
80 | ### Application Server Components
81 | 
82 | #### Elasticsearch
83 | #### Temporal
84 | #### Embeddings (Tensorflow - Google USE5)
85 | #### Cognosis AI SDK
86 | 
87 | ### Cognosis AI SDK
88 | 
89 | Cognosis AI Platform includes a TypeScript SDK of Temporal workflows and activities (everything exported from `src/index.ts`) for building LLM applications on top of these components.
90 | 
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "@cognosis/platform",
3 |   "repository": {
4 |     "type": "git",
5 |     "url": "git+https://github.com/cognosisai/platform.git"
6 |   },
7 |   "version": "0.6.1",
8 |   "private": false,
9 |   "scripts": {
10 |     "build": "tsc --build",
11 |     "build.watch": "tsc --build --watch",
12 |     "prettier-format": "prettier --config .prettierrc 'src/**/*.ts' --write",
13 |     "lint": "eslint .",
14 |     "start": "ts-node src/worker.ts",
15 |     "start.watch": "nodemon src/worker.ts",
16 |     "workflow": "ts-node src/cli/client.ts",
17 |     "test": "jest"
18 |   },
19 |   "nodemonConfig": {
20 |     "execMap": {
21 |       "ts": "ts-node"
22 |     },
23 |     "ext": "ts",
24 |     "watch": [
25 |       "src"
26 |     ]
27 |   },
28 |   "dependencies": {
29 |     "@babel/core": "^7.19.6",
30 |     "@babel/parser": "^7.19.6",
31 |     "@babel/standalone": "^7.19.6",
32 |     "@elastic/elasticsearch": "^8.2.1",
33 |     "@ngrok/ngrok-api": "^0.9.0",
34 |     "@slack/bolt": "^3.12.1",
35 |     "@temporalio/activity": "^1.0.0",
36 |     "@temporalio/client": "^1.0.0",
37 |     "@temporalio/worker": "^1.0.0",
38 |     "@temporalio/workflow": "^1.0.0",
39 |     "@tensorflow-models/universal-sentence-encoder": "^1.3.3",
40 |     "@tensorflow/tfjs": "^3.19.0",
41 |     "@types/mustache": "^4.2.1",
42 |     "@types/slack-node": "^0.1.4",
43 |     "axios": "^0.27.2",
44 |     "cli-table": "^0.3.11",
45 |     "cli-table3": "^0.6.3",
46 |     "csv-parse": "^5.3.3",
47 |     "dotenv": "^16.0.3",
48 |     "express": "^4.18.1",
49 |     "google-auth-library": "^8.5.2",
50 |     "gpt-3-encoder": "^1.1.4",
51 |     "handlebars": "^4.7.7",
52 |     "json2csv": "^5.0.7",
53 |     "json5": "^2.2.1",
54 |     "mustache": "^4.2.0",
55 |     "mysql2": "^2.3.3",
56 |     "mysql2-promise": "^0.1.4",
57 |     "nanoid": "3.x",
58 |     "nargs": "^0.0.2",
59 |     "natural": "^5.2.3",
60 |     "notion-client": "^6.13.4",
61 |     "openai": "^3.0.0",
62 |     "semaphore-async-await": "^1.5.1"
63 |   },
64 |   "devDependencies": {
65 |     "@babel/generator": "^7.19.6",
66 |     "@tsconfig/node14": "^1.0.0",
67 |     "@types/babel__core": "^7.1.19",
68 |     "@types/cheerio": "^0.22.31",
69 |     "@types/crawler": "^1.2.2",
70 |     "@types/jest": "^29.4.0",
71 |     "@types/node": "^16.11.43",
72 |     "@typescript-eslint/eslint-plugin": "^5.0.0",
73 |     "@typescript-eslint/parser": "^5.0.0",
74 |     "eslint": "^7.32.0",
75 |     "eslint-config-prettier": "^8.3.0",
76 |     "eslint-plugin-deprecation": "^1.2.1",
77 |     "nodemon": "^2.0.12",
78 |     "prettier": "^2.7.1",
79 |     "ts-jest": "^29.0.5",
80 |     "ts-node": "^10.8.1",
81 |     "typescript": "^4.4.2"
82 |   },
83 |   "main": "dist/index.js",
84 |   "types": "dist/index.d.ts",
85 |   "files": [
86 |     "dist"
87 |   ]
88 | }
89 | 
--------------------------------------------------------------------------------
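Since `main` points at `dist/index.js`, downstream projects get the same activity and workflow namespaces that `src/index.ts` exports. A consumption sketch, assuming the package is installed from npm:

```ts
import { activities, workflows } from '@cognosis/platform';

// e.g. hand the activity implementations to your own Temporal worker
console.log(Object.keys(activities));
```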
/src/cli/sql2llm.ts:
--------------------------------------------------------------------------------
1 | import { Connection, WorkflowClient } from '@temporalio/client';
2 | import { SQL2LLM } from '../workflows';
3 | import { nanoid } from 'nanoid';
4 | import fs from 'fs';
5 | import { argv } from 'process';
6 | import * as config from '../config';
7 | import cli_table from 'cli-table3';
8 | 
9 | const yargs = require('yargs');
10 | 
11 | const options = yargs
12 |   .usage('Usage: sql2llm -d <dbname> [-n] -q <query> [-f <file>] [-j] [-c] [-t <timeout>]')
13 |   .option('d', {
14 |     alias: 'database-name',
15 |     description: 'Name of the database to query',
16 |     type: 'string',
17 |   })
18 |   .option('n', {
19 |     alias: 'natural-language-request',
20 |     description: 'Option to process as natural language into a nSQL Natural Language Query',
21 |     type: 'boolean'
22 |   })
23 |   .option('q', {
24 |     alias: 'query',
25 |     description: 'SQL query to run',
26 |     type: 'string',
27 |     demandOption: true
28 |   })
29 |   .option('f', {
30 |     alias: 'file',
31 |     description: 'Option to include as data to query in context',
32 |     type: 'string'
33 |   })
34 |   .option('j', {
35 |     alias: 'json',
36 |     description: 'Return results as JSON instead of displaying with cli-table3',
37 |     type: 'boolean'
38 |   })
39 |   .option('c', {
40 |     alias: 'csv',
41 |     description: 'Return results as CSV instead of displaying with cli-table3',
42 |     type: 'boolean'
43 |   })
44 |   .option('t', {
45 |     alias: 'timeout',
46 |     description: 'Timeout for workflow',
47 |     type: 'string'
48 |   })
49 |   .argv;
50 | 
51 | async function run() {
52 |   const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
53 |   const client = new WorkflowClient({
54 |     connection
55 |   });
56 | 
57 |   // Load context data from the given file, or from stdin when the path is '-'
58 |   let text = null;
59 |   if ( options.file )
60 |   {
61 |     if (options.file === '-') {
62 |       text = fs.readFileSync(0).toString();
63 |     } else {
64 |       const file = fs.readFileSync( options.file );
65 |       text = file.toString();
66 |     }
67 |   }
68 | 
69 |   let handle = await client.start(SQL2LLM, {
70 |     args: [ options.databaseName, options.query, text, options['natural-language-request'] ],
71 |     taskQueue: 'hello-world',
72 |     // in practice, use a meaningful business id, eg customerId or transactionId
73 |     workflowId: nanoid(),
74 |     workflowRunTimeout: options.timeout || '30 seconds',
75 |   });
76 | 
77 |   let output = await handle.result();
78 |   let result = output.result;
79 |   if ( result.length == 0 )
80 |   {
81 |     console.log( "No results." );
82 |     process.exit( 0 );
83 |   }
84 | 
85 |   if (options.json) {
86 |     console.log(JSON.stringify(result, null, 2));
87 |   } else if (options.csv) {
88 |     const json2csv = require('json2csv');
89 | 
90 |     const fields = Object.keys( result[0] );
91 |     const opts = { fields };
92 |     try {
93 |       const csv = json2csv.parse(result, opts);
94 |       console.log(csv);
95 |     } catch (err) {
96 |       console.error(err);
97 |     }
98 |   } else {
99 |     let keys = Object.keys( result[0] );
100 |     var table = new cli_table( {head: keys} );
101 | 
102 |     result.forEach( (v, i, a) => {
103 |       table.push( Object.values(v) );
104 |     });
105 | 
106 |     if ( output.nSQL_query )
107 |       console.log( `Query: ${output.nSQL_query}` );
108 |     console.log( table.toString() );
109 |     console.log( `${result.length} rows with ${keys.length} columns returned.` );
110 |   }
111 | }
112 | 
113 | run().catch((err) => {
114 |   console.error(err);
115 |   process.exit(1);
116 | });
--------------------------------------------------------------------------------
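The CLI is a thin wrapper; the workflow can be started programmatically with the same argument order (database name, query, optional context text, natural-language flag). A sketch assuming a connected `client` and a `csvText` string loaded elsewhere:

```ts
async function demo(client: WorkflowClient, csvText: string) {
  const handle = await client.start(SQL2LLM, {
    args: ['contacts', 'SELECT name, email FROM people LIMIT 5', csvText, false],
    taskQueue: 'hello-world',
    workflowId: nanoid(),
    workflowRunTimeout: '30 seconds'
  });
  const output = await handle.result();
  console.table(output.result); // same rows the CLI renders with cli-table3
}
```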
/src/workflows/mysql.ts:
--------------------------------------------------------------------------------
1 | import * as mysql from '../activities/mysql';
2 | import * as prompt from './prompt';
3 | 
4 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
5 | 
6 | const { dbquery } = proxyActivities<typeof mysql>({
7 |   startToCloseTimeout: '10 minute'
8 | });
9 | 
10 | const {
11 |   promptReducer,
12 |   promptTemplate,
13 |   splitPromptTemplateByLinesOfTokens,
14 |   splitPromptTemplateByTokens
15 | } = proxyActivities<typeof prompt>({ startToCloseTimeout: '10 minute' });
16 | 
17 | /**
18 |  * @function mysqlQuery
19 |  * @param {string} dbhost
20 |  * @param {string} dbuser
21 |  * @param {string} dbpassword
22 |  * @param {string} dbname
23 |  * @param {string} sql
24 |  * @param {Array<any>} parameters
25 |  * @description A workflow that simply calls an activity
26 |  */
27 | export async function mysqlQuery(
28 |   dbhost: string,
29 |   dbuser: string,
30 |   dbpassword: string,
31 |   dbname: string,
32 |   sql: string,
33 |   parameters: Array<any>
34 | ): Promise<Array<any>> {
35 |   return await dbquery(dbhost, dbuser, dbpassword, dbname, sql, parameters);
36 | }
37 | 
38 | /**
39 |  * @function xNLPonDB
40 |  * @param {string} host - Database host.
41 |  * @param {string} username - Database user.
42 |  * @param {string} password - Database password.
43 |  * @param {string} dbname - Database name.
44 |  * @param {string} query - The natural language query to parse.
45 |  * @returns {Promise<string>} - The generated SQL followed by the results of running it.
46 |  * @description Takes a natural language query and translates it into SQL.
47 |  */
48 | export async function xNLPonDB(
49 |   host: string,
50 |   username: string,
51 |   password: string,
52 |   dbname: string,
53 |   query: string
54 | ): Promise<string> {
55 |   /* We start out by getting the table schema. */
56 |   /* TODO: This should be cached. */
57 |   interface tableschema {
58 |     TABLE_NAME: string;
59 |     COLUMN_NAME: string;
60 |   }
61 | 
62 |   let results: tableschema[] = await mysqlQuery(
63 |     host,
64 |     username,
65 |     password,
66 |     dbname,
67 |     'SELECT TABLE_NAME, COLUMN_NAME FROM information_schema.columns WHERE TABLE_SCHEMA = ?',
68 |     [dbname]
69 |   );
70 |   let tables: Map<string, boolean> = new Map();
71 | 
72 |   /* Next, we come up with a list of tables, and we ask it to identify the tables that it needs the schema for this query. */
73 |   results.map((table_column) => {
74 |     tables.set(table_column.TABLE_NAME, true);
75 |   });
76 | 
77 |   let tablenames = Array.from(tables.keys());
78 |   let tablesColumns = new Map<string, Array<string>>();
79 | 
80 |   /* Now, we get the schema for each table. */
81 |   for (let table of tablenames) {
82 |     tablesColumns.set(table, new Array<string>());
83 |   }
84 | 
85 |   results.map((table_column) => {
86 |     tablesColumns.get(table_column.TABLE_NAME)!.push(table_column.COLUMN_NAME);
87 |   });
88 | 
89 |   let prompt = `SHOW TABLES;
90 | {{#tablenames}}
91 | {{{.}}}
92 | {{/tablenames}}
93 | 
94 | For the natural language query "{{{query}}}", generate a comma-separated list of tables we should search, only from the above tables:`;
95 |   let r = await promptTemplate(
96 |     prompt,
97 |     { tablenames: tablenames, query: query },
98 |     10,
99 |     50,
100 |     1.0
101 |   );
102 |   let relevant_tables = r.split(/\s?,/);
103 |   prompt = 'Here are the tables you have available:';
104 |   for (let table of relevant_tables) {
105 |     table = table.trim();
106 |     table = table.replace(/^\n+/, '').replace(/\n+$/, '').toLowerCase();
107 |     if (tablesColumns.get(table)) {
108 |       prompt += `CREATE TABLE ${table} (`;
109 |       prompt += tablesColumns.get(table)!.join(',');
110 |       prompt += ');\n';
111 |     }
112 |   }
113 |   prompt +=
114 |     '\n/* Using only the above tables, a single SQL query for "{{{query}}}": */ SELECT';
115 |   r =
116 |     'SELECT ' + (await promptTemplate(prompt, { query: query }, 10, 300, 1.0));
117 | 
118 |   let result2 = await mysqlQuery(host, username, password, dbname, r, []);
119 |   return r + '\n\n' + JSON.stringify(result2);
120 | }
121 | 
122 | export async function NLPonDB(query: string): Promise<string> {
123 |   try {
124 |     return await xNLPonDB('xx', 'yy', 'zz', '11', query);
125 |   } catch (e: any) {
126 |     return e.toString();
127 |   }
128 | }
129 | 
--------------------------------------------------------------------------------
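End to end, `xNLPonDB` makes two LLM calls (pick relevant tables, then write the SQL) before executing the generated query. A sketch of invoking it as a workflow, with placeholder credentials and a connected `client`:

```ts
async function demo(client: WorkflowClient) {
  const answer = await client.execute(xNLPonDB, {
    args: ['localhost', 'app_user', 'app_password', 'crm',
           'how many users signed up in the last 7 days?'],
    taskQueue: 'hello-world',
    workflowId: 'nlpdb-' + Date.now()
  });
  console.log(answer); // the generated SQL, then the JSON result rows
}
```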
/src/activities/embeddings.ts:
--------------------------------------------------------------------------------
1 | import axios from 'axios';
2 | import { Connection, WorkflowClient } from '@temporalio/client';
3 | import { nanoid } from 'nanoid';
4 | import { spawn } from 'child_process';
5 | import fs from 'fs';
6 | import {
7 |   es_drop,
8 | } from './elastic';
9 | 
10 | import { EMBEDDINGS_URL } from '../config';
11 | import { embeddings_search } from './vector_search';
12 | 
13 | /**
14 |  * @function nlp_embeddings_internal Internal function that calls the embeddings service
15 |  * @param {string} modelName
16 |  * @param {string} token
17 |  * @param {string[]} texts
18 |  */
19 | export async function nlp_embeddings_internal(
20 |   modelName: string,
21 |   token: string,
22 |   texts: string[]
23 | ): Promise<Map<string, number[]>> {
24 |   try {
25 |     const response = await axios.post(EMBEDDINGS_URL, JSON.stringify(texts), {
26 |       headers: {
27 |         'Content-Type': 'application/json'
28 |       }
29 |     });
30 | 
31 |     let r = new Map<string, number[]>();
32 | 
33 |     let obj = response.data;
34 |     if (obj.length != texts.length) {
35 |       throw new Error(
36 |         `Embeddings generation error: texts were ${texts.length} long, while there were ${obj.length} embeddings returned`
37 |       );
38 |     }
39 | 
40 |     for (let x = 0; x < obj.length; x++) {
41 |       r.set(texts[x], obj[x]['vector']);
42 |     }
43 | 
44 |     return convertVectorMapToObject(r);
45 |   } catch (e: any) {
46 |     console.error(e.toString());
47 |     throw new Error(`Embeddings generation error: ${e.toString()}`);
48 |   }
49 | }
50 | 
51 | // This might be the stupidest function I have ever written in my life. For this, I am deeply ashamed.
52 | /**
53 |  * @function convertVectorMapToObject Converts a Map to an object
54 |  * @param map Map to convert
55 |  * @returns {object} Object with keys as strings and values as arrays of numbers
56 |  */
57 | export const convertVectorMapToObject = (map: Map<string, number[]>): any => {
58 |   let json: any = {};
59 |   map.forEach((value: number[], index: string) => {
60 |     json[index] = value;
61 |   });
62 |   return json;
63 | };
64 | 
65 | /**
66 |  * @function nlp_embeddings Generates embeddings for a list of texts
67 |  * @param {string} modelName Name of the model to use
68 |  * @param {string[]} texts List of texts to generate embeddings for
69 |  * @example Generates embeddings for a list of texts
70 |  * await nlp_embeddings('bert-base-uncased', ['hello world', 'goodbye world']);
71 |  * @returns {Promise<[string, number[]][]>}
72 |  * @description Generates embeddings for a list of texts by POSTing them to the embeddings service at EMBEDDINGS_URL.
73 |  */
74 | export async function nlp_embeddings(
75 |   modelName: string,
76 |   texts: string[]
77 | ): Promise<[string, number[]][]> {
78 |   let retval: [string, number[]][] = [];
79 |   try {
80 |     const response = await axios.post(
81 |       EMBEDDINGS_URL,
82 |       JSON.stringify( texts ),
83 |       {
84 |         headers: {
85 |           'Content-Type': 'application/json'
86 |         }
87 |       }
88 |     );
89 | 
90 |     let obj = response.data;
91 |     if (obj.length != texts.length) {
92 |       throw new Error(
93 |         `Embeddings generation error: texts were ${texts.length} long, while there were ${obj.length} embeddings returned`
94 |       );
95 |     }
96 | 
97 |     for (let x = 0; x < obj.length; x++) {
98 |       retval.push([texts[x], obj[x]["vector"]]);
99 |     }
100 | 
101 |     return retval;
102 |   } catch (e: any) {
103 |     console.error(e.toString());
104 |     throw new Error(
105 |       `Nu Embeddings generation error: ${e.toString()}\n${e.message}`
106 |     );
107 |   }
108 | }
109 | 
110 | export async function embeddingsSearch(
111 |   index: string,
112 |   vector: number[],
113 |   k: number
114 | ): Promise<any[]> {
115 |   try {
116 |     return embeddings_search(index, vector, k);
117 |   } catch (e: any) {
118 |     return [{ error: e.message }];
119 |   }
120 | }
121 | 
122 | export async function embeddingsDrop(index: string): Promise<void> {
123 |   await es_drop(index);
124 | }
125 | 
--------------------------------------------------------------------------------
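A direct-call sketch of the two main entry points, using the model name seen in `src/workflows/embeddings.ts` (the index name is illustrative):

```ts
async function demo() {
  const pairs = await nlp_embeddings('paraphrase-multilingual-mpnet-base-v2', [
    'hello world',
    'goodbye world'
  ]);
  const [, queryVector] = pairs[0]; // [text, vector] tuples, in input order
  const hits = await embeddingsSearch('notes', queryVector, 5);
  console.log(hits);
}
```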
52 | /** 53 | * @function convertVectorMapToObject Converts a Map to an object 54 | * @param map Map to convert 55 | * @returns {object} Object with keys as strings and values as arrays of numbers 56 | */ 57 | export const convertVectorMapToObject = (map: Map): any => { 58 | let json: any = {}; 59 | map.forEach((value: number[], index: string) => { 60 | json[index] = value; 61 | }); 62 | return json; 63 | }; 64 | 65 | /** 66 | * @function nlp_embeddings Generates embeddings for a list of texts 67 | * @param {string} modelName Name of the model to use 68 | * @param {string[]} texts List of texts to generate embeddings for 69 | * @example Generates embeddings for a list of texts 70 | * await nlp_embeddings('bert-base-uncased', ['hello world', 'goodbye world']); 71 | * @returns {Promise>} 72 | * @description Generates embeddings for a list of texts running as a Google Cloud Platform service in Vertex AI. 73 | */ 74 | export async function nlp_embeddings( 75 | modelName: string, 76 | texts: string[] 77 | ): Promise<[string, number[]][]> { 78 | let token; 79 | 80 | let retval: [string, number[]][] = []; 81 | try { 82 | const response = await axios.post( 83 | EMBEDDINGS_URL, 84 | JSON.stringify( texts ), 85 | { 86 | headers: { 87 | 'Content-Type': 'application/json' 88 | } 89 | } 90 | ); 91 | 92 | let r = new Map(); 93 | let obj = response.data; 94 | if (obj.length != texts.length) { 95 | throw new Error( 96 | `Embeddings generation error: texts were ${texts.length} long, while there were ${obj.length} embeddings returned` 97 | ); 98 | } 99 | 100 | for (let x = 0; x < obj.length; x++) { 101 | retval.push([texts[x], obj[x]["vector"]]); 102 | } 103 | 104 | return retval; 105 | } catch (e: any) { 106 | console.error(e.toString()); 107 | throw new Error( 108 | `Nu Embeddings generation error: ${e.toString()}\n${e.message}` 109 | ); 110 | } 111 | } 112 | 113 | export async function embeddingsSearch( 114 | index: string, 115 | vector: number[], 116 | k: number 117 | ): Promise { 118 | try { 119 | return embeddings_search(index, vector, k); 120 | } catch (e: any) { 121 | return [{ error: e.message }]; 122 | } 123 | } 124 | 125 | export async function embeddingsDrop(index: string): Promise { 126 | await es_drop(index); 127 | } 128 | -------------------------------------------------------------------------------- /src/activities/tokenizer.ts: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | 3 | export interface NLPCloudToken { 4 | text: string; 5 | lemma: string; 6 | start: number; 7 | end: number; 8 | ws_after: boolean; 9 | } 10 | 11 | async function nlpcloud_tokenize_x( 12 | text: string, 13 | token: string 14 | ): Promise { 15 | const response = await axios.post( 16 | `https://api.nlpcloud.io/v1/en_core_web_lg/tokens`, 17 | { 18 | text: text 19 | }, 20 | { 21 | headers: { 22 | Authorization: `Token ${token}`, 23 | 'Content-Type': 'application/json' 24 | } 25 | } 26 | ); 27 | return response.data.tokens; 28 | } 29 | 30 | export async function nlpcloud_tokenize( 31 | text: string, 32 | token: string 33 | ): Promise { 34 | try { 35 | let x = await nlpcloud_tokenize_x(text, token); 36 | return x; 37 | } catch (e: unknown) { 38 | /* 39 | response: { 40 | status: 413, 41 | statusText: 'Payload Too Large', 42 | headers: [Object], 43 | config: [Object], 44 | request: [ClientRequest], 45 | data: 'Request Entity Too Large' 46 | } 47 | 48 | In this case, we want to split the problem up unto halves, and retry them all, and assemble the results. 
*/
49 | if (e instanceof Error && (e as any).response?.status === 413) {
50 | let half = Math.floor(text.length / 2);
51 | console.log('Cut in half.');
52 | let left = await nlpcloud_tokenize(text.substr(0, half), token);
53 | let right = await nlpcloud_tokenize(text.substr(half), token);
54 |
55 | return [...left, ...right];
56 | }
57 |
58 | throw e; // rethrow the error for now... we'll see how this goes! :)
59 | }
60 | }
61 |
62 | /**
63 | * Convert string to list of tokens. This is used by the other LLM activities, largely by the
64 | * data/prompt split/map/reduce activities. You can use it directly if you want, but it's probably
65 | * best to use higher-level activities and/or workflows.
66 | *
67 | * @param text Text to use as prompt (input)
68 | * @returns Array of tokens
69 | */
70 | export async function tokenize_native(text: string): Promise<string[]> {
71 | const natural = require('natural');
72 | let tokenizer = new natural.TreebankWordTokenizer();
73 | return tokenizer.tokenize(text);
74 | }
75 |
76 | export async function gpt3_tokenize(text: string): Promise<number[]> {
77 | let tokenizer = require('gpt-3-encoder');
78 | let encoded_tokens: number[] = tokenizer.encode(text);
79 | return encoded_tokens;
80 | }
81 |
82 | export async function gpt3_detokenize(tokens: number[]): Promise<string> {
83 | let tokenizer = require('gpt-3-encoder');
84 | let text = tokenizer.decode(tokens);
85 | return text;
86 | }
87 |
88 | /**
89 | * Split text into chunks of the given token size.
90 | * Adjacent chunks will overlap by chunk_overlap tokens, which can naively help avoid splitting
91 | * in bad places.
92 | *
93 | * @param text string to split into chunks
94 | * @param chunk_size number of tokens per chunk (last chunk may be smaller)
95 | * @param chunk_overlap number of tokens to overlap adjacent chunks. defaults to 0.
96 | */
97 | export async function split_text_by_tokens(text: string, chunk_size: number, chunk_overlap: number = 0): Promise<string[]> {
98 | if (chunk_size < 0) {
99 | throw new Error("chunk_size must be non-negative");
100 | }
101 | if (chunk_overlap < 0) {
102 | throw new Error("chunk_overlap must be non-negative");
103 | }
104 | if (chunk_overlap >= chunk_size) {
105 | throw new Error("chunk_overlap must be less than chunk_size");
106 | }
107 |
108 | let chunks: string[] = [];
109 | let text_tokens: number[] = await gpt3_tokenize( text );
110 | console.log(`Tokenized ${text.length} characters into ${text_tokens.length} tokens.`);
111 |
112 | // window slides by chunk_size - chunk_overlap tokens each iteration.
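// Worked example: 10 tokens, chunk_size = 4, chunk_overlap = 1 -> idx advances by 3,
// yielding slices [0..4), [3..7), [6..10); each chunk re-uses the previous chunk's last token.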
113 | // we stop sliding when a chunk includes the last token
114 | let tok_len = text_tokens.length;
115 | for ( let idx = 0;
116 | idx < tok_len && idx + chunk_overlap < tok_len; // rhs of && ensures last token only included once
117 | idx += chunk_size - chunk_overlap )
118 | {
119 | let context_tokens_slice: number[] = text_tokens.slice(idx, idx + chunk_size);
120 | let context_slice = await gpt3_detokenize( context_tokens_slice );
121 | chunks.push( context_slice );
122 | }
123 | return chunks;
124 | }
125 |
126 | export async function sentence_tokenizer( text: string ): Promise< string[] > {
127 | throw new Error("Not implemented");
128 | }
129 |
--------------------------------------------------------------------------------
/src/activities/elastic.ts:
--------------------------------------------------------------------------------
1 | import axios from 'axios';
2 | import { Client } from '@elastic/elasticsearch';
3 | import { esMappings } from '../workflows/elastic';
4 |
5 | import { ELASTIC_CONFIG } from '../config';
6 |
7 | /**
8 | * @function getElasticSearchClient
9 | * @example
10 | * const client = await getElasticSearchClient();
11 | * @returns {Promise<Client>} ElasticSearch client
12 | * @description Returns a promise that resolves to an ElasticSearch client,
13 | * configured from ELASTIC_CONFIG (the function takes no parameters)
14 | */
15 |
16 | export async function getElasticSearchClient(): Promise<Client> {
17 | const client = new Client(ELASTIC_CONFIG);
18 | return client;
19 | }
20 |
21 | /**
22 | * @function es_index
23 | * @param {string} indexname Elasticsearch index name the document will be added to
24 | * @param {any} doc Document to be added to the index
25 | * @param {boolean} refresh Refresh the index after adding the document. This can slow down indexing, but is useful if you need it to be searchable immediately.
26 | * @example Adds a document to an elasticsearch index
27 | * await es_index('test', { 'test': 'test' }, true);
28 | * @returns {Promise<void>}
29 | * @description Indexes a document in ElasticSearch
30 | */
31 | // The client is closed on both the success and error paths below.
32 | export async function es_index(
33 | indexname: string,
34 | doc: any,
35 | refresh: boolean = true
36 | ): Promise<void> {
37 | let client = await getElasticSearchClient();
38 | try
39 | {
40 | await client.index({ index: indexname, document: doc });
41 | if (refresh) await client.indices.refresh({ index: indexname });
42 | client.close();
43 | }
44 | catch( e: any )
45 | {
46 | console.error(e);
47 | client.close();
48 | throw e;
49 | }
50 |
51 | return;
52 | }
53 |
54 | /**
55 | * @function es_search
56 | * @param {string} indexname Elasticsearch index to search
57 | * @param {any} queryobj Query object to be used to search the index
58 | * @example Searches an Elasticsearch index using Elastic Query DSL
59 | * await es_search('test', { 'query': { 'match_all': {} } });
60 | * @returns {Promise<any>} Promise that resolves to the results of the search
61 | * @description Searches an index in ElasticSearch
62 | */
63 | export async function es_search(
64 | indexname: string,
65 | queryobj: any
66 | ): Promise<any> {
67 | let client = await getElasticSearchClient();
68 | const result = await client.search({ index: indexname, query: queryobj });
69 | client.close();
70 | return result.hits.hits;
71 | }
72 |
73 |
74 | /**
75 | * @function es_query Executes an SQL query against Elasticsearch
76 | * @param {string} query SQL query to be executed
77 | * @example Executes an SQL query against Elasticsearch
78 | * await es_query('SELECT * FROM test');
79 | * @returns {Promise<any[]>} Promise that resolves to the results of the query
80 | */ export async function es_query(query: string, params?: any): Promise<any[]> {
81 | let client = await getElasticSearchClient();
82 | const result = await client.sql.query({
83 | query: query,
84 | params: params,
85 | });
86 | client.close();
87 | const data = result.rows.map((row) => {
88 | const obj: any = {};
89 | for (let i = 0; i < row.length; i++) {
90 | obj[result.columns![i].name] = row[i];
91 | }
92 | return obj;
93 | });
94 |
95 | return data;
96 | }
97 |
98 |
99 | /**
100 | * @function es_drop Deletes an Elasticsearch index
101 | * @param index Elasticsearch index to be deleted
102 | * @example Deletes an Elasticsearch index
103 | * await es_drop('test');
104 | * @returns {Promise<void>} Promise that resolves when the index is deleted
105 | */
106 | export async function es_drop(index: string): Promise<void> {
107 | let client = await getElasticSearchClient();
108 | try {
109 | await client.indices.delete({ index: index });
110 | } catch (e: any) {
111 | console.error(e);
112 | }
113 | }
114 |
115 | /**
116 | * @function es_delete Deletes a document from an Elasticsearch index
117 | * @param index Elasticsearch index the document will be deleted from
118 | * @param id ID of the document to be deleted
119 | * @example Deletes a document from an Elasticsearch index
120 | * await es_delete('test', '1');
121 | * @returns {Promise<void>} Promise that resolves when the document is deleted
122 | */
123 | export async function es_delete(index: string, id: string): Promise<void> {
124 | let client = await getElasticSearchClient();
125 | try {
126 | await client.delete({ index: index, id: id });
127 | } catch (e: any) {
128 | console.error(e);
129 | }
130 | }
131 |
132 |
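// A minimal usage sketch of the helpers above (assumes a reachable cluster via
// ELASTIC_CONFIG; the index name 'notes' is invented for illustration):
//
//   await es_index('notes', { text: 'hello world' }, true);
//   const hits = await es_search('notes', { match: { text: 'hello' } });
//   const rows = await es_query('SELECT text FROM notes LIMIT 10');
//   await es_delete('notes', hits[0]._id);
//   await es_drop('notes');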
--------------------------------------------------------------------------------
/src/prompts/notebook-template.txt:
--------------------------------------------------------------------------------
1 | Min length: 10 Max length: 512 Temperature: 0 Top_p: 1 Model: code-davinci-002 Stop token: Question:,Out[,Out [
2 |
3 | You are GPT-3. Today is {{{todaysdate}}}. Answer the following questions.
4 |
5 | If you can answer directly, use this format:
6 |
7 | Question: ${Question}
8 | Answer: ${Answer}
9 |
10 | You are unable to directly answer any question that requires:
11 | - Analyzing text as a sequence of characters (e.g., counting length, reversing strings)
12 | - Counting of more than several items (e.g., words in a sentence or items in a list)
13 | - Arithmetic that a human could not perform easily in their head
14 |
15 | In these cases, consult IJavascript. Use this format:
16 |
17 | Question: ${Question}
18 | IJavascript session:
19 | ```
20 | ${IJavascript commands and output needed to find answer}
21 | ```
22 | Answer: ${Answer}
23 |
24 | Your training data was last updated April 2021, and you do not know any later events. To find information & perform tasks, many packages are available -- use the x.searchLoadedPackages() function call to display them, and then x.inspectLoadedPackage('packagename') to get a list of the methods available. Use anything you need, but favor the packages listed first.
25 |
26 | Whenever possible, use the output of the previous statements, which are assigned to variables that are automatically created with the prefix return_ on the completion of every statement.
27 |
28 | Begin.
29 |
30 | Question: When was George Washington born?
31 | Answer: February 22, 1732
32 |
33 | Question: How many legs does an antelope have?
34 | Answer: Four
35 |
36 | Question: What is 2 + 19?
37 | Answer: 21
38 |
39 | Question: What is 7 + 19^3?
40 | IJavascript session:
41 | ```
42 | In [1]: 7 + 19**3
43 | Out[1]: 6866
44 | console.log( "Done." );
45 | ```
46 | Answer: 6866
47 |
48 | Question: What is the number of legs that a spider has raised to the fifth power?
49 | IJavascript session:
50 | ```
51 | In [1]: 8**5
52 | Out[1]: 32768
53 | console.log( "Done."
); 54 | ``` 55 | Answer: 32768 56 | 57 | Question:Search for waifudiffusion, fetch the first result, question whether it can be used to generate anime images of hot women, and print the result 58 | IJavascript session: 59 | ``` 60 | In [1]: x.searchLoadedPackages() 61 | Out [1] [["x.http","perform HTTP requests"],["x.google","Google APIs"],["x.nlp","Natural Language Processing"]] 62 | In [2]: x.inspectLoadedPackage('x.google') 63 | Out [2] return_2 = ["async function search(query:string): Promise< {rank: number, url: string, title: string, description: string}[] >"] 64 | In [3]: x.inspectLoadedPackage('x.http') 65 | Out [3] return_3 = ["async function x.http.get(url: string): Promise< {status: number, statusText: string, content: any} >","async function x.http.post(url: string, data: any): Promise< string >","async function x.http.put(url: string, data: any): Promise< string >","async function x.http.delete(url: string): Promise< string >"] 66 | In [4]: x.inspectLoadedPackage('x.nlp') 67 | Out [4] return_4 = ["async function x.nlp.summarize(text: string): Promise< string >","async function x.nlp.question_answer(text: string, question: string): Promise< string >","async function x.ai_instruction_on_text( text: string, instructions: string ): Promise< string >"] 68 | In [5]: await x.google.search('waifudiffusion') 69 | Out [5] return_5 = [{"rank":1,"url":"https://huggingface.co/hakurei/waifu-diffusion","title":"hakurei/waifu-diffusion - Hugging FaceWaifu Diffusion Demo - a Hugging Face Space by hakurei","description":"waifu-diffusion is a latent text-to-image diffusion model that has been conditioned on high-quality anime images through fine-tuning. Original Weights. Gradio & ...waifu-diffusion-demo. Copied. like 109. Running onA10G. App Files Files and versions Community. 24. Linked models. Loading... view api."},{"rank":2,"url":"https://r... Output truncated. Full output is in the 'return_5' variable. 70 | In [6]: await x.http.get(return_5[0].url) 71 | Out [6] return_6 = {"status":200,"statusText":"OK","content":"[Hugging Face's logo] Hugging Face\n\n- Models\n- Datasets\n- Spaces\n- Docs\n- Solutions\n\n- Pricing\n- \n\n- \n\n ------------------------------------------------------------------------\n\n- Log In\n- Sign Up\n\n\n\n[] hakurei\n\n/\n\nwaifu-diffusion\n\nCopied\n\nlike\n\n817\n\nText-to-Image\n\nDiffusers\n\nEnglish stable-diffusion\n\nLicense: creativeml-openrail-m\n\nModel card\n\nFiles Files and versions\n\nCommunity\n\n33\n\nUse in Dif... Output truncated. Full output is in the 'return_6' variable. 72 | In [7]: await x.nlp.question_answer(return_6.content, 'can it generate anime images of hot women?') 73 | Out [7] return_7 = "Lines 0 to 203\n\n\nYes, the waifu-diffusion model can generate anime images of hot women." 74 | In [8]: console.log(return_7) 75 | ``` 76 | Answer: Yes, the waifu-diffusion model can generate anime images of hot women. 
77 |
78 | Question:
--------------------------------------------------------------------------------
/src/workflows/llm.ts:
--------------------------------------------------------------------------------
1 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
2 |
3 | import * as llm from '../activities/llm';
4 | import * as tokenizer from '../activities/tokenizer';
5 |
6 | const { generateText, generateTextOpenAI, retryGenerateTextOpenAI } =
7 | proxyActivities<typeof llm>({ startToCloseTimeout: '10 minute' });
8 | const { nlpcloud_tokenize, tokenize_native } = proxyActivities<
9 | typeof tokenizer
10 | >({ startToCloseTimeout: '10 minute' });
11 |
12 | export type llm_models = 'gpt-3' | 'gpt-neox-20b' | 'text-curie-001' | 'finetuned-gpt-neox-20b';
13 |
14 |
15 | /**
16 | * @function nlpcloud_generate
17 | * @param {string} prompt
18 | * @param {number} minLength
19 | * @param {number} maxLength
20 | * @param {boolean | null} lengthNoInput
21 | * @param {string | null} endSequence
22 | * @param {boolean} removeInput
23 | * @param {boolean | null} doSample
24 | * @param {number | null} numBeams
25 | * @param {boolean | null} earlyStopping
26 | * @param {number | null} noRepeatNgramSize
27 | * @param {number | null} numReturnSequences
28 | * @param {number | null} topK
29 | * @param {number | null} topP
30 | * @param {number | null} temperature
31 | * @param {number | null} repetitionPenalty
32 | * @param {number | null} lengthPenalty
33 | * @param {boolean | null} badWords
34 | * @param {boolean | null} removeEndSequence
35 | * @description A workflow that will generate text using the NLP Cloud API
36 | */
37 | export async function nlpcloud_generate(
38 | prompt: string,
39 | minLength: number = 10,
40 | maxLength: number = 20,
41 | lengthNoInput: boolean | null = null,
42 | endSequence: string | null = null,
43 | removeInput: boolean = true,
44 | doSample: boolean | null,
45 | numBeams: number | null,
46 | earlyStopping: boolean | null,
47 | noRepeatNgramSize: number | null,
48 | numReturnSequences: number | null,
49 | topK: number | null,
50 | topP: number | null,
51 | temperature: number | null,
52 | repetitionPenalty: number | null,
53 | lengthPenalty: number | null,
54 | badWords: boolean | null,
55 | removeEndSequence: boolean | null
56 | ): Promise<string> {
57 | // TODO: #3 Where do we want to store stuff like API tokens?
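// (They currently live in src/config.ts: the generateText activity imports
// NLPCLOUD_TOKEN from there.)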
58 | return await generateText(
59 | 'finetuned-gpt-neox-20b',
60 | prompt,
61 | minLength,
62 | maxLength,
63 | lengthNoInput,
64 | endSequence,
65 | removeInput,
66 | doSample,
67 | numBeams,
68 | earlyStopping,
69 | noRepeatNgramSize,
70 | numReturnSequences,
71 | topK,
72 | topP,
73 | temperature,
74 | repetitionPenalty,
75 | lengthPenalty,
76 | badWords,
77 | removeEndSequence
78 | );
79 | }
80 |
81 |
82 | /**
83 | * @function openai_generate
84 | * @param {string} prompt
85 | * @param {number} min_length
86 | * @param {number} max_length
87 | * @param {number} temperature
88 | * @param {number} top_p
89 | * @param {string | null} endSequence
90 | * @description A workflow that will generate text using the OpenAI API
91 | */
92 | export async function openai_generate(
93 | prompt: string,
94 | min_length: number,
95 | max_length: number,
96 | temperature: number,
97 | top_p: number,
98 | endSequence: string | null = null,
99 | ): Promise<string> {
100 | return await generateTextOpenAI(
101 | prompt,
102 | min_length,
103 | max_length,
104 | temperature,
105 | top_p,
106 | "text-davinci-003",
107 | endSequence
108 | );
109 | }
110 |
111 | /**
112 | * @function minGenerate
113 | * @param {string} prompt
114 | * @param {number} minLength
115 | * @param {number} maxLength
116 | * @param {number} temperature
117 | * @param {string | null} endSequence
118 | * @param {llm_models} model
119 | * @description A workflow that will generate text using sensible defaults and a sensible default LLM
120 | */
121 | export async function minGenerate(
122 | prompt: string,
123 | minLength: number,
124 | maxLength: number,
125 | temperature: number,
126 | endSequence: string | null = null,
127 | model: llm_models = 'gpt-3'
128 | ): Promise<string> {
129 | console.log('In:\n' + prompt);
130 | if (model == 'gpt-neox-20b' || model == 'finetuned-gpt-neox-20b') {
131 | let completion = await nlpcloud_generate(
132 | prompt,
133 | minLength,
134 | maxLength,
135 | true,
136 | endSequence,
137 | undefined,
138 | null,
139 | null,
140 | null,
141 | null,
142 | null,
143 | null,
144 | 1.0,
145 | temperature,
146 | null,
147 | null,
148 | null,
149 | true
150 | );
151 | console.log('Out:\n' + completion);
152 | return completion;
153 | } else if (model == 'gpt-3' || model == 'text-curie-001') {
154 | let completion = await openai_generate(
155 | prompt,
156 | minLength,
157 | maxLength,
158 | temperature,
159 | 0.9,
160 | endSequence
161 | );
162 | return completion;
163 | }
164 |
165 | throw new Error(`No model ${model} found`);
166 | }
167 |
--------------------------------------------------------------------------------
/src/activities/vector_search.ts:
--------------------------------------------------------------------------------
1 | import { getElasticSearchClient, es_query, es_drop } from './elastic';
2 | import { esMappings } from '../workflows/elastic';
3 |
4 | export async function embeddings_search(
5 | indexname: string,
6 | vector: number[],
7 | k: number
8 | ): Promise<any> {
9 | console.log(`Searching ${indexname} with a ${vector.length}-dim vector, returning ${k}`);
10 | let client = await getElasticSearchClient();
11 | try {
12 | const result = await client.search({
13 | index: indexname,
14 | body: {
15 | query: {
16 | script_score: {
17 | query: { match_all: {} },
18 | script: {
19 | source: "cosineSimilarity(params.queryVector, 'embeddings') + 1.0",
20 | params: { queryVector: vector }
21 | }
22 | }
23 | },
24 | size: k
25 | }
26 | });
27 | return result.hits.hits;
28 | } catch (e: any) {
29 | console.error(e.meta.body.error);
30 | console.error('========================== Sahr');
31 | console.error(e.meta.body.error.failed_shards[0].reason);
32 | throw e;
33 | }
34 | }
35 | /**
36 | * @function es_context For a given Elasticsearch line-vectorized index, filename and line number, provides five lines of textual context (two either side)
37 | * @param {string} indexname Elasticsearch index to search
38 | * @param {string} path Path to the file
39 | * @param {number} line Line number
40 | * @example Provides lines 8 to 12 from test.txt in the test line-vectorized Elasticsearch index
41 | * await es_context('test', 'test.txt', 10);
42 | * @returns {Promise<any>} Promise that resolves to the results of the search
43 | * @description Searches an index in ElasticSearch
44 | */ export async function es_context(
45 | indexname: string,
46 | path: string,
47 | line: number
48 | ): Promise<any> {
49 | let client = await getElasticSearchClient();
50 | try {
51 | let sql = `SELECT text, line, path FROM ${indexname} WHERE path = '${path}' AND line >= ${line} - 2 AND line <= ${line} + 2 ORDER BY line ASC`;
52 | console.log(sql);
53 | let results = await es_query(sql);
54 | client.close();
55 | return results;
56 | } catch (e: any) {
57 | console.log(e);
58 | console.log(e.meta.body.error);
59 | throw e;
60 | }
61 | }
62 | /**
63 | * @function es_mappings Creates an Elasticsearch index with a dense vector index at 'embeddings'
64 | * @param {string} index Elasticsearch index to create
65 | * @param {any} doc Elasticsearch mapping object
66 | * @param {number} dims Number of dimensions for the dense vector
67 | * @example Creates an Elasticsearch index with a dense vector
68 | * await es_mappings('test', { 'message': { 'type': 'text' } }, 512);
69 | * // Creates the following index:
70 | * // {
71 | * //   "mappings": {
72 | * //     "properties": {
73 | * //       "message": {
74 | * //         "type": "text"
75 | * //       },
76 | * //       "embeddings": {
77 | * //         "type": "dense_vector",
78 | * //         "dims": 512
79 | * //       }
80 | * //     }
81 | * //   }
82 | * // }
83 | * @returns {Promise<void>} Promise that resolves when the index is created
84 | */
85 | export async function es_mappings(
86 | index: string,
87 | doc: any,
88 | dims: number
89 | ): Promise<void> {
90 | let client = await getElasticSearchClient();
91 | /*
92 | {"client_msg_id":"6dfbdabd-19f3-43b9-ad0a-bff20ddccae5","type":"message","text":"emmap messages","user":"U03UF3XNM8D","ts":"1662237512.586659","team":"T03U3FML84F","blocks":[{"type":"rich_text","block_id":"s+aM","elements":[{"type":"rich_text_section","elements":[{"type":"text","text":"emmap messages"}]}]}],"channel":"C0409CD6VR7","event_ts":"1662237512.586659","channel_type":"channel"}
93 | */
94 | let mappings = doc;
95 | mappings['embeddings'] = { type: 'dense_vector', dims: dims };
96 | try {
97 | await client.indices.create({
98 | index: index,
99 | mappings: { properties: mappings }
100 | });
101 | } catch (e: any) {
102 | if (e.meta.body.error.type != 'resource_already_exists_exception') {
103 | throw e;
104 | }
105 | }
106 |
107 | client.close();
108 | return;
109 | }
110 |
111 |
112 | /**
113 | * @function init_elasticsearch_mappings Initializes the Elasticsearch mappings for the emmap database
114 | * @example Initializes the Elasticsearch mappings for the emmap database
115 | * await init_elasticsearch_mappings();
116 | * @returns {Promise<string>} Promise that resolves when the mappings are initialized
117 | */
118 | export async function init_elasticsearch_mappings(): Promise<string> {
119 | try {
120 | await
es_drop('messages'); 121 | } catch (e: any) {} 122 | await esMappings('messages', { 123 | text: { type: 'text' }, 124 | user: { type: 'keyword' }, 125 | ts: { type: 'keyword' }, 126 | channel: { type: 'keyword' }, 127 | reactions: { type: 'keyword' } 128 | }); 129 | 130 | try { 131 | await es_drop('teachings'); 132 | } catch (e: any) {} 133 | await esMappings('teachings', { 134 | text: { type: 'text' }, 135 | user: { type: 'keyword' }, 136 | prompt: { type: 'keyword' }, 137 | completion: { type: 'keyword' }, 138 | prompt_leading: { type: 'keyword' }, 139 | md5sum: { type: 'keyword' } 140 | }); 141 | 142 | try { 143 | await es_drop('transcripts'); 144 | } catch (e: any) {} 145 | await esMappings('transcripts', { 146 | filename: { type: 'keyword' }, 147 | transcript: { type: 'text' }, 148 | data: { type: 'keyword' } 149 | }); 150 | 151 | try { 152 | await es_drop('spider'); 153 | } catch (e: any) {} 154 | await esMappings('spider', { 155 | url: { type: 'keyword' }, 156 | depth: { type: 'integer' }, 157 | md5: { type: 'keyword' }, 158 | text: { type: 'text' } 159 | }); 160 | 161 | return 'Done init_elasticsearch_mappings().'; 162 | } 163 | -------------------------------------------------------------------------------- /src/activities/application/codegrunt.ts: -------------------------------------------------------------------------------- 1 | import * as activities from '../../activities'; 2 | 3 | /* 4 | * Import babel so we can parse typescript or javascript files 5 | */ 6 | import * as babel from '@babel/core'; 7 | import * as fs from 'fs'; 8 | 9 | export async function parse( code: string, filename: string ): Promise< babel.ParseResult | null >{ 10 | let lines = code.split( /[\r\n]+/ ); 11 | 12 | let result = await babel.parseAsync( code, { 13 | filename: filename, 14 | presets: [ '@babel/preset-typescript' ], 15 | plugins: [ '@babel/plugin-proposal-class-properties' ] 16 | } ); 17 | 18 | if ( result == null ) 19 | { 20 | console.log(`Failed to parse ${filename}`); 21 | return result; 22 | } 23 | return result; 24 | 25 | result?.program.body.forEach( async (node) => { 26 | console.log( `${node.type}` ); 27 | //console.log( node ); 28 | 29 | if ( node.type === 'FunctionDeclaration' ) 30 | { 31 | console.log( `${JSON.stringify(node.leadingComments)}` ); 32 | console.log( `${node.id?.name}` ); 33 | node.loc?.start.line; 34 | node.loc?.end.line; 35 | console.log( lines.slice( node.loc!.start.line - 1, node.loc?.end.line ).join( '\n' ) ); 36 | 37 | console.log( node ); 38 | } 39 | }); 40 | } 41 | 42 | export type CodeFragmentType = 'unknown' | 'function'; 43 | 44 | export interface CodeFragment 45 | { 46 | type: CodeFragmentType; 47 | start: {line: number; column: number}; 48 | end: {line: number; column: number}; 49 | code: string; 50 | leadingComment: string; 51 | } 52 | 53 | export async function nodeToCodeFragment( node: babel.Node, lines: Array< string > ): Promise< CodeFragment > 54 | { 55 | let start = node.loc?.start; 56 | let end = node.loc?.end; 57 | 58 | if ( start == null || end == null ) 59 | { 60 | throw new Error( 'PERMANENT - Node has no location' ); 61 | } 62 | 63 | let leadingComment = ''; 64 | if ( node.leadingComments != null ) 65 | { 66 | let leadingCommentLines = node.leadingComments.map( (c) => c.value ); 67 | leadingComment = leadingCommentLines.join( '\n' ); 68 | } 69 | 70 | let code = lines.slice( start.line - 1, end.line ).join( '\n' ); 71 | let retval: CodeFragment = { 72 | type: 'unknown', 73 | start: {line: start.line, column: start.column}, 74 | end: {line: 
end.line, column: end.column},
75 | code: code,
76 | leadingComment: leadingComment,
77 | };
78 | return retval;
79 | }
80 |
81 | export interface FunctionCodeFragment extends CodeFragment {
82 | type: 'function';
83 | name: string;
84 | params: Array< string >;
85 | returnType: string;
86 | signature: string;
87 | }
88 |
89 | export async function extractFunctions( code: string, filename: string ): Promise< Array< FunctionCodeFragment > > {
90 | let result = await parse( code, filename );
91 | let functions: Array< FunctionCodeFragment > = [];
92 |
93 |
94 | function nodeToFunctionCodeFragment( node: babel.Node ): FunctionCodeFragment
95 | {
96 | if ( node.type == 'FunctionDeclaration' )
97 | {
98 |
99 | let fcf =
100 | {
101 | type: 'function',
102 | name: node.id?.name ?? '',
103 | params: new Array< string >(),
104 | returnType: '',
105 | start: {line: 0, column: 0},
106 | end: {line: 0, column: 0},
107 | code: '',
108 | leadingComment: node.leadingComments?.map( (c) => c.value ).join( '\n' ) ?? '',
109 | };
110 |
111 | for ( let param of node.params )
112 | {
113 |
114 |
115 | let i1 = (<any>(param.loc!.end)).index;
116 | let i2 = (<any>(param.loc!.start)).index;
117 | let type_s = code.substring( i2, i1 );
118 | fcf.params.push( type_s );
119 | }
120 |
121 | let returnType = node.returnType!;
122 | let i1 = (<any>(returnType.loc!.end)).index;
123 | let i2 = (<any>(returnType.loc!.start)).index;
124 | fcf.returnType = code.substring( i2, i1 );
125 |
126 |
127 | let start = (<any>node.loc).start;
128 | let end = (<any>node.loc).end;
129 | fcf.code = code.substring( start!.index, end!.index );
130 | let fcf2: FunctionCodeFragment = {
131 | type: 'function',
132 | name: fcf.name,
133 | params: fcf.params,
134 | returnType: fcf.returnType,
135 | start: {line: start!.line, column: start!.column},
136 | end: {line: end!.line, column: end!.column},
137 | code: fcf.code,
138 | leadingComment: fcf.leadingComment,
139 | signature: `${node.async ? "async " : ""}function ${fcf.name}(${fcf.params.join( ', ' )}): ${fcf.returnType.replace( /^\s*:\s*/, '') }`,
140 | };
141 | return fcf2;
142 | }
143 | else
144 | {
145 | console.log( `Unexpected node type ${node.type}` );
146 | throw new Error( 'PERMANENT - Unexpected node type' );
147 | }
148 | }
149 |
150 | result?.program.body.forEach( async (node) => {
151 | console.log( `${node.type} ${node.range}` );
152 | if ( node.type === 'FunctionDeclaration' )
153 | {
154 | functions.push( await nodeToFunctionCodeFragment( node ) );
155 | }
156 | if ( node.type == 'ExportNamedDeclaration' && node.declaration?.type == 'FunctionDeclaration' )
157 | {
158 | let decl = await nodeToCodeFragment( node.declaration, code.split( /[\r\n]/ ) );
159 | decl.leadingComment = node.leadingComments?.map( (c) => c.value ).join( '\n' ) ?? '';
160 | let frag = nodeToFunctionCodeFragment( node.declaration ); frag.leadingComment = decl.leadingComment; functions.push( frag );
161 | }
162 | });
163 |
164 | return functions;
165 | }
166 |
--------------------------------------------------------------------------------
/src/activities/__tests__/tokenizer.test.ts:
--------------------------------------------------------------------------------
1 | import { split_text_by_tokens } from '../tokenizer';
2 |
3 | // source of truth for tokenizing: https://platform.openai.com/tokenizer?view=bpe
4 |
5 | describe("split_text_by_tokens", () => {
6 | test("empty text", async () => {
7 | expect(await split_text_by_tokens("", 10)).toEqual([])
8 | })
9 | });
10 |
11 | describe("split_text_by_tokens", () => {
12 | test("1 token chunking without overlap", async () => {
13 | expect(await split_text_by_tokens("Hello world! This is a test.", 1)).toEqual(
14 | ["Hello", " world", "!", " This", " is", " a", " test", "."])
15 | })
16 | });
17 |
18 | describe("split_text_by_tokens", () => {
19 | test("2 token chunking without overlap; odd number of tokens", async () => {
20 | expect(await split_text_by_tokens("Hello world! This is still a test.", 2)).toEqual(
21 | ["Hello world", "! This", " is still", " a test", "."])
22 | })
23 | });
24 |
25 | describe("split_text_by_tokens", () => {
26 | test("whitespace", async () => {
27 | expect(await split_text_by_tokens("  \n ", 1)).toEqual([" ", "\n", " "])
28 | })
29 | });
30 |
31 | describe("split_text_by_tokens", () => {
32 | test("2 token chunking with 1 overlap", async () => {
33 | expect(await split_text_by_tokens("Hello world! This is a test.", 2, 1)).toEqual(
34 | ["Hello world", " world!", "! This", " This is", " is a", " a test", " test."])
35 | })
36 | });
37 |
38 | describe("split_text_by_tokens", () => {
39 | test("2 token chunking with 1 overlap; odd number of tokens", async () => {
40 | expect(await split_text_by_tokens("Hello world! This is still a test.", 2, 1)).toEqual(
41 | ["Hello world", " world!", "! This", " This is", " is still", " still a", " a test", " test."])
42 | })
43 | });
44 |
45 | describe("split_text_by_tokens", () => {
46 | test("3 token chunking with 1 overlap", async () => {
47 | expect(await split_text_by_tokens("Hello world! This is a test.", 3, 1)).toEqual(
48 | ["Hello world!", "! This is", " is a test", " test."])
49 | })
50 | });
51 |
52 | describe("split_text_by_tokens", () => {
53 | test("3 token chunking with 1 overlap; odd number of tokens", async () => {
54 | expect(await split_text_by_tokens("Hello world! This is still a test.", 3, 1)).toEqual(
55 | ["Hello world!", "! This is", " is still a", " a test."])
56 | })
57 | });
58 |
59 | describe("split_text_by_tokens", () => {
60 | test("3 token chunking with 2 overlap", async () => {
61 | expect(await split_text_by_tokens("Hello world! This is a test.", 3, 2)).toEqual(
62 | ["Hello world!", " world! This", "! This is", " This is a", " is a test", " a test."])
63 | })
64 | });
65 |
66 | describe("split_text_by_tokens", () => {
67 | test("3 token chunking with 2 overlap; odd number of tokens", async () => {
68 | expect(await split_text_by_tokens("Hello world! This is still a test.", 3, 2)).toEqual(
69 | ["Hello world!", " world! This", "! This is", " This is still", " is still a", " still a test", " a test."])
70 | })
71 | });
72 |
73 | describe("split_text_by_tokens", () => {
74 | test("chunk size equal to text length returns single chunk with text", async () => {
75 | expect(await split_text_by_tokens("Hello world! This is a test.", 8)).toEqual(
76 | ["Hello world! This is a test."])
This is a test."]) 77 | }) 78 | }); 79 | 80 | describe("split_text_by_tokens", () => { 81 | test("chunk size equal to text length returns single chunk with text, despite chunk_overlap", async () => { 82 | expect(await split_text_by_tokens("Hello world! This is a test.", 8, 1)).toEqual( 83 | ["Hello world! This is a test."]) 84 | }) 85 | }); 86 | 87 | describe("split_text_by_tokens", () => { 88 | test("chunk size longer than text returns single chunk with text", async () => { 89 | expect(await split_text_by_tokens("Hello world! This is a test.", 9)).toEqual( 90 | ["Hello world! This is a test."]) 91 | }) 92 | }); 93 | 94 | describe("split_text_by_tokens", () => { 95 | test("chunk size longer than text returns single chunk with text, despite chunk_overlap", async () => { 96 | expect(await split_text_by_tokens("Hello world! This is a test.", 9, 1)).toEqual( 97 | ["Hello world! This is a test."]) 98 | }) 99 | }); 100 | 101 | describe("split_text_by_tokens", () => { 102 | test("large chunk size and large chunk overlap", async () => { 103 | expect(await split_text_by_tokens("Hello world! This is a test.", 7, 6)).toEqual( 104 | ["Hello world! This is a test", " world! This is a test."]) 105 | }) 106 | }); 107 | 108 | describe("split_text_by_tokens", () => { 109 | test("large chunk size and small chunk overlap", async () => { 110 | expect(await split_text_by_tokens("Hello world! This is a test.", 7, 1)).toEqual( 111 | ["Hello world! This is a test", " test."]) 112 | }) 113 | }); 114 | 115 | describe("split_text_by_tokens", () => { 116 | test("chunk overlap larger than chunk size throws error", async () => { 117 | expect(split_text_by_tokens("Hello world! This is still a test.", 2, 3)).rejects.toThrow("chunk_overlap must be less than chunk_size") 118 | }) 119 | }); 120 | 121 | describe("split_text_by_tokens", () => { 122 | test("chunk overlap equal chunk size throws error", async () => { 123 | expect(split_text_by_tokens("Hello world! 
This is still a test.", 3, 3)).rejects.toThrow("chunk_overlap must be less than chunk_size") 124 | }) 125 | }); 126 | 127 | describe("split_text_by_tokens", () => { 128 | test("chunk size 0 with empty text", async () => { 129 | expect(split_text_by_tokens("", 0)).rejects.toThrow("chunk_overlap must be less than chunk_size") 130 | }) 131 | }); 132 | 133 | describe("split_text_by_tokens", () => { 134 | test("chunk size 0 with non-empty text", async () => { 135 | expect(split_text_by_tokens("a", 0)).rejects.toThrow("chunk_overlap must be less than chunk_size") 136 | }) 137 | }); 138 | 139 | describe("split_text_by_tokens", () => { 140 | test("negative chunk size throws", async () => { 141 | expect(split_text_by_tokens("test", -1, -2)).rejects.toThrow("chunk_size must be non-negative") 142 | }) 143 | }); 144 | 145 | describe("split_text_by_tokens", () => { 146 | test("negative overlap throws", async () => { 147 | expect(split_text_by_tokens("test", 0, -1)).rejects.toThrow("chunk_overlap must be non-negative") 148 | }) 149 | }); 150 | -------------------------------------------------------------------------------- /src/activities/util.ts: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | import { Connection, WorkflowClient } from '@temporalio/client'; 3 | import { nanoid } from 'nanoid'; 4 | import * as csv_parse from 'csv-parse'; 5 | 6 | import { getElasticSearchClient } from './elastic'; 7 | 8 | import { TEMPORAL_HOST, NLPCLOUD_TOKEN } from '../config'; 9 | import * as config from '../config'; 10 | import * as wf from '../workflows'; 11 | 12 | export async function nlp_stable_diffusion( 13 | prompt: string 14 | ): Promise { 15 | const response = await axios.post( 16 | `https://api.nlpcloud.io/v1/gpu/stable-diffusion/image-generation`, 17 | { 18 | text: prompt 19 | }, 20 | { 21 | headers: { 22 | Authorization: `Token ${NLPCLOUD_TOKEN}`, 23 | 'Content-Type': 'application/json' 24 | } 25 | } 26 | ); 27 | 28 | return response.data.url; 29 | } 30 | 31 | export async function actionLogger(msg: string) { 32 | console.log(msg); 33 | // Store it in elasticsearch under the 'actionLogger' context: 34 | let client = await getElasticSearchClient(); 35 | await client.index({ index: 'actionlogger', body: { message: msg } }); 36 | client.close(); 37 | } 38 | 39 | /** 40 | * 41 | * @param {string} url 42 | * @param {string} method 43 | * @param {any} headers 44 | * @param {any} data 45 | */ 46 | export async function wf_axios( 47 | url: string, 48 | method: string, 49 | headers: any, 50 | data: any 51 | ): Promise { 52 | let r = await axios.request({ 53 | url: url, 54 | method: method, 55 | headers: headers, 56 | data: data 57 | }); 58 | return r.data; 59 | } 60 | 61 | export async function md5sum(string: string): Promise { 62 | const crypto = require('crypto'); 63 | return crypto.createHash('md5').update(string).digest('hex'); 64 | } 65 | 66 | /** 67 | * @function executeWorkflow 68 | * @param {string} address 69 | * @param {string} workflowId 70 | * @param {string} taskQueue 71 | * @param {any[]} args 72 | * @description Triggers a new workflow execution 73 | */ 74 | export async function executeWorkflow( 75 | address: string, 76 | workflowId: string, 77 | taskQueue: string, 78 | args: any[] 79 | ): Promise { 80 | const connection = await Connection.connect({ address: address }); 81 | const client = new WorkflowClient({ connection }); 82 | let r = await client.start(workflowId, { 83 | args: args, 84 | taskQueue: taskQueue, 85 | workflowId: 
86 | });
87 | let result = await r.result();
88 | return result;
89 | }
90 |
91 | export async function executeLocalWorkflow(
92 | workflowType: string,
93 | taskQueue: string,
94 | args: any[]
95 | ): Promise<any> {
96 | return executeWorkflow(TEMPORAL_HOST, workflowType, taskQueue, args);
97 | }
98 |
99 | export async function JStoOBJ< T >( text: string ): Promise< T > {
100 |
101 | text = text.replace( /“/g, '"' );
102 | text = text.replace( /”/g, '"' );
103 | text = text.replace( /‘/g, "'" );
104 | text = text.replace( /’/g, "'" );
105 | console.log(`============\n${text}\n============\n`);
106 |
107 | try
108 | {
109 | let obj = eval( "const obj = " + text + " ; obj");
110 | console.log( obj );
111 | return obj;
112 | }
113 | catch( e: any )
114 | {
115 | console.log( e );
116 | let p = `Bad JS [{{{id}}}]:
117 | const obj = {{{text}}}
118 |
119 | Error:
120 | {{{error}}}
121 |
122 | Corrected JS, stripped of comments, and pretty-printed:
123 | const obj =`;
124 | const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
125 | const client = new WorkflowClient({connection});
126 |
127 | let handle = await client.start(wf.promptTemplate, {
128 | args: [p, {text: text, error: e.toString(), id: nanoid()}, 128, 2048, 1.0, "finetuned-gpt-neox-20b"],
129 | taskQueue: 'hello-world',
130 | workflowId: nanoid(),
131 | workflowRunTimeout: '1 minute',
132 | });
133 |
134 | let fixed = await handle.result();
135 | return await JStoOBJ( fixed );
136 | }
137 | }
138 |
139 |
140 | export async function parse_and_fix_csv( text: string ): Promise< any >
141 | {
142 | console.log( "======> Entering parse_and_fix_csv" );
143 | // if text, which is a multi-line string, does not end in a " then add one
144 | if ( text[text.length-1] != '"' )
145 | {
146 | text += '"';
147 | }
148 | console.log( text );
149 | console.log( "======> Calling csv_parser_action" );
150 | try
151 | {
152 | let parsed = await csv_parser_action( text );
153 | console.log( parsed );
154 | return( parsed );
155 | }
156 | catch( e: any )
157 | {
158 | console.log( "======> Error in parse_and_fix_csv" + e.toString() );
159 | const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
160 | const client = new WorkflowClient({connection});
161 | let wfid = "parse_and_fix_csv-" + nanoid();
162 |
163 | let p =
164 | `Bad CSV:
165 |
166 | {{{csv}}}
167 |
168 | Corrected CSV:
169 | "`;
170 |
171 | let handle = await client.start(wf.promptTemplate, {
172 | args: [p, {csv: text}, 48, 1024, 0.0, "gpt-3"],
173 | taskQueue: 'hello-world',
174 | workflowId: wfid,
175 | workflowRunTimeout: '10 minutes',
176 | });
177 | let fixed = await handle.result();
178 | console.log( fixed );
179 | let parsed = await csv_parser_action( fixed );
180 | console.log( parsed );
181 | return parsed;
182 | }
183 |
184 | }
185 |
186 |
187 | export async function csv_parser_action( csv: string ): Promise< any[] >
188 | {
189 | // if text, which is a multi-line string, does not end in a " then add one
190 | if ( csv[csv.length-1] != '"' )
191 | {
192 | csv += '"';
193 | }
194 |
195 | // if text does not begin with a " then add one
196 | if ( csv[0] != '"' )
197 | {
198 | csv = '"' + csv;
199 | }
200 |
201 | // Wrap below in a Promise
202 | let p = new Promise< any[] >( (resolve, reject) => {
203 | csv_parse.parse( csv, {columns: true, skip_empty_lines: true, relaxQuotes: true, relax_column_count: true}, (err, records) => {
204 | if ( err ) reject( err ); else resolve( records );
205 |
206 | } );
207 | } );
208 | return await p;
209 | }
210 |
--------------------------------------------------------------------------------
/src/workflows/session.ts:
--------------------------------------------------------------------------------
1 | import * as wf from '@temporalio/workflow';
2 | import * as pf from './prompt';
3 | import { defineSignal, setHandler, getExternalWorkflowHandle, defineQuery, workflowInfo, sleep } from '@temporalio/workflow';
4 |
5 | export const getOutputBuffer = defineQuery<string>('getOutputBuffer');
6 |
7 | interface UserInput {
8 | text: string;
9 | }
10 | interface UserOutput {
11 | text: string;
12 | }
13 |
14 | export const userInputSignal = defineSignal<[UserInput]>('input');
15 | export const userOutputSignal = defineSignal<[UserOutput]>('output');
16 | export const userOutputListenerSignal = defineSignal<[{listener_wf: string, target_wf: string}]>('output_listener');
17 |
18 | export interface FrameInput
19 | {
20 | text: string;
21 | }
22 |
23 | export interface Frame extends FrameInput
24 | {
25 | ts: Date;
26 | logs: [Date, string] [];
27 | response?: string;
28 | }
29 |
30 |
31 | type SessionState = 'IDLE' | 'READ_WAIT' | 'MESSAGE_RECEIVED';
32 |
33 | export class HumanInTheLoopSession< TFrame extends Frame >
34 | {
35 | private _messages: TFrame[] = [];
36 | private _state : SessionState;
37 | private _inputBuffer: string = '';
38 | private _outputBuffer: string = '';
39 | private _outputListeners: string[] = [];
40 |
41 | constructor()
42 | {
43 | this._state = 'IDLE';
44 | }
45 |
46 | public addMessage(message: TFrame): void
47 | {
48 | this._messages.push(message);
49 | }
50 |
51 | public get messages(): TFrame[]
52 | {
53 | return this._messages;
54 | }
55 |
56 | public log( m: string ): void
57 | {
58 | console.log( m );
59 | // Add to last message, if it exists
60 | if ( this._messages.length > 0 )
61 | {
62 | this._messages[this._messages.length-1].logs.push( [new Date(), m] );
63 | }
64 | else
65 | {
66 | throw new Error("No messages to log to - this should not be possible");
67 | }
68 | }
69 |
70 | public logs(): [Date, string] []
71 | {
72 | let logs: [Date, string] [] = [];
73 | for ( let m of this._messages )
74 | {
75 | logs = logs.concat(m.logs);
76 | }
77 | return logs;
78 | }
79 |
80 | public get state(): SessionState
81 | {
82 | return this._state;
83 | }
84 |
85 | public set state( s: SessionState )
86 | {
87 | this._state = s;
88 | }
89 |
90 | public get inputBuffer(): string
91 | {
92 | let b = this._inputBuffer;
93 | this._inputBuffer = '';
94 | return b;
95 | }
96 |
97 | public get outputBuffer(): string
98 | {
99 | let b = this._outputBuffer;
100 | this._outputBuffer = '';
101 | return b;
102 | }
103 |
104 | public recv( s: string ): void
105 | {
106 | this._inputBuffer += s;
107 | }
108 |
109 | public send( s: string ): void
110 | {
111 | // Add to response from last frame, if it exists
112 | if ( this._messages.length > 0 )
113 | {
114 | this._messages[this._messages.length-1].response = s;
115 | }
116 |
117 | this._outputBuffer += s;
118 |
119 | // Listeners are one-shot: iterate over a copy so removal is safe mid-loop.
120 | for ( let l of [...this._outputListeners] )
121 | {
122 | try
123 | {
124 | let h = getExternalWorkflowHandle(l);
125 | h.signal('output', {text: s});
126 | }
127 | catch( e: any )
128 | {
129 | console.log(`Error sending output to ${l}: ${e}. Removing from listeners`);
130 | }
131 | this.removeOutputListener(l);
132 | }
133 | }
134 |
135 | public addOutputListener( listener: string ): void
136 | {
137 | this._outputListeners.push( listener );
138 | }
139 |
140 | public removeOutputListener( listener: string ): void
141 | {
142 | this._outputListeners = this._outputListeners.filter( (l) => l != listener );
143 | }
144 |
145 | public async getInput( mh: HumanInTheLoopSession<TFrame> ): Promise<string>
146 | {
147 |
148 | // Wait for the user to respond
149 | await wf.condition( () => mh.state == 'MESSAGE_RECEIVED' );
150 | {
151 | mh.state = 'IDLE';
152 | let input = mh.inputBuffer;
153 | return input;
154 | }
155 | }
156 |
157 | public async init(): Promise< void >
158 | {
159 | setHandler(userInputSignal, ({ text }: UserInput) => {
160 | console.log(`Received input: ${text}`);
161 | this.recv( text );
162 | this.state = 'MESSAGE_RECEIVED';
163 | });
164 |
165 | setHandler(userOutputListenerSignal, async ({listener_wf, target_wf}) => {
166 | console.log(`Received output monitoring signal for ${target_wf} from ${listener_wf}`);
167 | this.addOutputListener( listener_wf );
168 | });
169 |
170 | setHandler(getOutputBuffer, () => {return this.outputBuffer;});
171 | }
172 | }
173 |
174 |
175 | export async function send( wfid: string, message: FrameInput ): Promise<void>
176 | {
177 | const handle = getExternalWorkflowHandle(wfid);
178 | await handle.signal( 'input', message );
179 | }
180 |
181 | export async function read( wfid: string ): Promise< string >
182 | {
183 | let waiting = true;
184 | let rtext = '';
185 | setHandler( userOutputSignal, ( {text} ) => {
186 | console.log( `read: ${text}` );
187 | waiting = false;
188 | rtext = text;
189 | });
190 |
191 | let me = workflowInfo().workflowId;
192 | const handle = getExternalWorkflowHandle(wfid);
193 | await handle.signal( 'output_listener', {listener_wf: me, target_wf: wfid} );
194 | await wf.condition( () => !waiting );
195 | return rtext;
196 | }
197 |
198 | export async function sendread( wfid: string, message: Frame ): Promise< string >
199 | {
200 | let waiting = true;
201 | let rtext = '';
202 | setHandler( userOutputSignal, ( {text} ) => {
203 | console.log( `read: ${text}` );
204 | waiting = false;
205 | rtext = text;
206 | });
207 |
208 | let me = workflowInfo().workflowId;
209 | const handle = getExternalWorkflowHandle(wfid);
210 | await handle.signal( 'output_listener', {listener_wf: me, target_wf: wfid} );
211 | await send( wfid, message );
212 | await wf.condition( () => !waiting );
213 |
214 | return rtext;
215 | }
216 |
217 | export async function testSession( first_message: Frame )
218 | {
219 | let session = new HumanInTheLoopSession< Frame >();
220 | await session.init();
221 |
222 | // Start the session
223 | session.addMessage( {...first_message} );
224 | session.log( "Session started" );
225 |
226 | while( true )
227 | {
228 | let timeout_promise = sleep("10 seconds").then( () => undefined );
229 | let input_promise = session.getInput( session );
230 | let p = await Promise.race( [timeout_promise, input_promise] );
231 | if ( p === undefined )
232 | {
233 | session.log( "Session timed out" );
234 | break;
235 | }
236 | let input = p as string;
237 |
238 | session.addMessage({text: input, ts: new Date(), logs: []});
239 | session.log( "User input: " + input );
240 | let response = await pf.promptTemplate(
241 | `User: {{{input}}}
242 | Response:`, { input: input }, 10, 512 );
243 | session.log( "Response: " + response );
244 | session.send( response );
245 | }
246 | }
247 |
--------------------------------------------------------------------------------
/src/workflows/sql2llm.ts:
--------------------------------------------------------------------------------
1 | import * as workflows from '../workflows';
2 | import { proxyActivities, sleep } from '@temporalio/workflow';
3 | import * as activities from '../activities';
4 | import { Frame, HumanInTheLoopSession } from '../workflows/session';
5 |
6 |
7 |
8 |
9 | const { split_text_by_tokens, sql2llm_session_multiplexer, parse_and_fix_csv } = proxyActivities< typeof activities >({ startToCloseTimeout: '10 minute' });
10 |
11 | export interface SQL2LLMInput extends Frame {
12 | dbname: string;
13 | query: string;
14 | fields: string[];
15 | context: string | null;
16 | result?: SQL2LLMOutput;
17 | }
18 |
19 | export interface SQL2LLMOutput extends Frame {
20 | query: string;
21 | fields: string[];
22 | result: any[];
23 | status: 200 | 500;
24 | error?: string;
25 | nSQL_query?: string;
26 | }
27 |
28 | class TSession extends HumanInTheLoopSession< SQL2LLMInput > {
29 | dbname: string;
30 | fields: string[];
31 |
32 | constructor( dbname: string, fields: string[] ) {
33 | super();
34 | this.dbname = dbname;
35 | this.fields = fields;
36 | }
37 | }
38 |
39 | export async function SQL2LLM_session( dbname: string, fields: string[], context: string | null ): Promise< void >
40 | {
41 | let session = new TSession( dbname, fields );
42 | await session.init();
43 |
44 | while( true )
45 | {
46 | let input = await session.getInput( session );
47 | session.addMessage( {logs: [], query: input, text: input, ts: new Date(), fields: fields, dbname: dbname, context: context} );
48 | session.log( "User input [dialog]: " + input );
49 |
50 | let ret = await SQL2LLM_wf( session.messages[session.messages.length-1], session );
51 | session.messages[session.messages.length-1].response = JSON.stringify(ret.result);
52 | session.messages[session.messages.length-1].query = ret.query;
53 | session.send( JSON.stringify(ret) );
54 | }
55 | }
56 |
57 |
58 | export async function SQL2LLM_wf( input: SQL2LLMInput, session: TSession ): Promise< SQL2LLMOutput >
59 | {
60 | let fields = session.fields.map( (f) => { return `"${f}"` } );
61 | let columns = fields.join( ',' );
62 | let p = '';
63 |
64 | let context = '';
65 | if ( input.context )
66 | {
67 | context = `==================\nData:\n${input.context}\n==================\n\n`;
68 | }
69 |
70 | // Build the prompt history oldest-to-newest, without mutating session.messages.
71 | let history = session.messages.map( (m) => {
72 | return `${context}${session.dbname}> ${m.query}\nESCAPED CSV RESULT\n==========\n${columns}\n`
73 | });
74 |
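// For illustration (the dbname and query are made up), one history turn renders as:
//   mydb> SELECT name FROM users
//   ESCAPED CSV RESULT
//   ==========
//   "name"
// which the template below prefixes with the `.mode CSV` preamble.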
75 |
76 | p += `{{{dbname}}}> .mode CSV
77 | CSV mode on.
78 | {{{history}}}`;
79 |
80 | let objs = {dbname: session.dbname, history: history, context: context};
81 |
82 | let noStopToken = false;
83 | let result = "";
84 | while( noStopToken == false )
85 | {
86 | let ret = await workflows.promptTemplate( p + result, objs, 48, 1024, 0, "gpt-3", `${session.dbname}>` );
87 | ret = ret.replace( /^\s+/, '' );
88 | ret = ret.replace( /\s+$/, '' );
89 |
90 | if ( ret.endsWith('==========') )
91 | {
92 | ret = ret.replace( /==========$/, '' );
93 | }
94 |
95 | if ( ret.length == 0 ) noStopToken = true;
96 | else {
97 | console.log( "Trying one more run.");
98 | if ( result.endsWith('"') && ret.startsWith('"') )
99 | {
100 | result += "\n";
101 | }
102 | result += ret;
103 | }
104 | }
105 | // Take the accumulated result and parse it as CSV. Fix it if necessary.
106 | let parsed: any[] = await parse_and_fix_csv( columns + "\n" + result );
107 | // Walk through the parsed rows and strip leading/trailing whitespace and quote characters from every value
108 | for( let i = 0; i < parsed.length; i++ )
109 | {
110 | for( const key of Object.keys( parsed[i] ) )
111 | {
112 | parsed[i][key] = String( parsed[i][key] ).replace( /^\s+/, '' ).replace( /\s+$/, '' ).replace( /^'/, '' ).replace( /'$/, '' ).replace( /^"/, '' ).replace( /"$/, '' );
113 | }
114 | }
115 |
116 |
117 | return {
118 | query: input.query,
119 | fields: input.fields,
120 | result: parsed,
121 | status: 200,
122 | ts: new Date(),
123 | logs: [],
124 | text: input.query,
125 | }
126 |
127 | }
128 |
129 | export async function SQL2LLM( dbname: string | null | undefined, q: string, context: string | null, natural_language_request: boolean ): Promise< SQL2LLMOutput >
130 | {
131 | console.log( `Got query for ${dbname}: ${q}`);
132 | let refined_prompt: string = "";
133 | if ( natural_language_request )
134 | {
135 | refined_prompt = await workflows.promptTemplate(
136 | `Natural language: {{{query}}}
137 | Database: {{{dbname}}}
138 | nSQL Natural language version: `, {query: q, dbname: dbname}, 10, 256, 1, "finetuned-gpt-neox-20b"
139 | );
140 | refined_prompt = refined_prompt.replace( /^\s+/, '' ).replace( /\s+$/, '' );
141 | q = refined_prompt.replace( /[\r\n]+$/, '' );
142 | }
143 |
144 | let fieldnames_json = '["' + await workflows.promptTemplate(
145 | `Take the following SQL query: {{{sql}}}
146 |
147 | What are the field names in the result set?
148 |
149 | JSON list: [ "`, {sql: q}, 5, 128, 0.0, "text-curie-001" );
150 | let fields = JSON.parse( fieldnames_json );
151 |
152 | // If dbname is null, let's prompt text-curie-001 for it.
153 | if ( dbname == null )
154 | {
155 | dbname = (await workflows.promptTemplate(
156 | `Take the following SQL query: {{{sql}}}
157 | Database:`, {sql: q}, 1, 32, 0.0, "text-curie-001" )).replace( /^\s+/, '' ).replace( /\s+$/, '' );
158 | }
159 |
160 | if ( context )
161 | {
162 | let context_chunks: string[] = await split_text_by_tokens( context, 2048 );
163 |
164 | let results: any[] = [];
165 | if ( context_chunks.length > 1 )
166 | {
167 | let promises = context_chunks.map( async (chunk) => {
168 | let res = await sql2llm_session_multiplexer( {dbname: dbname!, fields: fields, query: q, text: q, ts: new Date(), logs: [], context: chunk} );
169 |
170 | // Add result to the results array.
171 | res.result.forEach( (r) => { results.push(r) } );
172 | });
173 | await Promise.all( promises );
174 |
175 | // Remove duplicates.
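// (Dedupe by value: serialize each row, let the Set collapse exact duplicates,
// then parse the survivors back into objects.)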
176 | let uniqueResults: any[] = Array.from(new Set(results.map( r => JSON.stringify(r) )));
177 | // Convert back to objects.
178 | uniqueResults = uniqueResults.map( r => JSON.parse(r) );
179 | console.log( `${uniqueResults.length} unique rows returned.\n\n` );
180 | return( {fields: fields, logs: [], query: q, result: uniqueResults, status: 200, text: q, ts: new Date()} );
181 | }
182 | else
183 | {
184 | let res = await sql2llm_session_multiplexer( {dbname: dbname, fields: fields, query: q, text: q, ts: new Date(), logs: [], context: context} );
185 | if ( refined_prompt.length > 0 )
186 | res.nSQL_query = refined_prompt;
187 | console.log( `${res.result.length} rows returned.\n\n` );
188 | return( res );
189 | }
190 | }
191 | else
192 | {
193 | let res = await sql2llm_session_multiplexer( {dbname: dbname, fields: fields, query: q, text: q, ts: new Date(), logs: [], context: context} );
194 | if ( refined_prompt.length > 0 )
195 | res.nSQL_query = refined_prompt;
196 | console.log( `${res.result.length} rows returned.\n\n` );
197 | return( res );
198 | }
199 | }
200 |
--------------------------------------------------------------------------------
/src/activities/llm.ts:
--------------------------------------------------------------------------------
1 | import axios from 'axios';
2 | import { Configuration, OpenAIApi } from 'openai';
3 | import fs from 'fs';
4 | import {NLPCLOUD_TOKEN, OPENAI_TOKEN} from '../config';
5 |
6 | /**
7 | * @function generateText
8 | *
9 | * @param {string} modelName Model to use for generation
10 | * @param {string} text Text to use as prompt (input)
11 | * @param {number} minLength Minimum length of generated text (not always respected by the model)
12 | * @param {number} maxLength Maximum length of generated text (this is respected by the model)
13 | * @param {boolean|null} lengthNoInput Calculate length based on prompt (input) text
14 | * @param {string|null} endSequence Stop generation when this sequence is encountered
15 | * @param {boolean} removeInput Remove prompt (input) text from generated text (don't touch this unless you know what you're doing)
16 | * @param {boolean|null} doSample Use sampling instead of greedy decoding (don't touch unless you know what you're doing)
17 | * @param {number|null} numBeams Number of beams for beam search (don't touch unless you know what you're doing)
18 | * @param {boolean|null} earlyStopping Stop when at least num_beams sentences are finished per batch (don't touch unless you know what you're doing)
19 | * @param {number|null} noRepeatNgramSize If set to int > 0, all ngrams of that size can only occur once (don't touch unless you know what you're doing)
20 | * @param {number|null} numReturnSequences Number of returned sequences for each element in the batch (don't touch unless you know what you're doing)
21 | * @param {number|null} topK Number of highest probability vocabulary tokens to keep for top-k-filtering (don't touch unless you know what you're doing)
22 | * @param {number|null} topP The cumulative probability mass of the highest-probability vocabulary tokens to keep for nucleus sampling (don't touch unless you know what you're doing)
23 | * @param {number|null} temperature How much to temper the probabilities. 0.0 will generate the same text every time, 1.0 will generate random text. You usually probably want 0.0, but start tuning here if you're not getting good results.
29 |  */
30 | export async function generateText(
31 | modelName: string,
32 | text: string,
33 | minLength: number = 10,
34 | maxLength: number = 20,
35 | lengthNoInput: boolean | null = null,
36 | endSequence: string | null = null,
37 | removeInput: boolean = true,
38 | doSample: boolean | null,
39 | numBeams: number | null,
40 | earlyStopping: boolean | null,
41 | noRepeatNgramSize: number | null,
42 | numReturnSequences: number | null,
43 | topK: number | null,
44 | topP: number | null,
45 | temperature: number | null,
46 | repetitionPenalty: number | null,
47 | lengthPenalty: number | null,
48 | badWords: boolean | null,
49 | removeEndSequence: boolean | null
50 | ): Promise<string> {
51 | const response = await axios.post(
52 | `https://api.nlpcloud.io/v1/gpu/${modelName}/generation`,
53 | {
54 | text: text,
55 | min_length: minLength,
56 | max_length: maxLength,
57 | length_no_input: lengthNoInput,
58 | end_sequence: endSequence,
59 | remove_input: removeInput,
60 | do_sample: doSample,
61 | num_beams: numBeams,
62 | early_stopping: earlyStopping,
63 | no_repeat_ngram_size: noRepeatNgramSize,
64 | num_return_sequences: numReturnSequences,
65 | topk: topK,
66 | topp: topP,
67 | temperature: temperature,
68 | repetition_penalty: repetitionPenalty,
69 | length_penalty: lengthPenalty,
70 | bad_words: badWords,
71 | remove_end_sequence: removeEndSequence
72 | },
73 | {
74 | headers: {
75 | Authorization: `Token ${NLPCLOUD_TOKEN}`,
76 | 'Content-Type': 'application/json'
77 | }
78 | }
79 | );
80 | return response.data.generated_text;
81 | }
82 | 
83 | /**
84 |  * Generates text using OpenAI's text completion API.
85 |  * @param {string} text The text to use as a prompt.
86 |  * @param {number} min_length The minimum length of the generated text.
87 |  * @param {number} max_length The maximum number of tokens to generate.
88 |  * @param {number} temperature The temperature parameter for the text generation.
89 |  * @param {number} top_p The top_p parameter for the text generation.
90 |  * @param {string} model The completion model to use; stopToken, the final parameter, optionally halts generation at the given sequence(s).
91 |  * @returns {Promise<string>} The generated text.
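 * @example
 * // Illustrative values only, not a prescription:
 * const haiku = await generateTextOpenAI(
 *   'Write a haiku about databases:', 1, 64, 0.7, 1.0, 'text-davinci-003'
 * );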
92 |  */
93 | export async function generateTextOpenAI(
94 | text: string,
95 | min_length: number,
96 | max_length: number,
97 | temperature: number,
98 | top_p: number,
99 | model:
100 | | 'text-davinci-003'
101 | | 'text-davinci-002'
102 | | 'code-davinci-002'
103 | | 'text-curie-001' = 'text-davinci-003',
104 | stopToken: string | string[] | null = null
105 | ): Promise<string> {
106 | const config = new Configuration({
107 | apiKey: OPENAI_TOKEN
108 | });
109 | const openai = new OpenAIApi(config);
110 | const response = await openai.createCompletion({
111 | model: model,
112 | prompt: text,
113 | max_tokens: max_length,
114 | temperature: temperature,
115 | top_p: top_p,
116 | frequency_penalty: 1.0,
117 | presence_penalty: 1.0,
118 | stop: stopToken
119 | });
120 | return response.data.choices![0].text!;
121 | }
122 | 
123 | export async function retryGenerateTextOpenAI(
124 | text: string,
125 | min_length: number,
126 | max_length: number,
127 | temperature: number,
128 | top_p: number,
129 | model:
130 | | 'text-davinci-003'
131 | | 'text-davinci-002'
132 | | 'code-davinci-002'
133 | | 'text-curie-001' = 'text-davinci-002',
134 | stopToken: string | string[] | null = null,
135 | delaySeconds = 1
136 | ): Promise<string> {
137 | // Log everything to log/<timestamp>.log
138 | const log = fs.createWriteStream(`log/${new Date().toISOString()}.log`, {
139 | flags: 'a'
140 | });
141 | log.write(
142 | `Min length: ${min_length} Max length: ${max_length} Temperature: ${temperature} Top_p: ${top_p} Model: ${model} Stop token: ${stopToken}\n`
143 | );
144 | log.write(`\n${text}\n`);
145 | console.log(
146 | `Min length: ${min_length} Max length: ${max_length} Temperature: ${temperature} Top_p: ${top_p} Model: ${model} Stop token: ${stopToken}`
147 | );
148 | 
149 | await new Promise((resolve) => setTimeout(resolve, Math.random() * 1500));
150 | 
151 | try {
152 | let result = await generateTextOpenAI(
153 | text,
154 | min_length,
155 | max_length,
156 | temperature,
157 | top_p,
158 | model,
159 | stopToken
160 | );
161 | console.log(`Result: ${result.length} characters returned`);
162 | log.write(`\n=======================================\n${result}\n`);
163 | log.close();
164 | return result;
165 | } catch (error: any) {
166 | log.write(`\n=======================================\n${error}\n`);
167 | log.write(
168 | `\n=======================================\n${JSON.stringify(
169 | error.response.data
170 | )}\n`
171 | );
172 | log.close();
173 | 
174 | if (error.response && error.response.status == 429) {
175 | let delay = Math.random() * 1000 * delaySeconds;
176 | console.log(`429. Waiting ${delay} ms and retrying.`);
177 | // Retry
178 | // Wait for the randomized backoff delay
179 | await new Promise((resolve) => setTimeout(resolve, delay));
180 | return await retryGenerateTextOpenAI(
181 | text,
182 | min_length,
183 | max_length,
184 | temperature,
185 | top_p,
186 | model,
187 | stopToken,
188 | delaySeconds * 2 // double the backoff window each retry (^ is XOR in JavaScript, not exponentiation)
189 | );
190 | }
191 | }
192 | 
193 | throw new Error('Failed to generate text with non-temporary error');
194 | }
195 | 
196 | 
197 | export async function keywordKeyphraseExtraction( text: string, modelName: "fast-gpt-j" | "finetuned-gpt-neox-20b" = "fast-gpt-j" ): Promise< string[] > {
198 | const response = await axios.post(
199 | `https://api.nlpcloud.io/v1/gpu/${modelName}/kw-kp-extraction`,
200 | {
201 | text: text,
202 | },
203 | {
204 | headers: {
205 | Authorization: `Token ${NLPCLOUD_TOKEN}`,
206 | 'Content-Type': 'application/json'
207 | }
208 | }
209 | );
210 | return [ ...new Set< string >( response.data.keywords_and_keyphrases ) ];
211 | }
212 | 
213 | export async function Classification( text: string, categories: string[], modelName: "bart-large-mnli-yahoo-answers" | "xlm-roberta-large-xnli" | "fast-gpt-j" | "finetuned-gpt-neox-20b" = "bart-large-mnli-yahoo-answers" ): Promise< {labels: string[]; scores: number[]} > {
214 | const response = await axios.post(
215 | `https://api.nlpcloud.io/v1/gpu/${modelName}/classification`,
216 | {
217 | text: text,
218 | labels: categories,
219 | "multi_class": true,
220 | },
221 | {
222 | headers: {
223 | Authorization: `Token ${NLPCLOUD_TOKEN}`,
224 | 'Content-Type': 'application/json'
225 | }
226 | }
227 | );
228 | console.log( response.data );
229 | return { labels: response.data.labels, scores: response.data.scores };
230 | }
231 | 
-------------------------------------------------------------------------------- /src/workflows/prompt.ts: --------------------------------------------------------------------------------
 1 | import { proxyActivities } from '@temporalio/workflow';
 2 | import { default as mustache } from 'mustache';
 3 | 
 4 | import * as llm from './llm';
 5 | import * as elastic from '../activities/elastic';
 6 | import * as tokenizer from '../activities/tokenizer';
 7 | import * as vector_search from '../activities/vector_search';
 8 | 
 9 | import { logger } from './util';
10 | import { embeddingsFromTextSearch } from './embeddings';
11 | 
12 | const { tokenize_native } = proxyActivities<typeof tokenizer>({ startToCloseTimeout: '10 minute' });
13 | const { es_context } = proxyActivities<typeof elastic>({ startToCloseTimeout: '10 minute' });
14 | 
15 | 
16 | export async function promptTemplate<T>(
17 | template: string,
18 | variables: T,
19 | minLength: number = 1,
20 | maxLength: number = 50,
21 | temperature: number = 0.0,
22 | model: llm.llm_models = 'gpt-3',
23 | endSequence: string | null = null,
24 | ): Promise<string> {
25 | console.log("OK, got into promptTemplate");
26 | let prompt = mustache.render(template, variables);
27 | console.log("Rendered mustache:\n" + prompt);
28 | let response = await llm.minGenerate(
29 | prompt,
30 | minLength,
31 | maxLength,
32 | temperature,
33 | endSequence,
34 | model
35 | );
36 | console.log("Got response:\n" + response);
37 | return response;
38 | }
39 | 
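// A minimal usage sketch for promptTemplate (prompt and budgets are
// illustrative; the triple-stache {{{...}}} keeps mustache from
// HTML-escaping the interpolated values):
//
//   const capital = await promptTemplate(
//     'Q: What is the capital of {{{country}}}?\nA:',
//     { country: 'France' },
//     1, 16, 0.0
//   );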
40 | const token_word_ratio = 0.5;
41 | /**
42 |  * @function splitPromptTemplateByTokens
43 |  * @param data Text to split into token-bounded chunks
44 |  * @param template Prompt template with a {{{chunk}}} placeholder
45 |  * @param minLength Minimum completion length per chunk
46 |  * @param maxLength Maximum completion length per chunk
47 |  * @param temperature Sampling temperature
48 |  * @returns Array of [chunk, completion] pairs
49 |  */
50 | export async function splitPromptTemplateByTokens(
51 | data: string,
52 | template: string,
53 | minLength: number = 1,
54 | maxLength: number = 50,
55 | temperature: number = 0.0
56 | ): Promise<Array<[string, string]>> {
57 | let template_token_length = await tokenize_native(template).then(
58 | (r) => r.length
59 | );
60 | 
61 | let max_tokens = 1024;
62 | let tokens_left = max_tokens - maxLength - template_token_length;
63 | let tokens = await tokenize_native(data);
64 | 
65 | let token_count = Math.floor(tokens_left * token_word_ratio);
66 | let token_split = splitUp(tokens, token_count);
67 | let token_split_promises = token_split.map(async (t) => {
68 | let p = await promptTemplate(
69 | template,
70 | { chunk: t.join(' ') },
71 | minLength,
72 | maxLength,
73 | temperature
74 | );
75 | return [t.join(' '), p];
76 | });
77 | let token_split_results = await Promise.all(token_split_promises);
78 | let finalret = new Array<[string, string]>();
79 | token_split_results.map((r) => {
80 | finalret.push([r[0], r[1]]);
81 | });
82 | 
83 | return finalret;
84 | }
85 | 
86 | export async function splitPromptTemplateByLinesOfTokens(
87 | data: string,
88 | template: string,
89 | minLength: number = 1,
90 | maxLength: number = 50,
91 | temperature: number = 0.0
92 | ): Promise<Array<[string, string, number[]]>> {
93 | let template_token_length = await tokenize_native(template).then(
94 | (r) => r.length
95 | );
96 | 
97 | let max_tokens = 1768;
98 | let tokens_left = max_tokens - maxLength - template_token_length;
99 | let lines: Array<string> = data.split('\n');
100 | 
101 | // Loop through lines and add them to a prompt until it fills up max_tokens, and then run it through promptTemplate
102 | let current_lines: string = '';
103 | let current_lines_tokens = 0;
104 | let current_lines_linenos: number[] = [];
105 | let chunks = new Array<[string, number[]]>();
106 | 
107 | for (let i = 0; i < lines.length; ++i) {
108 | let tokens = await tokenize_native(lines[i]);
109 | if (current_lines_tokens + tokens.length <= tokens_left) {
110 | current_lines_linenos.push(i);
111 | 
112 | if (current_lines == '') {
113 | current_lines += lines[i];
114 | } else {
115 | current_lines += '\n' + lines[i];
116 | }
117 | current_lines_tokens += tokens.length;
118 | } else {
119 | chunks.push([current_lines, current_lines_linenos]);
120 | // Start the next chunk with the current line so it is not dropped (an
121 | // oversized single line still becomes its own chunk rather than looping).
122 | current_lines = lines[i];
123 | current_lines_tokens = tokens.length;
124 | current_lines_linenos = [i];
125 | }
126 | }
127 | 
128 | if (current_lines.length > 0) {
129 | chunks.push([current_lines, [...current_lines_linenos, lines.length]]);
130 | }
131 | 
132 | console.log(`We are processing ${chunks.length} chunks`);
133 | 
134 | let allchunks = chunks.map(
135 | async (chunk, i, a): Promise<[string, string, number[]]> => {
136 | let p = promptTemplate(
137 | template,
138 | { chunk: chunk[0] },
139 | minLength,
140 | maxLength,
141 | temperature
142 | );
143 | return [chunk[0], await p, chunk[1]];
144 | }
145 | );
146 | 
147 | return Promise.all(allchunks);
148 | }
149 | 
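// Usage sketch: fan a long document out across token-bounded chunks; the
// template is assumed to carry a {{{chunk}}} placeholder, as elsewhere in
// this file. Each result row is [chunk_text, completion, line_numbers]:
//
//   const rows = await splitPromptTemplateByLinesOfTokens(
//     longDocument,
//     'Summarize the following text:\n\n{{{chunk}}}\n\nSummary:'
//   );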
| // \`\`\` 175 | // {{{inPrompt}}} 176 | // \`\`\` 177 | // {{{instructions}}}`, {inPrompt: xprompt, preamble: preamble, instructions: instructions}, 10, 2000, 1 ); 178 | 179 | // return r; 180 | } 181 | 182 | export async function translateQuerySpaceToAnswerSpace( 183 | query: string 184 | ): Promise { 185 | let p = `Translate from "query space" to "answer space" while preserving the unknowns as variables. 186 | 187 | Query: How old is my dog? 188 | Answer: My dog is years old. 189 | 190 | Query: What's John's daughter's name? 191 | Answer: John's daughter's name is . 192 | 193 | Query: {{{query}}} 194 | Answer:`; 195 | 196 | let result = await promptTemplate(p, { query: query }, 2, 50, 0.0); 197 | return result; 198 | } 199 | 200 | export async function keywordsFromQuery(query: string): Promise { 201 | let p = `We are going to extract full-text search queries for the following query: 202 | 203 | Query: {{{query}}} 204 | 205 | Extract all of the relevant keywords, as well as any related keywords that you think might be useful. Separate each keyword with a comma: 206 | `; 207 | 208 | let result = await promptTemplate(p, { query: query }, 2, 50, 0.0); 209 | return result; 210 | } 211 | 212 | export interface QandA { 213 | question: string; 214 | answer: string; 215 | source_path: string; 216 | source_line: number; 217 | } 218 | 219 | export async function questionAndAnswer( 220 | index: string, 221 | query: string 222 | ): Promise { 223 | // Translate to answer space 224 | //let answer = await translateQuerySpaceToAnswerSpace( query ); 225 | 226 | // Search for the answer within the index from embeddingsFromTextSearch 227 | let results = await embeddingsFromTextSearch< any >( // TODO: this should not be an any 228 | index, 229 | await keywordsFromQuery(query), 230 | 5 231 | ); 232 | // Iterate through results, and use esquery to find and stitch together nearby lines 233 | let context: string[] = []; 234 | for (let r of results) { 235 | let nearbylines = await es_context(index, r._source.path, r._source.line); 236 | //let nearbylines = await es_query( `select * from ${index} limit 5` ); 237 | for (let n of nearbylines) { 238 | context.push(n.text); 239 | } 240 | } 241 | 242 | let prompt = `Here is a question: 243 | 244 | Question: {{{query}}} 245 | 246 | Potentially relevant semantic search results: 247 | 248 | {{{context}}} 249 | 250 | Output a JSON document which includes the following keys: [answer, answer_source, additional_answer_information, "answer_state (complete, partial, unknown)", "answer_confidence (1-100%)" ] 251 | 252 | { "answer": "`; 253 | let result = 254 | `{ "answer": "` + 255 | (await promptTemplate( 256 | prompt, 257 | { query: query, context: context.join('\n') }, 258 | 2, 259 | 250, 260 | 0.0 261 | )); 262 | try { 263 | let sresult = JSON.parse(result); 264 | return sresult; 265 | } catch (e: any) { 266 | let fixed = await fixJSON(result); 267 | return JSON.parse(fixed); 268 | } 269 | } 270 | 271 | async function fixJSON(text: string): Promise { 272 | let result = await promptTemplate( 273 | `Fix the following JSON: 274 | 275 | {{{text}}}`, 276 | { text: text }, 277 | 2, 278 | 1000, 279 | 0.0 280 | ); 281 | 282 | return result; 283 | } 284 | 285 | async function testAnswerFromQuestion( 286 | question: string, 287 | answer: string 288 | ): Promise { 289 | let p = `Question: {{{question}}} 290 | Answer: {{{answer}}} 291 | 292 | Is the answer correct? 
280 | async function testAnswerFromQuestion(
281 | question: string,
282 | answer: string
283 | ): Promise<boolean> {
284 | let p = `Question: {{{question}}}
285 | Answer: {{{answer}}}
286 | 
287 | Is the answer correct? yes/no:`;
288 | let result = await promptTemplate(
289 | p,
290 | { question: question, answer: answer },
291 | 2,
292 | 5,
293 | 0.0
294 | );
295 | return result.toLowerCase().indexOf('yes') >= 0;
296 | }
297 | 
298 | /**
299 |  *
300 |  * @param text Text to summarize which is potentially larger than the context size of the LLM model
301 |  * @param primarySummarizeTemplate Template to use for the map step summary
302 |  * @param reduceSummarizeTemplate Template to use for the reduce step summary
303 |  * @returns A summary of the text
304 |  */
305 | export async function mapreduce_summary(
306 | text: string,
307 | primarySummarizeTemplate: string = 'Analyze the following text for a detailed summary.\n\n{{{chunk}}}\n\nProvide a detailed summary:',
308 | reduceSummarizeTemplate: string = 'These are a series of summaries that you are going to summarize:\n\n{{{chunk}}}\n\nProvide a detailed summary in the third-person passive voice, removing duplicate information:'
309 | ): Promise<string> {
310 | if (text == null || text.length == 0) return '';
311 | 
312 | let completions = await mapPromptTemplate(text, primarySummarizeTemplate);
313 | return reducePromptTemplate(completions, reduceSummarizeTemplate);
314 | }
315 | 
316 | /**
317 |  * mapreduce_question_text
318 |  * @param text Text to be processed
319 |  * @param question Question to answer over the text
320 |  * @param primarySummarizeTemplate Template for the per-chunk (map) question
321 |  * @param reduceSummarizeTemplate Template for the reduce step
322 |  * @returns A promise that resolves to the final answer
323 |  */
324 | export async function mapreduce_question_text(
325 | text: string,
326 | question: string,
327 | primarySummarizeTemplate: string = `Read the following text:\n\n{{{chunk}}}\n\nQuestion:\n\n${question}\n\nAnswer:`,
328 | reduceSummarizeTemplate: string = `Question: ${question}\nPossible Answers:\n{{{chunk}}}\n\n\nQuestion: ${question}\nBest Answer:`
329 | ): Promise<string> {
330 | if (text == null || text.length == 0) return '';
331 | 
332 | let completions = await mapPromptTemplate(text, primarySummarizeTemplate);
333 | let possible_answers = completions.map(
334 | (c) => `Possible Answer: "${c}"\n`
335 | );
336 | return reducePromptTemplate(possible_answers, reduceSummarizeTemplate);
337 | }
338 | 
339 | /**
340 |  * mapPromptTemplate
341 |  * @param text Input text to be processed
342 |  * @param primarySummarizeTemplate Prompt template run on each chunk of text
343 |  * @returns List of completions from running prompt primarySummarizeTemplate on each chunk of text
344 |  */
345 | export async function mapPromptTemplate(
346 | text: string,
347 | primarySummarizeTemplate: string = 'Analyze the following text for a detailed summary.\n\n{{{chunk}}}\n\nProvide a detailed summary:'
348 | ): Promise<string[]> {
349 | logger(`We are splitting a piece of text ${text.length} characters long.`);
350 | let summaries = await splitPromptTemplateByLinesOfTokens(
351 | text,
352 | primarySummarizeTemplate,
353 | 5,
354 | 1024,
355 | 0.2
356 | );
357 | console.log(summaries);
358 | let completions = summaries.map(([c, p, l], i, a) => `${p}`);
359 | console.log(completions);
360 | return completions;
361 | }
362 | 
363 | /**
364 |  * reducePromptTemplate
365 |  *
366 |  * @param completions Array of completions, usually output from mapPromptTemplate
367 |  * @param reduceTemplate Prompt template run on completions to reduce them to a single summary
368 |  * @returns Final return value of the reduce prompt templates being run on completions from the map prompt templates.
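 * @example
 * // Sketch of the usual map/reduce pairing (templates are illustrative and
 * // assumed to contain a {{{chunk}}} placeholder):
 * // const chunks = await mapPromptTemplate(longText);
 * // const summary = await reducePromptTemplate(chunks);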
373 |  */
374 | export async function reducePromptTemplate(
375 | completions: string[],
376 | reduceTemplate: string = 'These are a series of summaries that you are going to summarize:\n\n{{{chunk}}}\n\nProvide a detailed summary, but removing duplicate information:'
377 | ): Promise<string> {
378 | if (completions.length == 1) {
379 | return completions[0];
380 | }
381 | 
382 | if (completions.length == 0) {
383 | return '';
384 | }
385 | 
386 | let iterations = 0;
387 | while (true) {
388 | logger(`Iteration ${iterations++}. We have ${completions.length} left.`);
389 | let summary = await splitPromptTemplateByLinesOfTokens(
390 | completions.join('\n\n'),
391 | reduceTemplate,
392 | 500,
393 | 1000,
394 | 0.2
395 | );
396 | let summary_completions = summary.map(([p, c, l]) => `${c}\n\n`);
397 | if (summary_completions.length == 1) {
398 | return summary_completions[0];
399 | }
400 | completions = summary_completions;
401 | }
402 | }
403 | 
-------------------------------------------------------------------------------- /src/workflows/chatbot.ts: --------------------------------------------------------------------------------
 1 | import { default as mustache } from 'mustache';
 2 | 
 3 | import * as prompt from './prompt';
 4 | import * as elastic from '../activities/elastic';
 5 | import * as util from '../activities/util';
 6 | import * as embeddings_search from '../activities/vector_search';
 7 | 
 8 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
 9 | import { ChatSession, Personality } from '../clones';
10 | import {
11 | embeddingsFromTextSearch,
12 | translateQuerySpaceToAnswerSpace
13 | } from '../workflows';
14 | import { storeEmbeddings } from './embeddings';
15 | import { minGenerate } from './llm';
16 | import { mapreduce_summary } from './prompt';
17 | import { Cog, Cogset } from '../cogs/cog';
18 | 
19 | /*
20 |  * Chatbots require prompt templates, elasticsearch queries and indexing, and the ability to execute local workflows.
21 |  */
22 | 
23 | const { promptReducer, promptTemplate } = proxyActivities<typeof prompt>({
24 | startToCloseTimeout: '10 minute'
25 | });
26 | const {
27 | es_query,
28 | es_index,
29 | } = proxyActivities<typeof elastic>({ startToCloseTimeout: '10 minute' });
30 | const {
31 | md5sum,
32 | wf_axios,
33 | executeLocalWorkflow,
34 | } = proxyActivities<typeof util>({ startToCloseTimeout: '10 minute' });
35 | const { init_elasticsearch_mappings } = proxyActivities<typeof embeddings_search>({ startToCloseTimeout: '10 minute' });
36 | 
37 | 
38 | let personality_chuck: Personality = {
39 | name: 'Chuck',
40 | personality: `Chuck is a cowboy from Arlington, Texas. He speaks with a giant Texas drawl. He's a really nice guy. He got his degree from ITT in database administration. When he talks, it's very clear that he's from Texas. Chuck is a great guy, very diligent, and extremely helpful.`,
41 | instructions: []
42 | };
43 | 
44 | let personality_nlp: Personality = {
45 | name: 'Nilp',
46 | personality:
47 | 'Nilp is a very helpful robot.',
48 | instructions: [
49 | 'Use the context to try to get a better sense of what is being asked of you, but ignore it if it is not relevant.'
50 | ]
51 | };
52 | 
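// A sketch of what defining a further personality looks like (the name,
// traits and instruction here are illustrative, not part of the shipped
// set):
//
//   let personality_ada: Personality = {
//     name: 'Ada',
//     personality: 'Ada is a meticulous, soft-spoken research librarian.',
//     instructions: ['Cite the chat history when it supports an answer.']
//   };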
53 | /**
54 |  * Chat history interface
55 |  */
56 | interface ChatHistory {
57 | sender: string;
58 | message: string;
59 | }
60 | 
61 | /**
62 |  * @function Chatbot Generates a response to a message, in character
63 |  * @param {Personality} personality Personality to respond with
64 |  * @param {number} context_length Number of previous messages to use as context
65 |  * @param {string} user User to respond to
66 |  * @param {string} message Message to respond to
67 |  * @param {ChatSession} session Chat session to use (default: new session)
68 |  * @param {boolean} runCogs Whether to run cogs (default: true)
69 |  * @example
70 |  * const response = await Chatbot( {name: "Gandalf", personality: "Wizard. Good, but unpredictable. Extremely powerful and wise.", instructions: []}, 50, 'user555555', 'Hello, there!' );
71 |  * @returns {Promise<string>}
72 |  */
73 | export async function Chatbot(
74 | personality: Personality,
75 | context_length: number,
76 | user: string,
77 | message: string,
78 | session: ChatSession = {
79 | personality: personality,
80 | user: user,
81 | messages: [],
82 | uuid: uuid4()
83 | },
84 | runCogs: boolean = true
85 | ): Promise<string> {
86 | let original_message = message;
87 | 
88 | let prompt_leading: string = '';
89 | if (message.indexOf('|') != -1) {
90 | let parts = message.split('|', 2);
91 | message = parts[0].trim();
92 | prompt_leading = parts[1].trim();
93 | }
94 | 
95 | let ch = new Array<ChatHistory>();
96 | 
97 | // Add in the last few messages from the user
98 | try {
99 | let rows = await es_query(
100 | `SELECT user, text FROM messages ORDER BY ts DESC LIMIT ${context_length}`
101 | );
102 | rows.reverse();
103 | //let rows = await es_search( 'messages', { sort: [{ ts: { order: 'desc' } }], size: context_length } );
104 | //console.log( JSON.stringify(slack_history) );
105 | rows.map((r: any) => {
106 | if (r.user && r.text && r.text.length < 300)
107 | ch.push({ sender: r.user, message: r.text });
108 | });
109 | } catch {}
110 | 
111 | let extra_ch = new Array<ChatHistory>();
112 | 
113 | let answermap: Map<string, number> = new Map();
114 | 
115 | // Now we're going to add in a few messages from a semantic search
116 | try {
117 | console.log(
118 | `We're searching for messages that are similar to "${message}"`
119 | );
120 | // Invert the query into answer space
121 | let answer = await translateQuerySpaceToAnswerSpace(message);
122 | let srows = await embeddingsFromTextSearch('messages', answer, 100);
123 | // Resort srows by 'ts' property in ascending order
124 | srows.sort((a: any, b: any) => {
125 | return a.ts - b.ts;
126 | });
127 | 
128 | srows.map((r1: any) => {
129 | let r = r1._source;
130 | if (
131 | r.user &&
132 | r.text &&
133 | !answermap.has(`${r.user}: ${r.text}`) &&
134 | extra_ch.length < 20 &&
135 | r.text.length < 300
136 | ) {
137 | extra_ch.push({ sender: r.user, message: r.text });
138 | answermap.set(`${r.user}: ${r.text}`, 1);
139 | }
140 | });
141 | console.log(
142 | `We got ${extra_ch.length} messages from the semantic search which we whittled down from ${srows.length} messages`
143 | );
144 | } catch {}
145 | 
146 | try {
147 | session.messages.map((r: any) => {
148 | if (
149 | r.user &&
150 | r.text &&
151 | !answermap.has(`${r.user}: ${r.text}`) &&
152 | extra_ch.length < 20
153 | ) {
154 | ch.push({ sender: r.user, message: r.text });
155 | answermap.set(`${r.user}: ${r.text}`, 1);
156 | }
157 | });
158 | } catch {}
159 | 
160 | // Last step: we're going to do a semantic search of potential teachings
161 | let fewshots = new Array<any>();
162 | try {
163 | let srows = await embeddingsFromTextSearch('teachings', message, 100);
164 | srows.map((r1: any) => {
165 | let r = r1._source;
166 | if (fewshots.length < 5) {
167 | fewshots.push({
168 | user: r.user,
169 | prompt: r.prompt,
170 | completion: r.completion
171 | });
172 | }
173 | });
174 | console.log(
175 | `We got ${fewshots.length} messages from the teachings search which we whittled down from ${srows.length} messages`
176 | );
177 | } catch (e: any) {
178 | console.log(`Error doing the teachings search: ${e.toString()}`);
179 | }
180 | 
181 | let fewshots_a = fewshots.map((r: any) => {
182 | // Use Mustache to render the prompt component
183 | let prompt = `{{{user}}}: {{{prompt}}}
184 | {{{name}}}: {{{completion}}}\n`;
185 | 
186 | let rendered_prompt = mustache.render(prompt, {
187 | user: r.user,
188 | prompt: r.prompt,
189 | completion: r.completion,
190 | name: personality.name
191 | });
192 | return rendered_prompt;
193 | });
194 | // '2e72b3' is a sentinel token that separates the few-shot examples in the prompt.
195 | let fewshots_joined = fewshots_a.join('2e72b3\n');
196 | 
197 | let r = minGenerate(
198 | await promptReducer(
199 | `{{{personality}}}
200 | 
201 | Today's date is {{date}}.
202 | 
203 | Recent chat history:
204 | {{#context}}
205 | {{{sender}}}: {{{message}}}
206 | {{/context}}
207 | 
208 | Extra contextual chat history:
209 | {{#extra_context}}
210 | {{{sender}}}: {{{message}}}
211 | {{/extra_context}}
212 | 
213 | Instructions:
214 | {{#instructions}}
215 | {{{.}}}
216 | {{/instructions}}
217 | 
218 | {{{fewshots}}}
219 | {{{user}}}: {{{message}}}
220 | {{{name}}}:{{{prompt_leading}}}`,
221 | {
222 | user: user,
223 | date: new Date().toISOString(),
224 | message: message,
225 | personality: personality.personality,
226 | context: ch,
227 | extra_context: extra_ch,
228 | name: personality.name,
229 | instructions: personality.instructions,
230 | prompt_leading: prompt_leading,
231 | fewshots: fewshots_joined
232 | },
233 | 'The following is a chatbot request:',
234 | 'Remove ALL unnecessary information pertaining to the request:'
235 | ),
236 | 
237 | 10,
238 | 250,
239 | 1
240 | );
241 | let chat_response = prompt_leading + (await r);
242 | // Add this to the teachings index if there's prompt leading
243 | if (prompt_leading.length > 0) {
244 | await storeEmbeddings([message], 'teachings', [
245 | {
246 | user: user,
247 | text: original_message,
248 | prompt: message,
249 | completion: chat_response,
250 | prompt_leading: prompt_leading
251 | }
252 | ]);
253 | }
254 | 
255 | /* Add to chat history */
256 | session.messages.push({
257 | user: user,
258 | text: original_message,
259 | response: chat_response,
260 | context: '',
261 | timestamp: new Date(),
262 | uuid: uuid4()
263 | });
264 | 
265 | if (runCogs) return post_message_filtering(session);
266 | return chat_response;
267 | }
268 | 
269 | /**
270 |  * Invocation of Chatbot with the personality Chuck
271 |  * @param user The user
272 |  * @param message The message
273 |  * @example ChatbotChuck('anon55', 'Hello, how are you?')
274 |  * @returns The response
275 |  */
276 | export async function ChatbotChuck(
277 | user: string,
278 | message: string
279 | ): Promise<string> {
280 | return await Chatbot(personality_chuck, 5, user, message);
281 | }
282 | 
283 | /**
284 |  * Invocation of Chatbot with the personality Nilp
285 |  * @param user The user
286 |  * @param message The message
287 |  * @example ChatbotNilp('anon55', 'Hello, how are you?')
288 |  * @returns The response
289 |  */
290 | export async function ChatbotNilp(
291 | user: string,
292 | message: string
293 | ): Promise<string> {
294 | return await Chatbot(personality_nlp, 5, user, message);
295 | }
296 | 
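// The cogs below share a common shape: new Cog(name, [], handler), where
// the handler receives the chat session, the parsed JSON arguments, and the
// message with the directive stripped. A minimal illustrative cog (not part
// of the original set):
//
//   let echo_cog = new Cog(
//     'echo',
//     [],
//     async (session: ChatSession, args: any, message_sans_args: string) =>
//       JSON.stringify(args)
//   );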
297 | type decode_function<IN> = (input: IN) => Promise<string>;
298 | type encode_function<OUT> = (output: string) => Promise<OUT>;
299 | 
300 | 
301 | let cog_axios = new Cog(
302 | 'axios',
303 | [],
304 | async (session: ChatSession, args: any, message_sans_args: string) => {
305 | let url = args['url'];
306 | if (url.indexOf('<') == 0) {
307 | url = url.substring(1);
308 | // And also remove the trailing >
309 | url = url.substring(0, url.length - 1);
310 | }
311 | let method = args['method'] || 'get';
312 | let data = args['data'] || {};
313 | let headers = args['headers'] || {};
314 | let response = await wf_axios(url, method, data, headers);
315 | return response;
316 | }
317 | );
318 | 
319 | let cog_replicate_sd = new Cog(
320 | 'replicate_sd',
321 | [],
322 | async (
323 | session: ChatSession,
324 | args: any,
325 | message_sans_args: string
326 | ): Promise<string> => {
327 | let detailedPrompt = await promptTemplate(
328 | `{{user}} has asked you for a beautiful piece of art with the following instructions:
329 | 
330 | {{{instructions}}}
331 | 
332 | You are an art expert, and you are going to combine the request with your knowledge of artists and art styles to describe in great detail what this piece of art should look like, including as much detail as possible:`,
333 | 
334 | { instructions: args['prompt'], user: session.user },
335 | 20,
336 | 500,
337 | 1.0
338 | );
339 | 
340 | let results = (
341 | await executeLocalWorkflow(
342 | 'StableDiffusionWorkflow',
343 | 'GO_TASK_QUEUE',
344 | [{ Prompt: detailedPrompt }]
345 | )
346 | ) as any;
347 | results['in_prompt'] = args['prompt'];
348 | results['out_prompt'] = detailedPrompt;
349 | return JSON.stringify(results);
350 | }
351 | );
352 | 
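// Cogs are triggered when a chatbot response embeds a directive of the form
// !cog_name:{...json args...}; post_message_filtering below extracts the
// name and arguments and dispatches to the matching cog. For example, a
// response containing
//
//   !axios:{"url":"https://example.com/api","method":"get"}
//
// would route those arguments to the 'axios' cog above.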
| ["Step 1:`, 366 | session, 367 | false, 368 | ); 369 | return plan; 370 | } 371 | ); 372 | 373 | let mapreduce_summarize_cog = new Cog( 374 | 'mapreduce_summarize', 375 | [], 376 | async (session: ChatSession, args: any, message_sans_args: string) => 377 | await mapreduce_summary(args['text']) 378 | ); 379 | let noop_cog = new Cog( 380 | 'noop', 381 | [], 382 | async (session: ChatSession, args: any, message_sans_args: string) => '' 383 | ); 384 | 385 | let cogs = new Cogset([ 386 | cog_axios, 387 | cog_replicate_sd, 388 | plan_chainprompt_cog, 389 | mapreduce_summarize_cog, 390 | mapreduce_summarize_cog, 391 | noop_cog 392 | ]); 393 | 394 | export async function IndexCog(cog: Cog) { 395 | for (let t of cog.getTeachings()) { 396 | // Calculate md5sum of a concatenation of prompt and completion 397 | let sum = await md5sum(t.prompt + t.completion); 398 | 399 | await es_index('teachings', { 400 | text: t.text, 401 | user: t.user, 402 | prompt: t.prompt, 403 | completion: t.completion, 404 | prompt_leading: t.prompt_leading, 405 | md5sum: sum 406 | }); 407 | } 408 | } 409 | 410 | async function IndexCogset(cogset: Cogset) { 411 | for (let cog of cogset.getCogs()) { 412 | await IndexCog(cog); 413 | } 414 | } 415 | 416 | export async function DumpAndRestoreCogsetTeachings() { 417 | await init_elasticsearch_mappings(); 418 | await IndexCogset(cogs); 419 | } 420 | 421 | export async function post_message_filtering( 422 | session: ChatSession 423 | ): Promise { 424 | // If the last message has !x:{} in it, we are going to grab the last message and see if we can match x to a filter event handler 425 | let last_message = session.messages[session.messages.length - 1]; 426 | 427 | // !kali_sh:{"command":"nmap 127.0.0.1"} 428 | let matches = last_message.response.match(/!([a-zA-Z0-9_]+):{(.*)}/); 429 | if (matches) { 430 | console.log(`We have a filter match: ${matches[1]}`); 431 | let filter = matches[1]; 432 | let filter_args = matches[2]; 433 | console.log(`Parsing filter args: ${filter_args}`); 434 | let filter_args_json = JSON.parse(`{${filter_args}}`); 435 | let filter_handler = cogs.getCog(filter); 436 | if (filter_handler) { 437 | let message_sans_args = last_message.response.replace(matches[0], ''); 438 | let filtered_message = await filter_handler.handle( 439 | session, 440 | filter_args_json, 441 | message_sans_args 442 | ); 443 | return filtered_message; 444 | } 445 | } 446 | 447 | console.log('No filter matching.'); 448 | return last_message.response; 449 | } 450 | -------------------------------------------------------------------------------- /src/activities/application/ijavascript.ts: -------------------------------------------------------------------------------- 1 | import { executeWorkflow, executeLocalWorkflow } from '../util'; 2 | import { 3 | generateText, 4 | generateTextOpenAI, 5 | retryGenerateTextOpenAI 6 | } from '../llm'; 7 | import axios from 'axios'; 8 | import { spawn } from 'child_process'; 9 | import fs from 'fs'; 10 | 11 | import { TEMPORAL_HOST } from '../../config'; 12 | import { OPENAI_TOKEN } from '../../config'; 13 | 14 | export function searchLoadedPackages(): [string, string][] { 15 | return [ 16 | ['x.http', 'perform HTTP requests'], 17 | // ["x.mysql", "MySQL client"], 18 | ['x.google', 'Google APIs'], 19 | ['x.nlp', 'Natural Language Processing'] 20 | // ["x.illustrations", "Illustrations"], 21 | ]; 22 | } 23 | 24 | export function inspectLoadedPackage(name: string): string[] { 25 | let package_methods = new Map(); 26 | package_methods.set('x.google', [ 27 
27 | 'async function search(query:string): Promise< {rank: number, url: string, title: string, description: string}[] >'
28 | ]);
29 | package_methods.set('x.http', [
30 | 'async function x.http.get(url: string): Promise< {status: number, statusText: string, content: any} >',
31 | 'async function x.http.post(url: string, data: any): Promise< string >',
32 | 'async function x.http.put(url: string, data: any): Promise< string >',
33 | 'async function x.http.delete(url: string): Promise< string >'
34 | ]);
35 | /* package_methods.set( "mysql",
36 | [
37 | "async function query(query: string): Promise< any >",
38 | ]);*/
39 | 
40 | package_methods.set('x.nlp', [
41 | 'async function x.nlp.summarize(text: string): Promise< string >',
42 | 'async function x.nlp.question_answer(text: string, question: string): Promise< string >',
43 | 'async function x.nlp.ai_instruction_on_text( text: string, instructions: string ): Promise< string >'
44 | ]);
45 | 
46 | package_methods.set('x.illustrations', [
47 | 'async function x.illustrations.get_illustration( description: string ): Promise< string >'
48 | ]);
49 | 
50 | return package_methods.get(name) || [];
51 | }
52 | 
53 | import repl from 'node:repl';
54 | import net from 'node:net';
55 | import Semaphore from 'semaphore-async-await';
56 | 
57 | net
58 | .createServer((socket: any) => {
59 | repl
60 | .start({
61 | prompt: '28dc9e0c ',
62 | input: socket,
63 | output: socket,
64 | 
65 | writer: (output: any) => {
66 | return JSON.stringify(output);
67 | }
68 | })
69 | .on('exit', () => {
70 | socket.end();
71 | });
72 | })
73 | .listen(5001); // must match the port the Socket client connects to below
74 | 
75 | /* Wraps a net socket in a small promise-based request/response client */
76 | class Socket {
77 | private socket: net.Socket;
78 | private lock: Semaphore;
79 | 
80 | constructor(port: number, host: string) {
81 | this.socket = net.connect(port, host);
82 | this.lock = new Semaphore(1);
83 | }
84 | 
85 | public async write(data: string): Promise<void> {
86 | return new Promise<void>((resolve, reject) => {
87 | this.socket.write(data, () => {
88 | resolve();
89 | });
90 | });
91 | }
92 | 
93 | public async read(): Promise<string> {
94 | return new Promise<string>((resolve, reject) => {
95 | this.socket.once('data', (data: any) => {
96 | resolve(data.toString());
97 | });
98 | });
99 | }
100 | 
101 | public async close(): Promise<void> {
102 | return new Promise<void>((resolve, reject) => {
103 | this.socket.end(() => {
104 | resolve();
105 | });
106 | });
107 | }
108 | 
109 | public async writeAndRead(data: string): Promise<string> {
110 | console.log(`Acquiring lock for command ${data}`);
111 | await this.lock.acquire();
112 | console.log(`Lock acquired for command ${data}`);
113 | let retval = new Promise<string>((resolve, reject) => {
114 | this.socket.once('data', (data) => resolve(data.toString()));
115 | });
116 | 
117 | await this.write(data);
118 | console.log(`Wrote command ${data}`);
119 | let response = await retval;
120 | console.log(`Read response ${response}`);
121 | this.lock.release();
122 | console.log(`Released lock for command ${data}`);
123 | 
124 | /* .break is a special command which cancels the REPL's pending multi-line input. We use it when the response begins with the continuation prompt: "... ..."
125 |  */
126 | if (response.startsWith('... ...')) {
127 | console.log(`Breaking REPL`);
128 | await this.writeAndRead('.break');
129 | return 'ERROR: Incomplete javascript statement sent. Cancelled.';
130 | }
131 | 
132 | return response;
133 | }
134 | }
135 | 
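// Sketch of the request/response cycle against the REPL server above; the
// '28dc9e0c ' prompt string doubles as an end-of-output sentinel (see
// executeJavascriptNotebook below for the real call sites):
//
//   const sock = new Socket(5001, 'localhost');
//   const raw = await sock.writeAndRead('1 + 1\n');
//   const value = raw.substring(0, raw.indexOf('28dc9e0c'));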
136 | export const google = {
137 | search: async function (query: string): Promise<any> {
138 | console.log(`Searching for ${query}`);
139 | let results = await executeWorkflow(
140 | TEMPORAL_HOST,
141 | 'GoogleSearchWorkflow',
142 | 'GO_TASK_QUEUE',
143 | [query]
144 | );
145 | console.log(`Results: ${results}`);
146 | return results;
147 | }
148 | };
149 | 
150 | export const http = {
151 | get: async function (
152 | url: string
153 | ): Promise<{ status: number; statusText: string; content: any }> {
154 | console.log(`http.get: ${url}`);
155 | try {
156 | let r = await axios.get(url);
157 | if (r.status != 200) {
158 | return { status: r.status, statusText: r.statusText, content: null };
159 | }
160 | console.log(`http.get: ${url} Status: ${r.status}`);
161 | // Is this JSON? We probably just want to return it, and worry about summarizing it later.
162 | if (r.headers['content-type'].startsWith('application/json')) {
163 | // Is r.data a string? If so, parse it.
164 | if (typeof r.data == 'string') r.data = JSON.parse(r.data);
165 | return { status: r.status, statusText: r.statusText, content: r.data };
166 | }
167 | // Is this HTML? We probably want to summarize it.
168 | if (
169 | r.data.length >= 1024 &&
170 | r.headers['content-type'].startsWith('text/html')
171 | ) {
172 | // Call out to pandoc to convert the HTML to plain text
173 | let pandoc = spawn('/usr/bin/pandoc', ['-f', 'html', '-t', 'plain'], {
174 | stdio: ['pipe', 'pipe', 'pipe']
175 | });
176 | pandoc.stdin.write(r.data);
177 | pandoc.stdin.end();
178 | let text = await new Promise<string>((resolve, reject) => {
179 | pandoc.stdout.on('data', async (data: any) => {
180 | let text = data.toString();
181 | console.log(`Pandoc Text length: ${text.length}`);
182 | resolve(text);
183 | });
184 | });
185 | return { status: r.status, statusText: r.statusText, content: text };
186 | }
187 | return { status: r.status, statusText: r.statusText, content: r.data };
188 | } catch (e: any) {
189 | console.log(`http.get: ${url} Error: ${e}`);
190 | return { status: 500, statusText: e, content: null };
191 | }
192 | }
193 | };
194 | 
195 | export const nlp = {
196 | summarize: async function (text: string): Promise<string> {
197 | if (text == null || text.length == 0) {
198 | return 'ERROR: no text provided';
199 | }
200 | // if the text is actually an object, we should also return an error
201 | if (typeof text == 'object') {
202 | return 'ERROR: text is not a string';
203 | }
204 | console.log(`Summarizing: ${text}`);
205 | let r = await executeLocalWorkflow('mapreduce_summary', 'hello-world', [
206 | text
207 | ]);
208 | console.log(`Result: ${r}`);
209 | return r;
210 | },
211 | question_answer: async function (
212 | text: string,
213 | question: string
214 | ): Promise<string> {
215 | console.log(`Answering question over: ${text}`);
216 | let r = await executeLocalWorkflow(
217 | 'mapreduce_question_text',
218 | 'hello-world',
219 | [text, question]
220 | );
221 | console.log(`Result: ${r}`);
222 | return r;
223 | },
224 | ai_instruction_on_text: async function (
225 | text: string,
226 | instructions: string
227 | ): Promise<string> {
228 | console.log(`ai_instruction: ${text}`);
229 | let r = (
230 | await executeLocalWorkflow('mapPromptTemplate', 'hello-world', [
231 | text,
232 | `{{{chunk}}}\n${instructions}:\n`
233 | ])
234 | ) as string[];
235 | let r2 = r.join('\n');
236 | console.log(`Result: ${r2}`);
237 | return r2;
238 | }
239 | };
240 | 
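// Usage sketch for the helpers above (URL is illustrative):
//
//   const page = await http.get('https://example.com/article');
//   if (page.status == 200 && typeof page.content == 'string') {
//     const summary = await nlp.summarize(page.content);
//   }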
241 | export const illustrations = {
242 | get_illustration: async function (description: string): Promise<string> {
243 | console.log(`Getting illustration for ${description}`);
244 | let r = await executeWorkflow(
245 | TEMPORAL_HOST,
246 | 'StableDiffusionWorkflow',
247 | 'GO_TASK_QUEUE',
248 | [{ Prompt: description }]
249 | );
250 | console.log(`Result: ${r}`);
251 | return r;
252 | }
253 | };
254 | 
255 | /**
256 |  * Execute JavaScript code in a sandbox with prompt chaining in notebook format
257 |  * @param p Prompt (the user's question) appended to the notebook template
258 |  */
259 | export async function executeJavascriptNotebook(
260 | p: string
261 | ): Promise<string> {
262 | console.log(`Executing Javascript Notebook: ${p}`);
263 | 
264 | let notebook = fs.readFileSync('./src/prompts/notebook-template.txt', 'utf8');
265 | notebook += p;
266 | // Template includes a variable called {{{todaysdate}}} which we want to replace with something like October 16, 2022
267 | notebook = notebook.replace(
268 | '{{{todaysdate}}}',
269 | new Date().toLocaleDateString('en-US', {
270 | month: 'long',
271 | day: 'numeric',
272 | year: 'numeric'
273 | })
274 | );
275 | // if p does not have a newline at the end, we're going to add it to notebook
276 | if (p[p.length - 1] != '\n') {
277 | notebook += '\n';
278 | }
279 | 
280 | // Execute OpenAI API call using code model
281 | let r = await retryGenerateTextOpenAI(
282 | notebook,
283 | 10,
284 | 1024,
285 | 0.0,
286 | 0.99,
287 | 'code-davinci-002',
288 | ['Question:', 'Out[', 'Out [']
289 | );
290 | let orig_completion = r;
291 | if (orig_completion.indexOf('Answer:') != -1) {
292 | // Yes, so we just need to extract and return the answer
293 | let answer = orig_completion.substring(orig_completion.indexOf('Answer:'));
294 | return answer;
295 | }
296 | 
297 | // This will probably return a string that looks like this:
298 | // "IJavascript session:\n```\nIn [1]: searchLoadedPackages()\n"
299 | // Chop off the "IJavascript session:\n```\nIn [1]: " part so we are left with nothing but the javascript part
300 | let js = r.substring(r.indexOf('In [1') + 8);
301 | 
302 | // Now we will execute the javascript via the REPL socket server above:
303 | try {
304 | // Connect a socket to localhost:5001
305 | let socket = new Socket(5001, 'localhost');
306 | // First, we import the packages we need
307 | await socket.writeAndRead("const x = require('./src/activities')\n");
308 | await socket.writeAndRead('console.log = function(x) { return x }\n');
309 | 
310 | return await executeJavascriptSnippetLoop(notebook, socket, 1, p);
311 | 
312 | /*
313 | This is all bad.
314 | 
315 | // Write the javascript to the socket
316 | let ret = await socket.writeAndRead( js );
317 | // Remove anything after 28dc9e0c in the string ret
318 | ret = ret.substring( 0, ret.indexOf( "28dc9e0c" ) );
319 | 
320 | console.log( `Javascript returned: ${ret}` );
321 | let retraw = parse( ret );
322 | 
323 | // Append the output to the notebook
324 | notebook += orig_completion;
325 | notebook += "Out [1] " + ret;
326 | notebook += '\n';
327 | return await executeJavascriptSnippetLoop( notebook, apiKey, socket, 2 );
328 | */
329 | } catch (e: any) {
330 | return js + '\n\nError: ' + e;
331 | }
332 | }
333 | 
334 | async function executeJavascriptSnippetLoop(
335 | n: string,
336 | socket: Socket,
337 | counter: number,
338 | original_question: string
339 | ): Promise<string> {
340 | console.log(`Executing Javascript Snippet count ${counter}`);
341 | 
342 | if (counter > 100) {
343 | return `>> ERROR. Too many loops (${counter}).
Aborting.`; 345 | } 346 | 347 | // Print the last 3 lines of n 348 | let lines = n.split(/\r?\n/); 349 | let last3 = lines.slice(Math.max(lines.length - 3, 0)).join('\n'); 350 | console.log(last3); 351 | 352 | let next_in_stop = `In [${counter + 1}]`; 353 | let next_out_stop = `Out[`; 354 | let error_in_stop = `Error [${counter + 1}]`; 355 | 356 | let r = await retryGenerateTextOpenAI( 357 | n, 358 | 10, 359 | 512, 360 | 0.0, 361 | 1, 362 | 'text-davinci-002', 363 | ['Question:', 'Out [', error_in_stop, next_in_stop] 364 | ); 365 | let origr = r; 366 | console.log(origr); 367 | 368 | if (origr.indexOf(next_out_stop) != -1) { 369 | // Delete this value and everything else after it 370 | r = r.substring(0, r.indexOf(next_out_stop)); 371 | origr = r; 372 | } 373 | 374 | if (origr.indexOf('Answer:') != -1) { 375 | console.log('Answer found'); 376 | try { 377 | console.log('Going to wait for the last value to be returned'); 378 | let lastret = await socket.writeAndRead('JSON.stringify(_)\n'); 379 | let answer = origr + '\n' + lastret; 380 | console.log(answer); 381 | 382 | console.log('Ask it to summarize the whole thing.'); 383 | let summary_prompt_append = 384 | "\n\nTake everything you've seen so far, and summarize it in a way that may be useful later:"; 385 | let r2 = await retryGenerateTextOpenAI( 386 | n + summary_prompt_append, 387 | 10, 388 | 512, 389 | 0.0, 390 | 1, 391 | 'text-davinci-002' 392 | ); 393 | console.log(r2); 394 | return answer + '\n\n' + r2; 395 | 396 | // console.log("Good bye!"); 397 | // return( answer ); 398 | } catch (e: any) { 399 | console.log( 400 | `Strange. We got an answer but we couldn't get the last return value: ${e}` 401 | ); 402 | return origr; 403 | } 404 | } 405 | 406 | let js = r.substring(r.indexOf(`In [${counter}`) + 8); 407 | // Trim off any leading newlines 408 | js = js.replace(/^\s+/, ''); 409 | // Trim off any trailing newlines 410 | js = js.replace(/\s+$/, ''); 411 | js = js.trim(); 412 | js = js + '\n'; 413 | // Remove all of the ...: 414 | js = js.replace(/\.\.\.\:\s+/g, ''); 415 | 416 | try { 417 | console.log(`Javascript: "${js}"`); 418 | let processedOut = await socket.writeAndRead(js); 419 | console.log(`Raw ret: <<<${processedOut}>>>\n\n=======\n`); 420 | processedOut = processedOut.substring(0, processedOut.indexOf('28dc9e0c')); 421 | if (processedOut.indexOf('Uncaught') != -1) { 422 | console.log(`Uncaught error found. Trying to figure out what it is.`); 423 | n += `In [${counter}] ${js}\n`; 424 | let errmsg = await socket.writeAndRead(`_error.message\n`); 425 | console.log(`** REAL Error message: ${errmsg}`); 426 | n += `Error [${counter}]: ${errmsg}\n`; 427 | return await executeJavascriptSnippetLoop( 428 | n, 429 | socket, 430 | counter + 1, 431 | original_question 432 | ); 433 | } 434 | 435 | // If output is longer than 80 characters, truncate, and add dots 436 | if (processedOut.length > 512) { 437 | console.log('Truncating.'); 438 | processedOut = 439 | processedOut.substring(0, 512) + 440 | `... Output truncated. 
Full output is in the 'return_${counter}' variable.`; 441 | console.log('Truncated and set lastreturn.'); 442 | } 443 | 444 | // Append the output to the notebook 445 | let varname = `return_${counter}`; 446 | console.log(`We are writing to variable ${varname}`); 447 | await socket.writeAndRead(`var ${varname} = _;\n`); 448 | n += origr; 449 | n += `Out [${counter}] ${varname} = ` + processedOut; 450 | n += '\n'; 451 | // n += "/* This is where we will"; 452 | if (counter % 3 == 0) { 453 | n += `/* Remembering our original goal: ${original_question} */\n`; 454 | } 455 | console.log(`Out [${counter}] ${varname} = ` + processedOut); 456 | 457 | if (js.startsWith('console.log')) { 458 | console.log('console.log detected. Not going to execute the next line.'); 459 | return processedOut; 460 | } 461 | 462 | return executeJavascriptSnippetLoop( 463 | n, 464 | socket, 465 | counter + 1, 466 | original_question 467 | ); 468 | } catch (e: any) { 469 | n += `Error [${counter}] ` + e; 470 | console.log(`Error: ${e} while trying to execute ${js}`); 471 | //return executeJavascriptSnippetLoop( n, apiKey, context, counter ++ ); 472 | return `Error: ${e} while trying to execute ${js}\n`; 473 | } 474 | } 475 | --------------------------------------------------------------------------------