├── src
    ├── .gitignore
    ├── examples
    │   └── repl-solver.ts
    ├── workflows
    │   ├── promptflows.ts
    │   ├── tokenizer.ts
    │   ├── util.ts
    │   ├── application
    │   │   ├── ijavascript.ts
    │   │   ├── call-transcription.ts
    │   │   └── codegrunt.ts
    │   ├── elastic.ts
    │   ├── embeddings.ts
    │   ├── mysql.ts
    │   ├── llm.ts
    │   ├── session.ts
    │   ├── sql2llm.ts
    │   ├── prompt.ts
    │   └── chatbot.ts
    ├── index.ts
    ├── codegrunt.ts
    ├── activities.ts
    ├── workflows.ts
    ├── config.ts
    ├── clones.ts
    ├── activities
    │   ├── mysql.ts
    │   ├── sql2llm.ts
    │   ├── embeddings.ts
    │   ├── tokenizer.ts
    │   ├── elastic.ts
    │   ├── vector_search.ts
    │   ├── application
    │   │   ├── codegrunt.ts
    │   │   └── ijavascript.ts
    │   ├── __tests__
    │   │   └── tokenizer.test.ts
    │   ├── util.ts
    │   └── llm.ts
    ├── cli
    │   ├── ijs.ts
    │   ├── cognosis.ts
    │   ├── recursively-summarise.ts
    │   ├── client.ts
    │   └── sql2llm.ts
    ├── worker.ts
    ├── cogs
    │   └── cog.ts
    └── prompts
    │   └── notebook-template.txt
├── tsconfig.jest.json
├── tsconfig.json
├── .prettierrc
├── services
    └── embeddings
    │   ├── download-init-embeddings.py
    │   ├── Dockerfile.x86
    │   ├── Dockerfile.apple
    │   └── embeddings-service.py
├── tsconfig.base.json
├── jest.config.js
├── Makefile
├── LICENSE
├── docker-compose.yml
├── .gitignore
├── README.md
└── package.json


/src/.gitignore:
--------------------------------------------------------------------------------
1 | local
2 | 


--------------------------------------------------------------------------------
/src/examples/repl-solver.ts:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/workflows/promptflows.ts:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/workflows/tokenizer.ts:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tsconfig.jest.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "./tsconfig.base.json"
3 | }
4 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | export * as activities from './activities';
2 | export * as workflows from './workflows';
3 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "./tsconfig.base.json",
3 |   "exclude": ["**/__tests__/**/*"]
4 | }
5 | 


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 |     "semi": true,
3 |     "trailingComma": "none",
4 |     "singleQuote": true,
5 |     "printWidth": 80
6 | }


--------------------------------------------------------------------------------
/src/codegrunt.ts:
--------------------------------------------------------------------------------
1 | import * as session_wfs from './workflows/session';
2 | 
3 | export interface CodegruntFrame extends session_wfs.Frame {
4 | };
5 | 


--------------------------------------------------------------------------------
/services/embeddings/download-init-embeddings.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import json
3 | from flask import request
4 | import tensorflow_hub as hub
5 | 
6 | # USEv5 is about 100x faster than 4
7 | embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")
8 | 


--------------------------------------------------------------------------------
/tsconfig.base.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": "@tsconfig/node14/tsconfig.json",
 3 |   "version": "4.4.2",
 4 |   "compilerOptions": {
 5 |     "declaration": true,
 6 |     "declarationMap": true,
 7 |     "esModuleInterop": true,
 8 |     "sourceMap": true,
 9 |     "rootDir": "./src",
10 |     "outDir": "./dist"
11 |   },
12 |   "include": ["src/**/*.ts"]
13 | }
14 | 


--------------------------------------------------------------------------------
/services/embeddings/Dockerfile.x86:
--------------------------------------------------------------------------------
 1 | FROM tensorflow/tensorflow AS build
 2 | EXPOSE 9100
 3 | 
 4 | WORKDIR /srv
 5 | COPY ./* /srv/
 6 | RUN pip install flask tensorflow_hub
 7 | RUN python download-init-embeddings.py
 8 | 
 9 | # Package
10 | # FROM tensorflow/tensorflow
11 | # RUN apt install nodejs npm 
12 | WORKDIR /srv
13 | CMD python embeddings-service.py
14 | 


--------------------------------------------------------------------------------
/services/embeddings/Dockerfile.apple:
--------------------------------------------------------------------------------
 1 | FROM armswdev/tensorflow-arm-neoverse AS build
 2 | EXPOSE 8080
 3 | 
 4 | WORKDIR /srv
 5 | COPY ./* /srv/
 6 | RUN pip install flask tensorflow_hub
 7 | RUN python download-init-embeddings.py
 8 | 
 9 | # Package
10 | # FROM tensorflow/tensorflow
11 | # RUN apt install nodejs npm 
12 | WORKDIR /srv
13 | CMD python embeddings-service.py
14 | 


--------------------------------------------------------------------------------
/jest.config.js:
--------------------------------------------------------------------------------
 1 | /** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
 2 | module.exports = {
 3 |   preset: 'ts-jest',
 4 |   testEnvironment: 'node',
 5 |   testMatch: ['**/__tests__/**/*.test.ts'],
 6 |   transform: {
 7 |     '^.+\\.[tj]sx?$': [
 8 |       'ts-jest',
 9 |       {
10 |         tsconfig: `tsconfig.jest.json`
11 |       }
12 |     ]
13 |   }
14 | };
15 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | build-apple:
 2 | 	@make build-embeddings-apple
 3 | 
 4 | build-x86:
 5 | 	@make build-embeddings-x86
 6 | 
 7 | build-embeddings-apple:
 8 | 	@docker build -f ./services/embeddings/Dockerfile.apple -t cognosis-embeddings ./services/embeddings
 9 | 
10 | build-embeddings-x86:
11 | 	@docker build  -f ./services/embeddings/Dockerfile.x86 -t cognosis-embeddings ./services/embeddings
12 | 
13 | run:
14 | 	@docker-compose up
15 | 


--------------------------------------------------------------------------------
/src/activities.ts:
--------------------------------------------------------------------------------
 1 | export * from './activities/llm';
 2 | export * from './activities/tokenizer';
 3 | export * from './activities/elastic';
 4 | export * from './activities/mysql';
 5 | export * from './activities/embeddings';
 6 | export * from './activities/util';
 7 | export * from './activities/application/ijavascript';
 8 | export * from './activities/sql2llm';
 9 | export * from './activities/vector_search';
10 | export * from './activities/application/codegrunt';
11 | 


--------------------------------------------------------------------------------
/src/workflows.ts:
--------------------------------------------------------------------------------
 1 | export * from './workflows/llm';
 2 | export * from './workflows/elastic';
 3 | export * from './workflows/mysql';
 4 | export * from './workflows/prompt';
 5 | export * from './workflows/embeddings';
 6 | export * from './workflows/chatbot';
 7 | export * from './workflows/util';
 8 | export * from './workflows/application/ijavascript';
 9 | export * from './workflows/application/call-transcription';
10 | export * from './workflows/session';
11 | export * from './workflows/sql2llm';


--------------------------------------------------------------------------------
/src/config.ts:
--------------------------------------------------------------------------------
 1 | import process from 'process';
 2 | import * as dotenv from 'dotenv';
 3 | 
 4 | dotenv.config();
 5 | 
 6 | export const NLPCLOUD_TOKEN = process.env.NLPCLOUD_TOKEN ?? '';
 7 | export const OPENAI_TOKEN = process.env.OPENAI_TOKEN ?? '';
 8 | export const SERP_KEY = process.env.SERP_KEY ?? '';
 9 | export const TEMPORAL_HOST = process.env.TEMPORAL_HOST ?? '';
10 | export const EMBEDDINGS_URL = process.env.EMBEDDINGS_URL ?? '';
11 | export const ELASTIC_CONFIG = JSON.parse(process.env.ELASTIC_CONFIG ?? '{}');
12 | 


--------------------------------------------------------------------------------
/src/workflows/util.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import * as util from '../activities/util';
 3 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
 4 | 
 5 | const { nlp_stable_diffusion, actionLogger, md5sum, wf_axios } =
 6 |   proxyActivities<typeof util>({ startToCloseTimeout: '10 minute' });
 7 | 
 8 | export async function stable_diffusion(prompt: string): Promise<string> {
 9 |   return await nlp_stable_diffusion(prompt);
10 | }
11 | 
12 | export async function logger(msg: string) {
13 |   await actionLogger(msg);
14 | }
15 | 


--------------------------------------------------------------------------------
/src/clones.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Clones.
 3 |  *
 4 |  * This is where it all really begins. A clone is basically an NPC. It has a certain class and configuration. A personality.
 5 |  * And information that persists across a session, or perhaps even sessions, if it is part of a larger workflow.
 6 |  */
 7 | 
 8 | export interface ChatMessage {
 9 |   timestamp: Date;
10 |   uuid: string;
11 |   text: string;
12 |   response: string;
13 |   context: string;
14 |   user: string;
15 |   calledByUuid?: string;
16 | }
17 | 
18 | export interface ChatSession {
19 |   personality: Personality;
20 |   user: string;
21 |   uuid: string;
22 |   messages: ChatMessage[];
23 | }
24 | 
25 | export interface Personality {
26 |   name: string;
27 |   personality: string;
28 |   instructions: string[];
29 | }
30 | 


--------------------------------------------------------------------------------
/src/activities/mysql.ts:
--------------------------------------------------------------------------------
 1 | import { default as mysql } from 'mysql2/promise';
 2 | 
 3 | /**
 4 |  * @param {string} dbhost
 5 |  * @param {string} dbuser
 6 |  * @param {string} dbpassword
 7 |  * @param {string} dbname
 8 |  * @param {string} sql
 9 |  * @param {Array<any>} parameters
10 |  * @returns {Promise<Array<any>>}
11 |  */
12 | export async function dbquery<T>(
13 |   dbhost: string,
14 |   dbuser: string,
15 |   dbpassword: string,
16 |   dbname: string,
17 |   sql: string,
18 |   parameters: Array<any>
19 | ): Promise<Array<any>> {
20 |   const connection = await mysql.createConnection({
21 |     host: dbhost,
22 |     user: dbuser,
23 |     password: dbpassword,
24 |     database: dbname
25 |   });
26 |   let rows: any, fields: any;
27 |   [rows, fields] = await connection.execute(sql, parameters);
28 |   connection.end();
29 |   return rows;
30 | }
31 | 


--------------------------------------------------------------------------------
/src/workflows/application/ijavascript.ts:
--------------------------------------------------------------------------------
 1 | import * as ijavascript from '../../activities/application/ijavascript';
 2 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
 3 | 
 4 | const { executeJavascriptNotebook } = proxyActivities<typeof ijavascript>({
 5 |   startToCloseTimeout: '10 minute'
 6 | });
 7 | 
 8 | /**
 9 |  * GPT-3 can use a IPython/Jupyter notebook "memetic proxy" to follow instructions while writing code to solve a problem. This is a workflow that uses the memetic proxy to solve a problem, as described passed as a string.
10 |  * @param query Instructions to follow which GPT-3 will try to use a Javascript Notebook to compose a solution
11 |  * @example
12 |  * const result = await executeJavascriptNotebook('The number of legs a spider has multiplied by the estimated population in France');
13 |  * @returns 
14 |  */
15 | export async function IJavascript(query: string): Promise<string> {
16 |   let result = executeJavascriptNotebook(query);
17 |   return result;
18 | }
19 | 


--------------------------------------------------------------------------------
/services/embeddings/embeddings-service.py:
--------------------------------------------------------------------------------
 1 | import flask
 2 | import logging
 3 | import json
 4 | from flask import request
 5 | import tensorflow_hub as hub
 6 | 
 7 | 
 8 | log = logging.getLogger('werkzeug')
 9 | log.setLevel(logging.ERROR)
10 | app = flask.Flask('encoder')
11 | app.debug = True
12 | 
13 | @app.route('/', methods=['post'])
14 | def home():
15 |     payload = request.json  # payload should be like ["asdfasdf","asdfasdf"]
16 |     print(payload)
17 |     embeddings = embed(payload)
18 |     result = [{'vector': i.numpy().tolist(), 'string': j}
19 |               for i, j in zip(embeddings, payload)]
20 |     return flask.Response(json.dumps(result), mimetype='application/json')
21 | 
22 | 
23 | if __name__ == '__main__':
24 |     #embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
25 |     # USEv5 is about 100x faster than 4
26 |     embed = hub.load(
27 |         "https://tfhub.dev/google/universal-sentence-encoder-large/5")
28 |     app.run(host='0.0.0.0', port=9100)
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2022, Cognosis AI, LLC
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to
 5 | deal in the Software without restriction, including without limitation the
 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 7 | sell copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 | IN THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3.5"
 2 | services:
 3 |   embeddings:
 4 |     container_name: cognosis-embeddings
 5 |     image: cognosis-embeddings
 6 |     ports:
 7 |       - 9100:9100
 8 |   elasticsearch:
 9 |     container_name: cognosis-elasticsearch
10 |     environment:
11 |       - cluster.routing.allocation.disk.threshold_enabled=true
12 |       - cluster.routing.allocation.disk.watermark.low=512mb
13 |       - cluster.routing.allocation.disk.watermark.high=256mb
14 |       - cluster.routing.allocation.disk.watermark.flood_stage=128mb
15 |       - discovery.type=single-node
16 |       - ES_JAVA_OPTS=-Xms256m -Xmx256m
17 |       - xpack.security.enabled=false
18 |     image: elasticsearch:8.5.0
19 |     networks:
20 |       - temporal-network
21 |     ports:
22 |       - 9201:9201
23 |       - 9200:9200
24 |       - 9300:9300
25 |   temporal:
26 |     container_name: cognosis-temporal  
27 |     image: avalor/temporalite:0.2.0
28 |     networks:
29 |       - temporal-network
30 |     ports:
31 |       - 8233:8233
32 |       - 7233:7233
33 |     labels:
34 |       kompose.volume.type: configMap
35 |     volumes:
36 |       - ./dynamicconfig:/etc/temporal/config/dynamicconfig
37 |   
38 | networks:
39 |   temporal-network:
40 |     driver: bridge
41 |     name: temporal-network


--------------------------------------------------------------------------------
/src/workflows/elastic.ts:
--------------------------------------------------------------------------------
 1 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
 2 | 
 3 | import * as elastic from '../activities/elastic';
 4 | import * as vector_search from '../activities/vector_search';
 5 | const { es_index, es_search } = proxyActivities<typeof elastic>({startToCloseTimeout: '10 minute'});
 6 | const { es_mappings } = proxyActivities<typeof vector_search>({startToCloseTimeout: '10 minute'});
 7 | 
 8 | /**
 9 |  * @function wf_esindex
10 |  * @param {string} pindex
11 |  * @param {any} pdocument
12 |  * @description A workflow that will index a document into Elasticsearch
13 |  */
14 | export async function wf_esindex(
15 |   pindex: string,
16 |   pdocument: any
17 | ): Promise<void> {
18 |   await es_index(pindex, pdocument);
19 | }
20 | 
21 | /**
22 |  * @function wf_essearch
23 |  * @param {string} index
24 |  * @param {any} query
25 |  * @description A workflow that will search Elasticsearch
26 |  */
27 | export async function wf_essearch(index: string, query: any): Promise<any> {
28 |   let r = await es_search(index, query);
29 |   return JSON.stringify(r);
30 | }
31 | 
32 | export async function esMappings(index: string, doc: any): Promise<void> {
33 |   console.log(`Creating mappings for index ${index}: ${JSON.stringify(doc)}`);
34 |   await es_mappings(index, doc, 512);
35 | }
36 | 


--------------------------------------------------------------------------------
/src/activities/sql2llm.ts:
--------------------------------------------------------------------------------
 1 | import * as wf from '../workflows';
 2 | import { Connection, WorkflowClient } from '@temporalio/client';
 3 | import { sendread,  } from '../workflows';
 4 | import * as config from '../config';
 5 | import { nanoid } from 'nanoid';
 6 | 
 7 | export async function sql2llm_session_multiplexer( message: wf.SQL2LLMInput ): Promise< wf.SQL2LLMOutput >
 8 | {
 9 |     const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
10 |     const client = new WorkflowClient({connection});
11 |     let wfid = `sqlllm_session_001-${Math.random()}`;
12 |     let handle = client.getHandle( wfid );
13 |     try
14 |     {
15 |         let d = await handle.describe(); // TODO: if it's dead, we want to start a new one anyway
16 |         console.log( `Workflow ${wfid} already exists: ${d.status.code} ${d.status.name}` );
17 |         if ( d.status.code != 1 )
18 |         {
19 |             throw new Error("Workflow is not running. Starting a new one.");
20 |         }
21 |     }
22 |     catch( e: any )
23 |     {
24 |         console.log(`Starting wfid ${wfid}`);
25 |         handle = await client.start(wf.SQL2LLM_session, {
26 |             args: [message.dbname, message.fields, message.context],
27 |             taskQueue: 'hello-world',
28 |             workflowId: wfid,
29 |             workflowRunTimeout: '10 minutes',
30 |         });
31 |     }
32 | 
33 |     let wait = await client.start( sendread, {args: [wfid, message], taskQueue: 'hello-world', workflowId: `${wfid}-${nanoid()}`, workflowRunTimeout: '1 minute'} );
34 |     let result = await wait.result();
35 |     return JSON.parse( result );
36 | }
37 | 


--------------------------------------------------------------------------------
/src/workflows/application/call-transcription.ts:
--------------------------------------------------------------------------------
 1 | import { promptTemplate } from '../prompt';
 2 | 
 3 | /**
 4 |  * Takes a transcription of a call and returns information about the call in JSON
 5 |  * @param transcript Call transcription from call
 6 |  * @example
 7 |  * const callInfo = JSON.stringify( await getCallInfo('Caller: Hello, there!') );
 8 |  * @returns JSON string with information about the call
 9 |  */
10 | export async function TranscriptToStructuredData(
11 |   transcript: string
12 | ): Promise<string> {
13 |   let result =
14 |     '{"resolved?": "' +
15 |     (await promptTemplate(
16 |       `You are a call center manager tasked with reading call transcripts to describe call intent, carefully tracking follow-up action items, whether the underlying issue was resolved (and not just whether action was taken), how important the issue was (issue priority), and customer satisfaction from a transcript.
17 |     These are calls for a company called The Corporation, Inc, and they will be related to doing business. Don't make anything up, just use the transcript to figure out what the user is talking about.
18 | 
19 |     {{{chunk}}}
20 | 
21 |     Define a JSON object with the following keys:
22 | 
23 |     [ "issue summary (string max 128 characters)", "resolved?  (closed, escalated or opened)", "summary of steps either taken or needed for resolution (string)", "call summary (string max 128 characters)", "[Speaker:0] Satisfaction (1-5)", "Keywords (array)" ]
24 | 
25 |     {"resolved?": "`,
26 |       { chunk: transcript },
27 |       20,
28 |       500,
29 |       1.0,
30 |       'gpt-3'
31 |     ));
32 | 
33 |   return result;
34 | }
35 | 


--------------------------------------------------------------------------------
/src/cli/ijs.ts:
--------------------------------------------------------------------------------
 1 | import { Connection, WorkflowClient } from '@temporalio/client';
 2 | import { IJavascript } from '../workflows';
 3 | import { nanoid } from 'nanoid';
 4 | import fs from 'fs';
 5 | import { TEMPORAL_HOST } from '../config';
 6 | 
 7 | async function run() {
 8 |   // Connect to the default Server location (localhost:7233)
 9 |   //const connection = await Connection.connect();
10 |   const connection = await Connection.connect({
11 |     address: TEMPORAL_HOST
12 |   });
13 | 
14 |   // In production, pass options to configure TLS and other settings:
15 |   // {
16 |   //   address: 'foo.bar.tmprl.cloud',
17 |   //   tls: {}
18 |   // }
19 | 
20 |   const client = new WorkflowClient({
21 |     connection
22 |     // namespace: 'foo.bar', // connects to 'default' namespace if not specified
23 |   });
24 | 
25 |   /* Take first process.argv argument and use it as the path to the embeddings file */
26 |   const handle = await client.start(IJavascript, {
27 |     // type inference works! args: [name: string]
28 |     args: [process.argv[2]],
29 |     taskQueue: 'hello-world',
30 |     // in practice, use a meaningful business id, eg customerId or transactionId
31 |     workflowId: 'workflow-' + nanoid()
32 |     // retry: {
33 |     //   initialInterval: 1,
34 |     //   backoffCoefficient: 2,
35 |     //   maximumAttempts: 1,
36 |     //   maximumInterval: 1,
37 |     //   nonRetryableErrorTypes: ['CustomError'],
38 |     // },
39 |   });
40 | 
41 |   let result = await handle.result();
42 |   // Only grab last line.
43 |   result = result.split('\n').slice(-1)[0];
44 |   console.log(result);
45 | }
46 | 
47 | run().catch((err) => {
48 |   console.error(err);
49 |   process.exit(1);
50 | });
51 | 


--------------------------------------------------------------------------------
/src/cli/cognosis.ts:
--------------------------------------------------------------------------------
 1 | import { Connection, WorkflowClient } from '@temporalio/client';
 2 | import { testSession, sendread } from '../workflows/session';
 3 | import { nanoid } from 'nanoid';
 4 | import fs from 'fs';
 5 | import { argv } from 'process';
 6 | import * as config from '../config';
 7 | 
 8 | async function run() {
 9 |   const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
10 |   const client = new WorkflowClient({
11 |     connection
12 |   });
13 | 
14 |   let wfid = 'workflow-chatbot-session-6';
15 |   let handle = client.getHandle( wfid );
16 |   try
17 |   {
18 |     let d = await handle.describe(); // TODO: if it's dead, we want to start a new one anyway
19 |     console.log( `Workflow ${wfid} already exists: ${d.status.code} ${d.status.name}` );
20 |     if ( d.status.code != 1 )
21 |     {
22 |       throw new Error("Workflow is not running. Starting a new one.");
23 |     }
24 |   }
25 |   catch( e: any )
26 |   {
27 |     console.log(`Starting wfid ${wfid}`);
28 |     handle = await client.start(testSession, {
29 |       // type inference works! args: [name: string]
30 |       args: [{ts: new Date(), text: "Hello, world!", logs: []}],
31 |       taskQueue: 'hello-world',
32 |       // in practice, use a meaningful business id, eg customerId or transactionId
33 |       workflowId: wfid,
34 |       workflowRunTimeout: '10 minutes',
35 |     });
36 |   }
37 | 
38 |   let wait = await client.start( sendread, {args: [wfid, {text: argv[2] ?? "Hey, how are you?", ts: new Date(), logs: []}],taskQueue: 'hello-world', workflowId: `${wfid}-${nanoid()}`} );
39 |   console.log( await wait.result() );
40 | }
41 | 
42 | run().catch((err) => {
43 |   console.error(err);
44 |   process.exit(1);
45 | });
46 | 


--------------------------------------------------------------------------------
/src/worker.ts:
--------------------------------------------------------------------------------
 1 | import { Worker, NativeConnection } from '@temporalio/worker';
 2 | import * as activities from './activities';
 3 | import express from 'express';
 4 | 
 5 | import { TEMPORAL_HOST } from './config';
 6 | // TODO: #2 #1 configuration system
 7 | async function run() {
 8 |   const app = express();
 9 | 
10 |   /*
11 |   const port = process.env.PORT || 3000;
12 |   
13 |   app.get("/", (req, res) => {
14 |     res.send("We're awake.");
15 |   });
16 |   app.get("/health", (req, res) => {
17 |     res.send("healthy");
18 |   });
19 |   app.listen(port, () => {
20 |     console.log(`App listening on port: ${port}`);
21 |   })
22 |   
23 |   // Sleep for a second
24 |   await new Promise((resolve) => setTimeout(resolve, 1000));
25 | */
26 |   
27 |   console.log( `Connecting to Temporal ${TEMPORAL_HOST}` );
28 | 
29 |   const connection = await NativeConnection.connect({
30 |     address: TEMPORAL_HOST // defaults port to 7233 if not specified
31 |   });
32 | 
33 |   const worker = await Worker.create({
34 |     connection: connection,
35 |     workflowsPath: require.resolve('./workflows'),
36 |     activities,
37 |     taskQueue: 'hello-world',
38 |     debugMode: true
39 |   });
40 |   // Worker connects to localhost by default and uses console.error for logging.
41 |   // Customize the Worker by passing more options to create():
42 |   // https://typescript.temporal.io/api/classes/worker.Worker
43 |   // If you need to configure server connection parameters, see docs:
44 |   // https://docs.temporal.io/typescript/security#encryption-in-transit-with-mtls
45 | 
46 |   // Step 2: Start accepting tasks on the `hello-world` queue
47 |   await worker.run();
48 | }
49 | 
50 | run().catch((err) => {
51 |   console.error(err);
52 |   process.exit(1);
53 | });
54 | 


--------------------------------------------------------------------------------
/src/workflows/application/codegrunt.ts:
--------------------------------------------------------------------------------
 1 | import * as wf from '@temporalio/workflow';
 2 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
 3 | import * as activities from '../../activities';
 4 | import * as session_wfs from '../session';
 5 | import * as workflows from '../../workflows';
 6 | import * as codegrunt from '../../codegrunt';
 7 | 
 8 | const {extractFunctions} = proxyActivities<typeof activities>({startToCloseTimeout: '10 minute' });
 9 | // Permanent errors are errors that are not expected to be resolved by retrying the workflow.
10 | // For example, a permanent error could be thrown if the workflow is unable to connect to the
11 | // Temporal server or if the workflow is unable to parse the code.
12 | 
13 | export async function extractFunctions_wf( code: string, filename: string ): Promise< any[] > {
14 |     let functions = await extractFunctions( code, filename );
15 |     return functions.map( (f: any) => { return {name: f.name, signature: f.signature, leadingComment: f.leadingComment}; } );
16 | }
17 | 
18 | export async function codegrunt_session( first_message: session_wfs.Frame )
19 | {
20 |     let session = new session_wfs.HumanInTheLoopSession< codegrunt.CodegruntFrame >();
21 |     session.init();
22 | 
23 |     // Start the session
24 |     session.addMessage( {... first_message} );
25 |     session.log( "Session started" );
26 | 
27 |     while( true )
28 |     {
29 |         let input = await session.getInput( session );
30 | 
31 |         session.addMessage({text: input, ts: new Date(), logs: []});
32 |         session.log( "User input: " + input );
33 |         let response = await workflows.promptTemplate(
34 | `User: {{{input}}}
35 | Response:`, { input: input }, 10, 512 );
36 |         session.log( "Response: " + response );
37 |         session.send( response );
38 |     }
39 | }
40 | 


--------------------------------------------------------------------------------
/src/cli/recursively-summarise.ts:
--------------------------------------------------------------------------------
 1 | import { Connection, WorkflowClient } from '@temporalio/client';
 2 | import {
 3 |   mapPromptTemplate,
 4 |   mapreduce_summary,
 5 |   TranscriptToStructuredData
 6 | } from '../workflows';
 7 | import { nanoid } from 'nanoid';
 8 | import fs from 'fs';
 9 | 
10 | async function run() {
11 |   // Connect to the default Server location (localhost:7233)
12 |   const connection = await Connection.connect();
13 |   // In production, pass options to configure TLS and other settings:
14 |   // {
15 |   //   address: 'foo.bar.tmprl.cloud',
16 |   //   tls: {}
17 |   // }
18 | 
19 |   const client = new WorkflowClient({
20 |     connection
21 |     // namespace: 'foo.bar', // connects to 'default' namespace if not specified
22 |   });
23 | 
24 |   /* Take first process.argv argument and use it as the path to the embeddings file */
25 |   const path = process.argv[2];
26 |   /* Take second process.argv argument and use it as the index name */
27 |   const index = process.argv[3];
28 | 
29 |   console.log(`Indexing ${path} to ${index}`);
30 |   /* Open file, split into lines */
31 |   const lines = (await fs.promises.readFile(path)).toString();
32 | 
33 |   let prompt = `The following code is part of a TypeScript project that uses Temporal for workflow orchestration. It is the worker code.
34 | 
35 | {{{chunk}}}
36 | 
37 | Convert this code to Go:
38 | `;
39 |   const handle = await client.start(mapPromptTemplate, {
40 |     // type inference works! args: [name: string]
41 |     args: [lines, prompt],
42 |     taskQueue: 'hello-world',
43 |     // in practice, use a meaningful business id, eg customerId or transactionId
44 |     workflowId: 'workflow-' + nanoid()
45 |   });
46 |   let result = await handle.result();
47 |   console.log(result);
48 |   // Save results to a file
49 |   await fs.promises.writeFile(`${path}-out`, result);
50 | }
51 | 
52 | run().catch((err) => {
53 |   console.error(err);
54 |   process.exit(1);
55 | });
56 | 


--------------------------------------------------------------------------------
/src/cli/client.ts:
--------------------------------------------------------------------------------
 1 | import { Connection, WorkflowClient } from '@temporalio/client';
 2 | import { esMappings, storeEmbeddings } from '../workflows';
 3 | import { nanoid } from 'nanoid';
 4 | import fs from 'fs';
 5 | import * as config from '../config';
 6 | 
 7 |   
 8 | async function run() {
 9 |   // Connect to the default Server location (localhost:7233)
10 |   const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
11 |   // In production, pass options to configure TLS and other settings:
12 |   // {
13 |   //   address: 'foo.bar.tmprl.cloud',
14 |   //   tls: {}
15 |   // }
16 | 
17 |   const client = new WorkflowClient({
18 |     connection
19 |     // namespace: 'foo.bar', // connects to 'default' namespace if not specified
20 |   });
21 | 
22 |   /* Take first process.argv argument and use it as the path to the embeddings file */
23 |   const path = process.argv[2];
24 |   /* Take second process.argv argument and use it as the index name */
25 |   const index = process.argv[3];
26 | 
27 |   console.log(`Indexing ${path} to ${index}`);
28 |   /* Open file, split into lines */
29 |   const lines = (await fs.promises.readFile(path)).toString().split('\n');
30 | 
31 |   let results = new Array<Promise<any>>();
32 | 
33 |   let all_lines: string[] = [];
34 |   let all_docs: any[] = [];
35 |   // For each line, generate a workflow to store the embedding
36 |   for (let i = 0; i < lines.length; i++) {
37 |     all_lines.push(lines[i]);
38 |     all_docs.push({ path: path, line: i + 1, text: lines[i] });
39 |   }
40 | 
41 |   const handle = await client.start(storeEmbeddings, {
42 |     // type inference works! args: [name: string]
43 |     args: [all_lines, index, all_docs],
44 |     taskQueue: 'hello-world',
45 |     // in practice, use a meaningful business id, eg customerId or transactionId
46 |     workflowId: 'workflow-' + nanoid()
47 |   });
48 | 
49 |   results.push(handle.result());
50 |   await Promise.all(results);
51 |   console.log(`Indexed ${lines.length} lines`);
52 | }
53 | 
54 | run().catch((err) => {
55 |   console.error(err);
56 |   process.exit(1);
57 | });
58 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Logs
  2 | local
  3 | log
  4 | logs
  5 | *.log
  6 | npm-debug.log*
  7 | yarn-debug.log*
  8 | yarn-error.log*
  9 | lerna-debug.log*
 10 | 
 11 | # Diagnostic reports (https://nodejs.org/api/report.html)
 12 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 13 | 
 14 | # Runtime data
 15 | pids
 16 | *.pid
 17 | *.seed
 18 | *.pid.lock
 19 | 
 20 | # Directory for instrumented libs generated by jscoverage/JSCover
 21 | lib-cov
 22 | 
 23 | # Coverage directory used by tools like istanbul
 24 | coverage
 25 | *.lcov
 26 | 
 27 | # nyc test coverage
 28 | .nyc_output
 29 | 
 30 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 31 | .grunt
 32 | 
 33 | # Bower dependency directory (https://bower.io/)
 34 | bower_components
 35 | 
 36 | # node-waf configuration
 37 | .lock-wscript
 38 | 
 39 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 40 | build/Release
 41 | 
 42 | # Dependency directories
 43 | node_modules/
 44 | jspm_packages/
 45 | 
 46 | # TypeScript v1 declaration files
 47 | typings/
 48 | 
 49 | # TypeScript cache
 50 | *.tsbuildinfo
 51 | 
 52 | # Optional npm cache directory
 53 | .npm
 54 | 
 55 | # Optional eslint cache
 56 | .eslintcache
 57 | 
 58 | # Microbundle cache
 59 | .rpt2_cache/
 60 | .rts2_cache_cjs/
 61 | .rts2_cache_es/
 62 | .rts2_cache_umd/
 63 | 
 64 | # Optional REPL history
 65 | .node_repl_history
 66 | 
 67 | # Output of 'npm pack'
 68 | *.tgz
 69 | 
 70 | # Yarn Integrity file
 71 | .yarn-integrity
 72 | 
 73 | # dotenv environment variables file
 74 | .env
 75 | .env.test
 76 | 
 77 | # parcel-bundler cache (https://parceljs.org/)
 78 | .cache
 79 | 
 80 | # Next.js build output
 81 | .next
 82 | 
 83 | # Nuxt.js build / generate output
 84 | .nuxt
 85 | dist
 86 | 
 87 | # Gatsby files
 88 | .cache/
 89 | # Comment in the public line in if your project uses Gatsby and *not* Next.js
 90 | # https://nextjs.org/blog/next-9-1#public-directory-support
 91 | # public
 92 | 
 93 | # vuepress build output
 94 | .vuepress/dist
 95 | 
 96 | # Serverless directories
 97 | .serverless/
 98 | 
 99 | # FuseBox cache
100 | .fusebox/
101 | 
102 | # DynamoDB Local files
103 | .dynamodb/
104 | 
105 | # TernJS port file
106 | .tern-port
107 | 
108 | docs/
109 | 


--------------------------------------------------------------------------------
/src/cogs/cog.ts:
--------------------------------------------------------------------------------
 1 | import { ChatSession } from '../clones';
 2 | 
 3 | /* 
 4 | Example:
 5 | "text": "nilp, do an nmap on 38.101.40.11 | !kali_sh:{\"command\": \"nmap 38.101.40.11\"}",
 6 | "user": "U03UF3XNM8D",
 7 | "prompt": "nilp, do an nmap on 38.101.40.11",
 8 | "completion": "!kali_sh:{\"command\": \"nmap 38.101.40.11\"}\n2e72b3",
 9 | "prompt_leading": "!kali_sh:{\"command\": \"nmap 38.101.40.11\"}"
10 | */
11 | export interface Teaching {
12 |   text: string;
13 |   user?: string;
14 |   prompt: string;
15 |   completion: string;
16 |   prompt_leading: string;
17 | }
18 | 
19 | export type CogHandler = (
20 |   session: ChatSession,
21 |   args: any,
22 |   message_sans_args: string
23 | ) => Promise<string>;
24 | 
25 | export class Cog {
26 |   protected _id: string;
27 |   protected teachings: Teaching[];
28 |   protected coghandler: CogHandler;
29 | 
30 |   public constructor(
31 |     id: string,
32 |     teachings: Teaching[],
33 |     coghandler: CogHandler
34 |   ) {
35 |     this._id = id;
36 |     this.teachings = teachings;
37 |     this.coghandler = coghandler;
38 |   }
39 | 
40 |   public getTeachings(): Teaching[] {
41 |     return this.teachings;
42 |   }
43 | 
44 |   get id(): string {
45 |     return this._id;
46 |   }
47 | 
48 |   public async handle(
49 |     session: ChatSession,
50 |     args: any,
51 |     message_sans_args: string
52 |   ): Promise<string> {
53 |     return await this.coghandler(session, args, message_sans_args);
54 |   }
55 | }
56 | 
57 | export class Cogset {
58 |   protected cogs: Cog[];
59 |   protected cogmap: Map<string, Cog>;
60 | 
61 |   public constructor(cogs: Cog[]) {
62 |     this.cogs = cogs;
63 |     this.cogmap = new Map<string, Cog>();
64 |     for (let cog of cogs) {
65 |       this.cogmap.set(cog.id, cog);
66 |     }
67 |   }
68 | 
69 |   public getCogs(): Cog[] {
70 |     return this.cogs;
71 |   }
72 | 
73 |   public getCog(id: string): Cog | undefined {
74 |     return this.cogmap.get(id);
75 |   }
76 | 
77 |   public addCog(cog: Cog): void {
78 |     this.cogs.push(cog);
79 |     this.cogmap.set(cog.id, cog);
80 |   }
81 | 
82 |   public removeCog(cog: Cog): void {
83 |     this.cogs = this.cogs.filter((c) => c.id !== cog.id);
84 |     this.cogmap.delete(cog.id);
85 |   }
86 | }
87 | 


--------------------------------------------------------------------------------
/src/workflows/embeddings.ts:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import * as elastic from '../activities/elastic';
 3 | import * as pembeddings from '../activities/embeddings';
 4 | import * as tokenizer from '../activities/tokenizer';
 5 | import * as util from '../activities/util';
 6 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
 7 | 
 8 | const {
 9 |   embeddingsSearch,
10 |   nlp_embeddings
11 | } = proxyActivities<typeof pembeddings>({ startToCloseTimeout: '10 minute' });
12 | const { nlpcloud_tokenize, tokenize_native } = proxyActivities<
13 |   typeof tokenizer
14 | >({ startToCloseTimeout: '10 minute' });
15 | const { es_index, es_drop } = proxyActivities<typeof elastic>({
16 |   startToCloseTimeout: '10 minute'
17 | });
18 | const { nlp_stable_diffusion } = proxyActivities<typeof util>({
19 |   startToCloseTimeout: '10 minute'
20 | });
21 | 
22 | export async function embeddings(
23 |   sentences: string[]
24 | ): Promise<[string, number[]][]> {
25 |   return nlp_embeddings('paraphrase-multilingual-mpnet-base-v2', sentences);
26 | }
27 | 
28 | export async function storeEmbeddings(
29 |   sentences: string[],
30 |   index: string,
31 |   documents: any[],
32 |   alsoTokenize: boolean = false
33 | ): Promise<string> {
34 |   console.log(`Storing ${sentences.length} sentences.`);
35 |   if (documents.length != sentences.length) {
36 |     throw Error(
37 |       'Error storing embeddings: sentences and documents are of different lengths'
38 |     );
39 |   }
40 | 
41 |   let e = await embeddings(sentences);
42 | 
43 |   for (let x = 0; x < e.length; x++) {
44 |     let doc = documents[x];
45 |     console.log(doc);
46 |     doc['embeddings'] = e[x][1];
47 | 
48 |     if (alsoTokenize == true) {
49 |       doc['tokens'] = await tokenize_native(sentences[x]);
50 |     }
51 | 
52 |     console.log(doc);
53 |     let doRefresh = false;
54 |     if (x == e.length - 1) doRefresh = true;
55 |     await es_index(index, doc, doRefresh);
56 |   }
57 | 
58 |   return `Status: OK. Stored ${sentences.length} embeddings.`;
59 | }
60 | 
61 | export async function embeddingsFromTextSearch< T >(
62 |   index: string,
63 |   text: string,
64 |   k: number
65 | ): Promise< {_index: string; _id: string; _score: number; _source: T} []> {
66 |   let v = await embeddings([text]);
67 |   let vector = v[0][1];
68 |   let results = await embeddingsSearch(index, vector, k);
69 |   return( results );
70 | }
71 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Cognosis AI Platform
 2 | 
 3 | ## Summary
 4 | 
 5 | The Cognosis AI Platform contains an application server and all of the
 6 | infrastructure you need to build Large Language Model applications with,
 7 | batteries included!
 8 | 
 9 | ##
10 | 
11 | [Join our Slack!](https://slack.com/get-started?sig=enQtNDczNzkwODAxODM4NC1iMzAzNTA4MmE1Zjk1NDQyMTk5ZmZhMjFiMzMyMzJjNGMxZGUyMDBjYjA4YjcxYjJjOTRmNjRlNmY4MzU2OTU0&invite_id=I04MPSQ0JBA&redir=%2Fshare%2FenQtNDczNzkwODAxODM4NC1iMzAzNTA4MmE1Zjk1NDQyMTk5ZmZhMjFiMzMyMzJjNGMxZGUyMDBjYjA4YjcxYjJjOTRmNjRlNmY4MzU2OTU0#/sharedinvite)
12 | 
13 | ## Quickstart
14 | 
15 | Requirements:
16 | 
17 |   * Docker
18 |   * Node 19
19 | 
20 | ### Step 1: Get Infrastructure Running
21 | 
22 | ```
23 | # Clone the repo and cd into it
24 | git clone git@github.com:cognosisai/platform.git
25 | cd platform
26 |   
27 | # On Apple M1/M2 chips:
28 | make build-apple
29 | # On x86:
30 | make build-x86
31 |   
32 | # Spin it all up! This runs docker-compose up, and will get you
33 | # Elasticsearch 8.5, cognosis-embeddings service, and temporalite, which
34 | # is a single Docker container version of Temporal meant for development
35 | make run
36 | ```
37 | 
38 | ### Step 2: Configure 
39 | Edit .env, and populate it with the following information:
40 | 
41 | ```
42 | TEMPORAL_HOST="localhost:7233"
43 | ELASTIC_CONFIG={"auth":{"username":"elastic","password":"changeme"},"node":"http://localhost:9200"}
44 | OPENAI_TOKEN="<OpenAI Token>"
45 | NLPCLOUD_TOKEN="<NLP Cloud Token>"
46 | EMBEDDINGS_URL="http://localhost:9100"
47 | ```
48 | 
49 | ### Step 3: Install NPM modules
50 | 
51 | ```
52 | npm install
53 | ```
54 | 
55 | ## Step 4: Start Temporal Worker
56 | 
57 | ```
58 | ts-node src/worker.ts
59 | ```
60 | 
61 | This will take a few seconds. It will be ready when you see this:
62 | 
63 | ```
64 | 2022-11-05T23:51:17.087Z [INFO] webpack 5.74.0 compiled successfully in 717 ms
65 | 2022-11-05T23:51:17.093Z [INFO] Workflow bundle created { size: '0.91MB' }
66 | 2022-11-05T23:51:17.141Z [INFO] Worker state changed { state: 'RUNNING' }
67 | ```
68 | 
69 | ## Step 5: AI!
70 | 
71 | ```
72 | ts-node src/cli/cognosis.ts "Give me a really terrible idea involving an \
73 | icepick, and bottle of elmer's wood glue"
74 | Starting wfid workflow-fC-ONstofj4T4G9C_JQv5
75 |   
76 | Use the icepick to make a hole in the bottle of glue, then drink it.
77 | ```
78 | 
79 | ### Application Server Components
80 | 
81 | #### Elastic Search
82 | #### Temporal
83 | #### Embeddings (Tensorflow - Google USE5)
84 | #### Cognosis AI SDK
85 | 
86 | ### Cognosis AI SDK
87 | 
88 | Cognosis AI Platform includes 
89 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@cognosis/platform",
 3 |   "repository": {
 4 |     "type": "git",
 5 |     "url": "git+https://github.com/cognosisai/platform.git"
 6 |   },
 7 |   "version": "0.6.1",
 8 |   "private": false,
 9 |   "scripts": {
10 |     "build": "tsc --build",
11 |     "build.watch": "tsc --build --watch",
12 |     "prettier-format": "prettier --config .prettierrc 'src/**/*.ts' --write",
13 |     "lint": "eslint .",
14 |     "start": "ts-node src/worker.ts",
15 |     "start.watch": "nodemon src/worker.ts",
16 |     "workflow": "ts-node src/client.ts",
17 |     "test": "jest"
18 |   },
19 |   "nodemonConfig": {
20 |     "execMap": {
21 |       "ts": "ts-node"
22 |     },
23 |     "ext": "ts",
24 |     "watch": [
25 |       "src"
26 |     ]
27 |   },
28 |   "dependencies": {
29 |     "@babel/core": "^7.19.6",
30 |     "@babel/parser": "^7.19.6",
31 |     "@babel/standalone": "^7.19.6",
32 |     "@elastic/elasticsearch": "^8.2.1",
33 |     "@ngrok/ngrok-api": "^0.9.0",
34 |     "@slack/bolt": "^3.12.1",
35 |     "@temporalio/activity": "^1.0.0",
36 |     "@temporalio/client": "^1.0.0",
37 |     "@temporalio/worker": "^1.0.0",
38 |     "@temporalio/workflow": "^1.0.0",
39 |     "@tensorflow-models/universal-sentence-encoder": "^1.3.3",
40 |     "@tensorflow/tfjs": "^3.19.0",
41 |     "@types/mustache": "^4.2.1",
42 |     "@types/slack-node": "^0.1.4",
43 |     "axios": "^0.27.2",
44 |     "cli-table": "^0.3.11",
45 |     "cli-table3": "^0.6.3",
46 |     "csv-parse": "^5.3.3",
47 |     "dotenv": "^16.0.3",
48 |     "express": "^4.18.1",
49 |     "google-auth-library": "^8.5.2",
50 |     "gpt-3-encoder": "^1.1.4",
51 |     "handlebars": "^4.7.7",
52 |     "json2csv": "^5.0.7",
53 |     "json5": "^2.2.1",
54 |     "mustache": "^4.2.0",
55 |     "mysql2": "^2.3.3",
56 |     "mysql2-promise": "^0.1.4",
57 |     "nanoid": "3.x",
58 |     "nargs": "^0.0.2",
59 |     "natural": "^5.2.3",
60 |     "notion-client": "^6.13.4",
61 |     "openai": "^3.0.0",
62 |     "semaphore-async-await": "^1.5.1"
63 |   },
64 |   "devDependencies": {
65 |     "@babel/generator": "^7.19.6",
66 |     "@tsconfig/node14": "^1.0.0",
67 |     "@types/babel__core": "^7.1.19",
68 |     "@types/cheerio": "^0.22.31",
69 |     "@types/crawler": "^1.2.2",
70 |     "@types/jest": "^29.4.0",
71 |     "@types/node": "^16.11.43",
72 |     "@typescript-eslint/eslint-plugin": "^5.0.0",
73 |     "@typescript-eslint/parser": "^5.0.0",
74 |     "eslint": "^7.32.0",
75 |     "eslint-config-prettier": "^8.3.0",
76 |     "eslint-plugin-deprecation": "^1.2.1",
77 |     "nodemon": "^2.0.12",
78 |     "prettier": "^2.7.1",
79 |     "ts-jest": "^29.0.5",
80 |     "ts-node": "^10.8.1",
81 |     "typescript": "^4.4.2"
82 |   },
83 |   "main": "dist/index.js",
84 |   "types": "dist/index.d.ts",
85 |   "files": [
86 |     "dist"
87 |   ]
88 | }
89 | 


--------------------------------------------------------------------------------
/src/cli/sql2llm.ts:
--------------------------------------------------------------------------------
  1 | import { Connection, WorkflowClient } from '@temporalio/client';
  2 | import { SQL2LLM } from '../workflows';
  3 | import { nanoid } from 'nanoid';
  4 | import fs from 'fs';
  5 | import { argv } from 'process';
  6 | import * as config from '../config';
  7 | import cli_table from 'cli-table3';
  8 | 
  9 | const yargs = require('yargs');
 10 | 
 11 | const options = yargs
 12 |     .usage('Usage: sql2llm -d <database name> [-n] -q <query> [ -f <file>] [-j] [-c] [-t <timeout>]')
 13 |     .option('d', {
 14 |         alias: 'database-name',
 15 |         description: 'Name of the database to query',
 16 |         type: 'string',
 17 |     })
 18 |     .option('n', {
 19 |         alias: 'natural-language-request',
 20 |         description: 'Option to process <query> as natural language into a nSQL Natural Language Query',
 21 |         type: 'boolean'
 22 |     })
 23 |     .option('q', {
 24 |         alias: 'query',
 25 |         description: 'SQL query to run',
 26 |         type: 'string',
 27 |         demandOption: true
 28 |     })
 29 |     .option('f', {
 30 |         alias: 'file',
 31 |         description: 'Option to include <file> as data to query in context',
 32 |         type: 'string'
 33 |     })
 34 |     .option('j', {
 35 |         alias: 'json',
 36 |         description: 'Return results as JSON instead of displaying with cli-table3',
 37 |         type: 'boolean'
 38 |     })
 39 |     .option('c', {
 40 |         alias: 'csv',
 41 |         description: 'Return results as CSV instead of displaying with cli-table3',
 42 |         type: 'boolean'
 43 |     })
 44 |     .option('t', {
 45 |         alias: 'timeout',
 46 |         description: 'Timeout for workflow',
 47 |         type: 'string'
 48 |     })
 49 |     .argv;
 50 | 
 51 | async function run() {
 52 |   const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
 53 |   const client = new WorkflowClient({
 54 |     connection
 55 |   });
 56 | 
 57 |   if ( true ) {
 58 |     // load file from disk in argv0
 59 |     let text = null;
 60 |     if ( options.file )
 61 |     {
 62 |       if (options.file === '-') {
 63 |         text = fs.readFileSync(0).toString();
 64 |       } else {
 65 |         const file = fs.readFileSync( options.file );
 66 |         text = file.toString();
 67 |       }
 68 |     }
 69 | 
 70 |     let handle = await client.start(SQL2LLM, {
 71 |       // type inference works! args: [name: string]
 72 |       args: [ options.databaseName, options.query, text, options['natural-language-request'] ],
 73 |       taskQueue: 'hello-world',
 74 |       // in practice, use a meaningful business id, eg customerId or transactionId
 75 |       workflowId: nanoid(),
 76 |       workflowRunTimeout: options.timeout || '30 seconds',
 77 |     });
 78 | 
 79 |     let output = await handle.result();
 80 |     let result = output.result;
 81 |     if ( result.length == 0 )
 82 |     {
 83 |       console.log( "No results." );
 84 |       process.exit( 0 );
 85 |     }
 86 | 
 87 |     if (options.json) {
 88 |       console.log(JSON.stringify(result, null, 2));
 89 |     } else if (options.csv) {
 90 |       const json2csv = require('json2csv');
 91 | 
 92 |       const fields = Object.keys( result[0] );
 93 |       const opts = { fields };
 94 |       try {
 95 |         const csv = json2csv.parse(result, opts);
 96 |         console.log(csv);
 97 |       } catch (err) {
 98 |         console.error(err);
 99 |       }
100 |     } else {
101 |       let keys = Object.keys( result[0] );
102 |       var table = new cli_table( {head: keys} );
103 | 
104 |       result.forEach( (v, i, a) => {
105 |         table.push( <any> Object.values(v) );
106 |       });
107 | 
108 |       if ( output.nSQL_query )
109 |         console.log( `Query: ${output.nSQL_query}` );
110 |       console.log( table.toString() );
111 |       console.log( `${result.length} rows with ${keys.length} columns returned.` );
112 |     }
113 |   }
114 | 
115 | 
116 | }
117 | 
118 | run().catch((err) => {
119 |   console.error(err);
120 |   process.exit(1);
121 | });


--------------------------------------------------------------------------------
/src/workflows/mysql.ts:
--------------------------------------------------------------------------------
  1 | import * as mysql from '../activities/mysql';
  2 | import * as prompt from './prompt';
  3 | 
  4 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
  5 | 
  6 | const { dbquery } = proxyActivities<typeof mysql>({
  7 |   startToCloseTimeout: '10 minute'
  8 | });
  9 | 
 10 | const {
 11 |   promptReducer,
 12 |   promptTemplate,
 13 |   splitPromptTemplateByLinesOfTokens,
 14 |   splitPromptTemplateByTokens
 15 | } = proxyActivities<typeof prompt>({ startToCloseTimeout: '10 minute' });
 16 | 
 17 | /**
 18 |  * @function mysqlQuery
 19 |  * @param {string} dbhost
 20 |  * @param {string} dbuser
 21 |  * @param {string} dbpassword
 22 |  * @param {string} dbname
 23 |  * @param {string} sql
 24 |  * @param {Array<any>} parameters
 25 |  * @description A workflow that simply calls an activity
 26 |  */
 27 | export async function mysqlQuery<T>(
 28 |   dbhost: string,
 29 |   dbuser: string,
 30 |   dbpassword: string,
 31 |   dbname: string,
 32 |   sql: string,
 33 |   parameters: Array<any>
 34 | ): Promise<Array<T>> {
 35 |   return await dbquery(dbhost, dbuser, dbpassword, dbname, sql, parameters);
 36 | }
 37 | 
 38 | /**
 39 |  * @function xNLPonDB
 40 |  * @param {string} query - The natural language query to parse.
 41 |  * @returns {Promise<any>} - The results of the SQL query.
 42 |  * @description Takes a natural language query and translates it into SQL.
 43 |  */
 44 | export async function xNLPonDB(
 45 |   host: string,
 46 |   username: string,
 47 |   password: string,
 48 |   dbname: string,
 49 |   query: string
 50 | ): Promise<any> {
 51 |   /* We start out by getting the table schema. */
 52 |   /* TODO: This should be cached. */
 53 |   interface tableschema {
 54 |     TABLE_NAME: string;
 55 |     COLUMN_NAME: string;
 56 |   }
 57 | 
 58 |   let results: tableschema[] = await mysqlQuery(
 59 |     host,
 60 |     username,
 61 |     password,
 62 |     dbname,
 63 |     'SELECT TABLE_NAME, COLUMN_NAME FROM information_schema.columns WHERE TABLE_SCHEMA = ?',
 64 |     [dbname]
 65 |   );
 66 |   let tables: Map<string, boolean> = new Map<string, boolean>();
 67 | 
 68 |   /* Next, we come up with a list of tables, and we ask it to identify the tables that it needs the schema for this query. */
 69 |   results.map((table_column) => {
 70 |     tables.set(table_column.TABLE_NAME, true);
 71 |   });
 72 | 
 73 |   let tablenames = Array.from(tables.keys());
 74 |   let tablesColumns = new Map<string, Array<string>>();
 75 | 
 76 |   /* Now, we get the schema for each table. */
 77 |   for (let table of tablenames) {
 78 |     tablesColumns.set(table, new Array<string>());
 79 |   }
 80 | 
 81 |   results.map((table_column) => {
 82 |     tablesColumns.get(table_column.TABLE_NAME)!.push(table_column.COLUMN_NAME);
 83 |   });
 84 | 
 85 |   let prompt = `SHOW TABLES;
 86 |  {{#tablenames}}
 87 |  {{{.}}}
 88 |  {{/tablenames}}
 89 |  
 90 |  For the natural language query "{{{query}}}", generate a comma-separated list of tables should we search, only from the above tables:`;
 91 |   let r = await promptTemplate(
 92 |     prompt,
 93 |     { tablenames: tablenames, query: query },
 94 |     10,
 95 |     50,
 96 |     1.0
 97 |   );
 98 |   let relevant_tables = r.split(/\s?,/);
 99 |   prompt = 'Here are the tables you have available:';
100 |   for (let table of relevant_tables) {
101 |     table = table.trim();
102 |     table = table.replace(/^\n+/, '').replace(/\n+$/, '').toLowerCase();
103 |     if (tablesColumns.get(table)) {
104 |       prompt += `CREATE TABLE ${table} (`;
105 |       prompt += tablesColumns.get(table)!.join(',');
106 |       prompt += ');\n';
107 |     }
108 |   }
109 |   prompt +=
110 |     '\n/* Using only the above tables, a single SQL query for "{{{query}}}": */ SELECT';
111 |   r =
112 |     'SELECT ' + (await promptTemplate(prompt, { query: query }, 10, 300, 1.0));
113 | 
114 |   let result2 = await mysqlQuery(host, username, password, dbname, r, []);
115 |   return r + '\n\n' + JSON.stringify(result2);
116 | }
117 | 
118 | export async function NLPonDB(query: string): Promise<any> {
119 |   try {
120 |     return await xNLPonDB(query, 'xx', 'yy', 'zz', '11');
121 |   } catch (e: any) {
122 |     return e.toString();
123 |   }
124 | }
125 | 


--------------------------------------------------------------------------------
/src/activities/embeddings.ts:
--------------------------------------------------------------------------------
  1 | import axios from 'axios';
  2 | import { Connection, WorkflowClient } from '@temporalio/client';
  3 | import { nanoid } from 'nanoid';
  4 | import { spawn } from 'child_process';
  5 | import fs from 'fs';
  6 | import {
  7 |   es_drop,
  8 | } from './elastic';
  9 | 
 10 | import { EMBEDDINGS_URL } from '../config';
 11 | import { embeddings_search } from './vector_search';
 12 | 
 13 | /**
 14 |  * @function nlp_embeddings_internal Internal function that calls the embeddings service 
 15 |  * @param {string} modelName
 16 |  * @param {string} token
 17 |  * @param {string[]} texts
 18 |  */
 19 | export async function nlp_embeddings_internal(
 20 |   modelName: string,
 21 |   token: string,
 22 |   texts: string[]
 23 | ): Promise<Map<string, number[]>> {
 24 |   try {
 25 |     const response = await axios.post(EMBEDDINGS_URL, JSON.stringify(texts), {
 26 |       headers: {
 27 |         'Content-Type': 'application/json'
 28 |       }
 29 |     });
 30 | 
 31 |     let r = new Map<string, number[]>();
 32 | 
 33 |     let obj = response.data;
 34 |     if (obj.length != texts.length) {
 35 |       throw new Error(
 36 |         `Embeddings generation error: texts were ${texts.length} long, while there were ${obj.length} embeddings returned`
 37 |       );
 38 |     }
 39 | 
 40 |     for (let x = 0; x < obj.length; x++) {
 41 |       r.set(texts[x], obj[x]['vector']);
 42 |     }
 43 | 
 44 |     return convertVectorMapToObject(r);
 45 |   } catch (e: any) {
 46 |     console.error(e.toString());
 47 |     throw new Error(`Embeddings generation error: ${e.toString()}`);
 48 |   }
 49 | }
 50 | 
 51 | // This might be the stupidest function I have ever written in my life. For this, I am deeply ashamed.
 52 | /**
 53 |  * @function convertVectorMapToObject Converts a Map<string, number[]> to an object
 54 |  * @param map Map<string, number[]> to convert
 55 |  * @returns {object} Object with keys as strings and values as arrays of numbers
 56 |  */
 57 | export const convertVectorMapToObject = (map: Map<string, number[]>): any => {
 58 |   let json: any = {};
 59 |   map.forEach((value: number[], index: string) => {
 60 |     json[index] = value;
 61 |   });
 62 |   return json;
 63 | };
 64 | 
 65 | /**
 66 |  * @function nlp_embeddings Generates embeddings for a list of texts
 67 |  * @param {string} modelName Name of the model to use
 68 |  * @param {string[]} texts List of texts to generate embeddings for
 69 |  * @example <caption> Generates embeddings for a list of texts</caption>
 70 |  * await nlp_embeddings('bert-base-uncased', ['hello world', 'goodbye world']);
 71 |  * @returns {Promise<Map<string, number[]>>}
 72 |  * @description Generates embeddings for a list of texts running as a Google Cloud Platform service in Vertex AI.
 73 |  */
 74 | export async function nlp_embeddings(
 75 |   modelName: string,
 76 |   texts: string[]
 77 | ): Promise<[string, number[]][]> {
 78 |   let token;
 79 | 
 80 |   let retval: [string, number[]][] = [];
 81 |   try {
 82 |     const response = await axios.post(
 83 |       EMBEDDINGS_URL,
 84 |       JSON.stringify( texts ),
 85 |       {
 86 |         headers: {
 87 |           'Content-Type': 'application/json'
 88 |         }
 89 |       }
 90 |     );
 91 | 
 92 |     let r = new Map<string, number[]>();
 93 |     let obj = response.data;
 94 |     if (obj.length != texts.length) {
 95 |       throw new Error(
 96 |         `Embeddings generation error: texts were ${texts.length} long, while there were ${obj.length} embeddings returned`
 97 |       );
 98 |     }
 99 | 
100 |     for (let x = 0; x < obj.length; x++) {
101 |       retval.push([texts[x], obj[x]["vector"]]);
102 |     }
103 | 
104 |     return retval;
105 |   } catch (e: any) {
106 |     console.error(e.toString());
107 |     throw new Error(
108 |       `Nu Embeddings generation error: ${e.toString()}\n${e.message}`
109 |     );
110 |   }
111 | }
112 | 
113 | export async function embeddingsSearch(
114 |   index: string,
115 |   vector: number[],
116 |   k: number
117 | ): Promise<any[]> {
118 |   try {
119 |     return embeddings_search(index, vector, k);
120 |   } catch (e: any) {
121 |     return [{ error: e.message }];
122 |   }
123 | }
124 | 
125 | export async function embeddingsDrop(index: string): Promise<void> {
126 |   await es_drop(index);
127 | }
128 | 


--------------------------------------------------------------------------------
/src/activities/tokenizer.ts:
--------------------------------------------------------------------------------
  1 | import axios from 'axios';
  2 | 
  3 | export interface NLPCloudToken {
  4 |   text: string;
  5 |   lemma: string;
  6 |   start: number;
  7 |   end: number;
  8 |   ws_after: boolean;
  9 | }
 10 | 
 11 | async function nlpcloud_tokenize_x(
 12 |   text: string,
 13 |   token: string
 14 | ): Promise<NLPCloudToken[]> {
 15 |   const response = await axios.post(
 16 |     `https://api.nlpcloud.io/v1/en_core_web_lg/tokens`,
 17 |     {
 18 |       text: text
 19 |     },
 20 |     {
 21 |       headers: {
 22 |         Authorization: `Token ${token}`,
 23 |         'Content-Type': 'application/json'
 24 |       }
 25 |     }
 26 |   );
 27 |   return response.data.tokens;
 28 | }
 29 | 
 30 | export async function nlpcloud_tokenize(
 31 |   text: string,
 32 |   token: string
 33 | ): Promise<NLPCloudToken[]> {
 34 |   try {
 35 |     let x = await nlpcloud_tokenize_x(text, token);
 36 |     return x;
 37 |   } catch (e: unknown) {
 38 |     /*
 39 |         response: {
 40 |       status: 413,
 41 |       statusText: 'Payload Too Large',
 42 |       headers: [Object],
 43 |       config: [Object],
 44 |       request: [ClientRequest],
 45 |       data: 'Request Entity Too Large'
 46 |     }
 47 | 
 48 |     In this case, we want to split the problem up unto halves, and retry them all, and assemble the results. */
 49 |     if (e instanceof Error && (e as any).response?.status === 413) {
 50 |       let half = Math.floor(text.length / 2);
 51 |       console.log('Cut in half.');
 52 |       let left = await nlpcloud_tokenize(text.substr(0, half), token);
 53 |       let right = await nlpcloud_tokenize(text.substr(half + 1), token);
 54 | 
 55 |       return [...left, ...right];
 56 |     }
 57 | 
 58 |     throw e; // rethrow the error for now... we'll see how this goes! :)
 59 |   }
 60 | }
 61 | 
 62 | /**
 63 |  * Convert string to list of tokens. This is used by the other LLM activities, largely by the
 64 |  * data/prompt split/map/reduce activities. You can use it directly if you want, but it's probably
 65 |  * best to use higher-level activities and/or workflows.
 66 |  *
 67 |  * @param text Text to use as prompt (input)
 68 |  * @returns Array of tokens
 69 |  */
 70 | export async function tokenize_native(text: string): Promise<string[]> {
 71 |   var natural = require('natural');
 72 |   let tokenizer = new natural.TreebankWordTokenizer();
 73 |   return tokenizer.tokenize(text);
 74 | }
 75 | 
 76 | export async function gpt3_tokenize(text: string): Promise<number[]> {
 77 |   let tokenizer = require('gpt-3-encoder');
 78 |   let encoded_tokens: number[] = tokenizer.encode(text);
 79 |   return encoded_tokens;
 80 | }
 81 | 
 82 | export async function gpt3_detokenize(tokens: number[]): Promise<string> {
 83 |   let tokenizer = require('gpt-3-encoder');
 84 |   let text = tokenizer.decode(tokens);
 85 |   return text;
 86 | }
 87 | 
 88 | /**
 89 |  * Split text into chunks of the given token size.
 90 |  * Adjacent chunks will overlap by chunk_overlap tokens, which can naively help avoid splitting
 91 |  * in bad places.
 92 |  * 
 93 |  * @param text string to split into chunks
 94 |  * @param chunk_size number of tokens per chunk (last chunk may be smaller)
 95 |  * @param chunk_overlap number of tokens to overlap adjacent chunks. defaults to 0.
 96 |  */
 97 | export async function split_text_by_tokens(text: string, chunk_size: number, chunk_overlap: number = 0): Promise<string[]> {
 98 |   if (chunk_size < 0) {
 99 |     throw new Error("chunk_size must be non-negative");
100 |   }
101 |   if (chunk_overlap < 0) {
102 |     throw new Error("chunk_overlap must be non-negative");
103 |   }
104 |   if (chunk_overlap >= chunk_size) {
105 |     throw new Error("chunk_overlap must be less than chunk_size");
106 |   }
107 | 
108 |   let chunks: string[] = [];
109 |   let text_tokens: number[] = await gpt3_tokenize( text );
110 |   console.log(`Tokenized ${text.length} characters into ${text_tokens.length} tokens.`);
111 | 
112 |   // window slides by chunk_size - chunk_overlap tokens each iteration.
113 |   // we stop sliding when a chunk includes the last token
114 |   let tok_len = text_tokens.length;
115 |   for ( let idx = 0;
116 |         idx < tok_len && idx + chunk_overlap < tok_len; // rhs of && ensures last token only included once
117 |         idx += chunk_size - chunk_overlap )
118 |   {
119 |       let context_tokens_slice: number[] = text_tokens.slice(idx, idx + chunk_size);
120 |       let context_slice = await gpt3_detokenize( context_tokens_slice );
121 |       chunks.push( context_slice );
122 |   }
123 |   return chunks;
124 | }
125 | 
126 | export async function sentence_tokenizer( text: String ): Promise< string[] > {
127 |   throw new Error("Not implemented" );
128 | }
129 | 


--------------------------------------------------------------------------------
/src/activities/elastic.ts:
--------------------------------------------------------------------------------
  1 | import axios from 'axios';
  2 | import { Client } from '@elastic/elasticsearch';
  3 | import { esMappings } from '../workflows/elastic';
  4 | 
  5 | import { ELASTIC_CONFIG } from '../config';
  6 | 
  7 | /**
  8 |  * @function getElasticSearchClient
  9 |  * @param {any} elasticconfig
 10 |  * @example 
 11 |  * import { ELASTIC_CONFIG } from '../config';
 12 |  * const client = getElasticSearchClient(ELASTIC_CONFIG);
 13 |  * @returns {Promise<Client>} ElasticSearch client
 14 |  * @description Returns a promise that resolves to an ElasticSearch client
 15 |  */
 16 | export async function getElasticSearchClient(): Promise<Client> {
 17 |   const client = new Client(ELASTIC_CONFIG);
 18 |   return client;
 19 | }
 20 | 
 21 | /**
 22 |  * @function es_index
 23 |  * @param {string} indexname Elasticsearch index name the document will be added to
 24 |  * @param {any} doc Document to be added to the index
 25 |  * @param {boolean} refresh Refresh the index after adding the document. This can slow down indexing, but is useful if you need it to be searchable immediately.
 26 |  * @example <caption> Adds a document to an elasticsearch index </caption>
 27 |  * await es_index('test', { 'test': 'test' }, true);
 28 |  * @returns {Promise<void>}
 29 |  * @description Indexes a document in ElasticSearch
 30 |  */
 31 | // TODO: shut down the client on error
 32 | export async function es_index(
 33 |   indexname: string,
 34 |   doc: any,
 35 |   refresh: boolean = true
 36 | ): Promise<void> {
 37 |   let client = await getElasticSearchClient();
 38 |   try
 39 |   {
 40 |     await client.index({ index: indexname, document: doc });
 41 |     if (refresh) await client.indices.refresh({ index: indexname });
 42 |     client.close();
 43 |   }
 44 |   catch( e: any )
 45 |   {
 46 |     console.error(e);
 47 |     client.close();
 48 |     throw( e );
 49 |   }
 50 | 
 51 |   return;
 52 | }
 53 | 
 54 | /**
 55 |  * @function es_search
 56 |  * @param {string} indexname Elasticsearch index the document will be added to
 57 |  * @param {any} queryobj Query object to be used to search the index
 58 |  * @example <caption> Searches an Elasticsearch index using ELastic Query DSL</caption>
 59 |  * await es_search('test', { 'query': { 'match_all': {} } });
 60 |  * @returns {Promise<any>} Promise that resolves to the results of the search
 61 |  * @description Searches an index in ElasticSearch
 62 |  */
 63 | export async function es_search<T>(
 64 |   indexname: string,
 65 |   queryobj: any
 66 | ): Promise<any> {
 67 |   let client = await getElasticSearchClient();
 68 |   const result = await client.search({ index: indexname, query: queryobj });
 69 |   client.close();
 70 |   return result.hits.hits;
 71 | }
 72 | 
 73 | 
 74 | /**
 75 |  * @function es_query Executes an SQL query against Elasticsearch
 76 |  * @param {string} query SQL query to be executed
 77 |  * @example <caption> Executes an SQL query against Elasticsearch</caption>
 78 |  * await es_query('SELECT * FROM test');
 79 |  * @returns {Promise<any>} Promise that resolves to the results of the query
 80 |  */export async function es_query<T>(query: string, params?: any): Promise<any> {
 81 |   let client = await getElasticSearchClient();
 82 |   const result = await client.sql.query({
 83 |     query: query,
 84 |     params: params,
 85 |   });
 86 |   client.close();
 87 |   const data = result.rows.map((row) => {
 88 |     const obj: any = {};
 89 |     for (let i = 0; i < row.length; i++) {
 90 |       obj[result.columns![i].name] = row[i];
 91 |     }
 92 |     return obj;
 93 |   });
 94 | 
 95 |   return data;
 96 | }
 97 | 
 98 | 
 99 | /**
100 |  * @function es_drop Deletes an Elasticsearch index
101 |  * @param index Elasticsearch index to be deleted
102 |  * @example <caption> Deletes an Elasticsearch index</caption>
103 |  * await es_delete('test');
104 |  * @returns {Promise<void>} Promise that resolves when the index is deleted
105 |  */
106 | export async function es_drop(index: string): Promise<void> {
107 |   let client = await getElasticSearchClient();
108 |   try {
109 |     await client.indices.delete({ index: index });
110 |   } catch (e: any) {
111 |     console.error(e);
112 |   }
113 | }
114 | 
115 | /**
116 |  * @function es_delete Deletes a document from an Elasticsearch index
117 |  * @param index Elasticsearch index the document will be deleted from
118 |  * @param id ID of the document to be deleted
119 |  * @example <caption> Deletes a document from an Elasticsearch index</caption>
120 |  * await es_delete('test', '1');
121 |  * @returns {Promise<void>} Promise that resolves when the document is deleted
122 |  */
123 | export async function es_delete(index: string, id: string): Promise<void> {
124 |   let client = await getElasticSearchClient();
125 |   try {
126 |     await client.delete({ index: index, id: id });
127 |   } catch (e: any) {
128 |     console.error(e);
129 |   }
130 | }
131 | 
132 | 


--------------------------------------------------------------------------------
/src/prompts/notebook-template.txt:
--------------------------------------------------------------------------------
 1 | Min length: 10 Max lenght: 512 Temperature: 0 Top_p: 1 Model: code-davinci-002 Stop token: Question:,Out[,Out [
 2 | 
 3 | You are GPT-3. Today is {{{todaysdate}}}. Answer the following questions.
 4 | 
 5 | If you can answer directly, use this format:
 6 | 
 7 | Question: ${Question}
 8 | Answer: ${Answer}
 9 | 
10 | You are unable to directly answer any question that requires:
11 |   - Analyzing text as a sequence of characters (e.g., counting length, reversing strings)
12 |   - Counting of more than several items (e.g., words in a sentence or items in a list)
13 |   - Arithmetic that a human could not perform easily in their head
14 | 
15 | In these cases, consult IJavascript. Use this format:
16 | 
17 | Question: ${Question}
18 | IJavascript session:
19 | ```
20 | ${IJavascript commands and output needed to find answer}
21 | ```
22 | Answer: ${Answer}
23 | 
24 | Your training data was last updated April 2021, and you do not know any later events. To find information & perform tasks, there are many packages available, as well -- use the x.searchLoadedPackages() function call to display them, and then x.inspectLoadedPackage('packagename') to get a list of methods available. Use anything you need, but favor the first packages.
25 | 
26 | Whenever possible, use the output of the previous statements, which are assigned to variables that are automatically created with the prefix return_ on the completion of every statement.
27 | 
28 | Begin.
29 | 
30 | Question: When was George Washington born?
31 | Answer: February 22, 1732
32 | 
33 | Question: How many legs does an antelope have?
34 | Answer: Four
35 | 
36 | Question: What is 2 + 19?
37 | Answer: 21
38 | 
39 | Question: What is 7 + 19^3?
40 | IJavascript session:
41 | ```
42 | In [1]: 7 + 19**3
43 | Out[1]: 6866
44 | console.log( "Done." );
45 | ```
46 | Answer: 6866
47 | 
48 | Question: What is the number of legs that a spider has raised to the fifth power?
49 | IJavascript session:
50 | ```
51 | In [1]: 8**5
52 | Out[1]: 32768
53 | console.log( "Done." );
54 | ```
55 | Answer: 32768
56 | 
57 | Question:Search for waifudiffusion, fetch the first result, question whether it can be used to generate anime images of hot women, and print the result
58 | IJavascript session:
59 | ```
60 | In [1]: x.searchLoadedPackages()
61 | Out [1] [["x.http","perform HTTP requests"],["x.google","Google APIs"],["x.nlp","Natural Language Processing"]]
62 | In [2]: x.inspectLoadedPackage('x.google')
63 | Out [2] return_2 = ["async function search(query:string): Promise< {rank: number, url: string, title: string, description: string}[] >"]
64 | In [3]: x.inspectLoadedPackage('x.http')
65 | Out [3] return_3 = ["async function x.http.get(url: string): Promise< {status: number, statusText: string, content: any} >","async function x.http.post(url: string, data: any): Promise< string >","async function x.http.put(url: string, data: any): Promise< string >","async function x.http.delete(url: string): Promise< string >"]
66 | In [4]: x.inspectLoadedPackage('x.nlp')
67 | Out [4] return_4 = ["async function x.nlp.summarize(text: string): Promise< string >","async function x.nlp.question_answer(text: string, question: string): Promise< string >","async function x.ai_instruction_on_text( text: string, instructions: string ): Promise< string >"]
68 | In [5]: await x.google.search('waifudiffusion')
69 | Out [5] return_5 = [{"rank":1,"url":"https://huggingface.co/hakurei/waifu-diffusion","title":"hakurei/waifu-diffusion - Hugging FaceWaifu Diffusion Demo - a Hugging Face Space by hakurei","description":"waifu-diffusion is a latent text-to-image diffusion model that has been conditioned on high-quality anime images through fine-tuning. Original Weights. Gradio & ...waifu-diffusion-demo. Copied. like 109. Running onA10G. App Files Files and versions Community. 24. Linked models. Loading... view api."},{"rank":2,"url":"https://r... Output truncated. Full output is in the 'return_5' variable.
70 | In [6]: await x.http.get(return_5[0].url)
71 | Out [6] return_6 = {"status":200,"statusText":"OK","content":"[Hugging Face's logo] Hugging Face\n\n-   Models\n-   Datasets\n-   Spaces\n-   Docs\n-   Solutions\n\n-   Pricing\n-   \n\n-   \n\n    ------------------------------------------------------------------------\n\n-   Log In\n-   Sign Up\n\n\n\n[] hakurei\n\n/\n\nwaifu-diffusion\n\nCopied\n\nlike\n\n817\n\nText-to-Image\n\nDiffusers\n\nEnglish stable-diffusion\n\nLicense: creativeml-openrail-m\n\nModel card\n\nFiles Files and versions\n\nCommunity\n\n33\n\nUse in Dif... Output truncated. Full output is in the 'return_6' variable.
72 | In [7]: await x.nlp.question_answer(return_6.content, 'can it generate anime images of hot women?')
73 | Out [7] return_7 = "Lines 0 to 203\n\n\nYes, the waifu-diffusion model can generate anime images of hot women."
74 | In [8]: console.log(return_7)
75 | ```
76 | Answer: Yes, the waifu-diffusion model can generate anime images of hot women.
77 | 
78 | Question: 


--------------------------------------------------------------------------------
/src/workflows/llm.ts:
--------------------------------------------------------------------------------
  1 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
  2 | 
  3 | import * as llm from '../activities/llm';
  4 | import * as tokenizer from '../activities/tokenizer';
  5 | 
  6 | const { generateText, generateTextOpenAI, retryGenerateTextOpenAI } =
  7 |   proxyActivities<typeof llm>({ startToCloseTimeout: '10 minute' });
  8 | const { nlpcloud_tokenize, tokenize_native } = proxyActivities<
  9 |   typeof tokenizer
 10 | >({ startToCloseTimeout: '10 minute' });
 11 | 
 12 | export type llm_models = 'gpt-3' | 'gpt-neox-20b' | 'text-curie-001' | 'finetuned-gpt-neox-20b';
 13 | 
 14 | 
 15 | /**
 16 |  * @function nlpcloud_generate
 17 |  * @param {string} prompt
 18 |  * @param {number} minLength
 19 |  * @param {number} maxLength
 20 |  * @param {boolean | null} lengthNoInput
 21 |  * @param {string | null} endSequence
 22 |  * @param {boolean} removeInput
 23 |  * @param {boolean | null} doSample
 24 |  * @param {number | null} numBeams
 25 |  * @param {boolean | null} earlyStopping
 26 |  * @param {number | null} noRepeatNgramSize
 27 |  * @param {number | null} numReturnSequences
 28 |  * @param {number | null} topK
 29 |  * @param {number | null} topP
 30 |  * @param {number | null} temperature
 31 |  * @param {number | null} repetitionPenalty
 32 |  * @param {number | null} lengthPenalty
 33 |  * @param {boolean | null} badWords
 34 |  * @param {boolean | null} removeEndSequence
 35 |  * @description A workflow that will generate text using the NLP Cloud API
 36 |  */
 37 | export async function nlpcloud_generate(
 38 |   prompt: string,
 39 |   minLength: number = 10,
 40 |   maxLength: number = 20,
 41 |   lengthNoInput: boolean | null = null,
 42 |   endSequence: string | null = null,
 43 |   removeInput: boolean = true,
 44 |   doSample: boolean | null,
 45 |   numBeams: number | null,
 46 |   earlyStopping: boolean | null,
 47 |   noRepeatNgramSize: number | null,
 48 |   numReturnSequences: number | null,
 49 |   topK: number | null,
 50 |   topP: number | null,
 51 |   temperature: number | null,
 52 |   repetitionPenalty: number | null,
 53 |   lengthPenalty: number | null,
 54 |   badWords: boolean | null,
 55 |   removeEndSequence: boolean | null
 56 | ): Promise<string> {
 57 |   // TODO: #3 Where do we want to store stuff like API tokens?
 58 |   return await generateText(
 59 |     'finetuned-gpt-neox-20b',
 60 |     prompt,
 61 |     minLength,
 62 |     maxLength,
 63 |     lengthNoInput,
 64 |     endSequence,
 65 |     removeInput,
 66 |     doSample,
 67 |     numBeams,
 68 |     earlyStopping,
 69 |     noRepeatNgramSize,
 70 |     numReturnSequences,
 71 |     topK,
 72 |     topP,
 73 |     temperature,
 74 |     repetitionPenalty,
 75 |     lengthPenalty,
 76 |     badWords,
 77 |     removeEndSequence
 78 |   );
 79 | }
 80 | 
 81 | 
 82 | /**
 83 |  * @function openai_generate
 84 |  * @param {string} prompt
 85 |  * @param {string} apikey
 86 |  * @param {number} min_length
 87 |  * @param {number} max_length
 88 |  * @param {number} temperature
 89 |  * @param {number} top_p
 90 |  * @description A workflow that will generate text using the OpenAI API
 91 |  */
 92 | export async function openai_generate(
 93 |   prompt: string,
 94 |   min_length: number,
 95 |   max_length: number,
 96 |   temperature: number,
 97 |   top_p: number,
 98 |   endSequence: string | null = null,
 99 | ): Promise<string> {
100 |   return await generateTextOpenAI(
101 |     prompt,
102 |     min_length,
103 |     max_length,
104 |     temperature,
105 |     top_p, 
106 |     "text-davinci-003",
107 |     endSequence
108 |   );
109 | }
110 | 
111 | /**
112 |  * @function minGenerate
113 |  * @param {string} prompt
114 |  * @param {number} minLength
115 |  * @param {number} maxLength
116 |  * @param {number} temperature
117 |  * @param {string | null} endSequence
118 |  * @param {"gpt-3" | "gpt-neox-20b"} model
119 |  * @description A workflow that will generate text using sensible defaults to a sensible default LLM
120 |  */
121 | export async function minGenerate(
122 |   prompt: string,
123 |   minLength: number,
124 |   maxLength: number,
125 |   temperature: number,
126 |   endSequence: string | null = null,
127 |   model: llm_models = 'gpt-3'
128 | ): Promise<string> {
129 |   console.log('In:\n' + prompt);
130 |   if (model == 'gpt-neox-20b' || model == 'finetuned-gpt-neox-20b') {
131 |     let completion = await nlpcloud_generate(
132 |       prompt,
133 |       minLength,
134 |       maxLength,
135 |       true,
136 |       endSequence,
137 |       undefined,
138 |       null,
139 |       null,
140 |       null,
141 |       null,
142 |       null,
143 |       null,
144 |       1.0,
145 |       temperature,
146 |       null,
147 |       null,
148 |       null,
149 |       true
150 |     );
151 |     console.log('Out:\n' + completion);
152 |     return completion;
153 |   } else if (model == 'gpt-3' || model == 'text-curie-001') {
154 |     let completion = await openai_generate(
155 |       prompt,
156 |       minLength,
157 |       maxLength,
158 |       temperature,
159 |       0.9,
160 |       endSequence
161 |     );
162 |     return completion;
163 |   }
164 | 
165 |   throw new Error(`No model ${model} found`);
166 | }
167 | 


--------------------------------------------------------------------------------
/src/activities/vector_search.ts:
--------------------------------------------------------------------------------
  1 | import { getElasticSearchClient, es_query, es_drop,  } from './elastic';
  2 | import { esMappings } from '../workflows/elastic';
  3 | 
  4 | export async function embeddings_search(
  5 |   indexname: string,
  6 |   vector: number[],
  7 |   k: number
  8 | ): Promise<any> {
  9 |   console.log(`Searching for ${vector.length} in ${indexname} returning ${k}`);
 10 |   let client = await getElasticSearchClient();
 11 |   try {
 12 |     const result = await client.search({
 13 |       index: indexname,
 14 |       body: {
 15 |         query: {
 16 |           script_score: {
 17 |             query: { match_all: {} },
 18 |             script: {
 19 |               source: "cosineSimilarity(params.queryVector, 'embeddings') + 1.0",
 20 |               params: { queryVector: vector }
 21 |             }
 22 |           }
 23 |         },
 24 |         size: k
 25 |       }
 26 |     });
 27 |     return result.hits.hits;
 28 |   } catch (e: any) {
 29 |     console.error(e.meta.body.error);
 30 |     console.error('========================== Sahr');
 31 |     console.error(e.meta.body.error.failed_shards[0].reason);
 32 |     throw e;
 33 |   }
 34 | }
 35 | /**
 36 |  * @function es_context For a given Elasticsearch line-vectorized index, filename and line number, provides the four lines of textual context
 37 |  * @param {string} indexname Elasticsearch index the document will be added to
 38 |  * @param {string} path Path to the file
 39 |  * @param {number} line Line number
 40 |  * @example <caption> Provides lines 8 to 12 from test.txt in the test line-vectorized Elasticsearch index </caption>
 41 |  * await es_context('test', 'test.txt', 10);
 42 |  * @returns {Promise<any>} Promise that resolves to the results of the search
 43 |  * @description Searches an index in ElasticSearch
 44 |  */export async function es_context(
 45 |   indexname: string,
 46 |   path: string,
 47 |   line: number
 48 | ): Promise<any> {
 49 |   let client = await getElasticSearchClient();
 50 |   try {
 51 |     let sql = `SELECT text, line, path FROM ${indexname} WHERE path = '${path}' AND line >= ${line} - 2 AND line <= ${line} + 2 ORDER BY line ASC`;
 52 |     console.log(sql);
 53 |     let results = es_query(sql);
 54 |     client.close();
 55 |     return results;
 56 |   } catch (e: any) {
 57 |     console.log(e);
 58 |     console.log(e.meta.body.error);
 59 |     throw e;
 60 |   }
 61 | }
 62 | /**
 63 |  * @function es_mappings Creates an Elasticsearch index with a dense vector index at 'embeddings'
 64 |  * @param {string} index Elasticsearch index the document will be added to
 65 |  * @param {any} doc Elasticsearch mapping object
 66 |  * @param {number} dims Number of dimensions for the dense vector
 67 |  * @example <caption> Creates an Elasticsearch index with a dense vector</caption>
 68 |  * await es_mappings('test', { 'message': { 'type': 'text' } }, 512);
 69 |  * // Creates the following index:
 70 |  * // {
 71 |  * //   "mappings": {
 72 |  * //     "properties": {
 73 |  * //       "message": {
 74 |  * //         "type": "text"
 75 |  * //       },
 76 |  * //       "embeddings": {
 77 |  * //         "type": "dense_vector",
 78 |  * //         "dims": 512
 79 |  * //       }
 80 |  * //     }
 81 |  * //   }
 82 |  * // }
 83 |  * @returns {Promise<void>} Promise that resolves when the index is created
 84 |  */
 85 |  export async function es_mappings(
 86 |   index: string,
 87 |   doc: any,
 88 |   dims: number
 89 | ): Promise<void> {
 90 |   let client = await getElasticSearchClient();
 91 |   /*
 92 |     {"client_msg_id":"6dfbdabd-19f3-43b9-ad0a-bff20ddccae5","type":"message","text":"emmap messages","user":"U03UF3XNM8D","ts":"1662237512.586659","team":"T03U3FML84F","blocks":[{"type":"rich_text","block_id":"s+aM","elements":[{"type":"rich_text_section","elements":[{"type":"text","text":"emmap messages"}]}]}],"channel":"C0409CD6VR7","event_ts":"1662237512.586659","channel_type":"channel"}
 93 |     */
 94 |   let mappings = doc;
 95 |   mappings['embeddings'] = { type: 'dense_vector', dims: dims };
 96 |   try {
 97 |     await client.indices.create({
 98 |       index: index,
 99 |       mappings: { properties: doc }
100 |     });
101 |   } catch (e: any) {
102 |     if (e.meta.body.error.type != 'resource_already_exists_exception') {
103 |       throw e;
104 |     }
105 |   }
106 | 
107 |   client.close();
108 |   return;
109 | }
110 | 
111 | 
112 | /**
113 |  * @function init_elasticsearch_mappings Initializes the Elasticsearch mappings for the emmap database
114 |  * @example <caption> Initializes the Elasticsearch mappings for the emmap database</caption>
115 |  * await init_elasticsearch_mappings();
116 |  * @returns {Promise<void>} Promise that resolves when the mappings are initialized
117 |  */
118 |  export async function init_elasticsearch_mappings(): Promise<string> {
119 |   try {
120 |     await es_drop('messages');
121 |   } catch (e: any) {}
122 |   await esMappings('messages', {
123 |     text: { type: 'text' },
124 |     user: { type: 'keyword' },
125 |     ts: { type: 'keyword' },
126 |     channel: { type: 'keyword' },
127 |     reactions: { type: 'keyword' }
128 |   });
129 | 
130 |   try {
131 |     await es_drop('teachings');
132 |   } catch (e: any) {}
133 |   await esMappings('teachings', {
134 |     text: { type: 'text' },
135 |     user: { type: 'keyword' },
136 |     prompt: { type: 'keyword' },
137 |     completion: { type: 'keyword' },
138 |     prompt_leading: { type: 'keyword' },
139 |     md5sum: { type: 'keyword' }
140 |   });
141 | 
142 |   try {
143 |     await es_drop('transcripts');
144 |   } catch (e: any) {}
145 |   await esMappings('transcripts', {
146 |     filename: { type: 'keyword' },
147 |     transcript: { type: 'text' },
148 |     data: { type: 'keyword' }
149 |   });
150 | 
151 |   try {
152 |     await es_drop('spider');
153 |   } catch (e: any) {}
154 |   await esMappings('spider', {
155 |     url: { type: 'keyword' },
156 |     depth: { type: 'integer' },
157 |     md5: { type: 'keyword' },
158 |     text: { type: 'text' }
159 |   });
160 | 
161 |   return 'Done init_elasticsearch_mappings().';
162 | }
163 | 


--------------------------------------------------------------------------------
/src/activities/application/codegrunt.ts:
--------------------------------------------------------------------------------
  1 | import * as activities from '../../activities';
  2 | 
  3 | /* 
  4 |  * Import babel so we can parse typescript or javascript files
  5 |  */
  6 | import * as babel from '@babel/core';
  7 | import * as fs from 'fs';
  8 | 
  9 | export async function parse( code: string, filename: string ): Promise< babel.ParseResult | null >{
 10 |     let lines = code.split( /[\r\n]+/ );
 11 | 
 12 |     let result = await babel.parseAsync( code, {
 13 |         filename: filename,
 14 |         presets: [ '@babel/preset-typescript' ],
 15 |         plugins: [ '@babel/plugin-proposal-class-properties' ]
 16 |     } );
 17 | 
 18 |     if ( result == null )
 19 |     {
 20 |         console.log(`Failed to parse ${filename}`);
 21 |         return result;
 22 |     }
 23 |     return result;
 24 | 
 25 |     result?.program.body.forEach( async (node) => {
 26 |         console.log( `${node.type}` );
 27 |         //console.log( node );
 28 | 
 29 |         if ( node.type === 'FunctionDeclaration' )
 30 |         {
 31 |             console.log( `${JSON.stringify(node.leadingComments)}` );
 32 |             console.log( `${node.id?.name}` );
 33 |             node.loc?.start.line;
 34 |             node.loc?.end.line;
 35 |             console.log( lines.slice( node.loc!.start.line - 1, node.loc?.end.line ).join( '\n' ) );
 36 | 
 37 |             console.log( node );
 38 |         }
 39 |     });
 40 | }
 41 | 
 42 | export type CodeFragmentType = 'unknown' | 'function';
 43 | 
 44 | export interface CodeFragment
 45 | {
 46 |     type: CodeFragmentType;
 47 |     start: {line: number; column: number};
 48 |     end:  {line: number; column: number};
 49 |     code: string;
 50 |     leadingComment: string;
 51 | }
 52 | 
 53 | export async function nodeToCodeFragment( node: babel.Node, lines: Array< string > ): Promise< CodeFragment >
 54 | {
 55 |     let start = node.loc?.start;
 56 |     let end = node.loc?.end;
 57 | 
 58 |     if ( start == null || end == null )
 59 |     {
 60 |         throw new Error( 'PERMANENT - Node has no location' );
 61 |     }
 62 | 
 63 |     let leadingComment = '';
 64 |     if ( node.leadingComments != null )
 65 |     {
 66 |         let leadingCommentLines = node.leadingComments.map( (c) => c.value );
 67 |         leadingComment = leadingCommentLines.join( '\n' );
 68 |     }
 69 | 
 70 |     let code = lines.slice( start.line - 1, end.line ).join( '\n' );
 71 |     let retval: CodeFragment = {
 72 |         type: 'unknown',
 73 |         start: {line: start.line, column: start.column},
 74 |         end: {line: end.line, column: end.column},
 75 |         code: code,
 76 |         leadingComment: leadingComment,
 77 |     };
 78 |     return retval;
 79 | }
 80 | 
 81 | export interface FunctionCodeFragment extends CodeFragment {
 82 |     type: 'function';
 83 |     name: string;
 84 |     params: Array< string >;
 85 |     returnType: string;
 86 |     signature: string;
 87 | }
 88 | 
 89 | export async function extractFunctions( code: string, filename: string ): Promise< Array< FunctionCodeFragment > > {
 90 |     let result = await parse( code, filename );
 91 |     let functions: Array< FunctionCodeFragment > = [];
 92 | 
 93 | 
 94 |     function nodeToFunctionCodeFragment( node: babel.Node ): FunctionCodeFragment
 95 |     {
 96 |         if ( node.type == 'FunctionDeclaration' )
 97 |         {
 98 | 
 99 |             let fcf = 
100 |             {
101 |                 type: 'function',
102 |                 name: node.id?.name ?? '',
103 |                 params: new Array< string >(),
104 |                 returnType: '',
105 |                 start: {line: 0, column: 0},
106 |                 end: {line: 0, column: 0},
107 |                 code: '',
108 |                 leadingComment: node.leadingComments?.map( (c) => c.value ).join( '\n' ) ?? '',
109 |             };
110 | 
111 |             for ( let param of node.params )
112 |             {
113 |                 param = <babel.types.Identifier> param;
114 |                 let name_s = param.name
115 |                 let i1 = (<any> (param.loc!.end)).index;
116 |                 let i2 = (<any> (param.loc!.start)).index;
117 |                 let type_s = code.substring( i2, i1 );
118 |                 fcf.params.push( type_s );
119 |             }
120 | 
121 |             let returnType = node.returnType!;
122 |             let i1 = (<any> (returnType.loc!.end)).index;
123 |             let i2 = (<any> (returnType.loc!.start)).index;
124 |             fcf.returnType = code.substring( i2, i1 );
125 | 
126 | 
127 |             let start = (<any> node.loc).start;
128 |             let end = (<any> node.loc).end;
129 |             fcf.code = code.substring( start!.index, end!.index );
130 |             let fcf2: FunctionCodeFragment = {
131 |                 type: 'function',
132 |                 name: fcf.name,
133 |                 params: fcf.params,
134 |                 returnType: fcf.returnType,
135 |                 start: {line: start!.line, column: start!.column},
136 |                 end: {line: end!.line, column: end!.column},
137 |                 code: fcf.code,
138 |                 leadingComment: fcf.leadingComment,
139 |                 signature: `${node.async ? "async" : ""} function ${fcf.name}(${fcf.params.join( ', ' )}): ${fcf.returnType.replace( /^\s*:\s*/, '') }`,
140 |             };
141 |             return fcf2;
142 |         }
143 |         else
144 |         {
145 |             console.log( `Unexpected node type ${node.type}` );
146 |             throw new Error( 'PERMANENT - Unexpected node type' );
147 |         }
148 |     }
149 | 
150 |     result?.program.body.forEach( async (node) => {
151 |         console.log( `${node.type} ${node.range}` )
152 |         if ( node.type === 'FunctionDeclaration' )
153 |         {
154 |             functions.push( await nodeToFunctionCodeFragment( node ) );
155 |         }
156 |         if ( node.type == 'ExportNamedDeclaration' && node.declaration?.type == 'FunctionDeclaration' )
157 |         {
158 |             let decl = await nodeToCodeFragment( node.declaration, code.split( /[\r\n]/ ) );
159 |             decl.leadingComment = node.leadingComments?.map( (c) => c.value ).join( '\n' ) ?? '';
160 |             functions.push( nodeToFunctionCodeFragment( node.declaration ) );
161 |         }
162 |     });
163 | 
164 |     return functions;
165 | }
166 | 


--------------------------------------------------------------------------------
/src/activities/__tests__/tokenizer.test.ts:
--------------------------------------------------------------------------------
  1 | import { split_text_by_tokens } from '../tokenizer';
  2 | 
  3 | // source of truth for tokenizing: https://platform.openai.com/tokenizer?view=bpe
  4 | 
  5 | describe("split_text_by_tokens", () => {
  6 |   test("empty text", async () => {
  7 |     expect(await split_text_by_tokens("", 10)).toEqual([])
  8 |   })
  9 | });
 10 | 
 11 | describe("split_text_by_tokens", () => {
 12 |   test("1 token chunking without overlap", async () => {
 13 |     expect(await split_text_by_tokens("Hello world! This is a test.", 1)).toEqual(
 14 |       ["Hello", " world", "!", " This", " is", " a", " test", "."])
 15 |   })
 16 | });
 17 | 
 18 | describe("split_text_by_tokens", () => {
 19 |   test("2 token chunking without overlap; odd number of tokens", async () => {
 20 |     expect(await split_text_by_tokens("Hello world! This is still a test.", 2)).toEqual(
 21 |       ["Hello world", "! This", " is still", " a test", "."])
 22 |   })
 23 | });
 24 | 
 25 | describe("split_text_by_tokens", () => {
 26 |   test("whitespace", async () => {
 27 |     expect(await split_text_by_tokens(" \n ", 1)).toEqual([" ", "\n", " "])
 28 |   })
 29 | });
 30 | 
 31 | describe("split_text_by_tokens", () => {
 32 |   test("2 token chunking with 1 overlap", async () => {
 33 |     expect(await split_text_by_tokens("Hello world! This is a test.", 2, 1)).toEqual(
 34 |       ["Hello world", " world!", "! This", " This is", " is a", " a test", " test."])
 35 |   })
 36 | });
 37 | 
 38 | describe("split_text_by_tokens", () => {
 39 |   test("2 token chunking with 1 overlap; odd number of tokens", async () => {
 40 |     expect(await split_text_by_tokens("Hello world! This is still a test.", 2, 1)).toEqual(
 41 |       ["Hello world", " world!", "! This", " This is", " is still", " still a", " a test", " test."])
 42 |   })
 43 | });
 44 | 
 45 | describe("split_text_by_tokens", () => {
 46 |   test("3 token chunking with 1 overlap", async () => {
 47 |     expect(await split_text_by_tokens("Hello world! This is a test.", 3, 1)).toEqual(
 48 |       ["Hello world!", "! This is", " is a test", " test."])
 49 |   })
 50 | });
 51 | 
 52 | describe("split_text_by_tokens", () => {
 53 |   test("3 token chunking with 1 overlap; odd number of tokens", async () => {
 54 |     expect(await split_text_by_tokens("Hello world! This is still a test.", 3, 1)).toEqual(
 55 |       ["Hello world!", "! This is", " is still a", " a test."])
 56 |   })
 57 | });
 58 | 
 59 | describe("split_text_by_tokens", () => {
 60 |   test("3 token chunking with 2 overlap", async () => {
 61 |     expect(await split_text_by_tokens("Hello world! This is a test.", 3, 2)).toEqual(
 62 |       ["Hello world!", " world! This", "! This is", " This is a", " is a test", " a test."])
 63 |   })
 64 | });
 65 | 
 66 | describe("split_text_by_tokens", () => {
 67 |   test("3 token chunking with 2 overlap; odd number of tokens", async () => {
 68 |     expect(await split_text_by_tokens("Hello world! This is still a test.", 3, 2)).toEqual(
 69 |       ["Hello world!", " world! This", "! This is", " This is still", " is still a", " still a test", " a test."])
 70 |   })
 71 | });
 72 | 
 73 | describe("split_text_by_tokens", () => {
 74 |   test("chunk size equal to text length returns single chunk with text", async () => {
 75 |     expect(await split_text_by_tokens("Hello world! This is a test.", 8)).toEqual(
 76 |       ["Hello world! This is a test."])
 77 |   })
 78 | });
 79 | 
 80 | describe("split_text_by_tokens", () => {
 81 |   test("chunk size equal to text length returns single chunk with text, despite chunk_overlap", async () => {
 82 |     expect(await split_text_by_tokens("Hello world! This is a test.", 8, 1)).toEqual(
 83 |       ["Hello world! This is a test."])
 84 |   })
 85 | });
 86 | 
 87 | describe("split_text_by_tokens", () => {
 88 |   test("chunk size longer than text returns single chunk with text", async () => {
 89 |     expect(await split_text_by_tokens("Hello world! This is a test.", 9)).toEqual(
 90 |       ["Hello world! This is a test."])
 91 |   })
 92 | });
 93 | 
 94 | describe("split_text_by_tokens", () => {
 95 |   test("chunk size longer than text returns single chunk with text, despite chunk_overlap", async () => {
 96 |     expect(await split_text_by_tokens("Hello world! This is a test.", 9, 1)).toEqual(
 97 |       ["Hello world! This is a test."])
 98 |   })
 99 | });
100 | 
101 | describe("split_text_by_tokens", () => {
102 |   test("large chunk size and large chunk overlap", async () => {
103 |     expect(await split_text_by_tokens("Hello world! This is a test.", 7, 6)).toEqual(
104 |       ["Hello world! This is a test", " world! This is a test."])
105 |   })
106 | });
107 | 
108 | describe("split_text_by_tokens", () => {
109 |   test("large chunk size and small chunk overlap", async () => {
110 |     expect(await split_text_by_tokens("Hello world! This is a test.", 7, 1)).toEqual(
111 |       ["Hello world! This is a test", " test."])
112 |   })
113 | });
114 | 
115 | describe("split_text_by_tokens", () => {
116 |   test("chunk overlap larger than chunk size throws error", async () => {
117 |     expect(split_text_by_tokens("Hello world! This is still a test.", 2, 3)).rejects.toThrow("chunk_overlap must be less than chunk_size")
118 |   })
119 | });
120 | 
121 | describe("split_text_by_tokens", () => {
122 |   test("chunk overlap equal chunk size throws error", async () => {
123 |     expect(split_text_by_tokens("Hello world! This is still a test.", 3, 3)).rejects.toThrow("chunk_overlap must be less than chunk_size")
124 |   })
125 | });
126 | 
127 | describe("split_text_by_tokens", () => {
128 |   test("chunk size 0 with empty text", async () => {
129 |     expect(split_text_by_tokens("", 0)).rejects.toThrow("chunk_overlap must be less than chunk_size")
130 |   })
131 | });
132 | 
133 | describe("split_text_by_tokens", () => {
134 |   test("chunk size 0 with non-empty text", async () => {
135 |     expect(split_text_by_tokens("a", 0)).rejects.toThrow("chunk_overlap must be less than chunk_size")
136 |   })
137 | });
138 | 
139 | describe("split_text_by_tokens", () => {
140 |   test("negative chunk size throws", async () => {
141 |     expect(split_text_by_tokens("test", -1, -2)).rejects.toThrow("chunk_size must be non-negative")
142 |   })
143 | });
144 | 
145 | describe("split_text_by_tokens", () => {
146 |   test("negative overlap throws", async () => {
147 |     expect(split_text_by_tokens("test", 0, -1)).rejects.toThrow("chunk_overlap must be non-negative")
148 |   })
149 | });
150 | 


--------------------------------------------------------------------------------
/src/activities/util.ts:
--------------------------------------------------------------------------------
  1 | import axios from 'axios';
  2 | import { Connection, WorkflowClient } from '@temporalio/client';
  3 | import { nanoid } from 'nanoid';
  4 | import * as csv_parse from 'csv-parse';
  5 | 
  6 | import { getElasticSearchClient } from './elastic';
  7 | 
  8 | import { TEMPORAL_HOST, NLPCLOUD_TOKEN } from '../config';
  9 | import * as config from '../config';
 10 | import * as wf from '../workflows';
 11 | 
 12 | export async function nlp_stable_diffusion(
 13 |   prompt: string
 14 | ): Promise<string> {
 15 |   const response = await axios.post(
 16 |     `https://api.nlpcloud.io/v1/gpu/stable-diffusion/image-generation`,
 17 |     {
 18 |       text: prompt
 19 |     },
 20 |     {
 21 |       headers: {
 22 |         Authorization: `Token ${NLPCLOUD_TOKEN}`,
 23 |         'Content-Type': 'application/json'
 24 |       }
 25 |     }
 26 |   );
 27 | 
 28 |   return response.data.url;
 29 | }
 30 | 
 31 | export async function actionLogger(msg: string) {
 32 |   console.log(msg);
 33 |   // Store it in elasticsearch under the 'actionLogger' context:
 34 |   let client = await getElasticSearchClient();
 35 |   await client.index({ index: 'actionlogger', body: { message: msg } });
 36 |   client.close();
 37 | }
 38 | 
 39 | /**
 40 |  *
 41 |  * @param {string} url
 42 |  * @param {string} method
 43 |  * @param {any} headers
 44 |  * @param {any} data
 45 |  */
 46 | export async function wf_axios(
 47 |   url: string,
 48 |   method: string,
 49 |   headers: any,
 50 |   data: any
 51 | ): Promise<any> {
 52 |   let r = await axios.request({
 53 |     url: url,
 54 |     method: method,
 55 |     headers: headers,
 56 |     data: data
 57 |   });
 58 |   return r.data;
 59 | }
 60 | 
 61 | export async function md5sum(string: string): Promise<string> {
 62 |   const crypto = require('crypto');
 63 |   return crypto.createHash('md5').update(string).digest('hex');
 64 | }
 65 | 
 66 | /**
 67 |  * @function executeWorkflow
 68 |  * @param {string} address
 69 |  * @param {string} workflowId
 70 |  * @param {string} taskQueue
 71 |  * @param {any[]} args
 72 |  * @description Triggers a new workflow execution
 73 |  */
 74 | export async function executeWorkflow<T>(
 75 |   address: string,
 76 |   workflowId: string,
 77 |   taskQueue: string,
 78 |   args: any[]
 79 | ): Promise<T> {
 80 |   const connection = await Connection.connect({ address: address });
 81 |   const client = new WorkflowClient({ connection });
 82 |   let r = await client.start(workflowId, {
 83 |     args: args,
 84 |     taskQueue: taskQueue,
 85 |     workflowId: 'workflow-' + nanoid()
 86 |   });
 87 |   let result = await r.result();
 88 |   return result;
 89 | }
 90 | 
 91 | export async function executeLocalWorkflow<T>(
 92 |   workflowId: string,
 93 |   taskQueue: string,
 94 |   args: any[]
 95 | ): Promise<T> {
 96 |   return executeWorkflow(TEMPORAL_HOST, workflowId, taskQueue, args);
 97 | }
 98 | 
 99 | export async function JStoOBJ< T >( text: string ): Promise< T > {
100 | 
101 |     text = text.replace( /“/g, '"' );
102 |     text = text.replace( /”/g, '"' );
103 |     text = text.replace( /‘/g, "'" );
104 |     text = text.replace( /’/g, "'" );
105 |     console.log(`============\n${text}\n============\n`)
106 | 
107 |     try
108 |     {
109 |         let obj = eval( "const obj = " + text + " ; obj");
110 |         console.log( obj );
111 |         return obj;
112 |     }
113 |     catch( e: any )
114 |     {
115 |         console.log( e );
116 |         let p = `Bad JS [{{{id}}}]:
117 | const obj = {{{text}}}
118 | 
119 | Error:
120 | {{{error}}}
121 | 
122 | Corrected JS, stripped of comments, and pretty-printed:
123 | const obj =`;
124 |         const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
125 |         const client = new WorkflowClient({connection});
126 | 
127 |         let handle = await client.start(wf.promptTemplate, {
128 |             args: [p, {text: text, error: e.toString(), id: nanoid()}, 128, 2048, 1.0, "finetuned-gpt-neox-20b"],
129 |             taskQueue: 'hello-world',
130 |             workflowId: nanoid(),
131 |             workflowRunTimeout: '1 minutes',
132 |         });
133 | 
134 |         let fixed = await handle.result();
135 |         return await JStoOBJ( fixed );
136 |     }
137 | }
138 | 
139 | 
140 | export async function parse_and_fix_csv( text: string ): Promise< any >
141 | {
142 |     console.log( "======> Entering parse_and_fix_csv" );
143 |     // if text, which is a multi-line string, does not end in a " then add one
144 |     if ( text[text.length-1] != '"' )
145 |     {
146 |         text += '"';
147 |     }
148 |     console.log( text );
149 |     console.log( "======> Calling csv_parser_action")
150 |     try
151 |     {
152 |         let parsed = await csv_parser_action( text );
153 |         console.log( parsed );
154 |         return( parsed );
155 |     }
156 |     catch( e: any )
157 |     {
158 |         console.log( "======> Error in parse_and_fix_csv" + e.toString() );
159 |         const connection = await Connection.connect( {address: config.TEMPORAL_HOST} );
160 |         const client = new WorkflowClient({connection});
161 |         let wfid = "parse_and_fix_csv-" + nanoid();
162 | 
163 |         let p =
164 |         `Bad CSV:
165 | 
166 | {{{csv}}}
167 | 
168 | Corrected CSV:
169 | "`;
170 | 
171 |         let handle = await client.start(wf.promptTemplate, {
172 |             args: [p, {csv: text}, 48, 1024, 0.0, "gpt-3"],
173 |             taskQueue: 'hello-world',
174 |             workflowId: wfid,
175 |             workflowRunTimeout: '10 minutes',
176 |         });
177 |         let fixed = await handle.result();
178 |         console.log( fixed );
179 |         let parsed = await csv_parser_action( fixed );
180 |         console.log( parsed );
181 |         return parsed;
182 |     }
183 | 
184 | }
185 | 
186 | 
187 | export async function csv_parser_action( csv: string ): Promise< any[] >
188 | {
189 |     // if text, which is a multi-line string, does not end in a " then add one
190 |     if ( csv[csv.length-1] != '"' )
191 |     {
192 |         csv += '"';
193 |     }
194 | 
195 |     // if text does not begin with a " then add one
196 |     if ( csv[0] != '"' )
197 |     {
198 |         csv = '"' + csv;
199 |     }
200 | 
201 |     // Wrap below in a Promise
202 |     let p = new Promise< any[] >( (resolve, reject) => {
203 |         csv_parse.parse( csv, {columns: true, skip_empty_lines: true, relaxQuotes: true, relax_column_count: true}, (err, records) => {
204 |             if ( err ) reject( err );
205 |             resolve( records );
206 |         } );
207 |     } );
208 |     return await p;
209 | }
210 | 


--------------------------------------------------------------------------------
/src/workflows/session.ts:
--------------------------------------------------------------------------------
  1 | import * as wf from '@temporalio/workflow';
  2 | import * as pf from './prompt';
  3 | import { defineSignal, setHandler, getExternalWorkflowHandle, defineQuery, workflowInfo, sleep} from '@temporalio/workflow';
  4 | 
  5 | export const getOutputBuffer = defineQuery<string>('getOutputBuffer');
  6 | 
  7 | interface UserInput {
  8 |     text: string;
  9 | }
 10 | interface UserOutput {
 11 |     text: string;
 12 | }
 13 | 
 14 | export const userInputSignal = defineSignal<[UserInput]>('input');
 15 | export const userOutputSignal = defineSignal<[UserOutput]>('output');
 16 | export const userOutputListenerSignal = defineSignal<[{listener_wf: string, target_wf: string}]>('output_listener');
 17 | 
 18 | export interface FrameInput
 19 | {
 20 |     text: string;
 21 | }
 22 | 
 23 | export interface Frame extends FrameInput
 24 | {
 25 |     ts: Date;
 26 |     logs: [Date, string] [];
 27 |     response?: string;
 28 | }
 29 | 
 30 | 
 31 | type SessionState = 'IDLE' | 'READ_WAIT' | 'MESSAGE_RECEIVED';
 32 | 
 33 | export class HumanInTheLoopSession< TFrame extends Frame >
 34 | {
 35 |     private _messages: TFrame[] = [];
 36 |     private _state : SessionState;
 37 |     private _inputBuffer: string = '';
 38 |     private _outputBuffer: string = '';
 39 |     private _outputListeners: string[] = [];
 40 | 
 41 |     constructor()
 42 |     {
 43 |         this._state = 'IDLE';
 44 |     }
 45 | 
 46 |     public addMessage(message: TFrame): void
 47 |     {
 48 |         this._messages.push(message);
 49 |     }
 50 | 
 51 |     public get messages(): TFrame[]
 52 |     {
 53 |         return this._messages;
 54 |     }
 55 | 
 56 |     public log( m: string ): void
 57 |     {
 58 |         console.log( m );
 59 |         // Add to last message, if it exists
 60 |         if ( this._messages.length > 0 )
 61 |         {
 62 |             this._messages[this._messages.length-1].logs.push( [new Date(), m] );
 63 |         }
 64 |         else
 65 |         {
 66 |             throw new Error("No messages to log to - this should not be possible");
 67 |         }
 68 |     }
 69 | 
 70 |     public logs(): [Date, string] []
 71 |     {
 72 |         let logs: [Date, string] [] = [];
 73 |         for ( let m of this._messages )
 74 |         {
 75 |             logs = logs.concat(m.logs);
 76 |         }
 77 |         return logs;
 78 |     }
 79 | 
 80 |     public get state(): SessionState
 81 |     {
 82 |         return this._state;
 83 |     }
 84 | 
 85 |     public set state( s: SessionState )
 86 |     {
 87 |         this._state = s;
 88 |     }
 89 | 
 90 |     public get inputBuffer(): string
 91 |     {
 92 |         let b = this._inputBuffer;
 93 |         this._inputBuffer = '';
 94 |         return b;
 95 |     }
 96 | 
 97 |     public get outputBuffer(): string
 98 |     {
 99 |         let b = this._outputBuffer;
100 |         this._outputBuffer = '';
101 |         return b;
102 |     }
103 | 
104 |     public recv( s: string ): void
105 |     {
106 |         this._inputBuffer += s;
107 |     }
108 | 
109 |     public send( s: string ): void
110 |     {
111 |         // Add to response from last frame, if it exists
112 |         if ( this._messages.length > 0 )
113 |         {
114 |             this._messages[this._messages.length-1].response = s;
115 |         }
116 | 
117 |         this._outputBuffer += s;
118 | 
119 |         this._outputListeners.forEach( (l) => {
120 |             let badwfs: string[] = [];
121 |             try
122 |             {
123 |                 let h = getExternalWorkflowHandle(l);
124 |                 h.signal('output', {text: s});
125 |                 this.removeOutputListener(l);
126 |             }
127 |             catch( e: any )
128 |             {
129 |                 console.log(`Error sending output to ${l}: ${e}. Removing from listeners`);
130 |                 badwfs.push(l);
131 |             }
132 |         });
133 |     }
134 | 
135 |     public addOutputListener( listener: string ): void
136 |     {
137 |         this._outputListeners.push( listener );
138 |     }
139 | 
140 |     public removeOutputListener( listener: string ): void
141 |     {
142 |         this._outputListeners = this._outputListeners.filter( (l) => l != listener );
143 |     }
144 | 
145 |     public async getInput( mh: HumanInTheLoopSession<any> ): Promise<string>
146 |     {
147 |     
148 |         // Wait for the user to respond
149 |         await wf.condition( () => mh.state == 'MESSAGE_RECEIVED' );
150 |         {
151 |             mh.state = 'IDLE';
152 |             let input = mh.inputBuffer;
153 |             return input;
154 |         }
155 |     }
156 |     
157 |     public async init(): Promise< void >
158 |     {
159 |         setHandler(userInputSignal, ({ text }: UserInput) => {
160 |             console.log(`Received input: ${text}`);
161 |             this.recv( text );
162 |             this.state = 'MESSAGE_RECEIVED';
163 |         });
164 | 
165 |         setHandler(userOutputListenerSignal, async ({listener_wf, target_wf}) => {
166 |             console.log(`Received output monitoring signal for ${target_wf} from ${listener_wf}`);
167 |             this.addOutputListener( listener_wf );
168 |         });
169 | 
170 |         setHandler(getOutputBuffer, () => {return this.outputBuffer;});
171 |     }
172 | }
173 | 
174 | 
175 | export async function send( wfid: string, message: FrameInput ): Promise<void>
176 | {
177 |     const handle = getExternalWorkflowHandle(wfid);
178 |     await handle.signal( 'input', message );
179 | }
180 | 
181 | export async function read( wfid: string ): Promise< string >
182 | {
183 |     let waiting = true;
184 |     let rtext = '';
185 |     setHandler( userOutputSignal, ( {text} ) => {
186 |         console.log( `read: ${text}` );
187 |         waiting = false;
188 |         rtext = text;
189 |     });
190 | 
191 |     let me = workflowInfo().workflowId;
192 |     const handle = getExternalWorkflowHandle(wfid);
193 |     await handle.signal( 'output_listener', {listener_wf: me, target_wf: wfid} );
194 |     await wf.condition( () => !waiting );
195 |     return rtext;
196 | }
197 | 
198 | export async function sendread( wfid: string, message: Frame ): Promise< string >
199 | {
200 |     let waiting = true;
201 |     let rtext = '';
202 |     setHandler( userOutputSignal, ( {text} ) => {
203 |         console.log( `read: ${text}` );
204 |         waiting = false;
205 |         rtext = text;
206 |     });
207 | 
208 |     let me = workflowInfo().workflowId;
209 |     const handle = getExternalWorkflowHandle(wfid);
210 |     await handle.signal( 'output_listener', {listener_wf: me, target_wf: wfid} );
211 |     await send( wfid, message );
212 |     await wf.condition( () => !waiting );
213 | 
214 |     return rtext;
215 | }
216 | 
217 | export async function testSession( first_message: Frame )
218 | {
219 |     let session = new HumanInTheLoopSession< Frame >();
220 |     session.init();
221 | 
222 |     // Start the session
223 |     session.addMessage( {... first_message} );
224 |     session.log( "Session started" );
225 | 
226 |     while( true )
227 |     {
228 |         let timeout_promise = sleep("10 seconds");
229 |         let input_promise = session.getInput( session );
230 |         let p = await Promise.race( [timeout_promise, input_promise] );
231 |         if ( p == await timeout_promise )
232 |         {
233 |             session.log( "Session timed out" );
234 |             break;
235 |         }
236 |         let input = await input_promise;
237 | 
238 |         session.addMessage({text: input, ts: new Date(), logs: []});
239 |         session.log( "User input: " + input );
240 |         let response = await pf.promptTemplate(
241 | `User: {{{input}}}
242 | Response:`, { input: input }, 10, 512 );
243 |         session.log( "Response: " + response );
244 |         session.send( response );
245 |     }
246 | }
247 | 


--------------------------------------------------------------------------------
/src/workflows/sql2llm.ts:
--------------------------------------------------------------------------------
  1 | import * as workflows from '../workflows';
  2 | import { proxyActivities, sleep } from '@temporalio/workflow';
  3 | import * as activities from '../activities';
  4 | import { Frame, HumanInTheLoopSession } from '../workflows/session';
  5 | import * as sql2llm from '../activities/sql2llm';
  6 | import { resourceLimits } from 'node:worker_threads';
  7 | import { actionLogger } from '../activities';
  8 | 
  9 | const { split_text_by_tokens, sql2llm_session_multiplexer, parse_and_fix_csv } = proxyActivities< typeof activities >({ startToCloseTimeout: '10 minute' });
 10 | 
 11 | export interface SQL2LLMInput extends Frame {
 12 |     dbname: string;
 13 |     query: string;
 14 |     fields: string[];
 15 |     context: string | null;
 16 |     result?: SQL2LLMOutput;
 17 | }
 18 | 
 19 | export interface SQL2LLMOutput extends Frame {
 20 |     query: string;
 21 |     fields: string[];
 22 |     result: any[];
 23 |     status: 200 | 500;
 24 |     error?: string;
 25 |     nSQL_query?: string;
 26 | }
 27 | 
 28 | class TSession extends HumanInTheLoopSession< SQL2LLMInput > {
 29 |     dbname: string;
 30 |     fields: string[];
 31 | 
 32 |     constructor( dbname: string, fields: string[] ) {
 33 |         super();
 34 |         this.dbname = dbname;
 35 |         this.fields = fields;
 36 |     }
 37 | }
 38 | 
 39 | export async function SQL2LLM_session( dbname: string, fields: string[], context: string | null ): Promise< void >
 40 | {
 41 |     let session = new TSession( dbname, fields );
 42 |     session.init();
 43 | 
 44 |     while( true )
 45 |     {
 46 |         let input = await session.getInput( session );
 47 |         session.addMessage( {logs: [], query: input, text: input, ts: new Date(), fields: fields, dbname: dbname, context: context} );
 48 |         session.log( "User input [dialog]: " + input );
 49 | 
 50 |         let ret = await SQL2LLM_wf( session.messages[session.messages.length-1], session );
 51 |         session.messages[session.messages.length-1].response = JSON.stringify(ret.result);
 52 |         session.messages[session.messages.length-1].query = ret.query;
 53 |         session.send( JSON.stringify(ret) );
 54 |     }
 55 | }
 56 | 
 57 | 
 58 | export async function SQL2LLM_wf( input: SQL2LLMInput, session: TSession ): Promise< SQL2LLMOutput >
 59 | {
 60 |     let fields = session.fields.map( (f) => { return `"${f}"` } );
 61 |     let columns = fields.join( ',' );
 62 |     let p = '';
 63 | 
 64 |     let context = '';
 65 |     if ( input.context )
 66 |     {
 67 |         context = `==================\nData:\n${input.context}\n==================\n\n`;
 68 |     }
 69 | 
 70 |     session.messages.reverse();
 71 |     let history = session.messages.reverse().map( (m) => {
 72 |         return `${context}${session.dbname}> ${m.query}\nESCAPED CSV RESULT\n==========\n${columns}\n`
 73 |     });
 74 |     session.messages.reverse();
 75 | 
 76 |     p += `{{{dbname}}}> .mode CSV
 77 | CSV mode on.
 78 | {{{history}}}`;
 79 | 
 80 |     let objs = {dbname: session.dbname, history: history, context: context};
 81 | 
 82 |     let noStopToken = false;
 83 |     let result = "";
 84 |     while( noStopToken == false )
 85 |     {
 86 |         let ret = await workflows.promptTemplate( p + result, objs, 48, 1024, 0, "gpt-3", `${session.dbname}>` );
 87 |         ret = ret.replace( /^\s+/, '' );
 88 |         ret = ret.replace( /\s+$/, '' );
 89 | 
 90 |         if ( ret.endsWith('==========') )
 91 |         {
 92 |             ret = ret.replace( /==========$/, '' );
 93 |         }
 94 | 
 95 |         if ( ret.length == 0 ) noStopToken = true;
 96 |         else {
 97 |             console.log( "Trying one more run.")
 98 |             if ( result.endsWith('"') && ret.startsWith('"') )
 99 |             {
100 |                 result += "\n";
101 |             }
102 |             result += ret;
103 |         }
104 |     }
105 |     // Take ret and parse it as CSV. Fix it if necessary.
106 |     let parsed: any[] = await parse_and_fix_csv( columns + "\n" + result );
107 |     // Walk through parsed, and remove the leading and trailing whitespace as well as leading and trailing " columns from all rows
108 |     for( let i = 0; i < parsed.length; i++ )
109 |     {
110 |         for( let j = 0; j < parsed[i].length; j++ )
111 |         {
112 |             parsed[i][j] = parsed[i][j].replace( /^\s+/, '' ).replace( /\s+$/, '' ).replace( /^'/, '' ).replace( /'$/, '' ).replace( /^"/, '' ).replace( /"$/, '' );
113 |         }
114 |     }
115 | 
116 | 
117 |     return  {
118 |         query: input.query, 
119 |         fields: input.fields,
120 |         result: parsed,
121 |         status: 200,
122 |         ts: new Date(),
123 |         logs: [],
124 |         text: input.query,
125 |     }
126 | 
127 | }
128 | 
129 | export async function SQL2LLM( dbname: string | null | undefined, q: string, context: string | null, natural_language_request: boolean ): Promise< SQL2LLMOutput >
130 | {
131 |     console.log( `Got query for ${dbname}: ${q}`);
132 |     let refined_prompt: string = "";
133 |     if ( natural_language_request )
134 |     {
135 |         refined_prompt = await workflows.promptTemplate(
136 | `Natural language: {{{query}}}
137 | Database: {{{dbname}}}
138 | nSQL Natural language version: `, {query: q, dbname: dbname}, 10, 256, 1, "finetuned-gpt-neox-20b"
139 |         );
140 |         refined_prompt = refined_prompt.replace( /^\s+/, '' ).replace( /\s+$/, '' );
141 |         q = refined_prompt.replace( /[\r\n]+$/, '' );
142 |     }
143 | 
144 |     let fieldnames_json = '["' + await workflows.promptTemplate(
145 | `Take the following SQL query: {{{sql}}}
146 | 
147 | What are the field names in the result set?
148 | 
149 | JSON list: [ "`, {sql: q}, 5, 128, 0.0, "text-curie-001" );
150 |     let fields = JSON.parse( fieldnames_json );
151 | 
152 |     // If dbname is null, let's prompt text-curie-001 for it.
153 |     if ( dbname == null )
154 |     {
155 |         dbname = (await workflows.promptTemplate(
156 | `Take the following SQL query: {{{sql}}}
157 | Database:`, {sql: q}, 1, 32, 0.0, "text-curie-001" )).replace( /^\s+/, '' ).replace( /\s+$/, '' );
158 |     }
159 | 
160 |     if ( context )
161 |     {
162 |         let context_chunks: string[] = await split_text_by_tokens( context, 2048 );
163 | 
164 |         let results: SQL2LLMOutput[] = [];
165 |         if ( context_chunks.length > 1 )
166 |         {
167 |             let promises = context_chunks.map( async (chunk) => {
168 |                 let res = await sql2llm_session_multiplexer( {dbname: dbname!, fields: fields, query: q, text: q, ts: new Date(), logs: [], context: chunk} );
169 |                 
170 |                 // Add result to the results array.
171 |                 res.result.forEach( (r) => { results.push(r) } );
172 |             });
173 |             await Promise.all( promises );
174 | 
175 |             // Remove duplicates.
176 |             let uniqueResults = Array.from(new Set(results.map( r => JSON.stringify(r) )));
177 |             // Convert back to object.
178 |             uniqueResults = uniqueResults.map( r => JSON.parse(r) );
179 |             console.log( `${results.length} rows returned.\n\n` );
180 |             return( {fields: fields, logs: [], query: q, result: uniqueResults, status: 200, text: q, ts: new Date()} );
181 |         }
182 |         else
183 |         {
184 |             let res = await sql2llm_session_multiplexer( {dbname: dbname, fields: fields, query: q, text: q, ts: new Date(), logs: [], context: context} );
185 |             if ( refined_prompt.length > 0 )
186 |                 res.nSQL_query = refined_prompt;
187 |             console.log( `${res.result.length} rows returned.\n\n` );
188 |             return( res );
189 |         }
190 |     }
191 |     else
192 |     {
193 |         let res = await sql2llm_session_multiplexer( {dbname: dbname, fields: fields, query: q, text: q, ts: new Date(), logs: [], context: context} );
194 |         if ( refined_prompt.length > 0 )
195 |             res.nSQL_query = refined_prompt;
196 |         console.log( `${res.result.length} rows returned.\n\n` );
197 |         return( res );
198 |     }
199 | }
200 | 


--------------------------------------------------------------------------------
/src/activities/llm.ts:
--------------------------------------------------------------------------------
  1 | import axios from 'axios';
  2 | import { Configuration, OpenAIApi } from 'openai';
  3 | import fs from 'fs';
  4 | import {NLPCLOUD_TOKEN, OPENAI_TOKEN} from '../config';
  5 | 
  6 | /**
  7 |  * @function generateText
  8 |  *
  9 |  * @param {string} modelName Model to use for generation
 10 |  * @param {string} text Text to use as prompt (input)
 11 |  * @param {number} minLength Minimum length of generated text (not always respected by the model)
 12 |  * @param {number} maxLength Maximum length of generated text (this is respected by the model)
 13 |  * @param {boolean|null} lengthNoInput Calculate length based on prompt (input) text
 14 |  * @param {string|null} endSequence Stop generation when this sequence is encountered
 15 |  * @param {boolean} removeInput Remove prompt (input) text from generated text (don't touch this unless you know what you're doing)
 16 |  * @param {boolean|null} doSample Use sampling instead of greedy decoding (don't touch unless you know what you're doing)
 17 |  * @param {number|null} numBeams Number of beams for beam search (don't touch unless you know what you're doing)
 18 |  * @param {boolean|null} earlyStopping Stop when at least num_beams sentences are finished per batch (don't touch unless you know what you're doing)
 19 |  * @param {number|null} noRepeatNgramSize If set to int > 0, all ngrams of that size can only occur once (don't touch unless you know what you're doing)
 20 |  * @param {number|null} numReturnSequences Number of returned sequences for each element in the batch (don't touch unless you know what you're doing)
 21 |  * @param {number|null} topK Number of highest probability vocabulary tokens to keep for top-k-filtering (don't touch unless you know what you're doing)
 22 |  * @param {number|null} topP The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling (don't touch unless you know what you're doing)
 23 |  * @param {number|null} temperature How much to temper the probabilities. 0.0 will generate the same text every time, 1.0 will generate random text. You usually probably want 0.0, but start tuning here if you're not getting good results.
 24 |  * @param {number|null} repetitionPenalty Penalty to apply if a sequence of words is repeated within a generated sequence (don't touch unless you know what you're doing)
 25 |  * @param {number|null} lengthPenalty Penalty to apply to the length of the sequence. (don't touch unless you know what you're doing)
 26 |  * @param {boolean|null} badWords
 27 |  * @param {boolean|null} removeEndSequence Remove end sequence from generated text (don't touch unless you know what you're doing)
 28 |  * @returns {Promise<string>} Generated text
 29 |  */
 30 | export async function generateText(
 31 |   modelName: string,
 32 |   text: string,
 33 |   minLength: number = 10,
 34 |   maxLength: number = 20,
 35 |   lengthNoInput: boolean | null = null,
 36 |   endSequence: string | null = null,
 37 |   removeInput: boolean = true,
 38 |   doSample: boolean | null,
 39 |   numBeams: number | null,
 40 |   earlyStopping: boolean | null,
 41 |   noRepeatNgramSize: number | null,
 42 |   numReturnSequences: number | null,
 43 |   topK: number | null,
 44 |   topP: number | null,
 45 |   temperature: number | null,
 46 |   repetitionPenalty: number | null,
 47 |   lengthPenalty: number | null,
 48 |   badWords: boolean | null,
 49 |   removeEndSequence: boolean | null
 50 | ) {
 51 |   const response = await axios.post(
 52 |     `https://api.nlpcloud.io/v1/gpu/${modelName}/generation`,
 53 |     {
 54 |       text: text,
 55 |       min_length: minLength,
 56 |       max_length: maxLength,
 57 |       length_no_input: lengthNoInput,
 58 |       end_sequence: endSequence,
 59 |       remove_input: removeInput,
 60 |       do_sample: doSample,
 61 |       num_beams: numBeams,
 62 |       early_stopping: earlyStopping,
 63 |       no_repeat_ngram_size: noRepeatNgramSize,
 64 |       num_return_sequences: numReturnSequences,
 65 |       topk: topK,
 66 |       topp: topP,
 67 |       temperature: temperature,
 68 |       repetition_penalty: repetitionPenalty,
 69 |       length_penalty: lengthPenalty,
 70 |       bad_words: badWords,
 71 |       remove_end_sequence: removeEndSequence
 72 |     },
 73 |     {
 74 |       headers: {
 75 |         Authorization: `Token ${NLPCLOUD_TOKEN}`,
 76 |         'Content-Type': 'application/json'
 77 |       }
 78 |     }
 79 |   );
 80 |   return response.data.generated_text;
 81 | }
 82 | 
 83 | /**
 84 |  * Generates text using OpenAI's text completion API.
 85 |  * @param {string} text The text to use as a prompt.
 86 |  * @param {string} apikey The API key to use.
 87 |  * @param {number} min_length The minimum length of the generated text.
 88 |  * @param {number} max_length The maximum length of the generated text.
 89 |  * @param {number} temperature The temperature parameter for the text generation.
 90 |  * @param {number} top_p The top_p parameter for the text generation.
 91 |  * @returns {Promise<string>} The generated text.
 92 |  */
 93 | export async function generateTextOpenAI(
 94 |   text: string,
 95 |   min_length: number,
 96 |   max_length: number,
 97 |   temperature: number,
 98 |   top_p: number,
 99 |   model:
100 |   | 'text-davinci-003'
101 |   | 'text-davinci-002'
102 |   | 'code-davinci-002'
103 |     | 'text-curie-001' = 'text-davinci-003',
104 |   stopToken: string | string[] | null = null
105 | ): Promise<string> {
106 |   const config = new Configuration({
107 |     apiKey: OPENAI_TOKEN
108 |   });
109 |   const openai = new OpenAIApi(config);
110 |   const response = await openai.createCompletion({
111 |     model: model,
112 |     prompt: text,
113 |     max_tokens: max_length,
114 |     temperature: temperature,
115 |     top_p: top_p,
116 |     frequency_penalty: 1.0,
117 |     presence_penalty: 1.0,
118 |     stop: stopToken
119 |   });
120 |   return response.data.choices![0].text!;
121 | }
122 | 
123 | export async function retryGenerateTextOpenAI(
124 |   text: string,
125 |   min_length: number,
126 |   max_length: number,
127 |   temperature: number,
128 |   top_p: number,
129 |   model:
130 |   | 'text-davinci-003'
131 |   | 'text-davinci-002'
132 |   | 'code-davinci-002'
133 |     | 'text-curie-001' = 'text-davinci-002',
134 |   stopToken: string | string[] | null = null,
135 |   delaySeconds = 1
136 | ): Promise<string> {
137 |   // Log everything to log/<timestamp>.log
138 |   const log = fs.createWriteStream(`log/${new Date().toISOString()}.log`, {
139 |     flags: 'a'
140 |   });
141 |   log.write(
142 |     `Min length: ${min_length} Max lenght: ${max_length} Temperature: ${temperature} Top_p: ${top_p} Model: ${model} Stop token: ${stopToken}\n`
143 |   );
144 |   log.write(`\n${text}\n`);
145 |   console.log(
146 |     `Min length: ${min_length} Max lenght: ${max_length} Temperature: ${temperature} Top_p: ${top_p} Model: ${model} Stop token: ${stopToken}`
147 |   );
148 | 
149 |   await new Promise((resolve) => setTimeout(resolve, Math.random() * 1500));
150 | 
151 |   try {
152 |     let result = await generateTextOpenAI(
153 |       text,
154 |       min_length,
155 |       max_length,
156 |       temperature,
157 |       top_p,
158 |       model,
159 |       stopToken
160 |     );
161 |     console.log(`Result: ${result.length} retruend`);
162 |     log.write(`\n=======================================\n${result}\n`);
163 |     log.close();
164 |     return result;
165 |   } catch (error: any) {
166 |     log.write(`\n=======================================\n${error}\n`);
167 |     log.write(
168 |       `\n=======================================\n${JSON.stringify(
169 |         error.response.data
170 |       )}\n`
171 |     );
172 |     log.close();
173 | 
174 |     if (error.response && error.response.status == 429) {
175 |       let delay = Math.random() * 1000 * delaySeconds;
176 |       console.log(`429. Waiting ${delay} ms and retrying.`);
177 |       // Retry
178 |       // Wait randomly up to 10 seconds
179 |       await new Promise((resolve) => setTimeout(resolve, delay));
180 |       return await retryGenerateTextOpenAI(
181 |         text,
182 |         min_length,
183 |         max_length,
184 |         temperature,
185 |         top_p,
186 |         model,
187 |         stopToken,
188 |         delaySeconds ^ 2
189 |       );
190 |     }
191 |   }
192 | 
193 |   throw new Error('Failed to generate text with non-temporary error');
194 | }
195 | 
196 | 
197 | export async function keywordKeyphraseExtraction( text: string, modelName: "fast-gpt-j" | "finetuned-gpt-neox-20b" = "fast-gpt-j" ): Promise< string[] > {
198 |   const response = await axios.post(
199 |     `https://api.nlpcloud.io/v1/gpu/${modelName}/kw-kp-extraction`,
200 |     {
201 |       text: text,
202 |     },
203 |     {
204 |       headers: {
205 |         Authorization: `Token ${NLPCLOUD_TOKEN}`,
206 |         'Content-Type': 'application/json'
207 |       }
208 |     }
209 |   );
210 |   return [ ...new Set< string >( response.data.keywords_and_keyphrases ) ];
211 | }
212 | 
213 | export async function Classification( text: string, categories: string[], modelName: "bart-large-mnli-yahoo-answers" | "xlm-roberta-large-xnli" | "fast-gpt-j" | "finetuned-gpt-neox-20b" = "bart-large-mnli-yahoo-answers" ): Promise< {labels: string[]; scores: number[]} > {
214 |   const response = await axios.post(
215 |     `https://api.nlpcloud.io/v1/gpu/${modelName}/classification`,
216 |     {
217 |       text: text,
218 |       labels: categories,
219 |       "multi_class": true,
220 |     },
221 |     {
222 |       headers: {
223 |         Authorization: `Token ${NLPCLOUD_TOKEN}`,
224 |         'Content-Type': 'application/json'
225 |       }
226 |     }
227 |   );
228 |   console.log( response.data );
229 |   return { labels: response.data.labels, scores: response.data.scores };
230 | }
231 | 


--------------------------------------------------------------------------------
/src/workflows/prompt.ts:
--------------------------------------------------------------------------------
  1 | import { proxyActivities } from '@temporalio/workflow';
  2 | import { default as mustache } from 'mustache';
  3 | 
  4 | import * as llm from './llm';
  5 | import * as elastic from '../activities/elastic';
  6 | import * as tokenizer from '../activities/tokenizer';
  7 | import * as vector_search from '../activities/vector_search';
  8 | 
  9 | import { logger } from './util';
 10 | import { embeddingsFromTextSearch } from './embeddings';
 11 | 
 12 | const { tokenize_native } = proxyActivities< typeof tokenizer>({ startToCloseTimeout: '10 minute' });
 13 | const { es_context } = proxyActivities<typeof vector_search>({ startToCloseTimeout: '10 minute' });
 14 | 
 15 | 
 16 | export async function promptTemplate<T>(
 17 |   template: string,
 18 |   variables: T,
 19 |   minLength: number = 1,
 20 |   maxLength: number = 50,
 21 |   temperature: number = 0.0,
 22 |   model: llm.llm_models  = 'gpt-3',
 23 |   endSequence: string | null = null,
 24 | ): Promise<string> {
 25 |   console.log("OK, got into promptTemplate")
 26 |   let prompt = mustache.render(template, variables);
 27 |   console.log("Rendered mustache:\n" + prompt);
 28 |   let response = await llm.minGenerate(
 29 |     prompt,
 30 |     minLength,
 31 |     maxLength,
 32 |     temperature,
 33 |     endSequence,
 34 |     model
 35 |   );
 36 |   console.log("Got response:\n" + response);
 37 |   return response;
 38 | }
 39 | 
 40 | const token_word_ratio = 0.5;
 41 | /**
 42 |  * @function splitPromptTemplateByTokens
 43 |  * @param data
 44 |  * @param template
 45 |  * @param minLength
 46 |  * @param maxLength
 47 |  * @param temperature
 48 |  * @returns
 49 |  */
 50 | export async function splitPromptTemplateByTokens(
 51 |   data: string,
 52 |   template: string,
 53 |   minLength: number = 1,
 54 |   maxLength: number = 50,
 55 |   temperature: number = 0.0
 56 | ): Promise<Array<[string, string]>> {
 57 |   let template_token_length = await tokenize_native(template).then(
 58 |     (r) => r.length
 59 |   );
 60 | 
 61 |   let max_tokens = 1024;
 62 |   let tokens_left = max_tokens - maxLength - template_token_length;
 63 |   let tokens = await tokenize_native(data);
 64 | 
 65 |   let token_count = Math.floor(tokens_left * token_word_ratio);
 66 |   let token_split = splitUp(tokens, token_count);
 67 |   let token_split_promises = token_split.map(async (t) => {
 68 |     let p = await promptTemplate(
 69 |       template,
 70 |       { chunk: t.join(' ') },
 71 |       minLength,
 72 |       maxLength,
 73 |       temperature
 74 |     );
 75 |     return [t.join(' '), p];
 76 |   });
 77 |   let token_split_results = await Promise.all(token_split_promises);
 78 |   let finalret = new Array<[string, string]>();
 79 |   token_split_results.map((r) => {
 80 |     finalret.push([r[0], r[1]]);
 81 |   });
 82 | 
 83 |   return finalret;
 84 | }
 85 | 
 86 | export async function splitPromptTemplateByLinesOfTokens(
 87 |   data: string,
 88 |   template: string,
 89 |   minLength: number = 1,
 90 |   maxLength: number = 50,
 91 |   temperature: number = 0.0
 92 | ): Promise<Array<[string, string, number[]]>> {
 93 |   let template_token_length = await tokenize_native(template).then(
 94 |     (r) => r.length
 95 |   );
 96 | 
 97 |   let max_tokens = 1768;
 98 |   let tokens_left = max_tokens - maxLength - template_token_length;
 99 |   let lines: Array<string> = data.split('\n');
100 | 
101 |   // Loop through lines and add them to a prompt until it fills up max_tokens, and then run it through promptTemplate
102 |   let current_lines: string = '';
103 |   let current_lines_tokens = 0;
104 |   let current_lines_linenos: number[] = [];
105 |   let chunks = new Array<[string, number[]]>();
106 | 
107 |   for (let i = 0; i < lines.length; ++i) {
108 |     let tokens = await tokenize_native(lines[i]);
109 |     if (current_lines_tokens + tokens.length <= tokens_left) {
110 |       current_lines_linenos.push(i);
111 | 
112 |       if (current_lines == '') {
113 |         current_lines += lines[i];
114 |       } else {
115 |         current_lines += '\n' + lines[i];
116 |       }
117 |       current_lines_tokens += tokens.length;
118 |     } else {
119 |       chunks.push([current_lines, current_lines_linenos]);
120 |       current_lines_tokens = 0;
121 |       current_lines = '';
122 |       current_lines_linenos = [];
123 |       // i --; // Rerun the line  TODO: fix somehow
124 |     }
125 |   }
126 | 
127 |   if (current_lines.length > 0) {
128 |     chunks.push([current_lines, [...current_lines_linenos, lines.length]]);
129 |   }
130 | 
131 |   console.log(`We are processing ${chunks.length} chunks`);
132 | 
133 |   /*   let template_token_length = await tokenize_native(template).then(
134 |      (r) => r.length
135 |    );
136 |  
137 |  */
138 | 
139 |   let allchunks = chunks.map(
140 |     async (chunk, i, a): Promise<[string, string, number[]]> => {
141 |       let p = promptTemplate(
142 |         template,
143 |         { chunk: chunk[0] },
144 |         minLength,
145 |         maxLength,
146 |         temperature
147 |       );
148 |       return [chunk[0], await p, chunk[1]];
149 |     }
150 |   );
151 | 
152 |   return Promise.all(allchunks);
153 | }
154 | 
155 | function splitUp<T>(arr: T[], size: number): T[][] {
156 |   var newArr = [];
157 |   for (var i = 0; i < arr.length; i += size) {
158 |     newArr.push(arr.slice(i, i + size));
159 |   }
160 |   return newArr;
161 | }
162 | 
163 | export async function promptReducer(
164 |   inPrompt: string,
165 |   variables: any,
166 |   preamble: string,
167 |   instructions: string
168 | ): Promise<string> {
169 |   let xprompt = mustache.render(inPrompt, variables);
170 |   return xprompt;
171 | 
172 |   //   let r = await promptTemplate(
173 |   // `{{{preamble}}}
174 |   // \`\`\`
175 |   // {{{inPrompt}}}
176 |   // \`\`\`
177 |   // {{{instructions}}}`, {inPrompt: xprompt, preamble: preamble, instructions: instructions}, 10, 2000, 1 );
178 | 
179 |   // return r;
180 | }
181 | 
182 | export async function translateQuerySpaceToAnswerSpace(
183 |   query: string
184 | ): Promise<string> {
185 |   let p = `Translate from "query space" to "answer space" while preserving the unknowns as variables.
186 |   
187 |   Query: How old is my dog?
188 |   Answer: My dog is <age> years old.
189 |   
190 |   Query: What's John's daughter's name?
191 |   Answer: John's daughter's name is <name>.
192 |   
193 |   Query: {{{query}}}
194 |   Answer:`;
195 | 
196 |   let result = await promptTemplate(p, { query: query }, 2, 50, 0.0);
197 |   return result;
198 | }
199 | 
200 | export async function keywordsFromQuery(query: string): Promise<string> {
201 |   let p = `We are going to extract full-text search queries for the following query:
202 |   
203 |   Query: {{{query}}} 
204 |   
205 |   Extract all of the relevant keywords, as well as any related keywords that you think might be useful. Separate each keyword with a comma:
206 |   `;
207 | 
208 |   let result = await promptTemplate(p, { query: query }, 2, 50, 0.0);
209 |   return result;
210 | }
211 | 
212 | export interface QandA {
213 |   question: string;
214 |   answer: string;
215 |   source_path: string;
216 |   source_line: number;
217 | }
218 | 
219 | export async function questionAndAnswer(
220 |   index: string,
221 |   query: string
222 | ): Promise<QandA> {
223 |   // Translate to answer space
224 |   //let answer = await translateQuerySpaceToAnswerSpace( query );
225 | 
226 |   // Search for the answer within the index from embeddingsFromTextSearch
227 |   let results = await embeddingsFromTextSearch< any >( // TODO: this should not be an any
228 |     index,
229 |     await keywordsFromQuery(query),
230 |     5
231 |   );
232 |   // Iterate through results, and use esquery to find and stitch together nearby lines
233 |   let context: string[] = [];
234 |   for (let r of results) {
235 |     let nearbylines = await es_context(index, r._source.path, r._source.line);
236 |     //let nearbylines = await es_query( `select * from ${index} limit 5` );
237 |     for (let n of <any[]>nearbylines) {
238 |       context.push(n.text);
239 |     }
240 |   }
241 | 
242 |   let prompt = `Here is a question:
243 |   
244 |   Question: {{{query}}}
245 |   
246 |   Potentially relevant semantic search results:
247 |   
248 |   {{{context}}}
249 |   
250 |   Output a JSON document which includes the following keys: [answer, answer_source, additional_answer_information, "answer_state (complete, partial, unknown)", "answer_confidence (1-100%)" ]
251 |   
252 |   { "answer": "`;
253 |   let result =
254 |     `{ "answer": "` +
255 |     (await promptTemplate(
256 |       prompt,
257 |       { query: query, context: context.join('\n') },
258 |       2,
259 |       250,
260 |       0.0
261 |     ));
262 |   try {
263 |     let sresult = JSON.parse(result);
264 |     return sresult;
265 |   } catch (e: any) {
266 |     let fixed = await fixJSON(result);
267 |     return JSON.parse(fixed);
268 |   }
269 | }
270 | 
271 | async function fixJSON(text: string): Promise<string> {
272 |   let result = await promptTemplate(
273 |     `Fix the following JSON:
274 |   
275 |   {{{text}}}`,
276 |     { text: text },
277 |     2,
278 |     1000,
279 |     0.0
280 |   );
281 | 
282 |   return result;
283 | }
284 | 
285 | async function testAnswerFromQuestion(
286 |   question: string,
287 |   answer: string
288 | ): Promise<boolean> {
289 |   let p = `Question: {{{question}}}
290 |   Answer: {{{answer}}}
291 |   
292 |   Is the answer correct? yes/no:`;
293 |   let result = await promptTemplate(
294 |     p,
295 |     { question: question, answer: answer },
296 |     2,
297 |     5,
298 |     0.0
299 |   );
300 |   return result.toLowerCase().indexOf('yes') >= 0;
301 | }
302 | 
303 | /**
304 |  *
305 |  * @param text Text to summarize which is potentially larger than the context-size of the LLM model
306 |  * @param primarySummarizeTemplate Template to use for the map step summary
307 |  * @param reduceSummarizeTemplate Teomplate to use for the reduce step summary
308 |  * @returns A summary of the text
309 |  */
310 | export async function mapreduce_summary(
311 |   text: string,
312 |   primarySummarizeTemplate: string = 'Analyze the following text for a detailed summary.\n\n{{{chunk}}}\n\nProvide a detailed summary:',
313 |   reduceSummarizeTemplate: string = 'These are a series of summaries that you are going to summarize:\n\n{{{chunk}}}\n\nProvide a detailed summary in the 3rd party passive voice, removing duplicate information:'
314 | ): Promise<string> {
315 |   if (text == null || text.length == 0) return '';
316 | 
317 |   let completions = await mapPromptTemplate(text, primarySummarizeTemplate);
318 |   return reducePromptTemplate(completions, reduceSummarizeTemplate);
319 | }
320 | 
321 | /**
322 |  * mapreduce_question_text
323 |  * @param text Text to be processed
324 |  * @param primaryQuestionTemplate Template for the primary question
325 |  * @param reduceQuestionTemplate Template for the reduce question
326 |  * @returns A promise that resolves to the final answer
327 |  */
328 | export async function mapreduce_question_text(
329 |   text: string,
330 |   question: string,
331 |   primarySummarizeTemplate: string = `Read the following text:\n\n{{{chunk}}}\n\nQuestion:\n\n${question}\n\nAnswer:`,
332 |   reduceSummarizeTemplate: string = `Question: ${question}\nPossible Answers:\n{{{chunk}}}\n\\n\nQuestion: ${question}\nBest Answer:`
333 | ): Promise<string> {
334 |   if (text == null || text.length == 0) return '';
335 | 
336 |   let completions = await mapPromptTemplate(text, primarySummarizeTemplate);
337 |   let possible_answers = completions.map(
338 |     ([c, p, l], i, a) => `Possible Answer: "${c}"\n`
339 |   );
340 |   return reducePromptTemplate(completions, reduceSummarizeTemplate);
341 | }
342 | 
343 | /**
344 |  * mapPromptTemplate
345 |  * @param text Input text to be processed
346 |  * @param primarySummarizeTemplate Prompt template run on each chunk of text
347 |  * @returns List of completions from running prompt primarySummarizeTemplate on each chunk of text
348 |  */
349 | export async function mapPromptTemplate(
350 |   text: string,
351 |   primarySummarizeTemplate: string = 'Analyze the following text for a detailed summary.\n\n{{{chunk}}}\n\nProvide a detailed summary:'
352 | ): Promise<string[]> {
353 |   logger(`We are splitting a piece of text ${text.length} characters long.`);
354 |   let summaries = await splitPromptTemplateByLinesOfTokens(
355 |     text,
356 |     primarySummarizeTemplate,
357 |     5,
358 |     1024,
359 |     0.2
360 |   );
361 |   console.log(summaries);
362 |   let completions = summaries.map(([c, p, l], i, a) => `${p}`);
363 |   console.log(completions);
364 |   return completions;
365 | }
366 | 
367 | /**
368 |  * reducePromptTemplate
369 |  *
370 |  * @param completions Array of completions, usually output from mapPromptTemplate
371 |  * @param reduceTemplate Prompt template run on completions to reduce them to a single summary
372 |  * @returns Final return value of the reduce prompt templates being run on completions from the map prompt templates.
373 |  */
374 | export async function reducePromptTemplate(
375 |   completions: string[],
376 |   reduceTemplate: string = 'These are a series of summaries that you are going to summarize:\n\n{{{chunk}}}\n\nProvide a detailed summary, but removing duplicate information:'
377 | ): Promise<string> {
378 |   if (completions.length == 1) {
379 |     return completions[0];
380 |   }
381 | 
382 |   if (completions.length == 0) {
383 |     return '<Nothing to summarize.>';
384 |   }
385 | 
386 |   let iterations = 0;
387 |   while (true) {
388 |     logger(`Iteration ${iterations++}. We have ${completions.length} left.`);
389 |     let summary = await splitPromptTemplateByLinesOfTokens(
390 |       completions.join('\n\n'),
391 |       reduceTemplate,
392 |       500,
393 |       1000,
394 |       0.2
395 |     );
396 |     let summary_completions = summary.map(([p, c, l]) => `${c}\n\n`);
397 |     if (summary_completions.length == 1) {
398 |       return summary_completions[0];
399 |     }
400 |     completions = summary_completions;
401 |   }
402 | }
403 | 


--------------------------------------------------------------------------------
/src/workflows/chatbot.ts:
--------------------------------------------------------------------------------
  1 | import { default as mustache } from 'mustache';
  2 | 
  3 | import * as prompt from './prompt';
  4 | import * as elastic from '../activities/elastic';
  5 | import * as util from '../activities/util';
  6 | import * as embeddings_search from '../activities/vector_search';
  7 | 
  8 | import { proxyActivities, uuid4 } from '@temporalio/workflow';
  9 | import { ChatSession, Personality } from '../clones';
 10 | import {
 11 |   embeddingsFromTextSearch,
 12 |   translateQuerySpaceToAnswerSpace
 13 | } from '../workflows';
 14 | import { storeEmbeddings } from './embeddings';
 15 | import { minGenerate } from './llm';
 16 | import { mapreduce_summary } from './prompt';
 17 | import { Cog, Cogset } from '../cogs/cog';
 18 | 
 19 | /*
 20 |  * Chatbots require prompt templates, elasticsearch queries and indexing, and the ability to execute local workflows.
 21 |  */
 22 | 
 23 | const { promptReducer, promptTemplate } = proxyActivities<typeof prompt>({
 24 |   startToCloseTimeout: '10 minute'
 25 | });
 26 | const {
 27 |   es_query,
 28 |   es_index,
 29 | } = proxyActivities<typeof elastic>({ startToCloseTimeout: '10 minute' });
 30 | const {
 31 |   md5sum,
 32 |   wf_axios,
 33 |   executeLocalWorkflow,
 34 | } = proxyActivities<typeof util>({ startToCloseTimeout: '10 minute' });
 35 | const { init_elasticsearch_mappings } = proxyActivities<typeof embeddings_search>({ startToCloseTimeout: '10 minute' });
 36 | 
 37 | 
 38 | let personality_chuck: Personality = {
 39 |   name: 'Chuck',
 40 |   personality: `Chuck is a cowboy from Arlington, Texas. He speaks with a giant Texas drawl. He's a really nice guy. He got his degree from ITT in database administration. When he talks, it's very clear that he's from Texas. Chuck is a great guy, very dilligent, and extremely helpful.`,
 41 |   instructions: []
 42 | };
 43 | 
 44 | let personaltiy_nlp: Personality = {
 45 |   name: 'Nilp',
 46 |   personality:
 47 |     'Nilp is a very helpful robot.',
 48 |   instructions: [
 49 |     'Use the context to try to get a better sense of what is being asked of you, but ignore it if it is not relevant.'
 50 |   ]
 51 | };
 52 | 
 53 | /**
 54 |  * Chat history interface
 55 |  */
 56 | interface ChatHistory {
 57 |   sender: string;
 58 |   message: string;
 59 | }
 60 | 
 61 | /**
 62 |  * @function Invocation of chatbot function to generate a response to a message
 63 |  * @param {number} context_length Number of previous messages to use as context
 64 |  * @param {string} user User to respond to
 65 |  * @param {string} message  Message to respond to
 66 |  * @param {ChatSession} session Chat session to use (default: new session)
 67 |  * @param {boolean} runCogs Whether to run cogs (default: true)
 68 |  * @example <caption> Example of a chatbot invocation </caption>
 69 |  * const response = await Chatbot( {name: "Gandalf", personaltity: "Wizard. Good, but unpredictable. Extremely powerful and wise."}, 50, 'user555555', 'Hello, there!' );
 70 |  * @returns {Promise<string>}
 71 |  */
 72 | export async function Chatbot(
 73 |   personality: Personality,
 74 |   context_length: number,
 75 |   user: string,
 76 |   message: string,
 77 |   session: ChatSession = {
 78 |     personality: personality,
 79 |     user: user,
 80 |     messages: [],
 81 |     uuid: uuid4()
 82 |   },
 83 |   runCogs: boolean = true
 84 | ): Promise<string> {
 85 |   let original_message = message;
 86 | 
 87 |   let prompt_leading: string = '';
 88 |   if (message.indexOf('|') != -1) {
 89 |     let parts = message.split('|', 2);
 90 |     message = parts[0].trim();
 91 |     prompt_leading = parts[1].trim();
 92 |   }
 93 | 
 94 |   let ch = new Array<ChatHistory>();
 95 | 
 96 |   // Add in the last few messages from the user
 97 |   try {
 98 |     let rows = await es_query(
 99 |       `SELECT user, text FROM messages ORDER BY ts DESC LIMIT ${context_length}`
100 |     );
101 |     rows.reverse();
102 |     //let rows = await es_search( 'messages', { sort: [{ ts: { order: 'desc' } }], size: context_length } );
103 |     //console.log( JSON.stringify(slack_history) );
104 |     rows.map((r: any) => {
105 |       if (r.user && r.text && r.text.length < 300)
106 |         ch.push({ sender: r.user, message: r.text });
107 |     });
108 |   } catch {}
109 | 
110 |   let extra_ch = new Array<ChatHistory>();
111 | 
112 |   let answermap: Map<string, number> = new Map();
113 | 
114 |   // Now we're going to add in a few messages from a semantic search
115 |   try {
116 |     console.log(
117 |       `We're searching for messages that are similar to "${message}"`
118 |     );
119 |     // Invert the query into answer space
120 |     let answer = await translateQuerySpaceToAnswerSpace(message);
121 |     let srows = await embeddingsFromTextSearch('messages', answer, 100);
122 |     // Resort srows by 'ts' property in ascending order
123 |     srows.sort((a: any, b: any) => {
124 |       return a.ts - b.ts;
125 |     });
126 | 
127 |     srows.map((r1: any) => {
128 |       let r = r1._source;
129 |       if (
130 |         r.user &&
131 |         r.text &&
132 |         !answermap.has(`${r.user}: ${r.text}`) &&
133 |         extra_ch.length < 20 &&
134 |         r.text.length < 300
135 |       ) {
136 |         extra_ch.push({ sender: r.user, message: r.text });
137 |         answermap.set(`${r.user}: ${r.text}`, 1);
138 |       }
139 |     });
140 |     console.log(
141 |       `We got ${extra_ch.length} messages from the semantic search which we wittled down from ${srows.length} messages`
142 |     );
143 |   } catch {}
144 | 
145 |   try {
146 |     session.messages.map((r: any) => {
147 |       if (
148 |         r.user &&
149 |         r.text &&
150 |         !answermap.has(`${r.user}: ${r.text}`) &&
151 |         extra_ch.length < 20
152 |       )
153 |         ch.push({ sender: r.user, message: r.text });
154 |       answermap.set(`${r.user}: ${r.text}`, 1);
155 |     });
156 |   } catch {}
157 | 
158 |   // Last step: we're going to do a semantic search of potential teachings
159 |   let fewshots = new Array<any>();
160 |   try {
161 |     let srows = await embeddingsFromTextSearch('teachings', message, 100);
162 |     srows.map((r1: any) => {
163 |       let r = r1._source;
164 |       if (fewshots.length < 5) {
165 |         fewshots.push({
166 |           user: r.user,
167 |           prompt: r.prompt,
168 |           completion: r.completion
169 |         });
170 |       }
171 |     });
172 |     console.log(
173 |       `We got ${fewshots.length} messages from the teachings search which we wittled down from ${srows.length} messages`
174 |     );
175 |   } catch (e: any) {
176 |     console.log(`Error doing the teachings search: ${e.toString()}`);
177 |   }
178 | 
179 |   let fewshots_a = fewshots.map((r: any) => {
180 |     // Use Mustache to render the prompt component
181 |     let prompt = `{{{user}}}: {{{prompt}}}
182 | {{{name}}}: {{{completion}}}\n`;
183 | 
184 |     let completion = r.completion;
185 |     let user = r.user;
186 |     let rendered_prompt = mustache.render(prompt, {
187 |       user: r.user,
188 |       prompt: r.prompt,
189 |       completion: r.completion,
190 |       name: personality.name
191 |     });
192 |     return rendered_prompt;
193 |   });
194 |   let fewshots_joined = fewshots_a.join('2e72b3\n');
195 | 
196 |   let r = minGenerate(
197 |     await promptReducer(
198 |       `{{{personality}}}
199 | 
200 | Today's date is {{date}}.
201 | 
202 | Recent chat history:
203 | {{#context}}
204 | {{{sender}}}: {{{message}}}
205 | {{/context}}
206 | 
207 | Extra contextual chat history:
208 | {{#extra_context}}
209 | {{{sender}}}: {{{message}}}
210 | {{/extra_context}}
211 | 
212 | Instructions:
213 | {{#instructions}}
214 | {{{.}}}
215 | {{/instructions}}
216 | 
217 | {{{fewshots}}}
218 | {{{user}}}: {{{message}}}
219 | {{{name}}}:{{{prompt_leading}}}`,
220 |       {
221 |         user: user,
222 |         date: new Date().toISOString(),
223 |         message: message,
224 |         personality: personality.personality,
225 |         context: ch,
226 |         extra_context: extra_ch,
227 |         name: personality.name,
228 |         instructions: personality.instructions,
229 |         prompt_leading: prompt_leading,
230 |         fewshots: fewshots_joined
231 |       },
232 |       'The following is a chatbot request:',
233 |       'Remove ALL unnecessary information pertaining to the request:'
234 |     ),
235 | 
236 |     10,
237 |     250,
238 |     1
239 |   );
240 |   let chat_response = prompt_leading + (await r);
241 |   // Add this in the teachings index if there's prompt leading
242 |   if (prompt_leading.length > 0) {
243 |     await storeEmbeddings([message], 'teachings', [
244 |       {
245 |         user: user,
246 |         text: original_message,
247 |         prompt: message,
248 |         completion: chat_response,
249 |         prompt_leading: prompt_leading
250 |       }
251 |     ]);
252 |   }
253 | 
254 |   /* Add to chat history */
255 |   session.messages.push({
256 |     user: user,
257 |     text: original_message,
258 |     response: chat_response,
259 |     context: '',
260 |     timestamp: new Date(),
261 |     uuid: uuid4()
262 |   });
263 | 
264 |   if (runCogs) return post_message_filtering(session);
265 |   return chat_response;
266 | 
267 | }
268 | 
269 | /**
270 |  * Invocation of Chatbot with the personality Chuck
271 |  * @param user The user
272 |  * @param message The message
273 |  * @example chatbot('anon55', 'Hello, how are you?')
274 |  * @returns The response
275 |  */
276 | export async function ChatbotChuck(
277 |   user: string,
278 |   message: string
279 | ): Promise<string> {
280 |   return await Chatbot(personality_chuck, 5, user, message);
281 | }
282 | 
283 | /**
284 |  * Invocation of Chatbot with the personality Nilp
285 |  * @param user The user
286 |  * @param message The message
287 |  * @example chatbot('anon55', 'Hello, how are you?')
288 |  * @returns The response
289 |  */
290 |  export async function ChatbotNilp(
291 |   user: string,
292 |   message: string
293 | ): Promise<string> {
294 |   return await Chatbot(personaltiy_nlp, 5, user, message);
295 | }
296 | 
297 | type decode_function<IN> = (input: IN) => Promise<string>;
298 | type encode_function<OUT> = (output: string) => Promise<OUT>;
299 | 
300 | 
301 | let cog_axios = new Cog(
302 |   'axios',
303 |   [],
304 |   async (session: ChatSession, args: any, message_sans_args: string) => {
305 |     let url = args['url'];
306 |     if (url.indexOf('<') == 0) {
307 |       url = url.substring(1);
308 |       // And also remove the trailing >
309 |       url = url.substring(0, url.length - 1);
310 |     }
311 |     let method = args['method'] || 'get';
312 |     let data = args['data'] || {};
313 |     let headers = args['headers'] || {};
314 |     let response = await wf_axios(url, method, data, headers);
315 |     return response;
316 |   }
317 | );
318 | 
319 | let cog_replicate_sd = new Cog(
320 |   'replicate_sd',
321 |   [],
322 |   async (
323 |     session: ChatSession,
324 |     args: any,
325 |     message_sans_args: string
326 |   ): Promise<string> => {
327 |     let detailedPrompt = await promptTemplate(
328 |       `{{user}} has asked you for a beautiful piece of art with the following instructions:
329 |   
330 |   {{{instructions}}}
331 |   
332 |   You are an art expert, and you are going to combine the request with your knowledge of artists and art styles to describe in great detail what this piece of art should look like, including as much detail as possible:`,
333 | 
334 |       { instructions: args['prompt'], user: session.user },
335 |       20,
336 |       500,
337 |       1.0
338 |     );
339 | 
340 |     let results = <any>(
341 |       await executeLocalWorkflow(
342 |         'StableDiffusionWorkflow',
343 |         'GO_TASK_QUEUE',
344 |         [{ Prompt: detailedPrompt }]
345 |       )
346 |     );
347 |     results['in_prompt'] = args['prompt'];
348 |     results['out_prompt'] = detailedPrompt;
349 |     return JSON.stringify(results);
350 |   }
351 | );
352 | 
353 | let plan_chainprompt_cog = new Cog(
354 |   'plan_chainprompt',
355 |   [],
356 |   async (
357 |     session: ChatSession,
358 |     args: any,
359 |     message_sans_args: string
360 |   ): Promise<string> => {
361 |     let plan = await Chatbot(
362 |       session.personality,
363 |       5,
364 |       session.personality.name,
365 |       `OK, so, ${session.user} wants me to "${args['plan']}".  I'm going to need to plan this one out into multiple steps. Let's develop a plan, step by step, and output a JSON array with the steps. | ["Step 1:`,
366 |       session,
367 |       false,
368 |     );
369 |     return plan;
370 |   }
371 | );
372 | 
373 | let mapreduce_summarize_cog = new Cog(
374 |   'mapreduce_summarize',
375 |   [],
376 |   async (session: ChatSession, args: any, message_sans_args: string) =>
377 |     await mapreduce_summary(args['text'])
378 | );
379 | let noop_cog = new Cog(
380 |   'noop',
381 |   [],
382 |   async (session: ChatSession, args: any, message_sans_args: string) => ''
383 | );
384 | 
385 | let cogs = new Cogset([
386 |   cog_axios,
387 |   cog_replicate_sd,
388 |   plan_chainprompt_cog,
389 |   mapreduce_summarize_cog,
390 |   mapreduce_summarize_cog,
391 |   noop_cog
392 | ]);
393 | 
394 | export async function IndexCog(cog: Cog) {
395 |   for (let t of cog.getTeachings()) {
396 |     // Calculate md5sum of a concatenation of prompt and completion
397 |     let sum = await md5sum(t.prompt + t.completion);
398 | 
399 |     await es_index('teachings', {
400 |       text: t.text,
401 |       user: t.user,
402 |       prompt: t.prompt,
403 |       completion: t.completion,
404 |       prompt_leading: t.prompt_leading,
405 |       md5sum: sum
406 |     });
407 |   }
408 | }
409 | 
410 | async function IndexCogset(cogset: Cogset) {
411 |   for (let cog of cogset.getCogs()) {
412 |     await IndexCog(cog);
413 |   }
414 | }
415 | 
416 | export async function DumpAndRestoreCogsetTeachings() {
417 |   await init_elasticsearch_mappings();
418 |   await IndexCogset(cogs);
419 | }
420 | 
421 | export async function post_message_filtering(
422 |   session: ChatSession
423 | ): Promise<string> {
424 |   // If the last message has !x:{} in it, we are going to grab the last message and see if we can match x to a filter event handler
425 |   let last_message = session.messages[session.messages.length - 1];
426 | 
427 |   // !kali_sh:{"command":"nmap 127.0.0.1"}
428 |   let matches = last_message.response.match(/!([a-zA-Z0-9_]+):{(.*)}/);
429 |   if (matches) {
430 |     console.log(`We have a filter match: ${matches[1]}`);
431 |     let filter = matches[1];
432 |     let filter_args = matches[2];
433 |     console.log(`Parsing filter args: ${filter_args}`);
434 |     let filter_args_json = JSON.parse(`{${filter_args}}`);
435 |     let filter_handler = cogs.getCog(filter);
436 |     if (filter_handler) {
437 |       let message_sans_args = last_message.response.replace(matches[0], '');
438 |       let filtered_message = await filter_handler.handle(
439 |         session,
440 |         filter_args_json,
441 |         message_sans_args
442 |       );
443 |       return filtered_message;
444 |     }
445 |   }
446 | 
447 |   console.log('No filter matching.');
448 |   return last_message.response;
449 | }
450 | 


--------------------------------------------------------------------------------
/src/activities/application/ijavascript.ts:
--------------------------------------------------------------------------------
  1 | import { executeWorkflow, executeLocalWorkflow } from '../util';
  2 | import {
  3 |   generateText,
  4 |   generateTextOpenAI,
  5 |   retryGenerateTextOpenAI
  6 | } from '../llm';
  7 | import axios from 'axios';
  8 | import { spawn } from 'child_process';
  9 | import fs from 'fs';
 10 | 
 11 | import { TEMPORAL_HOST } from '../../config';
 12 | import { OPENAI_TOKEN } from '../../config';
 13 | 
 14 | export function searchLoadedPackages(): [string, string][] {
 15 |   return [
 16 |     ['x.http', 'perform HTTP requests'],
 17 |     //  ["x.mysql",     "MySQL client"],
 18 |     ['x.google', 'Google APIs'],
 19 |     ['x.nlp', 'Natural Language Processing']
 20 |     // ["x.illustrations", "Illustrations"],
 21 |   ];
 22 | }
 23 | 
 24 | export function inspectLoadedPackage(name: string): string[] {
 25 |   let package_methods = new Map<string, string[]>();
 26 |   package_methods.set('x.google', [
 27 |     'async function search(query:string): Promise< {rank: number, url: string, title: string, description: string}[] >'
 28 |   ]);
 29 |   package_methods.set('x.http', [
 30 |     'async function x.http.get(url: string): Promise< {status: number, statusText: string, content: any} >',
 31 |     'async function x.http.post(url: string, data: any): Promise< string >',
 32 |     'async function x.http.put(url: string, data: any): Promise< string >',
 33 |     'async function x.http.delete(url: string): Promise< string >'
 34 |   ]);
 35 |   /* package_methods.set( "mysql",
 36 |     [
 37 |       "async function query(query: string): Promise< any >",
 38 |     ]);*/
 39 | 
 40 |   package_methods.set('x.nlp', [
 41 |     'async function x.nlp.summarize(text: string): Promise< string >',
 42 |     'async function x.nlp.question_answer(text: string, question: string): Promise< string >',
 43 |     'async function x.nlp.ai_instruction_on_text( text: string, instructions: string ): Promise< string >'
 44 |   ]);
 45 | 
 46 |   package_methods.set('x.illustrations', [
 47 |     'async function x.illustrations.get_illustration( description: string ): Promise< string >'
 48 |   ]);
 49 | 
 50 |   return package_methods.get(name) || [];
 51 | }
 52 | 
 53 | import repl from 'node:repl';
 54 | import net from 'node:net';
 55 | import Semaphore from 'semaphore-async-await';
 56 | 
 57 | net
 58 |   .createServer((socket: any) => {
 59 |     repl
 60 |       .start({
 61 |         prompt: '28dc9e0c ',
 62 |         input: socket,
 63 |         output: socket,
 64 | 
 65 |         writer: (output: any) => {
 66 |           return JSON.stringify(output);
 67 |         }
 68 |       })
 69 |       .on('exit', () => {
 70 |         socket.end();
 71 |       });
 72 |   })
 73 |   .listen(5002);
 74 | 
 75 | /* Class which wraps net.connect, sockets with promises */
 76 | class Socket {
 77 |   private socket: net.Socket;
 78 |   private lock: Semaphore;
 79 | 
 80 |   constructor(port: number, host: string) {
 81 |     this.socket = net.connect(port, host);
 82 |     this.lock = new Semaphore(1);
 83 |   }
 84 | 
 85 |   public async write(data: string): Promise<void> {
 86 |     return new Promise((resolve, reject) => {
 87 |       this.socket.write(data, () => {
 88 |         resolve();
 89 |       });
 90 |     });
 91 |   }
 92 | 
 93 |   public async read(): Promise<string> {
 94 |     return new Promise((resolve, reject) => {
 95 |       this.socket.once('data', (data: any) => {
 96 |         resolve(data.toString());
 97 |       });
 98 |     });
 99 |   }
100 | 
101 |   public async close(): Promise<void> {
102 |     return new Promise((resolve, reject) => {
103 |       this.socket.end(() => {
104 |         resolve();
105 |       });
106 |     });
107 |   }
108 | 
109 |   public async writeAndRead(data: string): Promise<string> {
110 |     console.log(`Acquiring lock for command ${data}`);
111 |     await this.lock.acquire();
112 |     console.log(`Lock acquired for command ${data}`);
113 |     let retval = new Promise<string>((resolve, reject) => {
114 |       this.socket.once('data', (data) => resolve(data.toString()));
115 |     });
116 | 
117 |     await this.write(data);
118 |     console.log(`Wrote command ${data}`);
119 |     let response = await retval;
120 |     console.log(`Read response ${response}`);
121 |     this.lock.release();
122 |     console.log(`Released lock for command ${data}`);
123 | 
124 |     /* .break() is a special command which causes the REPL to exit. We're gonna use this if we find this at the beginning: "... ..."
125 |      */
126 |     if (response.startsWith('... ...')) {
127 |       console.log(`Breaking REPL`);
128 |       await this.writeAndRead('.break');
129 |       return 'ERROR: Incomplete javascript statement sent. Cancelled.';
130 |     }
131 | 
132 |     return response;
133 |   }
134 | }
135 | 
136 | export const google = {
137 |   search: async function (query: string): Promise<string[]> {
138 |     console.log(`Searching for ${query}`);
139 |     let results = await executeWorkflow(
140 |       TEMPORAL_HOST,
141 |       'GoogleSearchWorkflow',
142 |       'GO_TASK_QUEUE',
143 |       [query]
144 |     );
145 |     console.log(`Results: ${results}`);
146 |     return <any>results;
147 |   }
148 | };
149 | 
150 | export const http = {
151 |   get: async function (
152 |     url: string
153 |   ): Promise<{ status: number; statusText: string; content: any }> {
154 |     console.log(`http.get: ${url}`);
155 |     try {
156 |       let r = await axios.get(url);
157 |       if (r.status != 200) {
158 |         return { status: r.status, statusText: r.statusText, content: null };
159 |       }
160 |       console.log(`http.get: ${url}  Status: ${r.status}`);
161 |       // Is this JSON? We probably just want to return it, and worry about summarizing it later.
162 |       if (r.headers['content-type'].startsWith('application/json')) {
163 |         // Is r.data a string? If so, parse it.
164 |         if (typeof r.data == 'string') r.data = JSON.parse(r.data);
165 |         return { status: r.status, statusText: r.statusText, content: r.data };
166 |       }
167 |       // Is this HTML? We probably want to summarize it.
168 |       if (
169 |         r.data.length >= 1024 &&
170 |         r.headers['content-type'].startsWith('text/html')
171 |       ) {
172 |         // Call out to pandoc to convert the HTML to plain
173 |         let pandoc = spawn('/usr/bin/pandoc', ['-f', 'html', '-t', 'plain'], {
174 |           stdio: ['pipe', 'pipe', 'pipe']
175 |         });
176 |         pandoc.stdin.write(r.data);
177 |         pandoc.stdin.end();
178 |         let text = await new Promise((resolve, reject) => {
179 |           pandoc.stdout.on('data', async (data: any) => {
180 |             let text = data.toString();
181 |             console.log(`Pandoc Text length: ${text.length}`);
182 |             resolve(text);
183 |           });
184 |         });
185 |         return { status: r.status, statusText: r.statusText, content: text };
186 |       }
187 |       return { status: r.status, statusText: r.statusText, content: r.data };
188 |     } catch (e: any) {
189 |       console.log(`http.get: ${url}  Error: ${e}`);
190 |       return { status: 500, statusText: e, content: null };
191 |     }
192 |   }
193 | };
194 | 
195 | export const nlp = {
196 |   summarize: async function (text: string): Promise<string> {
197 |     if (text == null || text.length == 0 || text == undefined) {
198 |       return 'ERROR: <Text is null or empty.>';
199 |     }
200 |     // if type of the text is actually an object, we should also return an error
201 |     if (typeof text == 'object') {
202 |       return 'ERROR: <Text is an object.>';
203 |     }
204 |     console.log(`Summarizing: ${text}`);
205 |     let r = await executeLocalWorkflow('mapreduce_summary', 'hello-world', [
206 |       text
207 |     ]);
208 |     console.log(`Result: ${r}`);
209 |     return <string>r;
210 |   },
211 |   question_answer: async function (
212 |     text: string,
213 |     question: string
214 |   ): Promise<string> {
215 |     console.log(`Summarizing: ${text}`);
216 |     let r = await executeLocalWorkflow(
217 |       'mapreduce_question_text',
218 |       'hello-world',
219 |       [text, question]
220 |     );
221 |     console.log(`Result: ${r}`);
222 |     return <string>r;
223 |   },
224 |   ai_instruction_on_text: async function (
225 |     text: string,
226 |     instructions: string
227 |   ): Promise<string> {
228 |     console.log(`ai_instruction: ${text}`);
229 |     let r = <any>(
230 |       await executeLocalWorkflow('mapPromptTemplate', 'hello-world', [
231 |         text,
232 |         `{{{chunk}}}\n${instructions}:\n`
233 |       ])
234 |     );
235 |     let r2 = r.join('\n');
236 |     console.log(`Result: ${r2}`);
237 |     return <string>r2;
238 |   }
239 | };
240 | 
241 | export const illustrations = {
242 |   get_illustration: async function (description: string): Promise<string> {
243 |     console.log(`Getting illustration for ${description}`);
244 |     let r = await executeWorkflow(
245 |       TEMPORAL_HOST,
246 |       'StableDiffusionWorkflow',
247 |       'GO_TASK_QUEUE',
248 |       [{ Prompt: description }]
249 |     );
250 |     console.log(`Result: ${r}`);
251 |     return <string>r;
252 |   }
253 | };
254 | 
255 | /**
256 |  * Execute javascript code in a sandbox with prompt chaining in notebook format
257 |  * @param p
258 |  * @param apiKey
259 |  */
260 | export async function executeJavascriptNotebook(
261 |   p: string
262 | ): Promise<string> {
263 |   console.log(`Executing Javascript Notebook: ${p}`);
264 | 
265 |   let notebook = fs.readFileSync('./src/prompts/notebook-template.txt', 'utf8');
266 |   notebook += p;
267 |   // Template includes a variable called {{{todaysdate}}} which we want to replace with something like October 16, 2022
268 |   notebook = notebook.replace(
269 |     '{{{todaysdate}}}',
270 |     new Date().toLocaleDateString('en-US', {
271 |       month: 'long',
272 |       day: 'numeric',
273 |       year: 'numeric'
274 |     })
275 |   );
276 |   // if p does not have a newline at the end, we're going to add it to notebook
277 |   if (p[p.length - 1] != '\n') {
278 |     notebook += '\n';
279 |   }
280 | 
281 |   // Execute OpenAI API call using code model
282 |   let r = await retryGenerateTextOpenAI(
283 |     notebook,
284 |     10,
285 |     1024,
286 |     0.0,
287 |     0.99,
288 |     'code-davinci-002',
289 |     ['Question:', 'Out[', 'Out [']
290 |   );
291 |   let orig_completion = r;
292 |   if (orig_completion.indexOf('Answer:') != -1) {
293 |     // Yes, so we need to execute the answer
294 |     let answer = orig_completion.substring(orig_completion.indexOf('Answer:'));
295 |     return answer;
296 |   }
297 | 
298 |   // This will probably return a string that looks like this:
299 |   // "IJavascript session:\n```\nIn [1]: searchLoadedPackages()\n"
300 |   // Chop off the "IJavascript session:\n```\nIn [1]: " part so we are left with nothing but the javascript part
301 |   let js = r.substring(r.indexOf('In [1') + 8);
302 | 
303 |   // Now we will execute the javascript using nodejs vm module:
304 |   try {
305 |     // Connect a socket to localhost:5001
306 |     let socket = new Socket(5001, 'localhost');
307 |     // First, we import the packages we need
308 |     await socket.writeAndRead("const x = require('./src/activities')\n");
309 |     await socket.writeAndRead('console.log = function(x) { return x }\n');
310 | 
311 |     return await executeJavascriptSnippetLoop(notebook, socket, 1, p);
312 | 
313 |     /*
314 |     This is all bad.
315 | 
316 |     // Write the javascript to the socket
317 |     let ret = await socket.writeAndRead( js );
318 |     // Remove anything after 28dc9e0c in the string ret
319 |     ret = ret.substring( 0, ret.indexOf( "28dc9e0c" ) );
320 | 
321 |     console.log( `Javascript returned: ${ret}` );
322 |     let retraw = parse( ret );
323 | 
324 |      // Append the output to the notebook
325 |     notebook += orig_completion;
326 |     notebook += "Out [1] " + ret;
327 |     notebook += '\n'; 
328 |     return await executeJavascriptSnippetLoop( notebook, apiKey, socket, 2 );
329 |     */
330 |   } catch (e: any) {
331 |     return js + '\n\nError: ' + e;
332 |   }
333 | }
334 | 
335 | async function executeJavascriptSnippetLoop(
336 |   n: string,
337 |   socket: Socket,
338 |   counter: number,
339 |   original_question: string
340 | ): Promise<string> {
341 |   console.log(`Executing Javascript Snippet count ${counter}`);
342 | 
343 |   if (counter > 100) {
344 |     return `>> ERRROR.  Too many loops (${counter}).  Aborting.`;
345 |   }
346 | 
347 |   // Print the last 3 lines of n
348 |   let lines = n.split(/\r?\n/);
349 |   let last3 = lines.slice(Math.max(lines.length - 3, 0)).join('\n');
350 |   console.log(last3);
351 | 
352 |   let next_in_stop = `In [${counter + 1}]`;
353 |   let next_out_stop = `Out[`;
354 |   let error_in_stop = `Error [${counter + 1}]`;
355 | 
356 |   let r = await retryGenerateTextOpenAI(
357 |     n,
358 |     10,
359 |     512,
360 |     0.0,
361 |     1,
362 |     'text-davinci-002',
363 |     ['Question:', 'Out [', error_in_stop, next_in_stop]
364 |   );
365 |   let origr = r;
366 |   console.log(origr);
367 | 
368 |   if (origr.indexOf(next_out_stop) != -1) {
369 |     // Delete this value and everything else after it
370 |     r = r.substring(0, r.indexOf(next_out_stop));
371 |     origr = r;
372 |   }
373 | 
374 |   if (origr.indexOf('Answer:') != -1) {
375 |     console.log('Answer found');
376 |     try {
377 |       console.log('Going to wait for the last value to be returned');
378 |       let lastret = await socket.writeAndRead('JSON.stringify(_)\n');
379 |       let answer = origr + '\n' + lastret;
380 |       console.log(answer);
381 | 
382 |       console.log('Ask it to summarize the whole thing.');
383 |       let summary_prompt_append =
384 |         "\n\nTake everything you've seen so far, and summarize it in a way that may be useful later:";
385 |       let r2 = await retryGenerateTextOpenAI(
386 |         n + summary_prompt_append,
387 |         10,
388 |         512,
389 |         0.0,
390 |         1,
391 |         'text-davinci-002'
392 |       );
393 |       console.log(r2);
394 |       return answer + '\n\n' + r2;
395 | 
396 |       // console.log("Good bye!");
397 |       // return( answer );
398 |     } catch (e: any) {
399 |       console.log(
400 |         `Strange. We got an answer but we couldn't get the last return value: ${e}`
401 |       );
402 |       return origr;
403 |     }
404 |   }
405 | 
406 |   let js = r.substring(r.indexOf(`In [${counter}`) + 8);
407 |   // Trim off any leading newlines
408 |   js = js.replace(/^\s+/, '');
409 |   // Trim off any trailing newlines
410 |   js = js.replace(/\s+$/, '');
411 |   js = js.trim();
412 |   js = js + '\n';
413 |   // Remove all of the ...:
414 |   js = js.replace(/\.\.\.\:\s+/g, '');
415 | 
416 |   try {
417 |     console.log(`Javascript: "${js}"`);
418 |     let processedOut = await socket.writeAndRead(js);
419 |     console.log(`Raw ret: <<<${processedOut}>>>\n\n=======\n`);
420 |     processedOut = processedOut.substring(0, processedOut.indexOf('28dc9e0c'));
421 |     if (processedOut.indexOf('Uncaught') != -1) {
422 |       console.log(`Uncaught error found.  Trying to figure out what it is.`);
423 |       n += `In [${counter}] ${js}\n`;
424 |       let errmsg = await socket.writeAndRead(`_error.message\n`);
425 |       console.log(`** REAL Error message: ${errmsg}`);
426 |       n += `Error [${counter}]: ${errmsg}\n`;
427 |       return await executeJavascriptSnippetLoop(
428 |         n,
429 |         socket,
430 |         counter + 1,
431 |         original_question
432 |       );
433 |     }
434 | 
435 |     // If output is longer than 80 characters, truncate, and add dots
436 |     if (processedOut.length > 512) {
437 |       console.log('Truncating.');
438 |       processedOut =
439 |         processedOut.substring(0, 512) +
440 |         `... Output truncated. Full output is in the 'return_${counter}' variable.`;
441 |       console.log('Truncated and set lastreturn.');
442 |     }
443 | 
444 |     // Append the output to the notebook
445 |     let varname = `return_${counter}`;
446 |     console.log(`We are writing to variable ${varname}`);
447 |     await socket.writeAndRead(`var ${varname} = _;\n`);
448 |     n += origr;
449 |     n += `Out [${counter}] ${varname} = ` + processedOut;
450 |     n += '\n';
451 |     // n += "/* This is where we will";
452 |     if (counter % 3 == 0) {
453 |       n += `/* Remembering our original goal: ${original_question} */\n`;
454 |     }
455 |     console.log(`Out [${counter}] ${varname} = ` + processedOut);
456 | 
457 |     if (js.startsWith('console.log')) {
458 |       console.log('console.log detected. Not going to execute the next line.');
459 |       return processedOut;
460 |     }
461 | 
462 |     return executeJavascriptSnippetLoop(
463 |       n,
464 |       socket,
465 |       counter + 1,
466 |       original_question
467 |     );
468 |   } catch (e: any) {
469 |     n += `Error [${counter}] ` + e;
470 |     console.log(`Error: ${e} while trying to execute ${js}`);
471 |     //return executeJavascriptSnippetLoop( n, apiKey, context, counter ++ );
472 |     return `Error: ${e} while trying to execute ${js}\n`;
473 |   }
474 | }
475 | 


--------------------------------------------------------------------------------