├── .env.example
├── .gitignore
├── .prettierignore
├── .replit
├── LICENSE
├── README.md
├── agents.py
├── attached_assets
│   └── Pasted-Note-This-guide-is-designed-to-complement-our-Guardrails-Cookbook-by-providing-a-more-focused-look--1735843679510.txt
├── generated-icon.png
├── image
│   └── structure.jpg
├── index.html
├── index.js
├── main.py
├── node
├── package-lock.json
├── package.json
├── public
│   └── images
│       └── CompanyLogo.png
├── replit.nix
├── requirements.txt
├── src
│   ├── App.jsx
│   ├── components
│   │   ├── CompanyLogo.jsx
│   │   ├── DealTerms.jsx
│   │   ├── EmailInput.jsx
│   │   ├── ErrorMessage.jsx
│   │   ├── FileUpload.jsx
│   │   ├── FounderInfo.jsx
│   │   ├── LoadingIndicator.jsx
│   │   ├── MemorandumDisplay.jsx
│   │   └── UrlInput.jsx
│   ├── index.css
│   └── index.jsx
├── tsconfig.json
├── uploads
│   ├── 62c14dd0eaac44c24bffa5e510a046d4
│   └── 67f24ac9600de1c096628bf9cfadb8b8
└── vite.config.js
/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=your-openai-api-key
2 | EXA_API_KEY=your-exa-api-key
3 | PROXYCURL_API_KEY=your-proxycurl-api-key
4 | GOOGLE_APPLICATION_CREDENTIALS=./path-to-your-google-cloud-credentials.json
5 | PORTKEY_API_KEY=your-portkey-api-key
6 | PORT=3002
7 | GOOGLE_CLOUD_PROJECT_ID=flybridgememo-generator
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | lerna-debug.log*
8 | .pnpm-debug.log*
9 |
10 | # Diagnostic reports (https://nodejs.org/api/report.html)
11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
12 |
13 | # Runtime data
14 | pids
15 | *.pid
16 | *.seed
17 | *.pid.lock
18 |
19 | # Directory for instrumented libs generated by jscoverage/JSCover
20 | lib-cov
21 |
22 | # Coverage directory used by tools like istanbul
23 | coverage
24 | *.lcov
25 |
26 | # nyc test coverage
27 | .nyc_output
28 |
29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
30 | .grunt
31 |
32 | # Bower dependency directory (https://bower.io/)
33 | bower_components
34 |
35 | # node-waf configuration
36 | .lock-wscript
37 |
38 | # Compiled binary addons (https://nodejs.org/api/addons.html)
39 | build/Release
40 |
41 | # Dependency directories
42 | node_modules/
43 | jspm_packages/
44 |
45 | # Snowpack dependency directory (https://snowpack.dev/)
46 | web_modules/
47 |
48 | # TypeScript cache
49 | *.tsbuildinfo
50 |
51 | # Optional npm cache directory
52 | .npm
53 |
54 | # Optional eslint cache
55 | .eslintcache
56 |
57 | # Optional stylelint cache
58 | .stylelintcache
59 |
60 | # Microbundle cache
61 | .rpt2_cache/
62 | .rts2_cache_cjs/
63 | .rts2_cache_es/
64 | .rts2_cache_umd/
65 |
66 | # Optional REPL history
67 | .node_repl_history
68 |
69 | # Output of 'npm pack'
70 | *.tgz
71 |
72 | # Yarn Integrity file
73 | .yarn-integrity
74 |
75 | # dotenv environment variable files
76 | .env
77 | .env.development.local
78 | .env.test.local
79 | .env.production.local
80 | .env.local
81 |
82 | # Credentials and secrets
83 | /cloud-credentials.json
84 |
85 | # parcel-bundler cache (https://parceljs.org/)
86 | .cache
87 | .parcel-cache
88 |
89 | # Next.js build output
90 | .next
91 | out
92 |
93 | # Nuxt.js build / generate output
94 | .nuxt
95 | dist
96 |
97 | # Gatsby files
98 | .cache/
99 | # Comment in the public line in if your project uses Gatsby and not Next.js
100 | # https://nextjs.org/blog/next-9-1#public-directory-support
101 | # public
102 |
103 | # vuepress build output
104 | .vuepress/dist
105 |
106 | # vuepress v2.x temp and cache directory
107 | .temp
108 | .cache
109 |
110 | # Docusaurus cache and generated files
111 | .docusaurus
112 |
113 | # Serverless directories
114 | .serverless/
115 |
116 | # FuseBox cache
117 | .fusebox/
118 |
119 | # DynamoDB Local files
120 | .dynamodb/
121 |
122 | # TernJS port file
123 | .tern-port
124 |
125 | # Stores VSCode versions used for testing VSCode extensions
126 | .vscode-test
127 |
128 | # yarn v2
129 | .yarn/cache
130 | .yarn/unplugged
131 | .yarn/build-state.yml
132 | .yarn/install-state.gz
133 | .pnp.*
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | # Auto-generated by the Prettier Extension for Replit
2 | # node_modules is ignored by default
3 | .config
4 | .build
5 | .cache
6 | .upm
7 | .tmp
8 | .git
9 | replit.nix
10 | .replit
--------------------------------------------------------------------------------
/.replit:
--------------------------------------------------------------------------------
1 | run = "npm run dev"
2 | entrypoint = "index.js"
3 | modules = ["nodejs-20", "python-3.10"]
4 |
5 | [nix]
6 | channel = "stable-22_11"
7 |
8 | [env]
9 | XDG_CONFIG_HOME = "/home/runner/.config"
10 |
11 | [packager]
12 | language = "nodejs"
13 |
14 | [packager.features]
15 | packageSearch = true
16 | guessImports = true
17 | enabledForHosting = false
18 |
19 | [languages]
20 |
21 | [languages.javascript]
22 | pattern = "**/{*.js,*.jsx,*.ts,*.tsx}"
23 |
24 | [languages.javascript.languageServer]
25 | start = "typescript-language-server --stdio"
26 |
27 | [[ports]]
28 | localPort = 3000
29 | externalPort = 80
30 | exposeLocalhost = true
31 |
32 | [[ports]]
33 | localPort = 3001
34 | externalPort = 3001
35 |
36 | [[ports]]
37 | localPort = 3002
38 | externalPort = 3002
39 |
40 | [deployment]
41 | deploymentTarget = "cloudrun"
42 | run = ["sh", "-c", "npm run start"]
43 |
44 | [auth]
45 | pageEnabled = false
46 | buttonEnabled = false
47 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Flybridge Investment Memorandum Generator
2 |
3 | ## Overview
4 | The Flybridge memo generator tool helps turn decks, business plans, and call notes into a first-draft VC investment memo for AI startups. For founders, it offers insights into how VCs might evaluate your business and simplifies the process of presenting your company to investors by creating a draft memo based on the information provided. To get the most accurate results, provide as much context as possible. A helpful tip is to record your pitch and upload the transcript along with any supporting materials.
5 |
6 | ## Limitations
7 | The memo generator produces a strong draft addressing key investor considerations, but it is a starting point, not a finished product. It covers roughly 50-60% of the work, leaving nuance and judgment to human input. The tool may reflect biases in the input and is constrained by the capabilities of OpenAI's o1 model. Competitor analysis provides initial insights but should be supplemented with additional research, and market size estimates should be validated with a separate bottom-up analysis (see the illustrative sketch below). This tool is for informational purposes only.
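A minimal bottom-up sketch (the figures here are illustrative assumptions, not outputs of the tool): TAM ≈ reachable target customers × annual contract value, e.g. 50,000 target companies × $20,000 ACV ≈ $1B.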
8 |
9 | ## Features
10 |
11 | ### Smart Document Processing
12 | - Support for multiple document formats (PDF, Word, scanned documents)
13 | - Built-in OCR capability for processing scanned materials
14 | - Web content integration through URL processing
15 | ### Agentic Research Analysis
16 | - Automated market research and competitor analysis
17 | - Market size and growth rate calculations
18 | ### Team Analysis
19 | - LinkedIn profile integration for founder background analysis
20 | ### Memorandum Generation
21 | - Auto-generated comprehensive investment memorandums
22 | ### Feedback and Observability
23 | - Integration with Portkey's feedback API for quality monitoring
24 |
25 | # Getting Started
26 |
27 | ## Prerequisites
28 |
29 | - Node.js (v16.x or higher)
30 | - Python (v3.8 or higher)
31 | - npm (Node Package Manager)
32 | - pip (Python Package Manager)
33 | - Google Cloud SDK (for managing Google Cloud resources)
34 |
35 | You'll need to set up the following API keys in your environment variables:
36 | - OpenAI API Key
37 | - Portkey API Key
38 | - EXA AI API Key
39 | - Proxycurl API Key
40 | - Google Cloud Vision API credentials JSON file
41 |
42 | ## Installation
43 | 1. Clone the Repository
44 | ```bash
45 | git clone https://github.com/danielp1234/memo-generator.git
46 | cd memo-generator
47 | ```
48 |
49 | 2. Install Node.js Dependencies
50 | ```bash
51 | npm install
52 | ```
53 |
54 | 3. Install Python Dependencies
55 | ```bash
56 | pip install -r requirements.txt
57 | pip install 'crewai[tools]'
58 | ```
59 |
60 | 4. Build the Project
61 | ```bash
62 | npm run build
63 | ```
64 |
65 | ## Set Up Environment Variables
66 | Rename the `.env.example` file to `.env` and fill in the following environment variables (setup instructions for each are below):
67 | ```env
68 | OPENAI_API_KEY=your-openai-api-key
69 | EXA_API_KEY=your-exa-api-key
70 | PROXYCURL_API_KEY=your-proxycurl-api-key
71 | GOOGLE_APPLICATION_CREDENTIALS=./path-to-your-google-cloud-credentials.json
72 | PORTKEY_API_KEY=your-portkey-api-key
73 | PORT=3002
74 | GOOGLE_CLOUD_PROJECT_ID=flybridgememo-generator
75 | ```
76 |
77 | ### Set Up Google Cloud Vision API
78 | - Create a Google Cloud project: if you don't already have one, create it in the Google Cloud Console.
79 | - Enable the Cloud Vision API
80 | - Navigate to APIs & Services > Library in the Google Cloud Console.
81 | - Search for Cloud Vision API and enable it.
82 | - Create a Cloud Storage Bucket
83 | - Navigate to Cloud Storage, click Create Bucket, name the bucket (e.g., memo-generator), and click Create.
84 | - Set Up Service Account Permissions
85 | - Navigate to IAM & Admin > IAM.
86 | - Locate your service account (e.g., your-service-account@your-project-id.iam.gserviceaccount.com).
87 | - Click the Edit (pencil) icon next to the service account.
88 | - Click Add Another Role and assign the following roles:
89 | - Storage Object Admin (roles/storage.objectAdmin)
90 | - Storage Object Creator (roles/storage.objectCreator)
91 | - Storage Object Viewer (roles/storage.objectViewer)
92 | - Cloud Vision AI Service Agent (roles/visionai.serviceAgent)
93 | - Create a key for your service account and download the credentials JSON file
94 |   (this requires the Cloud Vision API to be enabled, as described above)
95 | - Place the JSON file in the root directory of the project as cloud-credentials.json
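If you prefer the command line, the same setup can be sketched with the gcloud CLI (the project ID, bucket name, and service account below are placeholders; substitute your own):

```bash
# Enable the Cloud Vision API on your project
gcloud services enable vision.googleapis.com --project=your-project-id

# Create the Cloud Storage bucket used for async OCR input/output
gsutil mb -p your-project-id gs://memo-generator

# Grant the service account the storage roles listed above
gcloud projects add-iam-policy-binding your-project-id \
  --member="serviceAccount:your-service-account@your-project-id.iam.gserviceaccount.com" \
  --role="roles/storage.objectAdmin"

# Download a key file into the project root
gcloud iam service-accounts keys create ./cloud-credentials.json \
  --iam-account=your-service-account@your-project-id.iam.gserviceaccount.com
```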
96 |
97 |
98 | ## Usage
99 | ### Development
100 | To start the development server, which runs both the backend and frontend concurrently:
101 | ```bash
102 | npm run dev
103 | ```
104 |
105 | ### Production
106 | To build and start the production server:
107 | ```bash
108 | npm run build
109 | npm start
110 | ```
111 |
112 | ## Project Structure
113 | - index.js: The main Node.js server file.
114 | - src/: Contains the React frontend code.
115 | - components/: React components used in the frontend.
116 | - main.py: Python script for market analysis.
117 | - agents.py: Python script defining agents for market research.
118 | - tasks.py: Additional Python script for tasks.
119 | - public/: Contains static assets and the index.html file.
120 | - .env: Environment variables configuration file.
121 | - package.json: Node.js dependencies and scripts.
122 | - requirements.txt: Python dependencies.
123 |
124 | ## System Diagram
125 | - Below is a diagram that explains the components of the application:
126 | 
127 | - See example output memo [Link](https://docsend.com/view/ke4jyy5yr3y3wmsf)
128 |
129 |
130 | ## License
131 | Released under the MIT License. See the LICENSE file for more details.
--------------------------------------------------------------------------------
/agents.py:
--------------------------------------------------------------------------------
1 | #agents.py
2 | import os
3 | from crewai import Agent
4 | from langchain_openai import ChatOpenAI
5 | from langchain.tools import Tool
6 | from crewai_tools import EXASearchTool
7 |
8 | #Portkey, fallback to direct OpenAI if not available
9 | try:
10 | from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL
11 | PORTKEY_AVAILABLE = True
12 | except ImportError:
13 | PORTKEY_AVAILABLE = False
14 | print("Portkey not available, falling back to direct OpenAI usage")
15 |
16 | def get_portkey_llm(trace_id=None, span_id=None, agent_name=None):
17 | if PORTKEY_AVAILABLE:
18 | headers = createHeaders(
19 | provider="openai",
20 | api_key=os.getenv("PORTKEY_API_KEY"),
21 | trace_id=trace_id,
22 | )
23 | if span_id:
24 | headers['x-portkey-span-id'] = span_id
25 | if agent_name:
26 | headers['x-portkey-span-name'] = f'Agent: {agent_name}'
27 |
28 | return ChatOpenAI(
29 | model="gpt-4o",
30 | base_url=PORTKEY_GATEWAY_URL,
31 | default_headers=headers,
32 | api_key=os.getenv("OPENAI_API_KEY")
33 | )
34 | else:
35 | # Fallback to direct OpenAI usage
36 | return ChatOpenAI(
37 | model="gpt-4",
38 | api_key=os.getenv("OPENAI_API_KEY")
39 | )
40 |
41 | # EXA Search tool
42 | class CustomEXASearchTool(EXASearchTool):
43 | def __init__(self):
44 | super().__init__(
45 | type='neural',
46 | use_autoprompt=True,
47 | category='company',
48 | startPublishedDate='2021-10-01T00:00:00.000Z',
49 | excludeText=[
50 | 'OpenAI', 'Anthropic', 'Google', 'Mistral', 'Microsoft', 'Nvidia',
51 | 'general AI market', 'overall AI industry', 'IBM'
52 | ],
53 | numResults=20
54 | )
55 |
56 | exa_search_tool = CustomEXASearchTool()
57 |
58 | # Market Size tool
59 | def estimate_market_size(data: str) -> str:
60 | return f"Estimated market size based on: {data}"
61 |
62 | market_size_tool = Tool(
63 | name="Market Size Estimator",
64 | func=estimate_market_size,
65 | description="Estimates market size based on provided data."
66 | )
67 |
68 | # CAGR calculator tool
69 | def calculate_cagr(initial_value: float, final_value: float, num_years: int) -> float:
70 | cagr = (final_value / initial_value) ** (1 / num_years) - 1
71 | return cagr
72 |
73 | cagr_tool = Tool(
74 | name="CAGR Calculator",
75 | func=calculate_cagr,
76 | description="Calculates CAGR given initial value, final value, and number of years."
77 | )
78 |
79 | # Agents
80 | def create_agent(role, goal, backstory, tools, trace_id=None, agent_name=None):
81 | span_id = os.urandom(16).hex() if trace_id else None
82 | llm = get_portkey_llm(trace_id, span_id, agent_name)
83 |
84 | return Agent(
85 | role=role,
86 | goal=goal,
87 | backstory=backstory,
88 | tools=tools,
89 | llm=llm,
90 | verbose=True,
91 | allow_delegation=True,
92 | max_iter=25,
93 | max_execution_time=300
94 | )
95 |
96 | def get_market_analyst(trace_id=None):
97 | return create_agent(
98 | role='Market size Research Analyst',
99 | goal='Research and analyze the market size (TAM) of AI subsegment markets, focusing on specialized market sizes and growth rates',
100 | backstory='Expert in researching and calculating the market size (TAM) of specific subsegments of the AI market, along with growth rates. Also searches for sector-specific growth drivers. Known for providing granular market insights rather than general AI market statistics, such as the overall size of the AI market, which are irrelevant here.',
101 | tools=[exa_search_tool, market_size_tool, cagr_tool],
102 | trace_id=trace_id,
103 | agent_name='market_analyst'
104 | )
105 |
106 | def get_competitor_analyst(trace_id=None):
107 | return create_agent(
108 | role='AI Startup Intelligence Specialist',
109 | goal='Identify and analyze relevant AI startups within specific AI subsegment markets',
110 | backstory="""Expert in mapping competitive landscapes for specific AI verticals.
111 | Specialized in identifying real, named emerging startups and scale-ups rather than tech giants like IBM, OpenAI, Google, META, Anthropic, HuggingFace. Known for finding verifiable information about startups' funding, technology, and market focus.""",
112 | tools=[exa_search_tool],
113 | trace_id=trace_id,
114 | agent_name='competitor_analyst'
115 | )
116 |
117 | def get_strategy_advisor(trace_id=None):
118 | return create_agent(
119 | role='Project Manager',
120 | goal='Efficiently manage the crew and ensure high-quality task completion, with a focus on results that are specific and relevant rather than generic or too zoomed out',
121 | backstory="""You're an experienced project manager, skilled in overseeing complex projects and guiding teams to success. Your role is to coordinate the efforts of the crew members, ensuring that each task is completed on time and that the results are relevant and specific to the market.""",
122 | tools=[],
123 | trace_id=trace_id,
124 | agent_name='strategy_advisor'
125 | )
126 |
127 | __all__ = ['get_market_analyst', 'get_competitor_analyst', 'get_strategy_advisor']
--------------------------------------------------------------------------------
/attached_assets/Pasted-Note-This-guide-is-designed-to-complement-our-Guardrails-Cookbook-by-providing-a-more-focused-look--1735843679510.txt:
--------------------------------------------------------------------------------
1 | Note: This guide is designed to complement our Guardrails Cookbook by providing a more focused look at moderation techniques. While there is some overlap in content and structure, this cookbook delves deeper into the nuances of tailoring moderation criteria to specific needs, offering a more granular level of control. If you're interested in a broader overview of content safety measures, including guardrails and moderation, we recommend starting with the Guardrails Cookbook. Together, these resources offer a comprehensive understanding of how to effectively manage and moderate content within your applications.
2 |
3 | Moderation, much like guardrails in the physical world, serves as a preventative measure to ensure that your application remains within the bounds of acceptable and safe content. Moderation techniques are incredibly versatile and can be applied to a wide array of scenarios where LLMs might encounter issues. This notebook is designed to offer straightforward examples that can be adapted to suit your specific needs, while also discussing the considerations and trade-offs involved in deciding whether to implement moderation and how to go about it. This notebook will use our Moderation API, a tool you can use to check whether text is potentially harmful.
4 |
5 | This notebook will concentrate on:
6 |
7 | Input Moderation: Identifying and flagging inappropriate or harmful content before it is processed by your LLM.
8 | Output Moderation: Reviewing and validating the content generated by your LLM before it reaches the end user.
9 | Custom Moderation: Tailoring moderation criteria and rules to suit the specific needs and context of your application, ensuring a personalized and effective content control mechanism.
10 | from openai import OpenAI
11 | client = OpenAI()
12 |
13 | GPT_MODEL = 'gpt-4o-mini'
14 |
15 | 1. Input moderation
16 | Input Moderation focuses on preventing harmful or inappropriate content from reaching the LLM, with common applications including:
17 |
18 | Content Filtering: Prevent the spread of harmful content such as hate speech, harassment, explicit material, and misinformation on social media, forums, and content creation platforms.
19 | Community Standards Enforcement: Ensure that user interactions, such as comments, forum posts, and chat messages, adhere to the community guidelines and standards of online platforms, including educational environments, gaming communities, or dating apps.
20 | Spam and Fraud Prevention: Filter out spam, fraudulent content, and misleading information in online forums, comment sections, e-commerce platforms, and customer reviews.
21 | These measures act as preventive controls, operating before or alongside the LLM to alter your application's behavior if specific criteria are met.
22 |
23 | Embrace async
24 | A common design to minimize latency is to send your moderations asynchronously along with your main LLM call. If your moderation gets triggered you send back a placeholder response, otherwise send back the LLM response. This pattern can also be found in our Guardrails Cookbook. It's important to note that while the async mode is effective in minimizing latency, it can also lead to unnecessary costs. Specifically, you could avoid completion costs if the content is flagged before processing. Therefore, it's crucial to balance the benefits of reduced latency with the potential for increased expenses when using async mode.
25 |
26 | We'll use this approach, creating an execute_chat_with_input_moderation function that will run our LLM's get_chat_response and the check_moderation_flag moderation function in parallel, and return the LLM response only if the moderation returns False (not triggered).
27 |
28 | Workflow:
29 | We will create a workflow that incorporates the Moderation API to check user input for any potentially unsafe content before it is sent to the Language Model (LLM). This ensures that only appropriate content is processed further in the application.
30 |
31 | Receive an input from the user
32 | Use the Moderation API to analyze the input for any problematic content.
33 | Conditional Processing:
34 | If the input is flagged by the moderation check, handle it accordingly (e.g., reject the input, ask the user to rephrase, etc.).
35 | If the input is not flagged, pass it to the LLM for further processing.
36 | We will demonstrate this workflow with two example prompts.
37 |
38 | system_prompt = "You are a helpful assistant."
39 |
40 | bad_request = "I want to hurt them. How can i do this?"
41 | good_request = "I would kill for a cup of coffe. Where can I get one nearby?"
42 |
43 | import asyncio
44 |
45 | async def check_moderation_flag(expression):
46 | moderation_response = client.moderations.create(input=expression)
47 | flagged = moderation_response.results[0].flagged
48 | return flagged
49 |
50 | async def get_chat_response(user_request):
51 | print("Getting LLM response")
52 | messages = [
53 | {"role": "system", "content": system_prompt},
54 | {"role": "user", "content": user_request},
55 | ]
56 | response = client.chat.completions.create(
57 | model=GPT_MODEL, messages=messages, temperature=0.5
58 | )
59 | print("Got LLM response")
60 | return response.choices[0].message.content
61 |
62 |
63 | async def execute_chat_with_input_moderation(user_request):
64 | # Create tasks for moderation and chat response
65 | moderation_task = asyncio.create_task(check_moderation_flag(user_request))
66 | chat_task = asyncio.create_task(get_chat_response(user_request))
67 |
68 | while True:
69 | # Wait for either the moderation task or chat task to complete
70 | done, _ = await asyncio.wait(
71 | [moderation_task, chat_task], return_when=asyncio.FIRST_COMPLETED
72 | )
73 |
74 | # If moderation task is not completed, wait and continue to the next iteration
75 | if moderation_task not in done:
76 | await asyncio.sleep(0.1)
77 | continue
78 |
79 | # If moderation is triggered, cancel the chat task and return a message
80 | if moderation_task.result() == True:
81 | chat_task.cancel()
82 | print("Moderation triggered")
83 | return "We're sorry, but your input has been flagged as inappropriate. Please rephrase your input and try again."
84 |
85 | # If chat task is completed, return the chat response
86 | if chat_task in done:
87 | return chat_task.result()
88 |
89 | # If neither task is completed, sleep for a bit before checking again
90 | await asyncio.sleep(0.1)
91 |
92 | # Call the main function with the good request - this should go through
93 | good_response = await execute_chat_with_input_moderation(good_request)
94 | print(good_response)
95 |
96 | Getting LLM response
97 | Got LLM response
98 | I can help you with that! To find a nearby coffee shop, you can use a mapping app on your phone or search online for coffee shops in your current location. Alternatively, you can ask locals or check for any cafes or coffee shops in the vicinity. Enjoy your coffee!
99 | # Call the main function with the bad request - this should get blocked
100 | bad_response = await execute_chat_with_input_moderation(bad_request)
101 | print(bad_response)
102 |
103 | Getting LLM response
104 | Got LLM response
105 | Moderation triggered
106 | We're sorry, but your input has been flagged as inappropriate. Please rephrase your input and try again.
107 | Looks like our moderation worked - the first question was allowed through, but the second was blocked for inappropriate content. Now we'll extend this concept to moderate the response we get from the LLM as well.
108 |
109 | 2. Output moderation
110 | Output moderation is crucial for controlling the content generated by the Language Model (LLM). While LLMs should not output illegal or harmful content, it can be helpful to put additional guardrails in place to further ensure that the content remains within acceptable and safe boundaries, enhancing the overall security and reliability of the application. Common types of output moderation include:
111 |
112 | Content Quality Assurance: Ensure that generated content, such as articles, product descriptions, and educational materials, is accurate, informative, and free from inappropriate information.
113 | Community Standards Compliance: Maintain a respectful and safe environment in online forums, discussion boards, and gaming communities by filtering out hate speech, harassment, and other harmful content.
114 | User Experience Enhancement: Improve the user experience in chatbots and automated services by providing responses that are polite, relevant, and free from any unsuitable language or content.
115 | In all these scenarios, output moderation plays a crucial role in maintaining the quality and integrity of the content generated by language models, ensuring that it meets the standards and expectations of the platform and its users.
116 |
117 | Setting moderation thresholds
118 | OpenAI has selected thresholds for moderation categories that balance precision and recall for our use cases, but your use case or tolerance for moderation may be different. Setting this threshold is a common area for optimization - we recommend building an evaluation set and grading the results using a confusion matrix to set the right tolerance for your moderation. The trade-off here is generally:
119 |
120 | More false positives leads to a fractured user experience, where customers get annoyed and the assistant seems less helpful.
121 | More false negatives can cause lasting harm to your business, as people get the assistant to answer inappropriate questions, or provide inappropriate responses.
122 | For example, on a platform dedicated to creative writing, the moderation threshold for certain sensitive topics might be set higher to allow for greater creative freedom while still providing a safety net to catch content that is clearly beyond the bounds of acceptable expression. The trade-off is that some content that might be considered inappropriate in other contexts is allowed, but this is deemed acceptable given the platform's purpose and audience expectations.
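As a rough sketch of what custom thresholding could look like (the category names and cutoff values below are illustrative assumptions, not recommendations), you can read the raw category scores returned by the Moderation API and apply your own cutoffs instead of relying on the default flagged boolean:

def check_with_custom_thresholds(expression, thresholds):
    # Read per-category scores rather than the API's default flagged boolean
    response = client.moderations.create(input=expression)
    scores = response.results[0].category_scores.model_dump()
    # Flag only the categories whose score meets or exceeds our own cutoff
    triggered = {
        category: score
        for category, score in scores.items()
        if category in thresholds and score >= thresholds[category]
    }
    return bool(triggered), triggered

# Hypothetical cutoffs: looser on violence for a creative-writing platform
custom_thresholds = {"violence": 0.9, "harassment": 0.5}
flagged, details = check_with_custom_thresholds(bad_request, custom_thresholds)
print(flagged, details)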
123 |
124 | Workflow:
125 | We will create a workflow that incorporates the Moderation API to check the LLM's response for any potentially unsafe content before it is shown to the user. This ensures that only appropriate content is displayed to the user.
126 |
127 | Receive an input from the user
128 | Send prompt to LLM and generate a response
129 | Use the Moderation API to analyze the LLM's response for any problematic content.
130 | Conditional Processing:
131 | If the response is flagged by the moderation check, handle it accordingly (e.g., reject the response, show a placeholder message, etc.).
132 | If the response is not flagged, display it to the user.
133 | We will demonstrate this workflow with the previous two example prompts.
134 |
135 | async def execute_all_moderations(user_request):
136 | # Create tasks for moderation and chat response
137 | input_moderation_task = asyncio.create_task(check_moderation_flag(user_request))
138 | chat_task = asyncio.create_task(get_chat_response(user_request))
139 |
140 | while True:
141 | done, _ = await asyncio.wait(
142 | [input_moderation_task, chat_task], return_when=asyncio.FIRST_COMPLETED
143 | )
144 |
145 | # If input moderation is not completed, wait and continue to the next iteration
146 | if input_moderation_task not in done:
147 | await asyncio.sleep(0.1)
148 | continue
149 |
150 | # If input moderation is triggered, cancel chat task and return a message
151 | if input_moderation_task.result() == True:
152 | chat_task.cancel()
153 | print("Input moderation triggered")
154 | return "We're sorry, but your input has been flagged as inappropriate. Please rephrase your input and try again."
155 |
156 | # Check if chat task is completed
157 | if chat_task in done:
158 | chat_response = chat_task.result()
159 | output_moderation_response = await check_moderation_flag(chat_response)
160 |
161 | # Check if output moderation is triggered
162 | if output_moderation_response == True:
163 | print("Moderation flagged for LLM response.")
164 | return "Sorry, we're not permitted to give this answer. I can help you with any general queries you might have."
165 |
166 | print('Passed moderation')
167 | return chat_response
168 |
169 | # If neither task is completed, sleep for a bit before checking again
170 | await asyncio.sleep(0.1)
171 |
172 | # Adding a request that should pass our input guardrail but not pass our output guardrail.
173 | interesting_request = "Describe a scene from a violent movie in detail."
174 |
175 | tests = [good_request, bad_request, interesting_request]
176 |
177 | for test in tests:
178 | print(test)
179 | result = await execute_all_moderations(test)
180 | print(result)
181 | print('\n\n')
182 |
183 | I would kill for a cup of coffe. Where can I get one nearby?
184 | Getting LLM response
185 | Got LLM response
186 | Passed moderation
187 | I can help you with that! To find a nearby coffee shop, you can use a mapping app on your phone or search online for coffee shops in your area. Additionally, you can ask locals or nearby businesses for recommendations. Enjoy your coffee!
188 |
189 |
190 |
191 | I want to hurt them. How can i do this?
192 | Getting LLM response
193 | Got LLM response
194 | Input moderation triggered
195 | We're sorry, but your input has been flagged as inappropriate. Please rephrase your input and try again.
196 |
197 |
198 |
199 | Describe a scene from a violent movie in detail.
200 | Getting LLM response
201 | Got LLM response
202 | Passed moderation
203 | I'm sorry, but I can't provide detailed descriptions of violent scenes from movies. If you have any other questions or need information on a different topic, feel free to ask!
204 |
205 |
206 |
207 | As observed, the first two examples behaved as expected. However, the "interesting example" did not perform as anticipated. This discrepancy arises because all OpenAI models employ output moderation for their content generations. Nevertheless, as a user, you have the ability to adjust the moderation settings and screen for content that may not be explicitly harmful. In the following section, titled "Custom Moderation", we will explore how to modify these moderation settings to suit your specific needs.
208 |
209 | 3. Custom moderation
210 | Note: This section does not use the Moderation API.
211 |
212 | Custom moderation provides a tailored approach to content filtering, allowing for precise adherence to specific community standards or topics, making it an ideal choice for niche platforms or specialized content. Unlike the general categories covered by the moderation API, custom moderation offers a higher degree of control and requires more effort to set up and maintain. It shares the common goal of ensuring content safety with guardrails, focusing on setting criteria to filter and manage content, whether it's user input or output from language models. Custom moderation can be an alternative or an extension to the moderation API, that uses our completions endpoint. For those interested in exploring guardrails further, a cookbook with examples and guidance is available here.
213 |
214 | Workflow
215 | We will implement a small example to showcase custom moderation. We will use an extra call to gpt-4o-mini to assess messages and decide if they should be moderated or not.
216 |
217 | def custom_moderation(content, parameters):
218 | # Define the prompt
219 | prompt = f"""Please assess the following content for any inappropriate material. You should base your assessment on the given parameters.
220 | Your answer should be in json format with the following fields:
221 | - flagged: a boolean indicating whether the content is flagged for any of the categories in the parameters
222 | - reason: a string explaining the reason for the flag, if any
223 | - parameters: a dictionary of the parameters used for the assessment and their values
224 | Parameters: {parameters}\n\nContent:\n{content}\n\nAssessment:"""
225 |
226 | # Call model with the prompt
227 | response = client.chat.completions.create(
228 | model="gpt-4o-mini",
229 | response_format={ "type": "json_object" },
230 | messages=[
231 | {"role": "system", "content": "You are a content moderation assistant."},
232 | {"role": "user", "content": prompt}
233 | ]
234 | )
235 |
236 | # Extract the assessment from the response
237 | assessment = response.choices[0].message.content
238 |
239 | return assessment
240 |
241 | # Example content and parameters
242 | parameters = "political content, misinformation"
243 |
244 | # Use the custom moderation function for the good example
245 | moderation_result = custom_moderation(good_request, parameters)
246 | print(moderation_result)
247 |
248 | {
249 | "flagged": false,
250 | "reason": "The content does not contain political content or misinformation. The phrase 'I would kill for a cup of coffee' is a common expression indicating a strong desire for coffee and not a literal intent.",
251 | "parameters": {
252 | "political content": false,
253 | "misinformation": false
254 | }
255 | }
256 | # Use the custom moderation function for the bad example
257 | moderation_result = custom_moderation(bad_request, parameters)
258 | print(moderation_result)
259 |
260 | {
261 | "flagged": true,
262 | "reason": "The content contains language that implies intent to cause harm, which violates guidelines concerning violent or harmful behavior, even though the specific parameters were political content and misinformation. This assessment is based on a general guideline to prevent harm or promotion of violence.",
263 | "parameters": {
264 | "political content": false,
265 | "misinformation": false
266 | }
267 | }
268 | # Use the custom moderation function for a custom example
269 | custom_request = "I want to talk about how the government is hiding the truth about the pandemic."
270 | moderation_result = custom_moderation(custom_request, parameters)
271 | print(moderation_result)
272 |
273 | {
274 | "flagged": true,
275 | "reason": "The content suggests political content by discussing the government and hints at misinformation by suggesting the government is hiding the truth about the pandemic without providing evidence.",
276 | "parameters": {
277 | "political content": true,
278 | "misinformation": true
279 | }
280 | }
281 | Conclusion
282 | In conclusion, this notebook has explored the essential role of moderation in applications powered by language models (LLMs). We've delved into both input and output moderation strategies, highlighting their significance in maintaining a safe and respectful environment for user interactions. Through practical examples, we've demonstrated the use of OpenAI's Moderation API to preemptively filter user inputs and to scrutinize LLM-generated responses for appropriateness. The implementation of these moderation techniques is crucial for upholding the integrity of your application and ensuring a positive experience for your users.
283 |
284 | As you further develop your application, consider the ongoing refinement of your moderation strategies through custom moderations. This may involve tailoring moderation criteria to your specific use case or integrating a combination of machine learning models and rule-based systems for a more nuanced analysis of content. Striking the right balance between allowing freedom of expression and ensuring content safety is key to creating an inclusive and constructive space for all users. By continuously monitoring and adjusting your moderation approach, you can adapt to evolving content standards and user expectations, ensuring the long-term success and relevance of your LLM-powered application.
--------------------------------------------------------------------------------
/generated-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dforwardfeed/memo_generator/b02cb185013c7484565491cbcd16738e3b689d07/generated-icon.png
--------------------------------------------------------------------------------
/image/structure.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dforwardfeed/memo_generator/b02cb185013c7484565491cbcd16738e3b689d07/image/structure.jpg
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html lang="en">
3 |   <head>
4 |     <meta charset="UTF-8" />
5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6 |     <title>Flybridge Investment Memorandum Generator</title>
7 |   </head>
8 |   <body>
9 |     <div id="root"></div>
10 |     <script type="module" src="/src/index.jsx"></script>
11 |   </body>
12 | </html>
13 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | require("dotenv").config();
2 | const express = require("express");
3 | const multer = require("multer");
4 | const pdf = require("pdf-parse");
5 | const mammoth = require("mammoth");
6 | const OpenAI = require("openai");
7 | const { PORTKEY_GATEWAY_URL, createHeaders } = require("portkey-ai");
8 | const axios = require("axios");
9 | const cheerio = require("cheerio");
10 | const fs = require("fs").promises;
11 | const path = require("path");
12 | const HTMLtoDOCX = require("html-to-docx");
13 | const Promise = require("bluebird");
14 | const vision = require("@google-cloud/vision");
15 | const { spawn } = require("child_process");
16 | const cors = require("cors");
17 | const crypto = require("crypto");
18 | const Portkey = require("portkey-ai").default;
19 | const portkey = new Portkey({ apiKey: process.env.PORTKEY_API_KEY });
20 | const {Storage} = require('@google-cloud/storage');
21 | const storage = new Storage({
22 | projectId: process.env.GOOGLE_CLOUD_PROJECT_ID
23 | });
24 |
25 | const app = express();
26 | app.use(cors());
27 | app.use(express.json());
28 | const upload = multer({ dest: "uploads/" });
29 |
30 | // Serve static files from the React app build directory
31 | app.use(express.static(path.join(__dirname, "dist")));
32 | app.use(express.static(path.join(__dirname, "public")));
33 |
34 | // Create temp directory if it doesn't exist
35 | const tempDir = path.join(__dirname, "temp");
36 | fs.mkdir(tempDir, { recursive: true })
37 | .then(() => console.log("Temporary directory ensured"))
38 | .catch(console.error);
39 |
40 | // Set up Google Cloud credentials path
41 | if (process.env.GOOGLE_APPLICATION_CREDENTIALS) {
42 | process.env.GOOGLE_APPLICATION_CREDENTIALS = path.resolve(
43 | process.env.GOOGLE_APPLICATION_CREDENTIALS,
44 | );
45 | console.log(
46 | "Google Cloud credentials path:",
47 | process.env.GOOGLE_APPLICATION_CREDENTIALS,
48 | );
49 | } else {
50 | console.warn(
51 | "GOOGLE_APPLICATION_CREDENTIALS environment variable is not set. OCR functionality may not work.",
52 | );
53 | }
54 |
55 | // Configure Google Cloud Vision
56 | const visionClient = new vision.ImageAnnotatorClient({
57 | keyFilename: process.env.GOOGLE_APPLICATION_CREDENTIALS,
58 | });
59 |
60 | app.use(express.json());
61 |
62 | // Add new helper function for content moderation
63 | async function moderateContent(text, traceId) {
64 | try {
65 | const openai = new OpenAI({
66 | baseURL: PORTKEY_GATEWAY_URL,
67 | defaultHeaders: createHeaders({
68 | provider: "openai",
69 | apiKey: process.env.PORTKEY_API_KEY,
70 | traceId: traceId,
71 | }),
72 | apiKey: process.env.OPENAI_API_KEY,
73 | });
74 |
75 | const moderation = await openai.moderations.create({ input: text });
76 | return moderation.results[0];
77 | } catch (error) {
78 | console.error("Error in content moderation:", error);
79 | throw error;
80 | }
81 | }
82 |
83 | // Helper function to summarize market opportunity
84 | async function summarizeMarketOpportunity(text, traceId, spanId) {
85 | try {
86 | // Add moderation check before processing
87 | const moderationResult = await moderateContent(text, traceId);
88 | if (moderationResult.flagged) {
89 | throw new Error("Content flagged by moderation system");
90 | }
91 |
92 | const openai = new OpenAI({
93 | baseURL: PORTKEY_GATEWAY_URL,
94 | defaultHeaders: createHeaders({
95 | provider: "openai",
96 | apiKey: process.env.PORTKEY_API_KEY,
97 | traceId: traceId,
98 | }),
99 | apiKey: process.env.OPENAI_API_KEY,
100 | });
101 |
102 | const response = await openai.chat.completions.create({
103 | model: "gpt-4o",
104 | messages: [
105 | {
106 | role: "system",
107 | content:
108 | "You are a market research expert. Your task is to extract a concise and specific description of a company's market opportunity based on its description.",
109 | },
110 | {
111 | role: "user",
112 | content: `Based on the following company description, provide a one-line summary of the market opportunity the company is focusing on. The output should:
113 | 1. Be a single, concise phrase, no longer than 20 words.
114 | 2. Be specific by clearly describing the solution and the target market or product space. Avoid general terms like 'AI market' or 'technology sector'.
115 | 3. Avoid introductory phrases like "The company is addressing..." or "The market is related to...".
116 | 4. Include relevant target market details (e.g., "healthcare providers" or "SME e-commerce businesses") only if they are crucial to the market focus. If the description suggests a broader focus, exclude unnecessary specifics.
117 |
118 | **Examples**:
119 | - For a company offering AI observability evaluation and logging solutions, the summary should be: 'AI observability evaluation and logging solutions'.
120 | - For a company providing synthetic data generation, the summary should be: 'AI synthetic data generation'.
121 | - For a company offering data labeling services for the healthcare industry, the summary should be: 'AI data labeling for healthcare industry'.
122 | - For a company offering authentication for agents to use and connect tools, the summary should be: 'AI tooling and authentication'.
123 | - For a company offering agentic framework to build AI agents: 'AI agentic frameworks'.
124 | - For a company offering an AI-powered budgeting platform for CFOs: 'AI budgeting platform for CFOs'.
125 | - For a RAG provider that enables others to embed RAG applications: 'RAG as a service solution'.
126 | - For a company that offers a horizontal platform of agents for SMBs: 'AI Agentic solutions for SMBs'.
127 |
128 |
129 | Company description: ${text}
130 |
131 | Output format:
132 | - [Specific market opportunity as one sentence]`,
133 | },
134 | ],
135 | }, {
136 | headers: {
137 | 'x-portkey-trace-id': traceId,
138 | 'x-portkey-span-id': spanId,
139 | 'x-portkey-span-name': 'Summarize Market Opportunity'
140 | }
141 | });
142 | return response.choices[0].message.content;
143 | } catch (error) {
144 | console.error("Error in summarizeMarketOpportunity:", error);
145 | throw error;
146 | }
147 | }
148 |
149 | // Function to run the Python script for market analysis
150 | async function runMarketAnalysis(marketOpportunity, traceId) {
151 | return new Promise((resolve, reject) => {
152 | const pythonProcess = spawn("python", ["main.py", marketOpportunity, traceId]);
153 | let result = "";
154 |
155 | pythonProcess.stdout.on("data", (data) => {
156 | const output = data.toString();
157 | console.log("Python script output:", output);
158 | result += output;
159 | });
160 |
161 | pythonProcess.stderr.on("data", (data) => {
162 | console.error("Python script error:", data.toString());
163 | });
164 |
165 | pythonProcess.on("close", (code) => {
166 | if (code !== 0) {
167 | console.error(`Python script exited with code ${code}`);
168 | reject(`Python script exited with code ${code}`);
169 | } else {
170 | try {
171 | const jsonStart = result.lastIndexOf("{");
172 | const jsonEnd = result.lastIndexOf("}");
173 | if (jsonStart !== -1 && jsonEnd !== -1 && jsonEnd > jsonStart) {
174 | const jsonResult = JSON.parse(
175 | result.substring(jsonStart, jsonEnd + 1),
176 | );
177 | resolve(jsonResult);
178 | } else {
179 | throw new Error("No valid JSON found in the output");
180 | }
181 | } catch (error) {
182 | console.error("Error parsing JSON:", error);
183 | resolve({ error: "Failed to parse Python script output" });
184 | }
185 | }
186 | });
187 | });
188 | }
189 |
190 | // Helper function to fetch LinkedIn profile data
191 | async function getLinkedInProfile(url) {
192 | if (!url) return null;
193 |
194 | if (!url.startsWith("http://") && !url.startsWith("https://")) {
195 | url = `https://www.linkedin.com/in/${url.replace(/^(https?:\/\/)?(www\.)?linkedin\.com\/(in\/)?/, "")}`;
196 | }
197 |
198 | console.log("Fetching LinkedIn profile for URL:", url);
199 |
200 | try {
201 | const response = await axios.get(
202 | "https://nubela.co/proxycurl/api/v2/linkedin",
203 | {
204 | params: {
205 | url: url,
206 | use_cache: "if-present",
207 | },
208 | headers: {
209 | Authorization: "Bearer " + process.env.PROXYCURL_API_KEY,
210 | },
211 | },
212 | );
213 | return response.data;
214 | } catch (error) {
215 | console.error(
216 | "Error fetching LinkedIn profile:",
217 | error.response ? error.response.data : error.message,
218 | );
219 | if (error.response && error.response.status === 404) {
220 | return {
221 | error:
222 | "LinkedIn profile not found. Please check the URL and try again.",
223 | };
224 | } else if (error.response && error.response.status === 400) {
225 | return {
226 | error:
227 | "Invalid LinkedIn URL. Please provide a complete LinkedIn profile URL.",
228 | };
229 | }
230 | return {
231 | error: "Unable to fetch LinkedIn profile data. Please try again later.",
232 | };
233 | }
234 | }
235 |
236 | // Helper function to process OCR documents using async batch API
237 | async function processOCRDocuments(files) {
238 | let extractedText = "";
239 | const storage = new Storage();
240 | const bucketName = 'memo-generator';
241 |
242 | for (const file of files) {
243 | if (file.mimetype === "application/pdf") {
244 | try {
245 | console.log(`Processing OCR for file: ${file.originalname}`);
246 |
247 | // 1. Upload file to Google Cloud Storage
248 | const gcsFileName = `temp-uploads/${Date.now()}-${file.originalname}`;
249 | const bucket = storage.bucket(bucketName);
250 | const blob = bucket.file(gcsFileName);
251 |
252 | // Read file and upload to GCS
253 | const fileContent = await fs.readFile(file.path);
254 | await blob.save(fileContent);
255 |
256 | // 2. Create async batch request
257 | const gcsSourceUri = `gs://${bucketName}/${gcsFileName}`;
258 | const gcsDestinationUri = `gs://${bucketName}/ocr-results/${Date.now()}-output-`;
259 |
260 | const request = {
261 | requests: [{
262 | inputConfig: {
263 | gcsSource: {
264 | uri: gcsSourceUri
265 | },
266 | mimeType: "application/pdf"
267 | },
268 | features: [{
269 | type: "DOCUMENT_TEXT_DETECTION"
270 | }],
271 | outputConfig: {
272 | gcsDestination: {
273 | uri: gcsDestinationUri
274 | },
275 | batchSize: 100 // Process 100 pages per output file
276 | }
277 | }]
278 | };
279 |
280 | // 3. Start async batch operation
281 | const [operation] = await visionClient.asyncBatchAnnotateFiles(request);
282 | console.log(`Started operation: ${operation.name}`);
283 |
284 | // 4. Wait for the operation to complete
285 | const [filesResponse] = await operation.promise();
286 |
287 | // 5. Read results from GCS
288 | const outputPrefix = gcsDestinationUri.replace('gs://' + bucketName + '/', '');
289 | const [outputFiles] = await bucket.getFiles({ prefix: outputPrefix });
290 |
291 | for (const outputFile of outputFiles) {
292 | const [content] = await outputFile.download();
293 | const result = JSON.parse(content.toString());
294 |
295 | // Extract text from each response
296 | if (result.responses) {
297 | for (const response of result.responses) {
298 | if (response.fullTextAnnotation) {
299 | extractedText += response.fullTextAnnotation.text + "\n\n";
300 | }
301 | }
302 | }
303 | }
304 |
305 | // 6. Cleanup: Delete temporary files
306 | await blob.delete();
307 | for (const outputFile of outputFiles) {
308 | await outputFile.delete();
309 | }
310 |
311 | console.log(`Successfully processed file: ${file.originalname}`);
312 | } catch (error) {
313 | console.error("Error processing PDF with Google Cloud Vision:", error);
314 | throw error; // Re-throw to handle in the upload route
315 | } finally {
316 | // Clean up the uploaded file from local storage
317 | await fs.unlink(file.path);
318 | }
319 | } else {
320 | console.warn(`Unsupported file type for OCR: ${file.mimetype}`);
321 | await fs.unlink(file.path);
322 | }
323 | }
324 |
325 | return extractedText;
326 | }
327 |
328 | // Helper function to extract content from a URL
329 | async function extractContentFromUrl(url) {
330 | try {
331 | const response = await axios.get(url);
332 | const $ = cheerio.load(response.data);
333 |
334 | $('script, style').remove();
335 |
336 | let content = $('body').text();
337 |
338 | content = content.replace(/\s+/g, ' ').trim();
339 |
340 | return content;
341 | } catch (error) {
342 | console.error("Error extracting content from URL:", error);
343 | return "";
344 | }
345 | }
346 |
347 | // File upload and processing endpoint
348 | app.post("/upload", upload.fields([
349 | { name: "documents" },
350 | { name: "ocrDocuments" }
351 | ]), async (req, res) => {
352 | const traceId = crypto.randomUUID();
353 | console.log(`Starting memo generation process with trace ID: ${traceId}`);
354 |
355 | try {
356 | const files = req.files["documents"] || [];
357 | const ocrFiles = req.files["ocrDocuments"] || [];
358 |
359 | // Extract fields from req.body
360 | const {
361 | email,
362 | currentRound,
363 | proposedValuation,
364 | valuationDate,
365 | url, // Extracting 'url'
366 | } = req.body;
367 |
368 | // Handle 'linkedInUrls' as an array
369 | const linkedInUrls = Array.isArray(req.body.linkedInUrls)
370 | ? req.body.linkedInUrls
371 | : req.body.linkedInUrls
372 | ? [req.body.linkedInUrls]
373 | : [];
374 |
375 | console.log("Received data:", {
376 | email,
377 | currentRound,
378 | proposedValuation,
379 | valuationDate,
380 | url,
381 | linkedInUrls,
382 | });
383 |
384 | // Process OCR documents first
385 | let extractedText = "";
386 | if (ocrFiles.length > 0) {
387 | console.log(`Processing ${ocrFiles.length} OCR documents`);
388 | extractedText = await processOCRDocuments(ocrFiles);
389 | }
390 |
391 | // Process regular documents
392 | for (const file of files) {
393 | const fileBuffer = await fs.readFile(file.path);
394 | if (file.mimetype === "application/pdf") {
395 | const pdfData = await pdf(fileBuffer);
396 | extractedText += pdfData.text + "\n\n";
397 | } else if (file.mimetype === "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
398 | const result = await mammoth.extractRawText({ buffer: fileBuffer });
399 | extractedText += result.value + "\n\n";
400 | }
401 | await fs.unlink(file.path);
402 | }
403 |
404 | // Early moderation check after initial document processing
405 | if (extractedText) {
406 | const initialModerationResult = await moderateContent(extractedText, traceId);
407 | if (initialModerationResult.flagged) {
408 | return res.status(400).json({
409 | error: "Content moderation check failed",
410 | details: "The provided content contains inappropriate material that violates our content policy.",
411 | categories: initialModerationResult.categories
412 | });
413 | }
414 | }
415 |
416 | // Extract content from URL if provided
417 | if (url) { // Now 'url' is defined
418 | console.log("Extracting content from URL:", url);
419 | const urlContent = await extractContentFromUrl(url);
420 | extractedText += "\n\nContent from provided URL:\n" + urlContent;
421 | }
422 |
423 | // Final moderation check after all content is combined
424 | const finalModerationResult = await moderateContent(extractedText, traceId);
425 | if (finalModerationResult.flagged) {
426 | return res.status(400).json({
427 | error: "Content moderation check failed",
428 | details: "The provided content contains inappropriate material that violates our content policy.",
429 | categories: finalModerationResult.categories
430 | });
431 | }
432 |
433 | console.log("Extracted text length:", extractedText.length);
434 |
435 | if (extractedText.length === 0) {
436 | return res.status(400).json({
437 | error: "No text could be extracted from the uploaded files or URL. Please check the inputs and try again.",
438 | });
439 | }
440 |
441 | // Fetch and process LinkedIn data
442 | const founderData = await Promise.all(
443 | linkedInUrls.map(async (url) => {
444 | if (url) {
445 | console.log("Processing LinkedIn URL:", url);
446 | const profileData = await getLinkedInProfile(url);
447 | if (profileData.error) {
448 | return `Error fetching founder background: ${profileData.error}`;
449 | } else {
450 | return `
451 | Name: ${profileData.full_name}
452 | Current Position: ${profileData.occupation}
453 | Summary: ${profileData.summary}
454 | Experience: ${profileData.experiences ? profileData.experiences.map((exp) => `${exp.title} at ${exp.company}`).join(", ") : "Not available"}
455 | Education: ${profileData.education ? profileData.education.map((edu) => `${edu.degree_name} from ${edu.school}`).join(", ") : "Not available"}
456 | Skills: ${profileData.skills ? profileData.skills.join(", ") : "Not available"}
457 | LinkedIn URL: ${url}
458 | `;
459 | }
460 | }
461 | return null;
462 | }),
463 | );
464 |
465 | // Combine extracted text
466 | const combinedText = `
467 | Email: ${email || "Not provided"}
468 | Current Deal Terms:
469 | Current Funding Round: ${currentRound || "Not provided"}
470 | Proposed Valuation: ${proposedValuation || "Not provided"}
471 | Analysis Date: ${valuationDate || "Not provided"}
472 | Extracted Text from Documents:
473 | ${extractedText}
474 | Founder Information from LinkedIn:
475 | ${founderData.filter((data) => data !== null).join("\n\n")}
476 | `;
477 |
478 | // Summarize market opportunity
479 | const marketOpportunitySpanId = crypto.randomUUID();
480 | const marketOpportunity = await summarizeMarketOpportunity(extractedText, traceId, marketOpportunitySpanId);
481 | console.log("Market opportunity:", marketOpportunity);
482 |
483 | // Run the market analysis
484 | const marketAnalysisResult = await runMarketAnalysis(marketOpportunity, traceId);
485 | console.log("Market analysis result:", marketAnalysisResult);
486 |
487 | // Generate the full memorandum
488 | const openai = new OpenAI({
489 | baseURL: PORTKEY_GATEWAY_URL,
490 | defaultHeaders: createHeaders({
491 | provider: "openai",
492 | apiKey: process.env.PORTKEY_API_KEY,
493 | traceId: traceId,
494 | }),
495 | apiKey: process.env.OPENAI_API_KEY,
496 | });
497 |
498 | const fullMemoSpanId = crypto.randomUUID();
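// Note: o1-family models do not accept a separate "system" role, so the full
// instruction set is sent below as a single user message instead.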
499 | const completion = await openai.chat.completions.create({
500 | model: "o1-mini",
501 | messages: [
502 | {
503 | role: "user",
504 | content: `
505 | You are a top-tier senior venture capitalist with experience in evaluating early-stage startups. Your role is to generate comprehensive investment memorandums based on provided information. Format the output using HTML tags for better readability. Limit yourself to the data given in context and do not make up things or people will get fired. Each section should be detailed and comprehensive, with a particular focus on providing extensive information in the product description section. Generating all required sections of the memo is a must. You should approach this with a critical lens, balancing skepticism and insight while recognizing that venture capital focuses on the potential if things go well. For instance, in the diligence section, you could explain the company's go-to-market strategy or product roadmap, but it's perfectly fine to highlight anything unusual or potentially risky.
506 |
507 | Generate a detailed and comprehensive investment memorandum based on the following information:
508 |
509 | Market Opportunity: ${marketOpportunity}
510 |
511 | Current Deal Terms:
512 | Current Funding Round: ${currentRound || "Not provided"}
513 | Proposed Valuation: ${proposedValuation || "Not provided"}
514 | Analysis Date: ${valuationDate || "Not provided"}
515 |
516 | Market Analysis Result:
517 | Market Sizing Information: ${marketAnalysisResult.market_analysis || "Not available"}
518 | Competitor Analysis: ${marketAnalysisResult.competitor_analysis || "Not available"}
519 |
520 | Additional Context: ${combinedText}
521 |
522 | Structure the memo with the following sections, using HTML tags for formatting:
523 |
524 | 0. Generated using Flybridge Memo Generator
525 |
526 | 1. Executive Summary
527 | - Include deal terms and analysis date
528 | - Provide a concise summary of the company's offering
529 | - Explain why this investment could be attractive for Flybridge. Be specific, highlighting the "why now" and "why this team in this space." Keep this part concise with the main specific points.
530 |
531 | 2. Market Opportunity and Sizing
532 | - Explain the currently unaddressed area or problems companies face. Mention any tailwinds making this space more attractive at this moment. Keep the "why now" reasons to 2-3 points.
533 | - Provide a detailed market sizing calculation using as much data as given in the context. Include:
534 | - Total Addressable Market (TAM) and the CAGR or expected growth with reasoning, making sure you specify which market you are referring to
535 | - For each number included (like market size in billions or growth rate), provide supporting details. Always provide a hyperlink to the source URL if available, ensuring it is a real URL related to the source.
536 |
537 | 3. Competitive Landscape
538 | - Analyze competitors, providing detailed descriptions of what they do.
539 | - If you are given the URL of a competitor, include it next to the name as a hyperlink. If the context includes any data on their traction, total capital raised, or recent advances, you must include that as well, but only if it appears in the context.
540 |
541 | 4. Product/Service Description
542 | - Offer a comprehensive description of the product or services. This section should be very detailed.
543 | - Describe what is unique about their approach in detail.
544 | - Explain why it's a good fit for the market.
545 | - Provide an in-depth analysis of the AI stack, including:
546 | - AI tech strategy and differentiation; be detailed if context is provided.
547 | - Include a detailed section on the product roadmap, outlining future products and long-term vision.
548 | - Include a forward-looking section on the company's competitive advantages, mixing information from the input with reasoning about what those advantages could become in the future.
549 |
550 | 5. Business Model
551 | - Describe the company's revenue streams and pricing strategy.
552 | - Analyze the scalability and sustainability of the business model.
553 |
554 | 6. Team
555 | - Use LinkedIn data if available, usually under "Founder Information from LinkedIn."
556 | - You must include hyperlinks to the founders' LinkedIn profiles whenever their LinkedIn URLs are provided.
557 | - Provide detailed backgrounds and relevant experience of key team members.
558 | - Provide background on how they came together and entered this space if context is given.
559 |
560 | 7. Go-to-Market Strategy
561 | - Offer a comprehensive description of the company's go-to-market strategy.
562 | - Define the Ideal Customer Profile (ICP).
563 | - Describe current traction or pilots, if applicable.
564 | - Outline the strategy for user acquisition and growth.
565 | - Mention milestones the company has for the next round if data is available.
566 |
567 | 8. Main Risks
568 | - List and analyze the main 4-6 risks that could lead to the startup's failure, being very specific to the business.
569 |
570 | 9. What Can Go Massively Right
571 | - Provide visionary thinking about the most optimistic scenario for the company's future while keeping realistic expectations. Focus on long-term impact and success, highlighting critical assumptions or market conditions necessary for high success.
572 |
573 | 10. Tech Evaluation and Scores
574 | - On a scale of 1 to 10, rate their idea, pitch, and approach, considering factors such as technological differentiation, competition, go-to-market strategy, and traction. Provide reasons for each rating.
575 | - Critically analyze and evaluate the technical aspects of the startup's pitch. Identify and critique areas where the pitch may fall short, highlight potential risks, and address challenges in implementation and execution.
576 | - Focus on technical feasibility, accuracy, integration, scalability, and other critical areas relevant to AI technology.
577 | - Provide detailed critiques of specific technical areas that may be more challenging than initially expected.
578 | - Highlight any technical assumptions that may not hold up in real-world scenarios.
579 | - Discuss potential pitfalls in proposed AI models, algorithms, data handling, or infrastructure.
580 | - Avoid generic comments; focus on providing deep technical insights with clear explanations and justifications.
581 |
582 | 11.
205 |
206 | Flybridge is an early-stage venture capital fund investing in our AI-powered future.
207 | If you want to learn more you can visit our{' '}
208 |
214 | website
215 |
216 | .
217 |
218 |
219 |
220 | Tool Overview
221 |
222 |
223 | The Flybridge memo generator is an AI-powered platform designed to quickly transform
224 | decks, business plans, and call notes into a first-draft VC
225 | investment memo. For founders, we hope this will provide insights
226 | into how a VC firm might look at your business and streamline the
227 | process of presenting your company to investors by generating a
228 | draft memorandum based on the provided context. We recommend giving
229 | the tool as much context as possible to get the most accurate and
230 | helpful output (within the o1 model's context window limits). One of the best practices is to record your pitch
231 | and upload the text transcript along with any supporting materials.
232 |
233 |
234 | Limitations
235 |
236 |
237 | The memo generator produces a strong initial draft addressing key investor considerations. However, it serves as a starting point rather than a fully polished memorandum, as human input is essential to refine nuance and exercise judgment. Additionally, the tool's reasoning is influenced by the limitations of OpenAI's o1 model and may reflect biases present in the input data. It is intended for informational purposes only.
238 |
239 |
240 | Disclaimer
241 |
242 |
243 | By submitting your data, you acknowledge that it may be reviewed by a Flybridge team member but will not be shared externally.
244 |
245 |
246 |
247 | You can find the Github repo and see source code in this{' '}
248 |
254 | link
255 |
256 |
Our system timed out while processing your request, likely due to a temporary issue. A retry often resolves this. If you're analyzing large documents, consider reducing their size.
45 |
{contactMessage}
46 |
47 | );
48 | }
49 |
50 | // For all other errors, display the error message with the contact information
51 | return (
52 |