├── .gitignore ├── LICENSE ├── README.md ├── docs └── curl.txt ├── huggingface-transformers └── example3.js ├── llama-cpp ├── example4.js └── example5.js ├── load-tests ├── create_ollama_containers.sh ├── loadtest.js ├── ollama-multi.js ├── prompts.js ├── reports.md ├── run_ollama_containers.sh ├── sendRequests.js ├── stop_ollama_containers.sh ├── vllm-commands.md └── vllm-single.js ├── ollama ├── example6.js ├── example7.js ├── parseAndChunk.js └── prompt.js ├── openai-js ├── dinner.jpg ├── openai-vllm.js └── openai.js ├── package-lock.json ├── package.json ├── scripts └── copy-md.sh ├── tts └── tts-kokoro.py ├── vision └── img-to-b64.html └── xenova-transformers ├── example1.js └── example2.js /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Docusaurus cache and generated files 108 | .docusaurus 109 | 110 | # Serverless directories 111 | .serverless/ 112 | 113 | # FuseBox cache 114 | .fusebox/ 115 | 116 | # DynamoDB Local files 117 | .dynamodb/ 118 | 119 | # TernJS port file 120 | .tern-port 121 | 122 | # Stores VSCode versions used for testing VSCode extensions 123 | .vscode-test 124 | 125 | # yarn v2 126 | .yarn/cache 127 | .yarn/unplugged 128 | .yarn/build-state.yml 129 | .yarn/install-state.gz 130 | .pnp.* 131 | 132 | *.gguf 133 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Foyzul Karim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Text Generation with LLMs from Node.js 2 | 3 | ![8d783373-5607-4195-b36f-cfce3fc46159](https://github.com/user-attachments/assets/02e9eaad-caa9-4113-b4f1-22833abfd547) 4 | 5 | 6 | This project demonstrates how to use different LLMs for text generation using either the `@xenova/transformers` or the `@huggingface/transformers` library. 7 | 8 | ## Prerequisites 9 | 10 | - Node.js (version 20 or higher recommended) 11 | - npm (Node Package Manager) 12 | 13 | ## Installation 14 | 15 | 1. Clone this repository or create a new directory for your project. 16 | 17 | 2. Navigate to the project directory in your terminal. 18 | 19 | 3. Install the required dependencies: 20 | ``` 21 | npm install @xenova/transformers @huggingface/transformers 22 | ``` 23 | 24 | ## Usage 25 | 26 | 1. Run the script using Node.js: 27 | 28 | For example, to run `example1.js`: 29 | ``` 30 | node example1.js 31 | ``` 32 | 33 | 2. The script will generate text based on the input prompt and display the output in the console. 34 | 35 | ## Customization 36 | 37 | - You can modify the `text` variable or other input parameters to change the input prompt. 38 | - Adjust the `max_new_tokens` and `temperature` parameters in the `generator` function call to control the length and creativity of the generated text (see the quick example at the end of this README). 39 | 40 | ## Note 41 | 42 | - The first time you run the script, it may take some time to download the model. Subsequent runs will be faster. 43 | - Make sure you have a stable internet connection for the initial model download. 44 | - A GPU is recommended for better performance. 45 | 46 | ## Troubleshooting 47 | 48 | If you encounter any issues, please ensure that: 49 | - You have the latest version of Node.js installed. 50 | - All dependencies are correctly installed. 51 | - You have sufficient disk space for the model download.
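## Quick example

The sketch below follows the same pattern as `huggingface-transformers/example3.js`: load an instruction-tuned model through the `pipeline` helper, pass it a chat-style prompt, and read back the generated reply. The model name and generation parameters are the ones already used in this repository; the prompt text is only illustrative.

```
import { pipeline } from '@huggingface/transformers';

// Load a text-generation pipeline (the model is downloaded on the first run)
const generator = await pipeline(
  'text-generation',
  'onnx-community/Llama-3.2-1B-Instruct'
);

// A chat-style prompt: a system instruction plus a user message
const messages = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'Explain how the internet works in two sentences.' },
];

// max_new_tokens caps the reply length; temperature controls creativity
const output = await generator(messages, {
  max_new_tokens: 200,
  temperature: 0.5,
});

// generated_text holds the whole conversation; the last entry is the model's reply
console.log(output[0].generated_text.at(-1).content);
```

Tune `max_new_tokens` and `temperature` here exactly as described in the Customization section above.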
52 | -------------------------------------------------------------------------------- /docs/curl.txt: -------------------------------------------------------------------------------- 1 | curl -X POST "http://192.168.4.28:8000/v1/chat/completions" \ 2 | -H "Content-Type: application/json" \ 3 | --data '{ 4 | "model": "OpenGVLab/InternVL2_5-1B", 5 | "stream": true, 6 | "stream_options": { 7 | "include_usage": true 8 | }, 9 | "messages": [ 10 | { 11 | "role": "user", 12 | "content": [ 13 | { 14 | "type": "text", 15 | "text": "Describe this image." 16 | }, 17 | { 18 | "type": "image_url", 19 | "image_url": { 20 | "url": "https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png" 21 | } 22 | } 23 | ] 24 | } 25 | ] 26 | }' 27 | -------------------------------------------------------------------------------- /huggingface-transformers/example3.js: -------------------------------------------------------------------------------- 1 | import { pipeline } from '@huggingface/transformers'; 2 | 3 | // Create a text generation pipeline 4 | const generator = await pipeline( 5 | 'text-generation', 6 | 'onnx-community/Llama-3.2-1B-Instruct' 7 | ); 8 | 9 | // Define the list of messages 10 | const messages = [ 11 | { 12 | role: 'system', 13 | content: 'You are a standup comedian for kids birthday party.', 14 | }, 15 | { role: 'user', content: 'Tell me a joke.' }, 16 | ]; 17 | 18 | // Generate a response 19 | const output = await generator(messages, { 20 | max_new_tokens: 200, 21 | temperature: 0.5, 22 | }); 23 | console.log(output[0].generated_text[2].content); 24 | -------------------------------------------------------------------------------- /llama-cpp/example4.js: -------------------------------------------------------------------------------- 1 | import {fileURLToPath} from "url"; 2 | import path from "path"; 3 | import {getLlama, LlamaChatSession} from "node-llama-cpp"; 4 | 5 | 6 | // original path: /home/foyzul/Downloads/Llama-3.2-1B-Instruct-IQ3_M.gguf 7 | const __dirname = path.dirname(fileURLToPath(import.meta.url)); 8 | const modelPath = path.join(__dirname, "Llama-3.2-1B-Instruct-IQ3_M.gguf"); 9 | 10 | const llama = await getLlama(); 11 | const model = await llama.loadModel({ 12 | modelPath: modelPath, 13 | gpuLayers: 32, // Increase GPU usage 14 | contextSize: 2048, // Increase context size 15 | batchSize: 512, // Keep batch size moderate 16 | gpuVRAMLimit: 11 * 1024 * 1024 * 1024, // Set VRAM limit to 11GB, 17 | nGpuLayers: 32, // For CUDA. Set to 0 for CPU-only. 18 | useVulkan: false, // Explicitly disable Vulkan 19 | useCuda: true, // Enable CUDA. Set to false for CPU-only. 
20 | }); 21 | const context = await model.createContext(); 22 | const session = new LlamaChatSession({ 23 | contextSequence: context.getSequence(), 24 | }); 25 | 26 | 27 | // const q1 = "Hi there, how are you?"; 28 | // console.log("User: " + q1); 29 | 30 | // const a1 = await session.prompt(q1); 31 | // console.log("AI: " + a1); 32 | 33 | 34 | // const q2 = "Summarize what you said"; 35 | // console.log("User: " + q2); 36 | 37 | // const a2 = await session.prompt(q2); 38 | // console.log("AI: " + a2); 39 | 40 | const question = "How do I become better at coding?"; 41 | console.log("User: " + question); 42 | 43 | const answer = await session.prompt(question); 44 | console.log("AI: " + answer); 45 | -------------------------------------------------------------------------------- /llama-cpp/example5.js: -------------------------------------------------------------------------------- 1 | // Import the llama-cpp package 2 | // const { Llama } = require('node-llama-cpp'); 3 | import { getLlama,LlamaChatSession } from 'node-llama-cpp'; 4 | import path from "path"; 5 | import {fileURLToPath} from "url"; 6 | 7 | // original path: /home/foyzul/Downloads/Llama-3.2-1B-Instruct-IQ3_M.gguf 8 | const __dirname = path.dirname(fileURLToPath(import.meta.url)); 9 | const modelPath = path.join(__dirname, "Llama-3.2-1B-Instruct-IQ3_M.gguf"); 10 | 11 | 12 | // Initialize the Llama model 13 | const llama = await getLlama({ 14 | gpu: "cuda", 15 | gpuLayers: 32, 16 | contextSize: 2048, 17 | batchSize: 512, 18 | gpuVRAMLimit: 11 * 1024 * 1024 * 1024, 19 | }); 20 | const model = await llama.loadModel({ 21 | modelPath: modelPath, 22 | }); 23 | const context = await model.createContext(); 24 | const session = new LlamaChatSession({ 25 | contextSequence: context.getSequence(), 26 | }); 27 | //console.log(model); 28 | 29 | // Function to tell a joke 30 | async function tellAJoke() { 31 | const prompt = "Tell me a joke."; 32 | 33 | try { 34 | const response = await session.prompt(prompt); 35 | console.log(response); 36 | } catch (error) { 37 | console.error("Error generating joke:", error); 38 | } 39 | } 40 | 41 | // Call the function 42 | for (let i = 0; i < 10; i++) { 43 | tellAJoke(); 44 | } 45 | -------------------------------------------------------------------------------- /load-tests/create_ollama_containers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Container names 4 | CONTAINER1="ollama1" 5 | CONTAINER2="ollama2" 6 | CONTAINER3="ollama3" 7 | CONTAINER4="ollama4" 8 | 9 | # sample command: docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama 10 | 11 | # Create and start first Ollama container 12 | echo "Starting first Ollama container..." 13 | docker run -d \ 14 | --gpus=all \ 15 | -v ollama:/root/.ollama \ 16 | --name $CONTAINER1 \ 17 | -p 11435:11434 \ 18 | ollama/ollama 19 | 20 | # Create and start second Ollama container 21 | echo "Starting second Ollama container..." 22 | docker run -d \ 23 | --gpus=all \ 24 | -v ollama:/root/.ollama \ 25 | --name $CONTAINER2 \ 26 | -p 11436:11434 \ 27 | ollama/ollama 28 | 29 | # Create and start third Ollama container 30 | echo "Starting third Ollama container..." 31 | docker run -d \ 32 | --gpus=all \ 33 | -v ollama:/root/.ollama \ 34 | --name $CONTAINER3 \ 35 | -p 11437:11434 \ 36 | ollama/ollama 37 | 38 | # Create and start fourth Ollama container 39 | echo "Starting fourth Ollama container..." 
40 | docker run -d \ 41 | --gpus=all \ 42 | -v ollama:/root/.ollama \ 43 | --name $CONTAINER4 \ 44 | -p 11438:11434 \ 45 | ollama/ollama 46 | 47 | echo "Containers created successfully!" 48 | 49 | 50 | # How to run the script 51 | # chmod +x create_ollama_containers.sh 52 | # ./create_ollama_containers.sh 53 | -------------------------------------------------------------------------------- /load-tests/loadtest.js: -------------------------------------------------------------------------------- 1 | // const autocannon = require('autocannon'); 2 | import autocannon from 'autocannon'; 3 | import prompts from './prompts.js'; 4 | 5 | const run = async () => { 6 | const endpoint = 'http://localhost:11434/api/generate'; 7 | const model = 'qwen2.5-coder:3b'; 8 | const prompt = 'Explain how the internet works'; 9 | const instance = autocannon({ 10 | url: endpoint, 11 | method: 'POST', 12 | headers: { 13 | 'Content-Type': 'application/json', 14 | }, 15 | body: JSON.stringify({ 16 | model: model, 17 | prompt: prompt, 18 | stream: false 19 | }), 20 | connections: 5, // Number of concurrent connections 21 | duration: 15 // Duration of the test in seconds 22 | }); 23 | 24 | autocannon.track(instance, { renderProgressBar: true }); 25 | 26 | await instance; 27 | }; 28 | 29 | const runVllmOpenAI = async () => { 30 | const endpoint = 'http://192.168.4.28:8000/v1/chat/completions'; 31 | const model = 'OpenGVLab/InternVL2_5-1B'; 32 | const prompt = 'Explain how the internet works'; 33 | 34 | const instance = autocannon({ 35 | url: endpoint, 36 | method: 'POST', 37 | headers: { 38 | 'Content-Type': 'application/json', 39 | }, 40 | body: JSON.stringify({ 41 | model: model, 42 | messages: [ 43 | { 44 | role: "user", 45 | content: [ 46 | { 47 | type: "text", 48 | text: prompt 49 | } 50 | ] 51 | } 52 | ] 53 | }), 54 | connections: 5, // Number of concurrent connections 55 | duration: 15 // Duration of the test in seconds 56 | }); 57 | 58 | autocannon.track(instance, { renderProgressBar: true }); 59 | 60 | await instance; 61 | 62 | }; 63 | 64 | runVllmOpenAI(); 65 | // run(); 66 | -------------------------------------------------------------------------------- /load-tests/ollama-multi.js: -------------------------------------------------------------------------------- 1 | import { exec } from 'child_process'; 2 | import prompts from './prompts.js'; 3 | 4 | const ollama1 = { 5 | endpoint: 'http://localhost:11435/api/generate', 6 | model: 'qwen2.5:1.5b', 7 | }; 8 | 9 | const ollama2 = { 10 | endpoint: 'http://localhost:11436/api/generate', 11 | model: 'qwen2.5:1.5b', 12 | }; 13 | 14 | const ollama3 = { 15 | endpoint: 'http://localhost:11437/api/generate', 16 | model: 'qwen2.5:1.5b', 17 | }; 18 | 19 | const ollama4 = { 20 | endpoint: 'http://localhost:11438/api/generate', 21 | model: 'qwen2.5:1.5b', 22 | }; 23 | 24 | 25 | function sendCurlRequest({ 26 | endpoint, 27 | model, 28 | prompt, 29 | id 30 | }) { 31 | const start = new Date().getTime(); 32 | console.log(`Request ${id} sent at: ${new Date().toISOString()} to endpoint: ${endpoint}`); 33 | return new Promise((resolve, reject) => { 34 | exec(`curl --max-time 300 \ 35 | -X POST ${endpoint} \ 36 | -H "Content-Type: application/json" \ 37 | -d '{"model":"${model}","prompt":"${prompt}","stream":false}'`, (error, stdout, stderr) => { 38 | console.log(`Request ${id} received at: ${new Date().toISOString()}`, { prompt }); 39 | const diff = new Date().getTime() - start; 40 | console.log(`Request ${id} took ${diff} ms for token count: ${stdout.length}`); 41 | if (error) { 
42 | reject(error); 43 | } 44 | resolve(stdout); 45 | }); 46 | }); 47 | } 48 | 49 | 50 | const requestPromises = []; 51 | const ollamaServers = [ollama1, ollama2, ollama3, ollama4]; 52 | const totalStart = new Date().getTime(); 53 | console.log('Sending requests', ollamaServers); 54 | for (let i = 0; i < prompts.length; i++) { 55 | const ollamaServer = ollamaServers[i % 4]; 56 | requestPromises.push(sendCurlRequest({ 57 | id: i + 1, 58 | prompt: prompts[i], 59 | ...ollamaServer 60 | })); 61 | } 62 | 63 | Promise.all(requestPromises) 64 | .then(responses => { 65 | responses.forEach((response, index) => { 66 | console.log('--------------------------------'); 67 | console.log(`Prompt ${index + 1}: ${prompts[index]}\n`); 68 | console.log(response); 69 | console.log('--------------------------------'); 70 | }); 71 | const totalDiff = new Date().getTime() - totalStart; 72 | const timeInSec = totalDiff / 1000; 73 | console.log(`Total time taken: ${timeInSec} s, total prompt handled ${prompts.length}, responses received: ${responses.length}`); 74 | }) 75 | .catch(error => console.error(error)); 76 | -------------------------------------------------------------------------------- /load-tests/prompts.js: -------------------------------------------------------------------------------- 1 | const prompts = [ 2 | "Generate a random joke.", 3 | "What is the weather like today?", 4 | "Summarize the plot of Inception.", 5 | "Explain quantum computing in simple terms.", 6 | "What is the capital of France?", 7 | "Give me a random motivational quote.", 8 | "Describe the process of photosynthesis.", 9 | "Who won the FIFA World Cup in 2018?", 10 | "What is the meaning of life?", 11 | "Explain blockchain technology in two sentences.", 12 | "Generate a Haiku about the ocean.", 13 | "What is the population of Japan?", 14 | "Tell me a fun fact about space.", 15 | "What are the principles of Object-Oriented Programming?", 16 | "Summarize the history of the Roman Empire.", 17 | "Explain how machine learning works.", 18 | "Who is Albert Einstein?", 19 | "How do airplanes fly?", 20 | "Write a Python script to reverse a string.", 21 | "What is Node.js used for?", 22 | "List three benefits of using cloud computing.", 23 | "What is the difference between HTTP and HTTPS?", 24 | "Define the term - microservices architecture.", 25 | "Write a SQL query to find duplicate rows in a table.", 26 | "Explain the difference between Docker and Kubernetes.", 27 | "What are the benefits of TypeScript over JavaScript?", 28 | "How does the Internet work?", 29 | "Describe how DNS works.", 30 | "Write a regex to validate an email address.", 31 | "What is the difference between REST and GraphQL?", 32 | "Explain the concept of event-driven architecture.", 33 | "What is serverless computing?", 34 | "Give me an example of a design pattern.", 35 | "How does OAuth2 work?", 36 | "What is the difference between synchronous and asynchronous programming?", 37 | "Explain how garbage collection works in JavaScript.", 38 | "Write a JavaScript function to calculate Fibonacci numbers.", 39 | "What is a distributed system?", 40 | "What are the best practices for API design?", 41 | "How does Load Balancing work?", 42 | "What is an IAM role in cloud computing?", 43 | "Explain the CAP theorem.", 44 | "What is CI/CD?", 45 | "What is the difference between a monolith and a microservice?", 46 | "How does caching improve performance?", 47 | "What are the key features of React.js?", 48 | "Explain the concept of eventual consistency.", 49 | "What is a Kubernetes 
pod?", 50 | "Write a Bash script to count the number of lines in a file.", 51 | "What are the key characteristics of a PWA?", 52 | "Define the term machine learning.", 53 | "Explain the difference between AI and machine learning.", 54 | "What is the Turing test?", 55 | "List the different HTTP status codes and their meanings.", 56 | "What are the advantages of cloud-native architecture?", 57 | "Explain the concept of immutable infrastructure.", 58 | "Write a JavaScript function to sort an array of numbers.", 59 | "What is the purpose of a firewall?", 60 | "How does SSL/TLS encryption work?", 61 | "Explain the concept of Infrastructure as Code (IaC).", 62 | "What are the key features of AWS Lambda?", 63 | "How does server-side rendering differ from client-side rendering?", 64 | "What are webhooks, and how are they used?", 65 | "Write a Python script to scrape a webpage.", 66 | "What are the pros and cons of microservices?", 67 | "Explain the role of a CDN in web development.", 68 | "What is an ETL pipeline?", 69 | "How does RESTful API design differ from RPC-style APIs?", 70 | "Explain Kubernetes StatefulSets and their use cases.", 71 | "Describe the anatomy of a Dockerfile.", 72 | "What is the concept of shared nothing architecture?", 73 | "Write a SQL query to find the second-highest salary in a table.", 74 | "What are the best practices for designing a scalable database?", 75 | "How does two-factor authentication (2FA) work?", 76 | "What is the difference between vertical scaling and horizontal scaling?", 77 | "How do message queues improve system performance?", 78 | "Write a JavaScript function to debounce an input field.", 79 | "What are some common security vulnerabilities in web applications?", 80 | "How does a graph database differ from a relational database?", 81 | "Explain the principles of the Twelve-Factor App.", 82 | "What is the difference between stateless and stateful applications?", 83 | "How does a reverse proxy server work?", 84 | "What is the concept of eventual consistency in distributed systems?", 85 | "Explain the role of sharding in database scaling.", 86 | "Write a JavaScript function to generate a random string.", 87 | "What is the significance of the OSI model in networking?", 88 | "What are the benefits of container orchestration?", 89 | "How does a webhook differ from an API endpoint?", 90 | "Explain the principles of Zero Trust security.", 91 | "What are the challenges of implementing distributed transactions?", 92 | "How does a Content Delivery Network (CDN) improve web performance?", 93 | "What is the difference between public, private, and hybrid cloud?", 94 | "Write a Python script to convert JSON to CSV.", 95 | "Explain the concept of a service mesh in microservices.", 96 | "What are the key features of GraphQL?", 97 | "How does WebSocket differ from HTTP?", 98 | "What is the purpose of the ACID properties in databases?", 99 | "Explain the concept of event sourcing in system design.", 100 | "How does a Load Balancer distribute traffic?", 101 | "What is an API gateway, and why is it important?", 102 | "Write a function in JavaScript to check if a string is a palindrome.", 103 | "What are the advantages of asynchronous programming?", 104 | "Explain the concept of polyglot persistence.", 105 | "What is the difference between synchronous and asynchronous APIs?", 106 | "How does CI/CD streamline the development process?", 107 | "What are the benefits of using OAuth for authentication?", 108 | "Write a SQL query to join two tables and filter results.", 109 | 
"What is a state machine, and where is it used?", 110 | "Explain the principles of Domain-Driven Design (DDD).", 111 | "What are the differences between monolithic and SOA architectures?", 112 | "How does distributed logging work in a microservices architecture?", 113 | "What is the significance of Kubernetes namespaces?", 114 | "Explain the concept of blue-green deployment.", 115 | "What are the benefits of using a GraphQL API?", 116 | "Write a script to generate random passwords.", 117 | "How does gRPC differ from REST?", 118 | "What is the difference between a VM and a container?", 119 | "Explain how caching strategies improve API performance.", 120 | "What are the benefits of edge computing?", 121 | "How does Kubernetes handle scaling?", 122 | "Write a JavaScript function to throttle API requests.", 123 | "What is the significance of service discovery in microservices?", 124 | "What are the challenges of scaling relational databases?", 125 | "Explain the concept of a circuit breaker in distributed systems.", 126 | "How does serverless architecture differ from traditional hosting?", 127 | "Write a Python script to parse XML data.", 128 | "What is the purpose of an index in a database?", 129 | "Explain the concept of Continuous Monitoring (CM).", 130 | "How do web sockets enable real-time communication?", 131 | "What is the difference between symmetric and asymmetric encryption?", 132 | "How does Docker handle networking between containers?", 133 | "Write a SQL query to create a new table with constraints.", 134 | "What is the purpose of a distributed cache?", 135 | "Explain the role of Helm in Kubernetes.", 136 | "What is the difference between synchronous and event-driven messaging?", 137 | "What is the significance of observability in modern applications?", 138 | "Write a JavaScript function to find duplicates in an array.", 139 | "What are the challenges of maintaining data consistency in distributed systems?", 140 | "Explain the concept of API rate limiting.", 141 | "How does DNS load balancing work?", 142 | "What is the importance of a Service Level Agreement (SLA)?", 143 | "How do feature flags enable incremental rollouts?", 144 | "Explain the principles of DevSecOps.", 145 | "What are the challenges of running stateful workloads in Kubernetes?", 146 | "Write a JavaScript function to flatten a nested array.", 147 | "What is a sidecar pattern in microservices?", 148 | "How do cloud providers implement shared responsibility models?", 149 | "What is the significance of event-driven architecture in modern applications?", 150 | "Explain the concept of a rolling update in Kubernetes." 
151 | ]; 152 | 153 | export default prompts; 154 | -------------------------------------------------------------------------------- /load-tests/reports.md: -------------------------------------------------------------------------------- 1 | # Results 2 | 3 | ## OLLAMA 2 containers 4 | 5 | Total time taken: 154.534 s, total prompt handled 149, responses received: 149 6 | 7 | ## OLLAMA 4 containers 8 | 9 | Total time taken: 163.414 s, total prompt handled 149, responses received: 149 10 | 11 | 12 | ## VLLM 1 API server 13 | 14 | Total time taken: 22.537 s, total prompt handled 149, responses received: 149 15 | -------------------------------------------------------------------------------- /load-tests/run_ollama_containers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Container names 4 | CONTAINER1="ollama1" 5 | CONTAINER2="ollama2" 6 | CONTAINER3="ollama3" 7 | CONTAINER4="ollama4" 8 | 9 | # Run qwen model in first container 10 | echo "Starting qwen2.5 model in first container..." 11 | docker exec -d $CONTAINER1 ollama run qwen2.5:1.5b 12 | 13 | # Run qwen model in second container 14 | echo "Starting qwen2.5 model in second container..." 15 | docker exec -d $CONTAINER2 ollama run qwen2.5:1.5b 16 | 17 | # Run qwen model in third container 18 | echo "Starting qwen2.5 model in third container..." 19 | docker exec -d $CONTAINER3 ollama run qwen2.5:1.5b 20 | 21 | # Run qwen model in fourth container 22 | echo "Starting qwen2.5 model in fourth container..." 23 | docker exec -d $CONTAINER4 ollama run qwen2.5:1.5b 24 | 25 | echo "Models are now running in all containers! It will download approx 4GB of data in total (according to the model size * 4). Ensure models are downloaded in each container before running the load test." 
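# Optional (not part of the original script): pre-pull the model in each container
# before load testing, so the first requests don't wait on the download, e.g.:
#   docker exec ollama1 ollama pull qwen2.5:1.5b
#   docker exec ollama2 ollama pull qwen2.5:1.5b
#   docker exec ollama3 ollama pull qwen2.5:1.5b
#   docker exec ollama4 ollama pull qwen2.5:1.5b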
26 | 27 | 28 | # How to run the script 29 | # chmod +x run_ollama_containers.sh 30 | # ./run_ollama_containers.sh 31 | # Check docker stats by running: `docker stats` to see if the containers are running 32 | 33 | 34 | # How to test the containers 35 | # Go inside of the container by running: `docker exec -it ollama1 /bin/bash` 36 | # Verify ollama is running the model by running: `ollama ps` 37 | # If you don't see the model running, you can run: `ollama run qwen2.5:1.5b` by yourself 38 | # If you want to run the load test, you can run: `node ollama-multi.js` 39 | -------------------------------------------------------------------------------- /load-tests/sendRequests.js: -------------------------------------------------------------------------------- 1 | import { exec } from 'child_process'; 2 | import prompts from './prompts.js'; 3 | 4 | const ollama = { 5 | endpoint: 'http://localhost:11434/api/generate', 6 | model: 'qwen2.5:1.5b', 7 | }; 8 | 9 | const vllmOpenAI = { 10 | endpoint: 'http://localhost:8000/v1/chat/completions', 11 | model: 'Qwen/Qwen2.5-3B-Instruct', 12 | maxTokens: 500 13 | }; 14 | 15 | const internVL = { 16 | endpoint: 'http://192.168.4.28:8000/v1/chat/completions', 17 | model: 'OpenGVLab/InternVL2_5-1B', 18 | maxTokens: 500 19 | }; 20 | 21 | function sendCurlRequest({ 22 | endpoint, 23 | model, 24 | maxTokens, 25 | prompt, 26 | id 27 | }) { 28 | const start = new Date().getTime(); 29 | console.log(`Request ${id} sent at: ${new Date().toISOString()}`); 30 | return new Promise((resolve, reject) => { 31 | exec(`curl --max-time 300 \ 32 | -X POST ${endpoint} \ 33 | -H "Content-Type: application/json" \ 34 | -d '{"model":"${model}","prompt":"${prompt}","stream":false}'`, (error, stdout, stderr) => { 35 | console.log(`Request ${id} received at: ${new Date().toISOString()}`, { prompt }); 36 | const diff = new Date().getTime() - start; 37 | console.log(`Request ${id} took ${diff} ms for token count: ${stdout.length}`); 38 | if (error) { 39 | reject(error); 40 | } 41 | resolve(stdout); 42 | }); 43 | }); 44 | } 45 | 46 | 47 | 48 | const requestPromises = []; 49 | const payload = ollama; 50 | const totalStart = new Date().getTime(); 51 | console.log('Sending requests', payload); 52 | for (let i = 0; i < prompts.length; i++) { 53 | requestPromises.push(sendCurlRequest({ 54 | id: i + 1, 55 | prompt: prompts[i], 56 | ...payload 57 | })); 58 | } 59 | 60 | Promise.all(requestPromises) 61 | .then(responses => { 62 | responses.forEach((response, index) => { 63 | console.log('--------------------------------'); 64 | console.log(`Prompt ${index + 1}: ${prompts[index]}\n`); 65 | console.log(response); 66 | console.log('--------------------------------'); 67 | }); 68 | const totalDiff = new Date().getTime() - totalStart; 69 | const timeInSec = totalDiff / 1000; 70 | console.log(`Total time taken: ${timeInSec} s, total prompt handled ${prompts.length}, responses received: ${responses.length}`); 71 | }) 72 | .catch(error => console.error(error)); 73 | -------------------------------------------------------------------------------- /load-tests/stop_ollama_containers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Container names 4 | CONTAINER1="ollama1" 5 | CONTAINER2="ollama2" 6 | CONTAINER3="ollama3" 7 | CONTAINER4="ollama4" 8 | 9 | # Stop containers 10 | echo "Stopping Ollama containers..." 11 | docker stop $CONTAINER1 $CONTAINER2 $CONTAINER3 $CONTAINER4 12 | 13 | echo "Containers stopped successfully!" 
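# Optional (not part of the original script): remove the stopped containers as well
# if you want create_ollama_containers.sh to recreate them from scratch, e.g.:
#   docker rm ollama1 ollama2 ollama3 ollama4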
14 | 15 | 16 | # How to run the script 17 | # chmod +x stop_ollama_containers.sh 18 | # ./stop_ollama_containers.sh 19 | -------------------------------------------------------------------------------- /load-tests/vllm-commands.md: -------------------------------------------------------------------------------- 1 | ## VLLM 2 | 3 | ## running a model in vllm 4 | 5 | ``` 6 | vllm serve "Qwen/Qwen2.5-1.5B-Instruct" --port 8001 7 | ``` 8 | -------------------------------------------------------------------------------- /load-tests/vllm-single.js: -------------------------------------------------------------------------------- 1 | import { exec } from 'child_process'; 2 | import prompts from './prompts.js'; 3 | 4 | const vllmOpenAI = { 5 | endpoint: 'http://localhost:8001/v1/chat/completions', 6 | model: 'Qwen/Qwen2.5-1.5B-Instruct', 7 | maxTokens: 500 8 | }; 9 | 10 | 11 | function sendCurlRequest({ 12 | endpoint, 13 | model, 14 | prompt, 15 | id 16 | }) { 17 | const start = new Date().getTime(); 18 | console.log(`Request ${id} sent at: ${new Date().toISOString()}`); 19 | return new Promise((resolve, reject) => { 20 | exec(`curl --max-time 300 \ 21 | -X POST ${endpoint} \ 22 | -H "Content-Type: application/json" \ 23 | -d '{"model":"${model}","messages":[{"role":"user","content":[{"type":"text","text":"${prompt}"}]}],"stream":false}'`, (error, stdout, stderr) => { 24 | console.log(`Request ${id} received at: ${new Date().toISOString()}`, { prompt }); 25 | const diff = new Date().getTime() - start; 26 | console.log(`Request ${id} took ${diff} ms for token count: ${stdout.length}`); 27 | if (error) { 28 | reject(error); 29 | } 30 | resolve(stdout); 31 | }); 32 | }); 33 | } 34 | 35 | const requestPromises = []; 36 | const totalStart = new Date().getTime(); 37 | console.log('Sending requests', vllmOpenAI); 38 | for (let i = 0; i < prompts.length; i++) { 39 | requestPromises.push(sendCurlRequest({ 40 | id: i + 1, 41 | prompt: prompts[i], 42 | ...vllmOpenAI 43 | })); 44 | } 45 | 46 | Promise.all(requestPromises) 47 | .then(responses => { 48 | responses.forEach((response, index) => { 49 | console.log('--------------------------------'); 50 | console.log(`Prompt ${index + 1}: ${prompts[index]}\n`); 51 | console.log(response); 52 | console.log('--------------------------------'); 53 | }); 54 | const totalDiff = new Date().getTime() - totalStart; 55 | const timeInSec = totalDiff / 1000; 56 | console.log(`Total time taken: ${timeInSec} s, total prompt handled ${prompts.length}, responses received: ${responses.length}`); 57 | }) 58 | .catch(error => console.error(error)); 59 | -------------------------------------------------------------------------------- /ollama/example6.js: -------------------------------------------------------------------------------- 1 | // Import required modules 2 | import { Ollama } from 'ollama'; 3 | import fs from 'fs'; 4 | import path from 'path'; 5 | 6 | // Initialize the Ollama client 7 | const ollama = new Ollama({ 8 | host: 'http://localhost:11434', // default Ollama host 9 | }); 10 | 11 | // Specify the repository path 12 | const repoPath = '/home/foyzul/personal/public-repo/react/packages'; 13 | 14 | // Function to get all files with allowed extensions in the repository 15 | async function getJsFiles(dirPath, allowedExtensions = ['.js', '.jsx', '.ts', '.tsx']) { 16 | const ignoredDirs = ['tests', '__tests__', 'node_modules', 'dist', 'build', 'coverage']; 17 | const entries = await fs.promises.readdir(dirPath, { withFileTypes: true }); 18 | 19 | const files = entries 20 | 
.filter(entry => !entry.isDirectory()) 21 | .map(entry => path.join(dirPath, entry.name)); 22 | 23 | const folders = entries 24 | .filter(entry => entry.isDirectory()) 25 | .filter(folder => !ignoredDirs.includes(folder.name)); // Filter out ignored directories 26 | 27 | for (const folder of folders) { 28 | files.push(...await getJsFiles(path.join(dirPath, folder.name), allowedExtensions)); 29 | } 30 | 31 | return files.filter(file => allowedExtensions.includes(path.extname(file))); 32 | } 33 | 34 | const prompt = ` 35 | Provide a comprehensive analysis in the following format: 36 | 37 | ## Code Analysis Report 38 | 39 | ### 1. Code Quality 40 | - Purpose and functionality of the code and methods 41 | - Code organization and readability 42 | - Modularity and maintainability 43 | 44 | ### 2. Project Structure and Architecture 45 | - Component organization 46 | - Design patterns used 47 | - Dependencies and relationships 48 | 49 | ### 3. Recommendations 50 | - Key improvements needed 51 | - Best practices to implement 52 | - Priority action items 53 | 54 | ### 4. Final Recommendations: 55 | [Summarize key findings and provide actionable, high-level recommendations for improving the project.] 56 | `; 57 | 58 | const model = 'qwen2.5-coder:14b'; 59 | 60 | 61 | // Function to analyze a file and save the report 62 | async function analyzeFile(filePath) { 63 | try { 64 | const fileContent = await fs.promises.readFile(filePath, 'utf-8'); 65 | 66 | const response = await ollama.chat({ 67 | model: model, 68 | messages: [ 69 | { 70 | role: 'user', 71 | content: `${prompt}\n\n${fileContent}`, 72 | }, 73 | ], 74 | }); 75 | 76 | const reportContent = response.message.content; 77 | const reportFileName = `${path.basename(filePath, '.js')}.${model}.${new Date().getTime()}.md`; 78 | const reportFilePath = path.join(path.dirname(filePath), reportFileName); 79 | 80 | await fs.promises.writeFile(reportFilePath, reportContent, 'utf-8'); 81 | console.log(`Analysis report generated for ${filePath}`); 82 | } catch (error) { 83 | console.error(`Error analyzing ${filePath}:`, error); 84 | } 85 | } 86 | 87 | async function main() { 88 | try { 89 | const jsFiles = await getJsFiles(repoPath); 90 | 91 | for (const file of jsFiles) { 92 | await analyzeFile(file); 93 | } 94 | // await analyzeFile(jsFiles[0]); 95 | 96 | console.log('Analysis completed.'); 97 | } catch (error) { 98 | console.error('Error:', error); 99 | } 100 | } 101 | 102 | // Run the main function 103 | main(); 104 | -------------------------------------------------------------------------------- /ollama/example7.js: -------------------------------------------------------------------------------- 1 | // Import required modules 2 | import { Ollama } from 'ollama'; 3 | import fs from 'fs'; 4 | import path from 'path'; 5 | import parser from '@babel/parser'; 6 | import { default as traverse } from '@babel/traverse'; 7 | import { prompt } from './prompt.js'; 8 | 9 | const PARSER_OPTIONS = { 10 | sourceType: 'module', // or 'script' 11 | plugins: [ 12 | 'jsx', // if you are parsing JSX/React code 13 | 'typescript', // if it's TypeScript code 14 | 'decorators-legacy', // if decorators are in use 15 | // Add more plugins depending on your code 16 | ] 17 | }; 18 | 19 | // Initialize the Ollama client 20 | const ollama = new Ollama({ 21 | host: 'http://localhost:11434', // default Ollama host 22 | }); 23 | 24 | // Specify the repository path 25 | const repoPath = '/home/foyzul/personal/public-repo/react/packages'; 26 | 27 | // Function to get all files with 
allowed extensions in the repository 28 | async function getJsFiles(dirPath, allowedExtensions = ['.js', '.jsx', '.ts', '.tsx']) { 29 | const ignoredDirs = ['tests', '__tests__', 'node_modules', 'dist', 'build', 'coverage']; 30 | const entries = await fs.promises.readdir(dirPath, { withFileTypes: true }); 31 | 32 | const files = entries 33 | .filter(entry => !entry.isDirectory()) 34 | .map(entry => path.join(dirPath, entry.name)); 35 | 36 | const folders = entries 37 | .filter(entry => entry.isDirectory()) 38 | .filter(folder => !ignoredDirs.includes(folder.name)); // Filter out ignored directories 39 | 40 | for (const folder of folders) { 41 | files.push(...await getJsFiles(path.join(dirPath, folder.name), allowedExtensions)); 42 | } 43 | 44 | return files.filter(file => allowedExtensions.includes(path.extname(file))); 45 | } 46 | 47 | const model = 'qwen2.5-coder:14b'; 48 | 49 | function validateInput(inputFile) { 50 | if (!inputFile) { 51 | console.error('Please specify a file to parse. For example: node parseAndChunk.js ./src/app.js'); 52 | process.exit(1); 53 | } 54 | return fs.readFileSync(inputFile, 'utf8'); 55 | } 56 | 57 | function parseCode(code) { 58 | return parser.parse(code, PARSER_OPTIONS); 59 | } 60 | 61 | function extractFunctionFromNode(node, code, name) { 62 | const { loc } = node; 63 | const startLine = loc.start.line; 64 | const endLine = loc.end.line; 65 | const functionCode = code.split('\n').slice(startLine - 1, endLine).join('\n'); 66 | return { name, code: functionCode, startLine, endLine }; 67 | } 68 | 69 | function extractFunctions(ast, code) { 70 | const functions = []; 71 | const fn = traverse.default; 72 | fn(ast, { 73 | FunctionDeclaration(path) { 74 | const { id } = path.node; 75 | const name = id ? id.name : 'anonymous_function'; 76 | functions.push(extractFunctionFromNode(path.node, code, name)); 77 | }, 78 | VariableDeclaration(path) { 79 | path.node.declarations.forEach(decl => { 80 | if (decl.init && (decl.init.type === 'ArrowFunctionExpression' || decl.init.type === 'FunctionExpression')) { 81 | const name = decl.id && decl.id.name ? 
decl.id.name : 'anonymous_function_expr'; 82 | functions.push(extractFunctionFromNode(decl.init, code, name)); 83 | } 84 | }); 85 | } 86 | }); 87 | 88 | return functions; 89 | } 90 | 91 | function generatePrompt(prompt, chunks) { 92 | return ` 93 | ${prompt} 94 | \n\n\n 95 | Here are the code chunks: 96 | ${chunks.map((c, index) => `\n---\n**Name:** ${c.name}\n**Lines:** ${c.startLine}-${c.endLine}\n\`\`\`\n${c.code}\n\`\`\``).join('\n')} 97 | `; 98 | } 99 | 100 | function createLineBasedChunks(code, chunkSize = 50) { 101 | const lines = code.split('\n'); 102 | const chunks = []; 103 | 104 | for (let i = 0; i < lines.length; i += chunkSize) { 105 | const chunkLines = lines.slice(i, i + chunkSize); 106 | chunks.push({ 107 | name: `chunk_${i / chunkSize}`, 108 | code: chunkLines.join('\n'), 109 | startLine: i + 1, 110 | endLine: Math.min(i + chunkSize, lines.length) 111 | }); 112 | } 113 | 114 | return chunks; 115 | } 116 | 117 | 118 | // Function to analyze a file and save the report 119 | async function analyzeFile(filePath) { 120 | try { 121 | console.log(`Analyzing ${filePath}`); 122 | const reportFileName = `${path.basename(filePath)}.md`; 123 | const reportFilePath = path.join(path.dirname(filePath), reportFileName); 124 | console.log(reportFileName); 125 | if (fs.existsSync(reportFilePath)) { 126 | console.log(`Report already exists for ${filePath}`); 127 | return; 128 | } 129 | const code = validateInput(filePath); 130 | const ast = parseCode(code); 131 | const functions = extractFunctions(ast, code); 132 | 133 | const chunks = functions.length > 0 ? functions : createLineBasedChunks(code); 134 | const concatedPrompt = generatePrompt(prompt, chunks); 135 | console.log('chunks found:', chunks.length); 136 | const response = await ollama.chat({ 137 | model: model, 138 | messages: [ 139 | { 140 | role: 'user', 141 | content: concatedPrompt, 142 | }, 143 | ], 144 | }); 145 | 146 | const reportContent = response.message.content; 147 | 148 | await fs.promises.writeFile(reportFilePath, reportContent, 'utf-8'); 149 | console.log(`Analysis report generated for ${filePath}`); 150 | } catch (error) { 151 | console.error(`Error analyzing ${filePath}:`, error); 152 | } 153 | } 154 | 155 | async function main() { 156 | try { 157 | const jsFiles = await getJsFiles(repoPath); 158 | console.log('Total files:', jsFiles.length); 159 | let currentFile = 0; 160 | for (const file of jsFiles) { 161 | console.log(`Processing file ${currentFile + 1} of ${jsFiles.length}: ${file}`); 162 | await analyzeFile(file); 163 | currentFile++; 164 | console.log(`Processed ${currentFile} of ${jsFiles.length} files`); 165 | } 166 | // await analyzeFile(jsFiles[0]); 167 | 168 | console.log('Analysis completed.'); 169 | } catch (error) { 170 | console.error('Error:', error); 171 | } 172 | } 173 | 174 | // Run the main function 175 | main(); 176 | -------------------------------------------------------------------------------- /ollama/parseAndChunk.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import fs from 'fs'; 4 | import path from 'path'; 5 | import parser from '@babel/parser'; 6 | import { default as traverse } from '@babel/traverse'; 7 | import { Ollama } from 'ollama'; 8 | 9 | // Configure how you parse the code 10 | const PARSER_OPTIONS = { 11 | sourceType: 'module', // or 'script' 12 | plugins: [ 13 | 'jsx', // if you are parsing JSX/React code 14 | 'typescript', // if it's TypeScript code 15 | 'decorators-legacy', // if decorators are in use 16 
| // Add more plugins depending on your code 17 | ] 18 | }; 19 | 20 | // New utility functions 21 | function validateInput(inputFile) { 22 | if (!inputFile) { 23 | console.error('Please specify a file to parse. For example: node parseAndChunk.js ./src/app.js'); 24 | process.exit(1); 25 | } 26 | return fs.readFileSync(inputFile, 'utf8'); 27 | } 28 | 29 | function parseCode(code) { 30 | return parser.parse(code, PARSER_OPTIONS); 31 | } 32 | 33 | function extractFunctionFromNode(node, code, name) { 34 | const { loc } = node; 35 | const startLine = loc.start.line; 36 | const endLine = loc.end.line; 37 | const functionCode = code.split('\n').slice(startLine - 1, endLine).join('\n'); 38 | return { name, code: functionCode, startLine, endLine }; 39 | } 40 | 41 | function extractFunctions(ast, code) { 42 | const functions = []; 43 | const fn = traverse.default; 44 | fn(ast, { 45 | FunctionDeclaration(path) { 46 | const { id } = path.node; 47 | const name = id ? id.name : 'anonymous_function'; 48 | functions.push(extractFunctionFromNode(path.node, code, name)); 49 | }, 50 | VariableDeclaration(path) { 51 | path.node.declarations.forEach(decl => { 52 | if (decl.init && (decl.init.type === 'ArrowFunctionExpression' || decl.init.type === 'FunctionExpression')) { 53 | const name = decl.id && decl.id.name ? decl.id.name : 'anonymous_function_expr'; 54 | functions.push(extractFunctionFromNode(decl.init, code, name)); 55 | } 56 | }); 57 | } 58 | }); 59 | 60 | return functions; 61 | } 62 | 63 | function createLineBasedChunks(code, chunkSize = 50) { 64 | const lines = code.split('\n'); 65 | const chunks = []; 66 | 67 | for (let i = 0; i < lines.length; i += chunkSize) { 68 | const chunkLines = lines.slice(i, i + chunkSize); 69 | chunks.push({ 70 | name: `chunk_${i / chunkSize}`, 71 | code: chunkLines.join('\n'), 72 | startLine: i + 1, 73 | endLine: Math.min(i + chunkSize, lines.length) 74 | }); 75 | } 76 | 77 | return chunks; 78 | } 79 | 80 | function generatePrompt(chunks, inputFile) { 81 | return ` 82 | You are given code chunks extracted from ${path.basename(inputFile)}. 83 | For each chunk, provide a summary of what it does, identify its inputs and outputs, 84 | and highlight any noteworthy patterns or potential issues. 
85 | 86 | Please produce a structured explanation with the following format: 87 | 88 | **Function/Chunk Name:** 89 | **Summary:** 90 | **Inputs:** 91 | **Outputs:** 92 | **Key Observations:** 93 | 94 | Here are the code chunks: 95 | ${chunks.map((c, index) => `\n---\n**Name:** ${c.name}\n**Lines:** ${c.startLine}-${c.endLine}\n\`\`\`\n${c.code}\n\`\`\``).join('\n')} 96 | `; 97 | } 98 | 99 | function savePromptToFile(prompt, filename = 'analysis_report.md') { 100 | const outputPath = path.join(process.cwd(), filename); 101 | fs.writeFileSync(outputPath, prompt, 'utf8'); 102 | console.log(`Analysis prompt generated and saved to ${outputPath}`); 103 | } 104 | 105 | const ollama = new Ollama({ 106 | host: 'http://localhost:11434', // default Ollama host 107 | }); 108 | const model = 'qwen2.5-coder:14b'; 109 | 110 | 111 | // Add new function to call Ollama 112 | async function analyzeWithOllama(prompt) { 113 | try { 114 | const response = await ollama.chat({ 115 | model: model, 116 | messages: [ 117 | { 118 | role: 'user', 119 | content: `${prompt}`, 120 | }, 121 | ], 122 | }); 123 | 124 | console.log('Analysis from Ollama:'); 125 | console.log(response.message.content); 126 | 127 | // Optionally save the response to a file 128 | savePromptToFile(response.message.content, `ollama_analysis.${model}.${new Date().getTime()}.md`); 129 | 130 | } catch (error) { 131 | console.error('Error calling Ollama:', error); 132 | } 133 | } 134 | 135 | // Modify main function to be async 136 | async function main() { 137 | const inputFile = process.argv[2]; 138 | const code = validateInput(inputFile); 139 | const ast = parseCode(code); 140 | const functions = extractFunctions(ast, code); 141 | 142 | const chunks = functions.length > 0 ? functions : createLineBasedChunks(code); 143 | const prompt = generatePrompt(chunks, inputFile); 144 | 145 | // Call Ollama instead of just logging 146 | await analyzeWithOllama(prompt); 147 | } 148 | 149 | // Modify the execution to handle async main 150 | main().catch(console.error); 151 | -------------------------------------------------------------------------------- /ollama/prompt.js: -------------------------------------------------------------------------------- 1 | export const prompt = ` 2 | As a highly skilled code reviewer, please thoroughly examine each provided code chunk below and produce a well-structured Markdown report. Your analysis should be both high-level and detailed, providing real-world, actionable insights. If a category does not apply to a particular chunk, you may omit it. If after review you find no significant points to mention, provide a brief note indicating that the code meets expected standards. 3 | 4 | For **each code chunk**, provide the following: 5 | 6 | - **Summary (Core Functionality):** A brief explanation of what the code does, its purpose, and how it contributes to the overall functionality. 7 | - **Inputs:** Identify the parameters or data that this code consumes. 8 | - **Outputs:** Specify what the code returns, produces, modifies, or its observable side-effects. 9 | - **Key Observations (Covering Code Quality Assessment & Best Practices Alignment):** 10 | - Mention noteworthy patterns, logic flows, or design choices. 11 | - Highlight the degree of adherence to best practices or identify any missing enhancements. 12 | - Consider readability, maintainability, error handling, and performance aspects. 13 | - Note any security implications, testing considerations, or potential improvements. 
14 | 15 | After analyzing each chunk, provide a section at the end of the report to address any broader points that apply to the codebase as a whole: 16 | 17 | 1. **Specific Issues** (If Applicable): 18 | - Anti-patterns or code smells 19 | - Potential bugs or runtime issues 20 | - Performance bottlenecks 21 | - Security vulnerabilities 22 | 23 | 2. **Actionable Recommendations** (If Applicable): 24 | - **Critical & Immediate:** Issues that need urgent fixes 25 | - **Important but Not Urgent:** Improvements for maintainability or clarity 26 | - **Optional Enhancements:** Future optimizations or design refinements 27 | 28 | **Formatting Guidelines:** 29 | - Start each code chunk's analysis with a header that includes the chunk name and lines. 30 | - Use Markdown headings, bullet points, and code fences as appropriate. 31 | - If you find a category not applicable, omit it entirely for that chunk. 32 | - If the code is generally solid with no issues, indicate that briefly. 33 | 34 | --- 35 | ## output format 36 | 37 | **Name:** \`fetchData\` 38 | **Lines:** \`{start_line}-{end_line}\` 39 | 40 | **Summary (Core Functionality):** 41 | *(LLM to fill)* 42 | 43 | **Inputs:** 44 | *(LLM to fill)* 45 | 46 | **Outputs:** 47 | *(LLM to fill)* 48 | 49 | **Key Observations (Quality & Best Practices):** 50 | *(LLM to fill)* 51 | 52 | --- 53 | 54 | *(Repeat the above structure for each subsequent chunk...)* 55 | 56 | --- 57 | 58 | **Specific Issues Across the Codebase (If Any):** 59 | *(LLM to fill)* 60 | 61 | **Actionable Recommendations:** 62 | - **Critical & Immediate:** *(LLM to fill if any)* 63 | - **Important but Not Urgent:** *(LLM to fill if any)* 64 | - **Optional Enhancements:** *(LLM to fill if any)* 65 | 66 | --- 67 | 68 | If no issues or improvements are found, simply state that the code meets expected standards.`; 69 | -------------------------------------------------------------------------------- /openai-js/dinner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foyzulkarim/llm-pocs/06152cc3724de22ba99f2a515254767270155ee9/openai-js/dinner.jpg -------------------------------------------------------------------------------- /openai-js/openai-vllm.js: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import fs from 'fs'; 3 | import path from 'path'; 4 | 5 | 6 | const createVLLMClient = (baseURL = 'http://192.168.4.28:8000/v1') => { 7 | return new OpenAI({ 8 | baseURL, 9 | apiKey: 'dummy-key', // vLLM doesn't require real OpenAI key 10 | }); 11 | }; 12 | 13 | export async function generateCompletion(input, options = {}) { 14 | try { 15 | const vllm = createVLLMClient(); 16 | 17 | const defaultOptions = { 18 | model: 'OpenGVLab/InternVL2_5-1B', // example model, replace with your loaded model 19 | messages: [ 20 | { role: 'user', content: input } 21 | ], 22 | temperature: 0.7, 23 | }; 24 | 25 | const response = await vllm.chat.completions.create({ 26 | ...defaultOptions, 27 | ...options, 28 | }); 29 | 30 | return response.choices[0].message.content; 31 | } catch (error) { 32 | console.error('Error calling vLLM:', error); 33 | throw error; 34 | } 35 | } 36 | 37 | async function convertImageToBase64(filePath) { 38 | const imageData = fs.readFileSync(filePath); 39 | const base64String = imageData.toString('base64'); 40 | return base64String; 41 | } 42 | 43 | 44 | export async function generateCompletionWithImage(prompt, imagePath, options = {}) { 45 | try { 46 
| const client = createVLLMClient(); 47 | 48 | if (!fs.existsSync(imagePath)) { 49 | throw new Error(`Image file does not exist at path: ${imagePath}`); 50 | } 51 | 52 | const base64Image = await convertImageToBase64(imagePath); 53 | 54 | const defaultOptions = { 55 | model: 'OpenGVLab/InternVL2_5-1B', // example model, replace with your loaded model 56 | messages: [ 57 | { 58 | role: 'user', 59 | content: [ 60 | { type: 'text', text: prompt }, 61 | { 62 | type: 'image_url', 63 | image_url: { 64 | url: `data:image/jpeg;base64,${base64Image}` 65 | } 66 | } 67 | ] 68 | } 69 | ], 70 | temperature: 0.7, 71 | }; 72 | 73 | const response = await client.chat.completions.create({ 74 | ...defaultOptions, 75 | ...options, 76 | }); 77 | 78 | return response.choices[0].message.content; 79 | } catch (error) { 80 | console.error('Error generating completion with image:', error.message); 81 | throw error; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /openai-js/openai.js: -------------------------------------------------------------------------------- 1 | import { generateCompletion, generateCompletionWithImage } from './openai-vllm.js'; 2 | 3 | // Example usage 4 | // const result = await generateCompletion('What is the capital of France?'); 5 | // console.log(result); 6 | 7 | const resultWithImage = await generateCompletionWithImage('What are the food items in the image?', './dinner.jpg'); 8 | console.log(resultWithImage); 9 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "llm-pocs", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "example1.js", 6 | "type": "module", 7 | "scripts": { 8 | "test": "echo \"Error: no test specified\" && exit 1", 9 | "loadtest": "node loadtest.js" 10 | }, 11 | "keywords": [], 12 | "author": "", 13 | "license": "ISC", 14 | "dependencies": { 15 | "@babel/parser": "^7.26.3", 16 | "@babel/traverse": "^7.26.4", 17 | "@huggingface/transformers": "^3.0.0-alpha.22", 18 | "@xenova/transformers": "^2.17.2", 19 | "node-llama-cpp": "^3.1.1", 20 | "ollama": "^0.5.11", 21 | "openai": "^4.78.0" 22 | }, 23 | "devDependencies": { 24 | "autocannon": "^7.8.0" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /scripts/copy-md.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set the source and destination directories 4 | src_dir="/home/foyzul/personal/public-repo/react/packages" 5 | dest_dir="$HOME/Documents/markdowns" 6 | 7 | # Find all markdown files in the source directory and its subdirectories 8 | find "$src_dir" -type f -name "*.md" -print0 | while IFS= read -r -d '' file; do 9 | # Get the relative path of the file from the source directory 10 | relative_path="${file#$src_dir/}" 11 | 12 | # Create the destination directory path 13 | dest_path="$dest_dir/${relative_path%/*}" 14 | 15 | # Create the destination directory if it doesn't exist 16 | mkdir -p "$dest_path" 17 | 18 | # Copy the markdown file to the destination directory 19 | cp "$file" "$dest_path" 20 | done 21 | -------------------------------------------------------------------------------- /tts/tts-kokoro.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import sys 4 | import numpy as np 5 | from datetime import datetime 6 | import 
subprocess 7 | 8 | # Clone the repository if it doesn't exist 9 | if not os.path.exists('Kokoro-82M'): 10 |     print("Cloning Kokoro-82M repository...") 11 |     os.system('git clone https://huggingface.co/hexgrad/Kokoro-82M') 12 | 13 | # Store the original directory and Kokoro path 14 | original_dir = os.getcwd() 15 | kokoro_path = os.path.abspath('Kokoro-82M') 16 | 17 | # Change to the Kokoro-82M directory and add it to Python path 18 | os.chdir(kokoro_path) 19 | sys.path.append(kokoro_path) 20 | 21 | try: 22 |     from models import build_model 23 |     from kokoro import generate 24 | except ImportError: 25 |     print("Error: Couldn't import models. Please ensure the repository was cloned correctly.") 26 |     print("Current directory:", os.getcwd()) 27 |     print("Files in current directory:", os.listdir()) 28 |     sys.exit(1) 29 | 30 | # Build the model and load the default voicepack 31 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 32 | MODEL = build_model('kokoro-v0_19.pth', device) 33 | 34 | # Select voice 35 | VOICE_NAME = [ 36 |     'af', # Default voice is a 50-50 mix of Bella & Sarah 37 |     'af_bella', 'af_sarah', 'am_adam', 'am_michael', 38 |     'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis', 39 |     'af_nicole', 'af_sky', 40 | ][0] 41 | 42 | VOICEPACK = torch.load(f'voices/{VOICE_NAME}.pt', weights_only=True).to(device) 43 | print(f'Loaded voice: {VOICE_NAME}') 44 | 45 | def split_text_into_sentences(text): 46 |     # Split by periods, question marks, and exclamation marks, 47 |     # but keep the punctuation with the sentence 48 |     import re 49 |     sentences = re.split(r'(?<=[.!?])\s+', text) 50 |     return [s.strip() for s in sentences if s.strip()] 51 | 52 | def generate_audio_files(model, text, voicepack, lang): 53 |     sentences = split_text_into_sentences(text) 54 |     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 55 | 56 |     # Create output directory with timestamp 57 |     output_dir = f'output_{timestamp}' 58 |     os.makedirs(output_dir, exist_ok=True) 59 | 60 |     for i, sentence in enumerate(sentences): 61 |         print(f"Processing sentence {i+1}/{len(sentences)}") 62 |         audio, _ = generate(model, sentence, voicepack, lang=lang) 63 | 64 |         # Save individual sentence audio 65 |         filename = f'{output_dir}/sentence_{i+1:03d}.wav' 66 |         import scipy.io.wavfile as wavfile 67 |         wavfile.write(filename, 24000, audio) 68 | 69 |         # Save the text content 70 |         with open(f'{output_dir}/sentence_{i+1:03d}.txt', 'w') as tf: 71 |             tf.write(sentence) 72 | 73 |     return output_dir 74 | 75 | def merge_audio_files(output_dir): 76 |     # Create a file list for ffmpeg 77 |     file_list_path = os.path.join(output_dir, 'files.txt') 78 | 79 |     # Get all wav files and sort them 80 |     wav_files = sorted([f for f in os.listdir(output_dir) if f.startswith('sentence_') and f.endswith('.wav')]) 81 | 82 |     # Create the files.txt for ffmpeg 83 |     with open(file_list_path, 'w') as f: 84 |         for wav_file in wav_files: 85 |             f.write(f"file '{wav_file}'\n") 86 | 87 |     # Get the timestamp from the directory name so the merged file is named per run 88 |     timestamp = output_dir.split('_', 1)[1] 89 |     merged_output = f'merged_{timestamp}.wav' 90 | 91 |     # Merge all files using ffmpeg 92 |     ffmpeg_cmd = [ 93 |         'ffmpeg', 94 |         '-f', 'concat', 95 |         '-safe', '0', 96 |         '-i', 'files.txt', 97 |         '-c', 'copy', 98 |         merged_output 99 |     ] 100 | 101 |     print("\nMerging audio files...") 102 |     subprocess.run(ffmpeg_cmd, cwd=output_dir) 103 |     print(f"Merged audio saved as: {output_dir}/{merged_output}") 104 | 105 | def read_transcript(file_path): 106 |     try: 107 |         with open(file_path, 'r', encoding='utf-8') as f: 108 |             return f.read() 109 |     except 
FileNotFoundError: 110 |         print(f"Error: Could not find {file_path}") 111 |         sys.exit(1) 112 |     except Exception as e: 113 |         print(f"Error reading file: {e}") 114 |         sys.exit(1) 115 | 116 | # Load text from transcript.txt from the original directory 117 | transcript_path = os.path.join(original_dir, 'transcript.txt') 118 | print(f"Reading transcript from {transcript_path}...") 119 | text = read_transcript(transcript_path) 120 | 121 | print("Generating audio files... This may take a while for long texts.") 122 | output_dir = generate_audio_files(MODEL, text, VOICEPACK, VOICE_NAME[0]) 123 | print(f"Individual files have been saved in directory: '{output_dir}'") 124 | 125 | # Merge the files after all have been generated 126 | merge_audio_files(output_dir) 127 | -------------------------------------------------------------------------------- /vision/img-to-b64.html: -------------------------------------------------------------------------------- [Only the page's visible text survived conversion; the HTML markup, style block, and inline script are not recoverable. What remains is the title "Image to Base64 and Analyze", an image preview ("Preview:", alt text "Image Preview"), a "Base64 String:" output, and an "Analysis Response:" output, i.e. a small page that previews a selected image, shows its Base64 encoding, and displays a vision model's analysis of it.]

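Because the original markup and script are lost, here is a minimal sketch (not the original code) of the browser-side logic a page like this typically implements: read the selected image with FileReader, show a preview, expose the Base64 string, and POST it to an OpenAI-compatible vision endpoint using the same data-URL message format as openai-js/openai-vllm.js. The element ids, endpoint URL, and model name below are placeholders, not values recovered from the file.

```js
// Hypothetical reconstruction; element ids, endpoint, and model are assumptions.
const fileInput = document.getElementById('imageInput');          // <input type="file"> (assumed id)
const preview = document.getElementById('imagePreview');          // <img> preview element (assumed id)
const base64Output = document.getElementById('base64Output');     // <textarea> for the Base64 string (assumed id)
const analysisOutput = document.getElementById('analysisOutput'); // <textarea> for the model response (assumed id)

// Read a File object and resolve with its data URL ("data:image/jpeg;base64,...").
function fileToDataUrl(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(file);
  });
}

async function analyzeImage() {
  const file = fileInput.files[0];
  if (!file) return;

  const dataUrl = await fileToDataUrl(file);
  preview.src = dataUrl;                      // show the preview
  base64Output.value = dataUrl.split(',')[1]; // raw Base64 without the data: prefix

  // Placeholder endpoint and model; point this at your own OpenAI-compatible server.
  const res = await fetch('http://localhost:8000/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'OpenGVLab/InternVL2_5-1B',
      messages: [{
        role: 'user',
        content: [
          { type: 'text', text: 'What are the food items in the image?' },
          { type: 'image_url', image_url: { url: dataUrl } },
        ],
      }],
    }),
  });
  const json = await res.json();
  analysisOutput.value = json.choices[0].message.content;
}
```

This sketch assumes the page and the model server share an origin or that CORS is enabled on the vLLM/Ollama server; otherwise the browser will block the fetch call.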
-------------------------------------------------------------------------------- /xenova-transformers/example1.js: -------------------------------------------------------------------------------- 1 | import { pipeline } from '@xenova/transformers'; 2 | 3 | // Create a text-generation pipeline 4 | const generator = await pipeline( 5 |   'text-generation', 6 |   'Xenova/llama2.c-stories15M' 7 | ); 8 | 9 | const text = 'Once upon a time, during the era of Gilgamesh, a traveller was walking through the desert of the silk road. It was late afternoon and suddenly a sandstorm hit. The traveller had to find shelter. He saw a cave and went inside. The cave was dark and cold. The traveller was afraid but he found a light and a fire. He lit the fire and made a plan to survive the night.'; 10 | 11 | const output2 = await generator(text, { max_new_tokens: 150, temperature: 0.5 }); 12 | console.log(output2); 13 | -------------------------------------------------------------------------------- /xenova-transformers/example2.js: -------------------------------------------------------------------------------- 1 | // Example 2 2 | import { pipeline } from '@xenova/transformers'; 3 | 4 | // Create text-generation pipeline 5 | const generator = await pipeline('text-generation', 'Xenova/Qwen1.5-0.5B-Chat'); 6 | 7 | // Define the prompt and list of messages 8 | const prompt = 'Give me a short introduction to large language models.'; 9 | const messages = [ 10 |   { role: 'system', content: 'You are a helpful assistant.' }, 11 |   { role: 'user', content: prompt }, 12 | ]; 13 | 14 | // Apply chat template 15 | const text = generator.tokenizer.apply_chat_template(messages, { 16 |   tokenize: false, 17 |   add_generation_prompt: true, 18 | }); 19 | 20 | // Generate text 21 | const output = await generator(text, { 22 |   max_new_tokens: 150, 23 |   do_sample: false, 24 |   return_full_text: false, 25 | }); 26 | console.log(JSON.stringify(output)); 27 | --------------------------------------------------------------------------------