├── docs ├── favicon.ico ├── logo192.png ├── logo512.png ├── weback.jpg ├── itsovermeme.png ├── robots.txt ├── manifest.json ├── asset-manifest.json ├── index.html └── static │ ├── js │ ├── main.ad865f2c.js.LICENSE.txt │ ├── 787.dccdf937.chunk.js │ └── 787.dccdf937.chunk.js.map │ └── css │ ├── main.ea4e714c.css.map │ └── main.ea4e714c.css ├── public ├── favicon.ico ├── logo192.png ├── logo512.png ├── weback.jpg ├── robots.txt ├── itsovermeme.png ├── manifest.json └── index.html ├── src ├── setupTests.js ├── App.test.js ├── reportWebVitals.js ├── index.css ├── index.js ├── App.css ├── textContent.js ├── textBox.js ├── gpu_config.json ├── cpu_config.json ├── logo.svg └── App.js ├── tailwind.config.js ├── .gitignore ├── package.json └── README.md /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/docs/favicon.ico -------------------------------------------------------------------------------- /docs/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/docs/logo192.png -------------------------------------------------------------------------------- /docs/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/docs/logo512.png -------------------------------------------------------------------------------- /docs/weback.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/docs/weback.jpg -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/public/favicon.ico 
-------------------------------------------------------------------------------- /public/logo192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/public/logo192.png -------------------------------------------------------------------------------- /public/logo512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/public/logo512.png -------------------------------------------------------------------------------- /public/weback.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/public/weback.jpg -------------------------------------------------------------------------------- /docs/itsovermeme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/docs/itsovermeme.png -------------------------------------------------------------------------------- /docs/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /public/itsovermeme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0xSojalSec/gpu_poor/HEAD/public/itsovermeme.png -------------------------------------------------------------------------------- /src/setupTests.js: 
-------------------------------------------------------------------------------- 1 | // jest-dom adds custom jest matchers for asserting on DOM nodes. 2 | // allows you to do things like: 3 | // expect(element).toHaveTextContent(/react/i) 4 | // learn more: https://github.com/testing-library/jest-dom 5 | import '@testing-library/jest-dom'; 6 | -------------------------------------------------------------------------------- /src/App.test.js: -------------------------------------------------------------------------------- 1 | import { render, screen } from '@testing-library/react'; 2 | import App from './App'; 3 | 4 | test('renders learn react link', () => { 5 | render(); 6 | const linkElement = screen.getByText(/learn react/i); 7 | expect(linkElement).toBeInTheDocument(); 8 | }); 9 | -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: [ 4 | "./src/**/*.{js,jsx,ts,tsx}", 5 | ], 6 | theme: { 7 | extend: { 8 | fontFamily: { 9 | 'poppins': ['Poppins', 'sans-serif'] 10 | }, 11 | }, 12 | }, 13 | plugins: [], 14 | } 15 | 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /src/reportWebVitals.js: -------------------------------------------------------------------------------- 1 | const reportWebVitals = onPerfEntry => { 2 | if (onPerfEntry && onPerfEntry instanceof Function) { 3 | import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => { 4 | getCLS(onPerfEntry); 5 | getFID(onPerfEntry); 6 | getFCP(onPerfEntry); 7 | getLCP(onPerfEntry); 8 | getTTFB(onPerfEntry); 9 | }); 10 | } 11 | }; 12 | 13 | export default reportWebVitals; 14 | -------------------------------------------------------------------------------- /src/index.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | @import url("https://fonts.googleapis.com/css2?family=Poppins&display=swap"); 5 | body { 6 | margin: 0; 7 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 8 | 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', 9 | sans-serif; 10 | -webkit-font-smoothing: antialiased; 11 | -moz-osx-font-smoothing: grayscale; 12 | } 13 | 14 | code { 15 | font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', 16 | monospace; 17 | } 18 | -------------------------------------------------------------------------------- /docs/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": 
"image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import './index.css'; 4 | import App from './App'; 5 | import reportWebVitals from './reportWebVitals'; 6 | 7 | const root = ReactDOM.createRoot(document.getElementById('root')); 8 | root.render( 9 | 10 | 11 | 12 | 13 | ); 14 | 15 | // If you want to start measuring performance in your app, pass a function 16 | // to log results (for example: reportWebVitals(console.log)) 17 | // or send to an analytics endpoint. 
Learn more: https://bit.ly/CRA-vitals 18 | reportWebVitals(); 19 | -------------------------------------------------------------------------------- /docs/asset-manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": { 3 | "main.css": "/gpu_poor/static/css/main.ea4e714c.css", 4 | "main.js": "/gpu_poor/static/js/main.ad865f2c.js", 5 | "static/js/787.dccdf937.chunk.js": "/gpu_poor/static/js/787.dccdf937.chunk.js", 6 | "index.html": "/gpu_poor/index.html", 7 | "main.ea4e714c.css.map": "/gpu_poor/static/css/main.ea4e714c.css.map", 8 | "main.ad865f2c.js.map": "/gpu_poor/static/js/main.ad865f2c.js.map", 9 | "787.dccdf937.chunk.js.map": "/gpu_poor/static/js/787.dccdf937.chunk.js.map" 10 | }, 11 | "entrypoints": [ 12 | "static/css/main.ea4e714c.css", 13 | "static/js/main.ad865f2c.js" 14 | ] 15 | } -------------------------------------------------------------------------------- /src/App.css: -------------------------------------------------------------------------------- 1 | .App { 2 | text-align: center; 3 | } 4 | 5 | .App-logo { 6 | height: 40vmin; 7 | pointer-events: none; 8 | } 9 | 10 | @media (prefers-reduced-motion: no-preference) { 11 | .App-logo { 12 | animation: App-logo-spin infinite 20s linear; 13 | } 14 | } 15 | 16 | .App-header { 17 | background-color: #282c34; 18 | min-height: 100vh; 19 | display: flex; 20 | flex-direction: column; 21 | align-items: center; 22 | justify-content: center; 23 | font-size: calc(10px + 2vmin); 24 | color: white; 25 | } 26 | 27 | .App-link { 28 | color: #61dafb; 29 | } 30 | 31 | @keyframes App-logo-spin { 32 | from { 33 | transform: rotate(0deg); 34 | } 35 | to { 36 | transform: rotate(360deg); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/textContent.js: -------------------------------------------------------------------------------- 1 | // textContent.js 2 | const fullText = ` 3 | It was cold and there was snow on 
the ground and he rode past me and kept on goin'. Never said nothin'. He just rode on past and he had this blanket wrapped around him and he had his head down and when he rode past I seen he was carryin' fire in a horn the way people used to do and I could see the horn from the light inside of it. About the color of the moon. And in the dream I knew that he was goin' on ahead and he was fixin' to make a fire somewhere out there in all that dark and all that cold and I knew that whenever I got there he would be there. And then I woke up. 4 | `; 5 | 6 | 7 | // const words = fullText.split(/[\s,.;!?]+/) 8 | // console.log(words.length); 9 | 10 | export default fullText; -------------------------------------------------------------------------------- /src/textBox.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | function TextInput(props) { 4 | // const [value, setValue] = useState(''); // useState hook to manage the input value 5 | 6 | const fun = props.setValue; 7 | let disableStatus = false; 8 | if ('disableStatus' in props){ 9 | disableStatus = props.disableStatus 10 | } 11 | 12 | return ( 13 | { fun(e.target.value); 17 | if (typeof props.setChange === 'function') { 18 | props.setChange(true); 19 | } 20 | }} 21 | onKeyDown={props.handleKeyDown} 22 | placeholder={props.placeholder} 23 | disabled={disableStatus} 24 | /> 25 | ); 26 | } 27 | 28 | export default TextInput; 29 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | LLM check
-------------------------------------------------------------------------------- /src/gpu_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "rtx-3090": {"bandwidth": 936, "compute": 16.3, "memory": 24}, 3 | "rtx-4060": {"bandwidth": 288, "compute": 22.06, "memory": 16}, 4 | "rtx-4090": {"bandwidth": 1000, "compute": 82.58, "memory": 24}, 5 | "rtx-4080": {"bandwidth": 716, "compute": 63.9, "memory": 16}, 6 | "rtx-3060": {"bandwidth": 360, "compute": 13.0, "memory": 12}, 7 | "rtx-2060": {"bandwidth": 336, "compute": 13.0, "memory": 6}, 8 | "rtx-2070": {"bandwidth": 448, "compute": 15.0, "memory": 8}, 9 | "A-6000": {"bandwidth": 768, "compute": 38.71, "memory": 48}, 10 | "A-4000": {"bandwidth": 768, "compute": 19.17, "memory": 16}, 11 | "P-100": {"bandwidth": 1000, "compute": 19.05, "memory": 12}, 12 | "P-40": {"bandwidth": 364, "compute": 11.76, "memory": 24}, 13 | "A100-40GB": {"bandwidth": 1555, "compute": 19.5, "memory": 40}, 14 | "A100-80GB": {"bandwidth": 2039, "compute": 19.5, "memory": 80}, 15 | "H100-SXM5": {"bandwidth": 3350, "compute": 51.7, "memory": 80}, 16 | "H100-PCIe": {"bandwidth": 2000, "compute": 51.7, "memory": 80} 17 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gpu_mem", 3 | "version": "0.1.0", 4 | "homepage": "https://rahulschand.github.io/gpu_poor", 5 | "private": true, 6 | "dependencies": { 7 | "@testing-library/jest-dom": "^5.17.0", 8 | "@testing-library/react": "^13.4.0", 9 | "@testing-library/user-event": "^13.5.0", 10 | "react": "^18.2.0", 11 | "react-dom": "^18.2.0", 12 | "react-modal": "^3.16.1", 13 | "react-router-dom": "^6.16.0", 14 | "react-scripts": "^5.0.1", 15 | "tailwind": "^4.0.0", 16 | "web-vitals": "^2.1.4" 17 | }, 18 | "scripts": { 19 | "predeploy": "npm run build", 20 | "deploy": "gh-pages -d build", 21 | "start": 
"react-scripts start", 22 | "build": "react-scripts build", 23 | "test": "react-scripts test", 24 | "eject": "react-scripts eject" 25 | }, 26 | "eslintConfig": { 27 | "extends": [ 28 | "react-app", 29 | "react-app/jest" 30 | ] 31 | }, 32 | "browserslist": { 33 | "production": [ 34 | ">0.2%", 35 | "not dead", 36 | "not op_mini all" 37 | ], 38 | "development": [ 39 | "last 1 chrome version", 40 | "last 1 firefox version", 41 | "last 1 safari version" 42 | ] 43 | }, 44 | "devDependencies": { 45 | "gh-pages": "^6.0.0" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 17 | 18 | 27 | LLM check 28 | 29 | 36 | 37 | 38 | 39 |
40 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /src/cpu_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "3600x": { 3 | "Speed": 0.0, 4 | "Bus": "Dual", 5 | "Cores": 6, 6 | "ddr5": 0, 7 | "ddr4": 1, 8 | "speed_ddr4": 3200, 9 | "Memory": "DDR4", 10 | "Flops": 432 11 | }, 12 | "7950x": { 13 | "Speed": 5200, 14 | "Bus": "Dual", 15 | "ddr4": 0, 16 | "ddr5": 1, 17 | "speed_ddr4": 0.0, 18 | "Cores": 16, 19 | "Memory": "DDR5", 20 | "Flops": 970.0 21 | }, 22 | "12700H": { 23 | "Speed": 4800, 24 | "Bus": "Dual", 25 | "ddr4": 1, 26 | "ddr5": 1, 27 | "speed_ddr4": 3200.0, 28 | "Cores": 14, 29 | "Memory": "DDR5", 30 | "Flops": 2000.0 31 | }, 32 | "13900K": { 33 | "Speed": 5600, 34 | "Bus": "Dual", 35 | "Cores": 24, 36 | "ddr4": 1, 37 | "ddr5": 1, 38 | "speed_ddr4": 3200.0, 39 | "Memory": "DDR5", 40 | "Flops": 2000.0 41 | }, 42 | "13700K": { 43 | "Speed": 5600, 44 | "Bus": "Dual", 45 | "ddr4": 1, 46 | "ddr5": 1, 47 | "speed_ddr4": 3200.0, 48 | "Cores": 16, 49 | "Memory": "DDR5", 50 | "Flops": 2000.0 51 | }, 52 | "9900K": { 53 | "Speed": 0.0, 54 | "Bus": "Dual", 55 | "ddr4": 1, 56 | "ddr5": 0, 57 | "speed_ddr4": 2666, 58 | "Cores": 8, 59 | "Memory": "DDR5", 60 | "Flops": 2000.0 61 | }, 62 | "5900X" : { 63 | "Speed": 0.0, 64 | "Bus": "Dual", 65 | "ddr4": 1, 66 | "ddr5": 0, 67 | "speed_ddr4": 3200, 68 | "Cores": 12, 69 | "Memory": "DDR5", 70 | "Flops": 2000.0 71 | }, 72 | "5600X" : { 73 | "Speed": 0.0, 74 | "Bus": "Dual", 75 | "ddr4": 1, 76 | "ddr5": 0, 77 | "speed_ddr4": 3200, 78 | "Cores": 12, 79 | "Memory": "DDR5", 80 | "Flops": 2000.0 81 | }, 82 | "3990X" : { 83 | "Speed": 0.0, 84 | "Bus": "Quad", 85 | "ddr4": 1, 86 | "ddr5": 0, 87 | "speed_ddr4": 3200, 88 | "Cores": 64, 89 | "Memory": "DDR5", 90 | "Flops": 2000.0 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /docs/static/js/main.ad865f2c.js.LICENSE.txt: 
-------------------------------------------------------------------------------- 1 | /*! 2 | Copyright (c) 2015 Jed Watson. 3 | Based on code that is Copyright 2013-2015, Facebook, Inc. 4 | All rights reserved. 5 | */ 6 | 7 | /*! 8 | * Adapted from jQuery UI core 9 | * 10 | * http://jqueryui.com 11 | * 12 | * Copyright 2014 jQuery Foundation and other contributors 13 | * Released under the MIT license. 14 | * http://jquery.org/license 15 | * 16 | * http://api.jqueryui.com/category/ui-core/ 17 | */ 18 | 19 | /*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */ 20 | 21 | /** 22 | * @license React 23 | * react-dom.production.min.js 24 | * 25 | * Copyright (c) Facebook, Inc. and its affiliates. 26 | * 27 | * This source code is licensed under the MIT license found in the 28 | * LICENSE file in the root directory of this source tree. 29 | */ 30 | 31 | /** 32 | * @license React 33 | * react-jsx-runtime.production.min.js 34 | * 35 | * Copyright (c) Facebook, Inc. and its affiliates. 36 | * 37 | * This source code is licensed under the MIT license found in the 38 | * LICENSE file in the root directory of this source tree. 39 | */ 40 | 41 | /** 42 | * @license React 43 | * react.production.min.js 44 | * 45 | * Copyright (c) Facebook, Inc. and its affiliates. 46 | * 47 | * This source code is licensed under the MIT license found in the 48 | * LICENSE file in the root directory of this source tree. 49 | */ 50 | 51 | /** 52 | * @license React 53 | * scheduler.production.min.js 54 | * 55 | * Copyright (c) Facebook, Inc. and its affiliates. 56 | * 57 | * This source code is licensed under the MIT license found in the 58 | * LICENSE file in the root directory of this source tree. 59 | */ 60 | 61 | //! Backward pass *2 62 | 63 | //! Can't train full with QLoRA 64 | 65 | //! Cost due to bnb 66 | 67 | //! 
If I have bnb4 or bnb8 selected then put a disclaimer that it doesn't work 68 | 69 | //! In total training, we will have to move the weights back to GPU for update, so its 2x more + update all so 1.5x (approx) more. Total 3x 70 | 71 | //! Per token Time calculation 72 | 73 | //! Prompt Time Calculation 74 | 75 | //! Train 76 | 77 | //! Training is most of the time compute bound 78 | 79 | //!Inference 80 | -------------------------------------------------------------------------------- /src/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/static/js/787.dccdf937.chunk.js: -------------------------------------------------------------------------------- 1 | "use strict";(self.webpackChunkgpu_mem=self.webpackChunkgpu_mem||[]).push([[787],{787:function(e,t,n){n.r(t),n.d(t,{getCLS:function(){return y},getFCP:function(){return g},getFID:function(){return C},getLCP:function(){return P},getTTFB:function(){return D}});var i,r,a,o,u=function(e,t){return{name:e,value:void 0===t?-1:t,delta:0,entries:[],id:"v2-".concat(Date.now(),"-").concat(Math.floor(8999999999999*Math.random())+1e12)}},c=function(e,t){try{if(PerformanceObserver.supportedEntryTypes.includes(e)){if("first-input"===e&&!("PerformanceEventTiming"in self))return;var n=new PerformanceObserver((function(e){return e.getEntries().map(t)}));return n.observe({type:e,buffered:!0}),n}}catch(e){}},f=function(e,t){var n=function n(i){"pagehide"!==i.type&&"hidden"!==document.visibilityState||(e(i),t&&(removeEventListener("visibilitychange",n,!0),removeEventListener("pagehide",n,!0)))};addEventListener("visibilitychange",n,!0),addEventListener("pagehide",n,!0)},s=function(e){addEventListener("pageshow",(function(t){t.persisted&&e(t)}),!0)},m=function(e,t,n){var i;return function(r){t.value>=0&&(r||n)&&(t.delta=t.value-(i||0),(t.delta||void 
0===i)&&(i=t.value,e(t)))}},v=-1,p=function(){return"hidden"===document.visibilityState?0:1/0},d=function(){f((function(e){var t=e.timeStamp;v=t}),!0)},l=function(){return v<0&&(v=p(),d(),s((function(){setTimeout((function(){v=p(),d()}),0)}))),{get firstHiddenTime(){return v}}},g=function(e,t){var n,i=l(),r=u("FCP"),a=function(e){"first-contentful-paint"===e.name&&(f&&f.disconnect(),e.startTime-1&&e(t)},r=u("CLS",0),a=0,o=[],v=function(e){if(!e.hadRecentInput){var t=o[0],i=o[o.length-1];a&&e.startTime-i.startTime<1e3&&e.startTime-t.startTime<5e3?(a+=e.value,o.push(e)):(a=e.value,o=[e]),a>r.value&&(r.value=a,r.entries=o,n())}},p=c("layout-shift",v);p&&(n=m(i,r,t),f((function(){p.takeRecords().map(v),n(!0)})),s((function(){a=0,T=-1,r=u("CLS",0),n=m(i,r,t)})))},E={passive:!0,capture:!0},w=new Date,L=function(e,t){i||(i=t,r=e,a=new Date,F(removeEventListener),S())},S=function(){if(r>=0&&r1e12?new Date:performance.now())-e.timeStamp;"pointerdown"==e.type?function(e,t){var n=function(){L(e,t),r()},i=function(){r()},r=function(){removeEventListener("pointerup",n,E),removeEventListener("pointercancel",i,E)};addEventListener("pointerup",n,E),addEventListener("pointercancel",i,E)}(t,e):L(t,e)}},F=function(e){["mousedown","keydown","touchstart","pointerdown"].forEach((function(t){return e(t,b,E)}))},C=function(e,t){var n,a=l(),v=u("FID"),p=function(e){e.startTimeperformance.now())return;n.entries=[t],e(n)}catch(e){}},"complete"===document.readyState?setTimeout(t,0):addEventListener("load",(function(){return setTimeout(t,0)}))}}}]); 2 | //# sourceMappingURL=787.dccdf937.chunk.js.map -------------------------------------------------------------------------------- /docs/static/css/main.ea4e714c.css.map: -------------------------------------------------------------------------------- 1 | 
{"version":3,"file":"static/css/main.ea4e714c.css","mappings":";AAAA;;CAAc,CAAd,uCAAc,CAAd,qBAAc,CAAd,8BAAc,CAAd,kCAAc,CAAd,oCAAc,CAAd,4BAAc,CAAd,gMAAc,CAAd,8BAAc,CAAd,eAAc,CAAd,UAAc,CAAd,wBAAc,CAAd,uBAAc,CAAd,aAAc,CAAd,QAAc,CAAd,4DAAc,CAAd,gCAAc,CAAd,mCAAc,CAAd,mBAAc,CAAd,eAAc,CAAd,uBAAc,CAAd,2BAAc,CAAd,qHAAc,CAAd,aAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,aAAc,CAAd,iBAAc,CAAd,sBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,8BAAc,CAAd,oBAAc,CAAd,aAAc,CAAd,2EAAc,CAAd,6BAAc,CAAd,aAAc,CAAd,mBAAc,CAAd,cAAc,CAAd,+BAAc,CAAd,mBAAc,CAAd,mBAAc,CAAd,QAAc,CAAd,SAAc,CAAd,iCAAc,CAAd,yEAAc,CAAd,wBAAc,CAAd,qBAAc,CAAd,4BAAc,CAAd,gCAAc,CAAd,+BAAc,CAAd,mEAAc,CAAd,0CAAc,CAAd,mBAAc,CAAd,mDAAc,CAAd,sDAAc,CAAd,YAAc,CAAd,yBAAc,CAAd,2DAAc,CAAd,iBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,QAAc,CAAd,SAAc,CAAd,gBAAc,CAAd,wBAAc,CAAd,kFAAc,CAAd,SAAc,CAAd,sDAAc,CAAd,SAAc,CAAd,mCAAc,CAAd,wBAAc,CAAd,4DAAc,CAAd,qBAAc,CAAd,qBAAc,CAAd,cAAc,CAAd,qBAAc,CAAd,wCAAc,CAAd,uBAAc,CAAd,kBAAc,CAAd,kBAAc,CAAd,aAAc,CAAd,aAAc,CAAd,aAAc,CAAd,cAAc,CAAd,cAAc,CAAd,YAAc,CAAd,YAAc,CAAd,iBAAc,CAAd,qCAAc,CAAd,6BAAc,CAAd,4BAAc,CAAd,2BAAc,CAAd,cAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,iBAAc,CAAd,0BAAc,CAAd,2BAAc,CAAd,mCAAc,CAAd,iCAAc,CAAd,0BAAc,CAAd,qBAAc,CAAd,6BAAc,CAAd,WAAc,CAAd,iBAAc,CAAd,eAAc,CAAd,gBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,eAAc,CAAd,YAAc,CAAd,kBAAc,CAAd,oBAAc,CAAd,0BAAc,CAAd,wBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,wBAAc,CAAd,qBAAc,CAAd,0CAAc,CAAd,uBAAc,CAAd,kBAAc,CAAd,kBAAc,CAAd,aAAc,CAAd,aAAc,CAAd,aAAc,CAAd,cAAc,CAAd,cAAc,CAAd,YAAc,CAAd,YAAc,CAAd,iBAAc,CAAd,qCAAc,CAAd,6BAAc,CAAd,4BAAc,CAAd,2BAAc,CAAd,cAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,iBAAc,CAAd,0BAAc,CAAd,2BAAc,CAAd,mCAAc,CAAd,iCAAc,CAAd,0BAAc,CAAd,qBAAc,CAAd,6BAAc,CAAd,WAAc,CAAd,iBAAc,CAAd,eAAc,CAAd,gBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,eAAc,CAAd,YAAc,CAAd,kBAAc,CAAd,oBAAc,CAAd,0BAAc,CAAd,wBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,wBAAc,CAAd,qBAAc,CAAd,kCAAc,CAAd,uBAAc,CAAd,kBAAc,CAAd,kBAAc,CAAd,aAAc,CAAd,aAAc,CAAd,aAAc,CAAd,cAAc,CAAd,cAAc,CAAd,Y
AAc,CAAd,YAAc,CAAd,iBAAc,CAAd,qCAAc,CAAd,6BAAc,CAAd,4BAAc,CAAd,2BAAc,CAAd,cAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,iBAAc,CAAd,0BAAc,CAAd,2BAAc,CAAd,mCAAc,CAAd,iCAAc,CAAd,0BAAc,CAAd,qBAAc,CAAd,6BAAc,CAAd,WAAc,CAAd,iBAAc,CAAd,eAAc,CAAd,gBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,eAAc,CAAd,YAAc,CAAd,kBAAc,CAAd,oBAAc,CAAd,0BAAc,CAAd,wBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,wBAAc,CAAd,qBAAc,CAEd,qBAAmB,CAAnB,gBAAmB,CAAnB,uBAAmB,CAAnB,wBAAmB,CAAnB,sBAAmB,CAAnB,yBAAmB,CAAnB,wBAAmB,CAAnB,sBAAmB,CAAnB,uBAAmB,CAAnB,sBAAmB,CAAnB,sBAAmB,CAAnB,qBAAmB,CAAnB,kCAAmB,CAAnB,kBAAmB,CAAnB,oBAAmB,CAAnB,oBAAmB,CAAnB,kBAAmB,CAAnB,kBAAmB,CAAnB,gBAAmB,CAAnB,iBAAmB,CAAnB,iBAAmB,CAAnB,iBAAmB,CAAnB,eAAmB,CAAnB,0BAAmB,CAAnB,yBAAmB,CAAnB,4BAAmB,CAAnB,yCAAmB,CAAnB,gNAAmB,CAAnB,6LAAmB,CAAnB,8BAAmB,CAAnB,+BAAmB,CAAnB,4BAAmB,CAAnB,+BAAmB,CAAnB,oCAAmB,CAAnB,sCAAmB,CAAnB,+DAAmB,CAAnB,oHAAmB,CAAnB,gCAAmB,CAAnB,qCAAmB,CAAnB,6BAAmB,CAAnB,+BAAmB,CAAnB,wBAAmB,CAAnB,0BAAmB,CAAnB,iCAAmB,CAAnB,gCAAmB,CAAnB,mCAAmB,CAAnB,gDAAmB,CAAnB,sCAAmB,CAAnB,qDAAmB,CAAnB,sCAAmB,CAAnB,sDAAmB,CAAnB,sCAAmB,CAAnB,sDAAmB,CAAnB,sCAAmB,CAAnB,sDAAmB,CAAnB,sCAAmB,CAAnB,mDAAmB,CAAnB,uCAAmB,CAAnB,oDAAmB,CAAnB,qCAAmB,CAAnB,oDAAmB,CAAnB,2BAAmB,CAAnB,gDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,6BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,qDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,+BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,6BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,6BAAmB,CAAnB,sDAAmB,CAAnB,gCAAmB,CAAnB,sDAAmB,CAAnB,+BAAmB,CAAnB,sDAAmB,CAAnB,2BAAmB,CAAnB,sDAAmB,CAAnB,kCAAmB,CAAnB,kBAAmB,CAAnB,mBAAmB,CAAnB,iBAAmB,CAAnB,oBAAmB,CAAnB,eAAmB,CAAnB,6BAAmB,CAAnB,qBAAmB,CAAnB,yBAAmB,CAAnB,oBAAmB,CAAnB,wBAAmB,CAAnB,mBAAmB,CAAnB,uBAAmB,CAAnB,kBAAmB,CAAnB,8CAAmB,CAAnB,4CAAmB,CAAnB,2BAAmB,CAAnB,0BAAmB,CAAnB,wBAAmB,CAAnB,yBAAmB,CAAnB,uBAAmB,CAAnB,0BAAmB,CAAnB,yBAAmB,CAAnB,yBAAmB,CAAnB,0BAAmB,CAAnB,wBAAmB,CAAnB,0BAAmB,CAAnB,wBAAmB,CAAnB,uBAAmB,CAAnB,wBAAmB,CAAnB,sBAAmB,CAAnB,sBAAmB,
CAAnB,8BAAmB,CAAnB,8GAAmB,CAAnB,4CAAmB,CAAnB,2MAAmB,CAAnB,4EAAmB,CAAnB,0BAAmB,CAAnB,gBAAmB,CAAnB,yBAAmB,CAAnB,kBAAmB,CAAnB,2BAAmB,CAAnB,mBAAmB,CAAnB,0BAAmB,CAAnB,mBAAmB,CAAnB,yBAAmB,CAAnB,gBAAmB,CAAnB,0BAAmB,CAAnB,+BAAmB,CAAnB,8BAAmB,CAAnB,+BAAmB,CAAnB,uCAAmB,CAAnB,kCAAmB,CAAnB,4CAAmB,CAAnB,kCAAmB,CAAnB,2CAAmB,CAAnB,kCAAmB,CAAnB,2CAAmB,CAAnB,kCAAmB,CAAnB,0CAAmB,CAAnB,mCAAmB,CAAnB,2CAAmB,CAAnB,iCAAmB,CAAnB,2CAAmB,CAAnB,iCAAmB,CAAnB,2CAAmB,CAAnB,iCAAmB,CAAnB,2CAAmB,CAAnB,+BAAmB,CAAnB,6CAAmB,CAAnB,yCAAmB,CAAnB,qFAAmB,CAAnB,kGAAmB,CAAnB,+CAAmB,CAAnB,kGAAmB,CAAnB,gMAAmB,CAAnB,gLAAmB,CAAnB,oFAAmB,CAAnB,6BAAmB,CAAnB,+CAAmB,CAAnB,kDAAmB,CAAnB,qCAAmB,CAEnB,KAKE,kCAAmC,CACnC,iCAAkC,CAJlC,mIAEY,CAHZ,QAMF,CAEA,KACE,uEAEF,CAhBA,wCAiBA,CAjBA,gBAiBA,CAjBA,qMAiBA,CAjBA,6LAiBA,CAjBA,mDAiBA,CAjBA,uCAiBA,CAjBA,gDAiBA,CAjBA,gDAiBA,CAjBA,2CAiBA,CAjBA,sDAiBA,CAjBA,2CAiBA,CAjBA,sDAiBA,CAjBA,2CAiBA,CAjBA,sDAiBA,CAjBA,4CAiBA,CAjBA,sDAiBA,CAjBA,0CAiBA,CAjBA,sDAiBA,CAjBA,0CAiBA,CAjBA,sDAiBA,CAjBA,yCAiBA,CAjBA,mBAiBA,CAjBA,uCAiBA,CAjBA,4CAiBA,CAjBA,8CAiBA,EAjBA,sDAiBA","sources":["index.css"],"sourcesContent":["@tailwind base;\n@tailwind components;\n@tailwind utilities;\n@import url(\"https://fonts.googleapis.com/css2?family=Poppins&display=swap\");\nbody {\n margin: 0;\n font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',\n 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',\n sans-serif;\n -webkit-font-smoothing: antialiased;\n -moz-osx-font-smoothing: grayscale;\n}\n\ncode {\n font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',\n monospace;\n}\n"],"names":[],"sourceRoot":""} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Can my GPU run this LLM? & at what token/s? 
2 | 3 | ![Made with](https://img.shields.io/badge/logo-javascript-blue?logo=javascript) 4 | 5 | Calculates how much **GPU memory you need** and how much **token/s you can get** for any LLM & GPU/CPU. 6 | 7 | Also breakdown of where it goes for training/inference with quantization (GGML/bitsandbytes/QLoRA) & inference frameworks (vLLM/llama.cpp/HF) supported 8 | 9 | Link: **https://rahulschand.github.io/gpu_poor/** 10 | 11 | ### Demo 12 | 13 | ![new_upload](https://github.com/RahulSChand/gpu_poor/assets/16897807/14250f55-e886-4cc6-9aeb-08532382860c) 14 | 15 | 16 | --- 17 | 18 | ## Use cases/Features 19 | 20 | #### 1. Calculate vRAM memory requirement 💾 21 | 22 | image 23 | 24 | --- 25 | 26 | #### 2. Calculate ~token/s you can get ⏱️ 27 | 28 | image 29 | 30 | --- 31 | 32 | #### 3. Approximate time for finetuning (ms per iteration) ⌛️ 33 | 34 | image 35 | 36 | --- 37 | 38 | For memory, output is total vRAM & its breakdown. It looks like below 39 | 40 | ``` 41 | { 42 | "Total": 4000, 43 | "KV Cache": 1000, 44 | "Model Size": 2000, 45 | "Activation Memory": 500, 46 | "Grad & Optimizer memory": 0, 47 | "cuda + other overhead": 500 48 | } 49 | ``` 50 | 51 | For token/s, additional info looks like below 52 | 53 | ``` 54 | { 55 | "Token per second": 50, 56 | "ms per token": 20, 57 | "Prompt process time (s)": 5 s, 58 | "memory or compute bound?": Memory, 59 | } 60 | ``` 61 | 62 | For training, output is time for each forward pass (in ms) 63 | 64 | ``` 65 | { 66 | "ms per iteration (forward + backward)": 100, 67 | "memory or compute bound?": Memory, 68 | } 69 | ``` 70 | 71 | --- 72 | 73 | 74 | ### Purpose 75 | 76 | made this to check if you can run a particular LLM on your GPU. Useful to figure out the following 77 | 78 | 1. How much token/s can I get? 79 | 2. How much total time to finetune? 80 | 3. What quantization will fit on my GPU? 81 | 4. Max context length & batch-size my GPU can handle? 82 | 5. Which finetuning? Full? LoRA? QLoRA? 83 | 6. 
What is consuming my GPU memory? What to change to fit the LLM on GPU? 84 | 85 | --- 86 | 87 | ## Additional info + FAQ 88 | 89 | 90 | ### Can't we just look at the model size & figure this out? 91 | 92 | Finding which LLMs your GPU can handle isn't as easy as looking at the model size because during inference (KV cache) takes susbtantial amount of memory. For example, with sequence length 1000 on llama-2-7b it takes 1GB of extra memory (using hugginface LlamaForCausalLM, with exLlama & vLLM this is 500MB). And during training both KV cache & activations & quantization overhead take a lot of memory. For example, llama-7b with bnb int8 quant is of size ~7.5GB but it isn't possible to finetune it using LoRA on data with 1000 context length even with RTX 4090 24 GB. Which means an additional 16GB memory goes into quant overheads, activations & grad memory. 93 | 94 | 95 | ### How reliable are the numbers? 96 | The results can vary depending on your model, input data, cuda version & what quant you are using & it is impossible to predict exact values. I have tried to take these into account & make sure the results are within 500MB. Below table I cross-check 3b,7b & 13b model memories given by the website vs. what what I get on my RTX 4090 & 2060 GPUs. All values are within 500MB. 97 | 98 | image 99 | 100 | 101 | ### How are the values calculated? 102 | 103 | `Total memory = model size + kv-cache + activation memory + optimizer/grad memory + cuda etc. overhead` 104 | 1. **Model size** = this is your `.bin` file size (divide it by 2 if Q8 quant & by 4 if Q4 quant). 105 | 2. **KV-Cache** = Memory taken by KV (key-value) vectors. Size = `(2 x sequence length x hidden size)` _per layer_. For huggingface this `(2 x 2 x sequence length x hidden size)` _per layer_. In training the whole sequence is processed at once (therefore KV cache memory = 0) 106 | 3. **Activation Memory** = In forward pass every operation's output has to be stored for doing `.backward()`. 
For example if you do `output = Q * input` where `Q = (dim, dim)` and `input = (batch, seq, dim)` then output of shape `(batch, seq, dim)` will need to be stored (in fp16). This consumes the most memory in LoRA/QLoRA. In LLMs there are many such intermediate steps (after Q,K,V and after attention, after norm, after FFN1, FFN2, FFN3, after skip layer ....) Around 15 intermediate representations are saved _per layer_. 107 | 4. **Optimizer/Grad memory** = Memory taken by `.grad` tensors & tensors associated with the optimizer (`running avg` etc.) 108 | 5. **Cuda etc. overhead** = Around 500MB-1GB of memory is taken by CUDA whenever cuda is loaded. Also there are additional overheads when you use any quantization (like bitsandbytes). There is no straightforward formula here (I assume 650 MB overhead in my calculations for cuda overhead) 109 | 110 | 111 | ### Why are the results wrong? 112 | Sometimes the answers might be very wrong in which case please open an issue here & I will try to fix it. 113 | 114 | 115 | --- 116 | 117 | ### TODO 118 | 1. Add support for vLLM for token/s 119 | 2. ~Add QLora~ ✅ 120 | 3. ~Add way to measure approximate tokens/s you can get for a particular GPU~ ✅ 121 | 4. ~Improve logic to get hyper-params from size~ (since hidden layer/intermediate size/number of layers can vary for a particular size) ✅ 122 | 5.
Add AWQ 123 | -------------------------------------------------------------------------------- /docs/static/js/787.dccdf937.chunk.js.map: -------------------------------------------------------------------------------- 1 | {"version":3,"file":"static/js/787.dccdf937.chunk.js","mappings":"mQAAA,IAAIA,EAAEC,EAAEC,EAAEC,EAAEC,EAAE,SAASJ,EAAEC,GAAG,MAAM,CAACI,KAAKL,EAAEM,WAAM,IAASL,GAAG,EAAEA,EAAEM,MAAM,EAAEC,QAAQ,GAAGC,GAAG,MAAMC,OAAOC,KAAKC,MAAM,KAAKF,OAAOG,KAAKC,MAAM,cAAcD,KAAKE,UAAU,MAAM,EAAEC,EAAE,SAAShB,EAAEC,GAAG,IAAI,GAAGgB,oBAAoBC,oBAAoBC,SAASnB,GAAG,CAAC,GAAG,gBAAgBA,KAAK,2BAA2BoB,MAAM,OAAO,IAAIlB,EAAE,IAAIe,qBAAqB,SAASjB,GAAG,OAAOA,EAAEqB,aAAaC,IAAIrB,EAAE,IAAI,OAAOC,EAAEqB,QAAQ,CAACC,KAAKxB,EAAEyB,UAAS,IAAKvB,CAAC,CAAC,CAAC,MAAMF,GAAG,CAAC,EAAE0B,EAAE,SAAS1B,EAAEC,GAAG,IAAIC,EAAE,SAASA,EAAEC,GAAG,aAAaA,EAAEqB,MAAM,WAAWG,SAASC,kBAAkB5B,EAAEG,GAAGF,IAAI4B,oBAAoB,mBAAmB3B,GAAE,GAAI2B,oBAAoB,WAAW3B,GAAE,IAAK,EAAE4B,iBAAiB,mBAAmB5B,GAAE,GAAI4B,iBAAiB,WAAW5B,GAAE,EAAG,EAAE6B,EAAE,SAAS/B,GAAG8B,iBAAiB,YAAY,SAAS7B,GAAGA,EAAE+B,WAAWhC,EAAEC,EAAE,IAAG,EAAG,EAAEgC,EAAE,SAASjC,EAAEC,EAAEC,GAAG,IAAIC,EAAE,OAAO,SAASC,GAAGH,EAAEK,OAAO,IAAIF,GAAGF,KAAKD,EAAEM,MAAMN,EAAEK,OAAOH,GAAG,IAAIF,EAAEM,YAAO,IAASJ,KAAKA,EAAEF,EAAEK,MAAMN,EAAEC,IAAI,CAAC,EAAEiC,GAAG,EAAEC,EAAE,WAAW,MAAM,WAAWR,SAASC,gBAAgB,EAAE,GAAG,EAAEQ,EAAE,WAAWV,GAAG,SAAS1B,GAAG,IAAIC,EAAED,EAAEqC,UAAUH,EAAEjC,CAAC,IAAG,EAAG,EAAEqC,EAAE,WAAW,OAAOJ,EAAE,IAAIA,EAAEC,IAAIC,IAAIL,GAAG,WAAWQ,YAAY,WAAWL,EAAEC,IAAIC,GAAG,GAAG,EAAE,KAAK,CAAC,mBAAII,GAAkB,OAAON,CAAC,EAAE,EAAEO,EAAE,SAASzC,EAAEC,GAAG,IAAIC,EAAEC,EAAEmC,IAAIZ,EAAEtB,EAAE,OAAO8B,EAAE,SAASlC,GAAG,2BAA2BA,EAAEK,OAAO+B,GAAGA,EAAEM,aAAa1C,EAAE2C,UAAUxC,EAAEqC,kBAAkBd,EAAEpB,MAAMN,EAAE2C,UAAUjB,EAAElB,QAAQoC,KAAK5C,GAAGE,GAAE,IAAK,EAAEiC,EAAEU,OAAOC,aAAaA,YAAYC,kBAAkBD,YAAYC,iBAAiB,0BAA0B,GAAGX,EAAED,EAAE,KAAKnB,EAAE,QAAQkB,IAAIC,GAAGC,KAAKlC,EAAE+B,EAAEjC,EAAE0B,EAAEzB,GAAGkC,GAAGD,EAAEC,GAAGJ,GAAG,SAAS5B,GAAGuB,EAAEtB,EAAE,OAAOF,EAAE+B,EAAEjC,EAAE0B,EAAEzB,GAAG+C
,uBAAuB,WAAWA,uBAAuB,WAAWtB,EAAEpB,MAAMwC,YAAYlC,MAAMT,EAAEkC,UAAUnC,GAAE,EAAG,GAAG,GAAG,IAAI,EAAE+C,GAAE,EAAGC,GAAG,EAAEC,EAAE,SAASnD,EAAEC,GAAGgD,IAAIR,GAAG,SAASzC,GAAGkD,EAAElD,EAAEM,KAAK,IAAI2C,GAAE,GAAI,IAAI/C,EAAEC,EAAE,SAASF,GAAGiD,GAAG,GAAGlD,EAAEC,EAAE,EAAEiC,EAAE9B,EAAE,MAAM,GAAG+B,EAAE,EAAEC,EAAE,GAAGE,EAAE,SAAStC,GAAG,IAAIA,EAAEoD,eAAe,CAAC,IAAInD,EAAEmC,EAAE,GAAGjC,EAAEiC,EAAEA,EAAEiB,OAAO,GAAGlB,GAAGnC,EAAE2C,UAAUxC,EAAEwC,UAAU,KAAK3C,EAAE2C,UAAU1C,EAAE0C,UAAU,KAAKR,GAAGnC,EAAEM,MAAM8B,EAAEQ,KAAK5C,KAAKmC,EAAEnC,EAAEM,MAAM8B,EAAE,CAACpC,IAAImC,EAAED,EAAE5B,QAAQ4B,EAAE5B,MAAM6B,EAAED,EAAE1B,QAAQ4B,EAAElC,IAAI,CAAC,EAAEiD,EAAEnC,EAAE,eAAesB,GAAGa,IAAIjD,EAAE+B,EAAE9B,EAAE+B,EAAEjC,GAAGyB,GAAG,WAAWyB,EAAEG,cAAchC,IAAIgB,GAAGpC,GAAE,EAAG,IAAI6B,GAAG,WAAWI,EAAE,EAAEe,GAAG,EAAEhB,EAAE9B,EAAE,MAAM,GAAGF,EAAE+B,EAAE9B,EAAE+B,EAAEjC,EAAE,IAAI,EAAEsD,EAAE,CAACC,SAAQ,EAAGC,SAAQ,GAAIC,EAAE,IAAI/C,KAAKgD,EAAE,SAASxD,EAAEC,GAAGJ,IAAIA,EAAEI,EAAEH,EAAEE,EAAED,EAAE,IAAIS,KAAKiD,EAAE/B,qBAAqBgC,IAAI,EAAEA,EAAE,WAAW,GAAG5D,GAAG,GAAGA,EAAEC,EAAEwD,EAAE,CAAC,IAAItD,EAAE,CAAC0D,UAAU,cAAczD,KAAKL,EAAEwB,KAAKuC,OAAO/D,EAAE+D,OAAOC,WAAWhE,EAAEgE,WAAWrB,UAAU3C,EAAEqC,UAAU4B,gBAAgBjE,EAAEqC,UAAUpC,GAAGE,EAAE+D,SAAS,SAASlE,GAAGA,EAAEI,EAAE,IAAID,EAAE,EAAE,CAAC,EAAEgE,EAAE,SAASnE,GAAG,GAAGA,EAAEgE,WAAW,CAAC,IAAI/D,GAAGD,EAAEqC,UAAU,KAAK,IAAI1B,KAAKmC,YAAYlC,OAAOZ,EAAEqC,UAAU,eAAerC,EAAEwB,KAAK,SAASxB,EAAEC,GAAG,IAAIC,EAAE,WAAWyD,EAAE3D,EAAEC,GAAGG,GAAG,EAAED,EAAE,WAAWC,GAAG,EAAEA,EAAE,WAAWyB,oBAAoB,YAAY3B,EAAEqD,GAAG1B,oBAAoB,gBAAgB1B,EAAEoD,EAAE,EAAEzB,iBAAiB,YAAY5B,EAAEqD,GAAGzB,iBAAiB,gBAAgB3B,EAAEoD,EAAE,CAAhO,CAAkOtD,EAAED,GAAG2D,EAAE1D,EAAED,EAAE,CAAC,EAAE4D,EAAE,SAAS5D,GAAG,CAAC,YAAY,UAAU,aAAa,eAAekE,SAAS,SAASjE,GAAG,OAAOD,EAAEC,EAAEkE,EAAEZ,EAAE,GAAG,EAAEa,EAAE,SAASlE,EAAEgC,GAAG,IAAIC,EAAEC,EAAEE,IAAIG,EAAErC,EAAE,OAAO6C,EAAE,SAASjD,GAAGA,EAAE2C,UAAUP,EAAEI,kBAAkBC,EAAEnC,MAAMN,EAAEiE,gBAAgBjE,EAAE2C,UAAUF,EAAEjC,QAAQoC,KAAK5C,GAAGmC,GAAE,GAAI,EAAEe,EAAElC,EAAE,cAAciC,GA
AGd,EAAEF,EAAE/B,EAAEuC,EAAEP,GAAGgB,GAAGxB,GAAG,WAAWwB,EAAEI,cAAchC,IAAI2B,GAAGC,EAAER,YAAY,IAAG,GAAIQ,GAAGnB,GAAG,WAAW,IAAIf,EAAEyB,EAAErC,EAAE,OAAO+B,EAAEF,EAAE/B,EAAEuC,EAAEP,GAAG/B,EAAE,GAAGF,GAAG,EAAED,EAAE,KAAK4D,EAAE9B,kBAAkBd,EAAEiC,EAAE9C,EAAEyC,KAAK5B,GAAG6C,GAAG,GAAG,EAAEQ,EAAE,CAAC,EAAEC,EAAE,SAAStE,EAAEC,GAAG,IAAIC,EAAEC,EAAEmC,IAAIJ,EAAE9B,EAAE,OAAO+B,EAAE,SAASnC,GAAG,IAAIC,EAAED,EAAE2C,UAAU1C,EAAEE,EAAEqC,kBAAkBN,EAAE5B,MAAML,EAAEiC,EAAE1B,QAAQoC,KAAK5C,GAAGE,IAAI,EAAEkC,EAAEpB,EAAE,2BAA2BmB,GAAG,GAAGC,EAAE,CAAClC,EAAE+B,EAAEjC,EAAEkC,EAAEjC,GAAG,IAAIwC,EAAE,WAAW4B,EAAEnC,EAAEzB,MAAM2B,EAAEkB,cAAchC,IAAIa,GAAGC,EAAEM,aAAa2B,EAAEnC,EAAEzB,KAAI,EAAGP,GAAE,GAAI,EAAE,CAAC,UAAU,SAASgE,SAAS,SAASlE,GAAG8B,iBAAiB9B,EAAEyC,EAAE,CAAC8B,MAAK,EAAGd,SAAQ,GAAI,IAAI/B,EAAEe,GAAE,GAAIV,GAAG,SAAS5B,GAAG+B,EAAE9B,EAAE,OAAOF,EAAE+B,EAAEjC,EAAEkC,EAAEjC,GAAG+C,uBAAuB,WAAWA,uBAAuB,WAAWd,EAAE5B,MAAMwC,YAAYlC,MAAMT,EAAEkC,UAAUgC,EAAEnC,EAAEzB,KAAI,EAAGP,GAAE,EAAG,GAAG,GAAG,GAAG,CAAC,EAAEsE,EAAE,SAASxE,GAAG,IAAIC,EAAEC,EAAEE,EAAE,QAAQH,EAAE,WAAW,IAAI,IAAIA,EAAE6C,YAAY2B,iBAAiB,cAAc,IAAI,WAAW,IAAIzE,EAAE8C,YAAY4B,OAAOzE,EAAE,CAAC6D,UAAU,aAAanB,UAAU,GAAG,IAAI,IAAIzC,KAAKF,EAAE,oBAAoBE,GAAG,WAAWA,IAAID,EAAEC,GAAGW,KAAK8D,IAAI3E,EAAEE,GAAGF,EAAE4E,gBAAgB,IAAI,OAAO3E,CAAC,CAAjL,GAAqL,GAAGC,EAAEI,MAAMJ,EAAEK,MAAMN,EAAE4E,cAAc3E,EAAEI,MAAM,GAAGJ,EAAEI,MAAMwC,YAAYlC,MAAM,OAAOV,EAAEM,QAAQ,CAACP,GAAGD,EAAEE,EAAE,CAAC,MAAMF,GAAG,CAAC,EAAE,aAAa2B,SAASmD,WAAWvC,WAAWtC,EAAE,GAAG6B,iBAAiB,QAAQ,WAAW,OAAOS,WAAWtC,EAAE,EAAE,GAAG,C","sources":["../node_modules/web-vitals/dist/web-vitals.js"],"sourcesContent":["var e,t,n,i,r=function(e,t){return{name:e,value:void 0===t?-1:t,delta:0,entries:[],id:\"v2-\".concat(Date.now(),\"-\").concat(Math.floor(8999999999999*Math.random())+1e12)}},a=function(e,t){try{if(PerformanceObserver.supportedEntryTypes.includes(e)){if(\"first-input\"===e&&!(\"PerformanceEventTiming\"in self))return;var n=new PerformanceObserver((function(e){return 
e.getEntries().map(t)}));return n.observe({type:e,buffered:!0}),n}}catch(e){}},o=function(e,t){var n=function n(i){\"pagehide\"!==i.type&&\"hidden\"!==document.visibilityState||(e(i),t&&(removeEventListener(\"visibilitychange\",n,!0),removeEventListener(\"pagehide\",n,!0)))};addEventListener(\"visibilitychange\",n,!0),addEventListener(\"pagehide\",n,!0)},u=function(e){addEventListener(\"pageshow\",(function(t){t.persisted&&e(t)}),!0)},c=function(e,t,n){var i;return function(r){t.value>=0&&(r||n)&&(t.delta=t.value-(i||0),(t.delta||void 0===i)&&(i=t.value,e(t)))}},f=-1,s=function(){return\"hidden\"===document.visibilityState?0:1/0},m=function(){o((function(e){var t=e.timeStamp;f=t}),!0)},v=function(){return f<0&&(f=s(),m(),u((function(){setTimeout((function(){f=s(),m()}),0)}))),{get firstHiddenTime(){return f}}},d=function(e,t){var n,i=v(),o=r(\"FCP\"),f=function(e){\"first-contentful-paint\"===e.name&&(m&&m.disconnect(),e.startTime-1&&e(t)},f=r(\"CLS\",0),s=0,m=[],v=function(e){if(!e.hadRecentInput){var t=m[0],i=m[m.length-1];s&&e.startTime-i.startTime<1e3&&e.startTime-t.startTime<5e3?(s+=e.value,m.push(e)):(s=e.value,m=[e]),s>f.value&&(f.value=s,f.entries=m,n())}},h=a(\"layout-shift\",v);h&&(n=c(i,f,t),o((function(){h.takeRecords().map(v),n(!0)})),u((function(){s=0,l=-1,f=r(\"CLS\",0),n=c(i,f,t)})))},T={passive:!0,capture:!0},y=new Date,g=function(i,r){e||(e=r,t=i,n=new Date,w(removeEventListener),E())},E=function(){if(t>=0&&t1e12?new Date:performance.now())-e.timeStamp;\"pointerdown\"==e.type?function(e,t){var n=function(){g(e,t),r()},i=function(){r()},r=function(){removeEventListener(\"pointerup\",n,T),removeEventListener(\"pointercancel\",i,T)};addEventListener(\"pointerup\",n,T),addEventListener(\"pointercancel\",i,T)}(t,e):g(t,e)}},w=function(e){[\"mousedown\",\"keydown\",\"touchstart\",\"pointerdown\"].forEach((function(t){return e(t,S,T)}))},L=function(n,f){var 
s,m=v(),d=r(\"FID\"),p=function(e){e.startTimeperformance.now())return;n.entries=[t],e(n)}catch(e){}},\"complete\"===document.readyState?setTimeout(t,0):addEventListener(\"load\",(function(){return setTimeout(t,0)}))};export{h as getCLS,d as getFCP,L as getFID,F as getLCP,P as getTTFB};\n"],"names":["e","t","n","i","r","name","value","delta","entries","id","concat","Date","now","Math","floor","random","a","PerformanceObserver","supportedEntryTypes","includes","self","getEntries","map","observe","type","buffered","o","document","visibilityState","removeEventListener","addEventListener","u","persisted","c","f","s","m","timeStamp","v","setTimeout","firstHiddenTime","d","disconnect","startTime","push","window","performance","getEntriesByName","requestAnimationFrame","p","l","h","hadRecentInput","length","takeRecords","T","passive","capture","y","g","w","E","entryType","target","cancelable","processingStart","forEach","S","L","b","F","once","P","getEntriesByType","timing","max","navigationStart","responseStart","readyState"],"sourceRoot":""} -------------------------------------------------------------------------------- /docs/static/css/main.ea4e714c.css: -------------------------------------------------------------------------------- 1 | @import url(https://fonts.googleapis.com/css2?family=Poppins&display=swap); 2 | /* 3 | ! 
tailwindcss v3.3.3 | MIT License | https://tailwindcss.com 4 | */*,:after,:before{border:0 solid #e5e7eb;box-sizing:border-box}:after,:before{--tw-content:""}html{-webkit-text-size-adjust:100%;-webkit-font-feature-settings:normal;font-feature-settings:normal;font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;font-variation-settings:normal;line-height:1.5;tab-size:4}body{line-height:inherit}hr{border-top-width:1px;color:inherit;height:0}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,pre,samp{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:initial}sub{bottom:-.25em}sup{top:-.5em}table{border-collapse:collapse;border-color:inherit;text-indent:0}button,input,optgroup,select,textarea{-webkit-font-feature-settings:inherit;font-feature-settings:inherit;color:inherit;font-family:inherit;font-size:100%;font-variation-settings:inherit;font-weight:inherit;line-height:inherit;margin:0;padding:0}button,select{text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button;background-color:initial;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:initial}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dd,dl,figure,h1,h2,h3,h4,h5,h6,hr,p,pre{margin:0}fieldset{margin:0}fieldset,legend{padding
:0}menu,ol,ul{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#9ca3af;opacity:1}input::placeholder,textarea::placeholder{color:#9ca3af;opacity:1}[role=button],button{cursor:pointer}:disabled{cursor:default}audio,canvas,embed,iframe,img,object,svg,video{display:block;vertical-align:middle}img,video{height:auto;max-width:100%}[hidden]{display:none}*,:after,:before{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: }::-webkit-backdrop{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: 
;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: }::backdrop{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: 
}.fixed{position:fixed}.inset-0{inset:0}.ml-2{margin-left:.5rem}.ml-3{margin-left:.75rem}.ml-4{margin-left:1rem}.ml-5{margin-left:1.25rem}.ml-6{margin-left:1.5rem}.ml-8{margin-left:2rem}.mt-1{margin-top:.25rem}.mt-2{margin-top:.5rem}.mt-24{margin-top:6rem}.mt-4{margin-top:1rem}.inline-block{display:inline-block}.flex{display:flex}.table{display:table}.hidden{display:none}.h-1{height:.25rem}.w-10{width:2.5rem}.w-12{width:3rem}.w-3\/4{width:75%}.w-48{width:12rem}.w-64{width:16rem}.w-8{width:2rem}.max-w-2xl{max-width:42rem}.max-w-xl{max-width:36rem}.flex-shrink-0{flex-shrink:0}.border-collapse{border-collapse:collapse}.transform{-webkit-transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.cursor-pointer{cursor:pointer}.list-disc{list-style-type:disc}.flex-row{flex-direction:row}.flex-col{flex-direction:column}.content-center{align-content:center}.justify-center{justify-content:center}.divide-y>:not([hidden])~:not([hidden]){--tw-divide-y-reverse:0;border-bottom-width:calc(1px*var(--tw-divide-y-reverse));border-top-width:calc(1px*(1 - var(--tw-divide-y-reverse)))}.overflow-hidden{overflow:hidden}.whitespace-normal{white-space:normal}.rounded{border-radius:.25rem}.rounded-lg{border-radius:.5rem}.border{border-width:1px}.border-2{border-width:2px}.border-l-2{border-left-width:2px}.border-r-0{border-right-width:0}.border-black{--tw-border-opacity:1;border-color:rgb(0 0 0/var(--tw-border-opacity))}.border-blue-500{--tw-border-opacity:1;border-color:rgb(59 130 246/var(--tw-border-opacity))}.border-gray-300{--tw-border-opacity:1;border-color:rgb(209 213 219/var(--tw-border-opacity))}.border-gray-400{--tw-border-opacity:1;border-color:rgb(156 163 
175/var(--tw-border-opacity))}.border-gray-500{--tw-border-opacity:1;border-color:rgb(107 114 128/var(--tw-border-opacity))}.border-gray-600{--tw-border-opacity:1;border-color:rgb(75 85 99/var(--tw-border-opacity))}.border-green-500{--tw-border-opacity:1;border-color:rgb(34 197 94/var(--tw-border-opacity))}.border-red-500{--tw-border-opacity:1;border-color:rgb(239 68 68/var(--tw-border-opacity))}.bg-black{--tw-bg-opacity:1;background-color:rgb(0 0 0/var(--tw-bg-opacity))}.bg-blue-100{--tw-bg-opacity:1;background-color:rgb(219 234 254/var(--tw-bg-opacity))}.bg-blue-50{--tw-bg-opacity:1;background-color:rgb(239 246 255/var(--tw-bg-opacity))}.bg-blue-500{--tw-bg-opacity:1;background-color:rgb(59 130 246/var(--tw-bg-opacity))}.bg-gray-100{--tw-bg-opacity:1;background-color:rgb(243 244 246/var(--tw-bg-opacity))}.bg-gray-200{--tw-bg-opacity:1;background-color:rgb(229 231 235/var(--tw-bg-opacity))}.bg-gray-300{--tw-bg-opacity:1;background-color:rgb(209 213 219/var(--tw-bg-opacity))}.bg-green-100{--tw-bg-opacity:1;background-color:rgb(220 252 231/var(--tw-bg-opacity))}.bg-green-50{--tw-bg-opacity:1;background-color:rgb(240 253 244/var(--tw-bg-opacity))}.bg-red-100{--tw-bg-opacity:1;background-color:rgb(254 226 226/var(--tw-bg-opacity))}.bg-teal-100{--tw-bg-opacity:1;background-color:rgb(204 251 241/var(--tw-bg-opacity))}.bg-teal-50{--tw-bg-opacity:1;background-color:rgb(240 253 250/var(--tw-bg-opacity))}.bg-violet-100{--tw-bg-opacity:1;background-color:rgb(237 233 254/var(--tw-bg-opacity))}.bg-violet-50{--tw-bg-opacity:1;background-color:rgb(245 243 255/var(--tw-bg-opacity))}.bg-white{--tw-bg-opacity:1;background-color:rgb(255 255 
255/var(--tw-bg-opacity))}.bg-opacity-50{--tw-bg-opacity:0.5}.p-2{padding:.5rem}.p-3{padding:.75rem}.p-4{padding:1rem}.px-0{padding-left:0;padding-right:0}.px-0\.5{padding-left:.125rem;padding-right:.125rem}.px-1{padding-left:.25rem;padding-right:.25rem}.px-2{padding-left:.5rem;padding-right:.5rem}.px-4{padding-left:1rem;padding-right:1rem}.py-1{padding-bottom:.25rem;padding-top:.25rem}.py-2{padding-bottom:.5rem;padding-top:.5rem}.pb-1{padding-bottom:.25rem}.pb-2{padding-bottom:.5rem}.pl-2{padding-left:.5rem}.pl-3{padding-left:.75rem}.pl-4{padding-left:1rem}.pl-5{padding-left:1.25rem}.pl-6{padding-left:1.5rem}.pr-2{padding-right:.5rem}.pr-3{padding-right:.75rem}.pr-4{padding-right:1rem}.pr-6{padding-right:1.5rem}.pt-1{padding-top:.25rem}.pt-2{padding-top:.5rem}.pt-3{padding-top:.75rem}.pt-4{padding-top:1rem}.pt-8{padding-top:2rem}.text-center{text-align:center}.font-mono{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace}.font-poppins{font-family:Poppins,sans-serif}.font-sans{font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji}.font-serif{font-family:ui-serif,Georgia,Cambria,Times New Roman,Times,serif}.text-2xl{font-size:1.5rem;line-height:2rem}.text-base{font-size:1rem;line-height:1.5rem}.text-lg{font-size:1.125rem;line-height:1.75rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xs{font-size:.75rem;line-height:1rem}.font-bold{font-weight:700}.font-extrabold{font-weight:800}.font-semibold{font-weight:600}.text-black{--tw-text-opacity:1;color:rgb(0 0 0/var(--tw-text-opacity))}.text-blue-400{--tw-text-opacity:1;color:rgb(96 165 250/var(--tw-text-opacity))}.text-blue-600{--tw-text-opacity:1;color:rgb(37 99 235/var(--tw-text-opacity))}.text-blue-700{--tw-text-opacity:1;color:rgb(29 78 216/var(--tw-text-opacity))}.text-gray-600{--tw-text-opacity:1;color:rgb(75 85 
99/var(--tw-text-opacity))}.text-green-700{--tw-text-opacity:1;color:rgb(21 128 61/var(--tw-text-opacity))}.text-red-500{--tw-text-opacity:1;color:rgb(239 68 68/var(--tw-text-opacity))}.text-red-700{--tw-text-opacity:1;color:rgb(185 28 28/var(--tw-text-opacity))}.text-red-800{--tw-text-opacity:1;color:rgb(153 27 27/var(--tw-text-opacity))}.text-white{--tw-text-opacity:1;color:rgb(255 255 255/var(--tw-text-opacity))}.underline{text-decoration-line:underline}.shadow-xl{--tw-shadow:0 20px 25px -5px rgba(0,0,0,.1),0 8px 10px -6px rgba(0,0,0,.1);--tw-shadow-colored:0 20px 25px -5px var(--tw-shadow-color),0 8px 10px -6px var(--tw-shadow-color);box-shadow:0 0 #0000,0 0 #0000,var(--tw-shadow);box-shadow:var(--tw-ring-offset-shadow,0 0 #0000),var(--tw-ring-shadow,0 0 #0000),var(--tw-shadow)}.filter{-webkit-filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow);filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.transition-transform{transition-duration:.15s;transition-property:-webkit-transform;transition-property:transform;transition-property:transform,-webkit-transform;transition-timing-function:cubic-bezier(.4,0,.2,1)}.duration-300{transition-duration:.3s}body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif;margin:0}code{font-family:source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace}.hover\:scale-110:hover{--tw-scale-x:1.1;--tw-scale-y:1.1;-webkit-transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) 
scaleY(var(--tw-scale-y));transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.hover\:cursor-not-allowed:hover{cursor:not-allowed}.hover\:border-2:hover{border-width:2px}.hover\:border-black:hover{--tw-border-opacity:1;border-color:rgb(0 0 0/var(--tw-border-opacity))}.hover\:bg-blue-300:hover{--tw-bg-opacity:1;background-color:rgb(147 197 253/var(--tw-bg-opacity))}.hover\:bg-gray-200:hover{--tw-bg-opacity:1;background-color:rgb(229 231 235/var(--tw-bg-opacity))}.hover\:bg-gray-300:hover{--tw-bg-opacity:1;background-color:rgb(209 213 219/var(--tw-bg-opacity))}.hover\:bg-green-300:hover{--tw-bg-opacity:1;background-color:rgb(134 239 172/var(--tw-bg-opacity))}.hover\:bg-red-200:hover{--tw-bg-opacity:1;background-color:rgb(254 202 202/var(--tw-bg-opacity))}.hover\:bg-red-300:hover{--tw-bg-opacity:1;background-color:rgb(252 165 165/var(--tw-bg-opacity))}.hover\:text-3xl:hover{font-size:1.875rem;line-height:2.25rem}.hover\:font-bold:hover{font-weight:700}.hover\:font-extrabold:hover{font-weight:800}@media (min-width:768px){.md\:w-1\/2{width:50%}}@media (min-width:1024px){.lg\:w-1\/3{width:33.333333%}} 5 | /*# sourceMappingURL=main.ea4e714c.css.map*/ -------------------------------------------------------------------------------- /src/App.js: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect, useRef } from "react"; 2 | import TextInput from "./textBox"; 3 | import Modal from "react-modal"; 4 | import fullText from "./textContent"; 5 | import gpuJSONData from "./gpu_config.json"; 6 | import cpuJSONData from "./cpu_config.json"; 7 | 8 | const billion = 1000000000; 9 | const tera = 1000000000 * 1000; 10 | let configPath = "/gpu_poor/all_configs.json"; 11 | if ( 12 | window.location.hostname === "localhost" || 13 | window.location.hostname === "127.0.0.1" 14 | ) { 15 | 
configPath = "/gpu_poor/all_configs.json"; 16 | } 17 | const MAX_FILE_SIZE = 500000; 18 | const ggml_quants = [ 19 | "ggml_QK4_0", 20 | "ggml_QK4_1", 21 | "ggml_QK5_0", 22 | "ggml_QK5_1", 23 | "ggml_QK8_0", 24 | "ggml_QK8_1", 25 | 26 | "ggml_Q2_K", 27 | 28 | "ggml_Q3_K_L", 29 | "ggml_Q3_K_M", 30 | 31 | "ggml_QK4_K_M", 32 | "ggml_QK4_K_S", 33 | 34 | "ggml_QK5_K_M", 35 | "ggml_Q6_K", 36 | ]; 37 | // console.log(configPath); 38 | 39 | /* 40 | dropdownTrnOrNot: 'inf', 'trn', 'inf_vLLM','inf_exL','inf_ggml' 41 | dropdownFullOrNot: 'lora_trn, 'full_trn', 'qlora' 42 | dropdownOpt: 'no_opt', 'sgd_opt','adam_opt' 43 | dropdownQuant: 'no_quant','bnb_int8','bnb_q4', 44 | */ 45 | const specialNamesMapping = { 46 | "meta-llama/Llama-2-7b": "meta-llama/Llama-2-7b-hf", 47 | "meta-llama/Llama-13-7b": "meta-llama/Llama-13-7b-hf", 48 | "meta-llama/Llama-2-70b": "meta-llama/Llama-13-70b-hf", 49 | }; 50 | 51 | function specialMapping(name) { 52 | if (name in specialNamesMapping) { 53 | return specialNamesMapping[name]; 54 | } 55 | return name; 56 | } 57 | 58 | function getKey(keys, obj, defaultVal) { 59 | let toReturn = null; 60 | for (const key of keys) { 61 | if (obj.hasOwnProperty(key)) { 62 | // console.log("found: ",key); 63 | toReturn = obj[key]; 64 | break; 65 | } 66 | } 67 | if (toReturn == null) { 68 | return defaultVal; 69 | } 70 | return toReturn; 71 | } 72 | 73 | function computeOverheadGGML(contextLen) { 74 | return 0.1 * contextLen; 75 | } 76 | 77 | function computeInferenceOnlyActivationMemory(contextLen, parsedConfig) { 78 | const hiddenDim = parsedConfig["hiddenDim"]; 79 | const heads = parsedConfig["heads"]; 80 | 81 | //return ((1000*4096*5)*2 + (1000*1000*32*2))/(1024*1024) 82 | return ( 83 | (contextLen * hiddenDim * 5 * 2 + contextLen * contextLen * heads * 2) / 84 | (1024 * 1024) 85 | ); 86 | } 87 | 88 | function getModelSuggestions(query, modelNames, maxSuggestions = 10) { 89 | if (!query || query.length < 2) return []; 90 | 91 | query = query.toLowerCase(); 92 
| 93 | // Split query into words for more flexible matching 94 | const queryWords = query.split(/[\s/-]+/).filter(word => word.length > 0); 95 | 96 | return modelNames 97 | .filter(modelName => { 98 | const modelNameLower = modelName.toLowerCase(); 99 | 100 | // Check if all query words are present in the model name 101 | return queryWords.every(word => modelNameLower.includes(word)); 102 | }) 103 | .sort((a, b) => { 104 | const aLower = a.toLowerCase(); 105 | const bLower = b.toLowerCase(); 106 | 107 | // Prioritize exact matches 108 | const aExactMatch = aLower.startsWith(query); 109 | const bExactMatch = bLower.startsWith(query); 110 | 111 | if (aExactMatch && !bExactMatch) return -1; 112 | if (!aExactMatch && bExactMatch) return 1; 113 | 114 | // Then prioritize matches at word boundaries 115 | const aWordMatch = aLower.includes('/' + query) || aLower.includes('-' + query); 116 | const bWordMatch = bLower.includes('/' + query) || bLower.includes('-' + query); 117 | 118 | if (aWordMatch && !bWordMatch) return -1; 119 | if (!aWordMatch && bWordMatch) return 1; 120 | 121 | // Finally, sort by string length (shorter names first) 122 | return a.length - b.length; 123 | }) 124 | .slice(0, maxSuggestions); 125 | } 126 | 127 | 128 | //floatBytes, quant 129 | function computeModelSizeGGML(parsedConfig, quant) { 130 | const vocab = parsedConfig["vocab"], 131 | heads = parsedConfig["heads"], 132 | numLayers = parsedConfig["num_layers"], 133 | hiddenDim = parsedConfig["hiddenDim"], 134 | interDim = parsedConfig["interDim"]; 135 | 136 | const totalParams = 137 | vocab * hiddenDim * 2 + 138 | numLayers * 4 * hiddenDim * hiddenDim + 139 | numLayers * 3 * interDim * hiddenDim; 140 | 141 | const other_v_down_params = 142 | numLayers * hiddenDim * hiddenDim + numLayers * hiddenDim * interDim; 143 | 144 | const other_params_Q2K = 145 | totalParams - 146 | (hiddenDim * hiddenDim * numLayers * 2 + 2 * vocab * hiddenDim); 147 | 148 | const mult_factor_dic = { 149 | ggml_QK4_0: 18, 
150 | ggml_QK4_1: 20, 151 | ggml_QK5_0: 22, 152 | ggml_QK5_1: 24, 153 | ggml_QK8_0: 34, 154 | ggml_QK8_1: 40, 155 | }; 156 | 157 | const mult_factor_dic_64 = { 158 | ggml_Q6_K: 54.0, 159 | ggml_Q3: 26.0, 160 | ggml_Q4: 38.0, 161 | ggml_Q5: 46.0, 162 | }; 163 | 164 | //Q2_K is 22.0 165 | 166 | const mult_factor_dic_combination = { 167 | ggml_Q3_K_L: [38.0, 26.0], 168 | ggml_Q3_K_M: [46.0, 26.0], 169 | ggml_QK4_K_S: [46.0, 38.0], 170 | ggml_QK4_K_M: [54.0, 38.0], 171 | ggml_QK5_K_M: [54.0, 46.0], 172 | ggml_Q2_K: [26.0, 22.0], 173 | }; 174 | 175 | let total = 0; 176 | if (mult_factor_dic.hasOwnProperty(quant)) { 177 | total = (mult_factor_dic[quant] * totalParams) / (32 * 1024 * 1024); 178 | } 179 | if (mult_factor_dic_64.hasOwnProperty(quant)) { 180 | total = (mult_factor_dic_64[quant] * totalParams) / (64 * 1024 * 1024); 181 | } 182 | if (mult_factor_dic_combination.hasOwnProperty(quant)) { 183 | const factors = mult_factor_dic_combination[quant]; 184 | 185 | if (quant === "ggml_Q2_K") { 186 | total = 187 | ((totalParams - other_params_Q2K) * factors[1] + 188 | other_params_Q2K * factors[0]) / 189 | (64 * 1024 * 1024); 190 | } else { 191 | total = 192 | ((totalParams - other_v_down_params) * factors[1] + 193 | other_v_down_params * factors[0]) / 194 | (64 * 1024 * 1024); 195 | } 196 | } 197 | 198 | return total; 199 | } 200 | 201 | function computeModelSize(parsedConfig) { 202 | const vocab = parsedConfig["vocab"], 203 | heads = parsedConfig["heads"], 204 | numLayers = parsedConfig["num_layers"], 205 | hiddenDim = parsedConfig["hiddenDim"], 206 | interDim = parsedConfig["interDim"]; 207 | 208 | // console.log(vocab, heads, numLayers, hiddenDim, interDim); 209 | // let fB = floatBytes; 210 | // if (quant === 'bnb_int8'){fB = 1;} 211 | // if (quant === 'bnb_q4'){fB = 0.5;} 212 | 213 | const out = 214 | vocab * hiddenDim * 2 + 215 | numLayers * 4 * hiddenDim * hiddenDim + 216 | numLayers * 3 * interDim * hiddenDim; 217 | // console.log("this is out: ", out) 218 | 219 
| return out; 220 | } 221 | 222 | function getGradOptMemory( 223 | dropdownFullOrNot, 224 | dropdownOpt, 225 | dropdownQuant, 226 | modelSize, 227 | floatBytes, 228 | parsedConfig, 229 | contextLen, 230 | batchSize = 1 231 | ) { 232 | const full = dropdownFullOrNot, 233 | opt = dropdownOpt, 234 | quant = dropdownQuant; 235 | console.log(full, opt, quant); 236 | 237 | //QLora start 238 | // console.log("full: ", full); 239 | if (full === "qlora" && opt === "adam_opt") { 240 | //Need to check if q4 also takes extra memory 241 | console.log("calculating qlora"); 242 | return ( 243 | parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 0.5 * 4 * 3 + 244 | getExtraMemory(parsedConfig, "qlora", contextLen) * batchSize 245 | ); 246 | } 247 | if (full === "qlora" && opt === "sgd_opt") { 248 | //Need to check if q4 also takes extra memory 249 | return ( 250 | parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 0.5 * 4 * 1 + 251 | getExtraMemory(parsedConfig, "qlora", contextLen) * batchSize 252 | ); 253 | } 254 | //QLora end 255 | 256 | if (full === "full_trn" && opt === "adam_opt" && quant === "no_quant") { 257 | return modelSize * 3 * floatBytes; 258 | } 259 | 260 | if (full === "full_trn" && opt === "adam_opt" && quant === "bnb_int8") { 261 | return ( 262 | modelSize * 3 * 1 + 263 | getExtraMemory(parsedConfig, quant, contextLen) * batchSize 264 | ); //Some extra mmeory that bnb int8 takes 265 | } 266 | 267 | if (full === "full_trn" && opt === "adam_opt" && quant === "bnb_q4") { 268 | //Need to check if q4 also takes extra memory 269 | return ( 270 | modelSize * 3 * 0.5 + 271 | getExtraMemory(parsedConfig, quant, contextLen) * batchSize 272 | ); 273 | } 274 | 275 | //------------ 276 | if (full === "full_trn" && opt === "sgd_opt" && quant === "no_quant") { 277 | return modelSize * 1 * floatBytes; 278 | } 279 | 280 | if (full === "full_trn" && opt === "sgd_opt" && quant === "bnb_int8") { 281 | return ( 282 | modelSize * 1 * 1 + 283 | getExtraMemory(parsedConfig, 
quant, contextLen) * batchSize 284 | ); 285 | } 286 | 287 | if (full === "full_trn" && opt === "sgd_opt" && quant === "bnb_q4") { 288 | return ( 289 | modelSize * 1 * 0.5 + 290 | getExtraMemory(parsedConfig, quant, contextLen) * batchSize 291 | ); 292 | } 293 | 294 | //4*layer*8*hid*4*2 295 | 296 | //------------ 297 | if (full === "lora_trn" && opt === "adam_opt" && quant === "no_quant") { 298 | return ( 299 | parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 3 * 2 300 | ); 301 | } 302 | 303 | if (full === "lora_trn" && opt === "adam_opt" && quant === "bnb_int8") { 304 | return ( 305 | parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 3 + 306 | getExtraMemory(parsedConfig, quant, contextLen) * batchSize 307 | ); 308 | } 309 | 310 | if (full === "lora_trn" && opt === "adam_opt" && quant === "bnb_q4") { 311 | return ( 312 | parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 3 + 313 | getExtraMemory(parsedConfig, quant, contextLen) * batchSize 314 | ); 315 | } 316 | 317 | //------------ 318 | if (full === "lora_trn" && opt === "sgd_opt" && quant === "no_quant") { 319 | return parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 2; 320 | } 321 | 322 | if (full === "lora_trn" && opt === "sgd_opt" && quant === "bnb_int8") { 323 | return ( 324 | parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 1 + 325 | getExtraMemory(parsedConfig, quant, contextLen) * batchSize 326 | ); 327 | } 328 | 329 | if (full === "lora_trn" && opt === "sgd_opt" && quant === "bnb_q4") { 330 | return ( 331 | parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 1 + 332 | getExtraMemory(parsedConfig, quant, contextLen) * batchSize 333 | ); 334 | } 335 | 336 | console.log(full, opt, quant); 337 | throw new Error("Invalid combination of values"); 338 | } 339 | 340 | function getExtraMemory(parsedConfig, quant, contextLen) { 341 | const constant_8_extra = 0.75; 342 | const constant_4_extra = 1.0; 343 | const constant_qlora = 0.75; 
344 | 345 | const common = 346 | (10 * parsedConfig.hiddenDim + 347 | 5 * parsedConfig.hiddenDim + 348 | 4 * parsedConfig.interDim + 349 | 2 * parsedConfig.interDim) * 350 | parsedConfig.num_layers; 351 | 352 | let extra_mem = 0; 353 | let contextLenSqrtRoot = 1.0; 354 | // if (contextLen > 100){ 355 | // contextLenSqrtRoot = Math.round(Math.sqrt(contextLen)); 356 | // } 357 | // else{ 358 | // contextLenSqrtRoot = contextLen; 359 | // } 360 | const baseLen = 50; 361 | const ratioContextLen = contextLen / 50; 362 | if (ratioContextLen > 1.0) { 363 | contextLenSqrtRoot = Math.sqrt(ratioContextLen); 364 | } 365 | 366 | if (quant === "bnb_int8") { 367 | extra_mem = 368 | constant_8_extra * common * baseLen * contextLenSqrtRoot * 1.25; 369 | } 370 | 371 | if (quant === "bnb_q4") { 372 | extra_mem = 373 | constant_4_extra * common * baseLen * contextLenSqrtRoot * 1.0; 374 | } 375 | 376 | if (quant === "qlora") { 377 | extra_mem = 378 | constant_qlora * common * baseLen * contextLenSqrtRoot * 1.0; 379 | } 380 | 381 | console.log("extra mem", extra_mem); 382 | return extra_mem; 383 | } 384 | 385 | function getExtraMemoryOld(parsedConfig, quant) { 386 | const constant_8_overhead = 200.0, 387 | constant_8_extra = 350.0; 388 | const constant_4_overhead = 350.0, 389 | constant_4_extra = 550.0; 390 | 391 | const common = 392 | (10 * parsedConfig.hiddenDim + 393 | 5 * parsedConfig.hiddenDim + 394 | 4 * parsedConfig.interDim + 395 | 2 * parsedConfig.interDim) * 396 | parsedConfig.num_layers; 397 | 398 | let extra_mem = 0; 399 | 400 | if (quant === "bnb_int8") { 401 | extra_mem = constant_8_overhead * common + constant_8_extra * common; 402 | } 403 | 404 | if (quant === "bnb_q4") { 405 | extra_mem = constant_4_overhead * common + constant_4_extra * common; 406 | } 407 | 408 | console.log("extra mem", extra_mem); 409 | return extra_mem; 410 | } 411 | 412 | function getActivationMemory( 413 | parsedConfig, 414 | contextLen, 415 | floatBytes, 416 | quant, 417 | dropdownFullOrNot, 
418 | batchSize = 1 419 | ) { 420 | const heads = parsedConfig["heads"], 421 | numLayers = parsedConfig["num_layers"], 422 | hiddenDim = parsedConfig["hiddenDim"], 423 | interDim = parsedConfig["interDim"]; 424 | 425 | let fB = floatBytes; 426 | const len = contextLen; 427 | 428 | // if (quant==='bnb_int8'){fB=1;} 429 | // if (quant==='bnb_q4'){fB=0.5;} 430 | 431 | console.log("activation: ", heads, numLayers, hiddenDim, interDim); 432 | 433 | //const attn_per_layer = qkv + qk (transpose) + attn mat + attn mat convert tp fp32 + attn mat divided by sqrt + 434 | const attn_per_layer = 435 | len * hiddenDim * 3 * fB + 436 | len * hiddenDim * 2 * fB + 437 | len * len * heads * fB + 438 | len * len * heads * 4 + 439 | len * len * heads * fB + 440 | len * hiddenDim * fB + 441 | len * hiddenDim * fB + 442 | len * hiddenDim * fB; 443 | 444 | // heads*len*len*4 + heads*len*len*fB + 3*hiddenDim*len*fB + hiddenDim*len*fB + hiddenDim*len*fB 445 | 446 | const ffn_per_layer = 447 | hiddenDim * len * fB + 448 | hiddenDim * len * fB + 449 | fB * 5 * len * interDim + 450 | interDim * len * fB; 451 | 452 | const norm = len * 4 * 2 + len * hiddenDim * fB * 6; 453 | 454 | let lora = 0; 455 | // if (dropdownFullOrNot==='lora_trn'){ 456 | // lora = (8*len*2 + hiddenDim*len*2)*4; 457 | // } 458 | 459 | const total_per_layer = attn_per_layer + ffn_per_layer + norm + lora; 460 | console.log( 461 | "total per layer: ", 462 | convertToMB(attn_per_layer), 463 | convertToMB(ffn_per_layer), 464 | convertToMB(norm), 465 | convertToMB(lora) 466 | ); 467 | 468 | //total per layer: 4.2724609375 5.55419921875 6.409454345703125 8.02001953125 469 | let total = total_per_layer * numLayers; 470 | total = total * batchSize; 471 | 472 | console.log("this is total: ", total, attn_per_layer + ffn_per_layer); 473 | 474 | return total; 475 | } 476 | //exl2 quants 477 | function checkCombinationTrainInferenceTok( 478 | quantType, 479 | setErrorMessage, 480 | openModal, 481 | typeOfTrn 482 | ) { 483 | // //! 
Can't train full with QLoRA 484 | // if (typeOfTrn === "full_trn" && ggml_quants.includes(quantType)) { 485 | // setErrorMessage("Can't use GGML for training"); 486 | // openModal(); 487 | // return false; 488 | // } 489 | if (typeOfTrn === "qlora" && quantType != "no_quant") { 490 | setErrorMessage( 491 | "QLoRA is 4bit explicit. No need to select a quant type if you are training using QLoRA. Set it to 'None'" 492 | ); 493 | openModal(); 494 | return false; 495 | } 496 | return true; 497 | } 498 | 499 | function checkCombinationTrainInference( 500 | quantType, 501 | setErrorMessage, 502 | openModal, 503 | typeOfTrn 504 | ) { 505 | //! Can't train full with QLoRA 506 | if (typeOfTrn === "full_trn" && ggml_quants.includes(quantType)) { 507 | setErrorMessage("Can't use GGML for training"); 508 | openModal(); 509 | return false; 510 | } 511 | if (typeOfTrn === "qlora" && quantType != "no_quant") { 512 | setErrorMessage( 513 | "QLoRA is 4bit explicit. No need to select a quant type if you are training using QLoRA. 
Set it to 'None'" 514 | ); 515 | openModal(); 516 | return false; 517 | } 518 | return true; 519 | } 520 | 521 | function checkCombinationInferenceTok( 522 | trnType, 523 | quantType, 524 | setErrorMessage, 525 | openModal 526 | ) { 527 | if (ggml_quants.includes(quantType)) { 528 | if (trnType != "inf_ggml") { 529 | setErrorMessage( 530 | "Invalid combination of inference type/quantization" 531 | ); 532 | openModal(); 533 | return false; 534 | } 535 | } 536 | if (quantType != "no_quant" && trnType === "inf_vLLM") { 537 | setErrorMessage("vLLm doesn't support quant (maybe)"); 538 | openModal(); 539 | return false; 540 | } 541 | if ( 542 | trnType === "inf_ggml" && 543 | (quantType === "bnb_int8" || quantType === "bnb_q4") 544 | ) { 545 | setErrorMessage("ggml doesn't support bnb"); 546 | openModal(); 547 | return false; 548 | } 549 | 550 | return true; 551 | } 552 | 553 | function checkCombinationInference( 554 | trnType, 555 | quantType, 556 | setErrorMessage, 557 | openModal 558 | ) { 559 | if (ggml_quants.includes(quantType)) { 560 | if (trnType != "inf_ggml") { 561 | setErrorMessage( 562 | "Invalid combination of inference type/quantization" 563 | ); 564 | openModal(); 565 | return false; 566 | } 567 | } 568 | if (quantType != "no_quant" && trnType === "inf_vLLM") { 569 | setErrorMessage("vLLm doesn't support quant (maybe)"); 570 | openModal(); 571 | return false; 572 | } 573 | if ( 574 | trnType === "inf_ggml" && 575 | (quantType === "bnb_int8" || quantType === "bnb_q4") 576 | ) { 577 | setErrorMessage("ggml doesn't support bnb"); 578 | openModal(); 579 | return false; 580 | } 581 | if (trnType === "inf_ggml" && quantType === "no_quant") { 582 | setErrorMessage( 583 | "If you want no quant then pick vLLM/HF inference framework" 584 | ); 585 | openModal(); 586 | return false; 587 | } 588 | 589 | if (trnType === "inf_exL") { 590 | setErrorMessage("exLlama hasn't been added yet :)"); 591 | openModal(); 592 | return false; 593 | } 594 | return true; 595 | } 596 | 
597 | function sanityUploadedConfig(jsonUploadedData, setErrorMessage, openModal) { 598 | function uploadError() { 599 | setErrorMessage( 600 | "upload config doesn't have correct keys. make sure your config has the keys present in https://huggingface.co/codellama/CodeLlama-7b-hf/blob/main/config.json" 601 | ); 602 | openModal(); 603 | return null; 604 | } 605 | 606 | if (Object.keys(jsonUploadedData).length === 0) { 607 | setErrorMessage("Uploaded json is empty :)"); 608 | openModal(); 609 | return null; // JSON is empty 610 | } 611 | 612 | // console.log(jsonUploadedData); 613 | 614 | let vocab = 0, 615 | hiddenDim = 0, 616 | heads = 0, 617 | interDim = 0, 618 | num_layers = 0; 619 | 620 | if (jsonUploadedData.hasOwnProperty("vocab_size")) { 621 | vocab = jsonUploadedData["vocab_size"]; 622 | } else { 623 | uploadError(); 624 | return null; 625 | } 626 | 627 | if (jsonUploadedData.hasOwnProperty("hidden_size")) { 628 | hiddenDim = jsonUploadedData["hidden_size"]; 629 | } else { 630 | uploadError(); 631 | return null; 632 | } 633 | 634 | if (jsonUploadedData.hasOwnProperty("num_attention_heads")) { 635 | heads = jsonUploadedData["num_attention_heads"]; 636 | } else { 637 | uploadError(); 638 | return null; 639 | } 640 | 641 | if (jsonUploadedData.hasOwnProperty("intermediate_size")) { 642 | interDim = jsonUploadedData["intermediate_size"]; 643 | } else { 644 | uploadError(); 645 | return null; 646 | } 647 | 648 | if (jsonUploadedData.hasOwnProperty("num_hidden_layers")) { 649 | num_layers = jsonUploadedData["num_hidden_layers"]; 650 | } else { 651 | uploadError(); 652 | return null; 653 | } 654 | 655 | return { 656 | vocab: vocab, 657 | hiddenDim: hiddenDim, 658 | heads: heads, 659 | interDim: interDim, 660 | num_layers: num_layers, 661 | }; 662 | } 663 | 664 | function getParseConfig(parsedJSONData, setErrorMessage, openModal) { 665 | console.log(Object.keys(parsedJSONData).length); 666 | if (Object.keys(parsedJSONData).length == 0) { 667 | setErrorMessage( 668 | 
"Huggingface config of this id doesn't have correct keys. e.g. this is a ggml model. Please upload your config in correct format" 669 | ); 670 | openModal(); 671 | return null; 672 | } 673 | 674 | const vocab = getKey(["vocab_size"], parsedJSONData, 32000); 675 | const hiddenDim = getKey( 676 | ["hidden_size", "d_model", "n_embd"], 677 | parsedJSONData, 678 | 768 679 | ); 680 | const heads = getKey( 681 | ["num_attention_heads", "num_heads", "n_head"], 682 | parsedJSONData, 683 | 12 684 | ); 685 | const interDim = getKey( 686 | ["intermediate_size", "n_inner", "d_ff"], 687 | parsedJSONData, 688 | hiddenDim * 4 689 | ); 690 | const num_layers = getKey( 691 | ["num_layers", "num_hidden_layers", "n_layer"], 692 | parsedJSONData, 693 | 12 694 | ); 695 | 696 | return { 697 | vocab: vocab, 698 | hiddenDim: hiddenDim, 699 | heads: heads, 700 | interDim: interDim, 701 | num_layers: num_layers, 702 | }; 703 | } 704 | 705 | function getDefault(modelSize) { 706 | //If only model size is provided. Guess the values 707 | let vocab = null; 708 | let heads = null; 709 | let numLayers = null; 710 | 711 | function getApprox(modelSize) { 712 | let vocabR = null, 713 | headsR = null, 714 | numLayersR = null; 715 | if (modelSize < 5) { 716 | vocabR = 32000; 717 | headsR = 32; 718 | numLayersR = 24; 719 | return [vocabR, headsR, numLayersR]; 720 | } 721 | if (modelSize < 10) { 722 | vocabR = 32000; 723 | headsR = 32; 724 | numLayersR = 32; 725 | return [vocabR, headsR, numLayersR]; 726 | } 727 | if (modelSize < 24) { 728 | vocabR = 32000; 729 | headsR = 40; 730 | numLayersR = 40; 731 | return [vocabR, headsR, numLayersR]; 732 | } 733 | 734 | if (modelSize < 55) { 735 | vocabR = 32000; 736 | headsR = 64; 737 | numLayersR = 48; 738 | return [vocabR, headsR, numLayersR]; 739 | } 740 | 741 | vocabR = 32000; 742 | headsR = 64; 743 | numLayersR = 80; 744 | return [vocabR, headsR, numLayersR]; 745 | } 746 | 747 | [vocab, heads, numLayers] = getApprox(modelSize); 748 | 749 | //vocab*h + 
numLayers*4*h*h + 3*4*h*h*numLayers = modelSize*10^9 750 | const A = numLayers * 4 + 3 * 4 * numLayers; 751 | const B = 2 * vocab; 752 | const C = -1 * modelSize * billion; 753 | 754 | let h = (-B + Math.sqrt(B * B - 4 * A * C)) / (2 * A); 755 | h = Math.ceil(h); 756 | 757 | return { 758 | vocab: vocab, 759 | hiddenDim: h, 760 | heads: heads, 761 | interDim: 4 * h, 762 | num_layers: numLayers, 763 | }; 764 | } 765 | 766 | function convertToMB(value) { 767 | return value / (1024 * 1024); 768 | } 769 | 770 | function convertToMBModelSize(value, quant, typeOfTrn) { 771 | let extra = 0; 772 | let fB = 2; 773 | let size = (value * fB) / (1024 * 1024); 774 | if (quant === "bnb_int8" || quant === "bnb_q4" || typeOfTrn === "qlora") { 775 | extra = 0.06 * size; 776 | } 777 | 778 | if (quant === "bnb_int8") { 779 | size = size / 2; 780 | } 781 | if (quant === "bnb_q4") { 782 | size = size / 4; 783 | } 784 | 785 | if (typeOfTrn === "qlora") { 786 | size = size / 4 - (value * 2) / (64 * 1024 * 1024); 787 | } 788 | 789 | return size + extra; 790 | } 791 | 792 | function convertToBytes(floatType) { 793 | return 2.0; 794 | } 795 | 796 | function getAllComputedData( 797 | parsedJSONData, 798 | jsonUploadedData, 799 | modelSize, 800 | contextLen, 801 | floatType, 802 | selections, 803 | setErrorMessage, 804 | openModal, 805 | batchSize, 806 | isGradCheckPoint 807 | ) { 808 | let parsedConfig = null, 809 | modelSizeinB = null; 810 | let activationMemory = 0, 811 | gradAndOptMemory = 0; 812 | let inferenceMemory = 0; 813 | let totalMemory = 0; 814 | const floatBytes = convertToBytes(floatType); 815 | const quantType = selections.dropdownQuant; 816 | const trnType = selections.dropdownTrnOrNot; 817 | const typeOfTrn = selections.dropdownFullOrNot; 818 | 819 | //trnType should be trnOrNot 820 | 821 | if (batchSize === "") { 822 | batchSize = "1"; 823 | } 824 | 825 | let overHead = 650; 826 | if (!isValidPositiveInteger(contextLen)) { 827 | setErrorMessage( 828 | "Context len can't be 
blank or have non numeric or negative/zero values." 829 | ); 830 | openModal(); 831 | return null; 832 | } 833 | 834 | if (!isValidPositiveInteger(batchSize)) { 835 | setErrorMessage( 836 | "Batch size cant have non numeric or negative/zero values" 837 | ); 838 | openModal(); 839 | return null; 840 | } 841 | 842 | if (parsedJSONData == null) { 843 | if (jsonUploadedData != null) { 844 | parsedConfig = sanityUploadedConfig( 845 | jsonUploadedData, 846 | setErrorMessage, 847 | openModal 848 | ); 849 | console.log(parsedConfig, "uploaded"); 850 | if (parsedConfig == null) { 851 | return null; 852 | } 853 | modelSizeinB = computeModelSize(parsedConfig); 854 | } else { 855 | if (!isNumberOrFloat(modelSize)) { 856 | console.log("error with model size"); 857 | setErrorMessage( 858 | "Hugginface model id not available, enter model size(>0) or upload config" 859 | ); 860 | openModal(); 861 | return null; 862 | } 863 | 864 | parsedConfig = getDefault(modelSize); 865 | modelSizeinB = modelSize * billion; 866 | } 867 | } else { 868 | parsedConfig = getParseConfig( 869 | parsedJSONData, 870 | setErrorMessage, 871 | openModal 872 | ); 873 | if (parsedConfig == null) { 874 | return null; 875 | } 876 | console.log(parsedConfig); 877 | modelSizeinB = computeModelSize(parsedConfig); 878 | } 879 | 880 | let fB = floatBytes; 881 | if (quantType === "bnb_int8") { 882 | fB = 1; 883 | } 884 | if (quantType === "bnb_q4" || typeOfTrn === "qlora") { 885 | fB = 0.5; 886 | } 887 | let modelSizeinMB = convertToMBModelSize( 888 | modelSizeinB, 889 | quantType, 890 | typeOfTrn 891 | ); 892 | // console.log(modelSizeinB); 893 | 894 | //!Inference 895 | if (trnType != "trn") { 896 | let checkSanity = checkCombinationInference( 897 | trnType, 898 | quantType, 899 | setErrorMessage, 900 | openModal 901 | ); 902 | if (!checkSanity) { 903 | return null; 904 | } 905 | 906 | if (trnType === "inf" || trnType === "inf_vLLM") { 907 | let fB = 2; 908 | //If bnb quant 909 | if (quantType === "bnb_int8") { 
910 | fB = 1; 911 | } 912 | if (quantType === "bnb_q4" || typeOfTrn === "qlora") { 913 | fB = 0.5; 914 | } 915 | 916 | inferenceMemory = batchSize*convertToMB( 917 | 2 * 918 | contextLen * 919 | 2 * 920 | 2 * 921 | parsedConfig["hiddenDim"] * 922 | parsedConfig["num_layers"] 923 | ); 924 | 925 | activationMemory = batchSize*computeInferenceOnlyActivationMemory( 926 | contextLen, 927 | parsedConfig 928 | ); 929 | 930 | console.log( 931 | "HERE!!!", 932 | inferenceMemory, 933 | modelSizeinMB, 934 | overHead, 935 | activationMemory 936 | ); 937 | } 938 | if (trnType === "inf_ggml") { 939 | modelSizeinMB = computeModelSizeGGML(parsedConfig, quantType); 940 | inferenceMemory = batchSize*convertToMB( 941 | 1 * 942 | contextLen * 943 | 2 * 944 | 2 * 945 | parsedConfig["hiddenDim"] * 946 | parsedConfig["num_layers"] 947 | ); 948 | activationMemory = batchSize*computeInferenceOnlyActivationMemory( 949 | contextLen, 950 | parsedConfig 951 | ); 952 | overHead = overHead + computeOverheadGGML(contextLen); 953 | } 954 | 955 | totalMemory = 956 | inferenceMemory + modelSizeinMB + overHead + activationMemory; 957 | } else { 958 | // console.log("training!"); 959 | 960 | let checkSanity = checkCombinationTrainInference( 961 | quantType, 962 | setErrorMessage, 963 | openModal, 964 | typeOfTrn 965 | ); 966 | if (!checkSanity) { 967 | return null; 968 | } 969 | //! 
Train 970 | activationMemory = getActivationMemory( 971 | parsedConfig, 972 | contextLen, 973 | floatBytes, 974 | quantType, 975 | typeOfTrn, 976 | batchSize 977 | ); 978 | 979 | activationMemory = convertToMB(activationMemory); 980 | // console.log("got activation", activationMemory); 981 | 982 | gradAndOptMemory = getGradOptMemory( 983 | typeOfTrn, 984 | selections.dropdownOpt, 985 | quantType, 986 | modelSizeinB, 987 | floatBytes, 988 | parsedConfig, 989 | contextLen, 990 | batchSize 991 | ); 992 | 993 | // console.log("got gradOpt", gradAndOptMemory); 994 | console.log(isGradCheckPoint); 995 | let actFactorGradCheckPoint = 1.0 996 | if (isGradCheckPoint === 'yes'){ 997 | actFactorGradCheckPoint = 0.15; 998 | } 999 | 1000 | activationMemory = activationMemory*actFactorGradCheckPoint; 1001 | 1002 | gradAndOptMemory = convertToMB(gradAndOptMemory); 1003 | totalMemory = modelSizeinMB + gradAndOptMemory + activationMemory; 1004 | 1005 | console.log("got total", totalMemory); 1006 | 1007 | totalMemory = totalMemory + overHead; 1008 | } 1009 | 1010 | 1011 | 1012 | return { 1013 | Total: Math.ceil(totalMemory), 1014 | "KV Cache": Math.ceil(inferenceMemory), 1015 | "Model Size": Math.ceil(modelSizeinMB), 1016 | "Activation Memory": Math.ceil(activationMemory), 1017 | "Grad & Optimizer memory": Math.ceil(gradAndOptMemory), 1018 | "cuda + other overhead": overHead, 1019 | }; 1020 | } 1021 | 1022 | ///Users/rahulchand/gpu_mem/public/all_configs.json 1023 | async function fetchParams(name) { 1024 | // let output = fetch('https://huggingface.co/meta-llama/Llama-2-7b/raw/main/params.json'); 1025 | 1026 | let response = await fetch(configPath); 1027 | response = await response.json(); 1028 | // console.log(response.hasOwnProperty(name)); 1029 | 1030 | return response.hasOwnProperty(name) ? 
response[name] : null; 1031 | } 1032 | 1033 | // function isNumberOrFloat(value) { 1034 | // return /^-?\d+(\.\d+)?$/.test(value); 1035 | // } 1036 | 1037 | function isNumberOrFloat(value) { 1038 | const num = parseFloat(value); 1039 | return !isNaN(num) && num > 0; 1040 | } 1041 | 1042 | function isValidPositiveInteger(input) { 1043 | const num = parseFloat(input); 1044 | console.log(num, input); 1045 | // console.log("isvalid :", input); 1046 | 1047 | return Number.isInteger(num) && num > 0; 1048 | } 1049 | 1050 | function getGPUDataFromJSON() {} 1051 | 1052 | function App() { 1053 | // let subtitle; 1054 | const [modelSize, setModelSize] = useState(""); 1055 | const [modelName, setModelName] = useState(""); 1056 | const [contextLen, setContextLen] = useState(""); 1057 | 1058 | const [promptLen, setPromptLen] = useState(""); 1059 | 1060 | const [batchSize, setBatchSize] = useState(1); 1061 | const [totalMemoryShown, setTotalMemoryShown] = useState(0); 1062 | 1063 | const [gpuJsonDataForTable, setGPUJSONDataForTable] = useState([]); 1064 | const [cpuJsonDataForTable, setCPUJSONDataForTable] = useState([]); 1065 | 1066 | // const [breakDownMemory, setBreakDownMemory] = useState(" "); 1067 | 1068 | const [breakDownMemoryJson, setBreakDownMemoryJson] = useState([]); 1069 | 1070 | const [errorMessage, setErrorMessage] = useState(""); 1071 | 1072 | const [fileNameUpload, setFileNameUpload] = useState(""); 1073 | 1074 | const [modalIsOpen, setIsOpen] = React.useState(false); 1075 | 1076 | const [responseCache, setResponseCache] = useState(null); 1077 | const [responseCacheKeys, setResponseCacheKeys] = useState(null); 1078 | 1079 | const [suggestions, setSuggestions] = useState([]); 1080 | const [selectedIdx, setSelectedIdx] = useState(-1); 1081 | const [tokenPerSecond, setTokenPerSecond] = useState(""); 1082 | 1083 | const [numGPU, setNumGPU] = useState(1); 1084 | const [numGPUINeed, setNumGPUINeed] = useState(null); 1085 | const [memReqHardwareName, 
setMemReqHardwareName] = useState(""); 1086 | const [compReqHardwareName, setCompReqHardwareName] = useState(""); 1087 | 1088 | const [compReqHardwareNameBefore, setCompReqHardwareNameBefore] = 1089 | useState(""); 1090 | 1091 | const [numOffload, setNumOffLoad] = useState(1); 1092 | 1093 | const [computedTokenPerSecond, setComputedTokenPerSecond] = useState(1); 1094 | 1095 | const [jsonData, setJsonData] = useState(null); 1096 | 1097 | const [jsonDataCompute, setJsonDataCompute] = useState(null); 1098 | 1099 | const [showSuggestions, setShowSuggestions] = useState(true); 1100 | const [showDDR, setShowDDR] = useState([1, 0]); 1101 | 1102 | const [showTable, setShowTable] = useState(false); 1103 | const [showTableGPU, setShowTableGPU] = useState(false); 1104 | const [showTableCPU, setShowTableCPU] = useState(false); 1105 | const [showTableCompute, setShowTableCompute] = useState(false); 1106 | const [showTableComputeDisclaimer, setShowTableComputeDisclaimer] = 1107 | useState(""); 1108 | const [showTableComputeSmallInfo, setShowTableComputeSmallInfo] = 1109 | useState(0); 1110 | 1111 | const [showTrainLenInfo, setShowTrainLenInfo] = useState(true); 1112 | const [showTrainGradientCheck, setShowTrainGradientCheck] = useState(true); 1113 | 1114 | const gpuTableRef = React.useRef(null); 1115 | const cpuTableRef = React.useRef(null); 1116 | 1117 | const [faqOpen, setFaqOpen] = useState(false); 1118 | 1119 | // const th_css = "py-2 px-4 border bg-gray-200 text-gray-600 "; 1120 | 1121 | // const jsonDataSample = [ 1122 | // { index: 1, name: "Alice", value: 30 }, 1123 | // { index: 2, name: "Bob", value: 40 }, 1124 | // { index: 3, name: "Carol", value: 50 }, 1125 | // ]; 1126 | 1127 | function openModal() { 1128 | setIsOpen(true); 1129 | } 1130 | 1131 | function closeModal() { 1132 | setIsOpen(false); 1133 | } 1134 | 1135 | const handleFileClear = (event) => { 1136 | setFileNameUpload(""); 1137 | setJsonData(null); 1138 | // setTotalMemoryShown(""); 1139 | // 
setBreakDownMemory(""); 1140 | }; 1141 | 1142 | const [displayedText, setDisplayedText] = useState(""); 1143 | const [isVisible, setIsVisible] = useState(true); 1144 | const intervalIdRef = useRef(null); 1145 | const wordIndexRef = useRef(0); 1146 | const timeoutIdRef = useRef(null); 1147 | 1148 | const handleClickGenerateText = () => { 1149 | let token_per_second = parseInt(tokenPerSecond, 10); 1150 | 1151 | setIsVisible(true); 1152 | const words = fullText.split(/[\s,.;!?]+/); 1153 | // console.log(words); 1154 | wordIndexRef.current = 0; // reset word index 1155 | setDisplayedText(""); 1156 | 1157 | // Clear any existing interval before setting up a new one 1158 | if (intervalIdRef.current) { 1159 | clearInterval(intervalIdRef.current); 1160 | } 1161 | if (timeoutIdRef.current) { 1162 | clearTimeout(timeoutIdRef.current); 1163 | } 1164 | 1165 | intervalIdRef.current = setInterval(() => { 1166 | if (wordIndexRef.current < words.length - 1) { 1167 | wordIndexRef.current++; 1168 | setDisplayedText((prevText) => { 1169 | if (prevText) { 1170 | return prevText + " " + words[wordIndexRef.current]; 1171 | } 1172 | return words[wordIndexRef.current]; // No preceding space for the first word 1173 | }); 1174 | } 1175 | }, 1000 / token_per_second); 1176 | }; 1177 | 1178 | const handleClearGeneratedText = () => { 1179 | if (intervalIdRef.current) { 1180 | clearInterval(intervalIdRef.current); 1181 | } 1182 | if (timeoutIdRef.current) { 1183 | clearTimeout(timeoutIdRef.current); 1184 | } 1185 | setDisplayedText(""); 1186 | setIsVisible(false); 1187 | }; 1188 | 1189 | useEffect(() => { 1190 | return () => { 1191 | if (intervalIdRef.current) { 1192 | clearInterval(intervalIdRef.current); 1193 | } 1194 | if (timeoutIdRef.current) { 1195 | clearTimeout(timeoutIdRef.current); 1196 | } 1197 | }; 1198 | }, []); 1199 | 1200 | const handleFileChange = (event) => { 1201 | const file = event.target.files[0]; 1202 | if (file) { 1203 | // Check file size 1204 | if (file.size > 
MAX_FILE_SIZE) { 1205 | alert("File is too large. Please upload a smaller JSON file."); 1206 | return; 1207 | } 1208 | 1209 | const reader = new FileReader(); 1210 | reader.onload = (e) => { 1211 | try { 1212 | const json = JSON.parse(e.target.result); 1213 | setJsonData(json); 1214 | event.target.value = null; 1215 | } catch (error) { 1216 | console.error("Error parsing JSON:", error); 1217 | alert("Invalid JSON file."); 1218 | } 1219 | }; 1220 | setFileNameUpload(file.name); 1221 | reader.readAsText(file); 1222 | // console.log(jsonData); 1223 | } 1224 | }; 1225 | 1226 | const [selections, setSelections] = useState({ 1227 | dropdownTrnOrNot: "inf", 1228 | dropdownFullOrNot: "full_trn", 1229 | dropdownOpt: "adam_opt", 1230 | dropdownQuant: "no_quant", 1231 | dropdownGPU: "rtx-2060", 1232 | dropdownCPU: "3600x", 1233 | dropdownDDR: "ddr4", 1234 | isGPUorCPU: "usingGPU", 1235 | isGradCheckPoint : "no" 1236 | }); 1237 | 1238 | function setDDROptions(value) { 1239 | let cpuSpecs = cpuJSONData[value]; 1240 | // console.log("calling: ", cpuSpecs); 1241 | if (cpuSpecs["ddr4"] == 1 && cpuSpecs["ddr5"] == 1) { 1242 | setShowDDR([1, 1]); 1243 | return; 1244 | } 1245 | if (cpuSpecs["ddr4"] == 1) { 1246 | setShowDDR([1, 0]); 1247 | return; 1248 | } 1249 | if (cpuSpecs["ddr5"] == 1) { 1250 | setShowDDR([0, 1]); 1251 | return; 1252 | } 1253 | } 1254 | 1255 | function setTrainPromptLenInfoMessage(value){ 1256 | if (value === 'trn'){ 1257 | setShowTrainLenInfo(true); 1258 | // setShowTrainGradientCheck(true); 1259 | } 1260 | else{ 1261 | setShowTrainLenInfo(false); 1262 | // setShowTrainGradientCheck(false); 1263 | } 1264 | } 1265 | 1266 | const handleChangeSelection = (e) => { 1267 | const { name, value } = e.target; 1268 | setSelections((prevState) => ({ 1269 | ...prevState, 1270 | [name]: value, 1271 | })); 1272 | 1273 | if (name === "dropdownCPU") { 1274 | setDDROptions(value); 1275 | } 1276 | if (name === "dropdownTrnOrNot"){ 1277 | setTrainPromptLenInfoMessage(value); 1278 
| } 1279 | 1280 | }; 1281 | 1282 | // const handleChangeInText1 = (event) => { 1283 | // setModelSize(event.target.value); 1284 | // }; 1285 | 1286 | const [output1, setOutput1] = useState(""); 1287 | 1288 | function enchanceGPUJSONData(onlyNumGPUJsonData) { 1289 | const newJsonData = { 1290 | Name: selections.dropdownGPU.toUpperCase(), 1291 | bandwidth: onlyNumGPUJsonData["bandwidth"] + " GB", 1292 | compute: onlyNumGPUJsonData["compute"] + " TFlops/s", 1293 | memory: onlyNumGPUJsonData["memory"] + " GB", 1294 | }; 1295 | return newJsonData; 1296 | } 1297 | 1298 | function enchanceCPUJSONData(onlyNumCPUJsonData) { 1299 | const newJsonData = { 1300 | Name: selections.dropdownCPU.toUpperCase(), 1301 | "DDR5 Rated Speed": onlyNumCPUJsonData["Speed"] + " MT/s", 1302 | "DDR4 Rated Speed": onlyNumCPUJsonData["speed_ddr4"] + " MT/s", 1303 | Cores: onlyNumCPUJsonData["Cores"], 1304 | "DDR5 Support": Boolean(onlyNumCPUJsonData["ddr5"]).toString(), 1305 | "DDR4 Support": Boolean(onlyNumCPUJsonData["ddr4"]).toString(), 1306 | "Memory Bus": onlyNumCPUJsonData["Bus"] + " Channel", 1307 | }; 1308 | // console.log("My data"); 1309 | // console.log(newJsonData); 1310 | return newJsonData; 1311 | } 1312 | 1313 | // function getTotalFlops(parsedConfig){ 1314 | 1315 | // let totalFlops = 0; 1316 | // totalFlops += vocab*hiddenDim*2; //embedding 1317 | // totalFlops += hiddenDim*hiddenDim*2 //qkvo 1318 | 1319 | // } 1320 | 1321 | function getTotalFlopsForKV(parsedConfig, batchSize, contextLen) { 1322 | const hidDim = parsedConfig["hiddenDim"]; 1323 | return 2 * contextLen * contextLen * hidDim * batchSize; 1324 | } 1325 | 1326 | function convertGBToByte(sizeInGB) { 1327 | return sizeInGB * 1024 * 1024 * 1024; 1328 | } 1329 | 1330 | function convertByteToGB(sizeInByte) { 1331 | return sizeInByte / (1024 * 1024 * 1024); 1332 | } 1333 | 1334 | function convertByteToMB(sizeInByte) { 1335 | return sizeInByte / (1024 * 1024); 1336 | } 1337 | 1338 | function getFloatRatio_F16(quant) { 1339 
| return 1.0; 1340 | } 1341 | 1342 | function getCPUSpeedFromSpecs(speed, speed_ddr4, bus, memory) { 1343 | const busMap = { Dual: 2.0, Quad: 4.0, Hexa: 6.0, Octa: 8.0 }; 1344 | 1345 | // console.log("speeds: ",speed, speed_ddr4, selections.dropdownDDR); 1346 | 1347 | let useThiSpeed = 0; 1348 | if (selections.dropdownDDR === "ddr4") { 1349 | useThiSpeed = speed_ddr4; 1350 | } else { 1351 | useThiSpeed = speed; 1352 | } 1353 | 1354 | const busValue = busMap[bus]; 1355 | const rateMult = 8.0; 1356 | 1357 | const memInGBPerSecond = (busValue * rateMult * useThiSpeed) / 1024; 1358 | 1359 | return memInGBPerSecond; 1360 | } 1361 | 1362 | function getFloatRatio_F16_CPU(quantType) { 1363 | let k_values = [2, 3, 4, 5, 6, 8, 16]; 1364 | for (let k of k_values) { 1365 | if (quantType.includes(k.toString())) { 1366 | return k / 16; 1367 | } 1368 | } 1369 | return 1.0; 1370 | } 1371 | 1372 | function token_per_second_logic_CPU( 1373 | cpuDataOnlyNum, 1374 | parsedJSONData, 1375 | promptLen, 1376 | contextLen, 1377 | batchSize, 1378 | setErrorMessage, 1379 | openModal 1380 | ) { 1381 | const speed = cpuDataOnlyNum["Speed"]; 1382 | const speed_ddr4 = cpuDataOnlyNum["speed_ddr4"]; 1383 | 1384 | const bus = cpuDataOnlyNum["Bus"]; 1385 | const memory = cpuDataOnlyNum["Memory"]; 1386 | const cpu_compute = cpuDataOnlyNum["Flops"] * 0.5; 1387 | 1388 | const cpu_bandwidth = getCPUSpeedFromSpecs( 1389 | speed, 1390 | speed_ddr4, 1391 | bus, 1392 | memory 1393 | ); 1394 | 1395 | const quantType = selections.dropdownQuant; 1396 | 1397 | let parsedConfig = getParseConfig( 1398 | parsedJSONData, 1399 | setErrorMessage, 1400 | openModal 1401 | ); 1402 | const numLayers = parsedConfig["num_layers"], 1403 | hiddenDim = parsedConfig["hiddenDim"]; 1404 | 1405 | let memoryTransfer = 1406 | (computeModelSizeGGML(parsedConfig, quantType) * 1024 * 1024) / 2.0; 1407 | if (quantType === "no_quant") { 1408 | memoryTransfer = computeModelSize(parsedConfig); 1409 | } 1410 | 1411 | const extraFactorCPU = 
1.6; 1412 | //! Prompt Time Calculation 1413 | //Time to process prompt (depending on contextLen this is either compute bound or memory bound) 1414 | //Since the prompts are usually (above >50, i think it is safe to say this is mostly COMPUTE BOUND) 1415 | 1416 | // console.log("this is memory: ",convertByteToMB(memoryTransfer),quantType); 1417 | const totalLen = parseInt(contextLen) + parseInt(promptLen); 1418 | // console.log( 1419 | // "Theory: ", 1420 | // promptLen, 1421 | // memoryTransfer, 1422 | // numLayers, 1423 | // hiddenDim, 1424 | // batchSize 1425 | // ); 1426 | let theoryTimePrompt = 1427 | 2 * promptLen * memoryTransfer + 1428 | 2 * numLayers * hiddenDim * hiddenDim * 2 * promptLen; 1429 | theoryTimePrompt = batchSize * theoryTimePrompt; 1430 | 1431 | // console.log("first: ", theoryTimePrompt); 1432 | let theoryTimePrompt_in_ms = 1433 | theoryTimePrompt / (tera * (cpu_compute / 1000.0)); 1434 | 1435 | // console.log("first: ",theoryTimePrompt_in_ms) 1436 | console.log("mem trans: ", convertByteToMB(memoryTransfer)); 1437 | let finalPromptTime = 1438 | theoryTimePrompt_in_ms * getFloatRatio_F16_CPU(quantType) + 1439 | convertByteToMB(2 * memoryTransfer) * (0.008 / 1000); 1440 | 1441 | // const totalFlopsInB = 2*batchSize*modelSizeinB*billion + getTotalFlopsForKV(parsedConfig, batchSize, contextLen); 1442 | 1443 | //! Per token Time calculation 1444 | const utilizationRate = 1.0; 1445 | const kv_cache_memory = 2 * 2 * numLayers * hiddenDim * totalLen; 1446 | 1447 | //! Why is this 2* factor here? because of float16? -> Yes! 1448 | let timeIfMemory = 1449 | (convertByteToGB(2 * memoryTransfer + kv_cache_memory) / 1450 | (utilizationRate * cpu_bandwidth)) * 1451 | extraFactorCPU; 1452 | let timeIfMemory_in_ms = timeIfMemory * 1000; 1453 | 1454 | //! 
Check if it is compute bound 1455 | 1456 | // console.log( 1457 | // memoryTransfer, 1458 | // numLayers, 1459 | // hiddenDim, 1460 | // batchSize, 1461 | // cpu_compute, 1462 | // extraFactorCPU 1463 | // ); 1464 | let totalFlopsToken = 1465 | 2 * memoryTransfer + 1466 | 2 * totalLen * hiddenDim * 2 * numLayers * 2 * 2; 1467 | totalFlopsToken = batchSize * totalFlopsToken; 1468 | let timeIfFlops_in_ms = 1469 | (totalFlopsToken * 1000) / (tera * (cpu_compute / 1000.0)); 1470 | timeIfFlops_in_ms = timeIfFlops_in_ms * extraFactorCPU; 1471 | 1472 | let finalTimeToConsider = null; 1473 | let memoryOrCompute = null; 1474 | 1475 | if (timeIfMemory_in_ms > timeIfFlops_in_ms) { 1476 | finalTimeToConsider = timeIfMemory_in_ms; 1477 | memoryOrCompute = "memory"; 1478 | } else { 1479 | finalTimeToConsider = timeIfFlops_in_ms; 1480 | memoryOrCompute = "compute"; 1481 | } 1482 | 1483 | let token_per_s = 1000 / finalTimeToConsider; //finalTimeToConsider is time in ms for each token. So divide by 1000 1484 | 1485 | setComputedTokenPerSecond(Math.round(token_per_s)); 1486 | 1487 | const jsonComputeReturnData = { 1488 | "Token/s": 1489 | Math.round(token_per_s) >= 1 ? Math.round(token_per_s) : "< 1", 1490 | "ms per token": finalTimeToConsider.toFixed(2), 1491 | // "ms per token (compute bound)": timeIfFlops_in_ms.toFixed(2), 1492 | "Prompt process Time (s)": finalPromptTime.toFixed(2), 1493 | "memory or compute bound?": memoryOrCompute, 1494 | }; 1495 | 1496 | setJsonDataCompute(jsonComputeReturnData); 1497 | setShowTableCompute(true); 1498 | } 1499 | 1500 | function token_per_second_logic_Train( 1501 | gpuDataOnlyNum, 1502 | parsedJSONData, 1503 | promptLen, 1504 | contextLen, 1505 | batchSize, 1506 | setErrorMessage, 1507 | openModal 1508 | ) { 1509 | //! 
Training is most of the time compute bound 1510 | const gpu_bandwidth = gpuDataOnlyNum["bandwidth"]; 1511 | const gpu_compute = gpuDataOnlyNum["compute"]; 1512 | 1513 | const trnType = selections.dropdownTrnOrNot; 1514 | const quantType = selections.dropdownQuant; 1515 | const totalLen = parseInt(promptLen) + parseInt(contextLen); 1516 | 1517 | setShowTableComputeDisclaimer(""); 1518 | let bnb_cost = 1.0; 1519 | if (quantType === "bnb_int8") { 1520 | setShowTableComputeDisclaimer( 1521 | "Disclaimer: bitsandbytes llm.int8 quant is NOT optimized for time. It takes more time than float16" 1522 | ); 1523 | bnb_cost = 3.0; 1524 | } 1525 | if (quantType === "bnb_q4") { 1526 | setShowTableComputeDisclaimer( 1527 | "Disclaimer: https://github.com/TimDettmers/bitsandbytes/releases/tag/0.41.0 says that int4/qlora is 2-4x faster but I haven't been able to reproduce this. Other people have raised similar issues. " 1528 | ); 1529 | bnb_cost = 2.75; 1530 | } 1531 | if (quantType === "qlora") { 1532 | setShowTableComputeDisclaimer( 1533 | "Disclaimer: https://github.com/TimDettmers/bitsandbytes/releases/tag/0.41.0 says that int4/qlora is 2-4x faster but I haven't been able to reproduce this. Other people have raised similar issues. 
" 1534 | ); 1535 | bnb_cost = 1.75; 1536 | } 1537 | 1538 | let parsedConfig = getParseConfig( 1539 | parsedJSONData, 1540 | setErrorMessage, 1541 | openModal 1542 | ); 1543 | const numLayers = parsedConfig["num_layers"], 1544 | hiddenDim = parsedConfig["hiddenDim"]; 1545 | 1546 | const memoryTransfer = computeModelSize(parsedConfig); 1547 | 1548 | let totalFlopsToken = 1549 | 2 * batchSize * totalLen * memoryTransfer + 1550 | totalLen * hiddenDim * 2 * numLayers * batchSize; 1551 | 1552 | // console.log(batchSize, totalLen, memoryTransfer); 1553 | // console.log( 1554 | // "other: ", 1555 | // totalLen * hiddenDim * 2 * numLayers * batchSize 1556 | // ); 1557 | 1558 | // console.log( 1559 | // 2 * memoryTransfer, 1560 | // totalLen * hiddenDim * 2 * numLayers * 2 1561 | // ); 1562 | 1563 | let extraGradChoice = 1.0; 1564 | if (selections.dropdownOpt === "adam_opt") { 1565 | extraGradChoice = 1.15; 1566 | } 1567 | 1568 | console.log("tot flops: ", totalFlopsToken); 1569 | 1570 | totalFlopsToken = totalFlopsToken * 2; //! Backward pass *2 1571 | totalFlopsToken = totalFlopsToken * extraGradChoice; 1572 | 1573 | totalFlopsToken = totalFlopsToken * bnb_cost; //! Cost due to bnb 1574 | 1575 | if (selections.dropdownFullOrNot === "full_trn") { 1576 | //! In total training, we will have to move the weights back to GPU for update, so its 2x more + update all so 1.5x (approx) more. Total 3x 1577 | totalFlopsToken = totalFlopsToken * 3; //! I don't have capcacity to check this 1578 | } 1579 | 1580 | let timeIfFlops_in_ms = 1581 | (totalFlopsToken * 1000) / (tera * gpu_compute * 0.85); 1582 | let memoryOrCompute = "compute"; 1583 | if (selections.isGradCheckPoint==='yes'){ 1584 | //This factor should be around ~1.5x. 
To be safe I have kept it as 1.65x 1585 | //Source: https://github.com/huggingface/transformers/issues/25572#issuecomment-1687749561 1586 | timeIfFlops_in_ms = timeIfFlops_in_ms*1.65; 1587 | } 1588 | const jsonComputeReturnData = { 1589 | "ms per iteration(forward + backward)": 1590 | timeIfFlops_in_ms.toFixed(2), 1591 | "memory or compute bound?": memoryOrCompute, 1592 | }; 1593 | 1594 | // console.log(jsonComputeReturnData); 1595 | 1596 | setJsonDataCompute(jsonComputeReturnData); 1597 | setShowTableCompute(true); 1598 | } 1599 | 1600 | function token_per_second_logic_GPU( 1601 | gpuDataOnlyNum, 1602 | parsedJSONData, 1603 | promptLen, 1604 | contextLen, 1605 | batchSize, 1606 | setErrorMessage, 1607 | openModal 1608 | ) { 1609 | const gpu_bandwidth = gpuDataOnlyNum["bandwidth"]; 1610 | const gpu_compute = gpuDataOnlyNum["compute"]; 1611 | 1612 | const trnType = selections.dropdownTrnOrNot; 1613 | const quantType = selections.dropdownQuant; 1614 | const totalLen = parseInt(promptLen) + parseInt(contextLen); 1615 | 1616 | let extraFactor = 1.0; 1617 | 1618 | if (trnType === "inf") { 1619 | extraFactor = 2.0; 1620 | } 1621 | if (trnType === "inf_ggml") { 1622 | extraFactor = 1.5; 1623 | if (quantType === "ggml_Q2_K") { 1624 | extraFactor = 2.0; 1625 | } 1626 | } 1627 | 1628 | if ((trnType === "inf") & (selections.dropdownFullOrNot === "qlora")) { 1629 | setErrorMessage( 1630 | "afaik qlora trained model's inference is just 4 bit inference, i.e. bnb int4/nf4. You can select that option from quant to calculate this" 1631 | ); 1632 | openModal(); 1633 | return; 1634 | } 1635 | 1636 | setShowTableComputeDisclaimer(""); 1637 | let bnb_cost = 1.0; 1638 | if (trnType === "inf" && quantType === "bnb_int8") { 1639 | setShowTableComputeDisclaimer( 1640 | "Disclaimer: bitsandbytes llm.int8 quant is NOT optimized for inference. It takes more than time than float16." 
1641 | ); 1642 | bnb_cost = 4.5; 1643 | } 1644 | if (trnType === "inf" && quantType === "bnb_q4") { 1645 | setShowTableComputeDisclaimer( 1646 | "Disclaimer: https://github.com/TimDettmers/bitsandbytes/releases/tag/0.41.0 says that int4 is 2-4x faster but I haven't been able to reproduce this. Other people have raised similar issues in the repo." 1647 | ); 1648 | bnb_cost = 3.0; 1649 | } 1650 | 1651 | let parsedConfig = getParseConfig( 1652 | parsedJSONData, 1653 | setErrorMessage, 1654 | openModal 1655 | ); 1656 | const numLayers = parsedConfig["num_layers"], 1657 | hiddenDim = parsedConfig["hiddenDim"]; 1658 | 1659 | let memoryTransfer = 0; 1660 | if (ggml_quants.includes(quantType)) { 1661 | memoryTransfer = 1662 | (computeModelSizeGGML(parsedConfig, quantType) * 1024 * 1024) / 1663 | 2.0; 1664 | } else { 1665 | if (quantType === "no_quant") { 1666 | memoryTransfer = computeModelSize(parsedConfig); 1667 | } else { 1668 | if (quantType === "bnb_int8") { 1669 | memoryTransfer = computeModelSize(parsedConfig) / 2.0; 1670 | } 1671 | if (quantType === "bnb_q4") { 1672 | memoryTransfer = computeModelSize(parsedConfig) / 4.0; 1673 | } 1674 | } 1675 | } 1676 | 1677 | //! 
Prompt Time Calculation 1678 | //Time to process prompt (depending on contextLen this is either compute bound or memory bound) 1679 | //Since the prompts are usually (above >50, i think it is safe to say this is mostly COMPUTE BOUND) 1680 | 1681 | let theoryTimePrompt = 1682 | 2 * promptLen * memoryTransfer + 1683 | 2 * numLayers * hiddenDim * hiddenDim * 2 * promptLen; 1684 | theoryTimePrompt = batchSize * theoryTimePrompt; 1685 | let theoryTimePrompt_in_ms = 1686 | theoryTimePrompt / (tera * gpu_compute * 0.85); 1687 | 1688 | let finalPromptTime = 1689 | theoryTimePrompt_in_ms * getFloatRatio_F16(quantType) * 1.8 + 1690 | convertByteToMB(2 * memoryTransfer) * (0.008 / 100); 1691 | 1692 | // const totalFlopsInB = 2*batchSize*modelSizeinB*billion + getTotalFlopsForKV(parsedConfig, batchSize, contextLen); 1693 | 1694 | //! Per token Time calculation 1695 | const utilizationRate = 1.0; 1696 | const kv_cache_memory = 2 * 2 * numLayers * hiddenDim * totalLen; 1697 | 1698 | // console.log( 1699 | // "memory GPU side: ", 1700 | // convertByteToMB(memoryTransfer), 1701 | // memoryTransfer 1702 | // ); 1703 | 1704 | //1326940160*2 1705 | 1706 | let timeIfMemory = 1707 | convertByteToGB( 1708 | 2 * memoryTransfer * extraFactor + kv_cache_memory * extraFactor 1709 | ) / 1710 | (utilizationRate * gpu_bandwidth); 1711 | let timeIfMemory_in_ms = timeIfMemory * 1000; 1712 | 1713 | //! 
Check if it is compute bound 1714 | let totalFlopsToken = 1715 | 2 * memoryTransfer + totalLen * hiddenDim * 2 * numLayers * 2 * 2; 1716 | 1717 | // console.log( 1718 | // 2 * memoryTransfer, 1719 | // totalLen * hiddenDim * 2 * numLayers * 2 1720 | // ); 1721 | 1722 | totalFlopsToken = batchSize * totalFlopsToken; 1723 | let timeIfFlops_in_ms = 1724 | (totalFlopsToken * 1000) / (tera * gpu_compute * 0.85); 1725 | 1726 | let finalTimeToConsider = null; 1727 | let memoryOrCompute = null; 1728 | 1729 | if (timeIfMemory_in_ms > timeIfFlops_in_ms) { 1730 | finalTimeToConsider = timeIfMemory_in_ms; 1731 | memoryOrCompute = "memory"; 1732 | } else { 1733 | finalTimeToConsider = timeIfFlops_in_ms; 1734 | memoryOrCompute = "compute"; 1735 | } 1736 | 1737 | if (!isValidPositiveInteger(numGPU)) { 1738 | setErrorMessage("Number of GPUs have to be positive number (>0)"); 1739 | openModal(); 1740 | return; 1741 | } 1742 | 1743 | if (numGPU > 1) { 1744 | finalTimeToConsider = (finalTimeToConsider * 1.25) / numGPU; 1745 | } 1746 | 1747 | finalTimeToConsider = finalTimeToConsider * bnb_cost; 1748 | finalPromptTime = finalPromptTime * bnb_cost; 1749 | 1750 | let token_per_s = 1000 / finalTimeToConsider; //finalTimeToConsider is time in ms for each token. So divide by 1000 1751 | 1752 | setComputedTokenPerSecond(Math.round(token_per_s)); 1753 | 1754 | const jsonComputeReturnData = { 1755 | "Token/s": 1756 | Math.round(token_per_s) >= 1 ? 
Math.round(token_per_s) : "< 1", 1757 | "ms per token": finalTimeToConsider.toFixed(2), 1758 | // "ms per token (compute bound)": timeIfFlops_in_ms.toFixed(2), 1759 | "Prompt process Time (s)": finalPromptTime.toFixed(2), 1760 | "memory or compute bound?": memoryOrCompute, 1761 | }; 1762 | 1763 | setJsonDataCompute(jsonComputeReturnData); 1764 | setShowTableCompute(true); 1765 | } 1766 | 1767 | function showGPUSpecs() { 1768 | const gpuDataOnlyNum = gpuJSONData[selections.dropdownGPU]; 1769 | setGPUJSONDataForTable(enchanceGPUJSONData(gpuDataOnlyNum)); 1770 | 1771 | setShowTableGPU(true); 1772 | if (gpuTableRef.current) { 1773 | gpuTableRef.current.scrollIntoView({ behavior: "smooth" }); 1774 | } 1775 | } 1776 | 1777 | function showCPUSpecs() { 1778 | const cpuDataOnlyNum = cpuJSONData[selections.dropdownCPU]; 1779 | setCPUJSONDataForTable(enchanceCPUJSONData(cpuDataOnlyNum)); 1780 | setShowTableCPU(true); 1781 | 1782 | if (cpuTableRef.current) { 1783 | cpuTableRef.current.scrollIntoView({ behavior: "smooth" }); 1784 | } 1785 | } 1786 | 1787 | function sanityChecks() { 1788 | if (!isValidPositiveInteger(batchSize)) { 1789 | setErrorMessage( 1790 | "Batch size cant have non numeric or negative/zero values" 1791 | ); 1792 | openModal(); 1793 | return false; 1794 | } 1795 | 1796 | let check1 = checkCombinationInferenceTok( 1797 | selections.dropdownTrnOrNot, 1798 | selections.dropdownQuant, 1799 | setErrorMessage, 1800 | openModal 1801 | ); 1802 | 1803 | let check2 = checkCombinationTrainInferenceTok( 1804 | selections.dropdownQuant, 1805 | setErrorMessage, 1806 | openModal, 1807 | selections.dropdownFullOrNot 1808 | ); 1809 | 1810 | return check1 && check2; 1811 | } 1812 | 1813 | function handleClickTokS() { 1814 | // setErrorMessage("To be added"); 1815 | // openModal(); 1816 | if ( 1817 | !isValidPositiveInteger(contextLen) || 1818 | !isValidPositiveInteger(promptLen) 1819 | ) { 1820 | setErrorMessage( 1821 | "context len & promt len should be positive numbers 
(>0)" 1822 | ); 1823 | openModal(); 1824 | return; 1825 | } 1826 | 1827 | if (!sanityChecks()) { 1828 | return; 1829 | } 1830 | 1831 | if ( 1832 | selections.isGPUorCPU === "usingCPU" && 1833 | selections.dropdownTrnOrNot != "inf_ggml" 1834 | ) { 1835 | setErrorMessage( 1836 | "Inference with CPU only makes applicable(sensible) for GGML" 1837 | ); 1838 | openModal(); 1839 | return; 1840 | } 1841 | 1842 | if (selections.dropdownTrnOrNot === "inf_vLLM") { 1843 | setErrorMessage( 1844 | "Still working on adding vLLM. For now, as a rule of thumb, vLLM is 2-3x faster (than HF) when serving requests at your GPUs capacity" 1845 | ); 1846 | openModal(); 1847 | return; 1848 | } 1849 | 1850 | // if (selections.dropdownTrnOrNot === "trn") { 1851 | // setErrorMessage( 1852 | // "Token/s doesn't make sense for training, as whole sequence is generated at once. But how much time will one forward/backward pass take makese sense. I haven't added that yet." 1853 | // ); 1854 | // openModal(); 1855 | // return; 1856 | // } 1857 | if ( 1858 | selections.dropdownTrnOrNot === "trn" && 1859 | selections.isGPUorCPU === "usingCPU" 1860 | ) { 1861 | setErrorMessage("You can't train using HuggingFace on CPU"); 1862 | openModal(); 1863 | return; 1864 | } 1865 | 1866 | // console.log(gpuJSONData); 1867 | // console.log(cpuJSONData); 1868 | // console.log(selections.dropdownGPU); 1869 | 1870 | const gpuDataOnlyNum = gpuJSONData[selections.dropdownGPU]; 1871 | const cpuDataOnlyNum = cpuJSONData[selections.dropdownCPU]; 1872 | 1873 | let parsedConfig = responseCache.hasOwnProperty(modelName) 1874 | ? 
responseCache[modelName] 1875 | : null; 1876 | 1877 | if (parsedConfig === null) { 1878 | setErrorMessage("Huggingface ID not present"); 1879 | openModal(); 1880 | return; 1881 | } 1882 | 1883 | if (selections.dropdownTrnOrNot === "trn") { 1884 | token_per_second_logic_Train( 1885 | gpuDataOnlyNum, 1886 | parsedConfig, 1887 | promptLen, 1888 | contextLen, 1889 | batchSize, 1890 | setErrorMessage, 1891 | openModal 1892 | ); 1893 | setCompReqHardwareName(selections.dropdownGPU); 1894 | setShowTableComputeSmallInfo(2); 1895 | setCompReqHardwareNameBefore("Time for training: "); 1896 | return; 1897 | } 1898 | 1899 | if (selections.isGPUorCPU === "usingGPU") { 1900 | //! If I have bnb4 or bnb8 selected then put a disclaimer that it doesn't work 1901 | 1902 | token_per_second_logic_GPU( 1903 | gpuDataOnlyNum, 1904 | parsedConfig, 1905 | promptLen, 1906 | contextLen, 1907 | batchSize, 1908 | setErrorMessage, 1909 | openModal 1910 | ); 1911 | setCompReqHardwareName(selections.dropdownGPU); 1912 | setShowTableComputeSmallInfo(1); 1913 | setCompReqHardwareNameBefore("Tokens/s stats for: "); 1914 | } else { 1915 | token_per_second_logic_CPU( 1916 | cpuDataOnlyNum, 1917 | parsedConfig, 1918 | promptLen, 1919 | contextLen, 1920 | batchSize, 1921 | setErrorMessage, 1922 | openModal 1923 | ); 1924 | setCompReqHardwareName(selections.dropdownCPU); 1925 | setShowTableComputeSmallInfo(1); 1926 | setCompReqHardwareNameBefore("Tokens/s stats for: "); 1927 | } 1928 | return; 1929 | } 1930 | 1931 | async function handleReset() { 1932 | setFileNameUpload(""); 1933 | setJsonData(null); 1934 | // setTotalMemoryShown(""); 1935 | // setBreakDownMemory(""); 1936 | setContextLen(1); 1937 | setPromptLen(1); 1938 | setShowTableGPU(false); 1939 | setShowTable(false); 1940 | setBatchSize(""); 1941 | setModelSize(""); 1942 | setModelName(""); 1943 | setShowTableCPU(false); 1944 | setShowTableCompute(false); 1945 | } 1946 | 1947 | async function handleClick() { 1948 | if (modelName.includes("GGML") 
|| modelName.includes("GGUF")) { 1949 | setErrorMessage( 1950 | "If you want info about GGML/GGUF models then enter the normal name & select GGML inference & quant type below. For example, if you want info about llama-2-7b.Q3_K_L.gguf then enter meta-llama/Llama-2-7b in the model name" 1951 | ); 1952 | openModal(); 1953 | return; 1954 | } 1955 | let parsedConfig = responseCache.hasOwnProperty(modelName) 1956 | ? responseCache[modelName] 1957 | : null; 1958 | 1959 | if ( 1960 | !isValidPositiveInteger(contextLen) || 1961 | !isValidPositiveInteger(promptLen) 1962 | ) { 1963 | setErrorMessage( 1964 | "context len & promt len should be positive numbers (>0)" 1965 | ); 1966 | openModal(); 1967 | } 1968 | 1969 | const out = getAllComputedData( 1970 | parsedConfig, 1971 | jsonData, 1972 | modelSize, 1973 | parseInt(contextLen) + parseInt(promptLen), 1974 | 2, 1975 | selections, 1976 | setErrorMessage, 1977 | openModal, 1978 | batchSize, 1979 | selections.isGradCheckPoint 1980 | ); 1981 | 1982 | if (out == null) { 1983 | return; 1984 | } 1985 | 1986 | // setTotalMemoryShown(`Total Memory: ${out["Total"]} MB`); 1987 | // const jsonOut = JSON.stringify(out); 1988 | // setBreakDownMemory(`Breakdown(in MB): ${jsonOut}`); 1989 | setTotalMemoryShown(out["Total"]); 1990 | 1991 | setShowTable(true); 1992 | 1993 | // setGPUJSONDataForTable( 1994 | // enchanceGPUJSONData(gpuJSONData[selections.dropdownGPU]) 1995 | // ); 1996 | 1997 | let numGPUsINeed = Math.ceil( 1998 | out["Total"] / 1999 | (1024 * gpuJSONData[selections.dropdownGPU]["memory"]) 2000 | ); 2001 | // const nameOfGPUForNeed = selections.dropdownGPU + ' GPUs Needed' 2002 | setNumGPUINeed(numGPUsINeed); 2003 | setMemReqHardwareName(selections.dropdownGPU); 2004 | setBreakDownMemoryJson(out); 2005 | } 2006 | 2007 | // const handleClick = () => { 2008 | 2009 | // const trnVal = selections.dropdownTrnOrNot; 2010 | // let totalMemory = 0; 2011 | // let size = parseFloat(modelSize); 2012 | // if (trnVal==='trn'){ 2013 | 2014 
| // } 2015 | 2016 | // console.log(modelSize); 2017 | // console.log(isNumberOrFloat(modelSize)); 2018 | 2019 | // // console.log("clicking"); 2020 | // // setOutput1(selections.dropdownTrnOrNot + ' ' + selections.dropdownFullOrNot); 2021 | 2022 | // // console.log() 2023 | 2024 | // }; 2025 | 2026 | useEffect(() => { 2027 | // Your function here to populate myVariable 2028 | const fetchData = async () => { 2029 | // Fetch data or perform some other operation 2030 | let response = await fetch(configPath); 2031 | response = await response.json(); 2032 | setResponseCache(response); 2033 | setResponseCacheKeys(Object.keys(response)); 2034 | }; 2035 | 2036 | fetchData(); 2037 | }, []); 2038 | 2039 | useEffect(() => { 2040 | if (modelName && responseCacheKeys) { 2041 | if (modelName.length >= 2) { 2042 | const suggestions = getModelSuggestions(modelName, responseCacheKeys); 2043 | setSuggestions(suggestions); 2044 | } else { 2045 | setSuggestions([]); 2046 | } 2047 | } else { 2048 | setSuggestions([]); 2049 | } 2050 | }, [modelName, responseCacheKeys]); 2051 | 2052 | // useEffect(() => { 2053 | // if (modelName && responseCacheKeys) { 2054 | // if (modelName.length > 1) { 2055 | // const filtered = responseCacheKeys.filter((item) => 2056 | // item.startsWith(modelName) 2057 | // ); 2058 | // setSuggestions(filtered.slice(0, 10)); 2059 | // } else { 2060 | // setSuggestions([]); 2061 | // } 2062 | // } else { 2063 | // setSuggestions([]); 2064 | // } 2065 | // }, [modelName]); 2066 | 2067 | // useEffect(() => { 2068 | // if (modelName) { 2069 | // if (modelName.length > 2) { 2070 | // const filtered = responseCacheKeys.filter((item) => 2071 | // item.startsWith(modelName) 2072 | // ); 2073 | // setSuggestions(filtered.slice(0, 10)); 2074 | // } else { 2075 | // setSuggestions([]); 2076 | // } 2077 | // } else { 2078 | // setSuggestions([]); 2079 | // } 2080 | // }, [modelName]); 2081 | 2082 | // console.log(responseCache); 2083 | 2084 | const handleKeyDown = (e) => { 
2085 | if (e.key === "ArrowDown") { 2086 | e.preventDefault(); 2087 | setSelectedIdx((prevIdx) => 2088 | Math.min(prevIdx + 1, suggestions.length - 1) 2089 | ); 2090 | } else if (e.key === "ArrowUp") { 2091 | e.preventDefault(); 2092 | setSelectedIdx((prevIdx) => Math.max(prevIdx - 1, -1)); 2093 | } else if (e.key === "Enter" && selectedIdx >= 0) { 2094 | setModelName(suggestions[selectedIdx]); 2095 | setShowSuggestions(false); 2096 | } 2097 | }; 2098 | 2099 | return ( 2100 |
2101 |
2102 |
2103 | 2111 |
2112 | 2118 |
{errorMessage}
2119 |
2120 |
2121 |
2122 | Are you GPU poor?{" "} 2123 | 🫵🤨 2124 |
2125 |
2126 | Calculate GPU memory requirement and token/s for any LLM 2127 |
2128 |
2129 | meme 2135 |

OR

2136 | meme 2142 |
2143 |
2144 |
2145 |
2146 |
2147 | 2150 | 2151 | 2159 | {modelName && showSuggestions && ( 2160 |
    2161 | {suggestions.map((item, index) => ( 2162 |
  • { 2165 | setModelName(item); 2166 | setShowSuggestions(false); 2167 | }} 2168 | className={`p-2 ${ 2169 | selectedIdx === index 2170 | ? "bg-gray-300" 2171 | : "hover:bg-gray-200" 2172 | } cursor-pointer`} 2173 | > 2174 | {item} 2175 |
  • 2176 | ))} 2177 |
2178 | )} 2179 | 2180 |
2181 | 2182 |
2183 |
2184 | 2187 | 2193 |
2194 | {/*
OR
*/} 2195 | {/*
2196 |
2197 | 2204 | 2210 | 2211 | {fileNameUpload} 2212 | 2213 |
2214 |
2215 | 2221 |
2222 |
*/} 2223 |
2224 | 2225 |

2226 |
2227 |
2228 | 2231 | 2252 | 2253 | {showTrainLenInfo && ( 2254 |
2255 | For training, set tokens to generate as 2256 | 1 2257 |
2258 | )} 2259 |
2260 | 2261 |
2262 |
2263 | 2266 | 2279 |
2280 |
2281 | 2284 | 2294 |
2295 |
2296 | 2299 | 2358 |
2359 |
2360 | 2361 |
2362 |
2363 | 2366 | 2372 |
2373 |
2374 | 2377 | 2383 |
2384 |
2385 | 2388 | 2394 |
2395 |
2396 | {showTrainGradientCheck && (
2397 |
2398 | 2401 | 2413 |
2414 | Only applicable for train 2415 |
2416 |
2417 |
)} 2418 |
2419 |
2420 |
2421 | 2424 |
2425 |
2426 | 2429 | 2437 | 2438 |
2439 | {selections.isGPUorCPU === "usingGPU" && ( 2440 |
2441 |
2442 | 2445 | 2493 |
2494 |
2495 | 2498 | 2504 |
2505 |
2506 | 2512 |
2513 |
2514 | )} 2515 | 2516 | {selections.isGPUorCPU === "usingCPU" && ( 2517 |
2518 |
2519 | 2522 | 2555 |
2556 | {/*
2557 | 2563 | 2570 |
*/} 2571 |
2572 | 2575 | 2591 |
2592 |
2593 | 2599 |
2600 |
2601 | )} 2602 |
2603 |
2604 |

2605 | {/* */} 2606 |
2607 |
2608 | 2614 |
2615 |
2616 | 2622 |
2623 |
2624 | 2630 |
2631 | {/*
2632 | 2638 |
*/} 2639 |
2640 |
2641 |
2642 |
2643 |
2644 | How does 2645 | 2646 | {" "} 2647 | X{" "} 2648 | 2649 | tokens/s look like? 2650 |
2651 |
2652 |
2653 | 2656 | 2662 | 2668 | 2674 |
2675 |
2676 |
2677 | {isVisible &&
{displayedText}
} 2678 |
2679 |
2680 |
2681 |

2682 |
2683 | {/*
2684 | {totalMemoryShown} 2685 |
*/} 2686 |
2687 |
2688 | {showTable && ( 2689 | <> 2690 |
2691 | Memory Requirement:{" "} 2692 | {/* {memReqHardwareName} */} 2693 |
2694 | 2695 | 2696 | {/* Total row */} 2697 | 2698 | 2701 | 2709 | 2710 | 2711 | {/* Breakdown row */} 2712 | 2713 | 2719 | 2720 | 2721 | {/* Name-Value pairs */} 2722 | {Object.entries( 2723 | breakDownMemoryJson 2724 | ).map(([key, value], index) => { 2725 | if (key === "Total") { 2726 | return null; // Skip this iteration and return nothing 2727 | } 2728 | 2729 | return ( 2730 | 2738 | 2741 | 2744 | 2745 | ); 2746 | })} 2747 | 2748 | 2751 | 2754 | 2755 | 2756 |
2699 | Total 2700 | 2702 | 2703 | {" "} 2704 | { 2705 | totalMemoryShown 2706 | } MB{" "} 2707 | 2708 |
2717 | Breakdown 2718 |
2739 | {key} 2740 | 2742 | {value} MB 2743 |
2749 | selected GPUs needed 2750 | 2752 | {numGPUINeed} 2753 |
2757 | 2758 | )} 2759 | {/* */} 2767 |
2768 |
2769 | {showTableCompute && ( 2770 |
2771 |
2772 | {compReqHardwareNameBefore} 2773 | {compReqHardwareName} 2774 |
2775 | {/* {selections.isGPUorCPU==='usingGPU' ? selections.dropdownGPU : selections.dropdownCPU} */} 2776 | 2777 | 2778 | {/* Name-Value pairs */} 2779 | {Object.entries( 2780 | jsonDataCompute 2781 | ).map(([key, value], index) => { 2782 | if (key === "Total") { 2783 | return null; // Skip this iteration and return nothing 2784 | } 2785 | 2786 | return ( 2787 | 2795 | 2798 | 2801 | 2802 | ); 2803 | })} 2804 | 2805 |
2796 | {key} 2797 | 2799 | {value} 2800 |
2806 |
2807 | {showTableComputeDisclaimer} 2808 |
2809 | {showTableComputeSmallInfo == 1 && ( 2810 |
2811 | Check above to see how{" "} 2812 | {computedTokenPerSecond} token/s 2813 | looks like 2814 |
2815 | )} 2816 | {showTableComputeSmallInfo == 2 && ( 2817 |
2818 | For train, generate length = 1. 2819 | Since training is next token pred. 2820 | e.g. if u train on 500 len sequence 2821 | then put 500 in prompt len. 2822 |
2823 | )} 2824 |
2825 | )} 2826 | {/* */} 2834 |
2835 |
2836 |
2837 |
2838 | {showTableGPU && ( 2839 | <> 2840 |
2841 | GPU Info: 2842 |
2843 | 2847 | 2848 | {/* Total row */} 2849 | {/* Name-Value pairs */} 2850 | {Object.entries( 2851 | gpuJsonDataForTable 2852 | ).map(([key, value], index) => { 2853 | return ( 2854 | 2862 | 2865 | 2868 | 2869 | ); 2870 | })} 2871 | 2872 |
2863 | {key} 2864 | 2866 | {value} 2867 |
2873 | 2874 | )} 2875 | {/* */} 2883 |
2884 |
2885 | {showTableCPU && ( 2886 | <> 2887 |
2888 | CPU Info: 2889 |
2890 | 2894 | 2895 | {/* Total row */} 2896 | {/* Name-Value pairs */} 2897 | {Object.entries( 2898 | cpuJsonDataForTable 2899 | ).map(([key, value], index) => { 2900 | return ( 2901 | 2909 | 2912 | 2915 | 2916 | ); 2917 | })} 2918 | 2919 |
2910 | {key} 2911 | 2913 | {value} 2914 |
2920 | 2921 | )} 2922 | {/* */} 2930 |
2931 |
2932 | {/*
{breakDownMemory}
*/} 2933 |
2934 |
2935 | 2945 |
setFaqOpen(!faqOpen)} 2948 | > 2949 |

2950 | Read FAQ 🔽 2951 |

2952 |
2953 | 2954 | {faqOpen && ( 2955 |
    2956 |
  • 2957 | These are APPORXIMATE values. They can vary by 2958 | +-15% depending on your CPU, GPU, cuda version, 2959 | llama.cpp version, model etc. 2960 |
  • 2961 | {/*
  • 2962 | For training, the total context length will be 2963 | prompt Len + Generate Len. The correct (ideal) 2964 | use case is to set generate = 1 for training, 2965 | since all training is next token prediction 2966 | loss. 2967 |
  • */} 2968 |
  • 2969 | CPU inference is only compatible with GGML. You 2970 | can't use CPU with HF/vLLM 2971 |
  • 2972 |
  • GPU + CPU is not yet supported
  • 2973 |
2974 | )} 2975 |
2976 | 2977 | {/*
2978 | PS: These are approximate values & may vary by 500MB-1GB 2979 | depending on the GPU, model, input, cuda version etc. If 2980 | your setup has ~1GB over the requirement you should likely 2981 | be good. 2982 |
2983 |
2984 | 2990 | FAQ 2991 | 2992 |
*/} 2993 | {/* 2994 | 2995 | */} 2996 |
2997 |
2998 | ); 2999 | } 3000 | 3001 | export default App; 3002 | --------------------------------------------------------------------------------