├── .gitattributes
├── .gitignore
├── CreateZip.ps1
├── EasyQuantizationGUI.bat
├── EasyQuantizationGUI.py
├── LICENSE
├── README.md
├── convert.py
├── debug.bat
├── ggml.dll
├── llama-quantize.exe
├── llama.dll
└── requirements.txt


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | venv/
2 | *.zip


--------------------------------------------------------------------------------
/CreateZip.ps1:
--------------------------------------------------------------------------------
 1 | # Read VERSION from Python file
 2 | $versionLine = Get-Content "EasyQuantizationGUI.py" | Select-String "VERSION = "
 3 | if ($versionLine -match 'VERSION = "(.*?)"') {
 4 |     $version = $matches[1]
 5 |     Write-Host "Found version: $version"
 6 | } else {
 7 |     Write-Host "WARNING: Version not found, using 'unknown'"
 8 |     $version = "unknown"
 9 | }
10 | 
11 | # Define the zip file name and subdirectory name
12 | $zipName = "EasyQuantizationGUI-$version.zip"
13 | $subDirName = "EasyQuantizationGUI"
14 | 
15 | # Get the current directory
16 | $sourcePath = (Get-Location).Path
17 | Write-Host "Current directory: $sourcePath"
18 | 
19 | # Create temp directory in Windows temp folder
20 | $tempDir = Join-Path $env:TEMP "EasyQuantizationGUI_temp"
21 | $tempSubDir = Join-Path $tempDir $subDirName
22 | 
23 | if (Test-Path $tempDir) {
24 |    Remove-Item -Recurse -Force $tempDir
25 | }
26 | New-Item -Path $tempSubDir -ItemType Directory -Force | Out-Null
27 | Write-Host "Created temp directory: $tempSubDir"
28 | 
29 | # Copy files (excluding .git, .zip, .ps1, and README.md)
30 | $filesCopied = 0
31 | Get-ChildItem -File | Where-Object {
32 |    $_.Name -ne $zipName -and 
33 |    $_.Name -ne "README.md" -and
34 |    $_.Extension -ne ".zip" -and 
35 |    $_.Extension -ne ".ps1" -and 
36 |    $_.FullName -notlike "*.git*"
37 | } | ForEach-Object {
38 |    Copy-Item $_.FullName -Destination $tempSubDir
39 |    Write-Host "Copied file: $($_.Name)"
40 |    $filesCopied++
41 | }
42 | 
43 | Write-Host "Copied $filesCopied files to temp directory"
44 | 
45 | # Create zip
46 | $zipPath = Join-Path $sourcePath $zipName
47 | Write-Host "Creating zip file at: $zipPath"
48 | Add-Type -Assembly "System.IO.Compression.FileSystem"
49 | [System.IO.Compression.ZipFile]::CreateFromDirectory($tempDir, $zipPath)
50 | 
51 | # Verify zip was created
52 | if (Test-Path $zipPath) {
53 |    Write-Host "Successfully created zip file: $zipPath"
54 | } else {
55 |    Write-Host "ERROR: Zip file was not created!"
56 | }
57 | 
58 | # Cleanup
59 | Remove-Item -Recurse -Force $tempDir
60 | Write-Host "Cleaned up temp directory"


--------------------------------------------------------------------------------
/EasyQuantizationGUI.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | REM Check if pip is installed and show output only if it needs to be installed
 3 | python -m pip --version >nul 2>&1
 4 | if %ERRORLEVEL% neq 0 (
 5 |     echo Installing pip...
 6 |     python -m ensurepip --default-pip
 7 | ) else (
 8 |     python -m pip install --upgrade pip >nul 2>&1
 9 | )
10 | 
11 | REM Check if virtual environment exists, create if it doesn't
12 | if not exist "venv" (
13 |     echo Creating virtual environment...
14 |     python -m venv venv
15 |     call venv\Scripts\activate
16 |     echo Installing requirements...
17 |     pip install -r requirements.txt
18 |     echo Setup complete!
19 |     echo.
20 | ) else (
21 |     call venv\Scripts\activate
22 | )
23 | 
24 | REM Run the application
25 | python EasyQuantizationGUI.py
26 | 
27 | REM Keep the window open if there's an error
28 | if %ERRORLEVEL% neq 0 (
29 |     pause
30 | )


--------------------------------------------------------------------------------
/EasyQuantizationGUI.py:
--------------------------------------------------------------------------------
  1 | VERSION = "1.11"
  2 | 
  3 | import sys
  4 | import subprocess
  5 | import importlib
  6 | import os
  7 | 
  8 | def install(package):
  9 |     subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])
 10 | 
 11 | try:
 12 |     import torch, tqdm, safetensors, gguf, sentencepiece, yaml, numpy
 13 | except ImportError:
 14 |     print("Some required packages are missing. Installing from requirements.txt...")
 15 |     install("requirements.txt")
 16 |     import torch, tqdm, safetensors, gguf, sentencepiece, yaml, numpy
 17 | 
 18 | import tkinter as tk
 19 | from tkinter import filedialog, ttk, messagebox
 20 | import os
 21 | import shutil
 22 | import winsound
 23 | import tkinter.scrolledtext as scrolledtext
 24 | 
 25 | def scroll_entry_to_end(entry):
 26 |     entry.xview_moveto(1)
 27 | 
 28 | def browse_file(entry):
 29 |     file_path = filedialog.askopenfilename(filetypes=[("Model files", "*.safetensors *.gguf *.sft")])
 30 |     if file_path:
 31 |         file_path = file_path.replace('\\', '/')  # Ensure forward slashes
 32 |         entry.delete(0, tk.END)
 33 |         entry.insert(0, file_path)
 34 |         scroll_entry_to_end(entry)
 35 |         suggest_output_file()  # Call this instead of update_output_file
 36 | 
 37 | def suggest_output_file():
 38 |     input_file = input_entry.get()
 39 |     quantize_level = quantize_level_var.get()
 40 |     if input_file:
 41 |         input_dir = os.path.dirname(input_file)
 42 |         input_filename = os.path.basename(input_file)
 43 |         input_name, _ = os.path.splitext(input_filename)
 44 |         output_file = f"{input_dir}/{input_name}-{quantize_level}.gguf"
 45 |         output_entry.delete(0, tk.END)
 46 |         output_entry.insert(0, output_file)
 47 |         scroll_entry_to_end(output_entry)
 48 | 
 49 | def browse_output_file(entry):
 50 |     # Get the current input file and quantization level
 51 |     input_file = input_entry.get()
 52 |     quantize_level = quantize_level_var.get()
 53 |     
 54 |     # Generate a default output filename
 55 |     if input_file:
 56 |         input_dir = os.path.dirname(input_file)
 57 |         input_filename = os.path.basename(input_file)
 58 |         input_name, _ = os.path.splitext(input_filename)
 59 |         default_filename = f"{input_name}-{quantize_level}.gguf"
 60 |     else:
 61 |         default_filename = f"output-{quantize_level}.gguf"
 62 |         input_dir = "/"
 63 |     
 64 |     # Open the file dialog with the default filename
 65 |     file_path = filedialog.asksaveasfilename(
 66 |         initialdir=input_dir,
 67 |         initialfile=default_filename,
 68 |         defaultextension=".gguf", 
 69 |         filetypes=[("GGUF files", "*.gguf")]
 70 |     )
 71 |     
 72 |     if file_path:
 73 |         file_path = file_path.replace('\\', '/')  # Ensure forward slashes
 74 |         entry.delete(0, tk.END)
 75 |         entry.insert(0, file_path)
 76 |         scroll_entry_to_end(entry)
 77 | 
 78 | def disable_ui():
 79 |     global input_entry, output_entry, input_browse, output_browse, quantize_dropdown, run_button
 80 |     input_entry.config(state='disabled')
 81 |     output_entry.config(state='disabled')
 82 |     input_browse.config(state='disabled')
 83 |     output_browse.config(state='disabled')
 84 |     quantize_dropdown.config(state='disabled')
 85 |     run_button.config(state='disabled')
 86 | 
 87 | def enable_ui():
 88 |     global input_entry, output_entry, input_browse, output_browse, quantize_dropdown, run_button
 89 |     input_entry.config(state='normal')
 90 |     output_entry.config(state='normal')
 91 |     input_browse.config(state='normal')
 92 |     output_browse.config(state='normal')
 93 |     quantize_dropdown.config(state='readonly')
 94 |     run_button.config(state='normal')
 95 | 
 96 | def run_llama_quantize():
 97 |     input_file = input_entry.get()
 98 |     output_file = output_entry.get()
 99 |     quantize_level = quantize_level_var.get()
100 |     
101 |     if not input_file or not output_file:
102 |         messagebox.showerror("Error", "Please select both input and output files.")
103 |         return
104 |     
105 |     # Check if input and output files are the same
106 |     if os.path.abspath(input_file) == os.path.abspath(output_file):
107 |         messagebox.showerror("Error", "Input and output files cannot be the same.")
108 |         return
109 | 
110 |     output_dir = os.path.dirname(output_file)
111 |     required_space = 40_000_000_000  # ~40 GB (a bit more than 36.5 GB)
112 |     available_space = shutil.disk_usage(output_dir).free
113 | 
114 |     if available_space < required_space:
115 |         required_gb = required_space / (1024**3)
116 |         available_gb = available_space / (1024**3)
117 |         messagebox.showerror("Error", f"You need {required_gb:.1f} GB of drive space to continue. Only {available_gb:.1f} GB available.")
118 |         return
119 | 
120 |     disable_ui()
121 |     
122 |     # Clear previous log
123 |     process_text.delete('1.0', tk.END)
124 |     root.update()
125 | 
126 |     is_input_gguf = input_file.lower().endswith(".gguf")
127 |     temp_gguf_file = None # Initialize temp_gguf_file
128 | 
129 |     if not is_input_gguf:
130 |         process_text.insert(tk.END, "Starting conversion process (Safetensors/SFT -> GGUF)...\n")
131 |         process_text.see(tk.END)
132 |         root.update()
133 | 
134 |         # Convert the input file to GGUF format
135 |         convert_py_path = resource_path("convert.py")
136 |         output_dir = os.path.dirname(output_file)
137 |         # Use a more descriptive temporary file name based on the output file
138 |         output_name, _ = os.path.splitext(os.path.basename(output_file))
139 |         temp_gguf_file = os.path.join(output_dir, f"{output_name}_temp_conversion.gguf")
140 | 
141 | 
142 |         # Add cleanup of existing temp file
143 |         if os.path.exists(temp_gguf_file):
144 |             try:
145 |                 os.remove(temp_gguf_file)
146 |                 process_text.insert(tk.END, "Cleaned up existing temporary file.\n")
147 |                 process_text.see(tk.END)
148 |                 root.update()
149 |             except Exception as e:
150 |                 process_text.insert(tk.END, f"Error cleaning up temporary file: {e}\n")
151 |                 process_text.see(tk.END)
152 |                 root.update()
153 |                 enable_ui()
154 |                 return
155 | 
156 |         try:
157 |             startupinfo = subprocess.STARTUPINFO()
158 |             startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
159 |             startupinfo.wShowWindow = subprocess.SW_HIDE
160 | 
161 |             # Get the Python executable path from the current environment
162 |             pythonpath = sys.executable
163 |             
164 |             process = subprocess.Popen([pythonpath, convert_py_path, "--src", input_file, "--dst", temp_gguf_file], 
165 |                                        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, 
166 |                                        bufsize=1, universal_newlines=True, startupinfo=startupinfo)
167 |             
168 |             for line in process.stdout:
169 |                 process_text.insert(tk.END, line)
170 |                 process_text.see(tk.END)
171 |                 root.update()
172 |             
173 |             process.wait()
174 |             if process.returncode != 0:
175 |                 raise subprocess.CalledProcessError(process.returncode, process.args)
176 |             
177 |             process_text.insert(tk.END, "Conversion completed successfully.\n")
178 |             process_text.see(tk.END)
179 |             root.update()
180 | 
181 |         except subprocess.CalledProcessError as e:
182 |             process_text.insert(tk.END, f"Error converting file: {e}\n")
183 |             process_text.insert(tk.END, f"Command: {e.cmd}\n")
184 |             process_text.insert(tk.END, f"Return code: {e.returncode}\n")
185 |             process_text.see(tk.END)
186 |             root.update()
187 |             # Clean up the temporary file even if conversion fails
188 |             if temp_gguf_file and os.path.exists(temp_gguf_file):
189 |                 os.remove(temp_gguf_file)
190 |             enable_ui()
191 |             return
192 |         except Exception as e: # Catch other potential errors during conversion
193 |             process_text.insert(tk.END, f"An unexpected error occurred during conversion: {e}\n")
194 |             process_text.see(tk.END)
195 |             root.update()
196 |             if temp_gguf_file and os.path.exists(temp_gguf_file):
197 |                 os.remove(temp_gguf_file)
198 |             enable_ui()
199 |             return
200 |         
201 |         # --- End of conversion block ---
202 |     else:
203 |          process_text.insert(tk.END, "Input is already GGUF. Skipping conversion step.\n")
204 |          process_text.see(tk.END)
205 |          root.update()
206 |          # If input is GGUF, llama-quantize will read directly from it
207 |          quantize_input_file = input_file
208 | 
209 |     # Determine the input file for the quantization step
210 |     quantize_input_file = temp_gguf_file if temp_gguf_file else input_file
211 | 
212 |     # Quantize the file (either the temporary one or the original GGUF)
213 |     llama_quantize_path = resource_path("llama-quantize.exe")
214 |     process_text.insert(tk.END, "Starting quantization process...\n")
215 |     process_text.see(tk.END)
216 |     root.update()
217 | 
218 |     try:
219 |         startupinfo = subprocess.STARTUPINFO()
220 |         startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
221 |         startupinfo.wShowWindow = subprocess.SW_HIDE
222 | 
223 |         # Use quantize_input_file determined above
224 |         process = subprocess.Popen([llama_quantize_path, quantize_input_file, output_file, quantize_level], 
225 |                                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, 
226 |                                    bufsize=1, universal_newlines=True, startupinfo=startupinfo)
227 |         
228 |         for line in process.stdout:
229 |             process_text.insert(tk.END, line)
230 |             process_text.see(tk.END)
231 |             root.update()
232 |         
233 |         process.wait()
234 |         if process.returncode != 0:
235 |             # If quantization failed and we used a temp file, report the temp file name
236 |             if temp_gguf_file:
237 |                  process_text.insert(tk.END, f"Quantization command failed on temporary file: {temp_gguf_file}\n")
238 |             raise subprocess.CalledProcessError(process.returncode, process.args)
239 |         
240 |         process_text.insert(tk.END, "Quantization completed successfully.\n")
241 |     except subprocess.CalledProcessError as e:
242 |         process_text.insert(tk.END, f"Error running llama-quantize: {e}\n")
243 |         process_text.insert(tk.END, f"Command: {e.cmd}\n")
244 |         process_text.insert(tk.END, f"Return code: {e.returncode}\n")
245 |         process_text.see(tk.END)
246 |         root.update()
247 |     except Exception as e: # Catch other potential errors during quantization
248 |         process_text.insert(tk.END, f"An unexpected error occurred during quantization: {e}\n")
249 |         process_text.see(tk.END)
250 |         root.update()
251 |     finally:
252 |         # Clean up the temporary file if it was created
253 |         if temp_gguf_file and os.path.exists(temp_gguf_file):
254 |             try:
255 |                 os.remove(temp_gguf_file)
256 |                 process_text.insert(tk.END, "Cleaned up temporary conversion file.\n")
257 |                 process_text.see(tk.END)
258 |                 root.update()
259 |             except Exception as e:
260 |                  process_text.insert(tk.END, f"Error cleaning up temporary file {temp_gguf_file}: {e}\n")
261 |                  process_text.see(tk.END)
262 |                  root.update()
263 |         
264 |     process_text.insert(tk.END, "Process finished.\n") # Changed message slightly
265 |     process_text.see(tk.END)
266 |     root.update()
267 |     
268 |     enable_ui()
269 |     
270 |     # Play sound effect
271 |     winsound.PlaySound("SystemAsterisk", winsound.SND_ALIAS)
272 | 
273 | def main():
274 |     global root, process_text, input_entry, output_entry, quantize_dropdown, run_button, quantize_level_var
275 |     global input_browse, output_browse  # Add these two variables
276 |     root = tk.Tk()
277 |     root.title(f"Easy Quantization GUI v{VERSION}")
278 |     root.geometry("800x600")
279 | 
280 |     # Quantize level selection
281 |     quantize_frame = tk.Frame(root)
282 |     quantize_frame.pack(pady=10, padx=10)
283 | 
284 |     quantize_label = tk.Label(quantize_frame, text="Quantize Level:")
285 |     quantize_label.pack(side=tk.LEFT)
286 | 
287 |     quantize_levels = ["Q2_K", "Q2_K_S", "Q3_K", "Q3_K_L", "Q3_K_M", "Q3_K_S", "Q4_0", "Q4_1", "Q4_K", "Q4_K_M", "Q4_K_S", "Q5_0", "Q5_1", "Q5_K", "Q5_K_M", "Q5_K_S", "Q6_K", "Q8_0", "F16", "BF16", "F32"]
288 |     quantize_level_var = tk.StringVar(root)
289 |     quantize_level_var.set("Q8_0")  # Set default value to Q8_0
290 | 
291 |     quantize_dropdown = ttk.Combobox(quantize_frame, textvariable=quantize_level_var, values=quantize_levels, state="readonly")
292 |     quantize_dropdown.pack(side=tk.LEFT)
293 |     quantize_dropdown.bind("<<ComboboxSelected>>", lambda event: suggest_output_file())
294 | 
295 |     # Input file selection
296 |     input_frame = tk.Frame(root)
297 |     input_frame.pack(pady=10, padx=10, fill=tk.X)
298 | 
299 |     input_label = tk.Label(input_frame, text="Input File:")
300 |     input_label.pack(side=tk.LEFT)
301 | 
302 |     input_entry = tk.Entry(input_frame)
303 |     input_entry.pack(side=tk.LEFT, expand=True, fill=tk.X)
304 | 
305 |     input_browse = tk.Button(input_frame, text="Browse", command=lambda: browse_file(input_entry))
306 |     input_browse.pack(side=tk.RIGHT)
307 | 
308 |     # Add binding to scroll input entry when it gains focus
309 |     input_entry.bind("<FocusIn>", lambda event: scroll_entry_to_end(input_entry))
310 | 
311 |     # Output file selection
312 |     output_frame = tk.Frame(root)
313 |     output_frame.pack(pady=10, padx=10, fill=tk.X)
314 | 
315 |     output_label = tk.Label(output_frame, text="Output File:")
316 |     output_label.pack(side=tk.LEFT)
317 | 
318 |     output_entry = tk.Entry(output_frame)
319 |     output_entry.pack(side=tk.LEFT, expand=True, fill=tk.X)
320 | 
321 |     output_browse = tk.Button(output_frame, text="Browse", command=lambda: browse_output_file(output_entry))
322 |     output_browse.pack(side=tk.RIGHT)
323 | 
324 |     # Add binding to scroll output entry when it gains focus
325 |     output_entry.bind("<FocusIn>", lambda event: scroll_entry_to_end(output_entry))
326 | 
327 |     # Run button
328 |     run_button = tk.Button(root, text="Run Quantization", command=run_llama_quantize)
329 |     run_button.pack(pady=20)
330 | 
331 |     # Add process log to bottom of main window
332 |     process_frame = tk.Frame(root)
333 |     process_frame.pack(pady=10, padx=10, fill=tk.BOTH, expand=True)
334 | 
335 |     process_label = tk.Label(process_frame, text="Process Log:")
336 |     process_label.pack(side=tk.TOP, anchor='w')
337 | 
338 |     process_text = scrolledtext.ScrolledText(process_frame, wrap=tk.WORD, height=15)
339 |     process_text.pack(expand=True, fill=tk.BOTH)
340 | 
341 |     root.mainloop()
342 | 
343 | def resource_path(relative_path):
344 |     """ Get absolute path to resource, works for dev and for PyInstaller """
345 |     try:
346 |         # PyInstaller creates a temp folder and stores path in _MEIPASS
347 |         base_path = sys._MEIPASS
348 |     except Exception:
349 |         base_path = os.path.abspath(".")
350 | 
351 |     return os.path.join(base_path, relative_path)
352 | 
353 | if __name__ == "__main__":
354 |     main()
355 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This application basically just simplifies this process: https://github.com/city96/ComfyUI-GGUF/tree/main/tools
 2 | 
 3 | ![screenshot](https://github.com/user-attachments/assets/11d2315b-9ea4-4caf-a3a0-e211defae7a7)
 4 | 
 5 | Run `EasyQuantizationGUI.bat` to start the application.
 6 | 
 7 | Requirements:
 8 | - [Python](https://www.python.org/downloads/windows/)
 9 | - Windows (can be adjusted later to support Linux)
10 | 


--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
  1 | # (c) City96 || Apache-2.0 (apache.org/licenses/LICENSE-2.0)
  2 | import os
  3 | import gguf
  4 | import torch
  5 | import logging
  6 | import argparse
  7 | from tqdm import tqdm
  8 | from safetensors.torch import load_file, save_file
  9 | 
 10 | QUANTIZATION_THRESHOLD = 1024
 11 | REARRANGE_THRESHOLD = 512
 12 | MAX_TENSOR_NAME_LENGTH = 127
 13 | MAX_TENSOR_DIMS = 4
 14 | 
 15 | class ModelTemplate:
 16 |     arch = "invalid"  # string describing architecture
 17 |     shape_fix = False # whether to reshape tensors
 18 |     keys_detect = []  # list of lists to match in state dict
 19 |     keys_banned = []  # list of keys that should mark model as invalid for conversion
 20 |     keys_hiprec = []  # list of keys that need to be kept in fp32 for some reason
 21 | 
 22 |     def handle_nd_tensor(self, key, data):
 23 |         raise NotImplementedError(f"Tensor detected that exceeds dims supported by C++ code! ({key} @ {data.shape})")
 24 | 
 25 | class ModelFlux(ModelTemplate):
 26 |     arch = "flux"
 27 |     keys_detect = [
 28 |         ("transformer_blocks.0.attn.norm_added_k.weight",),
 29 |         ("double_blocks.0.img_attn.proj.weight",),
 30 |     ]
 31 |     keys_banned = ["transformer_blocks.0.attn.norm_added_k.weight",]
 32 | 
 33 | class ModelSD3(ModelTemplate):
 34 |     arch = "sd3"
 35 |     keys_detect = [
 36 |         ("transformer_blocks.0.attn.add_q_proj.weight",),
 37 |         ("joint_blocks.0.x_block.attn.qkv.weight",),
 38 |     ]
 39 |     keys_banned = ["transformer_blocks.0.attn.add_q_proj.weight",]
 40 | 
 41 | class ModelAura(ModelTemplate):
 42 |     arch = "aura"
 43 |     keys_detect = [
 44 |         ("double_layers.3.modX.1.weight",),
 45 |         ("joint_transformer_blocks.3.ff_context.out_projection.weight",),
 46 |     ]
 47 |     keys_banned = ["joint_transformer_blocks.3.ff_context.out_projection.weight",]
 48 | 
 49 | class ModelHiDream(ModelTemplate):
 50 |     arch = "hidream"
 51 |     keys_detect = [
 52 |         (
 53 |             "caption_projection.0.linear.weight",
 54 |             "double_stream_blocks.0.block.ff_i.shared_experts.w3.weight"
 55 |         )
 56 |     ]
 57 |     keys_hiprec = [
 58 |         # nn.parameter, can't load from BF16 ver
 59 |         ".ff_i.gate.weight",
 60 |         "img_emb.emb_pos"
 61 |     ]
 62 | 
 63 | class ModelHyVid(ModelTemplate):
 64 |     arch = "hyvid"
 65 |     keys_detect = [
 66 |         (
 67 |             "double_blocks.0.img_attn_proj.weight",
 68 |             "txt_in.individual_token_refiner.blocks.1.self_attn_qkv.weight",
 69 |         )
 70 |     ]
 71 | 
 72 |     def handle_nd_tensor(self, key, data):
 73 |         # hacky but don't have any better ideas
 74 |         path = f"./fix_5d_tensors_{self.arch}.safetensors" # TODO: somehow get a path here??
 75 |         if os.path.isfile(path):
 76 |             raise RuntimeError(f"5D tensor fix file already exists! {path}")
 77 |         fsd = {key: torch.from_numpy(data)}
 78 |         tqdm.write(f"5D key found in state dict! Manual fix required! - {key} {data.shape}")
 79 |         save_file(fsd, path)
 80 | 
 81 | class ModelWan(ModelHyVid):
 82 |     arch = "wan"
 83 |     keys_detect = [
 84 |         (
 85 |             "blocks.0.self_attn.norm_q.weight",
 86 |             "text_embedding.2.weight",
 87 |             "head.modulation",
 88 |         )
 89 |     ]
 90 |     keys_hiprec = [
 91 |         ".modulation" # nn.parameter, can't load from BF16 ver
 92 |     ]
 93 | 
 94 | class ModelLTXV(ModelTemplate):
 95 |     arch = "ltxv"
 96 |     keys_detect = [
 97 |         (
 98 |             "adaln_single.emb.timestep_embedder.linear_2.weight",
 99 |             "transformer_blocks.27.scale_shift_table",
100 |             "caption_projection.linear_2.weight",
101 |         )
102 |     ]
103 |     keys_hiprec = [
104 |         "scale_shift_table" # nn.parameter, can't load from BF16 base quant
105 |     ]
106 | 
107 | class ModelSDXL(ModelTemplate):
108 |     arch = "sdxl"
109 |     shape_fix = True
110 |     keys_detect = [
111 |         ("down_blocks.0.downsamplers.0.conv.weight", "add_embedding.linear_1.weight",),
112 |         (
113 |             "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight",
114 |             "output_blocks.2.2.conv.weight", "output_blocks.5.2.conv.weight",
115 |         ), # Non-diffusers
116 |         ("label_emb.0.0.weight",),
117 |     ]
118 | 
119 | class ModelSD1(ModelTemplate):
120 |     arch = "sd1"
121 |     shape_fix = True
122 |     keys_detect = [
123 |         ("down_blocks.0.downsamplers.0.conv.weight",),
124 |         (
125 |             "input_blocks.3.0.op.weight", "input_blocks.6.0.op.weight", "input_blocks.9.0.op.weight",
126 |             "output_blocks.2.1.conv.weight", "output_blocks.5.2.conv.weight", "output_blocks.8.2.conv.weight"
127 |         ), # Non-diffusers
128 |     ]
129 | 
130 | # The architectures are checked in order and the first successful match terminates the search.
131 | arch_list = [ModelFlux, ModelSD3, ModelAura, ModelHiDream, ModelLTXV, ModelHyVid, ModelWan, ModelSDXL, ModelSD1]
132 | 
133 | def is_model_arch(model, state_dict):
134 |     # check if model is correct
135 |     matched = False
136 |     invalid = False
137 |     for match_list in model.keys_detect:
138 |         if all(key in state_dict for key in match_list):
139 |             matched = True
140 |             invalid = any(key in state_dict for key in model.keys_banned)
141 |             break
142 |     assert not invalid, "Model architecture not allowed for conversion! (i.e. reference VS diffusers format)"
143 |     return matched
144 | 
145 | def detect_arch(state_dict):
146 |     model_arch = None
147 |     for arch in arch_list:
148 |         if is_model_arch(arch, state_dict):
149 |             model_arch = arch()
150 |             break
151 |     assert model_arch is not None, "Unknown model architecture!"
152 |     return model_arch
153 | 
154 | def parse_args():
155 |     parser = argparse.ArgumentParser(description="Generate F16 GGUF files from single UNET")
156 |     parser.add_argument("--src", required=True, help="Source model ckpt file.")
157 |     parser.add_argument("--dst", help="Output unet gguf file.")
158 |     args = parser.parse_args()
159 | 
160 |     if not os.path.isfile(args.src):
161 |         parser.error("No input provided!")
162 | 
163 |     return args
164 | 
165 | def strip_prefix(state_dict):
166 |     # only keep unet with no prefix!
167 |     prefix = None
168 |     for pfx in ["model.diffusion_model.", "model."]:
169 |         if any([x.startswith(pfx) for x in state_dict.keys()]):
170 |             prefix = pfx
171 |             break
172 | 
173 |     sd = {}
174 |     for k, v in state_dict.items():
175 |         if prefix and prefix not in k:
176 |             continue
177 |         if prefix:
178 |             k = k.replace(prefix, "")
179 |         sd[k] = v
180 | 
181 |     return sd
182 | 
183 | def load_state_dict(path):
184 |     if any(path.endswith(x) for x in [".ckpt", ".pt", ".bin", ".pth"]):
185 |         state_dict = torch.load(path, map_location="cpu", weights_only=True)
186 |         for subkey in ["model", "module"]:
187 |             if subkey in state_dict:
188 |                 state_dict = state_dict[subkey]
189 |                 break
190 |         if len(state_dict) < 20:
191 |             raise RuntimeError(f"pt subkey load failed: {state_dict.keys()}")
192 |     else:
193 |         state_dict = load_file(path)
194 | 
195 |     return strip_prefix(state_dict)
196 | 
197 | def handle_tensors(writer, state_dict, model_arch):
198 |     name_lengths = tuple(sorted(
199 |         ((key, len(key)) for key in state_dict.keys()),
200 |         key=lambda item: item[1],
201 |         reverse=True,
202 |     ))
203 |     if not name_lengths:
204 |         return
205 |     max_name_len = name_lengths[0][1]
206 |     if max_name_len > MAX_TENSOR_NAME_LENGTH:
207 |         bad_list = ", ".join(f"{key!r} ({namelen})" for key, namelen in name_lengths if namelen > MAX_TENSOR_NAME_LENGTH)
208 |         raise ValueError(f"Can only handle tensor names up to {MAX_TENSOR_NAME_LENGTH} characters. Tensors exceeding the limit: {bad_list}")
209 |     for key, data in tqdm(state_dict.items()):
210 |         old_dtype = data.dtype
211 | 
212 |         if data.dtype == torch.bfloat16:
213 |             data = data.to(torch.float32).numpy()
214 |         # this is so we don't break torch 2.0.X
215 |         elif data.dtype in [getattr(torch, "float8_e4m3fn", "_invalid"), getattr(torch, "float8_e5m2", "_invalid")]:
216 |             data = data.to(torch.float16).numpy()
217 |         else:
218 |             data = data.numpy()
219 | 
220 |         n_dims = len(data.shape)
221 |         data_shape = data.shape
222 |         if old_dtype == torch.bfloat16:
223 |             data_qtype = gguf.GGMLQuantizationType.BF16
224 |         # elif old_dtype == torch.float32:
225 |         #     data_qtype = gguf.GGMLQuantizationType.F32
226 |         else:
227 |             data_qtype = gguf.GGMLQuantizationType.F16
228 | 
229 |         # The max no. of dimensions that can be handled by the quantization code is 4
230 |         if len(data.shape) > MAX_TENSOR_DIMS:
231 |             model_arch.handle_nd_tensor(key, data)
232 |             continue # needs to be added back later
233 | 
234 |         # get number of parameters (AKA elements) in this tensor
235 |         n_params = 1
236 |         for dim_size in data_shape:
237 |             n_params *= dim_size
238 | 
239 |         if old_dtype in (torch.float32, torch.bfloat16):
240 |             if n_dims == 1:
241 |                 # one-dimensional tensors should be kept in F32
242 |                 # also speeds up inference due to not dequantizing
243 |                 data_qtype = gguf.GGMLQuantizationType.F32
244 | 
245 |             elif n_params <= QUANTIZATION_THRESHOLD:
246 |                 # very small tensors
247 |                 data_qtype = gguf.GGMLQuantizationType.F32
248 | 
249 |             elif any(x in key for x in model_arch.keys_hiprec):
250 |                 # tensors that require max precision
251 |                 data_qtype = gguf.GGMLQuantizationType.F32
252 | 
253 |         if (model_arch.shape_fix                        # NEVER reshape for models such as flux
254 |             and n_dims > 1                              # Skip one-dimensional tensors
255 |             and n_params >= REARRANGE_THRESHOLD         # Only rearrange tensors meeting the size requirement
256 |             and (n_params / 256).is_integer()           # Rearranging only makes sense if total elements is divisible by 256
257 |             and not (data.shape[-1] / 256).is_integer() # Only need to rearrange if the last dimension is not divisible by 256
258 |         ):
259 |             orig_shape = data.shape
260 |             data = data.reshape(n_params // 256, 256)
261 |             writer.add_array(f"comfy.gguf.orig_shape.{key}", tuple(int(dim) for dim in orig_shape))
262 | 
263 |         try:
264 |             data = gguf.quants.quantize(data, data_qtype)
265 |         except (AttributeError, gguf.QuantError) as e:
266 |             tqdm.write(f"falling back to F16: {e}")
267 |             data_qtype = gguf.GGMLQuantizationType.F16
268 |             data = gguf.quants.quantize(data, data_qtype)
269 | 
270 |         new_name = key # do we need to rename?
271 | 
272 |         shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
273 |         tqdm.write(f"{f'%-{max_name_len + 4}s' % f'{new_name}'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
274 | 
275 |         writer.add_tensor(new_name, data, raw_dtype=data_qtype)
276 | 
277 | def convert_file(path, dst_path=None, interact=True, overwrite=False):
278 |     # load & run model detection logic
279 |     state_dict = load_state_dict(path)
280 |     model_arch = detect_arch(state_dict)
281 |     logging.info(f"* Architecture detected from input: {model_arch.arch}")
282 | 
283 |     # detect & set dtype for output file
284 |     dtypes = [x.dtype for x in state_dict.values()]
285 |     dtypes = {x:dtypes.count(x) for x in set(dtypes)}
286 |     main_dtype = max(dtypes, key=dtypes.get)
287 | 
288 |     if main_dtype == torch.bfloat16:
289 |         ftype_name = "BF16"
290 |         ftype_gguf = gguf.LlamaFileType.MOSTLY_BF16
291 |     # elif main_dtype == torch.float32:
292 |     #     ftype_name = "F32"
293 |     #     ftype_gguf = None
294 |     else:
295 |         ftype_name = "F16"
296 |         ftype_gguf = gguf.LlamaFileType.MOSTLY_F16
297 | 
298 |     if dst_path is None:
299 |         dst_path = f"{os.path.splitext(path)[0]}-{ftype_name}.gguf"
300 |     elif "{ftype}" in dst_path: # lcpp logic
301 |         dst_path = dst_path.replace("{ftype}", ftype_name)
302 | 
303 |     if os.path.isfile(dst_path) and not overwrite:
304 |         if interact:
305 |             input("Output exists enter to continue or ctrl+c to abort!")
306 |         else:
307 |             raise OSError("Output exists and overwriting is disabled!")
308 | 
309 |     # handle actual file
310 |     writer = gguf.GGUFWriter(path=None, arch=model_arch.arch)
311 |     writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
312 |     if ftype_gguf is not None:
313 |         writer.add_file_type(ftype_gguf)
314 | 
315 |     handle_tensors(writer, state_dict, model_arch)
316 |     writer.write_header_to_file(path=dst_path)
317 |     writer.write_kv_data_to_file()
318 |     writer.write_tensors_to_file(progress=True)
319 |     writer.close()
320 | 
321 |     fix = f"./fix_5d_tensors_{model_arch.arch}.safetensors"
322 |     if os.path.isfile(fix):
323 |         logging.warning(f"\n### Warning! Fix file found at '{fix}'")
324 |         logging.warning(" you most likely need to run 'fix_5d_tensors.py' after quantization.")
325 | 
326 |     return dst_path, model_arch
327 | 
328 | if __name__ == "__main__":
329 |     args = parse_args()
330 |     convert_file(args.src, args.dst)
331 | 


--------------------------------------------------------------------------------
/debug.bat:
--------------------------------------------------------------------------------
1 | python EasyQuantizationGUI.py
2 | pause


--------------------------------------------------------------------------------
/ggml.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainlizard/EasyQuantizationGUI/4f885af0e44f1e1c7198944d99bf53f45e2b5f45/ggml.dll


--------------------------------------------------------------------------------
/llama-quantize.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainlizard/EasyQuantizationGUI/4f885af0e44f1e1c7198944d99bf53f45e2b5f45/llama-quantize.exe


--------------------------------------------------------------------------------
/llama.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rainlizard/EasyQuantizationGUI/4f885af0e44f1e1c7198944d99bf53f45e2b5f45/llama.dll


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | tqdm
3 | safetensors
4 | gguf
5 | sentencepiece
6 | pyyaml
7 | numpy
8 | 


--------------------------------------------------------------------------------