├── .gitignore ├── README.md ├── cpp ├── .gitignore ├── README.md ├── compile.sh └── src │ ├── build_char_table.cpp │ ├── build_hufffman_tree.cpp │ ├── decode.cpp │ ├── main.cpp │ ├── main.hpp │ ├── write_decoded.cpp │ └── write_encoded.cpp ├── cs ├── .gitattributes ├── .gitignore ├── HuffamDecoder.cs ├── HuffmanEncoder.cs ├── HuffmanTree.csproj ├── HuffmanTree.sln ├── Node.cs ├── Program.cs └── README.md ├── go ├── .gitignore ├── README.md ├── go.mod ├── huffman │ ├── concurrent_frequency.go │ ├── huffman.go │ ├── node.go │ └── pq.go └── main.go ├── java ├── .gitignore ├── README.md ├── run.sh └── src │ └── main │ └── java │ └── com │ └── huffmancoding │ ├── HuffamnDecoder.java │ ├── HuffmanEncoder.java │ ├── HuffmanNode.java │ └── Program.java └── python ├── .gitignore ├── README.md ├── huffman_coding.py └── main.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.bin 2 | *.exe 3 | .vscode -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Huffman-Coding Collection 2 | 3 | ## Motivation 4 | 5 | I embarked on this project with a passion for exploring various programming languages and delving into fundamental computer science concepts. The implementation of Huffman coding presented a perfect opportunity to immerse myself in a multifaceted coding adventure, covering a broad range of topics in programming languages. 6 | 7 | #### Comprehensive Learning Experience 8 | 9 | 1. Data Structures: The project necessitated the understanding of data structures, especially priority queues and trees, to encode and decode information. 10 | 2. File I/O: Working on file input/output operations sharpened my skills in handling external data, a crucial aspect of real-world applications. 11 | 3. 
Types, Loops, and Conditions: Crafting the Huffman coding algorithm involved a meticulous use of data types, loops, and conditions. This provided a hands-on experience in applying these foundational programming constructs. 12 | 13 | #### Actual Utility 14 | 15 | Beyond the educational aspect, Huffman coding is a practically useful endeavor. It serves as a powerful tool for data compression, finding applications in various domains where efficient storage and transmission of data are essential. 16 | 17 | ## Languages 18 | 19 | I implemented the Huffman coding algorithm in multiple programming languages, including: 20 | 21 | - Go 22 | - Python 23 | - C++ 24 | - C# 25 | - Java 26 | 27 | Each implementation showcases the adaptability of the algorithm across different languages and provides a holistic view of programming language nuances. 28 | 29 | ## How to Use 30 | 31 | Follow the instructions in each language-specific directory to run and test the Huffman coding implementation. Feel free to explore, modify, and adapt the code for your own projects. 32 | 33 | ## Todos 34 | 35 | - [ ] Write binary code in char table as bytes not as utf-8 strings 36 | 37 | ## Contributing 38 | 39 | I welcome contributions to enhance the functionality and maintainability of this Huffman coding implementation. If you're interested, feel free to open issues, submit pull requests, or contribute to the project in any meaningful way. 40 | -------------------------------------------------------------------------------- /cpp/.gitignore: -------------------------------------------------------------------------------- 1 | *.bin 2 | *.exe 3 | *.out -------------------------------------------------------------------------------- /cpp/README.md: -------------------------------------------------------------------------------- 1 | # Huffman-coding - CPP 2 | 3 | This repository contains a straightforward implementation of Huffman coding in C++. 
// =============================================================================
// cpp/README.md (continued)
//
// ## Usage
//
// Once you have successfully compiled the program, use the following
// command-line arguments to compress and decompress files.
//
// ### File compression
//
//     $ ./program <input_file> <output_file>
//
// Example:
//
//     $ ./program file.txt compressed_data.bin
//
// ### File decompression
//
//     $ ./program -d <compressed_file> <output_file>
//
// Example:
//
//     $ ./program -d compressed_data.bin results.txt
//
// =============================================================================
// cpp/compile.sh
//
//     g++ ./src/*.cpp -O3 -Wall -Wextra
//
// =============================================================================
// The sections below are the files under cpp/src. main.hpp is listed first so
// the listing reads (and compiles) top to bottom; kept as separate files, every
// .cpp starts with `#include "main.hpp"`.
// =============================================================================

// ---- /cpp/src/main.hpp ------------------------------------------------------

#include <bitset>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>

/// One node of the Huffman tree.
/// Leaves carry a byte in `data`; internal nodes are created with '\0' as
/// `data` and always have both children, so "no children" is the leaf test.
/// `frequency` is an int, so occurrence counts above INT_MAX are not supported.
struct HuffmanNode
{
    char data;
    int frequency;
    HuffmanNode *left;
    HuffmanNode *right;

    HuffmanNode(char ch, int freq) : data(ch), frequency(freq), left(nullptr), right(nullptr) {}
};

/// Orders the priority queue so that the node with the LOWEST frequency is on top.
struct CompareNodes
{
    bool operator()(const HuffmanNode *lhs, const HuffmanNode *rhs) const
    {
        return lhs->frequency > rhs->frequency;
    }
};

/// Builds the tree for `text`. Returns nullptr for empty text. The caller owns
/// the returned tree and releases it with free_huffman_tree().
HuffmanNode *build_huffman_tree(const std::string &text);

/// Deletes every node of a tree returned by build_huffman_tree(); nullptr is a no-op.
void free_huffman_tree(HuffmanNode *node);

/// Fills `char_table` with byte -> "0"/"1" code string for every leaf below `node`.
void build_char_table(const HuffmanNode *node, std::unordered_map<char, std::string> &char_table, std::string code);

/// Writes header, code table and packed payload (defined in write_encoded.cpp).
void write_encoded(const std::string &file_name, const std::string &text, std::unordered_map<char, std::string> &char_table);

/// Writes `content` byte-for-byte to `file_name`.
void write_decoded(const std::string &file_name, const std::string &content);

/// Reads a file produced by write_encoded() and returns the original bytes.
std::string decode(std::string &file_name);

// ---- /cpp/src/build_char_table.cpp ------------------------------------------

/**
 * Walks the tree depth-first and records the code of every leaf.
 *
 * @param node        Current subtree root; nullptr is accepted and ignored.
 * @param char_table  Output map, byte -> code made of '0' (left) and '1' (right).
 * @param code        Path from the root to `node`; pass "" for the root.
 *
 * Leaves are recognised by having no children rather than by `data != '\0'`,
 * so a NUL byte in the input receives a code like any other byte. When the
 * tree is a single leaf (input with one distinct byte) the path is empty; the
 * leaf is given the one-bit code "0" so that every occurrence still produces
 * a bit in the output and the file can be decoded.
 */
void build_char_table(const HuffmanNode *node, std::unordered_map<char, std::string> &char_table, std::string code)
{
    if (!node)
        return;

    const bool is_leaf = !node->left && !node->right;
    if (is_leaf)
    {
        char_table[node->data] = code.empty() ? std::string("0") : code;
        return;
    }

    build_char_table(node->left, char_table, code + "0");
    build_char_table(node->right, char_table, code + "1");
}

// ---- /cpp/src/build_hufffman_tree.cpp ---------------------------------------

/**
 * Builds a Huffman tree from the byte frequencies of `text`.
 *
 * @param text  Input bytes.
 * @return Root of the tree, or nullptr when `text` is empty (calling top() on
 *         the empty queue would be undefined behaviour). The nodes are
 *         allocated with `new`; release them with free_huffman_tree().
 */
HuffmanNode *build_huffman_tree(const std::string &text)
{
    if (text.empty())
        return nullptr;

    std::unordered_map<char, int> char_frequency;
    for (const char ch : text)
        ++char_frequency[ch];

    std::priority_queue<HuffmanNode *, std::vector<HuffmanNode *>, CompareNodes> pq;
    for (const auto &[symbol, count] : char_frequency)
        pq.push(new HuffmanNode(symbol, count));

    // Repeatedly merge the two least frequent subtrees until one tree remains.
    while (pq.size() > 1)
    {
        HuffmanNode *left = pq.top();
        pq.pop();
        HuffmanNode *right = pq.top();
        pq.pop();

        HuffmanNode *internal_node = new HuffmanNode('\0', left->frequency + right->frequency);
        internal_node->left = left;
        internal_node->right = right;

        pq.push(internal_node);
    }

    return pq.top();
}

/**
 * Releases a tree returned by build_huffman_tree().
 *
 * @param node  Root of the (sub)tree to delete; nullptr is a no-op.
 */
void free_huffman_tree(HuffmanNode *node)
{
    if (!node)
        return;

    free_huffman_tree(node->left);
    free_huffman_tree(node->right);
    delete node;
}

// ---- /cpp/src/decode.cpp ----------------------------------------------------

namespace
{
// The table is keyed by `char`, so it can never hold more than 256 entries,
// and a prefix code over 256 symbols is at most 255 bits deep. Larger values
// in the header mean the file is truncated or not ours.
constexpr std::size_t kMaxSymbols = 256;
constexpr std::size_t kMaxCodeLength = 255;

[[noreturn]] void fail_corrupt(const std::string &file_name)
{
    std::cerr << "Error reading file (truncated or not a compressed file): " << file_name << std::endl;
    std::exit(1);
}
} // namespace

/**
 * Reads a file written by write_encoded() and reconstructs the original bytes.
 *
 * File layout (all integers are native `size_t`, so files are only portable
 * between builds with the same size_t width and byte order):
 *   padding_bits | table_size | table_size x (char, code_size, code bytes) | payload
 *
 * @param file_name  Path of the compressed file.
 * @return The decoded bytes. Terminates the process with exit code 1 when the
 *         file cannot be opened or its header is inconsistent.
 */
std::string decode(std::string &file_name)
{
    std::ifstream input_file(file_name, std::ios::binary | std::ios::in);

    if (!input_file.is_open())
    {
        std::cerr << "Error opening file for reading: " << file_name << std::endl;
        std::exit(1);
    }

    // read padding size and table size
    std::size_t padding_bits = 0;
    std::size_t table_size = 0;
    input_file.read(reinterpret_cast<char *>(&padding_bits), sizeof(padding_bits));
    input_file.read(reinterpret_cast<char *>(&table_size), sizeof(table_size));
    if (!input_file || padding_bits > 7 || table_size > kMaxSymbols)
        fail_corrupt(file_name);

    // read the table, keyed by code so each lookup while decoding is O(1)
    // instead of a scan over every entry for every bit
    std::unordered_map<std::string, char> symbol_for_code;
    symbol_for_code.reserve(table_size);
    for (std::size_t i = 0; i < table_size; ++i)
    {
        char character = '\0';
        std::size_t code_size = 0;
        input_file.get(character);
        input_file.read(reinterpret_cast<char *>(&code_size), sizeof(code_size));
        if (!input_file || code_size > kMaxCodeLength)
            fail_corrupt(file_name);

        std::string code(code_size, '\0');
        input_file.read(&code[0], static_cast<std::streamsize>(code_size));
        if (!input_file)
            fail_corrupt(file_name);

        symbol_for_code[code] = character;
    }

    // everything after the table is the packed bit stream
    const std::string payload((std::istreambuf_iterator<char>(input_file)), std::istreambuf_iterator<char>());
    input_file.close();

    const std::size_t total_bits = payload.size() * 8;
    if (padding_bits > total_bits)
        fail_corrupt(file_name); // would otherwise underflow the bit count
    const std::size_t data_bits = total_bits - padding_bits;

    // decode bits, most significant bit of each byte first
    std::string decoded;
    std::string current;
    for (std::size_t i = 0; i < data_bits; ++i)
    {
        const unsigned char byte = static_cast<unsigned char>(payload[i / 8]);
        current += ((byte >> (7 - i % 8)) & 1) ? '1' : '0';

        const auto match = symbol_for_code.find(current);
        if (match != symbol_for_code.end())
        {
            decoded += match->second;
            current.clear();
        }
    }

    return decoded;
}

// ---- /cpp/src/main.cpp ------------------------------------------------------

/**
 * Command-line entry point.
 *
 *   program <input_file> <output_file>          compress
 *   program -d <compressed_file> <output_file>  decompress
 *
 * @return 0 on success, 1 on a usage error or when the input cannot be opened.
 */
int main(int argc, char *argv[])
{
    const std::vector<std::string> args(argv, argv + argc);

    // "-d" needs exactly two paths after it; previously "-d <file>" slipped
    // through validation and read args[3] out of bounds.
    const bool decode_mode = argc > 1 && args[1] == "-d";
    const int expected_argc = decode_mode ? 4 : 3;
    if (argc != expected_argc)
    {
        const std::string program = argc > 0 ? args[0] : std::string("program");
        std::cerr << "CLI args error" << '\n'
                  << "usage: " << program << " <input_file> <output_file>" << '\n'
                  << "       " << program << " -d <compressed_file> <output_file>" << '\n';
        return 1;
    }

    // decode
    if (decode_mode)
    {
        std::string in_filepath = args[2];
        const std::string &out_filepath = args[3];

        const std::string content = decode(in_filepath);
        write_decoded(out_filepath, content);
        return 0;
    }

    // encode
    const std::string &in_filepath = args[1];
    const std::string &out_filepath = args[2];

    // Read-only, binary: a read/write stream fails on read-only inputs, and
    // text mode may translate line endings on some platforms.
    std::ifstream filecontent(in_filepath, std::ios::binary);
    if (!filecontent.is_open())
    {
        std::cerr << "Error opening file for reading: " << in_filepath << std::endl;
        return 1;
    }
    const std::string text((std::istreambuf_iterator<char>(filecontent)), std::istreambuf_iterator<char>());
    filecontent.close();

    std::unordered_map<char, std::string> char_table;
    HuffmanNode *root = build_huffman_tree(text);
    build_char_table(root, char_table, "");
    free_huffman_tree(root); // only the table is needed from here on
    write_encoded(out_filepath, text, char_table);

    const std::uintmax_t og_file_size = std::filesystem::file_size(in_filepath);
    const std::uintmax_t compressed_file_size = std::filesystem::file_size(out_filepath);

    std::cout << "Original file size: " << og_file_size << " bytes" << '\n';
    std::cout << "Encoded file size: " << compressed_file_size << " bytes" << '\n';
    if (og_file_size > 0)
    {
        const double compression_rate =
            (1.0 - static_cast<double>(compressed_file_size) / static_cast<double>(og_file_size)) * 100;
        std::cout << "Compression rate: " << std::fixed << std::setprecision(4) << compression_rate << '\n';
    }
    else
    {
        std::cout << "Compression rate: n/a (empty input)" << '\n';
    }

    return 0;
}

// ---- /cpp/src/write_decoded.cpp ---------------------------------------------

/**
 * Writes the decoded bytes to `file_name`, replacing any existing file.
 *
 * @param file_name  Output path.
 * @param content    Bytes to write; written in binary mode so they land on
 *                   disk unchanged.
 *
 * Terminates the process with exit code 1 when the file cannot be opened or
 * the write fails.
 */
void write_decoded(const std::string &file_name, const std::string &content)
{
    std::ofstream output_file(file_name, std::ios::binary);

    if (!output_file.is_open())
    {
        std::cerr << "Error opening file for writing: " << file_name << std::endl;
        std::exit(1);
    }

    output_file.write(content.data(), static_cast<std::streamsize>(content.size()));
    output_file.close();

    if (!output_file)
    {
        std::cerr << "Error writing file: " << file_name << std::endl;
        std::exit(1);
    }
}
// ---- /cpp/src/write_encoded.cpp ---------------------------------------------

#if __has_include("main.hpp")
#include "main.hpp" // project header that declares write_encoded
#endif

#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>

/**
 * Encodes `text` with `char_table` and writes the compressed file.
 *
 * File layout (all integers are native `size_t`):
 *   padding_bits | table_size | table_size x (char, code_size, code bytes) | payload
 * The payload is the concatenated codes packed most-significant-bit first;
 * the last byte is padded with `padding_bits` zero bits.
 *
 * @param file_name   Output path; an existing file is replaced.
 * @param text        Bytes to encode.
 * @param char_table  Byte -> code string of '0'/'1' characters. It is only
 *                    read; the reference stays non-const to keep the
 *                    signature callers already use.
 *
 * Terminates the process with exit code 1 when the file cannot be opened or
 * written, or when a byte of `text` has no (or an empty) code. Such a byte
 * used to be dropped from the output silently, and the lookup also inserted
 * a bogus empty entry into the table.
 */
void write_encoded(const std::string &file_name, const std::string &text, std::unordered_map<char, std::string> &char_table)
{
    std::ofstream output_file(file_name, std::ios::binary | std::ios::out);

    if (!output_file.is_open())
    {
        std::cerr << "Error opening file for writing: " << file_name << std::endl;
        std::exit(1);
    }

    // Pack the code bits straight into bytes. The header needs the padding
    // size before the payload, so the packed bytes are buffered first; this
    // still uses one eighth of the memory of a '0'/'1' character string.
    std::string payload;
    payload.reserve(text.size()); // sizing hint only
    unsigned char pending = 0;
    unsigned pending_bits = 0;
    for (const char symbol : text)
    {
        const auto entry = char_table.find(symbol);
        if (entry == char_table.end() || entry->second.empty())
        {
            std::cerr << "No Huffman code for byte value "
                      << static_cast<int>(static_cast<unsigned char>(symbol)) << std::endl;
            std::exit(1);
        }

        for (const char bit : entry->second)
        {
            pending = static_cast<unsigned char>((pending << 1) | (bit == '1' ? 1u : 0u));
            if (++pending_bits == 8)
            {
                payload.push_back(static_cast<char>(pending));
                pending = 0;
                pending_bits = 0;
            }
        }
    }

    // Left-align the last partial byte and remember how many filler bits it has.
    std::size_t padding_bits = 0;
    if (pending_bits != 0)
    {
        padding_bits = 8 - pending_bits;
        payload.push_back(static_cast<char>(static_cast<unsigned char>(pending << padding_bits)));
    }

    const auto write_size = [&output_file](std::size_t value) {
        output_file.write(reinterpret_cast<const char *>(&value), sizeof(value));
    };

    // header: padding size, then character table size
    write_size(padding_bits);
    write_size(char_table.size());

    // character table
    for (const auto &entry : char_table)
    {
        output_file.put(entry.first);
        write_size(entry.second.size());
        output_file.write(entry.second.data(), static_cast<std::streamsize>(entry.second.size()));
    }

    // packed binary data
    output_file.write(payload.data(), static_cast<std::streamsize>(payload.size()));
    output_file.close();

    if (!output_file)
    {
        std::cerr << "Error writing file: " << file_name << std::endl;
        std::exit(1);
    }
}

// ---- /cs/.gitattributes (the dump continues here; first two lines of that file) ----
// 1 | ###############################################################################
// 2 | # Set default behavior to automatically normalize line endings.
3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 
42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /cs/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Oo]ut/ 33 | [Ll]og/ 34 | [Ll]ogs/ 35 | 36 | # Visual Studio 2015/2017 cache/options directory 37 | .vs/ 38 | # Uncomment if you have tasks that create the project's static files in wwwroot 39 | #wwwroot/ 40 | 41 | # Visual Studio 2017 auto generated files 42 | Generated\ Files/ 43 | 44 | # MSTest test Results 45 | [Tt]est[Rr]esult*/ 46 | [Bb]uild[Ll]og.* 47 | 48 | # NUnit 49 | *.VisualState.xml 50 | TestResult.xml 51 | nunit-*.xml 52 | 53 | # Build Results of an ATL Project 54 | [Dd]ebugPS/ 55 | [Rr]eleasePS/ 56 | dlldata.c 57 | 58 | # Benchmark Results 59 | BenchmarkDotNet.Artifacts/ 60 | 61 | # .NET Core 62 | project.lock.json 63 | project.fragment.lock.json 64 | artifacts/ 65 | 66 | # ASP.NET Scaffolding 67 | ScaffoldingReadMe.txt 68 | 69 | # StyleCop 70 | StyleCopReport.xml 71 | 72 | # Files built by Visual Studio 73 | *_i.c 74 | *_p.c 75 | *_h.h 76 | *.ilk 77 | *.meta 78 | *.obj 79 | *.iobj 80 | *.pch 81 | *.pdb 82 | *.ipdb 83 | *.pgc 84 | *.pgd 85 | *.rsp 86 | *.sbr 87 | *.tlb 88 | *.tli 89 | *.tlh 90 | *.tmp 91 | *.tmp_proj 92 | *_wpftmp.csproj 93 | *.log 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 
118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. 
Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio LightSwitch build output 298 | **/*.HTMLClient/GeneratedArtifacts 299 | **/*.DesktopClient/GeneratedArtifacts 300 | **/*.DesktopClient/ModelManifest.xml 301 | **/*.Server/GeneratedArtifacts 302 | **/*.Server/ModelManifest.xml 303 | _Pvt_Extensions 304 | 305 | # Paket dependency manager 306 | .paket/paket.exe 307 | paket-files/ 308 | 309 | # FAKE - F# Make 310 | .fake/ 311 | 312 | # CodeRush personal settings 313 | .cr/personal 314 | 315 | # Python Tools for Visual Studio (PTVS) 316 | __pycache__/ 317 | *.pyc 318 | 319 | # Cake - Uncomment if you are using it 320 | # tools/** 321 | # !tools/packages.config 322 | 323 | # Tabs Studio 324 | *.tss 325 | 326 | # Telerik's JustMock configuration file 327 | *.jmconfig 328 | 329 | # BizTalk build output 330 | *.btp.cs 331 | *.btm.cs 332 | *.odx.cs 333 | *.xsd.cs 334 | 335 | # OpenCover UI analysis results 336 | OpenCover/ 337 | 338 | # Azure Stream Analytics local run output 339 | ASALocalRun/ 340 | 341 | # MSBuild Binary and Structured Log 342 | *.binlog 343 | 344 | # NVidia Nsight GPU debugger 
configuration file 345 | *.nvuser 346 | 347 | # MFractors (Xamarin productivity tool) working folder 348 | .mfractor/ 349 | 350 | # Local History for Visual Studio 351 | .localhistory/ 352 | 353 | # BeatPulse healthcheck temp database 354 | healthchecksdb 355 | 356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 357 | MigrationBackup/ 358 | 359 | # Ionide (cross platform F# VS Code tools) working folder 360 | .ionide/ 361 | 362 | # Fody - auto-generated XML schema 363 | FodyWeavers.xsd -------------------------------------------------------------------------------- /cs/HuffamDecoder.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace HuffmanTree 4 | { 5 | public class HuffmanDecoder 6 | { 7 | public Dictionary CharTable { get; set; } 8 | 9 | public HuffmanDecoder(string filePath, string outputPath) 10 | { 11 | CharTable = new Dictionary(); 12 | 13 | using (FileStream fileStream = new(filePath, FileMode.Open)) 14 | using (BinaryReader reader = new(fileStream, Encoding.UTF8)) 15 | { 16 | ReadCharTableFromFile(reader); 17 | 18 | string binstr = ReadBinaryData(reader); 19 | string result = DecodeBinaryString(binstr); 20 | 21 | // write decoded data 22 | File.WriteAllText(outputPath, result); 23 | } 24 | } 25 | 26 | public void ReadCharTableFromFile(BinaryReader reader) 27 | { 28 | int tableCount = reader.ReadInt32(); 29 | 30 | for (int i = 0; i < tableCount; i++) 31 | { 32 | char key = reader.ReadChar(); 33 | int length = reader.ReadInt32(); 34 | string value = Encoding.UTF8.GetString(reader.ReadBytes(length)); 35 | 36 | // NOTE: reversed the key value pair for faster search 37 | CharTable[value] = key; 38 | } 39 | } 40 | 41 | public string ReadBinaryData(BinaryReader reader) 42 | { 43 | int binaryDataSize = reader.ReadInt32(); 44 | int paddingLength = reader.ReadInt32(); 45 | 46 | byte[] byteArr = reader.ReadBytes(binaryDataSize); 47 | 48 | StringBuilder binstr = 
new(byteArr.Length * 8); 49 | 50 | foreach (var b in byteArr) 51 | binstr.Append(Convert.ToString(b, 2).PadLeft(8, '0')); 52 | 53 | // remove padding zeros 54 | binstr.Length -= paddingLength; 55 | 56 | return binstr.ToString(); 57 | } 58 | 59 | private string DecodeBinaryString(string binstr) 60 | { 61 | StringBuilder result = new(); 62 | StringBuilder current = new(); 63 | 64 | for (int i = 0; i < binstr.Length; i++) 65 | { 66 | current.Append(binstr[i]); 67 | 68 | if (CharTable.ContainsKey(current.ToString())) 69 | { 70 | result.Append(CharTable[current.ToString()]); 71 | current.Clear(); 72 | } 73 | } 74 | 75 | return result.ToString(); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /cs/HuffmanEncoder.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace HuffmanTree 4 | { 5 | public class HuffmanEncoder 6 | { 7 | public Dictionary CharTable { get; set; } 8 | 9 | public HuffmanEncoder(string input, string filePath) 10 | { 11 | CharTable = new Dictionary(); 12 | 13 | Node root = BuildHuffmanTree(input); 14 | BuildCharTable(root, ""); 15 | WriteToFile(filePath, input); 16 | } 17 | 18 | private Node BuildHuffmanTree(string input) 19 | { 20 | PriorityQueue pq = new(); 21 | var frequencies = input.GroupBy(x => x).ToDictionary(x => x.Key, x => x.Count()); 22 | 23 | foreach (var kvp in frequencies) 24 | pq.Enqueue(new Node { Symbol = kvp.Key, Frequency = kvp.Value }, kvp.Value); 25 | 26 | while (pq.Count > 1) 27 | { 28 | Node left = pq.Dequeue(); 29 | Node right = pq.Dequeue(); 30 | 31 | // combine the two children's frequencies 32 | int combinedFreq = left.Frequency + right.Frequency; 33 | Node parentNode = new() { Symbol = '\0', Frequency = combinedFreq, Left = left, Right = right }; 34 | 35 | pq.Enqueue(parentNode, combinedFreq); 36 | } 37 | 38 | return pq.Peek(); 39 | } 40 | 41 | private void BuildCharTable(Node node, string prefix) 42 
| { 43 | if (node == null) 44 | return; 45 | 46 | if (!CharTable.ContainsKey(node.Symbol)) 47 | CharTable.Add(node.Symbol, prefix); 48 | 49 | if (node.Left != null) 50 | BuildCharTable(node.Left, prefix + "0"); 51 | if (node.Right != null) 52 | BuildCharTable(node.Right, prefix + "1"); 53 | } 54 | 55 | private string GenerateBinaryString(string input) 56 | { 57 | StringBuilder output = new(); 58 | 59 | for (int i = 0; i < input.Length; i++) 60 | output.Append(CharTable[input[i]]); 61 | 62 | return output.ToString(); 63 | } 64 | 65 | private void WriteCharTable(BinaryWriter writer) 66 | { 67 | writer.Write(CharTable.Count); 68 | foreach (var kvp in CharTable) 69 | { 70 | char symbol = kvp.Key; 71 | string code = kvp.Value; 72 | 73 | writer.Write(symbol); 74 | writer.Write(code.Length); 75 | writer.Write(Encoding.UTF8.GetBytes(kvp.Value)); 76 | } 77 | } 78 | 79 | private void WriteBinaryStringToFile(BinaryWriter writer, string binstr) 80 | { 81 | // get padding size and pad right 82 | int paddingLength = (8 - (binstr.Length % 8)) % 8; 83 | binstr = binstr.PadRight(binstr.Length + paddingLength, '0'); 84 | 85 | // write data size and padding length 86 | writer.Write(binstr.Length / 8); 87 | writer.Write(paddingLength); 88 | 89 | for (int i = 0; i < binstr.Length; i += 8) 90 | { 91 | string eightBits = binstr.Substring(i, Math.Min(8, binstr.Length - i)); 92 | writer.Write(Convert.ToByte(eightBits, 2)); 93 | } 94 | } 95 | 96 | private void WriteToFile(string filePath, string input) 97 | { 98 | string binstr = GenerateBinaryString(input); 99 | 100 | 101 | using (BinaryWriter writer = new(new FileStream(filePath, FileMode.Create))) 102 | { 103 | WriteCharTable(writer); 104 | WriteBinaryStringToFile(writer, binstr); 105 | } 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /cs/HuffmanTree.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | net6.0 6 | 
enable 7 | enable 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /cs/HuffmanTree.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.8.34322.80 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HuffmanTree", "HuffmanTree.csproj", "{79027F24-4A16-46E5-9E24-D3C617FCAC78}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {79027F24-4A16-46E5-9E24-D3C617FCAC78}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {79027F24-4A16-46E5-9E24-D3C617FCAC78}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {79027F24-4A16-46E5-9E24-D3C617FCAC78}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {79027F24-4A16-46E5-9E24-D3C617FCAC78}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {70086345-5648-4AAE-9D24-A814C457FF42} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /cs/Node.cs: -------------------------------------------------------------------------------- 1 | namespace HuffmanTree 2 | { 3 | public class Node 4 | { 5 | public char Symbol; 6 | public int Frequency; 7 | public Node? Left; 8 | public Node? 
Right; 9 | } 10 | } 11 | 12 | 13 | -------------------------------------------------------------------------------- /cs/Program.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | 3 | namespace HuffmanTree 4 | { 5 | 6 | internal class Program 7 | { 8 | static void Main(string[] args) 9 | { 10 | if (args.Length < 2 || (args.Length > 3 && args[1] == "-d") || (args.Length > 2 && args[1] == "-d")) 11 | { 12 | Console.WriteLine("Invalid CLI arguments"); 13 | return; 14 | } 15 | 16 | 17 | try 18 | { 19 | if (args[0] == "d" || args[0] == "decode") 20 | { 21 | _ = new HuffmanDecoder(args[1], args[2]); 22 | 23 | FileInfo compressedFile = new(args[1]); 24 | FileInfo originalFile = new(args[2]); 25 | 26 | Console.WriteLine("Original file size: " + originalFile.Length); 27 | Console.WriteLine("Compressed file size: " + compressedFile.Length); 28 | } 29 | else 30 | { 31 | _ = new HuffmanEncoder(File.ReadAllText(args[0]), args[1]); 32 | 33 | FileInfo originalFile = new(args[0]); 34 | FileInfo compressedFile = new(args[1]); 35 | 36 | double compressionRate = (1 - compressedFile.Length / (double)originalFile.Length) * 100; 37 | 38 | Console.WriteLine("Original file size: " + originalFile.Length); 39 | Console.WriteLine("Compressed file size: " + compressedFile.Length); 40 | Console.WriteLine($"Compression rate: {compressionRate:F2}"); 41 | } 42 | } 43 | catch (Exception e) 44 | { 45 | Console.WriteLine(e.Message); 46 | } 47 | } 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /cs/README.md: -------------------------------------------------------------------------------- 1 | # Huffman-coding - C# 2 | 3 | This repository contains a straightforward implementation of Huffman coding in C#. 4 | 5 | ## Usage 6 | 7 | Once you have successfully compiled the program, utilize the following command-line arguments to compress and decompress files. 
8 | 9 | ### File compression 10 | 11 | ``` 12 | $ ./program 13 | ``` 14 | 15 | Example: 16 | 17 | ``` 18 | $ ./program file.txt compressed_data.bin 19 | ``` 20 | 21 | ### File decompression 22 | 23 | ``` 24 | $ ./program d 25 | ``` 26 | 27 | Example: 28 | 29 | ``` 30 | $ ./program d compressed_data.bin results.txt 31 | $ ./program decode compressed_data.bin results.txt 32 | ``` 33 | -------------------------------------------------------------------------------- /go/.gitignore: -------------------------------------------------------------------------------- 1 | *.exe -------------------------------------------------------------------------------- /go/README.md: -------------------------------------------------------------------------------- 1 | # Go-huffman 2 | 3 | ## Usage 4 | 5 | 1. After cloning the repo, build the executable 6 | ``` 7 | go build -o main 8 | ``` 9 | 1. Compress a file 10 | ``` 11 | $ ./main e 12 | 13 | Example: 14 | $ ./main e mybigfile.txt bigfile.bin 15 | ``` 16 | 1. Decompress a file 17 | ``` 18 | $ ./main d 19 | 20 | Example: 21 | $ ./main d bigfile.bin mybigfile.txt 22 | ``` 23 | ### Example 24 | 25 | ``` 26 | $ .\main.exe e D:\Coding\Work\index.html out.bin 27 | 28 | Original file size: 20318 29 | Compressed file size: 11366 30 | Compression ratio: 1.788 31 | ``` -------------------------------------------------------------------------------- /go/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/hasssanezzz/huffman-coding/go 2 | 3 | go 1.22.1 4 | -------------------------------------------------------------------------------- /go/huffman/concurrent_frequency.go: -------------------------------------------------------------------------------- 1 | package huffman 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "sync" 7 | ) 8 | 9 | func countBytes(data []byte, ch chan map[byte]int, wg *sync.WaitGroup) { 10 | defer wg.Done() 11 | freq := make(map[byte]int) 12 | for _, b := range data { 13 
| freq[b]++ 14 | } 15 | ch <- freq 16 | } 17 | 18 | func mergeMaps(ch chan map[byte]int, result map[byte]int, done chan struct{}) { 19 | for freq := range ch { 20 | for b, count := range freq { 21 | result[b] += count 22 | } 23 | } 24 | done <- struct{}{} 25 | } 26 | 27 | func ConcurrentFrequencyRead(r io.Reader) (map[byte]int, error) { 28 | var wg sync.WaitGroup 29 | ch := make(chan map[byte]int) 30 | done := make(chan struct{}) 31 | chunkSize := 1024 * 1024 32 | result := map[byte]int{} 33 | 34 | go mergeMaps(ch, result, done) 35 | 36 | for { 37 | chunk := make([]byte, chunkSize) 38 | n, err := r.Read(chunk) 39 | if err != nil && err != io.EOF { 40 | return nil, fmt.Errorf("can not read chunk of sise %d: %v", chunkSize, err) 41 | } 42 | 43 | if n == 0 { 44 | break 45 | } 46 | 47 | wg.Add(1) 48 | go countBytes(chunk[:n], ch, &wg) 49 | } 50 | 51 | wg.Wait() 52 | close(ch) 53 | 54 | <-done // Wait for the mergeMaps goroutine to finish 55 | 56 | return result, nil 57 | } 58 | -------------------------------------------------------------------------------- /go/huffman/huffman.go: -------------------------------------------------------------------------------- 1 | package huffman 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "container/heap" 7 | "fmt" 8 | "io" 9 | "os" 10 | "strconv" 11 | "strings" 12 | "time" 13 | ) 14 | 15 | type Huffman struct { 16 | writer io.Writer 17 | file *os.File 18 | table map[byte]string 19 | revTable map[string]byte 20 | } 21 | 22 | func NewHuffman(writer io.Writer, file *os.File) *Huffman { 23 | return &Huffman{ 24 | writer: writer, 25 | file: file, 26 | } 27 | } 28 | 29 | func (h *Huffman) Encode() error { 30 | h.table = map[byte]string{} 31 | 32 | start := time.Now() 33 | root, err := h.buildTree() 34 | if err != nil { 35 | return fmt.Errorf("encode function can not build tree: %v", err) 36 | } 37 | fmt.Printf("tree built in:\t%d\n", time.Since(start).Milliseconds()) 38 | 39 | start = time.Now() 40 | h.buildCharTable(root, "") 41 | 
fmt.Printf("table built in:\t%d\n", time.Since(start).Milliseconds()) 42 | 43 | start = time.Now() 44 | err = h.writeCharTable() 45 | if err != nil { 46 | return fmt.Errorf("can not write char table: %v", err) 47 | } 48 | fmt.Printf("table written:\t%d\n", time.Since(start).Milliseconds()) 49 | 50 | start = time.Now() 51 | err = h.writeBinaryCodes() 52 | if err != nil { 53 | return fmt.Errorf("can not write binary codes: %v", err) 54 | } 55 | fmt.Printf("code written:\t%d\n", time.Since(start).Milliseconds()) 56 | 57 | return nil 58 | } 59 | 60 | func (h *Huffman) buildTree() (*Node, error) { 61 | freq, _ := ConcurrentFrequencyRead(h.file) 62 | 63 | nodes := make([]Node, len(freq)) 64 | pq, i := make(PriorityQueue, 0), 0 65 | 66 | // create and push initial nodes 67 | for b, freq := range freq { 68 | nodes[i] = Node{ 69 | Freq: freq, 70 | Byte: b, 71 | Left: nil, 72 | Right: nil, 73 | } 74 | 75 | heap.Push(&pq, &nodes[i]) 76 | i++ 77 | } 78 | 79 | for pq.Len() > 1 { 80 | left := heap.Pop(&pq).(*Node) 81 | right := heap.Pop(&pq).(*Node) 82 | 83 | newNode := &Node{ 84 | Byte: 0, 85 | Freq: left.Freq + right.Freq, 86 | Left: left, 87 | Right: right, 88 | isInternal: true, 89 | } 90 | 91 | heap.Push(&pq, newNode) 92 | } 93 | 94 | return heap.Pop(&pq).(*Node), nil 95 | } 96 | 97 | func (h *Huffman) buildCharTable(root *Node, code string) { 98 | if root == nil { 99 | return 100 | } 101 | 102 | _, exists := h.table[root.Byte] 103 | if !exists && !root.isInternal { 104 | h.table[root.Byte] = code 105 | } 106 | 107 | if root.Left != nil { 108 | h.buildCharTable(root.Left, code+"1") 109 | } 110 | 111 | if root.Right != nil { 112 | h.buildCharTable(root.Right, code+"0") 113 | } 114 | } 115 | 116 | func (h *Huffman) writeCharTable() error { 117 | tableLen := uint(len(h.table)) 118 | writer := bufio.NewWriter(h.writer) 119 | defer writer.Flush() 120 | 121 | // write table length 122 | err := writer.WriteByte(byte(tableLen)) 123 | if err != nil { 124 | return fmt.Errorf("can not 
write table length %d: %v", tableLen, err) 125 | } 126 | 127 | for b, code := range h.table { 128 | // write byte and code length 129 | _, err = writer.Write([]byte{b, byte(uint(len(code)))}) 130 | if err != nil { 131 | return fmt.Errorf("can not write byte and code length: %v", err) 132 | } 133 | 134 | // write code as a string 135 | _, err = writer.Write([]byte(code)) 136 | if err != nil { 137 | return fmt.Errorf("can not write code %s as string: %v", code, err) 138 | } 139 | } 140 | 141 | return nil 142 | } 143 | 144 | func (h *Huffman) readChunksIntoStringBuilder(builder *strings.Builder) error { 145 | chunkSize := 1024 * 1024 146 | _, err := h.file.Seek(0, io.SeekStart) 147 | if err != nil { 148 | return err 149 | } 150 | 151 | for { 152 | buff := make([]byte, chunkSize) 153 | n, err := h.file.Read(buff) 154 | if err != nil && err != io.EOF { 155 | return err 156 | } 157 | 158 | if n == 0 { 159 | break 160 | } 161 | 162 | for i := 0; i < n; i++ { 163 | code := h.table[buff[i]] 164 | builder.WriteString(code) 165 | } 166 | 167 | if n != chunkSize { 168 | break 169 | } 170 | } 171 | 172 | return nil 173 | } 174 | 175 | func (h *Huffman) writeBinaryCodes() error { 176 | writer := bufio.NewWriter(h.writer) 177 | defer writer.Flush() 178 | 179 | var builder strings.Builder 180 | err := h.readChunksIntoStringBuilder(&builder) 181 | if err != nil { 182 | return err 183 | } 184 | 185 | // add padding 186 | paddingSize := (8 - builder.Len()%8) % 8 187 | for i := 0; i < paddingSize; i++ { 188 | builder.WriteRune('0') 189 | } 190 | 191 | err = writer.WriteByte(byte(uint(paddingSize))) 192 | if err != nil { 193 | return fmt.Errorf("can not write padding size as byte (%d): %v", paddingSize, err) 194 | } 195 | 196 | codes, length := builder.String(), builder.Len() 197 | for i := 0; i < length; i += 8 { 198 | bitsString := codes[i : i+8] 199 | 200 | b, err := strconv.ParseUint(bitsString, 2, 8) 201 | if err != nil { 202 | return fmt.Errorf("can not parse uint from sub string 
%q: %v", bitsString, err) 203 | } 204 | 205 | err = writer.WriteByte(byte(uint(b))) 206 | if err != nil { 207 | return fmt.Errorf("can not write byte from sub string (%q): %v", bitsString, err) 208 | } 209 | } 210 | 211 | return nil 212 | } 213 | 214 | func (h *Huffman) Decode() error { 215 | h.table = map[byte]string{} 216 | h.revTable = map[string]byte{} 217 | 218 | reader := bufio.NewReader(h.file) 219 | 220 | // read table 221 | start := time.Now() 222 | err := h.readCharTable(reader) 223 | if err != nil { 224 | return fmt.Errorf("can read character table: %v", err) 225 | } 226 | fmt.Printf("char table read in:\t%d\n", time.Since(start).Milliseconds()) 227 | 228 | // read binary codes 229 | start = time.Now() 230 | result, err := h.readBinaryCodes(reader) 231 | if err != nil { 232 | return fmt.Errorf("can not read binary codes: %v", err) 233 | } 234 | fmt.Printf("binary codes read in:\t%d\n", time.Since(start).Milliseconds()) 235 | 236 | // write decompressed data 237 | start = time.Now() 238 | _, err = h.writer.Write(result) 239 | if err != nil { 240 | return fmt.Errorf("can not decode: %v", err) 241 | } 242 | fmt.Printf("results written in:\t%d\n", time.Since(start).Milliseconds()) 243 | 244 | return nil 245 | } 246 | 247 | func (h *Huffman) readCharTable(reader *bufio.Reader) error { 248 | tableLengthAsByte, err := reader.ReadByte() 249 | if err != nil { 250 | return fmt.Errorf("can not read table length: %v", err) 251 | } 252 | tableLength := uint(tableLengthAsByte) 253 | 254 | for i := 0; i < int(tableLength); i++ { 255 | currByte, err := reader.ReadByte() 256 | if err != nil { 257 | return fmt.Errorf("can not read a byte: %v", err) 258 | } 259 | 260 | codeSize, err := reader.ReadByte() 261 | if err != nil { 262 | return fmt.Errorf("can not read code size: %v", err) 263 | } 264 | 265 | codeStrBuffer := make([]byte, int(codeSize)) 266 | _, err = reader.Read(codeStrBuffer) 267 | if err != nil { 268 | return fmt.Errorf("can not read code string: %v", err) 269 
| } 270 | 271 | code := string(codeStrBuffer) 272 | h.revTable[code] = currByte 273 | } 274 | 275 | return nil 276 | } 277 | 278 | func (h *Huffman) readBinaryCodes(reader *bufio.Reader) ([]byte, error) { 279 | paddingSizeAsByte, err := reader.ReadByte() 280 | if err != nil { 281 | return nil, err 282 | } 283 | 284 | var codes strings.Builder 285 | for { 286 | b, err := reader.ReadByte() 287 | if err != nil { 288 | if err == io.EOF { 289 | break 290 | } 291 | return nil, fmt.Errorf("can not read a byte from compressed file: %v", err) 292 | } 293 | 294 | codes.WriteString(fmt.Sprintf("%08b", b)) 295 | } 296 | 297 | var result bytes.Buffer 298 | var currCode strings.Builder 299 | 300 | codesStr, length := codes.String(), codes.Len()-int(paddingSizeAsByte) 301 | for i := 0; i < length; i++ { 302 | currCode.WriteByte(codesStr[i]) 303 | if b, ok := h.revTable[currCode.String()]; ok { 304 | result.WriteByte(b) 305 | currCode.Reset() 306 | } 307 | } 308 | 309 | return result.Bytes(), nil 310 | } 311 | -------------------------------------------------------------------------------- /go/huffman/node.go: -------------------------------------------------------------------------------- 1 | package huffman 2 | 3 | import "fmt" 4 | 5 | type Node struct { 6 | Index int 7 | Freq int 8 | Byte byte 9 | Left *Node 10 | Right *Node 11 | isInternal bool 12 | } 13 | 14 | func (n *Node) PrintSymbol() { 15 | fmt.Printf("%c -> %d\n", rune(n.Byte), n.Freq) 16 | } 17 | -------------------------------------------------------------------------------- /go/huffman/pq.go: -------------------------------------------------------------------------------- 1 | package huffman 2 | 3 | type PriorityQueue []*Node 4 | 5 | // Less compares items based on their priorities. 6 | func (pq PriorityQueue) Less(i, j int) bool { 7 | return pq[i].Freq < pq[j].Freq 8 | } 9 | 10 | // Len returns the number of items in the priority queue. 
11 | func (pq PriorityQueue) Len() int { return len(pq) } 12 | 13 | // Push adds an item to the priority queue. 14 | func (pq *PriorityQueue) Push(x interface{}) { 15 | item := x.(*Node) 16 | item.Index = len(*pq) 17 | *pq = append(*pq, item) 18 | } 19 | 20 | // Swap swaps two items in the priority queue. 21 | func (pq PriorityQueue) Swap(i, j int) { 22 | pq[i], pq[j] = pq[j], pq[i] 23 | pq[i].Index = i 24 | pq[j].Index = j 25 | } 26 | 27 | // Pop removes and returns the item with the highest priority. 28 | func (pq *PriorityQueue) Pop() interface{} { 29 | old := *pq 30 | n := len(old) 31 | item := old[n-1] 32 | item.Index = -1 // for safety 33 | *pq = old[0 : n-1] 34 | return item 35 | } 36 | -------------------------------------------------------------------------------- /go/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | 8 | "github.com/hasssanezzz/huffman-coding/go/huffman" 9 | ) 10 | 11 | func main() { 12 | 13 | mode := os.Args[1] 14 | input := os.Args[2] 15 | output := os.Args[3] 16 | 17 | reader, err := os.Open(input) 18 | if err != nil { 19 | log.Fatal(err) 20 | } 21 | defer reader.Close() 22 | 23 | writer, err := os.OpenFile(output, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644) 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | defer writer.Close() 28 | 29 | huff := huffman.NewHuffman(writer, reader) 30 | 31 | if mode == "e" { 32 | err := huff.Encode() 33 | if err != nil { 34 | log.Fatalf("fatal in main: %v\n", err) 35 | } 36 | 37 | readerInfo, err := reader.Stat() 38 | if err != nil { 39 | log.Fatalf("can not read input file info: %v", err) 40 | } 41 | 42 | writeInfo, err := os.Stat(output) 43 | if err != nil { 44 | log.Fatalf("can not read output file info: %v", err) 45 | } 46 | 47 | ratio := (float32(readerInfo.Size()) / float32(writeInfo.Size())) 48 | fmt.Printf("Original file size:\t%d\nCompressed file size:\t%d\nCompression ratio:\t%.3f", 
readerInfo.Size(), writeInfo.Size(), ratio) 49 | } else { 50 | err := huff.Decode() 51 | if err != nil { 52 | log.Fatalf("fatal in main: %v\n", err) 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /java/.gitignore: -------------------------------------------------------------------------------- 1 | target -------------------------------------------------------------------------------- /java/README.md: -------------------------------------------------------------------------------- 1 | # Huffman-coding - Java 2 | 3 | This repository contains a straightforward implementation of Huffman coding in Java. 4 | 5 | ## Usage 6 | 7 | ### Compile the program 8 | 9 | ``` 10 | $ javac -d target src/main/java/com/huffmancoding/*.java 11 | ``` 12 | 13 | Once you have successfully compiled the program, utilize the following command-line arguments to compress and decompress files. 14 | 15 | ### File compression 16 | 17 | ``` 18 | $ java -cp target com.huffmancoding.Program 19 | ``` 20 | 21 | Example: 22 | 23 | ``` 24 | $ java -cp target com.huffmancoding.Program file.txt compressed_data.bin 25 | ``` 26 | 27 | ### File decompression 28 | 29 | ``` 30 | $ java -cp target com.huffmancoding.Program -d 31 | ``` 32 | 33 | Example: 34 | 35 | ``` 36 | $ java -cp target com.huffmancoding.Program -d compress_data.bin results.txt 37 | ``` 38 | -------------------------------------------------------------------------------- /java/run.sh: -------------------------------------------------------------------------------- 1 | javac -d target src/main/java/com/huffmancoding/*.java 2 | java -cp target com.huffmancoding.Program "$@" -------------------------------------------------------------------------------- /java/src/main/java/com/huffmancoding/HuffamnDecoder.java: -------------------------------------------------------------------------------- 1 | package com.huffmancoding; 2 | 3 | import java.io.DataInputStream; 4 | import 
java.io.DataOutputStream; 5 | import java.io.FileInputStream; 6 | import java.io.FileOutputStream; 7 | import java.io.IOException; 8 | import java.util.HashMap; 9 | 10 | public class HuffamnDecoder { 11 | public String readBinaryData(DataInputStream dataInputStream) throws IOException { 12 | StringBuilder binaryData = new StringBuilder(); 13 | 14 | int paddingSize = dataInputStream.readInt(); 15 | int byteCount = dataInputStream.readInt(); 16 | 17 | for (int i = 0; i < byteCount; i++) { 18 | byte b = dataInputStream.readByte(); 19 | String binaryString = Integer.toBinaryString(b & 0xFF); 20 | 21 | while (binaryString.length() < 8) 22 | binaryString = "0" + binaryString; 23 | 24 | binaryData.append(binaryString); 25 | } 26 | 27 | binaryData.setLength(binaryData.length() - paddingSize); 28 | 29 | return binaryData.toString(); 30 | } 31 | 32 | public HashMap readTable(DataInputStream dataInputStream) throws IOException { 33 | HashMap charTable = new HashMap<>(); 34 | 35 | int tableSize = dataInputStream.readInt(); 36 | 37 | for (int i = 0; i < tableSize; i++) { 38 | String data = dataInputStream.readUTF(); 39 | dataInputStream.readInt(); 40 | String code = dataInputStream.readUTF(); 41 | charTable.put(code, data.charAt(0)); 42 | } 43 | 44 | return charTable; 45 | } 46 | 47 | public void decodeAndWriteBinaryData(String binaryData, HashMap charTable, String outFilepath) { 48 | try (DataOutputStream dataOutputStream = new DataOutputStream(new FileOutputStream(outFilepath))) { 49 | StringBuilder sb = new StringBuilder(); 50 | 51 | for (int i = 0; i < binaryData.length(); i++) { 52 | sb.append(binaryData.charAt(i)); 53 | 54 | if (charTable.containsKey(sb.toString())) { 55 | dataOutputStream.writeChar(charTable.get(sb.toString())); 56 | sb.setLength(0); 57 | } 58 | } 59 | } catch (IOException e) { 60 | e.printStackTrace(); 61 | } 62 | } 63 | 64 | public void decode(String filepath, String outFilepath) throws IOException { 65 | DataInputStream dataInputStream = new 
DataInputStream(new FileInputStream(filepath)); 66 | HashMap charTable = readTable(dataInputStream); 67 | String binaryData = readBinaryData(dataInputStream); 68 | decodeAndWriteBinaryData(binaryData, charTable, outFilepath); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /java/src/main/java/com/huffmancoding/HuffmanEncoder.java: -------------------------------------------------------------------------------- 1 | package com.huffmancoding; 2 | 3 | import java.nio.file.Files; 4 | import java.nio.file.Paths; 5 | import java.io.IOException; 6 | import java.io.DataOutputStream; 7 | import java.io.FileOutputStream; 8 | import java.util.PriorityQueue; 9 | import java.util.HashMap; 10 | import java.util.Comparator; 11 | 12 | public class HuffmanEncoder { 13 | 14 | private String filepath; 15 | private String text; 16 | private HashMap charTable; 17 | 18 | HuffmanEncoder(String infilepath, String outfilepath) throws IOException { 19 | this.text = Files.readString(Paths.get(infilepath)); 20 | this.filepath = outfilepath; 21 | charTable = new HashMap<>(); 22 | } 23 | 24 | private HuffmanNode buildTree() { 25 | HashMap mp = new HashMap<>(); 26 | 27 | for (int i = 0; i < text.length(); i++) 28 | mp.put(text.charAt(i), mp.getOrDefault(text.charAt(i), 0) + 1); 29 | 30 | PriorityQueue pq = new PriorityQueue<>(Comparator.comparingInt(c -> c.freq)); 31 | 32 | // push the chars 33 | mp.forEach((data, freq) -> { 34 | pq.add(new HuffmanNode(freq, data, null, null)); 35 | }); 36 | 37 | // do the magic 38 | while (pq.size() > 1) { 39 | HuffmanNode left = pq.poll(); 40 | HuffmanNode right = pq.poll(); 41 | pq.add(new HuffmanNode(left.freq + right.freq, '\0', left, right)); 42 | } 43 | 44 | return pq.peek(); 45 | } 46 | 47 | private void buildCharTable(HuffmanNode root, String code) { 48 | if (root != null) { 49 | if (root.data != '\0') 50 | charTable.put(root.data, code); 51 | 52 | buildCharTable(root.left, code + "0"); 53 | 
buildCharTable(root.right, code + "1"); 54 | } 55 | } 56 | 57 | private void writeCharTable() throws IOException { 58 | int tableSize = charTable.size(); 59 | 60 | try (DataOutputStream dataOutputStream = new DataOutputStream(new FileOutputStream(filepath))) { 61 | 62 | dataOutputStream.writeInt(tableSize); 63 | for (char data : charTable.keySet()) { 64 | String code = charTable.get(data); 65 | dataOutputStream.writeUTF(Character.toString(data)); 66 | dataOutputStream.writeInt(code.length()); 67 | dataOutputStream.writeUTF(code); 68 | } 69 | 70 | } catch (IOException e) { 71 | e.printStackTrace(); 72 | } 73 | } 74 | 75 | private void writeBinaryData(String binstr) throws IOException { 76 | // get paddding size 77 | int paddingSize = (8 - binstr.length() % 8) % 8; 78 | 79 | // add padding 80 | StringBuilder sb = new StringBuilder(binstr); 81 | for (int i = 0; i < paddingSize; i++) 82 | sb.append('0'); 83 | binstr = sb.toString(); 84 | 85 | try (DataOutputStream dataOutputStream = new DataOutputStream(new FileOutputStream(filepath, true))) { 86 | dataOutputStream.writeInt(paddingSize); 87 | dataOutputStream.writeInt(binstr.length() / 8); 88 | // create byte array 89 | for (int i = 0; i < binstr.length(); i += 8) { 90 | String byteString = binstr.substring(i, i + 8); 91 | byte b = (byte) Integer.parseInt(byteString, 2); 92 | dataOutputStream.writeByte(b); 93 | } 94 | 95 | } 96 | } 97 | 98 | private String encodeText() { 99 | StringBuilder sb = new StringBuilder(); 100 | 101 | for (int i = 0; i < text.length(); i++) 102 | sb.append(charTable.get(text.charAt(i))); 103 | 104 | return sb.toString(); 105 | } 106 | 107 | public void encode() throws IOException { 108 | HuffmanNode tree = buildTree(); 109 | buildCharTable(tree, ""); 110 | writeCharTable(); 111 | writeBinaryData(encodeText()); 112 | } 113 | } -------------------------------------------------------------------------------- /java/src/main/java/com/huffmancoding/HuffmanNode.java: 
-------------------------------------------------------------------------------- 1 | package com.huffmancoding; 2 | 3 | public class HuffmanNode { 4 | int freq; 5 | char data = '\0'; 6 | HuffmanNode left = null; 7 | HuffmanNode right = null; 8 | 9 | public HuffmanNode(int freq, char data, HuffmanNode left, HuffmanNode right) { 10 | this.freq = freq; 11 | this.data = data; 12 | this.left = left; 13 | this.right = right; 14 | } 15 | } -------------------------------------------------------------------------------- /java/src/main/java/com/huffmancoding/Program.java: -------------------------------------------------------------------------------- 1 | package com.huffmancoding; 2 | 3 | import java.io.IOException; 4 | 5 | class Program { 6 | public static void main(String[] args) { 7 | 8 | if (args.length < 2 || (args.length > 3 && args[1].equals("-d")) || (args.length > 2 && args[1].equals("-d"))) { 9 | System.err.println("Invalid CLI arguments"); 10 | return; 11 | } 12 | 13 | try { 14 | 15 | if (args[0].equals("-d")) { 16 | HuffamnDecoder decoder = new HuffamnDecoder(); 17 | decoder.decode(args[1], args[2]); 18 | } else { 19 | HuffmanEncoder encoder = new HuffmanEncoder(args[0], args[1]); 20 | encoder.encode(); 21 | } 22 | } catch (IOException e) { 23 | e.printStackTrace(); 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .vscode -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Huffman-coding - Python3 2 | 3 | This repository contains a straightforward implementation of Huffman coding in Python3. 
4 | 5 | ## Usage 6 | 7 | ### File compression 8 | 9 | ``` 10 | $ python3 main.py 11 | ``` 12 | 13 | Example: 14 | 15 | ``` 16 | $ python3 main.py file.txt compressed_data.bin 17 | ``` 18 | 19 | ### File decompression 20 | 21 | ``` 22 | $ python3 main.py -d 23 | ``` 24 | 25 | Example: 26 | 27 | ``` 28 | $ python3 main.py -d compress_data.bin results.txt 29 | ``` 30 | -------------------------------------------------------------------------------- /python/huffman_coding.py: -------------------------------------------------------------------------------- 1 | import struct 2 | from heapq import heapify, heappop, heappush 3 | from collections import Counter 4 | 5 | 6 | class Node: 7 | def __init__(self, chr: str, freq: int, left: 'Node' = None, right: 'Node' = None): 8 | self.symbol = chr 9 | self.freq = freq 10 | self.left = left 11 | self.right = right 12 | 13 | def __lt__(self, other: 'Node'): 14 | return self.freq < other.freq 15 | 16 | 17 | class HuffmanCoding: 18 | 19 | @staticmethod 20 | def generate_tree(text: str) -> Node: 21 | if not text: 22 | return 23 | 24 | char_count = Counter(text) 25 | nodes = [Node(k, v) for k, v in char_count.items()] 26 | heapify(nodes) 27 | 28 | while len(nodes) > 1: 29 | left, right = heappop(nodes), heappop(nodes) 30 | newfreq = left.freq + right.freq 31 | heappush(nodes, Node(None, newfreq, left, right)) 32 | 33 | return heappop(nodes) 34 | 35 | @staticmethod 36 | def build_char_table(root: Node, table: dict[str, str] = {}, code: str = ""): 37 | if not root: 38 | return 39 | 40 | if root.symbol and root.symbol not in table: 41 | table[root.symbol] = code 42 | 43 | if root.left: 44 | HuffmanCoding.build_char_table(root.left, table, code + '0') 45 | if root.right: 46 | HuffmanCoding.build_char_table(root.right, table, code + '1') 47 | 48 | @staticmethod 49 | def encode_data(text: str, char_table: dict[str, str]) -> str: 50 | return ''.join([char_table[c] for c in text]) 51 | 52 | @staticmethod 53 | def write_char_table(char_table: 
@staticmethod
def encode_file(infilepath: str, filepath: str):
    """Compress the text file ``infilepath`` into ``filepath``.

    Output layout: the character table (see ``write_char_table``), then the
    padded bit count, the number of padding bits (both packed with
    ``struct.pack('i', ...)``), then the bits packed eight per byte.

    Args:
        infilepath: path of the text file to compress.
        filepath: path of the file to create or overwrite.
    """
    # 'with' closes the input file; it was previously left open.
    with open(infilepath, 'r') as source:
        text = source.read()

    char_table = {}
    root = HuffmanCoding.generate_tree(text)
    HuffmanCoding.build_char_table(root, char_table)

    binary_string = HuffmanCoding.encode_data(text, char_table)
    # Pad with zero bits up to a whole number of bytes.
    padding_size = (8 - (len(binary_string) % 8)) % 8
    binary_string += padding_size * '0'

    # int('', 2) raises ValueError, so an input that produces no bits
    # (e.g. an empty file) is written with an empty payload instead.
    if binary_string:
        payload = int(binary_string, 2).to_bytes(
            (len(binary_string) + 7) // 8, byteorder='big')
    else:
        payload = b''

    # Everything is computed before the output is opened, so a failure above
    # does not leave a truncated output file behind.
    with open(filepath, 'wb') as file:
        HuffmanCoding.write_char_table(char_table, file)
        # write data size
        file.write(struct.pack('i', len(binary_string)))
        # write padding size
        file.write(struct.pack('i', padding_size))
        file.write(payload)
"__main__": 24 | main() 25 | --------------------------------------------------------------------------------