├── .gitignore ├── README.md ├── huffman_tree.sdf ├── huffman_tree.sln └── huffman_tree ├── BinaryFileHelper.cpp ├── BinaryFileHelper.h ├── HuffmanTree.cpp ├── HuffmanTree.h ├── Node.cpp ├── Node.h ├── ReadMe.txt ├── TransformUtils.cpp ├── TransformUtils.h ├── huffman_tree.cpp ├── huffman_tree.vcxproj ├── huffman_tree.vcxproj.filters ├── huffman_tree.vcxproj.user ├── lorem.txt ├── stdafx.cpp ├── stdafx.h └── targetver.h /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | /.idea 3 | /.vs -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Huffman algorithm implementation (C++) 2 | 3 | Article: https://kamilmysliwiec.com/implementation-of-huffman-coding-algorithm-with-binary-trees 4 | -------------------------------------------------------------------------------- /huffman_tree.sdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamilmysliwiec/huffman-algorithm/813a1e95e5f815234d2ddd6511ddfbbc2414304a/huffman_tree.sdf -------------------------------------------------------------------------------- /huffman_tree.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.24720.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "huffman_tree", "huffman_tree\huffman_tree.vcxproj", "{A55F6107-FD3D-4E25-A050-0CB7839356A2}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | 
/*
 * Tail of /huffman_tree.sln (lines 16-28 of that file), carried over verbatim:
 *
 *   {A55F6107-FD3D-4E25-A050-0CB7839356A2}.Debug|x64.ActiveCfg = Debug|x64
 *   {A55F6107-FD3D-4E25-A050-0CB7839356A2}.Debug|x64.Build.0 = Debug|x64
 *   {A55F6107-FD3D-4E25-A050-0CB7839356A2}.Debug|x86.ActiveCfg = Debug|Win32
 *   {A55F6107-FD3D-4E25-A050-0CB7839356A2}.Debug|x86.Build.0 = Debug|Win32
 *   {A55F6107-FD3D-4E25-A050-0CB7839356A2}.Release|x64.ActiveCfg = Release|x64
 *   {A55F6107-FD3D-4E25-A050-0CB7839356A2}.Release|x64.Build.0 = Release|x64
 *   {A55F6107-FD3D-4E25-A050-0CB7839356A2}.Release|x86.ActiveCfg = Release|Win32
 *   {A55F6107-FD3D-4E25-A050-0CB7839356A2}.Release|x86.Build.0 = Release|Win32
 *   EndGlobalSection
 *   GlobalSection(SolutionProperties) = preSolution
 *   HideSolutionNode = FALSE
 *   EndGlobalSection
 *   EndGlobal
 */

// ============================================================================
// Sources originally listed as:
//   /huffman_tree/BinaryFileHelper.cpp   /huffman_tree/BinaryFileHelper.h
//   /huffman_tree/HuffmanTree.cpp        /huffman_tree/HuffmanTree.h
//   /huffman_tree/Node.cpp               /huffman_tree/Node.h
// They are presented here in dependency order (Node, HuffmanTree,
// BinaryFileHelper) so the section reads and compiles top to bottom.
// ============================================================================

#include <algorithm>
#include <bitset>
#include <cstddef>
#include <fstream>
#include <ios>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// The original headers export namespace std at file scope, and
// huffman_tree.cpp names ifstream, stringstream, endl and exception
// unqualified, so the directive is kept for backward compatibility.
// Everything below is nevertheless spelled with explicit std:: qualification.
using namespace std;

using std::shared_ptr;

// ----------------------------------------------------------------------------
// Node.h / Node.cpp
// ----------------------------------------------------------------------------

/// Binary-tree node carrying an occurrence counter and, for leaves, a value.
///
/// A node built without a value (the two "count" constructors) now holds a
/// value-initialized T instead of an indeterminate one.
///
/// NOTE: the (T) and (int) constructors have the same signature when T is
/// int, so the template is only usable with a T other than int. The file
/// instantiates it for char.
template <typename T>
class Node
{
public:
    /// Leaf for `value`, seen once.
    Node(const T value)
        : left(nullptr), right(nullptr), count(1), value(value)
    {}

    /// Leaf for `value`, seen `count` times.
    Node(const T value, const int count)
        : left(nullptr), right(nullptr), count(count), value(value)
    {}

    /// Childless node that only carries a counter.
    Node(const int count)
        : left(nullptr), right(nullptr), count(count), value()
    {}

    /// Inner node with the given children and counter.
    Node(const int count, const shared_ptr<Node>& left, const shared_ptr<Node>& right)
        : left(left), right(right), count(count), value()
    {}

    int get_count() const { return count; }
    T get_value() const { return value; }

    bool has_left() const { return left != nullptr; }
    bool has_right() const { return right != nullptr; }

    const shared_ptr<Node<T>>& get_left() const { return left; }
    const shared_ptr<Node<T>>& get_right() const { return right; }

    /// Increments the occurrence counter.
    Node& operator ++ ()
    {
        ++count;
        return *this;
    }

private:
    shared_ptr<Node<T>> left;
    shared_ptr<Node<T>> right;

    int count;
    T value;
};

template class Node<char>;

// ----------------------------------------------------------------------------
// HuffmanTree.h / HuffmanTree.cpp
// ----------------------------------------------------------------------------

typedef Node<char> Symbol;
typedef shared_ptr<Symbol> SymbolPtr;

/// Huffman coder whose code table is derived from the text passed to the
/// constructor. Bit streams are represented as strings of '0' / '1'.
class HuffmanTree
{
public:
    /// Builds the tree and the code table from the symbol counts of `data`.
    /// An empty `data` yields an empty coder (no tree, no codes).
    HuffmanTree(const std::string& data);

    /// Concatenates the codes of every character of `data` and pads the
    /// result with '0' up to a multiple of 8 bits.
    /// @throws std::out_of_range if `data` contains a character that was not
    ///         present in the construction text.
    std::string encode(const std::string& data) const;

    /// Decodes a '0'/'1' string produced by encode().
    ///
    /// @param binary_data_str  bit string to decode.
    /// @param symbol_count     number of symbols to produce. When given,
    ///        decoding stops after exactly that many symbols, which is the
    ///        only way to recover trailing symbols whose code consists of
    ///        zeros only (they are indistinguishable from padding) and to
    ///        decode a one-symbol alphabet. When omitted, the historical
    ///        rule applies: decoding stops as soon as no '1' remains.
    /// @throws std::invalid_argument if a character other than '0'/'1' is
    ///         met, or a bit that has no matching branch in the tree.
    std::string decode(const std::string& binary_data_str,
                       std::size_t symbol_count = std::string::npos) const;

private:
    void init(const std::string& data);
    std::vector<SymbolPtr> create_leaves(const std::string& data) const;
    void create_binary_tree(std::vector<SymbolPtr>& vec);
    void create_hash_table();
    void collect_codes(const Symbol& node, const std::string& path);
    bool find_symbol(const std::string& bits, std::size_t& pos, char& symbol) const;

    SymbolPtr binary_tree;
    std::map<char, std::string> hash_table;
};

namespace
{
    // True when the node has neither child, i.e. it stands for one symbol.
    bool is_leaf(const Symbol& node)
    {
        return !node.has_left() && !node.has_right();
    }
}

HuffmanTree::HuffmanTree(const std::string& data)
{
    init(data);
}

void HuffmanTree::init(const std::string& data)
{
    std::vector<SymbolPtr> leaves = create_leaves(data);

    create_binary_tree(leaves);
    create_hash_table();
}

// Counts every distinct character of `data` and returns one leaf per
// character, ordered by character value (std::map iteration order).
std::vector<SymbolPtr> HuffmanTree::create_leaves(const std::string& data) const
{
    std::map<char, SymbolPtr> by_char;
    for (const char c : data)
    {
        const auto pos = by_char.find(c);
        if (pos != by_char.end())
        {
            ++(*pos->second);
            continue;
        }
        by_char.emplace(c, std::make_shared<Symbol>(c));
    }

    std::vector<SymbolPtr> leaves;
    leaves.reserve(by_char.size());
    for (const auto& entry : by_char)
        leaves.push_back(entry.second);
    return leaves;
}

// Repeatedly merges the two least frequent nodes until one root is left.
// The lighter node becomes the left ('0') child. An empty vector leaves the
// tree unset instead of reading vec.front() of an empty container.
void HuffmanTree::create_binary_tree(std::vector<SymbolPtr>& vec)
{
    if (vec.empty())
    {
        binary_tree.reset();
        return;
    }

    const auto by_count = [](const SymbolPtr& a, const SymbolPtr& b) {
        return a->get_count() < b->get_count();
    };
    while (vec.size() > 1)
    {
        std::sort(vec.begin(), vec.end(), by_count);

        const SymbolPtr lighter = vec[0];
        const SymbolPtr heavier = vec[1];
        vec.erase(vec.begin(), vec.begin() + 2);
        vec.push_back(std::make_shared<Symbol>(
            lighter->get_count() + heavier->get_count(),
            lighter,
            heavier));
    }
    binary_tree = vec.front();
}

// Fills the character -> code table with one walk over the tree
// (left edge = '0', right edge = '1').
void HuffmanTree::create_hash_table()
{
    hash_table.clear();
    if (!binary_tree) return;

    if (is_leaf(*binary_tree))
    {
        // One-symbol alphabet: the root is the only leaf and would get the
        // empty code, which cannot carry any information. Give it "0".
        hash_table[binary_tree->get_value()] = "0";
        return;
    }
    collect_codes(*binary_tree, "");
}

void HuffmanTree::collect_codes(const Symbol& node, const std::string& path)
{
    if (is_leaf(node))
    {
        hash_table[node.get_value()] = path;
        return;
    }
    if (node.has_left()) collect_codes(*node.get_left(), path + "0");
    if (node.has_right()) collect_codes(*node.get_right(), path + "1");
}

std::string HuffmanTree::encode(const std::string& data) const
{
    const std::size_t byte_size = 8;

    std::string bits;
    for (const char c : data)
        bits += hash_table.at(c);

    // Pad with zeros so the stream can be packed into whole bytes.
    const std::size_t remainder = bits.size() % byte_size;
    if (remainder != 0)
        bits.append(byte_size - remainder, '0');

    return bits;
}

std::string HuffmanTree::decode(const std::string& binary_data_str,
                                const std::size_t symbol_count) const
{
    std::string decoded;
    if (!binary_tree) return decoded;

    const bool counted = symbol_count != std::string::npos;
    std::size_t pos = 0;
    while (pos < binary_data_str.size())
    {
        if (counted)
        {
            if (decoded.size() >= symbol_count) break;
        }
        else if (binary_data_str.find('1', pos) == std::string::npos)
        {
            // Historical stop rule: only zeros left, treat them as padding.
            break;
        }

        char symbol = 0;
        if (!find_symbol(binary_data_str, pos, symbol))
            break; // stream ended in the middle of a code

        decoded += symbol;
    }
    return decoded;
}

// Reads one code starting at bits[pos]. On success stores the symbol,
// advances pos past the code and returns true. Returns false, leaving pos
// untouched, when the stream ends before a leaf is reached.
bool HuffmanTree::find_symbol(const std::string& bits, std::size_t& pos, char& symbol) const
{
    const Symbol* node = binary_tree.get();

    if (is_leaf(*node))
    {
        // One-symbol alphabet: every symbol is the single bit "0".
        if (bits[pos] != '0')
            throw std::invalid_argument("Exception: Invalid encoded data.");
        ++pos;
        symbol = node->get_value();
        return true;
    }

    std::size_t cursor = pos;
    while (!is_leaf(*node))
    {
        if (cursor >= bits.size()) return false;

        const char bit = bits[cursor++];
        if (bit == '0' && node->has_left())
            node = node->get_left().get();
        else if (bit == '1' && node->has_right())
            node = node->get_right().get();
        else
            throw std::invalid_argument("Exception: Invalid encoded data.");
    }

    pos = cursor;
    symbol = node->get_value();
    return true;
}

// ----------------------------------------------------------------------------
// BinaryFileHelper.h / BinaryFileHelper.cpp
// ----------------------------------------------------------------------------

/// Static helpers for moving raw bytes between files and memory.
class BinaryFileHelper
{
public:
    /// Writes `data` to `filename` in binary mode, replacing the file.
    /// @throws std::runtime_error if the file cannot be opened.
    static void write(const std::string& filename, const std::vector<unsigned char>& data);

    /// Reads `filename` in binary mode and returns its content as a string of
    /// '0'/'1' characters, 8 per byte, most significant bit first.
    /// @param max_per_cycle  chunk size, in bytes, used for each read.
    /// @throws std::invalid_argument if `max_per_cycle` is not positive.
    /// @throws std::runtime_error if the file cannot be opened.
    static std::string read(const std::string& filename, const int max_per_cycle = 1000);

    /// Opens `filename` positioned at its end and returns tellg().
    /// No open check is performed; a stream that failed to open reports the
    /// failure value of tellg() rather than throwing.
    static std::streampos get_file_size(const std::string& filename);

private:
    BinaryFileHelper();
};

BinaryFileHelper::BinaryFileHelper() {}

void BinaryFileHelper::write(const std::string& filename, const std::vector<unsigned char>& data)
{
    std::ofstream output(filename, std::ios::binary);
    if (!output.is_open())
        // std::runtime_error is used because std::exception has no portable
        // constructor taking a message; callers catching std::exception&
        // still receive it.
        throw std::runtime_error("Exception: Unable to open file.");

    if (!data.empty())
        output.write(
            reinterpret_cast<const char*>(data.data()),
            static_cast<std::streamsize>(data.size()));
}

std::string BinaryFileHelper::read(const std::string& filename, const int max_per_cycle)
{
    if (max_per_cycle <= 0)
        throw std::invalid_argument("Exception: Chunk size must be positive.");

    std::ifstream input(filename, std::ios::binary);
    if (!input.is_open())
        throw std::runtime_error("Exception: Unable to open file.");

    // The vector owns the chunk buffer, so it is released on every exit path.
    std::vector<char> buffer(static_cast<std::size_t>(max_per_cycle));
    std::string bits;
    do
    {
        input.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));

        // A short (or empty) final read still reports its size via gcount().
        const std::streamsize count = input.gcount();
        for (std::streamsize k = 0; k < count; ++k)
            bits += std::bitset<8>(static_cast<unsigned char>(buffer[k])).to_string();
    } while (input);

    return bits;
}

std::streampos BinaryFileHelper::get_file_size(const std::string& filename)
{
    std::ifstream file(filename, std::ios::binary | std::ios::ate);
    return file.tellg();
}

/*
 * --------------------------------------------------------------------------------
 * /huffman_tree/ReadMe.txt (lines 1-32), carried over verbatim:
 * --------------------------------------------------------------------------------
 * ========================================================================
 * CONSOLE APPLICATION : huffman_tree Project Overview
 * ========================================================================
 *
 * AppWizard has created this huffman_tree application for you.
 *
 * This file contains a summary of what you will find in each of the files that
 * make up your huffman_tree application.
 *
 *
 * huffman_tree.vcxproj
 * This is the main project file for VC++ projects generated using an Application Wizard.
 * It contains information about the version of Visual C++ that generated the file, and
 * information about the platforms, configurations, and project features selected with the
 * Application Wizard.
 *
 * huffman_tree.vcxproj.filters
 * This is the filters file for VC++ projects generated using an Application Wizard.
 * It contains information about the association between the files in your project
 * and the filters. This association is used in the IDE to show grouping of files with
 * similar extensions under a specific node (for e.g. ".cpp" files are associated with the
 * "Source Files" filter).
 *
 * huffman_tree.cpp
 * This is the main application source file.
 *
 * /////////////////////////////////////////////////////////////////////////////
 * Other standard files:
 *
 * StdAfx.h, StdAfx.cpp
 * These files are used to build a precompiled header (PCH) file
 * named huffman_tree.pch and a precompiled types file named StdAfx.obj.
 */
/*
 * Tail of /huffman_tree/ReadMe.txt (lines 33-41), carried over verbatim:
 *
 * /////////////////////////////////////////////////////////////////////////////
 * Other notes:
 *
 * AppWizard uses "TODO:" comments to indicate parts of the source code you
 * should add to or customize.
 *
 * /////////////////////////////////////////////////////////////////////////////
 */

// ============================================================================
// Sources originally listed as:
//   /huffman_tree/TransformUtils.cpp   /huffman_tree/TransformUtils.h
// The declaration is placed before the definitions so the section reads and
// compiles top to bottom.
// ============================================================================

#include <bitset>
#include <cstddef>
#include <string>
#include <vector>

using std::string;
using std::vector;
using std::bitset;

/// Static conversions between packed bytes and their textual '0'/'1' form.
class TransformUtils
{
public:
    /// Packs a string of '0'/'1' characters into bytes, 8 characters per
    /// byte, most significant bit first. Any character other than '1' counts
    /// as a zero bit. A final group shorter than 8 characters is padded with
    /// zero bits on the right instead of being dropped, so no input bit is
    /// lost. Inputs whose length is a multiple of 8 are unaffected.
    static vector<unsigned char> transform_string_to_bytes(const string&);

    /// Returns the 8-character '0'/'1' representation of `byte`,
    /// most significant bit first.
    static string transform_byte_to_string(const unsigned char&);

private:
    TransformUtils();
};

TransformUtils::TransformUtils() {}

vector<unsigned char> TransformUtils::transform_string_to_bytes(const string& data)
{
    const std::size_t byte_size = 8;

    vector<unsigned char> bytes_code;
    bytes_code.reserve((data.size() + byte_size - 1) / byte_size);

    for (std::size_t pos = 0; pos < data.size(); pos += byte_size)
    {
        unsigned char packed = 0x00;
        for (std::size_t b = 0; b < byte_size; ++b)
        {
            packed = static_cast<unsigned char>(packed << 1);

            // Positions past the end of the input contribute a zero bit.
            const std::size_t index = pos + b;
            if (index < data.size() && data[index] == '1') packed |= 0x01;
        }
        bytes_code.push_back(packed);
    }
    return bytes_code;
}

string TransformUtils::transform_byte_to_string(const unsigned char& byte)
{
    const bitset<8> byte_str(byte);
    return byte_str.to_string();
}

// --------------------------------------------------------------------------------
// /huffman_tree/huffman_tree.cpp:
-------------------------------------------------------------------------------- 1 | #include 2 | #include "HuffmanTree.h" 3 | #include "TransformUtils.h" 4 | #include "BinaryFileHelper.h" 5 | 6 | using std::string; 7 | using std::cin; 8 | using std::cout; 9 | 10 | int main() 11 | { 12 | try 13 | { 14 | const auto input = "lorem.txt"; 15 | const auto output = "lorem.bin"; 16 | 17 | ifstream file(input); 18 | if (!file.is_open()) throw exception("Exception: Unable to open file."); 19 | 20 | stringstream stream; 21 | stream << file.rdbuf(); 22 | file.close(); 23 | 24 | const auto file_content = stream.str(); 25 | const auto file_size = BinaryFileHelper::get_file_size(input); 26 | cout << "Before compression: " << file_size << " bytes" << endl; 27 | 28 | HuffmanTree huffman_tree(file_content); 29 | const auto encoded = huffman_tree.encode(file_content); 30 | const auto bytes = TransformUtils::transform_string_to_bytes(encoded); 31 | 32 | BinaryFileHelper::write(output, bytes); 33 | const auto after_size = BinaryFileHelper::get_file_size(output); 34 | const auto delta = file_size - after_size; 35 | 36 | cout << "After compression: " << after_size << " bytes" << endl; 37 | cout << "Delta: " << delta << " (" << static_cast(delta) / file_size << ")" << endl; 38 | 39 | const auto output_data = BinaryFileHelper::read(output); 40 | //cout << huffman_tree.decode(output_data); 41 | } 42 | catch(exception& e) 43 | { 44 | cout << e.what() << endl; 45 | } 46 | cin.get(); 47 | return 0; 48 | } -------------------------------------------------------------------------------- /huffman_tree/huffman_tree.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {A55F6107-FD3D-4E25-A050-0CB7839356A2} 23 | Win32Proj 24 | huffman_tree 25 | 8.1 26 | 27 | 28 | 29 | Application 30 | true 31 | v140 32 | Unicode 33 
| 34 | 35 | Application 36 | false 37 | v140 38 | true 39 | Unicode 40 | 41 | 42 | Application 43 | true 44 | v140 45 | Unicode 46 | 47 | 48 | Application 49 | false 50 | v140 51 | true 52 | Unicode 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | true 74 | 75 | 76 | true 77 | 78 | 79 | false 80 | 81 | 82 | false 83 | 84 | 85 | 86 | 87 | 88 | Level3 89 | Disabled 90 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 91 | 92 | 93 | Console 94 | true 95 | 96 | 97 | 98 | 99 | 100 | 101 | Level3 102 | Disabled 103 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 104 | 105 | 106 | Console 107 | true 108 | 109 | 110 | 111 | 112 | Level3 113 | 114 | 115 | MaxSpeed 116 | true 117 | true 118 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 119 | 120 | 121 | Console 122 | true 123 | true 124 | true 125 | 126 | 127 | 128 | 129 | Level3 130 | 131 | 132 | MaxSpeed 133 | true 134 | true 135 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 136 | 137 | 138 | Console 139 | true 140 | true 141 | true 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /huffman_tree/huffman_tree.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Header Files 23 | 24 | 25 | Header Files 26 | 27 | 28 | Header Files 29 | 30 | 31 | Header Files 32 | 33 | 34 | Header Files 35 | 36 | 37 | Header Files 38 | 39 | 40 | 41 | 42 | Source Files 43 | 44 | 45 | Source Files 
46 | 47 | 48 | Source Files 49 | 50 | 51 | Source Files 52 | 53 | 54 | Source Files 55 | 56 | 57 | Source Files 58 | 59 | 60 | -------------------------------------------------------------------------------- /huffman_tree/huffman_tree.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /huffman_tree/stdafx.cpp: -------------------------------------------------------------------------------- 1 | // stdafx.cpp : source file that includes just the standard includes 2 | // huffman_tree.pch will be the pre-compiled header 3 | // stdafx.obj will contain the pre-compiled type information 4 | 5 | #include "stdafx.h" 6 | 7 | // TODO: reference any additional headers you need in STDAFX.H 8 | // and not in this file 9 | -------------------------------------------------------------------------------- /huffman_tree/stdafx.h: -------------------------------------------------------------------------------- 1 | // stdafx.h : include file for standard system include files, 2 | // or project specific include files that are used frequently, but 3 | // are changed infrequently 4 | // 5 | 6 | #pragma once 7 | 8 | #include "targetver.h" 9 | 10 | #include 11 | #include 12 | 13 | 14 | 15 | // TODO: reference additional headers your program requires here 16 | -------------------------------------------------------------------------------- /huffman_tree/targetver.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Including SDKDDKVer.h defines the highest available Windows platform. 4 | 5 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and 6 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. 7 | 8 | #include 9 | --------------------------------------------------------------------------------