├── .gitignore ├── LICENSE ├── README.md ├── compression.pdf ├── cpr ├── cpr.sln ├── huffman │ ├── ClassDiagramForHuffman.cd │ ├── bit_string.hpp │ ├── code_word_dictionary.hpp │ ├── encoder.hpp │ ├── frequency_map.hpp │ ├── huffman.vcxproj │ ├── huffman.vcxproj.filters │ ├── huffman_tree.hpp │ ├── main.cpp │ └── node.hpp ├── test_cases │ ├── shake.cpr │ ├── shake.txt │ ├── test_for_encoder.cpr │ └── test_for_encoder.txt └── unit_test_for_huffman │ ├── quick_unit_tests.playlist │ ├── stdafx.cpp │ ├── stdafx.h │ ├── targetver.h │ ├── unit_test_for_bit_string.cpp │ ├── unit_test_for_code_word_dictionary.cpp │ ├── unit_test_for_encoder.cpp │ ├── unit_test_for_frequency_map.cpp │ ├── unit_test_for_huffman.vcxproj │ ├── unit_test_for_huffman.vcxproj.filters │ ├── unit_test_for_huffman_tree.cpp │ └── unit_test_for_node.cpp └── cpr_in_python ├── frequency_map.py ├── frequency_map_test.py ├── node.py └── node_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.sln.docstates 8 | 9 | # Build results 10 | [Dd]ebug/ 11 | [Dd]ebugPublic/ 12 | [Rr]elease/ 13 | [Rr]eleases/ 14 | x64/ 15 | x86/ 16 | build/ 17 | bld/ 18 | [Bb]in/ 19 | [Oo]bj/ 20 | 21 | # Roslyn cache directories 22 | *.ide/ 23 | 24 | # MSTest test Results 25 | [Tt]est[Rr]esult*/ 26 | [Bb]uild[Ll]og.* 27 | 28 | #NUNIT 29 | *.VisualState.xml 30 | TestResult.xml 31 | 32 | # Build Results of an ATL Project 33 | [Dd]ebugPS/ 34 | [Rr]eleasePS/ 35 | dlldata.c 36 | 37 | *_i.c 38 | *_p.c 39 | *_i.h 40 | *.ilk 41 | *.meta 42 | *.obj 43 | *.pch 44 | *.pdb 45 | *.pgc 46 | *.pgd 47 | *.rsp 48 | *.sbr 49 | *.tlb 50 | *.tli 51 | *.tlh 52 | *.tmp 53 | *.tmp_proj 54 | *.log 55 | *.vspscc 56 | *.vssscc 57 | .builds 58 | *.pidb 59 | *.svclog 60 | *.scc 61 | 62 | # Chutzpah Test files 63 | _Chutzpah* 64 | 65 | # Visual C++ cache files 66 | ipch/ 67 | *.aps 68 | *.ncb 69 | *.opensdf 70 | *.sdf 71 | *.cachefile 72 | 73 | # Visual Studio profiler 74 | *.psess 75 | *.vsp 76 | *.vspx 77 | 78 | # TFS 2012 Local Workspace 79 | $tf/ 80 | 81 | # Guidance Automation Toolkit 82 | *.gpState 83 | 84 | # ReSharper is a .NET coding add-in 85 | _ReSharper*/ 86 | *.[Rr]e[Ss]harper 87 | *.DotSettings.user 88 | 89 | # JustCode is a .NET coding add-in 90 | .JustCode 91 | 92 | # TeamCity is a build add-in 93 | _TeamCity* 94 | 95 | # DotCover is a Code Coverage Tool 96 | *.dotCover 97 | 98 | # NCrunch 99 | _NCrunch_* 100 | .*crunch*.local.xml 101 | 102 | # MightyMoose 103 | *.mm.* 104 | AutoTest.Net/ 105 | 106 | # Web workbench (sass) 107 | .sass-cache/ 108 | 109 | # Installshield output folder 110 | [Ee]xpress/ 111 | 112 | # DocProject is a documentation generator add-in 113 | DocProject/buildhelp/ 114 | DocProject/Help/*.HxT 115 | DocProject/Help/*.HxC 116 | DocProject/Help/*.hhc 117 | DocProject/Help/*.hhk 118 | DocProject/Help/*.hhp 119 | DocProject/Help/Html2 120 | DocProject/Help/html 121 | 122 | # Click-Once directory 123 | publish/ 124 | 
125 | # Publish Web Output 126 | *.[Pp]ublish.xml 127 | *.azurePubxml 128 | # TODO: Comment the next line if you want to checkin your web deploy settings 129 | # but database connection strings (with potential passwords) will be unencrypted 130 | *.pubxml 131 | *.publishproj 132 | 133 | # NuGet Packages 134 | *.nupkg 135 | # The packages folder can be ignored because of Package Restore 136 | **/packages/* 137 | # except build/, which is used as an MSBuild target. 138 | !**/packages/build/ 139 | # If using the old MSBuild-Integrated Package Restore, uncomment this: 140 | #!**/packages/repositories.config 141 | 142 | # Windows Azure Build Output 143 | csx/ 144 | *.build.csdef 145 | 146 | # Windows Store app package directory 147 | AppPackages/ 148 | 149 | # Others 150 | sql/ 151 | *.Cache 152 | ClientBin/ 153 | [Ss]tyle[Cc]op.* 154 | ~$* 155 | *~ 156 | *.dbmdl 157 | *.dbproj.schemaview 158 | *.pfx 159 | *.publishsettings 160 | node_modules/ 161 | 162 | # RIA/Silverlight projects 163 | Generated_Code/ 164 | 165 | # Backup & report files from converting an old project file 166 | # to a newer Visual Studio version. 
Backup files are not needed, 167 | # because we have git ;-) 168 | _UpgradeReport_Files/ 169 | Backup*/ 170 | UpgradeLog*.XML 171 | UpgradeLog*.htm 172 | 173 | # SQL Server files 174 | *.mdf 175 | *.ldf 176 | 177 | # Business Intelligence projects 178 | *.rdl.data 179 | *.bim.layout 180 | *.bim_*.settings 181 | 182 | # Microsoft Fakes 183 | FakesAssemblies/ 184 | 185 | # 186 | *build* 187 | 188 | #for pycharm 189 | *.idea* 190 | *.xml 191 | *.name 192 | *.iml 193 | *.pyc 194 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Yue Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Compression 2 | Data Compression using Huffman. 3 | -------------------------------------------------------------------------------- /compression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mooophy/Compression/2d369956a83703f0283d36d438e76783e909957d/compression.pdf -------------------------------------------------------------------------------- /cpr/cpr.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.31101.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "huffman", "huffman\huffman.vcxproj", "{1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "unit_test_for_huffman", "unit_test_for_huffman\unit_test_for_huffman.vcxproj", "{DE37A775-FA0E-4665-AD9B-D65125B6CAA6}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Win32 = Debug|Win32 13 | Release|Win32 = Release|Win32 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}.Debug|Win32.Build.0 = Debug|Win32 18 | {1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}.Release|Win32.ActiveCfg = Release|Win32 19 | {1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}.Release|Win32.Build.0 = Release|Win32 20 | {DE37A775-FA0E-4665-AD9B-D65125B6CAA6}.Debug|Win32.ActiveCfg = Debug|Win32 21 | {DE37A775-FA0E-4665-AD9B-D65125B6CAA6}.Debug|Win32.Build.0 = Debug|Win32 22 | 
{DE37A775-FA0E-4665-AD9B-D65125B6CAA6}.Release|Win32.ActiveCfg = Release|Win32 23 | {DE37A775-FA0E-4665-AD9B-D65125B6CAA6}.Release|Win32.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /cpr/huffman/ClassDiagramForHuffman.cd: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | AAAAAIAAAAAAAAAAAAAAAAAAIAAAACCBAAAAAAAAAAA= 7 | bit_string.hpp 8 | 9 | 10 | 11 | 12 | 13 | AAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAQAAAAAAAAA= 14 | code_word_dictionary.hpp 15 | 16 | 17 | 18 | 19 | 20 | gAgAAAAAAAwAAAAAAAAAAAAAAAAAAAABAQAQAgAAAgA= 21 | encoder.hpp 22 | 23 | 24 | 25 | 26 | 27 | AAAAAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 28 | frequency_map.hpp 29 | 30 | 31 | 32 | 33 | 34 | AAAAAAAAAAAAAAAAAAAgAAAAABAAAAAAIAAAAAAIAAA= 35 | huffman_tree.hpp 36 | 37 | 38 | 39 | 40 | 41 | AAAAAEAAAAAAAAABAAAAAAAAAAAAABAAAAAACAEAgAA= 42 | node.hpp 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /cpr/huffman/bit_string.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | #ifndef BIT_STRING_HPP 5 | #define BIT_STRING_HPP 6 | 7 | 8 | namespace cpr 9 | { 10 | namespace huffman 11 | { 12 | template 13 | class BitString 14 | { 15 | public: 16 | BitString() 17 | : data_{} 18 | { } 19 | 20 | std::string const& str() const 21 | { 22 | return data_; 23 | } 24 | 25 | void push_back_bits(Char bits) 26 | { 27 | if (0 == bit_length(bits)) 28 | { 29 | data_.push_back(bits); 30 | } 31 | else 32 | { 33 | for (int pos = bit_length(bits) - 1; pos >= 0; --pos) 34 | { 35 | char curr_bit = ((bits & (1 << pos)) >> pos); 36 | data_.push_back(curr_bit); 37 | } 38 | } 39 | } 40 | 41 | unsigned bit_length(Char ch) const 42 | { 43 | if (ch < 0) 44 | return sizeof(ch) * 
8; 45 | if (ch == 0) 46 | return 1; 47 | 48 | unsigned count = 0; 49 | for (; ch > 0; ch >>= 1) ++count; 50 | return count; 51 | } 52 | 53 | // protocol : 54 | // FrequencyTable|CompressedPart|Remainder|RemainderSize 55 | std::string compress(Char delimiter) const 56 | { 57 | std::string compressed_data; 58 | auto curr = data_.cbegin(); 59 | 60 | //for compressed part 61 | while (data_.cend() - curr >= sizeof(Char) * 8) 62 | { 63 | Char ch = 0; 64 | auto peek = curr; 65 | for (; peek != curr + sizeof(Char) * 8; ++peek) 66 | ch = (ch << 1) + *peek; 67 | compressed_data.push_back(ch); 68 | 69 | curr = peek; 70 | } 71 | 72 | compressed_data.push_back(delimiter); 73 | 74 | //for remainder part and remainder size 75 | Char remainder = 0; 76 | for (auto peek = curr; peek != data_.cend(); ++peek) 77 | remainder = (remainder << 1) + *peek; 78 | compressed_data.push_back(remainder); 79 | compressed_data.push_back(delimiter); 80 | compressed_data.push_back(data_.cend() - curr);//remainder size 81 | 82 | return compressed_data; 83 | } 84 | 85 | private: 86 | std::string data_; 87 | }; 88 | } 89 | } 90 | 91 | 92 | #endif // !BIT_STRING_HPP 93 | -------------------------------------------------------------------------------- /cpr/huffman/code_word_dictionary.hpp: -------------------------------------------------------------------------------- 1 | #include "huffman_tree.hpp" 2 | 3 | 4 | #ifndef CODE_WORD_DICTIONARY 5 | #define CODE_WORD_DICTIONARY 6 | 7 | namespace cpr 8 | { 9 | namespace huffman 10 | { 11 | template 12 | class CodeWordDictionary : public std::map < Char, CodeWord > 13 | { 14 | using SharedNode = typename cpr::huffman::Node::SharedNode; 15 | public: 16 | explicit CodeWordDictionary(HuffmanTree const& huffman_tree) 17 | { 18 | fill_this_by_dfs(0, huffman_tree.root); 19 | } 20 | 21 | private: 22 | void fill_this_by_dfs(CodeWord path, SharedNode node) 23 | { 24 | if (!node) 25 | return; 26 | 27 | if (node->character_ != 0) 28 | { 29 | (*this)[node->character_] = 
path; 30 | } 31 | else 32 | { 33 | fill_this_by_dfs((path << 1) + 0, node->left_); 34 | fill_this_by_dfs((path << 1) + 1, node->right_); 35 | } 36 | } 37 | }; 38 | } 39 | } 40 | 41 | #endif // !CODE_WORD_DICTIONARY 42 | -------------------------------------------------------------------------------- /cpr/huffman/encoder.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "frequency_map.hpp" 7 | #include "huffman_tree.hpp" 8 | #include "code_word_dictionary.hpp" 9 | #include "bit_string.hpp" 10 | 11 | 12 | #ifndef ENCODER_HPP 13 | #define ENCODER_HPP 14 | 15 | 16 | namespace cpr 17 | { 18 | namespace huffman 19 | { 20 | // protocol : 21 | // FrequencyTable|CompressedPart|Remainder|RemainderSize 22 | // The trailing bits (fewer than 8) that do not fill a whole unit are appended to the Remainder part without compressing. 23 | template 24 | class Encoder 25 | { 26 | public: 27 | explicit Encoder(std::string path) : 28 | data(read_file(path)), 29 | frequency_map(data), 30 | huffman_tree(frequency_map), 31 | code_dictionary(huffman_tree), 32 | bit_string(encode_data_and_push_into_bit_string()) 33 | { } 34 | 35 | void write(std::string out_file, Char delimiter) const 36 | { 37 | std::ofstream ofs(out_file, std::ios::binary); 38 | ofs << frequency_map.str() << delimiter << bit_string.compress(delimiter); 39 | } 40 | 41 | // 42 | // data members, read only 43 | // 44 | const std::vector data; 45 | const FrequencyMap frequency_map; 46 | const HuffmanTree huffman_tree; 47 | const CodeWordDictionary code_dictionary; 48 | const BitString bit_string; 49 | 50 | private: 51 | std::vector read_file(std::string path)const 52 | { 53 | std::ifstream file(path, std::ios::binary); 54 | if (file.bad()) 55 | throw std::logic_error("bad file"); 56 | auto begin = std::istreambuf_iterator(file); 57 | auto end = std::istreambuf_iterator(); 58 | return std::vector(begin, end); 59 | } 60 | 61 | BitString 
encode_data_and_push_into_bit_string()const 62 | { 63 | BitString encoded; 64 | for (auto ch : data) 65 | encoded.push_back_bits(code_dictionary.at(ch)); 66 | return encoded; 67 | } 68 | }; 69 | } 70 | } 71 | 72 | 73 | #endif // !ENCODER_HPP 74 | -------------------------------------------------------------------------------- /cpr/huffman/frequency_map.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifndef FREQUENCY_MAP_HPP 5 | #define FREQUENCY_MAP_HPP 6 | 7 | 8 | namespace cpr 9 | { 10 | namespace huffman 11 | { 12 | template 13 | class FrequencyMap : public std::map < Char, Freq > 14 | { 15 | public: 16 | template 17 | explicit FrequencyMap(Container const& container) 18 | { 19 | for (auto ch : container) 20 | if (this->find(ch) != this->end()) 21 | ++(*this)[ch]; 22 | else 23 | (*this)[ch] = 1; 24 | } 25 | 26 | //format : symbol:frequency, 27 | //say : 28 | // a:1304,b:89,c:990, 29 | std::string str()const 30 | { 31 | std::string str; 32 | for (auto pair : *this) 33 | { 34 | str.push_back(pair.first); 35 | str.push_back(':'); 36 | str += frequency_to_string(pair.second); 37 | str.push_back(','); 38 | } 39 | return str; 40 | } 41 | 42 | private: 43 | std::string frequency_to_string(Freq freq) const 44 | { 45 | std::deque < char > dq; 46 | for (/* */; freq > 0; freq >>= 8) 47 | dq.push_front(freq & 0xff); 48 | return std::string(dq.begin(), dq.end()); 49 | } 50 | }; 51 | } 52 | } 53 | 54 | 55 | #endif // !FREQUENCY_MAP_HPP 56 | -------------------------------------------------------------------------------- /cpr/huffman/huffman.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08} 15 | huffman 16 | 17 | 18 | 19 | Application 20 | true 21 | v120 22 | MultiByte 23 | 24 | 25 | Application 26 | false 27 | v120 28 | true 29 | MultiByte 
30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | Level3 45 | Disabled 46 | true 47 | true 48 | 49 | 50 | true 51 | 52 | 53 | true 54 | 55 | 56 | 57 | 58 | Level3 59 | MaxSpeed 60 | true 61 | true 62 | true 63 | 64 | 65 | true 66 | true 67 | true 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /cpr/huffman/huffman.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | Header Files 28 | 29 | 30 | Header Files 31 | 32 | 33 | Header Files 34 | 35 | 36 | Header Files 37 | 38 | 39 | Header Files 40 | 41 | 42 | -------------------------------------------------------------------------------- /cpr/huffman/huffman_tree.hpp: -------------------------------------------------------------------------------- 1 | #include "node.hpp" 2 | #include "frequency_map.hpp" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | #ifndef HUFFMAN_TREE_HPP 11 | #define HUFFMAN_TREE_HPP 12 | 13 | 14 | namespace cpr 15 | { 16 | namespace huffman 17 | { 18 | template 19 | class HuffmanTree 20 | { 21 | public: 22 | using SharedNode = typename Node::SharedNode; 23 | 24 | explicit HuffmanTree(FrequencyMap const& map) 25 | : root{ make_tree(map) } 26 | {} 27 | 28 | std::string to_string() const 29 | { 30 | std::string str; 31 | std::function inorder = [&](SharedNode node) 32 | { 33 | if (node) 34 | { 35 | inorder(node->left_); 36 | 37 | str.push_back('['); 38 | 
str.push_back(node->character_ == 0 ? '_' : node->character_); 39 | str.push_back(','); 40 | str += std::to_string(node->freq_); 41 | str.push_back(']'); 42 | 43 | inorder(node->right_); 44 | } 45 | }; 46 | 47 | inorder(root); 48 | return str; 49 | } 50 | 51 | const SharedNode root; 52 | 53 | private: 54 | // huffman coding algorithm 55 | // based on a pseudocode on 16.3 CLRS 3rd. 56 | SharedNode make_tree(FrequencyMap const& map) const 57 | { 58 | auto greater = [](SharedNode lhs, SharedNode rhs) 59 | { 60 | if (lhs->freq_ != rhs->freq_) 61 | return lhs->freq_ > rhs->freq_; 62 | else 63 | return lhs->character_ > rhs->character_; 64 | }; 65 | 66 | 67 | using MinPriorityQueue = std::priority_queue < SharedNode, std::vector, decltype(greater) > ; 68 | 69 | MinPriorityQueue queue(greater); 70 | for (auto const& pair : map) 71 | queue.push(make_new_node(pair.first, pair.second)); 72 | 73 | for (int count = 1; count != map.size(); ++count) 74 | { 75 | auto merge = make_new_node(); 76 | merge->left_ = queue.top(); queue.pop(); 77 | merge->right_ = queue.top(); queue.pop(); 78 | merge->freq_ = merge->left_->freq_ + merge->right_->freq_; 79 | queue.push(merge); 80 | } 81 | 82 | return queue.top(); 83 | } 84 | }; 85 | } 86 | } 87 | 88 | #endif // !HUFFMAN_TREE_HPP -------------------------------------------------------------------------------- /cpr/huffman/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | return 0; 7 | } -------------------------------------------------------------------------------- /cpr/huffman/node.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | #ifndef NODE_HPP 6 | #define NODE_HPP 7 | 8 | 9 | namespace cpr 10 | { 11 | namespace huffman 12 | { 13 | template 14 | struct Node 15 | { 16 | using SharedNode = std::shared_ptr < Node > ; 17 | 18 | //default ctor 19 | Node() 20 | : 
character_{ 0 }, freq_{ 0 }, left_{ nullptr }, right_{ nullptr } 21 | { } 22 | 23 | //ctor 24 | Node(Char ch, Freq freq) 25 | : character_{ ch }, freq_{ freq }, left_{ nullptr }, right_{ nullptr } 26 | { } 27 | 28 | Char character_; 29 | Freq freq_; 30 | SharedNode left_, right_; 31 | 32 | bool is_leaf() const 33 | { 34 | return !left_ && !right_; 35 | } 36 | }; 37 | 38 | template 39 | inline bool operator > (Node const& lhs, Node const& rhs) 40 | { 41 | return lhs.freq_ > rhs.freq_; 42 | } 43 | 44 | template 45 | inline bool operator < (Node const& lhs, Node const& rhs) 46 | { 47 | return lhs.freq_ < rhs.freq_; 48 | } 49 | 50 | template 51 | inline std::ostream& operator<<(std::ostream& os, Node const& node) 52 | { 53 | return os << "[" << node.character_ << "," << node.freq_ << "]"; 54 | } 55 | 56 | template 57 | std::shared_ptr < Node> make_new_node() 58 | { 59 | return std::make_shared>(); 60 | } 61 | 62 | template 63 | std::shared_ptr < Node> make_new_node(Char ch, Freq freq) 64 | { 65 | return std::make_shared>(ch, freq); 66 | } 67 | 68 | template 69 | std::shared_ptr < Node> make_new_node(Node const& other) 70 | { 71 | return std::make_shared>(other); 72 | } 73 | } 74 | } 75 | #endif // !NODE_HPP -------------------------------------------------------------------------------- /cpr/test_cases/shake.cpr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mooophy/Compression/2d369956a83703f0283d36d438e76783e909957d/cpr/test_cases/shake.cpr -------------------------------------------------------------------------------- /cpr/test_cases/test_for_encoder.cpr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mooophy/Compression/2d369956a83703f0283d36d438e76783e909957d/cpr/test_cases/test_for_encoder.cpr -------------------------------------------------------------------------------- /cpr/test_cases/test_for_encoder.txt: 
-------------------------------------------------------------------------------- 1 | abbcccddddeeeeeffffff -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/quick_unit_tests.playlist: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/stdafx.cpp: -------------------------------------------------------------------------------- 1 | // stdafx.cpp : source file that includes just the standard includes 2 | // unit_test_for_huffman.pch will be the pre-compiled header 3 | // stdafx.obj will contain the pre-compiled type information 4 | 5 | #include "stdafx.h" 6 | 7 | // TODO: reference any additional headers you need in STDAFX.H 8 | // and not in this file 9 | -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/stdafx.h: -------------------------------------------------------------------------------- 1 | // stdafx.h : include file for standard system include files, 2 | // or project specific include files that are used frequently, but 3 | // are changed infrequently 4 | // 5 | 6 | #pragma once 7 | 8 | #include "targetver.h" 9 | 10 | // Headers for CppUnitTest 11 | #include "CppUnitTest.h" 12 | 13 | // TODO: reference additional headers your program requires here 14 | -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/targetver.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Including SDKDDKVer.h defines the highest available Windows platform. 4 | 5 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and 6 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. 
7 | 8 | #include 9 | -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/unit_test_for_bit_string.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "CppUnitTest.h" 3 | #include "../huffman/bit_string.hpp" 4 | 5 | using namespace Microsoft::VisualStudio::CppUnitTestFramework; 6 | 7 | namespace unit_test_for_huffman 8 | { 9 | TEST_CLASS(unit_test_for_bit_string) 10 | { 11 | public: 12 | 13 | TEST_METHOD(ctor) 14 | { 15 | cpr::huffman::BitString bit_string; 16 | } 17 | 18 | TEST_METHOD(data) 19 | { 20 | cpr::huffman::BitString bit_string; 21 | Assert::AreEqual(0u, bit_string.str().size()); 22 | } 23 | 24 | TEST_METHOD(bit_length) 25 | { 26 | cpr::huffman::BitString bit_string; 27 | 28 | Assert::AreEqual(1u, bit_string.bit_length(0x00)); 29 | Assert::AreEqual(7u, bit_string.bit_length(0x7f)); 30 | Assert::AreEqual(8u, bit_string.bit_length(-0x01)); 31 | Assert::AreEqual(8u, bit_string.bit_length(-0x7f)); 32 | Assert::AreEqual(8u, bit_string.bit_length(-0x80)); 33 | Assert::AreNotEqual(8u, bit_string.bit_length((char)-0x81)); 34 | } 35 | 36 | TEST_METHOD(push_back_bits) 37 | { 38 | // case 1 39 | cpr::huffman::BitString bit_string; 40 | bit_string.push_back_bits(0); 41 | Assert::AreEqual((char)0, bit_string.str().front()); 42 | 43 | bit_string.push_back_bits((char)0xff); 44 | std::string expected(1, 0); 45 | expected += std::string(8, 1); 46 | Assert::AreEqual(expected, bit_string.str()); 47 | Assert::AreEqual(9u, bit_string.str().size()); 48 | 49 | //case 2 50 | cpr::huffman::BitString bs_for_testing_minus_number; 51 | bs_for_testing_minus_number.push_back_bits(char(-1)); 52 | Assert::AreEqual(8u, bs_for_testing_minus_number.str().size()); 53 | 54 | bs_for_testing_minus_number.push_back_bits(0); 55 | Assert::AreEqual(9u, bs_for_testing_minus_number.str().size()); 56 | 57 | //case 3 58 | cpr::huffman::BitString case3; 59 | 
case3.push_back_bits(0x07); 60 | std::string expected_for_case3(3, 1); 61 | Assert::AreEqual(expected_for_case3, case3.str()); 62 | } 63 | 64 | // protocol : 65 | // FrequencyTable|CompressedPart|Remainder|RemainderSize 66 | 67 | TEST_METHOD(compress_case1) 68 | { 69 | cpr::huffman::BitString bit_string; 70 | bit_string.push_back_bits((char)0); 71 | std::string compressed = bit_string.compress('|'); 72 | 73 | //note the compressed part is empty 74 | Assert::AreEqual(4u, compressed.size()); 75 | std::string exprected{ '|', 0, '|', 1 }; 76 | Assert::AreEqual(exprected, compressed); 77 | } 78 | 79 | TEST_METHOD(compress_case2) 80 | { 81 | cpr::huffman::BitString bit_string; 82 | bit_string.push_back_bits((char)0xff); 83 | std::string compressed = bit_string.compress('|'); 84 | Assert::AreEqual(5u, compressed.size()); 85 | 86 | std::string exprected{ (char)0xff, '|', 0, '|', 0 }; 87 | Assert::AreEqual(exprected, compressed); 88 | } 89 | 90 | TEST_METHOD(compress_case3) 91 | { 92 | cpr::huffman::BitString bit_string; 93 | bit_string.push_back_bits((char)0X00); 94 | bit_string.push_back_bits((char)0Xff); 95 | std::string compressed = bit_string.compress('|'); 96 | 97 | Assert::AreEqual(5u, compressed.size()); 98 | 99 | std::string exprected{ (char)0x7f, '|', 1, '|', 1 }; 100 | Assert::AreEqual(exprected, compressed); 101 | } 102 | 103 | TEST_METHOD(compress_case4) 104 | { 105 | cpr::huffman::BitString bit_string; 106 | bit_string.push_back_bits((char)0X00); 107 | bit_string.push_back_bits((char)0Xff); 108 | bit_string.push_back_bits((char)0Xff); 109 | std::string compressed = bit_string.compress('|'); 110 | 111 | Assert::AreEqual(6u, compressed.size()); 112 | 113 | std::string exprected{ (char)0x7f, (char)0xff, '|', 1, '|', 1 }; 114 | Assert::AreEqual(exprected, compressed); 115 | } 116 | }; 117 | } -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/unit_test_for_code_word_dictionary.cpp: 
-------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "CppUnitTest.h" 3 | #include "../huffman/code_word_dictionary.hpp" 4 | 5 | using namespace Microsoft::VisualStudio::CppUnitTestFramework; 6 | 7 | namespace unit_test_for_huffman 8 | { 9 | using FrequencyMap = cpr::huffman::FrequencyMap < char, long > ; 10 | using HuffmanTree = cpr::huffman::HuffmanTree < char, long > ; 11 | using CodeWordDictionary = cpr::huffman::CodeWordDictionary < char, long, char > ; 12 | 13 | TEST_CLASS(unit_test_for_code_word_dictionary) 14 | { 15 | public: 16 | 17 | TEST_METHOD(ctor) 18 | { 19 | //setup huffman tree for testing code word dictionary 20 | auto empty_case = std::vector(); 21 | FrequencyMap fmap_from_clrs(empty_case); 22 | fmap_from_clrs['a'] = 45; 23 | fmap_from_clrs['b'] = 13; 24 | fmap_from_clrs['c'] = 12; 25 | fmap_from_clrs['d'] = 16; 26 | fmap_from_clrs['e'] = 9; 27 | fmap_from_clrs['f'] = 5; 28 | HuffmanTree htree_from_clrs{ fmap_from_clrs }; 29 | 30 | 31 | //test ctor 32 | const CodeWordDictionary dic{ htree_from_clrs }; 33 | Assert::IsTrue(6 == dic.size()); 34 | Assert::IsTrue(0x00 == dic.at('a')); //a : 0b 35 | Assert::IsTrue(0x04 == dic.at('c')); //c : 100b 36 | Assert::IsTrue(0x05 == dic.at('b')); //b : 101b 37 | Assert::IsTrue(0x0c == dic.at('f')); //f : 1100b 38 | Assert::IsTrue(0x0d == dic.at('e')); //e : 1101b 39 | Assert::IsTrue(0x07 == dic.at('d')); //d : 111b 40 | } 41 | }; 42 | } -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/unit_test_for_encoder.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "CppUnitTest.h" 3 | #include "../huffman/encoder.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace Microsoft::VisualStudio::CppUnitTestFramework; 10 | 11 | namespace unit_test_for_huffman 12 | { 13 | TEST_CLASS(unit_test_for_encoder) 14 | { 15 | 
public: 16 | 17 | TEST_METHOD(ctor) 18 | { 19 | cpr::huffman::Encoder encoder("../test_cases/test_for_encoder.txt"); 20 | 21 | //test each character in data 22 | Assert::AreEqual(21u, encoder.data.size()); 23 | 24 | std::string content = "abbcccddddeeeeeffffff"; 25 | auto it = content.cbegin(); 26 | for (auto ch : encoder.data) 27 | Assert::AreEqual(ch, *it++); 28 | 29 | //test frequency_map part 30 | Assert::AreEqual(6u, encoder.frequency_map.size()); 31 | 32 | Assert::AreEqual(1l, encoder.frequency_map.at('a')); 33 | Assert::AreEqual(2l, encoder.frequency_map.at('b')); 34 | Assert::AreEqual(3l, encoder.frequency_map.at('c')); 35 | Assert::AreEqual(4l, encoder.frequency_map.at('d')); 36 | Assert::AreEqual(5l, encoder.frequency_map.at('e')); 37 | Assert::AreEqual(6l, encoder.frequency_map.at('f')); 38 | 39 | //test huffman_tree part 40 | std::string str_expected = "[d,4][_,9][e,5][_,21][a,1][_,3][b,2][_,6][c,3][_,12][f,6]"; 41 | Assert::AreEqual(str_expected, encoder.huffman_tree.to_string()); 42 | 43 | //test code word dictionary part 44 | Assert::AreEqual(6u, encoder.code_dictionary.size()); 45 | 46 | Assert::IsTrue(0x00 == encoder.code_dictionary.at('d')); //d : 4 -->0b 47 | Assert::IsTrue(0x01 == encoder.code_dictionary.at('e')); //e : 5 -->1b 48 | Assert::IsTrue(0x08 == encoder.code_dictionary.at('a')); //a : 1 -->1000b 49 | Assert::IsTrue(0x09 == encoder.code_dictionary.at('b')); //b : 2 -->1001b 50 | Assert::IsTrue(0x05 == encoder.code_dictionary.at('c')); //c : 3 -->101b 51 | Assert::IsTrue(0x03 == encoder.code_dictionary.at('f')); //f : 6 -->11b 52 | } 53 | 54 | TEST_METHOD(encode) 55 | { 56 | cpr::huffman::Encoder encoder("../test_cases/test_for_encoder.txt"); 57 | 58 | std::string expect = "100010011001101101101000011111111111111111"; 59 | for (auto& ch : expect) ch -= 48; // <-- note this conversion 60 | 61 | Assert::AreEqual(expect, encoder.bit_string.str()); 62 | } 63 | 64 | 65 | // frequency part : 66 | // abbcccddddeeeeeffffff raw data 67 | // --> 
68 | // a:1,b:2,c:3,d:4,e:5,f:6, 69 | // 70 | // 71 | // note '|' is added as delimiter for the two parts 72 | // 73 | // 74 | // 75 | // encoded part : 76 | // abbcccddddeeeeeffffff raw data 77 | // --> 78 | // 100010011001101101101000011111111111111111 bit string encoded with huffman tree 79 | // --> 80 | // 1000 1001 -- 0x89 81 | // 1001 1011 -- 0x9b 82 | // 0110 1000 -- 0x68 83 | // 0111 1111 -- 0x7f 84 | // 1111 1111 -- 0xff 85 | // 11 -- 0x03 86 | // --> 87 | // [0x89][0x9b][0x68][0x7f][0xff]|[0x03]|[0x02] final data using protocol : FrequencyTable|CompressedPart|Remainder|RemainderSize 88 | TEST_METHOD(write_case1) 89 | { 90 | //compress and write 91 | cpr::huffman::Encoder encoder("../test_cases/test_for_encoder.txt"); 92 | encoder.write("../test_cases/test_for_encoder.cpr", '|'); 93 | 94 | // read the compressed file back 95 | std::ifstream ifs("../test_cases/test_for_encoder.cpr", std::ios::binary); 96 | auto begin = std::istreambuf_iterator(ifs); 97 | auto end = std::istreambuf_iterator(); 98 | std::string actual(begin, end); 99 | Assert::AreEqual(34u, actual.size()); 100 | 101 | //test each char in the compressed file 102 | std::string expect = "a:1,b:2,c:3,d:4,e:5,f:6,"; 103 | for (auto& element : expect) 104 | if (isdigit(element)) element -= 48; 105 | expect.push_back('|'); 106 | for (auto ch : { (char)0x89, (char)0x9b, (char)0x68, (char)0x7f, (char)0xff, '|', (char)0x03, '|', (char)0x02 }) 107 | expect.push_back(ch); 108 | Assert::AreEqual(expect, actual); 109 | } 110 | 111 | //1 minute to run this case 2M text file 112 | //1951KB -> 963 KB 113 | TEST_METHOD(write_case2) 114 | { 115 | cpr::huffman::Encoder encoder("../test_cases/shake.txt"); 116 | encoder.write("../test_cases/shake.cpr", '|'); 117 | } 118 | }; 119 | } -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/unit_test_for_frequency_map.cpp: -------------------------------------------------------------------------------- 1 | #include 
"stdafx.h" 2 | #include "CppUnitTest.h" 3 | #include "../huffman/frequency_map.hpp" 4 | #include 5 | 6 | using namespace Microsoft::VisualStudio::CppUnitTestFramework; 7 | 8 | namespace unit_test_for_huffman 9 | { 10 | TEST_CLASS(unit_test_for_frequency_map) 11 | { 12 | public: 13 | 14 | TEST_METHOD(ctor) 15 | { 16 | std::vector test_case{ 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'c' }; 17 | cpr::huffman::FrequencyMap fmap(test_case); 18 | 19 | Assert::AreEqual(3u, fmap.size()); 20 | Assert::AreEqual(4l, fmap.at('a')); 21 | Assert::AreEqual(3l, fmap.at('b')); 22 | Assert::AreEqual(1l, fmap.at('c')); 23 | } 24 | 25 | TEST_METHOD(str_case0) 26 | { 27 | std::vector test_case{ 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'c' }; 28 | cpr::huffman::FrequencyMap fmap(test_case); 29 | std::string expect{ 'a', ':', 4, ',', 'b', ':', 3, ',', 'c', ':', 1, ',' }; 30 | Assert::AreEqual(expect, fmap.str()); 31 | } 32 | 33 | TEST_METHOD(str_case1) 34 | { 35 | std::string test_case; 36 | for (int i = 0; i != 0xabc; ++i) 37 | test_case += "ab"; 38 | cpr::huffman::FrequencyMap fmap(test_case); 39 | std::string expect{ 'a', ':', (char)0x0a, (char)0xbc, ',', 'b', ':', (char)0x0a, (char)0xbc, ',' }; 40 | Assert::AreEqual(expect, fmap.str()); 41 | } 42 | }; 43 | } -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/unit_test_for_huffman.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {DE37A775-FA0E-4665-AD9B-D65125B6CAA6} 15 | Win32Proj 16 | unit_test_for_huffman 17 | 18 | 19 | 20 | DynamicLibrary 21 | true 22 | v120 23 | Unicode 24 | false 25 | 26 | 27 | DynamicLibrary 28 | false 29 | v120 30 | true 31 | Unicode 32 | false 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | true 46 | 47 | 48 | true 49 | 50 | 51 | 52 | Use 53 | Level3 54 | Disabled 55 | 
$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories) 56 | WIN32;_DEBUG;%(PreprocessorDefinitions) 57 | true 58 | true 59 | 60 | 61 | Windows 62 | true 63 | $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories) 64 | 65 | 66 | true 67 | 68 | 69 | 70 | 71 | Level3 72 | Use 73 | MaxSpeed 74 | true 75 | true 76 | $(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories) 77 | WIN32;NDEBUG;%(PreprocessorDefinitions) 78 | true 79 | 80 | 81 | Windows 82 | true 83 | true 84 | true 85 | $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories) 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | Create 95 | Create 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | {1d1ccdfe-663c-43e9-bad9-8ed42e7e0c08} 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/unit_test_for_huffman.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | 26 | 27 | Source Files 28 | 29 | 30 | Source Files 31 | 32 | 33 | Source Files 34 | 35 | 36 | Source Files 37 | 38 | 39 | Source Files 40 | 41 | 42 | Source Files 43 | 44 | 45 | Source Files 46 | 47 | 48 | -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/unit_test_for_huffman_tree.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "CppUnitTest.h" 3 | #include "../huffman/huffman_tree.hpp" 4 | #include 5 | 6 | using namespace 
Microsoft::VisualStudio::CppUnitTestFramework; 7 | 8 | namespace unit_test_for_huffman 9 | { 10 | using FrequencyMap = cpr::huffman::FrequencyMap < char, long > ; 11 | using HuffmanTree = cpr::huffman::HuffmanTree < char, long > ; 12 | TEST_CLASS(unit_test_for_huffman_tree) 13 | { 14 | public: 15 | 16 | TEST_METHOD(ctor) 17 | { 18 | //case 1 made up on my own 19 | std::vector test_case{ 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'c' }; 20 | FrequencyMap fmap(test_case); 21 | HuffmanTree htree{ fmap }; 22 | Assert::IsNotNull(htree.root.get()); 23 | 24 | 25 | //case 2 based on 16.3 clrs 26 | auto empty_case = std::vector(); 27 | FrequencyMap fmap_from_clrs(empty_case); 28 | fmap_from_clrs['a'] = 45; 29 | fmap_from_clrs['b'] = 13; 30 | fmap_from_clrs['c'] = 12; 31 | fmap_from_clrs['d'] = 16; 32 | fmap_from_clrs['e'] = 9; 33 | fmap_from_clrs['f'] = 5; 34 | HuffmanTree htree_from_clrs{ fmap_from_clrs }; 35 | 36 | Assert::AreEqual(100l, htree_from_clrs.root->freq_); 37 | Assert::AreEqual((char)0, htree_from_clrs.root->character_); 38 | Assert::AreEqual((char)'a', htree_from_clrs.root->left_->character_); 39 | 40 | //! the rest is tested below using method "to_string". 
41 | } 42 | 43 | TEST_METHOD(to_string) 44 | { 45 | auto empty_case = std::vector(); 46 | FrequencyMap fmap_from_clrs(empty_case); 47 | fmap_from_clrs['a'] = 45; 48 | fmap_from_clrs['b'] = 13; 49 | fmap_from_clrs['c'] = 12; 50 | fmap_from_clrs['d'] = 16; 51 | fmap_from_clrs['e'] = 9; 52 | fmap_from_clrs['f'] = 5; 53 | HuffmanTree htree_from_clrs{ fmap_from_clrs }; 54 | 55 | std::string expected = "[a,45][_,100][c,12][_,25][b,13][_,55][f,5][_,14][e,9][_,30][d,16]"; 56 | Assert::AreEqual(expected, htree_from_clrs.to_string()); 57 | } 58 | }; 59 | } -------------------------------------------------------------------------------- /cpr/unit_test_for_huffman/unit_test_for_node.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "CppUnitTest.h" 3 | #include "../huffman/node.hpp" 4 | #include 5 | 6 | using namespace Microsoft::VisualStudio::CppUnitTestFramework; 7 | 8 | namespace unit_test_for_huffman 9 | { 10 | using Node = cpr::huffman::Node < char, long > ; 11 | 12 | TEST_CLASS(test_node) 13 | { 14 | public: 15 | 16 | TEST_METHOD(default_ctor) 17 | { 18 | Node node; 19 | 20 | Assert::AreEqual((char)0, node.character_); 21 | Assert::AreEqual(0l, node.freq_); 22 | Assert::IsNull(node.left_.get()); 23 | Assert::IsNull(node.right_.get()); 24 | } 25 | 26 | TEST_METHOD(ctor_with_2_args) 27 | { 28 | Node node{ 'a', 42ul }; 29 | 30 | Assert::AreEqual('a', node.character_); 31 | Assert::AreEqual(42l, node.freq_); 32 | Assert::IsNull(node.left_.get()); 33 | Assert::IsNull(node.right_.get()); 34 | } 35 | 36 | TEST_METHOD(copy_ctor) 37 | { 38 | Node lhs{ 'a', 42l }; 39 | lhs.left_ = cpr::huffman::make_new_node(); 40 | lhs.right_ = cpr::huffman::make_new_node('b', 99l); 41 | Node rhs(lhs); 42 | 43 | Assert::AreEqual(lhs.character_, rhs.character_); 44 | Assert::AreEqual(lhs.freq_, rhs.freq_); 45 | 46 | Assert::IsTrue(lhs.left_ == rhs.left_); 47 | Assert::IsTrue(lhs.right_ == rhs.right_); 48 | 49 | 
Assert::AreSame(lhs.left_->character_, rhs.left_->character_); 50 | Assert::AreSame(lhs.left_->freq_, rhs.left_->freq_); 51 | } 52 | 53 | TEST_METHOD(is_leaf) 54 | { 55 | Node node; 56 | Assert::IsTrue(node.is_leaf()); 57 | } 58 | 59 | TEST_METHOD(make_new_node_without_args) 60 | { 61 | auto pointer = cpr::huffman::make_new_node(); 62 | 63 | Assert::AreEqual((char)0, pointer->character_); 64 | Assert::AreEqual(0l, pointer->freq_); 65 | Assert::IsNull(pointer->left_.get()); 66 | Assert::IsNull(pointer->right_.get()); 67 | } 68 | 69 | TEST_METHOD(make_new_node_with_2_args) 70 | { 71 | auto pointer = cpr::huffman::make_new_node('a', 42l); 72 | 73 | Assert::AreEqual('a', pointer->character_); 74 | Assert::AreEqual(42l, pointer->freq_); 75 | Assert::IsNull(pointer->left_.get()); 76 | Assert::IsNull(pointer->right_.get()); 77 | } 78 | 79 | TEST_METHOD(make_new_node_that_calling_copy_ctor) 80 | { 81 | auto copied_from = cpr::huffman::make_new_node('a', 42ul); 82 | copied_from->left_ = cpr::huffman::make_new_node(); 83 | copied_from->right_ = cpr::huffman::make_new_node(); 84 | 85 | auto copied_to = cpr::huffman::make_new_node(*copied_from); 86 | 87 | Assert::AreEqual(copied_from->character_, copied_to->character_); 88 | Assert::AreEqual(copied_from->freq_, copied_to->freq_); 89 | 90 | Assert::AreEqual(true, copied_from->left_ == copied_to->left_); 91 | Assert::AreEqual(true, copied_from->right_ == copied_to->right_); 92 | 93 | Assert::AreSame(copied_from->left_->character_, copied_to->left_->character_); 94 | Assert::AreSame(copied_from->left_->freq_, copied_to->left_->freq_); 95 | 96 | Assert::AreSame(copied_from->right_->character_, copied_to->right_->character_); 97 | Assert::AreSame(copied_from->right_->freq_, copied_to->right_->freq_); 98 | } 99 | 100 | TEST_METHOD(for_shared_pointer) 101 | { 102 | auto pointer = cpr::huffman::make_new_node('a', 42l); 103 | auto p2 = pointer; 104 | auto p3 = p2; 105 | } 106 | 107 | TEST_METHOD(operator_greater_than) 108 | { 109 | // 
class FrequencyMap:
    """Count how often each element occurs in a sequence.

    The counts are stored in the plain dict attribute ``dictionary``, keyed
    by element.
    """

    def __init__(self, sequence):
        """Build the map by counting every element of *sequence*.

        :param sequence: any iterable of hashable elements, e.g. a string.
        """
        counts = {}
        for key in sequence:
            # dict.get avoids the separate membership test and second lookup.
            counts[key] = counts.get(key, 0) + 1
        self.dictionary = counts

    @staticmethod
    def frequency_to_bytes(freq):
        """Split *freq* into base-256 digits, most significant first.

        :param freq: integer frequency to split.
        :return: list of ints in ``range(256)``; empty when *freq* is not
            positive, because the loop body then never runs.
        """
        digits = []
        while freq > 0:
            digits.append(freq & 0xff)
            freq >>= 8
        # Digits were collected least significant first.
        digits.reverse()
        return digits

    def __str__(self):
        """Render the map as ``key:count,`` repeated once per entry.

        Keys and counts are formatted with ``str.format`` so that non-string
        keys (ints from a bytes object, for instance) do not raise
        ``TypeError``. An empty map renders as the empty string rather than
        a lone ``','``.
        """
        return ''.join(
            '{}:{},'.format(key, count)
            for key, count in self.dictionary.items()
        )
class Node:
    """A tree node holding a character, its frequency and two child links."""

    def __init__(self, ch, freq):
        """Create a node with no children.

        :param ch: the character stored in the node.
        :param freq: the frequency associated with *ch*.
        """
        self.character = ch
        self.frequency = freq
        self.left = None
        self.right = None

    @classmethod
    def default(cls):
        """Return a node with an empty character and zero frequency."""
        return cls('', 0)

    def is_leaf(self):
        """Return True when the node has neither a left nor a right child."""
        # Compare against None explicitly so the answer never depends on the
        # truthiness of whatever object is stored as a child.
        return self.left is None and self.right is None

    def __gt__(self, other):
        """Order nodes by frequency, breaking ties by character.

        :type other: Node
        :return: ``NotImplemented`` for operands that are not ``Node``
            instances, so Python can try the reflected operation or raise
            ``TypeError``; this check also survives ``python -O``, unlike
            an ``assert``.
        """
        if not isinstance(other, Node):
            return NotImplemented
        if self.frequency != other.frequency:
            return self.frequency > other.frequency
        return self.character > other.character

    def __repr__(self):
        """Return a debugging representation such as ``Node('a', 42)``."""
        return 'Node({!r}, {!r})'.format(self.character, self.frequency)
| rhs = n.Node.default() 31 | self.assertGreater(lhs, rhs) 32 | rhs2 = n.Node('a', 42) 33 | self.assertGreater(lhs, rhs2) 34 | --------------------------------------------------------------------------------