├── .gitignore
├── LICENSE
├── README.md
├── compression.pdf
├── cpr
├── cpr.sln
├── huffman
│ ├── ClassDiagramForHuffman.cd
│ ├── bit_string.hpp
│ ├── code_word_dictionary.hpp
│ ├── encoder.hpp
│ ├── frequency_map.hpp
│ ├── huffman.vcxproj
│ ├── huffman.vcxproj.filters
│ ├── huffman_tree.hpp
│ ├── main.cpp
│ └── node.hpp
├── test_cases
│ ├── shake.cpr
│ ├── shake.txt
│ ├── test_for_encoder.cpr
│ └── test_for_encoder.txt
└── unit_test_for_huffman
│ ├── quick_unit_tests.playlist
│ ├── stdafx.cpp
│ ├── stdafx.h
│ ├── targetver.h
│ ├── unit_test_for_bit_string.cpp
│ ├── unit_test_for_code_word_dictionary.cpp
│ ├── unit_test_for_encoder.cpp
│ ├── unit_test_for_frequency_map.cpp
│ ├── unit_test_for_huffman.vcxproj
│ ├── unit_test_for_huffman.vcxproj.filters
│ ├── unit_test_for_huffman_tree.cpp
│ └── unit_test_for_node.cpp
└── cpr_in_python
├── frequency_map.py
├── frequency_map_test.py
├── node.py
└── node_test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 |
4 | # User-specific files
5 | *.suo
6 | *.user
7 | *.sln.docstates
8 |
9 | # Build results
10 | [Dd]ebug/
11 | [Dd]ebugPublic/
12 | [Rr]elease/
13 | [Rr]eleases/
14 | x64/
15 | x86/
16 | build/
17 | bld/
18 | [Bb]in/
19 | [Oo]bj/
20 |
21 | # Roslyn cache directories
22 | *.ide/
23 |
24 | # MSTest test Results
25 | [Tt]est[Rr]esult*/
26 | [Bb]uild[Ll]og.*
27 |
28 | #NUNIT
29 | *.VisualState.xml
30 | TestResult.xml
31 |
32 | # Build Results of an ATL Project
33 | [Dd]ebugPS/
34 | [Rr]eleasePS/
35 | dlldata.c
36 |
37 | *_i.c
38 | *_p.c
39 | *_i.h
40 | *.ilk
41 | *.meta
42 | *.obj
43 | *.pch
44 | *.pdb
45 | *.pgc
46 | *.pgd
47 | *.rsp
48 | *.sbr
49 | *.tlb
50 | *.tli
51 | *.tlh
52 | *.tmp
53 | *.tmp_proj
54 | *.log
55 | *.vspscc
56 | *.vssscc
57 | .builds
58 | *.pidb
59 | *.svclog
60 | *.scc
61 |
62 | # Chutzpah Test files
63 | _Chutzpah*
64 |
65 | # Visual C++ cache files
66 | ipch/
67 | *.aps
68 | *.ncb
69 | *.opensdf
70 | *.sdf
71 | *.cachefile
72 |
73 | # Visual Studio profiler
74 | *.psess
75 | *.vsp
76 | *.vspx
77 |
78 | # TFS 2012 Local Workspace
79 | $tf/
80 |
81 | # Guidance Automation Toolkit
82 | *.gpState
83 |
84 | # ReSharper is a .NET coding add-in
85 | _ReSharper*/
86 | *.[Rr]e[Ss]harper
87 | *.DotSettings.user
88 |
89 | # JustCode is a .NET coding add-in
90 | .JustCode
91 |
92 | # TeamCity is a build add-in
93 | _TeamCity*
94 |
95 | # DotCover is a Code Coverage Tool
96 | *.dotCover
97 |
98 | # NCrunch
99 | _NCrunch_*
100 | .*crunch*.local.xml
101 |
102 | # MightyMoose
103 | *.mm.*
104 | AutoTest.Net/
105 |
106 | # Web workbench (sass)
107 | .sass-cache/
108 |
109 | # Installshield output folder
110 | [Ee]xpress/
111 |
112 | # DocProject is a documentation generator add-in
113 | DocProject/buildhelp/
114 | DocProject/Help/*.HxT
115 | DocProject/Help/*.HxC
116 | DocProject/Help/*.hhc
117 | DocProject/Help/*.hhk
118 | DocProject/Help/*.hhp
119 | DocProject/Help/Html2
120 | DocProject/Help/html
121 |
122 | # Click-Once directory
123 | publish/
124 |
125 | # Publish Web Output
126 | *.[Pp]ublish.xml
127 | *.azurePubxml
128 | # TODO: Comment the next line if you want to checkin your web deploy settings
129 | # but database connection strings (with potential passwords) will be unencrypted
130 | *.pubxml
131 | *.publishproj
132 |
133 | # NuGet Packages
134 | *.nupkg
135 | # The packages folder can be ignored because of Package Restore
136 | **/packages/*
137 | # except build/, which is used as an MSBuild target.
138 | !**/packages/build/
139 | # If using the old MSBuild-Integrated Package Restore, uncomment this:
140 | #!**/packages/repositories.config
141 |
142 | # Windows Azure Build Output
143 | csx/
144 | *.build.csdef
145 |
146 | # Windows Store app package directory
147 | AppPackages/
148 |
149 | # Others
150 | sql/
151 | *.Cache
152 | ClientBin/
153 | [Ss]tyle[Cc]op.*
154 | ~$*
155 | *~
156 | *.dbmdl
157 | *.dbproj.schemaview
158 | *.pfx
159 | *.publishsettings
160 | node_modules/
161 |
162 | # RIA/Silverlight projects
163 | Generated_Code/
164 |
165 | # Backup & report files from converting an old project file
166 | # to a newer Visual Studio version. Backup files are not needed,
167 | # because we have git ;-)
168 | _UpgradeReport_Files/
169 | Backup*/
170 | UpgradeLog*.XML
171 | UpgradeLog*.htm
172 |
173 | # SQL Server files
174 | *.mdf
175 | *.ldf
176 |
177 | # Business Intelligence projects
178 | *.rdl.data
179 | *.bim.layout
180 | *.bim_*.settings
181 |
182 | # Microsoft Fakes
183 | FakesAssemblies/
184 |
185 | #
186 | *build*
187 |
188 | #for pycharm
189 | *.idea*
190 | *.xml
191 | *.name
192 | *.iml
193 | *.pyc
194 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Yue Wang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Compression
2 | Data compression using Huffman coding.
3 |
--------------------------------------------------------------------------------
/compression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mooophy/Compression/2d369956a83703f0283d36d438e76783e909957d/compression.pdf
--------------------------------------------------------------------------------
/cpr/cpr.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 2013
4 | VisualStudioVersion = 12.0.31101.0
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "huffman", "huffman\huffman.vcxproj", "{1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}"
7 | EndProject
8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "unit_test_for_huffman", "unit_test_for_huffman\unit_test_for_huffman.vcxproj", "{DE37A775-FA0E-4665-AD9B-D65125B6CAA6}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|Win32 = Debug|Win32
13 | Release|Win32 = Release|Win32
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}.Debug|Win32.ActiveCfg = Debug|Win32
17 | {1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}.Debug|Win32.Build.0 = Debug|Win32
18 | {1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}.Release|Win32.ActiveCfg = Release|Win32
19 | {1D1CCDFE-663C-43E9-BAD9-8ED42E7E0C08}.Release|Win32.Build.0 = Release|Win32
20 | {DE37A775-FA0E-4665-AD9B-D65125B6CAA6}.Debug|Win32.ActiveCfg = Debug|Win32
21 | {DE37A775-FA0E-4665-AD9B-D65125B6CAA6}.Debug|Win32.Build.0 = Debug|Win32
22 | {DE37A775-FA0E-4665-AD9B-D65125B6CAA6}.Release|Win32.ActiveCfg = Release|Win32
23 | {DE37A775-FA0E-4665-AD9B-D65125B6CAA6}.Release|Win32.Build.0 = Release|Win32
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | EndGlobal
29 |
--------------------------------------------------------------------------------
/cpr/huffman/ClassDiagramForHuffman.cd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | AAAAAIAAAAAAAAAAAAAAAAAAIAAAACCBAAAAAAAAAAA=
7 | bit_string.hpp
8 |
9 |
10 |
11 |
12 |
13 | AAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAQAAAAAAAAA=
14 | code_word_dictionary.hpp
15 |
16 |
17 |
18 |
19 |
20 | gAgAAAAAAAwAAAAAAAAAAAAAAAAAAAABAQAQAgAAAgA=
21 | encoder.hpp
22 |
23 |
24 |
25 |
26 |
27 | AAAAAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=
28 | frequency_map.hpp
29 |
30 |
31 |
32 |
33 |
34 | AAAAAAAAAAAAAAAAAAAgAAAAABAAAAAAIAAAAAAIAAA=
35 | huffman_tree.hpp
36 |
37 |
38 |
39 |
40 |
41 | AAAAAEAAAAAAAAABAAAAAAAAAAAAABAAAAAACAEAgAA=
42 | node.hpp
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/cpr/huffman/bit_string.hpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 |
4 | #ifndef BIT_STRING_HPP
5 | #define BIT_STRING_HPP
6 |
7 |
8 | namespace cpr
9 | {
10 | namespace huffman
11 | {
12 | template
13 | class BitString
14 | {
15 | public:
16 | // Stores a bit sequence as one char (value 0 or 1) per bit in data_;
17 | // compress() later packs those bits into Char-sized units.
18 | BitString() : data_{} { }
19 |
20 | // Returns the unpacked bit sequence, one char per bit.
21 | std::string const& str() const { return data_; }
22 |
23 | // Appends the bits of `bits`, most significant first; bit_length(bits) bits are
24 | // written, so leading zeros are dropped. NOTE(review): confirm callers never rely on them.
25 | void push_back_bits(Char bits)
26 | {
27 | // bit_length() never returns 0, so no zero-length special case is needed.
28 | for (int pos = static_cast<int>(bit_length(bits)) - 1; pos >= 0; --pos)
29 | {
30 | // Shift first, then mask: `1 << pos` overflows int when Char is
31 | // wider than int, and masking the sign bit before shifting right
32 | // yields -1 instead of 1 on arithmetic-shift targets.
33 | data_.push_back(static_cast<char>((bits >> pos) & 1));
34 | }
35 | }
36 |
37 | // Number of bits push_back_bits() emits for `ch`:
38 | // negative -> full width of Char, zero -> 1,
39 | // positive -> 1-based position of the highest set bit.
40 | unsigned bit_length(Char ch) const
41 | {
42 | if (ch < 0)
43 | return sizeof(ch) * 8;
44 | if (ch == 0)
45 | return 1;
46 |
47 | unsigned count = 0;
48 | for (; ch > 0; ch >>= 1) ++count;
49 | return count;
50 | }
51 |
52 | // Packs the stored bits; this function emits every part after FrequencyTable.
53 | // protocol :
54 | // FrequencyTable|CompressedPart|Remainder|RemainderSize
55 | std::string compress(Char delimiter) const
56 | {
57 | // Bits per packed unit, kept signed so the iterator arithmetic below does not mix signedness.
58 | const auto unit_bits = static_cast<std::string::difference_type>(sizeof(Char) * 8);
59 | std::string compressed_data;
60 | auto curr = data_.cbegin();
61 |
62 | // compressed part: fold every full group of unit_bits bits into one Char
63 | // NOTE(review): push_back narrows Char to char - lossless only if Char is char-sized; confirm.
64 | while (data_.cend() - curr >= unit_bits)
65 | {
66 | Char ch = 0;
67 | for (const auto stop = curr + unit_bits; curr != stop; ++curr)
68 | ch = (ch << 1) + *curr;
69 | compressed_data.push_back(ch);
70 | }
71 |
72 | compressed_data.push_back(delimiter);
73 |
74 | // remainder part and remainder size: the leftover bits (fewer than unit_bits)
75 | const auto remainder_size = data_.cend() - curr;
76 | Char remainder = 0;
77 | for (auto peek = curr; peek != data_.cend(); ++peek)
78 | remainder = (remainder << 1) + *peek;
79 | compressed_data.push_back(remainder);
80 | compressed_data.push_back(delimiter);
81 | compressed_data.push_back(static_cast<char>(remainder_size));
82 |
83 | return compressed_data;
84 | }
85 | private:
86 | std::string data_;
87 | };
88 | }
89 | }
90 |
91 |
92 | #endif // !BIT_STRING_HPP
93 |
--------------------------------------------------------------------------------
/cpr/huffman/code_word_dictionary.hpp:
--------------------------------------------------------------------------------
1 | #include "huffman_tree.hpp"
2 |
3 |
4 | #ifndef CODE_WORD_DICTIONARY
5 | #define CODE_WORD_DICTIONARY
6 |
7 | namespace cpr
8 | {
9 | namespace huffman
10 | {
11 | template
12 | class CodeWordDictionary : public std::map < Char, CodeWord >
13 | {
14 | using SharedNode = typename cpr::huffman::Node::SharedNode;
15 | public:
16 | // Fills the dictionary by walking `huffman_tree` from its root, starting with path 0.
17 | explicit CodeWordDictionary(HuffmanTree const& huffman_tree)
18 | {
19 | fill_this_by_dfs(0, huffman_tree.root);
20 | }
21 | private:
22 | // Depth-first walk: `code` gains a 0 bit when descending left and a 1 bit when
23 | // descending right; a node whose character_ is non-zero records `code` for it.
24 | void fill_this_by_dfs(CodeWord code, SharedNode current)
25 | {
26 | if (!current)
27 | return;
28 | if (current->character_ == 0) // no character stored here: keep descending
29 | {
30 | const auto shifted = code << 1;
31 | fill_this_by_dfs(shifted + 0, current->left_);
32 | fill_this_by_dfs(shifted + 1, current->right_);
33 | return;
34 | }
35 | (*this)[current->character_] = code;
36 | }
37 | };
38 | }
39 | }
40 |
41 | #endif // !CODE_WORD_DICTIONARY
42 |
--------------------------------------------------------------------------------
/cpr/huffman/encoder.hpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include "frequency_map.hpp"
7 | #include "huffman_tree.hpp"
8 | #include "code_word_dictionary.hpp"
9 | #include "bit_string.hpp"
10 |
11 |
12 | #ifndef ENCODER_HPP
13 | #define ENCODER_HPP
14 |
15 |
16 | namespace cpr
17 | {
18 | namespace huffman
19 | {
20 | // protocol :
21 | // FrequencyTable|CompressedPart|Remainder|RemainderSize
22 | // The trailing bits that do not fill a whole unit (8 bits when Char is char) are appended to the Remainder part without compressing, followed by their count.
23 | template
24 | class Encoder
25 | {
26 | public:
27 | // Reads the file at `path` and builds, in member declaration order, the
28 | // frequency map, Huffman tree, code dictionary and encoded bit string.
29 | // Throws std::logic_error("bad file") when the file cannot be opened.
30 | explicit Encoder(std::string path) :
31 | data(read_file(path)), frequency_map(data), huffman_tree(frequency_map),
32 | code_dictionary(huffman_tree), bit_string(encode_data_and_push_into_bit_string())
33 | { }
34 |
35 | // Writes frequency_map.str(), `delimiter`, then bit_string.compress(delimiter) to `out_file` in binary mode.
36 | void write(std::string out_file, Char delimiter) const
37 | {
38 | std::ofstream ofs(out_file, std::ios::binary);
39 | ofs << frequency_map.str() << delimiter << bit_string.compress(delimiter);
40 | }
41 |
42 | // data members, read only (each one is initialized from the members declared before it)
43 | const std::vector data;
44 | const FrequencyMap frequency_map;
45 | const HuffmanTree huffman_tree;
46 | const CodeWordDictionary code_dictionary;
47 | const BitString bit_string;
48 | private:
49 | // Loads the entire content of the file at `path`; throws std::logic_error if it cannot be opened.
50 | std::vector read_file(std::string path)const
51 | {
52 | std::ifstream file(path, std::ios::binary);
53 | if (!file) // a failed open sets failbit, which bad() does not report
54 | throw std::logic_error("bad file");
55 | auto begin = std::istreambuf_iterator(file);
56 | auto end = std::istreambuf_iterator();
57 | return std::vector(begin, end);
58 | }
59 |
60 | // Appends the code word of every element of `data` to a fresh BitString.
61 | BitString encode_data_and_push_into_bit_string()const
62 | {
63 | BitString encoded;
64 | for (auto ch : data)
65 | encoded.push_back_bits(code_dictionary.at(ch));
66 | return encoded;
67 | }
68 | };
69 | }
70 | }
71 |
72 |
73 | #endif // !ENCODER_HPP
74 |
--------------------------------------------------------------------------------
/cpr/huffman/frequency_map.hpp:
--------------------------------------------------------------------------------
1 | #include