// ============================================================================
// Huffman-Compression (Patrick Sheehan) -- repository listing.
//
// The original listing was collapsed onto a handful of physical lines. It is
// reproduced here as ONE compilable translation unit: the non-code files are
// kept as comments, and the three C++ sources follow in dependency order
// (huff.h, huff.cpp, main.cpp). Their `#include "huff.h"` lines are kept as
// comments because the header's content appears directly above them.
//
// Repository layout:
//   ├── HuckleBerry.txt
//   ├── HuckleBerry_compressed.txt
//   ├── Makefile
//   ├── README.md
//   ├── huff.cpp
//   ├── huff.h
//   └── main.cpp
//
// ----------------------------------------------------------------------------
// /HuckleBerry_compressed.txt:
// ----------------------------------------------------------------------------
//   https://raw.githubusercontent.com/patricksheehan/Huffman-Compression/e02021396164e843345694135602b12b7dbac753/HuckleBerry_compressed.txt
//
// ----------------------------------------------------------------------------
// /Makefile:   (recipe lines must start with a TAB in the real file)
// ----------------------------------------------------------------------------
//   huffman : huff.o main.o
//           g++ -Wall -g -O3 -o huffman huff.o main.o
//
//   huff.o : huff.cpp huff.h
//           g++ -Wall -g -O3 -c huff.cpp
//
//   main.o : main.cpp huff.h
//           g++ -Wall -g -O3 -c main.cpp
//
//   clean :
//           rm -f huffman huff.o main.o
//
// ----------------------------------------------------------------------------
// /README.md:
// ----------------------------------------------------------------------------
//   # Huffman
//
//   Takes input and provides compressed output via the Huffman encoding
//   algorithm.
//
//   To build:
//   - Open a terminal
//   - Go to the folder with the source files & Makefile
//   - Type "make"
//
//   To run:
//   - In a terminal
//   - To compress, type:
//       ./huffman < notcompressed.txt > compressed.txt
//   - To decompress, type:
//       ./huffman -d < compressed.txt > decompressed.txt
//
//   Example:
//   ------
//   $ ./huffman < HuckleBerry.txt > HuckleBerry_compressed.txt
//   ------
// ============================================================================


// ----------------------------------------------------------------------------
// /huff.h:
// ----------------------------------------------------------------------------
// Patrick Sheehan

#include <cstddef>
#include <cstring>
#include <iostream>
#include <iterator>
#include <map>
#include <string>

using namespace std;

// One node of the Huffman tree.
//
// A leaf carries the byte value it stands for in `data`; an internal node
// owns nothing but links two sub-trees. `min` is the smallest byte value found
// anywhere below the node and is used only to break frequency ties, so that
// compressor and decompressor always rebuild exactly the same tree.
class Node{
    unsigned char data;
    unsigned int frequency;
    unsigned char min;
    Node * leftC;
    Node * rightC;
public:
    // Every constructor initialises every member: fillCodebook() decides
    // "leaf or not" by testing the child pointers, so they must never be
    // left indeterminate.
    Node(): data(0), frequency(0), min(0), leftC(NULL), rightC(NULL) {}
    // Leaf for byte value `d` that occurs `f` times.
    Node(unsigned char d, unsigned int f)
        : data(d), frequency(f), min(d), leftC(NULL), rightC(NULL) {}
    // Internal node joining two sub-trees (right child first, then left).
    Node(Node *, Node *);
    // The compiler-generated copy constructor is used on purpose: it copies
    // all five members (a hand-written one previously forgot `min`).

    // Writes the bit string ('0'/'1' characters) of every leaf below this
    // node into codebook[byte value]. `code` is the path taken so far.
    void fillCodebook(string *, string &);
    // Orders nodes by frequency, then by smallest contained byte value.
    bool operator> (const Node &) const;
};

// Fixed-capacity binary min-heap of Node pointers, ordered by
// Node::operator>. The heap never owns the nodes it stores.
//
// Storage is 1-based (slot 0 is unused) so the parent of slot i is i / 2.
// There are 256 possible byte values, hence at most 256 entries and 257
// slots. Pushing more than 256 entries is a precondition violation.
class Heap{
    enum { kSlots = 257 };
    Node *minHeap[kSlots];
    int heapSize;
public:
    Heap(): heapSize(0) {}
    void push(Node *);
    int size(){return heapSize;}
    // Removes the smallest entry; does nothing on an empty heap.
    void pop();
    // Smallest entry, or NULL when the heap is empty.
    Node * top(){return heapSize > 0 ? minHeap[1] : NULL;}
};


// ----------------------------------------------------------------------------
// /huff.cpp:
// ----------------------------------------------------------------------------
// Patrick Sheehan

// #include "huff.h"      (content appears above)

void Node:: fillCodebook(string * codebook, string &code) {
    if(!leftC && !rightC){
        // A tree made of a single leaf would otherwise receive the empty
        // code, and nothing at all would be written for the input. Give the
        // lone symbol the one-bit code "0" instead.
        codebook[data] = code.empty() ? string("0") : code;
        return;
    }
    if(leftC){
        code += '0';
        leftC->fillCodebook(codebook, code);
        code.erase(code.end()-1);       // backtrack before trying the other branch
    }
    if(rightC){
        code += '1';
        rightC->fillCodebook(codebook, code);
        code.erase(code.end()-1);
    }
}

Node:: Node(Node * rc, Node * lc)
    : data(0),
      frequency(rc->frequency + lc->frequency),
      min((rc->min < lc->min) ? rc->min : lc->min),
      leftC(lc),
      rightC(rc) {}

void Heap:: push(Node *newNode) {
    // Open a hole at the end and bubble it up until the parent is not larger.
    int hole = ++heapSize;
    while (hole != 1 && *minHeap[hole / 2] > *newNode) {
        minHeap[hole] = minHeap[hole / 2];
        hole /= 2;
    }
    minHeap[hole] = newNode;
}

void Heap:: pop(){
    if (heapSize == 0)
        return;

    // Take the last entry out and sift the hole left at the root downwards
    // until that entry fits.
    Node *lastNode = minHeap[heapSize];
    --heapSize;
    int hole = 1;
    int child = 2;

    while (child <= heapSize) {
        // pick the smaller of the two children
        if (child < heapSize && *minHeap[child] > *minHeap[child + 1])
            child++;

        if (*minHeap[child] > *lastNode)
            break;

        minHeap[hole] = minHeap[child];
        hole = child;
        child *= 2;
    } // while not at end of heap

    minHeap[hole] = lastNode;
}

bool Node::operator> (const Node &rhs) const {
    if(frequency != rhs.frequency)
        return frequency > rhs.frequency;
    // equal frequencies: the node containing the larger "smallest byte" is greater
    return min > rhs.min;
}


// ----------------------------------------------------------------------------
// /main.cpp:
// ----------------------------------------------------------------------------
// Patrick Sheehan

// #include "huff.h"      (content appears above)

void compress();
void putOut();
void decompress();
Node * constructHeap();
unsigned int frequencies[256] = {0};
string codebook[256];

// Stream signature: the seven characters "HUFFMA3" plus the terminating NUL.
static const char kMagic[8] = "HUFFMA3";

// Raw input captured by compress() and encoded by putOut(). Buffering the
// input avoids seeking on stdin, which does not work when stdin is a pipe.
static string inputData;

// Without arguments the program compresses stdin to stdout; with "-d" it
// decompresses. Any other single argument prints a usage line and fails.
int main(int argc, char* argv[]) {
    if(argc == 2){
        if((argv[1][0] == '-') && (argv[1][1] == 'd'))
            decompress();
        else{
            cerr << "usage: " << argv[0] << " [-d] < input > output\n";
            return 1;
        }
    }
    else
        compress();
    return 0;
}

// Reads all of stdin, counts byte frequencies, derives the code table and
// writes the compressed stream to stdout.
void compress(){
    inputData.assign(istreambuf_iterator<char>(cin), istreambuf_iterator<char>());

    // first, calculate the frequencies of each character
    for(string::size_type i = 0; i < inputData.size(); i++)
        frequencies[static_cast<unsigned char>(inputData[i])]++;

    // Empty input has no tree; only the header is written in that case.
    Node * root = constructHeap();
    if(root){
        string code;
        root->fillCodebook(codebook,code);
    }

    putOut();
}

// Writes the compressed stream for the data captured by compress():
//   8 bytes    magic "HUFFMA3\0"
//   1024 bytes 256 frequencies, 4 bytes each, least significant byte first
//   payload    the codes of all input bytes, packed starting at bit 0 of
//              each output byte; the last byte is zero-padded
void putOut(){
    cout.write(kMagic, sizeof kMagic);

    for(int i = 0; i < 256; i++){
        const unsigned int f = frequencies[i];
        cout.put(static_cast<char>( f        & 0xff));
        cout.put(static_cast<char>((f >> 8)  & 0xff));
        cout.put(static_cast<char>((f >> 16) & 0xff));
        cout.put(static_cast<char>((f >> 24) & 0xff));
    }

    unsigned char nextByte = 0;
    int bitCounter = 0;

    for(string::size_type pos = 0; pos < inputData.size(); pos++){
        const string &bits = codebook[static_cast<unsigned char>(inputData[pos])];
        for(string::size_type b = 0; b < bits.size(); b++){
            if(bits[b] == '1')
                nextByte = static_cast<unsigned char>(nextByte | (1u << bitCounter));
            if(++bitCounter == 8){
                cout.put(static_cast<char>(nextByte));
                nextByte = 0;
                bitCounter = 0;
            }
        }
    }
    if(bitCounter)      // flush the final, partially filled byte
        cout.put(static_cast<char>(nextByte));
}

// Reads a stream produced by compress() from stdin and writes the original
// bytes to stdout. Problems with the input are reported on stderr.
void decompress(){
    char magic[8];
    if(!cin.read(magic, sizeof magic) || memcmp(magic, kMagic, sizeof magic) != 0){
        cerr << "huffman: input is not a HUFFMA3 stream\n";
        return;
    }

    // The table is stored little-endian; assemble it byte by byte so the
    // result does not depend on the byte order of the machine running this.
    for(int i = 0; i < 256; i++){
        unsigned char raw[4];
        if(!cin.read(reinterpret_cast<char *>(raw), 4)){
            cerr << "huffman: truncated frequency table\n";
            return;
        }
        frequencies[i] =  static_cast<unsigned int>(raw[0])
                       | (static_cast<unsigned int>(raw[1]) << 8)
                       | (static_cast<unsigned int>(raw[2]) << 16)
                       | (static_cast<unsigned int>(raw[3]) << 24);
    }

    Node * root = constructHeap();
    if(!root)           // all frequencies are zero: the original was empty
        return;
    string code;
    root->fillCodebook(codebook,code);

    // Reverse table: bit string -> byte value, for the bytes that occur.
    map<string, unsigned char> decoder;
    for(int i = 0; i < 256; i++)
        if(frequencies[i])
            decoder[codebook[i]] = static_cast<unsigned char>(i);

    char packed;
    while(cin.get(packed)){
        const unsigned char byte = static_cast<unsigned char>(packed);
        for(int bit = 0; bit < 8; bit++){
            code += ((byte >> bit) & 0x01) ? '1' : '0';

            const map<string, unsigned char>::const_iterator hit = decoder.find(code);
            if(hit == decoder.end()){
                // No code can be longer than 255 bits; anything longer
                // means the payload does not belong to this table.
                if(code.size() > 255){
                    cerr << "huffman: corrupt payload\n";
                    return;
                }
                continue;
            }

            const unsigned char symbol = hit->second;
            // The remaining counts double as the end marker: a code whose
            // count is used up can only come from the padding bits.
            if(frequencies[symbol] == 0)
                return;
            cout.put(static_cast<char>(symbol));
            frequencies[symbol]--;
            code.clear();
        }
    }
}

// Builds the Huffman tree for the global `frequencies` table and returns its
// root, or NULL when every frequency is zero.
//
// The nodes are allocated with new and never freed; the tree is needed until
// the program ends.
Node * constructHeap(){
    Heap minHeap;
    for(int i = 0; i < 256; i++){
        if (frequencies[i])
            minHeap.push(new Node(static_cast<unsigned char>(i), frequencies[i]));
    }

    if(minHeap.size() == 0)
        return NULL;

    // Repeatedly join the two least frequent sub-trees. The first one popped
    // becomes the right child; this order is part of the stream format.
    while(minHeap.size() > 1){
        Node * node1 = minHeap.top();
        minHeap.pop();
        Node * node2 = minHeap.top();
        minHeap.pop();
        minHeap.push(new Node(node1, node2));
    }

    return minHeap.top();
}