├── .gitignore ├── LICENSE ├── README.md └── src ├── modules ├── c.c ├── c.h ├── d.c ├── d.h ├── f.c ├── f.h ├── t.c ├── t.h └── utils │ ├── errors.c │ ├── errors.h │ ├── extensions.c │ ├── extensions.h │ ├── file.c │ ├── file.h │ ├── multithread.c │ └── multithread.h └── shafa.c /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![GitHub top language](https://img.shields.io/github/languages/top/fytex/Shafa-CD?style=for-the-badge) 2 | 3 | 4 | # What is Shafa? 5 | 6 | **Shafa** is a free and open-source program for lossless file compression, written in C (2020/21). 7 | It is OS-independent, and some modules run multithreaded on Windows or on POSIX-compliant systems (using pthreads). 8 | 9 | It uses two algorithms for compression: 10 | - **RLE** 11 | - **Shannon-Fano** with blocks of length 1 (K=1). 12 | 13 | For RLE decompression, a .freq file is needed along with the .rle file. 14 | For Shannon-Fano decompression, a .cod file is needed along with the .shaf file.
(All these files are generated by the program.) 16 | 17 | ## Modules: 18 | - F ( RLE compression and Frequencies calculation ) 19 | - T ( Codes calculation using Shannon-Fano's algorithm ) 20 | - C ( Shannon-Fano compression ) 21 | - D ( RLE and Shannon-Fano decompression ) 22 | 23 | #### SETUP - \*NIX 24 | ``` 25 | gcc -o shafa $(find ./src -name '*.c' -or -name '*.h') -O3 -Wno-format -pthread 26 | ``` 27 | 28 | #### SETUP - WINDOWS 29 | ``` 30 | gcc -o shafa src/*.c src/*.h src/*/*.c src/*/*.h src/*/*/*.c src/*/*/*.h -O3 -Wno-format 31 | ``` 32 | 33 | 34 | ### How to execute? 35 | Open a terminal in the directory where the built `shafa` executable is located and type the following: 36 | 37 | **Windows**: 38 | - shafa.exe \<file\> \<options\> 39 | 40 | **\*NIX**: 41 | - ./shafa \<file\> \<options\> 42 | 43 | ### CLI Options: 44 | -m : Executes the respective module (more than one module can be executed, when possible) 45 | -b : Block size for compression (default: K) 46 | -c : Forces execution (r -> RLE compression | f -> Original file's frequencies) 47 | -d : Executes only a specific decompression (s -> Shannon-Fano's algorithm | r -> RLE's algorithm) 48 | --no-multithread : Disables multithreading 49 | 50 | 51 | ### Block Sizes: 52 | - K = 640 KiB 53 | - m = 8 MiB 54 | - M = 64 MiB 55 | 56 | **Note:** Multithreading is only implemented in modules C and D (the ones that cost the most) 57 | -------------------------------------------------------------------------------- /src/modules/c.c: -------------------------------------------------------------------------------- 1 | /************************************************ 2 | * 3 | * Author(s): Pedro Tavares, Tiago Costa 4 | * Created Date: 7 Dec 2020 5 | * Updated Date: 29 Dec 2020 6 | * 7 | ***********************************************/ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "utils/errors.h" 15 | #include "utils/extensions.h" 16 | #include "utils/multithread.h" 17 | 18 | #define MAX_CODE_INT 32 19 | #define NUM_SYMBOLS 256 20 |
#define NUM_OFFSETS 8 21 | 22 | /** 23 | Struct with the symbol code, next and index which represent each row of the table for compression 24 | */ 25 | typedef struct { 26 | int index; 27 | int next; 28 | uint8_t code[MAX_CODE_INT + 1]; 29 | } CodesIndex; 30 | 31 | /** 32 | Struct containing parameters passed to the functions which run in multithread 33 | */ 34 | typedef struct { 35 | unsigned long block_size; 36 | FILE * fd_shafa; 37 | char * block_codes; 38 | uint8_t * block_input; 39 | uint8_t * block_output; 40 | unsigned long * new_block_size; 41 | } Arguments; 42 | 43 | 44 | /** 45 | \brief Aplies algorithm to make the symbols' codification 46 | @param table Table of codes 47 | @param block_input Block with original file's bytes 48 | @param block_size Block size 49 | @param new_block_size Block size after codification 50 | @returns Allocated string of compressed binary 51 | */ 52 | static uint8_t * binary_coding(CodesIndex * const table, const uint8_t * restrict block_input, const unsigned long block_size, unsigned long * const new_block_size) 53 | { 54 | CodesIndex * symbol; 55 | int next = 0, num_bytes_code; 56 | uint8_t * code, * output; 57 | 58 | uint8_t * const block_output = calloc(block_size * 1.05, sizeof(uint8_t)); // Uncompressed Block Size + 5% which is a big margin for the new compressed block size 59 | 60 | if (!block_output) 61 | return NULL; 62 | 63 | output = block_output; 64 | 65 | for (unsigned long idx = 0; idx < block_size; ++idx) { 66 | symbol = &table[next + *block_input++]; 67 | 68 | num_bytes_code = symbol->index; 69 | code = symbol->code; 70 | 71 | for (int i = 0; i < num_bytes_code; ++i) 72 | *output++ |= *code++; 73 | 74 | // Even if the compiler won't store it in a registry it will be cached 75 | *output |= *code; 76 | 77 | next = symbol->next; 78 | } 79 | 80 | *new_block_size = output - block_output + (next ? 
1 : 0); 81 | 82 | return block_output; 83 | } 84 | 85 | 86 | /** 87 | \brief Generates table of codes 88 | @param _args Pointer to a structure with all arguments needed to this function 89 | @returns Error status 90 | */ 91 | static _modules_error compress_to_buffer(void * const _args) 92 | { 93 | Arguments * args = (Arguments *) _args; 94 | const char * block_codes = args->block_codes; 95 | const uint8_t * block_input = args->block_input; 96 | const unsigned long block_size = args->block_size; 97 | unsigned long * const new_block_size = args->new_block_size; 98 | 99 | CodesIndex header_symbol_row, *symbol_row; 100 | char cur_char, next_char; 101 | int bit_idx, code_idx; 102 | uint8_t byte, next_byte_prefix = 0, mask; 103 | 104 | CodesIndex (* table)[NUM_SYMBOLS] = calloc(1, sizeof(CodesIndex[NUM_OFFSETS][NUM_SYMBOLS])); 105 | 106 | if (!table) 107 | return _LACK_OF_MEMORY; 108 | 109 | /* 110 | / 111 | / Table's header initialization 112 | / 113 | */ 114 | 115 | cur_char = *block_codes++; 116 | next_char = *block_codes++; 117 | for (int syb_idx = 0; syb_idx < NUM_SYMBOLS; ++syb_idx) { 118 | 119 | code_idx = 0; 120 | 121 | for ( ; cur_char != ';' && cur_char != '\0' && code_idx < MAX_CODE_INT; ++code_idx) { 122 | byte = 0; 123 | for (bit_idx = 0; bit_idx < 8; ++bit_idx) { 124 | 125 | if (cur_char == '1') 126 | ++byte; 127 | else if (cur_char != '0') { 128 | free(args->block_codes); 129 | free(table); 130 | return _FILE_UNRECOGNIZABLE; 131 | } 132 | 133 | if (bit_idx < 7) { 134 | if (next_char == ';') { 135 | byte <<= 7 - bit_idx; 136 | cur_char = next_char; 137 | next_char = *block_codes++; 138 | break; 139 | } 140 | else if (next_char == '\0') { 141 | byte <<= 7 - bit_idx; 142 | cur_char = next_char; 143 | break; 144 | } 145 | else 146 | byte <<= 1; 147 | } 148 | 149 | cur_char = next_char; 150 | next_char = *block_codes++; 151 | 152 | } 153 | table[0][syb_idx].code[code_idx] = byte; 154 | } 155 | 156 | if (code_idx > 0) { 157 | table[0][syb_idx].next = (bit_idx != 
8) ? (bit_idx + 1) * NUM_SYMBOLS : 0; 158 | table[0][syb_idx].index = code_idx - (bit_idx < 8 ? 1 : 0); 159 | } 160 | 161 | if (cur_char != '\0') { 162 | cur_char = next_char; 163 | next_char = *block_codes++; 164 | } 165 | else if (syb_idx < NUM_SYMBOLS - 1) { // if end of codes' block but still hasn't iterated over 256 symbols 166 | free(args->block_codes); 167 | free(table); 168 | return _FILE_UNRECOGNIZABLE; 169 | } 170 | 171 | } 172 | free(args->block_codes); 173 | 174 | if (cur_char != '\0') { // Check whether file is actually correct (Not required but it is an assert) 175 | free(table); 176 | return _FILE_UNRECOGNIZABLE; 177 | } 178 | 179 | 180 | /* 181 | / 182 | / Table's body initialization 183 | / 184 | */ 185 | 186 | for (int idx = 0, new_index; idx < NUM_SYMBOLS; ++idx) { 187 | 188 | header_symbol_row = table[0][idx]; 189 | 190 | if (header_symbol_row.next || header_symbol_row.index) { 191 | 192 | for (int offset = 1, bit_offset; offset < NUM_OFFSETS; ++offset) { 193 | 194 | symbol_row = &table[offset][idx]; 195 | 196 | bit_offset = (header_symbol_row.next / NUM_SYMBOLS) + offset; 197 | new_index = header_symbol_row.index + (bit_offset < 8 ? 0: 1); 198 | symbol_row->index = new_index; 199 | symbol_row->next = ((bit_offset < 8) ? 
bit_offset : bit_offset - 8) * NUM_SYMBOLS; 200 | 201 | next_byte_prefix = 0; 202 | for (code_idx = 0; code_idx < new_index; ++code_idx) { 203 | 204 | byte = header_symbol_row.code[code_idx]; 205 | symbol_row->code[code_idx] = (byte >> offset) | (next_byte_prefix << (8 - offset)); 206 | 207 | mask = (1 << offset) - 1; 208 | next_byte_prefix = byte & mask; 209 | 210 | } 211 | 212 | // This will only execute if next != 0 (which is the same as bit_offset != 8) Because this iteration is unnecessary 213 | if (bit_offset != 8) { 214 | byte = header_symbol_row.code[new_index]; 215 | symbol_row->code[new_index] = (byte >> offset) | (next_byte_prefix << (8 - offset)); 216 | } 217 | } 218 | } 219 | } 220 | 221 | 222 | /* 223 | / 224 | / Write compressed code to buffer 225 | / 226 | */ 227 | 228 | 229 | args->block_output = binary_coding((CodesIndex *) table, block_input, block_size, new_block_size); 230 | 231 | free(table); 232 | 233 | if (!args->block_output) 234 | return _LACK_OF_MEMORY; 235 | 236 | return _SUCCESS; 237 | } 238 | 239 | 240 | /** 241 | \brief Writes codification to file 242 | @param _args Pointer to a structure with all arguments needed to this function 243 | @param prev_error Error before calling this functione 244 | @param error Error after calling this function 245 | @returns Error status 246 | */ 247 | static _modules_error write_shafa(void * const _args, _modules_error prev_error, _modules_error error) 248 | { 249 | Arguments * args = (Arguments *) _args; 250 | FILE * const fd_shafa = args->fd_shafa; 251 | uint8_t * const block_output = args->block_output; 252 | const unsigned long new_block_size = *args->new_block_size; 253 | 254 | if (!error) { 255 | if (!prev_error) { 256 | if (fprintf(fd_shafa, "@%lu@", new_block_size) >= 2) { 257 | 258 | if (fwrite(block_output, sizeof(uint8_t), new_block_size, fd_shafa) != new_block_size) 259 | error = _FILE_STREAM_FAILED; 260 | } 261 | else 262 | error = _FILE_STREAM_FAILED; 263 | 264 | } 265 | 266 | 
free(block_output); 267 | } 268 | 269 | free(_args); 270 | return error; 271 | } 272 | 273 | 274 | /** 275 | \brief Prints the results of the program execution 276 | @param num_blocks Number of blocks analysed 277 | @param blocks_input_size Block sizes previous to oodification 278 | @param blocks_output_size Block sizes after the codification 279 | @param total_time Time that the program took to execute 280 | @param path The path to the generated file 281 | */ 282 | static inline void print_summary(const unsigned long long num_blocks, const unsigned long * const blocks_input_size, const unsigned long * const blocks_output_size, const double total_time, const char * const path) 283 | { 284 | unsigned long block_input_size, block_output_size; 285 | 286 | printf( 287 | "Pedro Tavares, a93227, MIEI/CD, 1-JAN-2021\n" 288 | "Tiago Costa, a93322, MIEI/CD, 1-JAN-2021\n" 289 | "Module: C (Symbol codes' codification)\n" 290 | "Number of blocks: %lu\n", num_blocks 291 | ); 292 | for (unsigned long long i = 0; i < num_blocks; ++i) { 293 | block_input_size = blocks_input_size[i]; 294 | block_output_size = blocks_output_size[i]; 295 | printf("Size before/after & compression rate (Block %lu): %lu/%lu -> %d%%\n", i, block_input_size, block_output_size, (int) (((float) block_output_size / block_input_size) * 100)); 296 | } 297 | 298 | printf( 299 | "Module runtime (milliseconds): %f\n" 300 | "Generated file %s\n", 301 | total_time, path 302 | ); 303 | } 304 | 305 | 306 | _modules_error shafa_compress(char ** const path) 307 | { 308 | FILE * fd_file, * fd_codes, * fd_shafa; 309 | Arguments * args; 310 | float total_time; 311 | char * path_file = *path; 312 | char * path_codes; 313 | char * path_shafa; 314 | char * block_codes; 315 | unsigned long long num_blocks; 316 | unsigned long block_size; 317 | int error = _SUCCESS; 318 | uint8_t * block_input; 319 | unsigned long * blocks_size = NULL, * blocks_input_size, * blocks_output_size; 320 | 321 | clock_main_thread(START_CLOCK); 322 | 
323 | // Create Codes's path string and Open Codes's handle 324 | 325 | path_codes = add_ext(path_file, CODES_EXT); 326 | 327 | if (path_codes) { 328 | 329 | fd_codes = fopen(path_codes, "rb"); 330 | 331 | if (fd_codes) { 332 | 333 | if (fscanf(fd_codes, "@%*c@%lu", &num_blocks) == 1) { 334 | 335 | // Open File's handle 336 | fd_file = fopen(path_file, "rb"); 337 | 338 | if (fd_file) { 339 | 340 | 341 | // Create Shafa's path string and Open Shafa's handle 342 | 343 | path_shafa = add_ext(path_file, SHAFA_EXT); 344 | 345 | if (path_shafa) { 346 | 347 | fd_shafa = fopen(path_shafa, "wb"); 348 | 349 | if (fd_shafa) { 350 | 351 | if (fprintf(fd_shafa, "@%lu", num_blocks) >= 2) { 352 | 353 | blocks_size = malloc(2 * num_blocks * sizeof(unsigned long)); 354 | 355 | if (blocks_size) { 356 | 357 | blocks_input_size = blocks_size; 358 | blocks_output_size = blocks_input_size + num_blocks; // Acts as a "virtual" array 359 | 360 | for (unsigned long long thread_idx = 0; thread_idx < num_blocks; ++thread_idx) { 361 | 362 | block_codes = malloc((33151 + 1 + 1) * sizeof(char)); //sum 1 to 256 (worst case shannon fano) + 255 semicolons + 1 byte NULL + 1 algorithm efficiency (exchange 2 * 256 + 2 compares for +1 byte in heap and +1 memory access) 363 | 364 | if (!block_codes) { 365 | error = _LACK_OF_MEMORY; 366 | break; 367 | } 368 | 369 | if (fscanf(fd_codes,"@%lu@%33151[^@]", &block_size, block_codes) != 2) { 370 | free(block_codes); 371 | error = _FILE_STREAM_FAILED; 372 | break; 373 | } 374 | 375 | args = malloc(sizeof(Arguments)); 376 | 377 | if (!args) { 378 | free(block_codes); 379 | error = _LACK_OF_MEMORY; 380 | break; 381 | } 382 | 383 | block_input = malloc(block_size * sizeof(uint8_t)); 384 | 385 | if (!block_input) { 386 | free(block_codes); 387 | free(args); 388 | error = _LACK_OF_MEMORY; 389 | break; 390 | } 391 | 392 | if (fread(block_input, sizeof(uint8_t), block_size, fd_file) != block_size) { 393 | free(block_codes); 394 | free(block_input); 395 | free(args); 
396 | error = _FILE_STREAM_FAILED; 397 | break; 398 | } 399 | 400 | *args = (Arguments) { 401 | .block_size = block_size, 402 | .fd_shafa = fd_shafa, 403 | .block_codes = block_codes, 404 | .block_input = block_input, 405 | .block_output = NULL, 406 | .new_block_size = &blocks_output_size[thread_idx] 407 | }; 408 | 409 | blocks_input_size[thread_idx] = block_size; 410 | 411 | error = multithread_create(compress_to_buffer, write_shafa, args); 412 | 413 | if (error) { 414 | free(block_codes); 415 | free(block_input); 416 | free(args); 417 | break; 418 | } 419 | 420 | } 421 | multithread_wait(); 422 | } 423 | else 424 | error = _LACK_OF_MEMORY; 425 | } 426 | else 427 | error = _FILE_STREAM_FAILED; 428 | 429 | 430 | fclose(fd_shafa); 431 | } 432 | else 433 | error = _FILE_INACCESSIBLE; 434 | 435 | if (error) 436 | free(path_shafa); 437 | } 438 | else 439 | error = _LACK_OF_MEMORY; 440 | 441 | fclose(fd_file); 442 | } 443 | else 444 | error = _FILE_INACCESSIBLE; 445 | } 446 | else 447 | error = _FILE_UNRECOGNIZABLE; 448 | 449 | fclose(fd_codes); 450 | } 451 | else 452 | error = _FILE_INACCESSIBLE; 453 | 454 | free(path_codes); 455 | } 456 | else 457 | error = _LACK_OF_MEMORY; 458 | 459 | if (!error) { 460 | *path = path_shafa; 461 | free(path_file); 462 | 463 | total_time = clock_main_thread(STOP_CLOCK); 464 | 465 | print_summary(num_blocks, blocks_input_size, blocks_output_size, total_time, path_shafa); 466 | } 467 | 468 | if (blocks_size) 469 | free(blocks_size); 470 | 471 | return error; 472 | } 473 | -------------------------------------------------------------------------------- /src/modules/c.h: -------------------------------------------------------------------------------- 1 | #ifndef MODULE_C_H 2 | #define MODULE_C_H 3 | 4 | #include "utils/errors.h" 5 | 6 | /** 7 | \brief Compresses file with Shannon Fano's algorithm and saves it to disk 8 | @param path Pointer to the original/RLE file's path 9 | @returns Error status 10 | */ 11 | _modules_error 
shafa_compress(char ** path); 12 | 13 | #endif //MODULE_C_H -------------------------------------------------------------------------------- /src/modules/d.c: -------------------------------------------------------------------------------- 1 | /*************************************************** 2 | * 3 | * Author(s): Alexandre Martins, Beatriz Rodrigues 4 | * Created Date: 3 Dec 2020 5 | * Updated Date: 3 Jan 2020 6 | * 7 | **************************************************/ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | #include "utils/file.h" 17 | #include "utils/errors.h" 18 | #include "utils/extensions.h" 19 | #include "utils/multithread.h" 20 | 21 | #define NUM_SYMBOLS 256 22 | 23 | /** 24 | Enumeration of all possible types of decoding 25 | */ 26 | typedef enum 27 | { 28 | _RLE, 29 | _SHAFA, 30 | _SHAFA_RLE, 31 | 32 | } Algorithm; 33 | 34 | 35 | /** 36 | \brief Prints the results of the program execution 37 | @param time Time that the program took to execute 38 | @param decomp_sizes Block sizes previous to the decompression 39 | @param new_sizes Block sizes after the decompression 40 | @param length Number of blocks 41 | @param new_path The path to the generated file 42 | @param algo The type of decoding that occured 43 | */ 44 | static inline void print_summary (double time, unsigned long * decomp_sizes, unsigned long * new_sizes, unsigned long long length, char* new_path, Algorithm algo) 45 | { 46 | printf( 47 | "Alexandre Martins, a93242, MIEI/CD, 1-JAN-2021\n" 48 | "Beatriz Rodrigues, a93230, MIEI/CD, 1-JAN-2021\n" 49 | ); 50 | 51 | if (algo == _RLE) 52 | printf("Module: D (RLE decoding)\n"); 53 | else if (algo == _SHAFA) 54 | printf("Module: D (SHAFA decoding)\n"); 55 | else 56 | printf("Module: D (SHAFA & RLE decoding)\n"); 57 | 58 | for (unsigned long long i = 0; i < length; ++i) 59 | printf("Size before/after generating file (block %lu): %lu/%lu\n", i + 1, decomp_sizes[i], new_sizes[i]); 60 | printf( 61 | 
"Module runtime (in milliseconds): %f\n" 62 | "Generated file %s\n", 63 | time, new_path 64 | ); 65 | } 66 | 67 | 68 | /** 69 | \brief Loads the rle file to a buffer and saves it 70 | @param f_rle Pointer to the RLE file 71 | @param block_size Size of the block to be loaded 72 | @param buffer Address to a string to load a block of RLE content 73 | @returns Error status 74 | */ 75 | static _modules_error load_rle (FILE* f_rle, unsigned long block_size, uint8_t ** buffer) 76 | { 77 | _modules_error error = _SUCCESS; 78 | 79 | // Memory allocation for the buffer that will contain the contents of one block of symbols 80 | *buffer = malloc(block_size); 81 | if (*buffer) { 82 | // The function fread loads the said contents into the buffer 83 | // For the correct execution, the amount read by fread has to match the amount that was supposed to be read: block_size 84 | if (fread(*buffer, sizeof(uint8_t), block_size, f_rle) != block_size) { 85 | error = _FILE_STREAM_FAILED; 86 | *buffer = NULL; 87 | } 88 | 89 | } 90 | else 91 | error = _LACK_OF_MEMORY; 92 | 93 | 94 | return error; 95 | } 96 | 97 | /** 98 | Struct containing all arguments passed to the multithreaded RLE decompression 99 | */ 100 | typedef struct { 101 | 102 | FILE * f_rle; 103 | FILE * f_wrt; 104 | unsigned long rle_block_size; 105 | unsigned long * final_sizes; 106 | uint8_t * buffer; 107 | uint8_t * sequence; 108 | 109 | } ArgumentsRLE; 110 | 111 | /** 112 | \brief Decompresses a RLE block 113 | @param args Arguments necessary to the function 114 | @returns Error status 115 | */ 116 | static _modules_error rle_block_decompressor (void * _args) 117 | { 118 | _modules_error error = _SUCCESS; 119 | ArgumentsRLE * args = (ArgumentsRLE *) _args; 120 | unsigned long block_size = args->rle_block_size; 121 | unsigned long * final_sizes = args->final_sizes; 122 | uint8_t * buffer = args->buffer; 123 | uint8_t * sequence; 124 | unsigned long orig_size, l; 125 | char simb; 126 | uint8_t n_reps; 127 | 128 | // 
Assumption of the smallest size possible for the decompressed file 129 | if (block_size <= _64KiB) 130 | orig_size = _64KiB + _1KiB; 131 | else if (block_size <= _640KiB) 132 | orig_size = _640KiB + _1KiB; 133 | else if (block_size <= _8MiB) 134 | orig_size = _8MiB + _1KiB; 135 | else 136 | orig_size = _64MiB + _1KiB; 137 | 138 | // Allocation of the corresponding memory 139 | sequence = malloc(orig_size); 140 | if (sequence) { 141 | 142 | // Loop to decompress block by block 143 | l = 0; // Variable to be used to go through the sequence string 144 | for (unsigned long i = 0; i < block_size; ++i) { 145 | 146 | simb = buffer[i]; 147 | n_reps = 0; 148 | // Case of RLE pattern {0}char{n_rep} 149 | if (!simb) { 150 | simb = buffer[++i]; 151 | n_reps = buffer[++i]; 152 | } 153 | // Re-allocation of memory in the string 154 | if (l + n_reps > orig_size) { 155 | switch (orig_size) { 156 | case _64KiB + _1KiB: 157 | orig_size = _640KiB + _1KiB; 158 | break; 159 | case _640KiB + _1KiB: 160 | orig_size = _8MiB + _1KiB; 161 | break; 162 | case _8MiB + _1KiB: 163 | orig_size = _64MiB + _1KiB; 164 | break; 165 | default: // Invalid size 166 | error = _FILE_UNRECOGNIZABLE; 167 | free(sequence); 168 | return error; 169 | } 170 | sequence = realloc(sequence, orig_size); 171 | // In case of realloc failure, we free the previous memory allocation 172 | if (!sequence) { 173 | error = _LACK_OF_MEMORY; 174 | free(sequence); // Frees the previously allocated memory 175 | break; 176 | } 177 | 178 | } 179 | if (n_reps) { 180 | memset(sequence + l, simb, n_reps); 181 | l += n_reps; 182 | } 183 | else 184 | sequence[l++] = simb; 185 | 186 | } 187 | *final_sizes = l; 188 | 189 | args->sequence = sequence; 190 | } 191 | else 192 | error = _LACK_OF_MEMORY; 193 | 194 | free(buffer); 195 | 196 | return error; 197 | } 198 | 199 | /** 200 | \brief Writes the contents resulting of the decompression of the RLE file 201 | @param _args Arguments to the function 202 | @param prev_error Error status in 
previous thread 203 | @param error Error status from process 204 | @returns Error status 205 | */ 206 | static _modules_error write_decompressed_rle (void * const _args, _modules_error prev_error, _modules_error error) 207 | { 208 | ArgumentsRLE * args = (ArgumentsRLE *) _args; 209 | FILE * const f_wrt = args->f_wrt; 210 | const unsigned long new_block_size = *args->final_sizes; 211 | uint8_t * const sequence = args->sequence; 212 | 213 | if (!error) { 214 | 215 | if (!prev_error) { 216 | 217 | // Writing the decompressed block in ORIGINAL file 218 | if (fwrite(sequence, sizeof(uint8_t), new_block_size, f_wrt) != new_block_size) 219 | error = _FILE_STREAM_FAILED; 220 | 221 | } 222 | 223 | free(sequence); 224 | } 225 | 226 | free(_args); 227 | 228 | return error; 229 | } 230 | 231 | 232 | _modules_error rle_decompress (char ** path) 233 | { 234 | _modules_error error = _SUCCESS; 235 | FILE *f_rle, *f_freq, *f_wrt; 236 | char *path_freq, *path_wrt, *path_rle; 237 | uint8_t * buffer; 238 | char mode; 239 | unsigned long *rle_sizes, *final_sizes; 240 | unsigned long long length; 241 | float total_time; 242 | ArgumentsRLE * args; 243 | 244 | clock_main_thread(START_CLOCK); 245 | 246 | path_rle = *path; 247 | // Opens RLE file 248 | f_rle = fopen(path_rle, "rb"); 249 | if (f_rle) { 250 | 251 | // Creates path to the original file 252 | path_wrt = rm_ext(path_rle); 253 | if (path_wrt) { 254 | 255 | // Opens file to write the original contents in 256 | f_wrt = fopen(path_wrt, "wb"); 257 | if (f_wrt) { 258 | 259 | // Creates path to the FREQ file 260 | path_freq = add_ext(path_rle, FREQ_EXT); 261 | if (path_freq) { 262 | 263 | // Opens FREQ file 264 | f_freq = fopen(path_freq, "rb"); 265 | if (f_freq) { 266 | 267 | // Reads the header of the FREQ file 268 | if (fscanf(f_freq, "@%c@%lu", &mode, &length) == 2) { 269 | 270 | if (mode == 'R') { 271 | 272 | // Allocates memory for an array to contain the sizes of all the blocks of the RLE file 273 | rle_sizes = 
malloc(sizeof(unsigned long) * length); 274 | if (rle_sizes) { 275 | 276 | // Loads the sizes to the array 277 | for (unsigned long long i = 0; i < length && !error; ++i) { 278 | if (fscanf(f_freq, "@%lu@%*[^@]", rle_sizes + i) != 1) 279 | error = _FILE_STREAM_FAILED; 280 | 281 | } 282 | 283 | if (error) 284 | free(rle_sizes); 285 | 286 | } 287 | else 288 | error = _LACK_OF_MEMORY; 289 | 290 | } 291 | else 292 | error = _FILE_UNRECOGNIZABLE; 293 | 294 | } 295 | else 296 | error = _FILE_STREAM_FAILED; 297 | 298 | 299 | fclose(f_freq); 300 | 301 | } 302 | else 303 | error = _FILE_INACCESSIBLE; 304 | 305 | free(path_freq); 306 | 307 | } 308 | else 309 | error = _LACK_OF_MEMORY; 310 | 311 | // If there wasn't an error in any of the previous processes, the decompression continues 312 | if (!error) { 313 | 314 | // Allocates memory for an array that will contain the final size of the blocks after decompression 315 | final_sizes = malloc(sizeof(unsigned long) * length); 316 | if (final_sizes) { 317 | 318 | // Loop to execute block by block 319 | for (unsigned long long thread_idx = 0; thread_idx < length; ++thread_idx) { 320 | 321 | // Loading rle block 322 | error = load_rle(f_rle, rle_sizes[thread_idx], &buffer); 323 | if (error) break; 324 | 325 | args = malloc(sizeof(ArgumentsRLE)); 326 | 327 | if (!args) { 328 | free(buffer); 329 | break; 330 | } 331 | 332 | *args = (ArgumentsRLE) { 333 | .rle_block_size = rle_sizes[thread_idx], 334 | .buffer = buffer, 335 | .f_rle = f_rle, 336 | .f_wrt = f_wrt, 337 | .final_sizes = &final_sizes[thread_idx] 338 | 339 | }; 340 | 341 | // Decompressing the RLE block and loading the final size of the blocks after decompression to the array 342 | error = multithread_create(rle_block_decompressor, write_decompressed_rle, args); 343 | 344 | if (error) { 345 | free(args); 346 | free(buffer); 347 | break; 348 | } 349 | 350 | } 351 | 352 | multithread_wait(); 353 | 354 | 355 | if (error) 356 | free(final_sizes); 357 | } 358 | else 359 | error 
= _LACK_OF_MEMORY; 360 | } 361 | fclose(f_wrt); 362 | 363 | } 364 | else 365 | error = _FILE_INACCESSIBLE; 366 | 367 | } 368 | else 369 | error = _LACK_OF_MEMORY; 370 | 371 | fclose(f_rle); 372 | } 373 | else 374 | error = _FILE_INACCESSIBLE; 375 | 376 | if (!error) { 377 | 378 | free(path_rle); 379 | *path = path_wrt; 380 | total_time = clock_main_thread(STOP_CLOCK); 381 | print_summary(total_time, rle_sizes, final_sizes, length, *path, _RLE); 382 | free(rle_sizes); 383 | free(final_sizes); 384 | 385 | } 386 | 387 | return error; 388 | } 389 | 390 | 391 | /** 392 | \brief Binary tree that will contain all of the symbols codes needed to the descompressed file 393 | */ 394 | typedef struct btree{ 395 | char symbol; 396 | struct btree *left,*right; 397 | } *BTree; 398 | 399 | /** 400 | \brief Frees all the memory used by a BTree 401 | @param tree Binary tree 402 | */ 403 | static void free_tree(BTree tree) 404 | { 405 | if (tree) { 406 | free_tree(tree->right); 407 | free_tree(tree->left); 408 | free(tree); 409 | } 410 | 411 | } 412 | 413 | /** 414 | \brief Adds a given symbol to the BTree 415 | @param decoder Tree with saved symbols to help in decoding 416 | @param code String with the codes from COD file 417 | @param start Beggining of the code to be added 418 | @param end Ending of the code to be added 419 | @param symbol Symbol to be saved in the tree 420 | @returns Error status 421 | */ 422 | static _modules_error add_tree(BTree* decoder, char *code, int start, int end, char symbol) 423 | { 424 | // Creation of the path to the symbol we are placing 425 | for (int i = start; i < end; ++i) { 426 | 427 | if (*decoder && code[i] == '0') decoder = &(*decoder)->left; 428 | else if (*decoder && code[i] == '1') decoder = &(*decoder)->right; 429 | else { 430 | *decoder = malloc(sizeof(struct btree)); 431 | if (!(*decoder)) return _LACK_OF_MEMORY; 432 | (*decoder)->left = (*decoder)->right = NULL; 433 | if (code[i] == '0') decoder = &(*decoder)->left; 434 | else decoder = 
&(*decoder)->right; 435 | } 436 | } 437 | // Adding the symbol to the corresponding leaf of the tree 438 | *decoder = malloc(sizeof(struct btree)); 439 | (*decoder)->symbol = symbol; 440 | (*decoder)->left = (*decoder)->right = NULL; 441 | return _SUCCESS; 442 | } 443 | 444 | /* 445 | Struct containing all arguments passed to the multithreaded SHAFA decompression 446 | */ 447 | typedef struct { 448 | 449 | FILE * f_wrt; 450 | char * cod_code; 451 | unsigned long * rle_sizes; 452 | unsigned long * final_sizes; 453 | uint8_t * rle_decompressed; 454 | uint8_t * shafa_decompressed; 455 | uint8_t * shafa_code; 456 | bool rle_decompression; 457 | 458 | } ArgumentsSHAFA; 459 | 460 | /** 461 | \brief Generates a binary tree that contains the symbols acording to the codes 462 | @param code String with a block of the COD file 463 | @param decoder Pointer to the binary tree 464 | @returns Error status 465 | */ 466 | static _modules_error create_tree (char * code, BTree * decoder) 467 | { 468 | _modules_error error; 469 | int j, start, end; 470 | 471 | error = _SUCCESS; 472 | // Initialize root without meaning 473 | *decoder = malloc(sizeof(struct btree)); 474 | 475 | if (*decoder) { 476 | 477 | (*decoder)->left = (*decoder)->right = NULL; 478 | 479 | for (int symb = 0, l = 0; code[l] && !error;) { 480 | 481 | // When it finds a ';' it is no longer on the same symbol. This updates it. 
482 | while (code[l] == ';') { 483 | symb++; 484 | l++; 485 | } 486 | 487 | start = l; 488 | for ( ; code[l] && (code[l] != ';'); ++l); 489 | end = l; 490 | 491 | if (start != end) 492 | // Adds the code to the tree 493 | error = add_tree(decoder, code, start, end, symb); 494 | 495 | } 496 | 497 | free(code); 498 | } 499 | else 500 | error = _LACK_OF_MEMORY; 501 | 502 | 503 | return error; 504 | } 505 | 506 | /** 507 | \brief Decompresses a block of shafa code 508 | @param shafa Content of the file to be descompressed 509 | @param block_size Block size 510 | @param decoder Binary tree with the symbols 511 | @param decomp Address to load a string with the decompressed contents 512 | @returns Error status 513 | */ 514 | static _modules_error shafa_block_decompressor (uint8_t * shafa, unsigned long block_size, BTree decoder, uint8_t ** decomp) 515 | { 516 | BTree root; 517 | uint8_t mask; 518 | unsigned long i, l; 519 | int bit; 520 | 521 | // String for the decompressed contents 522 | *decomp = malloc(block_size); 523 | if (!(*decomp)) return _LACK_OF_MEMORY; 524 | 525 | root = decoder; // Saving the root for multiple crossings in the tree 526 | mask = 128; // 1000 0000 527 | i = l = 0; 528 | 529 | // Loop to check every byte, bit by bit (it's used the final size to control the cycle to avoid padding excess) 530 | while (l < block_size) { 531 | 532 | bit = mask & shafa[i]; 533 | if (!bit) decoder = decoder->left; // bit = 0 534 | else decoder = decoder->right; 535 | // Finds a leaf of the tree 536 | if (decoder && !(decoder->left) && !(decoder->right)) { 537 | (*decomp)[l++] = decoder->symbol; 538 | decoder = root; 539 | } 540 | mask >>= 1; // 1000 0000 >> 0100 0000 >> ... 
>> 0000 0001 >> 0000 0000 541 | 542 | // When mask = 0 it's time to move to the next byte 543 | if (!mask) { 544 | ++i; 545 | mask = 128; 546 | } 547 | 548 | } 549 | 550 | return _SUCCESS; 551 | } 552 | 553 | /** Does the process of the main function: includes the creation of a binary tree, the shafa block decompression and, if needed, the rle block decompression 554 | \brief 555 | @param _args Arguments of the function 556 | @returns Error status 557 | */ 558 | static _modules_error process_shafa_decomp (void * _args) { 559 | 560 | _modules_error error; 561 | ArgumentsSHAFA * args_shafa = (ArgumentsSHAFA *) _args; 562 | BTree decoder; 563 | ArgumentsRLE args_rle; 564 | 565 | error = create_tree(args_shafa->cod_code, &decoder); 566 | 567 | if (!error) { 568 | 569 | error = shafa_block_decompressor(args_shafa->shafa_code, *args_shafa->rle_sizes, decoder, &args_shafa->shafa_decompressed); 570 | 571 | free(args_shafa->shafa_code); 572 | free_tree(decoder); 573 | 574 | if (!error && args_shafa->rle_decompression) { 575 | 576 | args_rle = (ArgumentsRLE) { 577 | .buffer = args_shafa->shafa_decompressed, 578 | .rle_block_size = *args_shafa->rle_sizes, 579 | .final_sizes = args_shafa->final_sizes 580 | }; 581 | 582 | error = rle_block_decompressor(&args_rle); 583 | if (!error) { 584 | args_shafa->rle_decompressed = args_rle.sequence; 585 | } 586 | } 587 | } 588 | 589 | return error; 590 | } 591 | 592 | /** 593 | \brief Writes the decompressed shafa in the destined file 594 | @param _args Arguments of the function 595 | @param prev_error Previous thread error status 596 | @param error Process error status 597 | @returns Error status 598 | */ 599 | static _modules_error write_decompressed_shafa (void * _args, _modules_error prev_error, _modules_error error) { 600 | 601 | ArgumentsSHAFA * args_shafa = (ArgumentsSHAFA *) _args; 602 | unsigned long size_wrt; 603 | uint8_t * decomp; 604 | FILE * f_wrt = args_shafa->f_wrt; 605 | bool rle_decompression = 
args_shafa->rle_decompression; 606 | 607 | if (!error) { 608 | 609 | if (!prev_error) { 610 | 611 | size_wrt = (rle_decompression) ? (*args_shafa->final_sizes) : (*args_shafa->rle_sizes); 612 | decomp = (rle_decompression) ? (args_shafa->rle_decompressed) : (args_shafa->shafa_decompressed); 613 | if (fwrite(decomp, sizeof(uint8_t), size_wrt, f_wrt) != size_wrt) 614 | error = _FILE_STREAM_FAILED; 615 | 616 | } 617 | 618 | if (rle_decompression) 619 | free(args_shafa->rle_decompressed); 620 | } 621 | 622 | free(_args); 623 | 624 | return error; 625 | } 626 | 627 | 628 | _modules_error shafa_decompress (char ** const path, bool rle_decompression) 629 | { 630 | _modules_error error; 631 | FILE *f_shafa, *f_cod, *f_wrt; 632 | char *path_cod, *path_wrt, *path_shafa, *path_tmp; 633 | uint8_t * shafa_code; 634 | char * cod_code; 635 | char mode; 636 | float total_time; 637 | unsigned long long length; 638 | unsigned long *sizes, *sf_sizes, *final_sizes; 639 | unsigned long sf_bsize; 640 | ArgumentsSHAFA * args; 641 | 642 | sizes = sf_sizes = final_sizes = NULL; 643 | path_shafa = *path; 644 | error = _SUCCESS; 645 | clock_main_thread(START_CLOCK); 646 | 647 | // Opening the shafa file 648 | f_shafa = fopen(path_shafa, "rb"); 649 | if (f_shafa) { 650 | 651 | // Creates path to the .cod file 652 | path_tmp = rm_ext(path_shafa); 653 | if (path_tmp) { // free this somehow 654 | 655 | if (rle_decompression) { 656 | path_wrt = rm_ext(path_tmp); 657 | if (!path_wrt) 658 | error = _LACK_OF_MEMORY; 659 | } 660 | else 661 | path_wrt = path_tmp; 662 | 663 | f_wrt = fopen(path_wrt, "wb"); 664 | if (f_wrt) { 665 | 666 | path_cod = add_ext(path_tmp, CODES_EXT); 667 | if (path_cod) { 668 | 669 | f_cod = fopen(path_cod, "rb"); 670 | if (f_cod) { 671 | 672 | // Reading header of shafa file 673 | if (fscanf(f_shafa, "@%lu", &length) == 1) { 674 | 675 | // Reading header of cod file 676 | if (fscanf(f_cod, "@%c@%lu", &mode, &length) == 2) { 677 | // Checking the mode of the file 678 | if 
((mode == 'N' && !rle_decompression) || (mode == 'R')) { 679 | 680 | // Allocates memory to an array with the purpose of saving the size of each SHAF block 681 | sf_sizes = malloc(sizeof(unsigned long) * length); 682 | if (sf_sizes) { 683 | 684 | // Allocates memory to an array with the purpose of saving the sizes of each RLE/ORIGINAL block 685 | sizes = malloc(sizeof(unsigned long) * length); 686 | if (sizes) { 687 | 688 | if (rle_decompression) { 689 | final_sizes = malloc(sizeof(unsigned long) * length); 690 | if (!final_sizes) 691 | error = _LACK_OF_MEMORY; 692 | } 693 | 694 | for (unsigned long long thread_idx = 0; thread_idx < length && !error; ++thread_idx) { 695 | 696 | // Reads the size of the shafa blockss 697 | if (fscanf(f_shafa, "@%lu@", &sf_bsize) == 1) { 698 | 699 | sf_sizes[thread_idx] = sf_bsize; 700 | 701 | // Allocates memory to a buffer in which will be loaded one block of shafa code 702 | shafa_code = malloc(sf_bsize); 703 | if (shafa_code) { 704 | 705 | // Reads a block of shafa code 706 | if (fread(shafa_code, sizeof(uint8_t), sf_bsize, f_shafa) == sf_bsize) { 707 | 708 | // Reads the size of the decompressed shafa code and saves it 709 | if (fscanf(f_cod, "@%lu", &sizes[thread_idx]) == 1) { 710 | 711 | // Allocates memory for a block of COD code 712 | cod_code = malloc(33152); //sum 1 to 256 (worst case shannon fano) + 255 semicolons + 1 byte NULL 713 | if (cod_code) { 714 | 715 | // Loads the block of COD code 716 | if (fscanf(f_cod,"@%33151[^@]", cod_code) == 1) { 717 | 718 | // Allocates memory for the arguments 719 | args = malloc(sizeof(ArgumentsSHAFA)); 720 | if (!args) { 721 | error = _LACK_OF_MEMORY; 722 | free(shafa_code); 723 | break; 724 | } 725 | 726 | // Arguments for the SHAFA multithread 727 | *args = (ArgumentsSHAFA) { 728 | .f_wrt = f_wrt, 729 | .shafa_code = shafa_code, 730 | .rle_decompression = rle_decompression, 731 | .rle_sizes = &sizes[thread_idx], 732 | .final_sizes = &final_sizes[thread_idx], 733 | .cod_code = 
cod_code 734 | }; 735 | error = multithread_create(process_shafa_decomp, write_decompressed_shafa, args); 736 | 737 | if (error) { 738 | free(cod_code); 739 | break; 740 | } 741 | } 742 | else 743 | error = _FILE_STREAM_FAILED; 744 | 745 | } 746 | else 747 | error = _LACK_OF_MEMORY; 748 | } 749 | else 750 | error = _FILE_STREAM_FAILED; 751 | 752 | } 753 | else 754 | error = _FILE_STREAM_FAILED; 755 | 756 | } 757 | else 758 | error = _LACK_OF_MEMORY; 759 | } 760 | else 761 | error = _FILE_STREAM_FAILED; 762 | 763 | } 764 | multithread_wait(); 765 | 766 | } 767 | else 768 | error = _LACK_OF_MEMORY; 769 | } 770 | else 771 | error = _LACK_OF_MEMORY; 772 | 773 | } 774 | else 775 | error = _FILE_UNRECOGNIZABLE; 776 | } 777 | else 778 | error = _FILE_STREAM_FAILED; 779 | } 780 | else 781 | error = _FILE_STREAM_FAILED; 782 | 783 | fclose(f_cod); 784 | 785 | } 786 | else 787 | error = _FILE_INACCESSIBLE; 788 | 789 | free(path_cod); 790 | 791 | } 792 | else 793 | error = _LACK_OF_MEMORY; 794 | 795 | fclose(f_wrt); 796 | 797 | } 798 | else 799 | error = _FILE_INACCESSIBLE; 800 | 801 | if (rle_decompression) 802 | free(path_tmp); 803 | } 804 | else 805 | error = _LACK_OF_MEMORY; 806 | 807 | fclose(f_shafa); 808 | } 809 | else 810 | error = _FILE_INACCESSIBLE; 811 | 812 | if (!error) { 813 | total_time = clock_main_thread(STOP_CLOCK); 814 | *path = path_wrt; 815 | free(path_shafa); 816 | 817 | if (rle_decompression) { 818 | 819 | print_summary(total_time, sf_sizes, final_sizes, length, path_wrt, _SHAFA_RLE); 820 | free(final_sizes); 821 | 822 | }// If RLE decompression didn't occur 823 | else { 824 | print_summary(total_time, sf_sizes, sizes, length, path_wrt, _SHAFA); 825 | } 826 | } 827 | 828 | if (sizes) 829 | free(sizes); 830 | if (sf_sizes) 831 | free(sf_sizes); 832 | 833 | return error; 834 | } 835 | -------------------------------------------------------------------------------- /src/modules/d.h: 
-------------------------------------------------------------------------------- 1 | #ifndef MODULE_D_H 2 | #define MODULE_D_H 3 | 4 | #include 5 | 6 | #include "utils/errors.h" 7 | 8 | /** 9 | \brief Decompresses file which was compressed with Shannon Fano's algorithm and saves it to disk 10 | @param path Pointer to the SHAFA->RLE file's path 11 | @param decompress_rle Decompresses file with RLE's algorithm too 12 | @returns Error status 13 | */ 14 | _modules_error shafa_decompress(char ** path, bool decompress_rle); 15 | 16 | 17 | /** 18 | \brief Decompresses file which was compressed with RLE's algorithm and saves it to disk. 19 | @param path Pointer to the RLE->Original file's path 20 | @returns Error status 21 | */ 22 | _modules_error rle_decompress(char ** path); 23 | 24 | #endif //MODULE_D_H 25 | -------------------------------------------------------------------------------- /src/modules/f.c: -------------------------------------------------------------------------------- 1 | /*************************************************** 2 | * 3 | * Author(s): Ana Teixeira, João Carvalho 4 | * Created Date: 3 Dec 2020 5 | * Updated Date: 3 Jan 2021 6 | * 7 | **************************************************/ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | #include "utils/file.h" 18 | #include "utils/errors.h" 19 | #include "utils/extensions.h" 20 | 21 | /** 22 | \brief Compresses a block 23 | @param buffer Array loaded with the original file content 24 | @param block Array where to load the compressed content 25 | @param block_size Size of the current block 26 | @param size_f Size of the original file 27 | @returns Size of the compressed block 28 | */ 29 | static unsigned long block_compression(const uint8_t buffer[], uint8_t block[], const unsigned long block_size, unsigned long size_f) 30 | { 31 | //Looping variables(i,j) 32 | unsigned long i, j, size_block_rle; 33 | //Cycle that goes through the block of 
/**
\brief Turns block of content in an array of frequencies (each index matches a symbol from 0 to 255)
 @param block Array with the symbols (current block)
 @param freq Array of 256 counters that receives the frequencies
 @param size_block Block size
*/
static void make_freq(const unsigned char* block, unsigned long* freq, unsigned long size_block)
{
    //Clears the 256 counters
    for(unsigned long *slot = freq; slot != freq + 256; ++slot)
        *slot = 0;

    //Counts every symbol of the block
    while(size_block--)
        ++freq[*block++];
}
if the fprintf went well 98 | if(print >= 1) { 99 | //If the frequencies of consecutive values are the same writes ';' after the fisrt value 100 | for(j = i; freq[i] == freq[j] && j<256 ; j++) 101 | { 102 | if(j!=255) { 103 | print2 = fprintf(f_freq, ";"); 104 | //If the fprintf went wrong 105 | if(print2 < 0) error = _FILE_STREAM_FAILED; 106 | } 107 | } 108 | i = j; 109 | } 110 | else error = _FILE_STREAM_FAILED; 111 | }//If it's the last block 112 | if(block_num == n_blocks -1) { 113 | print3 = fprintf(f_freq, "@0"); 114 | //If the fprintf went wrong 115 | if(print3 < 1) error = _FILE_STREAM_FAILED; 116 | } 117 | 118 | return error; 119 | } 120 | 121 | /** 122 | \brief Prints the results of the program execution 123 | @param n_blocks Number of blocks 124 | @param block_sizes Block sizes of the original file 125 | @param size_f size of the original file 126 | @param block_rle_sizes Block sizes of the rle file 127 | @param total_t Time that the program took to execute 128 | @param path_rle Path to the rle file 129 | @param path_freq Path to the freq file from the txt file 130 | @param path_rle_freq Path to the freq file from the rle file 131 | */ 132 | static inline void print_summary(unsigned long long n_blocks, unsigned long *block_sizes, unsigned long size_f, unsigned long *block_rle_sizes, double total_t, const char * const path_rle, const char * const path_freq, const char * const path_rle_freq) 133 | { 134 | printf( 135 | "Ana Rita Teixeira, a93276, MIEI/CD, 1-jan-2021\n" 136 | "João Carvalho, a93166, MIEI/CD, 1-jan-2021\n" 137 | "Module: f (calculation of symbol frequencies)\n" 138 | "Number of blocks: %lu\n" , n_blocks 139 | ); 140 | 141 | printf("Size of blocks analyzed in the original file: "); 142 | //Cycle to print the block sizes of the txt file 143 | for(unsigned long long i = 0; i < n_blocks; i++) { 144 | if(i == n_blocks - 1) 145 | printf("%lu\n", block_sizes[i]); 146 | else printf("%lu/", block_sizes[i]); 147 | } 148 | 149 | if(path_rle) { 150 | 
unsigned long size_rle = 0; 151 | long compression; 152 | float compression_ratio; 153 | for(unsigned long long j = 0; j= _1KiB){ 221 | 222 | compresd = the_block_size; 223 | size_block_rle = 0; 224 | //Allocates memory for the array that will contain the block sizes of the txt file 225 | block_sizes = malloc(n_blocks * sizeof(unsigned long)); 226 | if(block_sizes) { 227 | //Allocates memory for the array that will contain the block sizes of the rle file 228 | block_rle_sizes = malloc(n_blocks * sizeof(unsigned long)); 229 | if(block_rle_sizes) { 230 | //Divides the buffer into blocks 231 | for (block_num = 0, s = 0; block_num < n_blocks; ++block_num) { 232 | //If it's the last block 233 | if(block_num == n_blocks -1) { 234 | compresd = size_f - s; 235 | } 236 | //Loads size of the current block of the txt file to the respective array 237 | block_sizes[block_num] = compresd; 238 | //Allocates memory for the array that will contain the content of the txt file 239 | buffer = malloc(compresd * sizeof(uint8_t)); 240 | if(buffer) { 241 | //Loads the content of the block of the txt file into the buffer 242 | if(fread(buffer, sizeof(uint8_t), compresd, f) == compresd) { 243 | //Allocates memory for the array that will contain the compressed content of the buffer 244 | block = malloc(compresd * 2 + 3); // (size/2 + 1) * 3 + size/2 = 2*size + 3 245 | if(block) { 246 | if(compress_rle) { 247 | //Compresses the current block and returns its size 248 | size_block_rle = block_compression(buffer, block, compresd, size_f); 249 | //If it's the first block 250 | if(block_num == 0) { 251 | //Calculates the compression rate 252 | compression = compresd - size_block_rle; 253 | compression_ratio = (float)compression/(float)compresd; 254 | 255 | 256 | //If the rate is lower than 5% and the user didn't force the rle file 257 | if(compression_ratio < 0.05 && !force_rle) compress_rle = false; 258 | } 259 | } 260 | 261 | //Opening rle file 262 | if(!f_rle && compress_rle) { 263 | f_rle = 
fopen(path_rle, "wb"); 264 | if(!f_rle) { 265 | error = _FILE_INACCESSIBLE; 266 | break; 267 | } 268 | } 269 | //Opening rle freq file 270 | if(!f_rle_freq && compress_rle) { 271 | f_rle_freq = fopen(path_rle_freq, "wb"); 272 | if(!f_rle_freq) { 273 | error = _FILE_INACCESSIBLE; 274 | break; 275 | } 276 | } 277 | //Opening freq file 278 | if(!f_freq && (force_freq || (!compress_rle))) { 279 | f_freq = fopen(path_freq, "wb"); 280 | if(!f_freq) { 281 | error = _FILE_INACCESSIBLE; 282 | break; 283 | } 284 | } 285 | 286 | //If it's the first block and the user forced the rle file 287 | if(block_num == 0 && compress_rle) { 288 | //Prints the header of the freq file: @R@n_blocks 289 | print_rle = fprintf(f_rle_freq,"@R@%lu", n_blocks); 290 | } 291 | //If it's the first block and the user didn't forced the rle file or forced the freq file 292 | if(block_num == 0 && (!compress_rle || force_freq)) { 293 | //Prints the header of the freq file: @N@n_blocks 294 | print = fprintf(f_freq,"@N@%lu", n_blocks); 295 | } 296 | //If the fprintf went well 297 | if((print >= 4 && print_rle >= 4) || (print >= 4 && !compress_rle) || print_rle >= 4) { 298 | //Allocates memory for all the 256 symbol's frequencies 299 | unsigned long *freq = malloc(sizeof(unsigned long)*256); 300 | if(freq) { 301 | //If it can be compressed 302 | if(compress_rle) { 303 | 304 | //Loads size of the current block of the rle file to the respective array 305 | block_rle_sizes[block_num] = size_block_rle; 306 | //Writes each compressed block in the rle file 307 | int res = fwrite(block, 1, size_block_rle, f_rle); 308 | if(res == size_block_rle){ 309 | //Generates an array of frequencies of the block (rle file content) 310 | make_freq(block, freq, size_block_rle); 311 | //Prints the size of the current compressed block in the freq file 312 | if(fprintf(f_rle_freq, "@%lu@", size_block_rle) >= 2) { 313 | //Writes each frequencies block in the freq file from the rle file 314 | error = write_freq(freq, f_rle_freq, 
block_num, n_blocks); 315 | } 316 | else error = _FILE_STREAM_FAILED; 317 | 318 | } 319 | else error = _FILE_STREAM_FAILED; 320 | } 321 | //If it can't be compressed or if the user forced the freq file 322 | if(!compress_rle || force_freq) { 323 | 324 | //Generates an array of frequencies of the block (txt file content) 325 | make_freq(buffer, freq, compresd); 326 | //Prints the current block size in the freq file 327 | if(fprintf(f_freq, "@%lu@", compresd) >= 2) { 328 | //Writes each frequencies block in the freq file from the txt file 329 | error = write_freq(freq, f_freq, block_num, n_blocks); 330 | 331 | } 332 | else error = _FILE_STREAM_FAILED; 333 | 334 | } 335 | free(freq); 336 | 337 | } 338 | else error = _LACK_OF_MEMORY; 339 | } 340 | else error = _FILE_STREAM_FAILED; 341 | 342 | 343 | free(block); 344 | } 345 | else error = _LACK_OF_MEMORY; 346 | 347 | 348 | } 349 | else error = _FILE_STREAM_FAILED; 350 | 351 | s+=compresd; 352 | free(buffer); 353 | } 354 | else error = _LACK_OF_MEMORY; 355 | 356 | } 357 | if(f_rle) fclose(f_rle); 358 | if(f_freq) fclose(f_freq); 359 | if(f_rle_freq) fclose(f_rle_freq); 360 | } 361 | else error = _LACK_OF_MEMORY; 362 | 363 | } 364 | else error = _LACK_OF_MEMORY; 365 | } 366 | else error = _FILE_TOO_SMALL; //If the file is too small 367 | 368 | if(error || (!force_freq && (force_rle || compress_rle))){ 369 | free(path_freq); 370 | path_freq = NULL; 371 | } 372 | } 373 | else error = _LACK_OF_MEMORY; 374 | 375 | if(error || (!force_rle && !compress_rle)){ 376 | free(path_rle_freq); 377 | path_rle_freq = NULL; 378 | } 379 | } 380 | else error = _LACK_OF_MEMORY; 381 | 382 | 383 | if(error || (!compress_rle && !force_rle)){ 384 | free(path_rle); 385 | path_rle = NULL; 386 | } 387 | } 388 | else error = _LACK_OF_MEMORY; 389 | 390 | fclose(f); 391 | 392 | } 393 | else error = _FILE_INACCESSIBLE; 394 | //If there was no error 395 | if(!error){ 396 | if (compress_rle || force_rle) { 397 | free(*path); 398 | *path = path_rle; 399 | 
} 400 | //Calculates the runtime 401 | t = clock() - t; 402 | //Calculates the time in milliseconds 403 | total_t = (float) ((((double) t) / CLOCKS_PER_SEC) * 1000); 404 | print_summary(n_blocks, block_sizes, size_f, block_rle_sizes, total_t, path_rle, path_freq, path_rle_freq); 405 | free(block_sizes); 406 | free(block_rle_sizes); 407 | if(path_freq) free(path_freq); 408 | if(path_rle_freq) free(path_rle_freq); 409 | } 410 | 411 | return error; 412 | } 413 | -------------------------------------------------------------------------------- /src/modules/f.h: -------------------------------------------------------------------------------- 1 | #ifndef MODULE_F_H 2 | #define MODULE_F_H 3 | 4 | #include 5 | 6 | #include "utils/errors.h" 7 | 8 | /** 9 | \brief Compresses file with RLE's algorithm if needed and creates the respective output frequencies' table. Finally saves it to disk 10 | @param path Pointer to the original file's path 11 | @param force_rle Force execution of RLE's algorithm even if % of compression <= 5% 12 | @param force_freq Force frequencies' file creation for original file even if it can be compressed with RLE 13 | @param block_size Size of each block 14 | @returns Error status 15 | */ 16 | _modules_error freq_rle_compress(char ** path, bool force_rle, bool force_freq, unsigned long block_size); 17 | 18 | #endif //MODULE_F_H 19 | -------------------------------------------------------------------------------- /src/modules/t.c: -------------------------------------------------------------------------------- 1 | /************************************************ 2 | * 3 | * Author(s): Francisco Neves, Leonardo Freitas 4 | * Created Date: 3 Dec 2020 5 | * Updated Date: 2 Jan 2021 6 | * 7 | ***********************************************/ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "utils/errors.h" 16 | #include "utils/extensions.h" 17 | 18 | #define NUM_SYMBOLS 256 19 | #define MIN(a,b) ((a) < (b) ? 
a : b) 20 | 21 | /** 22 | \brief Reads the frequencies of each block provided by the .freq file 23 | @param codes_input Buffer of the respective .freq file block 24 | @param frequencies Array to store the frequencies from each symbol 25 | @returns Error status 26 | */ 27 | static _modules_error read_block(char * restrict codes_input, unsigned long * restrict frequencies) 28 | { 29 | int read_count; 30 | // Checks if there are any errors with the input file 31 | if (sscanf(codes_input, "%lu%n;", frequencies, &read_count) != 1) 32 | return _FILE_UNRECOGNIZABLE; 33 | 34 | codes_input += read_count + 1; 35 | 36 | // Loop to go through all symbols 37 | for (int i = 1; i < NUM_SYMBOLS; ++i) { 38 | 39 | // Reads the frequency 40 | if (sscanf(codes_input, "%lu%n", &frequencies[i], &read_count) == 1) { 41 | 42 | // Checks for possible errors in .freq file 43 | if (codes_input[read_count] != ';' && i != NUM_SYMBOLS - 1) 44 | return _FILE_UNRECOGNIZABLE; 45 | 46 | codes_input += read_count + 1; 47 | } 48 | 49 | // Checks if we are in one of the specific cases with equal frequencies 50 | else if (*codes_input == ';' || (*codes_input == '\0' && i == NUM_SYMBOLS - 1)) { 51 | frequencies[i] = frequencies[i-1]; 52 | ++codes_input; 53 | } 54 | 55 | // Checks for possible errors in .freq file 56 | else 57 | return _FILE_UNRECOGNIZABLE; 58 | } 59 | 60 | // Checks for possible errors in the buffer 61 | if (codes_input[-1] != '\0') 62 | return _FILE_UNRECOGNIZABLE; 63 | 64 | return _SUCCESS; 65 | } 66 | 67 | /** 68 | \brief Sort the frequencies array in descending order 69 | @param frequencies The array to save the frequencies 70 | @param positions Array with the original index of each symbol 71 | @param left Inital index of the array 72 | @param right Final index of the array 73 | */ 74 | static void insert_sort (unsigned long frequencies[], int positions[], int left, int right) 75 | { 76 | unsigned long tmpFreq; 77 | int j, a, iters; 78 | 79 | for (int i = left + 1; i <= right; ++i){ 
/**
\brief Sort the frequencies array in descending order (stable insertion sort)
@param frequencies The array to save the frequencies
@param positions   Array indexed by original symbol; on return positions[s] is the index of symbol s in the sorted array
@param left        Inital index of the array
@param right       Final index of the array
*/
static void insert_sort (unsigned long frequencies[], int positions[], int left, int right)
{
    for (int i = left + 1; i <= right; ++i) {

        const unsigned long current = frequencies[i];
        int slot = i;

        // Slide every smaller frequency one place to the right until the slot for `current` is found
        while (slot > left && frequencies[slot - 1] < current) {
            frequencies[slot] = frequencies[slot - 1];
            --slot;
        }
        frequencies[slot] = current;

        // Exactly (i - slot) already-ranked symbols were pushed one place: update their rank
        int pending = i - slot;

        for (int symbol = i - 1; pending && symbol >= left; --symbol)
            if (positions[symbol] >= slot) {
                ++positions[symbol];
                --pending;
            }

        positions[i] = slot;
    }
}

/**
\brief Calculates the sum of the positional frequencies from the first to the last position
@param frequencies Array Frequencies
@param first       First Element of the array
@param last        Last element of the array
@returns Total sum of the frequencies (0 when first > last)
*/
static unsigned long sum_freq (unsigned long frequencies[], int first, int last)
{
    unsigned long sum = 0;

    for (int i = first; i <= last; i++)
        sum += frequencies[i];

    return sum;
}

/**
\brief Find the best division of any sequence of frequencies ordered between the element in the first position and the element in the last position
       The split point advances while the difference between both halves keeps getting strictly smaller
@param frequencies Array Frequencies
@param first       First element of the array
@param last        Last element of the array
@returns Index d of the best division (left half is [first, d], right half is [d + 1, last]), with first <= d < last.
         When the range has fewer than two elements there is nothing to divide and first - 1 is returned
*/
static int best_Division (unsigned long frequencies[], int first, int last)
{
    // Nothing to split; same value the loop below would produce, without touching the array
    if (first >= last)
        return first - 1;

    // Everything is kept in unsigned long: storing the sums in an int truncated blocks above INT_MAX
    const unsigned long total = sum_freq(frequencies, first, last);
    unsigned long left_sum = 0;
    unsigned long best = total; // Difference obtained with an empty left half
    int division = first;

    while (division < last) {

        left_sum += frequencies[division];

        const unsigned long right_sum = total - left_sum;
        const unsigned long dif = left_sum > right_sum ? left_sum - right_sum : right_sum - left_sum;

        // Stop as soon as adding one more element doesn't improve the balance
        if (dif >= best)
            break;

        best = dif;
        ++division;
    }

    // `division` is one past the last element that improved the balance
    --division;

    // Ranges containing zeros may never improve; keep both halves non-empty so callers always make progress
    return division < first ? first : division;
}
dif = mindif + 1; 149 | } 150 | 151 | return division - 1; 152 | } 153 | 154 | /** 155 | \brief Add character representing a bit 0 or 1 to the code according to the Shannon-Fano algorithm 156 | @param value bit to be added to code (0 or 1) 157 | @param codes Place to store the codes 158 | @param start First Element 159 | @param end Last Element 160 | */ 161 | static void add_bit_to_code(char value, char codes[NUM_SYMBOLS][NUM_SYMBOLS], int start, int end) 162 | { 163 | char * symbol_codes; 164 | /*For symbol_codes, start the pointer on the first element, 165 | going through the array until the end */ 166 | for ( ; start <= end; ++start) { 167 | symbol_codes = codes[start]; 168 | 169 | // While the pointer is moved it adds value 1 or 0. 170 | for ( ; *symbol_codes; ++symbol_codes); 171 | *symbol_codes = value; 172 | } 173 | } 174 | 175 | /** 176 | \brief Apply the Shannon-Fano algorithm 177 | @param frequencies Array with frequencies 178 | @param codes Array to store the codes 179 | @param start First element to aply the algorithm 180 | @param end Last element to apply the algorithm 181 | */ 182 | static void sf_codes (unsigned long frequencies[], char codes[NUM_SYMBOLS][NUM_SYMBOLS], int start, int end) 183 | { 184 | //While the pointer at the beginning is not the same as at the end it applies the algorithm 185 | if (start != end){ 186 | 187 | int div = best_Division(frequencies, start, end); 188 | 189 | add_bit_to_code('0', codes, start, div); 190 | add_bit_to_code('1', codes, div + 1, end); 191 | 192 | sf_codes(frequencies, codes, start, div); 193 | sf_codes(frequencies, codes, div + 1, end); 194 | } 195 | } 196 | 197 | /** 198 | \brief Counts how many symbols have frequencies different from 0 199 | @param frequencies Array of the frequencies sorted in descending order 200 | @returns Number of non-null elements in the array 201 | */ 202 | static int not_null (unsigned long frequencies[NUM_SYMBOLS]) 203 | { 204 | int r = 0; 205 | 206 | // Parses the array from the 
end to the beginning until it finds the first non-null element incrementing r by 1 each interation 207 | for (int i = NUM_SYMBOLS - 1; frequencies[i] == 0; --i) ++r; 208 | 209 | return (NUM_SYMBOLS - 1 - r); 210 | } 211 | 212 | /** 213 | \brief Prints in the screen all information related to this module 214 | @param num_blocks Number of blocks analyzed 215 | @param sizes Array with sizes of each block analyzed 216 | @param total_time Time it took to execute the module 217 | @param path Path of the created .cod file 218 | */ 219 | static inline void print_summary(unsigned long long num_blocks, const unsigned long * sizes, const double total_time, const char * const path) 220 | { 221 | unsigned long long i; 222 | 223 | printf( 224 | "Francisco Neves,a93202,MIEI/CD, 1-JAN-2021\n" 225 | "Leonardo Freitas,a93281,MIEI/CD, 1-JAN-2021\n" 226 | "Module:T (Calculation of symbol codes)\n" 227 | "Number of blocks: %lu\n" 228 | "Size of blocks analyzed in the symbol file: " , 229 | num_blocks 230 | ); 231 | // Prints the sizes of each block, except the last 232 | for (i = 0; i < num_blocks - 1; ++i) { 233 | printf("%lu/", sizes[i]); 234 | } 235 | // Prints the size of the last block 236 | printf("%lu bytes\n", sizes[i]); 237 | 238 | printf( 239 | "Module runtime (milliseconds): %f\n" 240 | "Generated file %s\n" , 241 | total_time, path 242 | ); 243 | } 244 | 245 | 246 | _modules_error get_shafa_codes(const char * path) 247 | { 248 | clock_t t; 249 | FILE * fd_freq, * fd_codes; 250 | char * path_freq; 251 | char * path_codes; 252 | char * block_input; 253 | char mode; 254 | unsigned long long num_blocks = 0; 255 | unsigned long block_size = 0; 256 | int freq_notnull, iter; 257 | int error = _SUCCESS; 258 | int positions[NUM_SYMBOLS]; 259 | unsigned long frequencies[NUM_SYMBOLS], * sizes = NULL ; 260 | double total_time; 261 | char (* codes)[NUM_SYMBOLS]; 262 | 263 | t = clock(); 264 | 265 | // add .freq extension to read the correct file 266 | path_freq = add_ext(path, 
FREQ_EXT); 267 | 268 | // Checks if it was possible to add the extension 269 | if (path_freq) { 270 | 271 | // Opens the file to read 272 | fd_freq = fopen(path_freq, "rb"); 273 | 274 | // Checks if it was possible to open the file 275 | if (fd_freq) { 276 | 277 | // Reading the header of .freq file 278 | if (fscanf(fd_freq, "@%c@%lu", &mode, &num_blocks) == 2) { 279 | 280 | // Checks if it haves a possible mode (R - RLE or N - Normal) 281 | if (mode == 'R' || mode == 'N') { 282 | 283 | // Allocates memory to an array with the purpose of saving the sizes of each block 284 | sizes = malloc (num_blocks * sizeof(unsigned long)); 285 | 286 | // Checks if it was possible to allocate memory 287 | if (sizes) { 288 | 289 | // Add .cod extension to write the proper file 290 | path_codes = add_ext(path, CODES_EXT); 291 | 292 | // Checks if it was possible to add the extension 293 | if (path_codes) { 294 | 295 | // Opens the file to write 296 | fd_codes = fopen(path_codes, "wb"); 297 | 298 | // Checks if it was possible to open the file 299 | if (fd_codes) { 300 | 301 | // Prints header in the .cod file and checks if it only prints the proper elements 302 | if (fprintf(fd_codes, "@%c@%lu", mode, num_blocks) >= 3) { 303 | 304 | // Loop to analyze every block in .freq file 305 | for (long long i = 0; i < num_blocks && !error; ++i) { 306 | 307 | // Memory allocation to save the generated codes 308 | codes = calloc(1, sizeof(char[NUM_SYMBOLS][NUM_SYMBOLS])); 309 | 310 | // Checks if it was possible to allocate the required memory 311 | if (codes) { 312 | 313 | // Initializes the array to keep the frequencies with 0's 314 | memset(frequencies, 0, NUM_SYMBOLS * 4); 315 | 316 | // Initializes the array to keep the original index of each symbol 317 | for (int j = 0; j < NUM_SYMBOLS; ++j) positions[j] = j; 318 | 319 | // Reads the current block size and verifies possible file stream errors 320 | if (fscanf(fd_freq, "@%lu", &block_size) == 1) { 321 | 322 | // Saves the size of the 
block in the array to that purpose 323 | sizes[i] = block_size; 324 | 325 | // Allocates memory to keep the frequencies read, so it's possible to the lecture in only 1 access 326 | block_input = malloc(9 * NUM_SYMBOLS + (NUM_SYMBOLS - 1) + 1); // 9 (max digits for frequency) + 256 (symbols) + 255 (';') + 1 (NULL terminator) 327 | 328 | // Checks if it was possible to allocate the required memory 329 | if (block_input) { 330 | 331 | // Reads the frequencies and verifies the read 332 | if (fscanf(fd_freq, "@%2559[^@]", block_input) == 1) { 333 | 334 | // Calls read_block function 335 | error = read_block(block_input, frequencies); 336 | 337 | // Checks for possible errors in read_block function 338 | if (!error) { 339 | 340 | // Calls insert_sort function 341 | insert_sort(frequencies, positions, 0, NUM_SYMBOLS - 1); 342 | 343 | // Saves in freq_notnull the number of non-null elements in the array 344 | freq_notnull = not_null(frequencies); 345 | 346 | // Calls sf_codes to generate the Shannon-Fano codes 347 | sf_codes(frequencies, codes, 0, freq_notnull); 348 | 349 | // Prints in the .cod file the block size 350 | if (fprintf(fd_codes, "@%lu@", block_size) >= 2) { 351 | 352 | // Loop to print the codes till the last one in .cod file and checks for possible file stream errors 353 | for (iter = 0; iter < NUM_SYMBOLS - 1 && !error; ++iter) { 354 | 355 | if (fprintf(fd_codes, "%s;", codes[positions[iter]]) < 1) 356 | error = _FILE_STREAM_FAILED; 357 | } 358 | 359 | // Prints last code in the file and checks for possible file stream errors 360 | if (!error && fprintf(fd_codes, "%s", codes[positions[iter]]) < 0) 361 | error = _FILE_STREAM_FAILED; 362 | 363 | } 364 | else 365 | error = _FILE_STREAM_FAILED; 366 | 367 | } 368 | 369 | } 370 | else 371 | error = _FILE_STREAM_FAILED; 372 | 373 | // Free allocated memory to block_input 374 | free(block_input); 375 | } 376 | else 377 | error = _LACK_OF_MEMORY; 378 | } 379 | else 380 | error = _FILE_STREAM_FAILED; 381 | 382 | // 
Free allocated memory to codes 383 | free(codes); 384 | } 385 | else 386 | error = _LACK_OF_MEMORY; 387 | } 388 | } 389 | else 390 | error = _FILE_STREAM_FAILED; 391 | 392 | /* if we don't have any error at this point, 393 | it should write "@0" in the .cod file to indicate 394 | that there are no more blocks*/ 395 | if (!error) 396 | fprintf(fd_codes, "@0"); 397 | 398 | // Closes output file 399 | fclose(fd_codes); 400 | } 401 | else { 402 | error = _FILE_INACCESSIBLE; 403 | 404 | // Free allocated memory to path_codes 405 | free(path_codes); 406 | } 407 | } 408 | else 409 | error = _LACK_OF_MEMORY; 410 | } 411 | else 412 | error = _LACK_OF_MEMORY; 413 | } 414 | else 415 | error = _FILE_UNRECOGNIZABLE; 416 | } 417 | else 418 | error = _FILE_UNRECOGNIZABLE; 419 | 420 | // Closes input file 421 | fclose(fd_freq); 422 | } 423 | else 424 | error = _FILE_INACCESSIBLE; 425 | 426 | // Free allocated memory to path_freq 427 | free(path_freq); 428 | } 429 | else 430 | error = _LACK_OF_MEMORY; 431 | 432 | // If no error occurred during the execution of the module, the time taken to execute it is counted 433 | if (!error) { 434 | t = clock() - t; 435 | total_time = (((double) t) / CLOCKS_PER_SEC) * 1000; 436 | 437 | // Calls print_summary function 438 | print_summary(num_blocks, sizes, total_time, path_codes); 439 | } 440 | 441 | // Free allocated memory to sizes 442 | free(sizes); 443 | 444 | return error; 445 | } 446 | -------------------------------------------------------------------------------- /src/modules/t.h: -------------------------------------------------------------------------------- 1 | #ifndef MODULE_T_H 2 | #define MODULE_T_H 3 | 4 | #include "utils/errors.h" 5 | 6 | /** 7 | \brief Creates a table of Shanon Fano's codes and saves it to disk 8 | @param path Original/RLE file's path 9 | @returns Error status 10 | */ 11 | _modules_error get_shafa_codes(const char * path); 12 | 13 | #endif //MODULE_T_H 
-------------------------------------------------------------------------------- /src/modules/utils/errors.c: -------------------------------------------------------------------------------- 1 | /************************************************ 2 | * 3 | * Author(s): Pedro Tavares 4 | * Created Date: 5 Dec 2020 5 | * Updated Date: 8 Dec 2020 6 | * 7 | ***********************************************/ 8 | 9 | 10 | #include "errors.h" 11 | 12 | #define ERROR_MSG(_) \ 13 | _( _SUCCESS, "No error\n" ) \ 14 | _( _LACK_OF_MEMORY, "Not enough memory for allocation\n" ) \ 15 | _( _FILE_INACCESSIBLE, "File can't be accessed. Either lack of permissions or file doesn't exist\n" ) \ 16 | _( _FILE_UNRECOGNIZABLE, "File not recognized\n" ) \ 17 | _( _FILE_STREAM_FAILED, "Can't communicate properly with file's stream\n" ) \ 18 | _( _FILE_TOO_SMALL, "File too small for decompression\n" ) \ 19 | _( _THREAD_CREATION_FAILED, "Thread couldn't be created\n" ) \ 20 | _(_THREAD_TERMINATION_FAILED, "Thread didn't terminate properly\n" ) 21 | 22 | 23 | #define ERROR_CASE(NUM, MSG) case NUM: return MSG; 24 | 25 | 26 | const char * error_msg(const int num) 27 | { 28 | // Since this returns a const char * allocated at compile time typically placed in .rodata 29 | // So there won't be any Undefined Behaviour 30 | 31 | switch (num) { 32 | ERROR_MSG(ERROR_CASE) 33 | } 34 | 35 | return "Unknown error"; 36 | } 37 | -------------------------------------------------------------------------------- /src/modules/utils/errors.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_ERRORS_H 2 | #define UTILS_ERRORS_H 3 | 4 | 5 | typedef enum _error // Use integers between 0-255 for safe integer cast to pointer in multithreading [Better not use negative numbers] 6 | { // If this fills 256 integers then assert sizeof(void *) <= max(_modules_error) or change multithreading to pass errors as arguments only 7 | _SUCCESS = 0, 8 | _OUTSIDE_MODULE = 1, 9 | 
/**
\brief Checks whether the file has the given extension
       Compares only the tail of the path, so a long path without a '.' costs a single comparison
@param path File's path (NULL is accepted and never matches)
@param ext  Extension
@returns Success
*/
bool check_ext(const char * const path, const char * const ext)
{
    size_t len_path, len_ext;

    if (!path || !ext)
        return false;

    len_path = strlen(path);
    len_ext = strlen(ext);

    // Lengths are unsigned: subtracting first and testing `>= 0` afterwards would always succeed
    if (len_ext > len_path)
        return false;

    return strcmp(path + (len_path - len_ext), ext) == 0;
}


/**
\brief Allocates a new `string` copying file's path's content and appends an extension
@param path File's path
@param ext  Extension
@returns File's path (to be freed by the caller) or NULL if an error ocurred while allocating memory
*/
char * add_ext(const char * const path, const char * const ext)
{
    size_t len_path, len_ext;
    char * new_path;

    if (!path || !ext)
        return NULL;

    len_path = strlen(path);
    len_ext = strlen(ext);

    new_path = malloc(len_path + len_ext + 1);

    if (new_path) {
        memcpy(new_path, path, len_path);
        memcpy(new_path + len_path, ext, len_ext + 1); // + 1 copies the terminator too
    }

    return new_path;
}


/**
\brief Allocates a new `string` copying file's path's content and removes the extension
       Only a '.' inside the last component of the path starts an extension
@param path File's path
@returns File's path (to be freed by the caller) or NULL if an error ocurred while allocating memory
*/
char * rm_ext(const char * const path)
{
    size_t len_path_no_ext;
    const char * path_ext;
    char * new_path;

    if (!path)
        return NULL;

    path_ext = strrchr(path, '.');

    // A separator after the last '.' means the dot belongs to a directory ("./dir.d/file"), not to the file
    if (path_ext && strchr(path_ext, '/'))
        path_ext = NULL;
#ifdef _WIN32
    if (path_ext && strchr(path_ext, '\\'))
        path_ext = NULL;
#endif

    len_path_no_ext = path_ext ? (size_t) (path_ext - path) : strlen(path);

    new_path = malloc(len_path_no_ext + 1);

    if (new_path) {
        memcpy(new_path, path, len_path_no_ext);
        new_path[len_path_no_ext] = '\0';
    }

    return new_path;
}
10 | 11 | This function works on files of any size and on any operating system and on 12 | any machine architecture. Bear in mind that for files larger than 2 GBytes the 13 | function is very slow. Block size of FSIZE_DEFAULT_BLOCK_SIZE should yield 14 | the best results in terms of execution time but it may be dependent of the 15 | operating system and/or machine architecture/hardware. 16 | 17 | The function returns directly the number of blocks contained in the file of 18 | a given block size in <*the_block_size>. It also returns, indirectly, the 19 | actual block size used in <*the_block_size>, which may be different than 20 | the original size in <*the_block_size>. If the original value of <*the_block_size> 21 | is smaller than FSIZE_MIN_BLOCK_SIZE then FSIZE_MIN_BLOCK_SIZE is used. 22 | Also, if the original value of <*the_block_size> is bigger than FSIZE_MAX_BLOCK_SIZE 23 | then FSIZE_MAX_BLOCK_SIZE is used. If the original value of <*the_block_size> is 24 | equal to 0 (zero) then FSIZE_DEFAULT_BLOCK_SIZE is used. Bear in mind that the last 25 | block may have a smaller size and its value is returned in <*size_of_last_block>. 26 | 27 | If n_blocks is the returned number of blocks then: 28 | total_file_size = n_blocks * <*the_block_size> + <*size_of_last_block> 29 | 30 | Arguments: 31 | FILE *fp_in - a file pointer (if NULL, then the file is identified by ) 32 | unsigned char *filename - if NULL then file must be identified by < *fp> 33 | unsigned long *the_block_size - this is used to pass the intended size for each block 34 | and to receive the actual block size used 35 | long *size_of_last_block - this is used to receive the size of the last block 36 | 37 | This code is open source and free to use as long as the original author is identified. 38 | 39 | (c) 2020, Bruno A.F. 
/*
    fsize(): size of a file expressed in blocks.
    Original author: Bruno A.F. Dias - University of Minho, Informatics Department (c) 2020
*/

#define FSIZE_DEFAULT_BLOCK_SIZE 524288         // Default block size = 512 KBytes
#define FSIZE_MIN_BLOCK_SIZE 512                // Min block size = 512 Bytes
#define FSIZE_MAX_BLOCK_SIZE 67108864           // Max block size = 64 MBytes
#define FSIZE_MAX_NUMBER_OF_BLOCKS 4294967296   // Max number of blocks that can be returned = 2^32 blocks
#define FSIZE_MAX_SIZE_FSEEK 2147483648         // Max size permitted in fseek/ftell functions = 2 GBytes
#define FSIZE_ERROR_BLOCK_SIZE -1               // Error: Block size is larger than max value
#define FSIZE_ERROR_NUMBER_OF_BLOCKS -2         // Error: Number of Blocks exceeds max value permitted
#define FSIZE_ERROR_IN_FILE -3                  // Error: Opening or reading file
#define FSIZE_ERROR_IN_FTELL -1L                // Error: When using ftell()

/**
\brief Calculate the whole file size per block
       The file is either the already opened stream `fp_in` (left positioned at its beginning) or, when `filename`
       is a non-empty string, a file opened and closed by this function
@param fp_in              Stream to measure; only used when `filename` is NULL or empty
@param filename           Path of the file to measure, or NULL/"" to use `fp_in`
@param the_block_size     In: intended block size (0 selects FSIZE_DEFAULT_BLOCK_SIZE, values below FSIZE_MIN_BLOCK_SIZE are raised to it)
                          Out: block size actually used
@param size_of_last_block Out: size of the last block (equal to the block size when the file divides evenly)
@returns Number of blocks, or a negative FSIZE_ERROR_* value (allocation failures are reported as FSIZE_ERROR_IN_FILE)
*/
long long fsize(FILE *fp_in, char *filename, unsigned long *the_block_size, long *size_of_last_block)
{
    unsigned long long total;
    long long n_blocks;
    long long result = FSIZE_ERROR_IN_FILE;
    unsigned long n_read, block_size;
    unsigned char *temp_buffer;
    long end_position;
    int opened_here = 0;
    FILE *fp;

    block_size = *the_block_size;
    if (block_size > FSIZE_MAX_BLOCK_SIZE) return (FSIZE_ERROR_BLOCK_SIZE);
    if (block_size == 0UL) block_size = FSIZE_DEFAULT_BLOCK_SIZE;
    if (block_size < FSIZE_MIN_BLOCK_SIZE) block_size = FSIZE_MIN_BLOCK_SIZE;
    *the_block_size = block_size;

    if (filename == NULL || *filename == 0) fp = fp_in;
    else
    {   fp = fopen(filename, "rb");
        opened_here = (fp != NULL);
    }
    if (fp == NULL) return (FSIZE_ERROR_IN_FILE);

    // From here on every exit goes through `done`, so a stream opened above is always closed
    if (fseek(fp, 0L, SEEK_SET)) goto done;

    // Fast path: the end of the file can be reached directly
    if (!fseek(fp, 0L, SEEK_END))
    {   end_position = ftell(fp);
        if (end_position == FSIZE_ERROR_IN_FTELL) goto done;

        total = (unsigned long long) end_position;
        n_blocks = (long long) (total/block_size);
        if ((unsigned long long) n_blocks*block_size == total) *size_of_last_block = (long) block_size;
        else
        {   *size_of_last_block = (long) (total - (unsigned long long) n_blocks*block_size);
            n_blocks++;
        }

        if (!fseek(fp, 0L, SEEK_SET)) result = n_blocks;
        goto done;
    }

    // Slow path: jump as far as fseek() allows and count the remaining blocks by reading them
    n_blocks = FSIZE_MAX_SIZE_FSEEK/block_size-1; // In reality fseek() can't handle FSIZE_MAX_SIZE_FSEEK of 2GBytes, so let's use a smaller size
    if (fseek(fp, (long) (n_blocks * block_size), SEEK_SET)) goto done;

    temp_buffer = malloc(sizeof(unsigned char)*block_size);
    if (temp_buffer == NULL) goto done;

    do
    {   n_blocks++;
        n_read = fread(temp_buffer, sizeof(unsigned char), block_size, fp);
    } while (n_read == block_size && n_blocks <= FSIZE_MAX_NUMBER_OF_BLOCKS);

    free(temp_buffer);

    if (n_blocks > FSIZE_MAX_NUMBER_OF_BLOCKS)
    {   result = FSIZE_ERROR_NUMBER_OF_BLOCKS;
        goto done;
    }

    // An empty last read means the previous block was the last one and it was full
    if (n_read == 0L)
    {   *size_of_last_block = (long) block_size;
        n_blocks--;
    }
    else *size_of_last_block = (long) n_read;

    // The caller's stream is handed back rewound; a stream opened here is simply closed below
    if (opened_here || !fseek(fp, 0L, SEEK_SET)) result = n_blocks;

done:
    if (opened_here) fclose(fp);

    return (result);
}
Independent 17 | @param fp_in File Descriptor 18 | @param the_block_size Pointer to the block size 19 | @param size_of_last_block Pointer to the size of the last block 20 | @returns Number of blocks which have `*the_block_size` bytes 21 | */ 22 | long long fsize(FILE *fp_in, char *filename, unsigned long *the_block_size, long *size_of_last_block); 23 | 24 | #endif //UTILS_FILE_H -------------------------------------------------------------------------------- /src/modules/utils/multithread.c: -------------------------------------------------------------------------------- 1 | /************************************************ 2 | * 3 | * Author(s): Pedro Tavares 4 | * Created Date: 24 Dec 2020 5 | * Updated Date: 03 Jan 2021 6 | * 7 | ***********************************************/ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | 15 | #include "errors.h" 16 | #include "multithread.h" 17 | 18 | #ifdef THREADS 19 | bool NO_MULTITHREAD = false; 20 | #include 21 | 22 | #ifdef POSIX_THREADS 23 | #include 24 | 25 | #elif defined(WIN_THREADS) 26 | #include 27 | 28 | #endif 29 | 30 | #else 31 | bool NO_MULTITHREAD = true; 32 | 33 | #endif 34 | 35 | /* 36 | Stack functions, arguments for both functions and lastly the previous thread ID into a struct 37 | */ 38 | #ifdef THREADS 39 | typedef struct { 40 | 41 | #ifdef POSIX_THREADS 42 | pthread_t prev_thread; 43 | uintptr_t (* process)(void *); 44 | uintptr_t (* write)(void *, _modules_error, _modules_error); 45 | #elif defined(WIN_THREADS) 46 | HANDLE prev_hthread; 47 | DWORD (* process)(void *); 48 | DWORD (* write)(void *, _modules_error, _modules_error); 49 | #endif 50 | 51 | void * args; 52 | } Data; 53 | #endif //THREADS 54 | 55 | // This static global variable is only accessed by the main thread 56 | // `multithread[_create | _wait]`'s functions are Thread-Unsafe because of it. 
For Thread-Safety it requires a reentrant version 57 | // For further examples check: `strtok` and `strtok_r` respectively defined in C standard and Posix only 58 | // Why? Use same multithread[_create | _wait]'s function assignature for Windows and Posix 59 | #ifdef POSIX_THREADS 60 | static pthread_t THREAD = 0; 61 | #elif defined(WIN_THREADS) 62 | static HANDLE HTHREAD = 0; 63 | #endif 64 | 65 | /** 66 | \brief Calls both functions and waits last thread closing the handle 67 | @returns Error status (Hack as a pointer) 68 | */ 69 | #ifdef POSIX_THREADS 70 | static uintptr_t wrapper(Data * data) 71 | { 72 | pthread_t prev_thread; 73 | uintptr_t error, prev_error = _SUCCESS; // Define as _SUCCESS in case there is no prev_hthread 74 | 75 | error = (*(data->process))(data->args); 76 | prev_thread = data->prev_thread; 77 | 78 | // In case of error in `data->process` it still joins the thread for resource cleanup 79 | if (prev_thread && pthread_join(prev_thread, (void **) &prev_error)) 80 | prev_error = _THREAD_TERMINATION_FAILED; 81 | 82 | error = (*(data->write))(data->args, prev_error, error); 83 | 84 | free(data); 85 | 86 | return prev_error ? 
prev_error : error; 87 | } 88 | #elif defined(WIN_THREADS) 89 | static DWORD WINAPI wrapper(LPVOID _data) 90 | { 91 | Data * data = (Data *) _data; 92 | HANDLE prev_hthread; 93 | DWORD error, prev_error = _SUCCESS; // Define as _SUCCESS in case there is no prev_hthread 94 | 95 | error = (*(data->process))(data->args); 96 | prev_hthread = data->prev_hthread; 97 | 98 | if (prev_hthread) { // In case of error in `data->process` it still joins the thread for resource cleanup 99 | if (WaitForSingleObject(prev_hthread, INFINITE) != WAIT_OBJECT_0 || !GetExitCodeThread(prev_hthread, &prev_error)) 100 | prev_error = _THREAD_TERMINATION_FAILED; 101 | 102 | CloseHandle(prev_hthread); 103 | } 104 | 105 | error = (*(data->write))(data->args, prev_error, error); 106 | 107 | HeapFree(GetProcessHeap(), 0, data); 108 | 109 | return prev_error ? prev_error : error; 110 | } 111 | #endif 112 | 113 | 114 | 115 | /* 116 | POSIX CAUTION 117 | Attention: Be careful that sizeof(void *) must be >= sizeof(_modules_error) 118 | otherwise make sure the value returned by those funtions are always <= sizeof(void *) 119 | 120 | WINDOWS CAUTION 121 | Attention: Be careful that sizeof(DWORD) must be >= sizeof(int) 122 | otherwise make sure the value returned by those funtions are always <= sizeof(DWORD) 123 | 124 | Why? 
Because we use a hack to consider a pointer as an _modules_error to pass between threads 125 | */ 126 | _modules_error multithread_create(_modules_error (* process)(void *), _modules_error (* write)(void *, _modules_error, _modules_error), void * args) 127 | { 128 | 129 | #ifndef _NO_MULTITHREAD 130 | if (NO_MULTITHREAD) 131 | #endif 132 | { 133 | _modules_error error; 134 | 135 | error = process(args); 136 | return write(args, _SUCCESS, error); 137 | } 138 | 139 | 140 | 141 | #ifdef POSIX_THREADS 142 | 143 | Data * data = malloc(sizeof(Data)); 144 | 145 | if (!data) 146 | return _LACK_OF_MEMORY; 147 | 148 | * data = (Data) { 149 | .prev_thread = THREAD, 150 | .process = (uintptr_t (*) (void *)) process, 151 | .write = (uintptr_t (*) (void *, _modules_error, _modules_error)) write, 152 | .args = args, 153 | }; 154 | 155 | if (pthread_create(&THREAD, NULL, (void *) wrapper, data)) { 156 | free(data); 157 | return _THREAD_CREATION_FAILED; 158 | } 159 | 160 | #elif defined(WIN_THREADS) 161 | 162 | HANDLE handle; 163 | Data * data = HeapAlloc(GetProcessHeap(), 0, sizeof(Data)); 164 | 165 | if (!data) 166 | return _LACK_OF_MEMORY; 167 | 168 | * data = (Data) { 169 | .prev_hthread = HTHREAD, 170 | .process = (DWORD (*) (void *)) process, 171 | .write = (DWORD (*) (void *, _modules_error, _modules_error)) write, 172 | .args = args 173 | }; 174 | 175 | handle = CreateThread( 176 | NULL, // default security attributes 177 | 0, // use default stack size 178 | wrapper, // thread function name 179 | data, // argument to thread function 180 | 0, // use default creation flags 181 | NULL // returns the thread identifier 182 | ); 183 | 184 | if (handle) 185 | HTHREAD = handle; 186 | else { 187 | HeapFree(GetProcessHeap(), 0, data); 188 | return _THREAD_CREATION_FAILED; 189 | } 190 | 191 | #endif 192 | 193 | return _SUCCESS; 194 | } 195 | 196 | _modules_error multithread_wait() 197 | { 198 | #ifndef _NO_MULTITHREAD 199 | if (NO_MULTITHREAD) 200 | #endif 201 | return _SUCCESS; 202 | 
203 | 204 | #ifdef POSIX_THREADS 205 | 206 | uintptr_t error; 207 | 208 | if (pthread_join(THREAD, (void **) &error)) 209 | return _THREAD_TERMINATION_FAILED; 210 | 211 | #elif defined(WIN_THREADS) 212 | 213 | DWORD error; 214 | 215 | if (WaitForSingleObject(HTHREAD, INFINITE) != WAIT_OBJECT_0 || !GetExitCodeThread(HTHREAD, &error)) 216 | error = _THREAD_TERMINATION_FAILED; 217 | 218 | CloseHandle(HTHREAD); 219 | 220 | #endif 221 | 222 | return error; 223 | } 224 | 225 | 226 | 227 | 228 | float clock_main_thread(CLOCK_ACTION action) 229 | { 230 | 231 | #if defined(THREADS) && (_POSIX_C_SOURCE >= 199309L) // All these have POSIX 232 | 233 | static struct timespec start_time; 234 | static bool time_fail = true; 235 | struct timespec finish_time; 236 | 237 | if (action == START_CLOCK) 238 | return time_fail = clock_gettime(CLOCK_MONOTONIC, &start_time); 239 | else { 240 | if (time_fail || clock_gettime(CLOCK_MONOTONIC, &finish_time) == -1) 241 | return -1; 242 | return (finish_time.tv_sec - start_time.tv_sec) * 1000 + ((double)(finish_time.tv_nsec - start_time.tv_nsec)) / 1000000.0; 243 | } 244 | 245 | 246 | #else 247 | static clock_t start_time = -1; 248 | clock_t time; 249 | 250 | if (action == START_CLOCK) { 251 | start_time = clock(); 252 | return start_time != -1 ? 
0 : -1; 253 | } 254 | else { 255 | if (start_time == -1) 256 | return -1; 257 | 258 | time = clock(); 259 | 260 | if (time == -1) 261 | return -1; 262 | 263 | return (float) (((double) (time - start_time)) / CLOCKS_PER_SEC) * 1000; 264 | } 265 | 266 | #endif 267 | } 268 | -------------------------------------------------------------------------------- /src/modules/utils/multithread.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_MULTITHREAD_H 2 | #define UTILS_MULTITHREAD_H 3 | 4 | #include 5 | 6 | #include "errors.h" 7 | 8 | #ifdef _WIN32 9 | #define THREADS 10 | #define WIN_THREADS 11 | 12 | #elif defined(__unix__) || defined(__APPLE__) || defined(__MACH__) 13 | #define THREADS 14 | #define POSIX_THREADS 15 | 16 | #else 17 | #define _NO_MULTITHREAD // Avoid explicit if-statements if it is defined at compile time 18 | 19 | #endif 20 | 21 | extern bool NO_MULTITHREAD; 22 | 23 | /* 24 | Clock's time action 25 | */ 26 | typedef enum {STOP_CLOCK, START_CLOCK} CLOCK_ACTION; 27 | 28 | /** 29 | \brief Counts the CPU time for the current thread in order to avoid the sum of each thread's CPU time elapsed 30 | Warning: This function can't count two elapsed times at both time. 31 | @param path Pointer to the SHAFA->RLE file's path 32 | @param decompress_rle Decompresses file with RLE's algorithm too 33 | @returns Error status 34 | */ 35 | float clock_main_thread(CLOCK_ACTION action); 36 | 37 | /** 38 | \brief Calls both functions in a separated thread. Even though its a different process, write's function will execute sequentially. 
39 | Warning: This function isn't thread-safe itself 40 | @param process This is the processing function which doesn't do IO sequencially 41 | @param write This is the function which does IO sequentially 42 | @param args Arguments passed to both other parameters of this multithread_create's function 43 | @returns Error status 44 | */ 45 | _modules_error multithread_create(_modules_error (* process)(void *), _modules_error (* write)(void *, _modules_error, _modules_error), void * args); 46 | 47 | /** 48 | \brief Waits for all threads created from multithread_create's function 49 | Warning: This function isn't thread-safe itself 50 | @returns Error status 51 | */ 52 | _modules_error multithread_wait(); 53 | 54 | #endif //UTILS_MULTITHREAD_H 55 | -------------------------------------------------------------------------------- /src/shafa.c: -------------------------------------------------------------------------------- 1 | /************************************************ 2 | * 3 | * Author(s): Pedro Tavares 4 | * Created Date: 3 Dec 2020 5 | * Updated Date: 3 Jan 2021 6 | * 7 | ***********************************************/ 8 | 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "modules/f.h" 16 | #include "modules/t.h" 17 | #include "modules/c.h" 18 | #include "modules/d.h" 19 | #include "modules/utils/file.h" 20 | #include "modules/utils/errors.h" 21 | #include "modules/utils/extensions.h" 22 | #include "modules/utils/multithread.h" 23 | 24 | /* 25 | Every option parsed from user's input 26 | */ 27 | typedef struct { 28 | unsigned long block_size; 29 | bool module_f; 30 | bool module_t; 31 | bool module_c; 32 | bool module_d; 33 | bool f_force_rle; 34 | bool f_force_freq; 35 | bool d_shaf; 36 | bool d_rle; 37 | } Options; 38 | 39 | 40 | /** 41 | \brief Parses the arguments provided by the user into a Options' struct 42 | @param argc Number of arguments provided by the user 43 | @param argv Array of arguments provided by the user 44 | 
@param file Pointer to a string for pointing to the file's path provided by the user 45 | @returns Error status 46 | */ 47 | static bool parse(const int argc, char * const argv[], Options * const options, char ** const file) 48 | { 49 | char opt; 50 | char * key, * value; 51 | 52 | for (int i = 1; i < argc; ++i) { // argv[0] == "./shafa" 53 | key = argv[i]; 54 | 55 | if (strcmp(key, "--no-multithread") == 0) 56 | NO_MULTITHREAD = true; 57 | 58 | else if (key[0] != '-') { 59 | if (*file) // There is a path to file already as an argument 60 | return false; 61 | 62 | *file = key; 63 | } 64 | else { 65 | 66 | if (++i >= argc) 67 | return false; 68 | 69 | value = argv[i]; 70 | 71 | if (strlen(key) != 2 || strlen(value) != 1) 72 | return false; 73 | 74 | opt = *value; 75 | 76 | switch (key[1]) { 77 | case 'm': // Chooses module 78 | switch (opt) { 79 | case 'f': 80 | options->module_f = true; 81 | break; 82 | case 't': 83 | options->module_t = true; 84 | break; 85 | case 'c': 86 | options->module_c = true; 87 | break; 88 | case 'd': 89 | options->module_d = true; 90 | break; 91 | default: 92 | return 0; 93 | } 94 | break; 95 | case 'b': // K|m|M 96 | switch (opt) { 97 | case 'K': 98 | options->block_size = _640KiB; 99 | break; 100 | case 'm': 101 | options->block_size = _8MiB; 102 | break; 103 | case 'M': 104 | options->block_size = _64MiB; 105 | break; 106 | default: 107 | return 0; 108 | } 109 | break; 110 | case 'c': // r -> rle force | f -> freq force 111 | if (opt == 'r') 112 | options->f_force_rle = true; 113 | else if (opt == 'f') 114 | options->f_force_freq = true; 115 | else 116 | return false; 117 | break; 118 | case 'd': // s|r 119 | if (opt == 's') 120 | options->d_shaf = true; 121 | else if (opt == 'r') 122 | options->d_rle = true; 123 | else 124 | return false; 125 | break; 126 | default: 127 | return false; 128 | } 129 | } 130 | } 131 | return true; 132 | } 133 | 134 | 135 | /* 136 | Execute modules 137 | 138 | It is the responsability of each module to 
append/remove extensions to the filename and 139 | assign the new filename's reference to the argument `path`. 140 | Every module needs to check if the files that they need exist and are accessible otherwise 141 | they should return an int enumerated in _modules_error from modules/utils/errors.h 142 | */ 143 | 144 | /** 145 | \brief Executes each module taking care of every mistake done from the user's input along with each execute module 146 | @param options A struct to the Options parsed from the user's input 147 | @param ptr_file a Pointer to a string which corresponds to the file's path 148 | @returns Error status 149 | */ 150 | _modules_error execute_modules(Options options, char ** const ptr_file) // better copying only a few bytes instead of dereferncing all of them 151 | { 152 | _modules_error error; 153 | char * tmp_file; 154 | bool file_rle_shaf = false, decompressed = false; 155 | 156 | if (options.module_f) { 157 | error = freq_rle_compress(ptr_file, options.f_force_rle, options.f_force_freq, options.block_size); // Returns true if file was RLE compressed 158 | 159 | if (error) { 160 | fputs("Module f: Something went wrong while compressing with RLE or creating frequencies' table...\n", stderr); 161 | return error; 162 | } 163 | } 164 | 165 | if (options.module_t) { 166 | 167 | if (!options.module_f) { 168 | if (check_ext(*ptr_file, FREQ_EXT)) { 169 | tmp_file = rm_ext(*ptr_file); 170 | 171 | if (!tmp_file) 172 | return _LACK_OF_MEMORY; 173 | 174 | free(*ptr_file); 175 | *ptr_file = tmp_file; 176 | } 177 | else { 178 | fprintf(stderr, "Module t: Wrong extension... 
Should end in %s\n", FREQ_EXT); 179 | return _OUTSIDE_MODULE; 180 | } 181 | } 182 | 183 | error = get_shafa_codes(*ptr_file); // If file doesn't end in .rle then its considered an uncompressed one 184 | 185 | if (error) { 186 | fputs("Module t: Something went wrong...\n", stderr); 187 | return error; 188 | } 189 | } 190 | 191 | if (options.module_c) { 192 | 193 | if (options.module_f && !options.module_t) { // Conflict 194 | fputs("Module c: Can't execute module 'c' after 'f' without 't'...\n", stderr); 195 | return _OUTSIDE_MODULE; 196 | } 197 | 198 | error = shafa_compress(ptr_file); // If file doesn't end in .rle then its considered an uncompressed one 199 | 200 | if (error) { 201 | fputs("Module c: Something went wrong...\n", stderr); 202 | return error; 203 | } 204 | } 205 | 206 | if (options.module_d) { 207 | 208 | if ((options.module_f && (!options.module_t || !options.module_c) && !check_ext(*ptr_file, RLE_EXT)) || (options.module_t && !options.module_c)) { // Conflict 209 | fputs("Module d: Can't execute module 'd' after 'f' without 't' or 'c', nor execute it after 't' without 'c'...\n", stderr); 210 | return _OUTSIDE_MODULE; 211 | } 212 | 213 | if (options.d_shaf || !options.d_rle) { // Trigger: NULL | -m d | -m d -d s 214 | 215 | if (!check_ext(*ptr_file, SHAFA_EXT)) { 216 | if (options.d_shaf) { // User forced execution of Shannon Fano's decompression 217 | fprintf(stderr, "Module d: Wrong extension... Should end in %s\n", SHAFA_EXT); 218 | return _OUTSIDE_MODULE; 219 | } 220 | } 221 | else { 222 | 223 | if (options.d_rle) { 224 | if (check_ext(*ptr_file, RLE_EXT SHAFA_EXT)) 225 | file_rle_shaf = true; // Avoid executing extension check twice 226 | else { 227 | fprintf(stderr, "Module d: Wrong extension... 
Should end in %s\n", RLE_EXT SHAFA_EXT); 228 | return _OUTSIDE_MODULE; 229 | } 230 | } 231 | 232 | error = shafa_decompress(ptr_file, (options.d_rle || !options.d_shaf) && (file_rle_shaf || check_ext(*ptr_file, RLE_EXT SHAFA_EXT))); // RLE => Trigger: NULL | -m d 233 | 234 | if (error) { 235 | fputs("Module d: Something went wrong while decompressing...\n", stderr); 236 | return error; 237 | } 238 | else 239 | decompressed = true; 240 | } 241 | } 242 | 243 | if (!decompressed && (options.d_rle || !options.d_shaf)) { // Trigger: NULL | -m d | -m d -d r (Won't execute this statement if had already been decompressed) 244 | 245 | if (!check_ext(*ptr_file, RLE_EXT)) { 246 | fprintf(stderr, "Module d: Wrong extension... Should end in %s\n", RLE_EXT); 247 | return _OUTSIDE_MODULE; 248 | } 249 | 250 | error = rle_decompress(ptr_file); 251 | 252 | if (error) { 253 | fputs("Module d: Something went wrong while decompressing...\n", stderr); 254 | return error; 255 | } 256 | } 257 | } 258 | return _SUCCESS; 259 | } 260 | 261 | 262 | int main (const int argc, char * const argv[]) 263 | { 264 | Options options = {0}; // Reference C99 Standard 6.7.8.21 265 | char * file = NULL; 266 | int error; 267 | 268 | if (argc <= 1) { 269 | fputs("No file input\n", stderr); 270 | return 1; 271 | } 272 | 273 | if (!parse(argc, argv, &options, &file)) { 274 | fputs("Wrong Options' syntax\n", stderr); 275 | return 1; 276 | } 277 | 278 | if (!file) { 279 | fputs("No file input\n", stderr); 280 | return 1; 281 | } 282 | 283 | // Have to otherwise it will raise error if some modules tries to free it in order to change the pointer to the new file's path 284 | file = add_ext(file, ""); // does the same as `strdup` from which is not supported in c17 285 | 286 | if (!file) { 287 | fputs("Not enough memory\n", stderr); 288 | return 1; 289 | } 290 | 291 | 292 | 293 | if (!options.module_f && !options.module_t && !options.module_c && !options.module_d) { 294 | if (check_ext(file, SHAFA_EXT)) // if user 
wants to decompress a RLE only then they must specify `-m d` which will be equivalent to `-m d -d r` 295 | options.module_d = 1; 296 | else 297 | options.module_f = options.module_t = options.module_c = 1; 298 | } 299 | 300 | // Can't do the same for `options.d_shaf` and `options.d_rle` since we would lost information 301 | // about the user forcing Shannon Fano's decompression in case they passed a `.rle` file 302 | // which should raise a custom error 303 | 304 | if (!options.block_size) 305 | options.block_size = _64KiB; 306 | 307 | error = execute_modules(options, &file); 308 | free(file); 309 | 310 | if (error) { 311 | if (error != _OUTSIDE_MODULE) 312 | fputs(error_msg(error), stderr); 313 | return 1; 314 | } 315 | 316 | return 0; 317 | } 318 | --------------------------------------------------------------------------------