├── modern-cpp ├── src │ ├── .gitignore │ ├── naive.hpp │ ├── Makefile │ ├── safe.hpp │ ├── final.hpp │ ├── safe.cpp │ ├── naive.cpp │ ├── final.cpp │ └── main.cpp └── README.md ├── smart-ptr ├── src │ ├── .gitignore │ ├── Makefile │ ├── interfaces.hpp │ ├── main.cpp │ └── mutation.hpp └── README.md ├── cpp-build-process ├── pdp7.jpg ├── src │ ├── add.h │ ├── add.c │ ├── simple.c │ └── Makefile ├── tree.svg └── README.md ├── README.md ├── Makefile ├── .clang-format ├── .gitignore ├── LICENSE └── interface-typing ├── pyramid.svg └── README.md /modern-cpp/src/.gitignore: -------------------------------------------------------------------------------- 1 | modern 2 | -------------------------------------------------------------------------------- /smart-ptr/src/.gitignore: -------------------------------------------------------------------------------- 1 | smart_ptr 2 | -------------------------------------------------------------------------------- /cpp-build-process/pdp7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/green7ea/blog/HEAD/cpp-build-process/pdp7.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Blogs 2 | 3 | - `2024-01` — [Typing Interfaces](./interface-typing/README.md) 4 | - `2018-11` — [C++ Build Process](./cpp-build-process/README.md) 5 | -------------------------------------------------------------------------------- /cpp-build-process/src/add.h: -------------------------------------------------------------------------------- 1 | #ifndef ADD_H_INCLUDED 2 | #define ADD_H_INCLUDED 3 | 4 | int add(int a, int b); 5 | int sub(int a, int b); 6 | 7 | #endif // ADD_H_INCLUDED 8 | -------------------------------------------------------------------------------- /cpp-build-process/src/add.c: -------------------------------------------------------------------------------- 1 | #include "add.h" 2 | 3 | int add(int a, int b) 4 | { 5 | return a + b; 6 | } 7 | 8 | int sub(int a, int b) 9 | { 10 | return a - b; 11 | } 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean 2 | 3 | all: 4 | $(MAKE) -C modern-cpp/src/ 5 | $(MAKE) -C smart-ptr/src/ 6 | 7 | clean: 8 | $(MAKE) -C modern-cpp/src/ clean 9 | $(MAKE) -C smart-ptr/src/ clean 10 | -------------------------------------------------------------------------------- /cpp-build-process/src/simple.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "add.h" 4 | 5 | int main(int argv, char **argc) 6 | { 7 | printf("%i\n", sub(add(5, 6), 6)); 8 | 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: llvm 2 | AllowShortFunctionsOnASingleLine: None 3 | ColumnLimit: 80 4 | BreakConstructorInitializers: BeforeComma 5 | PackConstructorInitializers: Never 6 | BreakTemplateDeclarations: Yes 7 | -------------------------------------------------------------------------------- /modern-cpp/src/naive.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class NaiveFile { 6 | public: 7 | NaiveFile(const std::string &path); 8 | ~NaiveFile(); 9 | 10 | /// Reads up to 1024 characters. If 11 | /// something goes wrong, it returns 12 | /// an empty string. 13 | std::string read_1024() const; 14 | 15 | private: 16 | int fd; 17 | }; 18 | -------------------------------------------------------------------------------- /smart-ptr/src/Makefile: -------------------------------------------------------------------------------- 1 | CXXFLAGS += -std=c++23 -MD -pipe -Wall -Wextra -Wpedantic -Wdeprecated-copy-dtor -fsanitize=address 2 | SRCS = main.cpp 3 | OBJS = $(SRCS:.cpp=.o) 4 | DEPS = $(SRCS:.cpp=.d) 5 | PROG = smart_ptr 6 | 7 | .PHONY: all clean 8 | 9 | all: $(OBJS) 10 | $(CXX) $(CXXFLAGS) $(OBJS) -o $(PROG) 11 | 12 | clean: 13 | rm -f $(OBJS) $(DEPS) $(PROG) 14 | 15 | -include $(DEPS) 16 | -------------------------------------------------------------------------------- /modern-cpp/src/Makefile: -------------------------------------------------------------------------------- 1 | CXXFLAGS += -std=c++23 -MD -pipe -Wall -Wextra -Wpedantic -Wdeprecated-copy-dtor -fsanitize=address 2 | SRCS = final.cpp main.cpp naive.cpp safe.cpp 3 | OBJS = $(SRCS:.cpp=.o) 4 | DEPS = $(SRCS:.cpp=.d) 5 | PROG = modern 6 | 7 | .PHONY: all clean 8 | 9 | all: $(OBJS) 10 | $(CXX) $(CXXFLAGS) $(OBJS) -o $(PROG) 11 | 12 | clean: 13 | rm -f $(OBJS) $(DEPS) $(PROG) 14 | 15 | -include $(DEPS) 16 | -------------------------------------------------------------------------------- /modern-cpp/src/safe.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class SafeFile { 6 | public: 7 | SafeFile(const std::string &path); 8 | SafeFile(const SafeFile &) = delete; 9 | ~SafeFile(); 10 | void operator=(const SafeFile &) = delete; 11 | 12 | /// Reads up to 1024 characters. If 13 | /// something goes wrong, it returns 14 | /// an empty string. 15 | std::string read_1024() const; 16 | 17 | private: 18 | int fd; 19 | }; 20 | -------------------------------------------------------------------------------- /cpp-build-process/src/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CXX = g++ 3 | 4 | CFLAGS = -MD -pipe -Wall -O0 -std=c99 -g 5 | CXXFLAGS = -MD -pipe -Wall -O0 -std=c++11 -g 6 | 7 | PROG1 = simple 8 | EX1 = add.o simple.o 9 | EX2 = mangling.o objects.o 10 | 11 | OBJS = $(EX1) $(EX2) 12 | DUMPS = $(OBJS:.o=.sym) 13 | DEPS = $(OBJS:.o=.d) 14 | 15 | .PHONY: all clean 16 | 17 | all: $(EX1) $(DUMPS) 18 | $(CC) $(CFLAGS) $(EX1) -o $(PROG1) 19 | 20 | clean: 21 | rm -f $(OBJS) $(PROG1) $(DUMPS) $(DEPS) 22 | 23 | %.sym: %.o 24 | nm $< > $@ 25 | 26 | -include $(DEPS) 27 | -------------------------------------------------------------------------------- /modern-cpp/src/final.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class FileWrapper { 6 | public: 7 | FileWrapper(const std::string &path); 8 | FileWrapper(const FileWrapper &) = delete; 9 | FileWrapper(FileWrapper &&); 10 | ~FileWrapper(); 11 | void operator=(const FileWrapper &) = delete; 12 | void operator=(FileWrapper &&); 13 | 14 | /// Reads up to 1024 characters. If 15 | /// something goes wrong, it returns 16 | /// an empty string. 17 | std::string read_1024() const; 18 | 19 | private: 20 | int fd{-1}; 21 | }; 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | -------------------------------------------------------------------------------- /modern-cpp/src/safe.cpp: -------------------------------------------------------------------------------- 1 | #include "safe.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | SafeFile::SafeFile(const std::string &path) { 11 | fd = open(path.c_str(), O_RDONLY); 12 | } 13 | 14 | SafeFile::~SafeFile() { 15 | if (fd >= 0) { 16 | printf("~SafeFile closing file\n"); 17 | 18 | if (close(fd) < 0) { 19 | fprintf(stderr, " Couldn't close file: '%s'\n", strerror(errno)); 20 | } 21 | } 22 | } 23 | 24 | std::string SafeFile::read_1024() const { 25 | if (fd < 0) { 26 | return ""; 27 | } 28 | 29 | const int nbytes = 1024; 30 | char buf[nbytes] = {}; 31 | 32 | int n = ::read(fd, buf, nbytes); 33 | if (n <= 0) { 34 | return ""; 35 | } 36 | 37 | return std::string(buf, n); 38 | } 39 | -------------------------------------------------------------------------------- /smart-ptr/src/interfaces.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | struct Config { 7 | std::string hostname; 8 | int port; 9 | std::string url; 10 | }; 11 | 12 | class ConfigWatcher { 13 | public: 14 | ConfigWatcher() 15 | : config( 16 | std::make_shared(Config{"localhost", 80, "/index.html"})) { 17 | } 18 | 19 | /** 20 | * By returning a shared_ptr to a const Config, we make sure that 21 | * we're the only ones that can modify the underlying configuration. 22 | */ 23 | inline std::shared_ptr get_config() { 24 | return config; 25 | } 26 | 27 | /** 28 | * This could be triggered when the config is updated. 29 | */ 30 | inline void update_config() { 31 | config->port += 1; 32 | } 33 | 34 | private: 35 | std::shared_ptr config; 36 | }; 37 | -------------------------------------------------------------------------------- /smart-ptr/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "interfaces.hpp" 2 | #include "mutation.hpp" 3 | 4 | void mutation() { 5 | auto shared = std::make_shared("Hello shared world!"); 6 | 7 | printf("Good count\n"); 8 | int good = count_good(*shared, 'l'); 9 | printf("'%s' contains %i 'l's.\n\n", shared->c_str(), good); 10 | 11 | printf("Bad count\n"); 12 | int bad = count_bad(shared, 'l'); 13 | printf("'%s' contains %i 'l's.\n\n", shared->c_str(), bad); 14 | } 15 | 16 | int main() { 17 | mutation(); 18 | 19 | ConfigWatcher watcher; 20 | std::shared_ptr config = watcher.get_config(); 21 | 22 | printf("Port %i is configured\n", config->port); 23 | 24 | // We can't update the config ourselves but the watcher can 25 | // internally. 26 | watcher.update_config(); 27 | 28 | printf("Port %i is configured\n", config->port); 29 | } 30 | -------------------------------------------------------------------------------- /modern-cpp/src/naive.cpp: -------------------------------------------------------------------------------- 1 | #include "naive.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | NaiveFile::NaiveFile(const std::string &path) { 11 | fd = open(path.c_str(), O_RDONLY); 12 | 13 | printf("(fd %i) open %s\n", fd, path.c_str()); 14 | } 15 | 16 | NaiveFile::~NaiveFile() { 17 | if (fd >= 0) { 18 | printf("(fd %i) ~NaiveFile closing\n", fd); 19 | 20 | if (close(fd) < 0) { 21 | fprintf(stderr, " (fd %i) Couldn't close file: '%s'\n", fd, 22 | strerror(errno)); 23 | } 24 | } 25 | } 26 | 27 | std::string NaiveFile::read_1024() const { 28 | if (fd < 0) { 29 | return ""; 30 | } 31 | 32 | const int nbytes = 1024; 33 | char buf[nbytes] = {}; 34 | 35 | int n = ::read(fd, buf, nbytes); 36 | if (n <= 0) { 37 | return ""; 38 | } 39 | 40 | return std::string(buf, n); 41 | } 42 | -------------------------------------------------------------------------------- /smart-ptr/src/mutation.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | inline void mutate(const std::shared_ptr &value) { 7 | *value = "Not what you think!"; 8 | } 9 | 10 | inline void mutate2(const std::shared_ptr &value) { 11 | std::shared_ptr copy = value; 12 | 13 | *copy = "Still not what you think"; 14 | } 15 | 16 | inline void mutate_unique(const std::unique_ptr &value) { 17 | *value = "Same with unique_ptr"; 18 | } 19 | 20 | inline int count_bad(const std::shared_ptr &s, char d) { 21 | int count = 0; 22 | for (auto it = s->begin(); it != s->end(); ++it) { 23 | if (*it == d) { 24 | count += 1; 25 | 26 | // this is a surprise 27 | *it = '*'; 28 | } 29 | } 30 | 31 | return count; 32 | } 33 | 34 | inline int count_good(const std::string &s, char d) { 35 | int count = 0; 36 | for (auto it = s.begin(); it != s.end(); ++it) { 37 | if (*it == d) { 38 | count += 1; 39 | } 40 | } 41 | 42 | return count; 43 | } 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 green7ea 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /modern-cpp/src/final.cpp: -------------------------------------------------------------------------------- 1 | #include "final.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | FileWrapper::FileWrapper(const std::string &path) { 11 | fd = open(path.c_str(), O_RDONLY); 12 | } 13 | 14 | FileWrapper::FileWrapper(FileWrapper &&wrapper) 15 | : fd(wrapper.fd) { 16 | // 'wrapper' won't close a fd with -1 in its 17 | // destructor. 18 | wrapper.fd = -1; 19 | } 20 | 21 | FileWrapper::~FileWrapper() { 22 | if (fd >= 0) { 23 | if (close(fd) < 0) { 24 | fprintf(stderr, " Couldn't close file: '%s'\n", strerror(errno)); 25 | } 26 | } 27 | } 28 | 29 | void FileWrapper::operator=(FileWrapper &&wrapper) { 30 | this->fd = wrapper.fd; 31 | // 'wrapper' won't close a fd with -1 in its 32 | // destructor. 33 | wrapper.fd = -1; 34 | } 35 | 36 | std::string FileWrapper::read_1024() const { 37 | if (fd < 0) { 38 | return ""; 39 | } 40 | 41 | const int nbytes = 1024; 42 | char buf[nbytes] = {}; 43 | 44 | int n = ::read(fd, buf, nbytes); 45 | if (n <= 0) { 46 | return ""; 47 | } 48 | 49 | return std::string(buf, n); 50 | } 51 | -------------------------------------------------------------------------------- /modern-cpp/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "final.hpp" 2 | #include "naive.hpp" 3 | #include "safe.hpp" 4 | 5 | template 6 | void accidental_copy(T file) { 7 | auto content = file.read_1024(); 8 | 9 | if (content.size() >= 1024) { 10 | printf("File is bigger than 1KB\n"); 11 | } else { 12 | printf("File is %zu bytes long\n", content.size()); 13 | } 14 | } 15 | 16 | int main() { 17 | const std::string filename = __FILE__; 18 | 19 | printf("Naive file wrapper\n"); 20 | { 21 | NaiveFile naive_file = NaiveFile(filename); 22 | 23 | // Copy 1, since we pass by copy 24 | accidental_copy(naive_file); 25 | 26 | // Copy 2, we used the = operator that makes a copy 27 | auto file2 = naive_file; 28 | } 29 | printf("\n\n"); 30 | 31 | printf("Safe file wrapper\n"); 32 | { 33 | SafeFile safe_file = SafeFile(filename); 34 | 35 | // Gives an error "call to deleted constructor of 36 | // 'SafeFile'". 37 | // accidental_copy(safe_file); 38 | 39 | // Gives an error as well 40 | // auto file2 = safe_file; 41 | } 42 | printf("\n\n"); 43 | 44 | printf("File file wrapper\n"); 45 | { 46 | FileWrapper file = FileWrapper(filename); 47 | 48 | // Gives an error "call to deleted constructor of 49 | // 'SafeFile'". 50 | // accidental_copy(safe_file); 51 | 52 | // Gives an error as well 53 | // auto file2 = safe_file; 54 | } 55 | printf("\n\n"); 56 | } 57 | -------------------------------------------------------------------------------- /cpp-build-process/tree.svg: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 8 | program 9 | 10 | 12 | 14 | 15 | 17 | simple.o 18 | 19 | 21 | add.o 22 | 23 | 24 | 26 | 28 | 29 | 31 | simple.c 32 | 33 | 35 | add.c 36 | 37 | 38 | 40 | 42 | 43 | 45 | 46 | 48 | stdio.h 49 | 50 | 52 | add.h 53 | 54 | 55 | 57 | 58 | 60 | other headers 61 | 62 | 63 | -------------------------------------------------------------------------------- /interface-typing/pyramid.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | 12 | 19 | Compiler Validation 20 | 21 | 26 | 33 | Unit Tests 34 | 35 | 40 | 47 | Service Tests 48 | 49 | 54 | 61 | UI Tests 62 | 63 | 76 | 83 | more integration 84 | 85 | 92 | more isolation 93 | 94 | 107 | 114 | slower 115 | 116 | 123 | faster 124 | 125 | 126 | -------------------------------------------------------------------------------- /smart-ptr/README.md: -------------------------------------------------------------------------------- 1 | # Pointers on C++'s smart pointers 2 | 3 | > Sorry for the title, I just couldn't help myself. 4 | 5 | ## TL;DR 6 | 7 | Don't use smart pointers in function parameters unless you're taking 8 | ownership (shared or exclusive). 9 | 10 | Return stack objects instead of smart pointers when possible. 11 | 12 | ```c++ 13 | // bad 14 | int count(const std::shared_ptr &s, char d); 15 | 16 | // good for read only access 17 | int count(const std::string &s, char d); 18 | 19 | // good for making a copy 20 | void take_copy(std:shared_ptr &&s); 21 | ``` 22 | 23 | Avoid using pointers, the following is the preference for creating 24 | objects: 25 | 26 | | Pointer type | Comment | 27 | |------------------------|---------------------------------------------------------------------------------------------------------------------------------| 28 | | `T` (stack allocation) | Best option | 29 | | `unique_ptr` | If you really need a stack allocation | 30 | | `shared_ptr` | Not great, but sometimes necessary | 31 | | `T *` | When working with C code, shouldn't be exposed long [I.11](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Ri-raw) | 32 | | `shared_ptr` | Be careful with this one, avoid | 33 | 34 | ## Smart pointers as parameters 35 | 36 | Let's take the example from the TL;DR, and explain why having smart 37 | pointers in the interface is a bad idea. 38 | 39 | ```c++ 40 | // bad 41 | int count_bad(const std::shared_ptr &s, char d); 42 | 43 | // good 44 | int count_good(const std::string &s, char d); 45 | ``` 46 | 47 | In this use case, counting is only reading the string, not making a 48 | copy of the shared pointer. 49 | 50 | The first thing to mention is that they are both easily callable if I 51 | have a smart pointer object. 52 | 53 | ```c++ 54 | auto shared = std::make_shared("Hello shared world!"); 55 | 56 | int bad = count_bad(shared, 'l'); 57 | 58 | // Calling the non-smart pointer is as easy as adding a * 59 | int good = count_good(*shared, 'l'); 60 | ``` 61 | 62 | They're both as easy to call if I'm already using the same smart 63 | pointer type as the function but much harder to call if I'm not. This 64 | puts pressure on the caller to store their objects in a smart pointer 65 | and in the worst case, all objects end up being stored in smart 66 | pointers even when they don't have to be. 67 | 68 | Using smart pointers as a parameter also creates a link between the 69 | function parameter and the outside object: if the function changes 70 | from a `unique_ptr` to a `shared_ptr` the calling code as to be 71 | updated. There's the option to rewrap the object or switch to a 72 | `shared_ptr` — neither of these are good choices. 73 | 74 | When you use smart pointers, you're leaving performance on the table — 75 | you rule out faster options like using the stack or an arena 76 | alocator. Most of the time you don't need the performance but it's 77 | nice to be able to get it without major refactoring when you need it. 78 | 79 | Lastly, the intention of a function using a smart pointer is also 80 | unclear. It could be making a copy for later use or it could be simply 81 | reading the pointer's values during the call. With a `const 82 | shared_ptr &`, both are possible, with a `const T &`, it's clear 83 | that we're only reading values. 84 | 85 | I hope I've convinced you that if you only need to read a parameter of 86 | type `T`, use `const T &` instead of `const shared_ptr &` — only 87 | take `const shared_ptr &` if you are taking a copy of the shared 88 | pointer for future use. There are no downsides but many upsides: 89 | 90 | 1. easier to call, 91 | 2. better decoupling between caller and callee, 92 | 3. can be faster when needed, 93 | 4. clearer intention. 94 | 95 | All of this to avoid having to use `operator*`! 96 | 97 | ## Const correctness 98 | 99 | If you aren't yet convinced, the bad version of the interface doesn't 100 | give you the `const` protections you might expect: 101 | 102 | ```c++ 103 | auto shared = std::make_shared("Hello shared world!"); 104 | 105 | printf("Good count\n"); 106 | int good = count_good(*shared, 'l'); 107 | printf("'%s' contains %i 'l's.\n\n", shared->c_str(), good); 108 | 109 | printf("Bad count\n"); 110 | int bad = count_bad(shared, 'l'); 111 | printf("'%s' contains %i 'l's.\n\n", shared->c_str(), bad); 112 | ``` 113 | 114 | Prints 115 | 116 | ``` 117 | Good count 118 | 'Hello shared world!' contains 3 'l's. 119 | 120 | Bad count 121 | 'He**o shared wor*d!' contains 3 'l's. 122 | ``` 123 | 124 | Notice the `*`s where the `l`s used to be for the bad count. The 125 | object that is pointed to in a `const shared_ptr` can be modified! 126 | It's the same with a unique pointer. 127 | 128 | A `const shared_ptr` doesn't protect the `T` from being modified 129 | but a `shared_ptr` does. This is done because C++ requires a 130 | copy constructor to work with a `const` object — doing otherwise 131 | breaks basic functionality like returning an object via a temporary. 132 | 133 | ## The shared_ptr const trick 134 | 135 | We can still use `const` to our advantage by using the 136 | `shared_ptr` type to restrict modification. This is a really 137 | useful trick to overcome the `shared_ptr`'s biggest limitation — loss 138 | of control as to where and when it can change. 139 | 140 | We're always told to [Avoid non-const global variables (I.2)](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Ri-global) and 141 | the reasoning is: 142 | 143 | > Non-const global variables hide dependencies and make the 144 | > dependencies subject to unpredictable changes. 145 | 146 | `const shared_ptr` have the same problem as a global variable — any 147 | code that can get a hold of the pointer can make an unpredictable 148 | change. 149 | 150 | Let's say we have a race condition on the data held in a `const 151 | shared_ptr`, we have to figure out: 152 | 153 | > Which thread modifies the shared pointer? 154 | 155 | You can't easily answer that question without auditing the code that 156 | can access the pointer. Imagine having to fix a race condition with a 157 | shared pointer! 158 | 159 | `const` global variables don't have the same weakness as the 160 | `non-const` global variables — the equivalent for `shared_ptr` is 161 | `shared_ptr`. 162 | 163 | Shared pointers have an extra trick where you can have a 164 | `shared_ptr` and give everyone else a `shared_ptr`. It 165 | becomes easier to audit where the data can be modified. 166 | 167 | Here's a simple example, we have a configuration that we load from a 168 | file and want to watch for changes. We hand out the configuration to 169 | many parts of our program via a `shared_ptr` so they get 170 | updates when the configuration changes. In the example, only the 171 | `ConfigWatcher` can update the configuration object and no one else. 172 | 173 | ```c++ 174 | class ConfigWatcher { 175 | public: 176 | /** 177 | * By returning a shared_ptr to a const Config, we make sure that 178 | * we're the only ones that can modify the underlying configuration. 179 | */ 180 | std::shared_ptr get_config() { 181 | return config; 182 | } 183 | 184 | /** 185 | * This could be triggered when the config is updated. 186 | */ 187 | void update_config() { 188 | // Code that can modify config can only be in this class. 189 | } 190 | 191 | private: 192 | std::shared_ptr config; 193 | }; 194 | ``` 195 | 196 | ## Conclusion 197 | 198 | Smart pointers in C++ are nuanced — they are a powerful tool that have 199 | shortcomings. They make heap allocation much safer but heap allocation 200 | shouldn't become the default or a lot of C++'s usefullness is left on 201 | the table. 202 | -------------------------------------------------------------------------------- /interface-typing/README.md: -------------------------------------------------------------------------------- 1 | # Leveraging types for better APIs 2 | 3 | ## Modules and interfaces 4 | 5 | > Well-designed computational systems, like well-designed automobiles 6 | > or nuclear reactors, are designed in a modular manner, so that the 7 | > parts can be constructed, replaced, and debugged separately. 8 | > 9 | > — [Structure and Interpretation of Computer Programs](https://mitp-content-server.mit.edu/books/content/sectbyfn/books_pres_0/6515/sicp.zip/full-text/book/book-Z-H-9.html#%_chap_1) 10 | 11 | Software is built by breaking a complex problem down into smaller 12 | modules that each solve a simpler problem. We can further break down a 13 | module we just created into ever smaller parts until we have something 14 | simple enough to work with. 15 | 16 | All of these modules then have to be connected to each other to solve 17 | our original, more complex problem. Connecting these modules together 18 | can in itself become a difficult problem — especially as the number of 19 | modules increases. 20 | 21 | The key to managing the complexity that arises from connecting modules 22 | together is having well-defined interfaces. A well-defined interface 23 | is typed in such a way that we can rely on compilers to ensure that we 24 | respect the interfaces that we use. 25 | 26 | When modules are written in the same programming language, the typing 27 | information for the interfaces is usually already in place. Often 28 | though, modules are written in different languages or are written by 29 | different organizations and so the typing information is 30 | missing. Adding typing formation to those interfaces presents a great 31 | opportunity to improve the software we write. 32 | 33 | We often overlook the quantity errors that can be caught by the 34 | compiler when it has good type information. There are surprisingly few 35 | typing techniques that we need to avoid many common errors. With these 36 | techniques, compiler validation becomes the most powerful tool for 37 | catching errors. 38 | 39 | The traditional test automation pyramid[^pyramid] has unit tests as a 40 | foundation but it might be better to place compiler validation bellow 41 | unit tests. Unit tests validate that assertions given by the tests 42 | hold true — compilers are able to validate that assertions given by 43 | the types hold true. Here's what our updated test automation pyramid 44 | can look like. 45 | 46 | ![](./pyramid.svg) 47 | 48 | With this in mind, the questions becomes 49 | 50 | > What type information can we give the compiler to help us catch 51 | > errors, especially, around our interfaces? 52 | 53 | ## Static type checking 54 | 55 | > Code never lies, comments sometimes do. 56 | > 57 | > — Ron Jeffries 58 | 59 | The most basic type contract is being able to specify what type of 60 | value you expect to find in a variable. Most programming languages 61 | have static typing built-in but server requests often lack typing 62 | information — the data is often untyped `json` or `xml`. 63 | 64 | To show an example of typing at work, we can use Typescript with 65 | simple functions as our interface. Typescript lets us easily compare a 66 | typed function and an untyped equivalent to highlight errors that 67 | typing can prevent. Even though a function is used here, it's 68 | important to remember that this also applies to the interfaces between 69 | our modules. 70 | 71 | ```typescript 72 | function typed_sum(a: number, b: number): number { 73 | return a + b; 74 | } 75 | 76 | /** 77 | * Well intentioned comments. 78 | * 79 | * @param a is a number 80 | * @param b is a number 81 | * 82 | * @returns a number 83 | */ 84 | function untyped_sum(a, b) { 85 | return a + b; 86 | } 87 | 88 | // Behaviour that is hard to predict, potential bugs 89 | untyped_sum("1", "2"); // → "12" 90 | 91 | // A compiler error 92 | typed_sum("1", "2"); 93 | ``` 94 | 95 | ## Null safety 96 | 97 | > I call it my billion-dollar mistake. It was the invention of the 98 | > null reference in 1965… This has led to innumerable errors, 99 | > vulnerabilities, and system crashes, which have probably caused a 100 | > billion dollars of pain and damage in the last forty years. 101 | > 102 | > — Tony Hoare 103 | 104 | Null references have been around for a long time but null safety has 105 | resurfaced in the last 10 years[^null]. The concept is simple, you 106 | have to indicate when a value can be missing. The alternative — any 107 | value can be missing without warning — sounds crazy but it's been a 108 | standard part of many programming languages for so long that we've 109 | developed Stockholm syndrome. 110 | 111 | Using a value as if it was there when it isn't leads to crashes or 112 | unexpected behaviour. With null safety, when a value is potentially 113 | missing, the compiler can make sure we do a null check before the 114 | value can be used. 115 | 116 | It's incredible to see how many errors disappear with this simple 117 | change. This is especially true across interfaces, the errors are 118 | usually harder to diagnose and resolve because of the extra distance 119 | between the two pieces of code. 120 | 121 | Once again, Typescript lets us illustrate both sides of the coin in 122 | the same program using functions. 123 | 124 | ```typescript 125 | function nullsafe_sum(a: number, b: number): number { 126 | return a + b; 127 | } 128 | 129 | function untyped_sum(a, b) { 130 | return a + b; 131 | } 132 | 133 | // Behaviour that is hard to predict, potential bugs 134 | untyped_sum(5, null); // → 5 135 | untyped_sum(5, undefined); // → NaN (Not a Number) 136 | 137 | // Caught by the compiler 138 | nullsafe_sum(5, null); // Compiler error 139 | nullsafe_sum(5, undefined); // Compiler error 140 | ``` 141 | 142 | ## Tagged unions 143 | 144 | > The best way to eliminate exception handling complexity is to define 145 | > your APIs so that there are no exceptions to handle: define errors 146 | > out of existence. 147 | > 148 | > — A Philosophy of Software Design 149 | 150 | Just like null safety, tagged unions aren't a new concept[^taggedold] 151 | but are becoming a standard part of many new programming 152 | languages[^taggednew]. 153 | 154 | Tagged unions allow us to create data structures that better fit our 155 | use cases. They do this by being able to represent a choice as 156 | data. Each choice has its own associated data. 157 | 158 | When used correctly, the ability to represent a choice makes 159 | inconsistent states impossible to represent. It's surprising how many 160 | errors ultimately boil down to ambiguous states. 161 | 162 | ### State with tagged unions 163 | 164 | The simplest choice we can represent is true or false. This is often 165 | useful when we want to represent the return value of a function that 166 | can fail. The successful case is mapped to `true` and we associate the 167 | normal result of the function with it. The error case is mapped to 168 | false and we associate an error message with it. 169 | 170 | This sounds more complex than it is, hopefully another Typescript 171 | example can clear things up. 172 | 173 | ```typescript 174 | interface Result { 175 | tag: 'result'; 176 | value: number; 177 | } 178 | 179 | interface Error { 180 | tag: 'error'; 181 | message: string; 182 | } 183 | 184 | function divide(a: number, b: number): Result | Error { 185 | if (b === 0) { 186 | return { 187 | tag: 'error', 188 | message: "Can't divide by 0", 189 | }; 190 | } 191 | 192 | return { 193 | tag: 'result', 194 | value: a / b, 195 | }; 196 | } 197 | ``` 198 | 199 | This might look a lot like exceptions but there are two key 200 | differences. 201 | 202 | 1. Exceptions modify a program's control flow which makes it hard to 203 | transfer across to another process or program. 204 | 2. Tagged unions combine with other data types, we can have a list of 205 | results or errors. 206 | 207 | The example above is the simplest case where there are only two states 208 | — tagged unions work for more than just two states. 209 | 210 | Let's say we have a device that start out as offline. It then connects 211 | to our server and downloads docker images as part of an initializing 212 | phase. Once those images are booted up and sending us sensor data, the 213 | device is considered online. We can model that with the diagram below 214 | 215 | If we had to represent this without tagged unions, we might combine 216 | all the different states into one bigger data structure and have 217 | `null` for the parts that don't make sense in the current state. 218 | 219 | ```typescript 220 | interface Device { 221 | status: 'offline' | 'initializing' | 'online'; 222 | ip: string | null; 223 | progress: number | null; 224 | sensorData: SensorData | null; 225 | } 226 | ``` 227 | 228 | With this approach, knowing which fields have meaningful values based 229 | on the current `status` is implicit information. When programs get big 230 | and there are many interfaces, this quickly becomes extremely 231 | complex. 232 | 233 | Another shortcoming of this approach is that it's possible to have 234 | states that don't make any sense, what does it mean when an `offline` 235 | device that has an `ip`? 236 | 237 | Tagged unions solve both of these problems as can be seen bellow 238 | 239 | ```typescript 240 | interface OfflineDevice { 241 | status: 'offline'; 242 | } 243 | 244 | interface InitializingDevice { 245 | status: 'initializing'; 246 | ip: string; 247 | progress: number; 248 | } 249 | 250 | interface OnlineDevice { 251 | status: 'online'; 252 | ip: string; 253 | sensorData: SensorData; 254 | } 255 | 256 | type Device = OfflineDevice | InitializingDevice | OnlineDevice; 257 | ``` 258 | 259 | Another approach without tagged unions would be to use the same data 260 | structures as nullable. 261 | 262 | ```typescript 263 | interface Device { 264 | offline: OfflineDevice | null; 265 | initializing: InitializingDevice | null; 266 | online: OnlineDevice | null; 267 | } 268 | ``` 269 | 270 | This approach still leaves us with the possibility of having a 271 | contradicting state that doesn't make sense. What do you do with a 272 | device if every field is null or if all three have values? 273 | 274 | ### Lists with tagged unions 275 | 276 | Tagged unions are also useful to represent a list of different 277 | types. Many programming languages use inheritance to model such a list 278 | but, just like exceptions, inheritance acts on a program's control 279 | flow. Whenever we are interacting with the control flow, crossing the 280 | process boundary and combining with other typing concepts becomes 281 | difficult. 282 | 283 | Let's say a connected devices can be robots, sensors or servers — each 284 | with their own associated information. 285 | 286 | With tagged unions, we can have a list that combines multiple types. 287 | 288 | ```typescript 289 | const devices = (Robot | Sensor | Server)[]; 290 | ``` 291 | 292 | Without tagged unions, we could fall back to having multiple lists — 293 | one per type. The downside of this approach is that the sorting 294 | information is lost as one list is decoupled into three. 295 | 296 | ```typescript 297 | const robots = Robot[]; 298 | const sensors = Sensor[]; 299 | const servers = Server[]; 300 | ``` 301 | 302 | ## Conclusion 303 | 304 | A modern type system, offering **static types**, **null safety** and 305 | **tagged unions** is a powerful tool that enables the compiler to 306 | catch many errors with little effort. Interfaces are one of the most 307 | important places to apply typing as that can serve as a contract 308 | between modules that is enforced by the compiler. 309 | 310 | When we don't have static type checking, we rely on informal 311 | agreements: 312 | 313 | - comments, 314 | - documentation, 315 | - meeting notes, 316 | - best practices, 317 | - hopeful optimism, 318 | - … 319 | 320 | What all these have in common is that they rely on developers to 321 | ensure everything fits and when we don't respect the agreements, we 322 | get run-time crashes or unexpected behaviour. 323 | 324 | These agreements are usually respected at first but start to break as 325 | the program grows and evolves. This is especially painful for 326 | interfaces where it's harder to make sure that both sides of the 327 | interface evolve in the same way. 328 | 329 | [^pyramid]: Test Automation Pyramid — [Succeeding with Agile](https://www.mountaingoatsoftware.com/books/succeeding-with-agile-software-development-using-scrum) (Mike Cohn) 330 | 331 | [^tagged1]: Tagged unions have other names: 332 | **variant**, **choice type**, **discriminated union**, **sum type**. 333 | 334 | [^taggedold]: Early languages implementing tagged unions 335 | **ALGOL 68**, **ML**, **Pascal**, **Ada**, **Modula-2**, 336 | **Haskell**. 337 | 338 | [^taggednew]: New languages implementing tagged unions 339 | **Rust**, **Swift**, **Scala**, **Typescript**, **Kotlin**, 340 | **Python 3.9+**. 341 | 342 | [^null]: A few languages with null safety 343 | **Typescript**, **Rust**, **Kotlin**, **Swift**. 344 | -------------------------------------------------------------------------------- /modern-cpp/README.md: -------------------------------------------------------------------------------- 1 | # Modern C++ 2 | 3 | Modern C++ embraces the features from C++11 which lay the foundation 4 | for a new generation of C++. It introduced move semantics and embraced 5 | RAII in the standard library (`unique_ptr`, `shared_ptr`, 6 | `lock_guard`). 7 | 8 | Embracing **R**esource **A**cquisition **I**s **I**nitialization 9 | (RAII) makes C++ the safest, most productive and fun C++ has ever 10 | been. Unfortunately, to leverage RAII, you need a good understanding 11 | of why it's needed and how it works which is what I hope to distill 12 | for you. 13 | 14 | I'll explain RAII by example while referencing the [C++ core 15 | guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#main). 16 | 17 | # What is RAII? 18 | 19 | The idea behind [RAII](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Rr-raii) is to manage resources using variables and 20 | scope so that resources are automatically freed. A resource is 21 | anything that can be acquired and must later be released, not just 22 | memory. 23 | 24 | | Resource | | 25 | |------------------|-------------| 26 | | Memory | new/delete | 27 | | File descriptors | open/close | 28 | | Lock | lock/unlock | 29 | 30 | The main idea is to acquire a resource and store its handle in a 31 | `private` member in a class. We can then control its access and 32 | release the resource in the `destructor`. 33 | 34 | We'll create a safe wrapper around a UNIX file descriptor keeping 35 | in mind that the same concept can be applied to any other 36 | resource. Normally, the standard library has a robust 37 | implementation already in place so make sure you check there first 38 | to save yourself some work. 39 | 40 | # Naive implementation 41 | 42 | We'll start off with the following definition and assume that 43 | there's a reasonable implementation for each prototype. 44 | 45 | ```c++ 46 | class NaiveFile { 47 | public: 48 | NaiveFile(const std::string &path); 49 | ~NaiveFile(); 50 | 51 | /// Reads up to 1024 characters. If 52 | /// something goes wrong, it returns 53 | /// an empty string. 54 | std::string read_1024() const; 55 | 56 | private: 57 | int fd; 58 | }; 59 | ``` 60 | 61 | The following code behaves in ways that are unintended: 62 | 63 | ```c++ 64 | // Notice we pass by copy 65 | void accidental_copy(NaiveFile file); 66 | 67 | int main() { 68 | NaiveFile naive_file = NaiveFile(filename); 69 | 70 | // Copy 1, since we pass by copy 71 | accidental_copy(naive_file); 72 | 73 | // Copy 2, we used the = operator that makes a copy 74 | auto file2 = naive_file; 75 | } 76 | ``` 77 | 78 | Running the above code with trace printing results in the 79 | following: 80 | 81 | ``` 82 | (fd 3) open main.cpp 83 | (fd 3) ~NaiveFile closing 84 | (fd 3) ~NaiveFile closing 85 | (fd 3) Couldn't close file: 'Bad file descriptor' 86 | (fd 3) ~NaiveFile closing 87 | (fd 3) Couldn't close file: 'Bad file descriptor' 88 | ``` 89 | 90 | We can see that the destructor is called 3 times and we free the 91 | resource each time. This happens once for the main instance and 92 | once for each copy as the comments predicted. We shouldn't free 93 | a resource more than once, so we get a 'Bad file descriptor' error 94 | when we try to close the file the second or third time. 95 | 96 | # Understanding the problem 97 | 98 | Before fixing the problem, we should understand what's going 99 | on. The first thing to know is that, under certain conditions, C++ 100 | compilers implement a default version of the following: 101 | 102 | | Can be defaulted | | 103 | |---------------------|-------------------------------------| 104 | | default constructor | `NaiveFile()` | 105 | | destructor | `~NaiveFile()` | 106 | | copy constructor | `NaiveFile(const NaiveFile &)` | 107 | | copy assignment | `void operator=(const NaiveFile &)` | 108 | | move constructor | `NaiveFile(NaiveFile &&)` | 109 | | move assignment | `void operator=(NaiveFile &&)` | 110 | 111 | These are known as special operations and together they control 112 | an object's life cycle (create, copy, move, destroy) ([guidelines 113 | C.ctor](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#S-ctor)). For RAII, we don't have to worry about the default 114 | constructor since it's either a sane default or we implemented a 115 | constructor ourselves. 116 | 117 | The defaults are pretty simple — if you have no implementation, a 118 | default is put in place that calls the same operation for each 119 | member. If a member has no implementation, the default isn't 120 | created. 121 | 122 | In our case, we implemented a constructor so there won't be a 123 | default constructor. The only member is an `int`, so all the other 124 | defaults will be created. 125 | 126 | | | Compiler destructor | Compiler copy operations | Compiler move operations | 127 | |----------------------------|---------------------|--------------------------|--------------------------| 128 | | No user special operation | defaulted | defaulted | defaulted | 129 | | User destructor exists | — | **defaulted** | | 130 | | User copy operation exists | defaulted | **defaulted** | deleted | 131 | | User move operation exists | defaulted | | | 132 | 133 | In the table, we highlighted that the compiler implemented a default 134 | copy constructor (triggered by copy 1) and a default copy 135 | assignment (triggered by copy 2). This is what caused the unintended 136 | behaviour. 137 | 138 | The default that is put in place treats the `int` as copyable. That's 139 | a problem — file descriptors aren't simple numbers that can be 140 | copied since they're a handle to a resource the kernel holds for us. 141 | 142 | # The fix — rule of 3 143 | 144 | The fix is easy: make copies impossible by deleting the operations 145 | that we don't want to default. With this change, the destructor will 146 | only be called once and the resource will only be freed once. 147 | 148 | ```c++ 149 | class SafeFile { 150 | public: 151 | SafeFile(const std::string &path); 152 | SafeFile(const SafeFile &) = delete; 153 | ~SafeFile(); 154 | void operator=(const SafeFile &) = delete; 155 | 156 | /// Reads up to 1024 characters. If 157 | /// something goes wrong, it returns 158 | /// an empty string. 159 | std::string read_1024() const; 160 | 161 | private: 162 | int fd; 163 | }; 164 | ``` 165 | 166 | Now, our file wrapper can't be misused since trying to call a 167 | deleted operation will cause a compilation error. To highlight that 168 | our implementation is now safe, it's now called `SafeFile`. 169 | 170 | As a rule if you implement a destructor or a copy operation, 171 | you're likely doing resource management and have to implement all 172 | 3. If you don't, you're likely to end up with an error like we did 173 | above. This is known as the rule of 3. 174 | 175 | | Rule of 3 | 176 | |------------------| 177 | | Destructor | 178 | | Copy constructor | 179 | | Copy assignment | 180 | 181 | We see in the table above that if we `delete` the copy operations, the 182 | move operations also get deleted. This makes our wrapper less 183 | useful because the resource is stuck in the context where it was 184 | created. Implementing a move lets us safely transfer it to a new 185 | context. 186 | 187 | Without a move operation, we are also potentially giving up 188 | performance. If we have a value that could be moved (`rvalue`) 189 | but no move operation is available, the value will be copied 190 | instead — this can be slow. 191 | 192 | Because of the two use cases above, you usually want to expand the 193 | rule of 3 to include the move constructor and move 194 | assignment as well. This is known as [the rule of five](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Rc-five). So the 195 | rule of 3 makes us safe and the rule of 5 gives performance and 196 | convenience. 197 | 198 | | Rule of 5 | 199 | |------------------| 200 | | Destructor | 201 | | Copy constructor | 202 | | Copy assignment | 203 | | Move constructor | 204 | | Move assignment | 205 | 206 | I wondered why the rule of 3 and the rule of 5, aren't 207 | enforced by the compiler. It couldn't be an error because that 208 | would break too many existing programs but it could at least be a 209 | warning. It turns out the C++ committee did exactly this for C++11 210 | by updating the wording ([PDF, section 12.8/7](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2012/n3337.pdf)) so that such a 211 | warning could be generated. 212 | 213 | In GCC and Clang, the warning is hidden behind the 214 | `-Wdeprecated-copy-dtor` flag which is disabled by default and 215 | almost never enabled (not part of `-Wall`, `-Wextra` or 216 | `-Wpedantic`). This is why I had never seen it and, hopefully, I'll 217 | remember to turn it on for my future projects. 218 | 219 | # Nice wrapper — Rule of 5 220 | 221 | To round up our example, let's upgrade it from using the rule of 3 222 | to the rule of 5 by implementing the move operations. 223 | 224 | ```c++ 225 | class FileWrapper { 226 | public: 227 | FileWrapper(const std::string &path); 228 | FileWrapper(const FileWrapper &) = delete; 229 | FileWrapper(FileWrapper &&); 230 | ~FileWrapper(); 231 | void operator=(const FileWrapper &) = delete; 232 | void operator=(FileWrapper &&); 233 | 234 | /// Reads up to 1024 characters. If 235 | /// something goes wrong, it returns 236 | /// an empty string. 237 | std::string read_1024() const; 238 | 239 | private: 240 | int fd { -1 }; 241 | }; 242 | ``` 243 | 244 | Once again, it gets a name update, since it's now fleshed out. 245 | 246 | There's an important implementation detail with the move 247 | operations, the destructor of the object that was moved will 248 | still be called but it shouldn't free the resource. This means 249 | that we have to put the original object in a state where the resource 250 | won't be released by the destructor. 251 | 252 | In our example, we do that by setting the original `fd` to `-1` as we 253 | can see bellow. This works because an `fd` smaller than `0` indicates 254 | that the open operation wasn't successful and shouldn't be closed. 255 | 256 | ```c++ 257 | FileWrapper::~FileWrapper() { 258 | if (fd >= 0) close(fd); 259 | } 260 | 261 | FileWrapper::FileWrapper(FileWrapper &&wrapper) 262 | : fd(wrapper.fd) { 263 | // 'wrapper' won't close a fd with -1 in its 264 | // destructor. 265 | wrapper.fd = -1; 266 | } 267 | 268 | // We do something similar for operator= 269 | ``` 270 | 271 | To sum up the latest changes: 272 | 273 | | Method | Description | 274 | |---------------|--------------------------------------------------------| 275 | | `constructor` | opens a file, `fd` >= 0 when successful | 276 | | `move` | takes `fd` from the original and sets original to `-1` | 277 | | `destructor` | frees the resource if opened successfully (>= 0) | 278 | 279 | # Implications — rule of zero 280 | 281 | We should now have a good enough understanding of RAII to put it to 282 | use. Once we have a RAII wrapper around a resource, it becomes very 283 | hard to misuse it which allows us to create sealed abstractions 284 | around resources. Most of the time, the standard library has an 285 | existing wrapper so we simply have to use those. 286 | 287 | One important detail that briefly mentioned is that a `struct` or 288 | `class` that uses our `FileWrapper` won't have default copy operations 289 | because `FileWrapper` doesn't have copy operations. This means that, 290 | any class that properly implements RAII can safely be put in any 291 | other class and it will just work. 292 | 293 | We've successfully isolated all the complexity of resource 294 | management into our wrapper class making modern C++ feel like 295 | a garbage collected language only it's faster and it works for 296 | any resource, not just memory. This happens because the defaults 297 | just do the right thing if you follow the rule of 5. 298 | 299 | In a codebase, there are usually have a few resource types that 300 | are used all over the code. For each resource, we use an 301 | RAII wrapper from the standard library or, in a rare case, we 302 | implement one ourselves. This is the only time we really need to think 303 | about resource management and in the rest of the code, which is 304 | the majority, we don't worry about resource management. C++'s 305 | defaults of the special operations do the heavy lifting for us. This 306 | is known as the [rule of zero](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Rc-zero). 307 | 308 | Having safe, fast and effortless resource management makes modern C++ 309 | extremely pleasant. 310 | 311 | # Limitations 312 | 313 | RAII makes resource management safe, even when exceptions cause an 314 | unpredicted control flow to happen, but it has one main limitation: 315 | RAII doesn't prevent use after free. If you move a resource, you can 316 | still use it afterwards. 317 | 318 | # Further reading 319 | 320 | - [foonathan, When to Write Which Special Member](https://www.foonathan.net/2019/02/special-member-functions/), 321 | - [MC++, C++ Core Guidelines: The Rule of Zero, Five, or Six](https://www.modernescpp.com/index.php/c-core-guidelines-constructors-assignments-and-desctructors/), 322 | - [Bjarne Stroustrup, Writting Good C++14](https://isocpp.org/blog/2015/09/stroustrup-cppcon15-keynote), 323 | - [Herb Sutter, Writting Good C++14… By Default](https://isocpp.org/blog/2015/09/sutter-cppcon15-day2plenary). 324 | -------------------------------------------------------------------------------- /cpp-build-process/README.md: -------------------------------------------------------------------------------- 1 | # The C++ Build Process Explained 2 | 3 | > The last good thing written in C was Franz Schubert's Symphony Number 9. 4 | > 5 | > -- Erwin Dieterich 6 | 7 | > Fifty years of programming language research, and we end up with C++? 8 | > 9 | > -- Richard A. O’Keefe 10 | 11 | > There are only two kinds of programming languages: those people always bitch 12 | > about and those nobody uses. 13 | > 14 | > -- Bjarne Stroustrup 15 | 16 | The C++ build process is built on top of the C build process which was hacked 17 | together in 1972 on a PDP-7 with at most 144 KB of RAM. Honestly, I'm surprised 18 | it aged so well. 19 | 20 | ![](pdp7.jpg) 21 | 22 | The exact linking process in C and C++ isn't standard, it's up to each compiler 23 | to make things work. Most compilers behave in the same way with small 24 | differences but we'll be focusing on the GNU tools to keep things simple. 25 | 26 | ## An Overview 27 | 28 | Computers in 1972 weren't very powerful and so the process had to be broken down 29 | into smaller steps. This also allows you to rebuild only the parts that change. 30 | 31 | - we have many C/C++ source files, 32 | - for each source file, we run an independent compiler, 33 | - we run the pre-processor over the current file, 34 | - this pulls in every header and its parents into the file, 35 | - the file gets compiled and transformed into an object, 36 | - we group every object we generated into an executable or library. 37 | 38 | ![](tree.svg) 39 | 40 | Entering a command to compile every source file separately is annoying and so we 41 | have a build tool that knows how to compile each of our source files. Not only 42 | that but it can also regenerate the objects for the source files that have 43 | changed saving us a lot of time when recompiling with minor changes. 44 | 45 | ## Trivial C Program 46 | 47 | Let's use a trivial C program to illustrate the build process. We can build it 48 | with the following commands: 49 | 50 | ``` 51 | cc -Wall -O0 -std=c99 -g -c -o add.o add.c 52 | cc -Wall -O0 -std=c99 -g -c -o simple.o simple.c 53 | ``` 54 | 55 | ### add.h 56 | 57 | ```c 58 | #ifndef ADD_H_INCLUDED 59 | #define ADD_H_INCLUDED 60 | 61 | int add(int a, int b); 62 | int sub(int a, int b); 63 | 64 | #endif // ADD_H_INCLUDED 65 | ``` 66 | 67 | ### add.c 68 | 69 | ```c 70 | #include "add.h" 71 | 72 | int add(int a, int b) 73 | { 74 | return a + b; 75 | } 76 | 77 | int sub(int a, int b) 78 | { 79 | return a - b; 80 | } 81 | ``` 82 | 83 | ### simple.c 84 | 85 | ```c 86 | #include 87 | 88 | #include "add.h" 89 | 90 | int main(int argv, char **argc) 91 | { 92 | printf("%i\n", sub(add(5, 6), 6)); 93 | 94 | return 0; 95 | } 96 | ``` 97 | 98 | ## Why Headers? 99 | 100 | > TLDR we need to know the size of structures and function arguments 101 | 102 | When our main function wants to call the `add` function, it needs to know what 103 | it returns and what it takes in as an argument before it can call the function. 104 | A typical call to `add` from our main would look like: 105 | 106 | - our `main` function, 107 | - push space for the return value onto a stack (an int), 108 | - push the parameters onto the stack (`a` and `b`), 109 | - push the return address (the next part of our main function), 110 | - jump to the add function, 111 | 112 | - our `add` function 113 | - store the execution state, 114 | - add `a` and `b` placing the result in the return value space, 115 | - restore the execution state, 116 | - jump to the return address, 117 | 118 | - back in our `main` function, 119 | - pop the parameters (`a` and `b`), 120 | - use the return value that is now on the stack. 121 | 122 | The way to call a function and the method for building the stack make up most of 123 | an ABI (Application Binary Interface) definition. Every compiler is free to have 124 | its own ABI. The same compiler usually has a different ABI for the different 125 | processors it supports. This makes things complex but efficient. 126 | 127 | Since we are making space on the stack for our return value and parameters, we 128 | have to know their size. If any of those are a structure, we have to know what 129 | it's made of so that we can know its size. We let the compiler know all this 130 | with function declarations and structure definitions. 131 | 132 | You could put a function's declaration in every source file that needs it but 133 | that's a terrible idea since the declaration has to be the same everywhere if you 134 | want anything to work. Instead of having the same declaration everywhere, we put 135 | the declaration in a common file and include it where it is necessary. This 136 | common file is what we know as a header. 137 | 138 | > Sometimes, we only use a structure through pointers or references which means 139 | > we don't really have to know that structure's exact size. This leads to a 140 | > clever trick known as pointer implementation (PIMPL) which is really useful 141 | > for speeding up compilation and hiding implementation details. For 142 | > [more information](https://marcmutz.wordpress.com/translated-articles/pimp-my-pimpl/) 143 | > on PIMPL. 144 | 145 | ## Pre-processor 146 | 147 | In those header files and source files, you've hopefully noticed lines that 148 | start with `#`. Whenever you see a directive that starts with `#`, we are 149 | dealing with the C pre-processor. The pre-processor does the following: 150 | 151 | - include files (`#include`), 152 | - macro expansions (`#define RADTODEG(x) ((x) * 57.29578)`), 153 | - conditional compilation (`#if`, `#ifdef`, etc.), 154 | - line control (`__FILE__`, `__LINE__`). 155 | 156 | Basically, the compiler has a state which can be modified by these directives. 157 | Since every `*.c` file is treated independently, every `*.c` file that is being 158 | compiled has its own state. The headers that are included modify that file's 159 | state. The pre-processor works at a string level and replaces the tags in the 160 | source file by the result of basic functions based on the state of the compiler. 161 | 162 | The `#include` pre-processor is probably the most important. Luckily, it is 163 | really simple: it finds the file and replaces the `#include` line with the 164 | contents of that file. 165 | 166 | Where does it find the files? 167 | 168 | - `#include ` looks for `sum.h` in a list of include directories, 169 | - `#include "sum.h"` does the same but looks in the current folder first. 170 | 171 | C and C++ don't actually define a mechanism for providing the list of include 172 | directories, that is up to the compiler. This causes many problems with cross 173 | platform development which some build tools can solve. 174 | 175 | ## Include Guards 176 | 177 | When you include a header, there is usually a `#ifndef` and `#define` statement 178 | at the top of the file and a corresponding `#endif` at the bottom. We call this 179 | an include guard. It is responsible for setting a variable the first time it is 180 | run so that including the same file a second time doesn't redefine things that already 181 | exist and cause the compiler to panic. 182 | 183 | ``` 184 | #ifndef FILENAME_INCLUDED 185 | #define FILENAME_INCLUDED 186 | 187 | // code 188 | 189 | #endif 190 | ``` 191 | 192 | This is a very useful trick but it's also one of the more fundamental problems of 193 | C: 194 | 195 | - you include a file a first time, 196 | - it modifies the compiler state, 197 | - you include the same file a second time, 198 | - based on the compiler state, it pretends to be empty. 199 | 200 | That is completely crazy - the file you include can change based on the state of 201 | the compiler. Not only that but the included files themselves can modify the 202 | state of the compiler (windows.h is infamous for doing this). 203 | 204 | Because of this, compiling becomes slow and complex. Suppose that we want to 205 | compile two files which both include `` and that `` itself 206 | includes about 50 other files. We are not able to cache `` without 207 | proving that the compiler state is the same when we include it! 208 | 209 | So what started out as a simple, easy to implement solution turns out to scale 210 | really poorly. This wasn't an issue in 1972 when the computers limited the 211 | complexity but almost 50 years later, it's a big problem. The C++ standards 212 | committee has been trying to introduce a module system to fix this but it's a 213 | difficult task to change such a fundamental system in an established language. 214 | 215 | ## Header Trees 216 | 217 | When you include a header, this header can include others and it can quickly get 218 | messy. If we compile a file with the `-H` flag, we can visualize the various header graphs: 219 | 220 | ``` 221 | gcc -H -O0 -std=c99 -g -c -o simple.o simple.c 222 | ``` 223 | 224 | - /usr/include/stdio.h 225 | - /usr/include/bits/libc-header-start.h 226 | - /usr/include/features.h 227 | - /usr/include/sys/cdefs.h 228 | - /usr/include/bits/wordsize.h 229 | - /usr/include/bits/long-double.h 230 | - /usr/include/gnu/stubs.h 231 | - /usr/include/gnu/stubs-64.h 232 | - /usr/lib/gcc/x86_64-pc-linux-gnu/8 2 1/include/stddef.h 233 | - /usr/lib/gcc/x86_64-pc-linux-gnu/8 2 1/include/stdarg.h 234 | - /usr/include/bits/types.h 235 | - /usr/include/bits/wordsize.h 236 | - /usr/include/bits/typesizes.h 237 | - /usr/include/bits/types/__fpos_t.h 238 | - /usr/include/bits/types/__mbstate_t.h 239 | - /usr/include/bits/types/__fpos64_t.h 240 | - /usr/include/bits/types/__FILE.h 241 | - /usr/include/bits/types/FILE.h 242 | - /usr/include/bits/types/struct_FILE.h 243 | - /usr/include/bits/stdio_lim.h 244 | - /usr/include/bits/sys_errlist.h 245 | - add.h 246 | 247 | We can see that we go from 2 includes to 22. This can quickly get out of hand 248 | for big projects. 249 | 250 | The difficulty is that sometimes, you are including many headers indirectly 251 | through another header. For example, if you include `ros.h`, it includes `boost` 252 | which quickly balloons the number of headers to parse. It can quickly get out of 253 | hand and to compile a single source file, you sometimes have to visit over 2000 254 | header files. This makes compilation excruciatingly slow and this is where the 255 | [`PIMPL`](https://en.cppreference.com/w/cpp/language/pimpl) idiom can really help. 256 | 257 | ## An Object File 258 | 259 | After all this work, the compiler can do the actual compiling of our source file 260 | with all the headers pasted into it. Once the compilation is finished, we have 261 | an object file. 262 | 263 | An object file is an organized way to store assembly functions that aren't yet 264 | linked together. We can examine the object file we generate with the `simple.c` 265 | source file with the following command: 266 | 267 | ``` 268 | objdump -dr simple.o 269 | ``` 270 | 271 | ### simple.o 272 | 273 | ``` 274 | simple.o: file format elf64-x86-64 275 | 276 | 277 | Disassembly of section .text: 278 | 279 | 0000000000000000
: 280 | 0: 55 push %rbp 281 | 1: 48 89 e5 mov %rsp,%rbp 282 | 4: 48 83 ec 10 sub $0x10,%rsp 283 | 8: 89 7d fc mov %edi,-0x4(%rbp) 284 | b: 48 89 75 f0 mov %rsi,-0x10(%rbp) 285 | f: be 06 00 00 00 mov $0x6,%esi 286 | 14: bf 05 00 00 00 mov $0x5,%edi 287 | 19: e8 00 00 00 00 callq 1e 288 | 1a: R_X86_64_PLT32 add-0x4 289 | 1e: be 06 00 00 00 mov $0x6,%esi 290 | 23: 89 c7 mov %eax,%edi 291 | 25: e8 00 00 00 00 callq 2a 292 | 26: R_X86_64_PLT32 sub-0x4 293 | 2a: 89 c6 mov %eax,%esi 294 | 2c: 48 8d 3d 00 00 00 00 lea 0x0(%rip),%rdi # 33 295 | 2f: R_X86_64_PC32 .rodata-0x4 296 | 33: b8 00 00 00 00 mov $0x0,%eax 297 | 38: e8 00 00 00 00 callq 3d 298 | 39: R_X86_64_PLT32 printf-0x4 299 | 3d: b8 00 00 00 00 mov $0x0,%eax 300 | 42: c9 leaveq 301 | 43: c3 retq 302 | ``` 303 | 304 | This is what the most complex file in our tiny example looks like. The important 305 | thing to notice is that the assembly code is grouped into the `
` function 306 | and that there are calls to functions that don't yet exist like the call to 307 | `sub` copied below: 308 | 309 | ``` 310 | 25: e8 00 00 00 00 callq 2a 311 | 26: R_X86_64_PLT32 sub-0x4 312 | ``` 313 | 314 | ## Symbol Tables 315 | 316 | At the top of an object file there is a list of all the functions that are 317 | defined in that object and all the functions that are used but not defined. This 318 | is known as a symbol table. 319 | 320 | To get our symbol tables, we use the following commands: 321 | 322 | ``` 323 | nm add.o > add.sym 324 | nm simple.o > simple.sym 325 | ``` 326 | 327 | ### add.o 328 | 329 | | Position | Type | Name | 330 | | - | - | - | 331 | | 0 | Text | add | 332 | | 14 | Text | sub | 333 | 334 | ### simple.o 335 | 336 | | Position | Type | Name | 337 | | - | - | - | 338 | | | Undefined | add | 339 | | | Undefined | _GLOBAL_OFFSET_TABLE_ | 340 | | 0 | Text | main | 341 | | | Undefined | printf | 342 | | | Undefined | sub | 343 | 344 | ## A Linker's Job 345 | 346 | To get an executable, we put many object files together and link the undefined 347 | function calls to their implementations found in other object files. There are 348 | two ways to do this: 349 | 350 | 1. Linking the functions directly together by jumping directly to the function. 351 | 2. Having a table that contains our functions and look up where to jump before 352 | jumping to the desired function. 353 | 354 | The first option describes static linking. This is more efficient, less flexible 355 | and rarely used. 356 | 357 | The second option describes dynamic linking. It is a little bit slower but much 358 | more flexible and is the standard way to ship a library. 359 | 360 | ## Differences in C++ 361 | 362 | So far, we've been talking about C but luckily, C++ was designed to be 363 | compatible with the C build process. In fact, the first C++ compiler was known 364 | as "C with Classes" and it was a pre-compiler that transformed a C++ into C. 365 | 366 | Modern C++, introduces two big differences: 367 | 368 | - templates, 369 | - mangling. 370 | 371 | Templates are complicated enough to have their own tutorial but mangling is 372 | pretty simple and more important. The following source file gives us an idea of 373 | mangling: 374 | 375 | ```c++ 376 | extern "C" int add_c(int a, int b) 377 | { 378 | return a + b; 379 | } 380 | 381 | int add(int a, int b) 382 | { 383 | return a + b; 384 | } 385 | 386 | int add(const int *a, const int &b) 387 | { 388 | return *a + b; 389 | } 390 | 391 | float add(float a, float b) 392 | { 393 | return a + b; 394 | } 395 | 396 | namespace manu 397 | { 398 | int add(int a, int b) 399 | { 400 | return a + b; 401 | } 402 | } 403 | ``` 404 | 405 | If we look at it with `nm`: 406 | 407 | ``` 408 | nm mangling.o 409 | c++filt _Z3addff 410 | ... 411 | ``` 412 | 413 | | Position | Type | Name | Signature | 414 | | - | - | - | - | 415 | | 0 | Text | add_c | int add_c(int, int) | 416 | | 44 | Text | _Z3addff | float add(float, float) | 417 | | 14 | Text | _Z3addii | int add(int, int) | 418 | | 28 | Text | \_Z3addPKiRS\_ | int add(const int *, const int &)| 419 | | 5e | Text | _ZN4manu3addEii | int manu::add(int, int) | 420 | 421 | Basically, in C, functions are simply identified by their names. This prevents 422 | us from having namespaces and having a function with the same name but different 423 | arguments. C++ gets around this by using mangling. `extern "C"` turns off 424 | mangling so that C++ can be compatible with C. 425 | 426 | Unfortunately, many compilers do mangling differently and so are incompatible. 427 | Luckily, most compilers have recently standardized on the Itanium C++ ABI that 428 | you see above. 429 | 430 | - start with `_Z` since underscore capital letter is reserved in C, 431 | - an `N` after the `Z` indicates nested names, 432 | - put numbers that indicate the length of the next argument, 433 | - this gives us a list of strings, 434 | - the last string is the function, class or struct name, 435 | - the previous ones are the namespaces or outer classes, 436 | - if our names were nested, we insert an `E`, 437 | - we indicated the type and modifiers of our arguments. 438 | 439 | [mangling details](https://github.com/gchatelet/gcc_cpp_mangling_documentation) 440 | 441 | > Even with mangling, we don't have enough size information for function calls 442 | > to forgo headers. We are missing the size of the return value and the size of 443 | > structures. 444 | 445 | ## A Basis for Objects 446 | 447 | To build an object system, we need static dispatch (when two functions have the 448 | same name, calling the one with matching arguments). This is crucial so that we 449 | can differentiate between `a.method` and `b.method`. If we didn't have mangling, 450 | we couldn't use the same method name in two different classes. 451 | 452 | ```c++ 453 | struct Num 454 | { 455 | int add(int a, int b) 456 | { 457 | return a + b; 458 | } 459 | }; 460 | 461 | int add(const Num *self, int a, int b) 462 | { 463 | return a + b; 464 | } 465 | 466 | int main(int, char **) 467 | { 468 | Num a; 469 | const int res1 = a.add(5, 6); 470 | const int res2 = add(&a, 5, 6); 471 | 472 | return res1 + res2; 473 | } 474 | ``` 475 | 476 | | Position | Type | Name | Signature | 477 | | - | - | - | - | 478 | | 18 | Text | main | main | 479 | | 0 | Text | _Z3addPK3Numii | add(const Num *, int, int) | 480 | | 0 | Weak | _ZN3Num3addEii | Num::add(int, int) | 481 | 482 | 483 | ## Putting Things Together with Build Systems 484 | 485 | We don't want to remember and execute the build commands by hand (at least I 486 | don't). That's why we have build tools: 487 | 488 | - Make, 489 | - meson, 490 | - Autotools, 491 | - bazel, 492 | - CMake, 493 | - VisualStudio, 494 | - bash scripts, 495 | - etc. 496 | 497 | A build tool usually: 498 | 499 | - has a list of source files, 500 | - knows how to build each source file, 501 | - keeps a dependency graph to rebuild only files that change, 502 | - keeps a list of directories containing header files, 503 | - keeps a list of external libraries to link to (static/dynamic), 504 | - manages compiler flags (optimization level, warning level), 505 | - knows which files to link into executables and libraries. 506 | 507 | Some build tools offer additional features: 508 | 509 | - program installation, 510 | - cross platform support, 511 | - cross compilation, 512 | - dependency installation. 513 | 514 | Ideally, at the end of all this, we can quickly and easily generate bug free 515 | programs. 516 | 517 | We've been trying to generate bug free programs since at least 1972, have come 518 | close a few times but have never truly managed. Thankfully, most of the time, 519 | the compiler, linker and build system aren't to blame. 520 | --------------------------------------------------------------------------------