├── .travis.yml ├── LICENSE ├── README.md ├── demo.cc ├── tests.cxx ├── unify.cpp └── unify.hpp /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | sudo: required 3 | 4 | compiler: 5 | - clang 6 | - gcc 7 | 8 | install: 9 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.pre.sh | bash -x 10 | 11 | script: 12 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.build.sh | bash -x 13 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.run.sh | bash -x 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 r-lyeh (https://github.com/r-lyeh) 2 | 3 | This software is provided 'as-is', without any express or implied 4 | warranty. In no event will the authors be held liable for any damages 5 | arising from the use of this software. 6 | 7 | Permission is granted to anyone to use this software for any purpose, 8 | including commercial applications, and to alter it and redistribute it 9 | freely, subject to the following restrictions: 10 | 11 | 1. The origin of this software must not be misrepresented; you must not 12 | claim that you wrote the original software. If you use this software 13 | in a product, an acknowledgment in the product documentation would be 14 | appreciated but is not required. 15 | 2. Altered source versions must be plainly marked as such, and must not be 16 | misrepresented as being the original software. 17 | 3. This notice may not be removed or altered from any source distribution. 
18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Unify :link: 2 | ===== 3 | 4 | **Unify** is a C++11 function to normalize resource identifiers. 5 | 6 | Unify transforms any physical resource string to a unified string, called UID (Unified ID). Any absolute, relative, virtual and/or network paths, URI, URL or ID will transform to a UID. Basically `unify(src)` does a string transformation from the given string to a sorted `[a-z0-9_]+` pattern, which is guaranteed to remain immutable (to a high degree) in code, even if the physical source is altered externally. 7 | 8 | ## Features 9 | - [x] Unified folder/asset separators. 10 | - [x] Unified absolute, relative, virtual and remote paths. 11 | - [x] Unified uppercases, lowercases, whitespaces and hyphens. 12 | - [x] Unified extensions. 13 | - [x] Unified typos on double extensions and double punctuations. 14 | - [ ] Unified typos on many diacritics (disabled since v1.0.1, see Changelog). 15 | - [x] Unified AoS (OO) and SoA (ECS) disk layouts. 16 | - [x] Unified plurals as well (if using English words). 17 | - [x] Unified SOV, SVO, VSO, VOS, OVS, OSV subject/verb/object language topologies. 18 | - [x] Unified tagging (useful when globbing and deploying files and/or directories) 19 | - [x] Unified consistency. Reunification as a lossless process. 20 | - [x] Unify is header-only, cross-platform and standalone. 21 | - [x] Unify is ZLIB/LibPNG licensed. 
22 | 23 | ## Public API 24 | ```c++ 25 | // Convert anything to a UID 26 | // Additionally, if tags != null push back all parsed tags found 27 | string unify( const string &uri, vector<string> *tags = 0 ); 28 | ``` 29 | 30 | ## Quick tutorial TL;DR 31 | ```c++ 32 | // unified folder/asset separators 33 | std::string test = unify("folder\\asset"); 34 | assert( test == unify("folder/asset") ); 35 | assert( test == unify("folder-asset") ); 36 | assert( test == unify("folder|asset") ); 37 | assert( test == unify("folder:asset") ); 38 | assert( test == unify("folder;asset") ); 39 | assert( test == unify("folder,asset") ); 40 | assert( test == unify("[folder]asset") ); 41 | assert( test == unify("asset(folder)") ); 42 | // -> asset_folder 43 | 44 | // unified absolute, relative, virtual and remote paths 45 | test = unify("~home/game/folder/asset.jpg"); 46 | assert( test == unify("~user/game1/folder/asset.jpg") ); 47 | assert( test == unify("~mark/game2/folder/asset.jpg") ); 48 | assert( test == unify("~john/game3/data/folder/asset.jpg") ); 49 | assert( test == unify("../folder/asset.jpg") ); 50 | assert( test == unify("C:\\data\\folder\\asset.jpg") ); 51 | assert( test == unify("C:/game/data/folder/asset.jpg") ); 52 | assert( test == unify("data.zip/data/folder/asset.jpg") ); 53 | assert( test == unify("virtual.rar/folder/asset.jpg") ); 54 | assert( test == unify("http://web.domain.com%20/folder/asset.jpg?blabla=123&abc=123#qwe") ); 55 | // -> asset_folder 56 | 57 | // unified uppercases, lowercases, whitespaces and hyphens 58 | assert( unify("mesh/main-character") == "character_main_mesh" ); 59 | assert( unify("mesh/main_character") == "character_main_mesh" ); 60 | assert( unify("mesh/Main Character") == "character_main_mesh" ); 61 | assert( unify("mesh / Main character ") == "character_main_mesh" ); 62 | // -> character_main_mesh 63 | 64 | // unified extensions 65 | assert( unify("music/theme.ogg") == "music_theme" ); 66 | assert( unify("music/theme.wav") == "music_theme" ); 
67 | assert( unify("ui/logo.png") == "logo_ui" ); 68 | assert( unify("ui/logo.webp") == "logo_ui" ); 69 | // -> music_theme, -> logo_ui 70 | 71 | // unified typos on double extensions and double punctuations 72 | assert( unify("game/logo.bmp.png") == unify("game/logo.bmp") ); 73 | assert( unify("game/logo.png") == unify("game/logo..png") ); 74 | // -> game_logo 75 | 76 | // unified typos on many diacritics 77 | assert( unify("âñimátïón/wàlk") == unify("animation/walk") ); 78 | // -> animation_walk 79 | 80 | // unified AoS (OO) and SoA (ECS) disk layouts 81 | // unified plurals as well (if using English words) 82 | assert( unify("sounds/kid") == unify("kid/sound") ); 83 | assert( unify("sprites/kid") == unify("kid/sprite") ); 84 | assert( unify("sounds/car") == unify("car/sound") ); 85 | assert( unify("sprites/car") == unify("car/sprite") ); 86 | // -> car_sound, car_sprite, kid_sound, kid_sprite 87 | 88 | // unified SOV, SVO, VSO, VOS, OVS, OSV subject/verb/object language topologies 89 | test = unify("player-joins-scene.intro"); 90 | assert( test == unify("player-scene-join.intro") ); 91 | assert( test == unify("join-player-scene.intro") ); 92 | assert( test == unify("join-scene-player.intro") ); 93 | assert( test == unify("scene-join-player.intro") ); 94 | assert( test == unify("scene-player-join.intro") ); 95 | // -> join_player_scene 96 | 97 | // unified tagging (useful when globbing and deploying files and/or directories) 98 | test = unify("splash/logo"); 99 | assert( unify("/splash/#win32/logo") == test ); 100 | assert( unify("splash #mobile/logo #win32=always.png") == test ); 101 | // -> logo_splash 102 | 103 | // unified consistency. reunification as a lossless process 104 | assert( unify( unify("roses-are-red") ) == unify("roses-are-red") ); 105 | // -> are_red_rose 106 | ``` 107 | 108 | ## Showcase 109 | - John is a lead coder and must finish his game at time. 
110 | - When dealing with asset identifiers and filesystems, John just wants to write the identifiers once and forget about them during development (even after deployment!). 111 | - To achieve this: 112 | - John names every asset in the game with an identifier. 113 | - John embeds Unify over a thin disk wrapper. 114 | - From this point: 115 | - Everybody in the production team can `#tag` files and folders from now on as much as needed (or even `#tag=with-attributes`). These tags will be automatically removed from identifiers once unified and will help John later, especially when globbing files and prior to deploying a release (more on this later). 116 | - Whitespaces, underscores, uppercase characters, diacritics, double extensions (and a large list of mistakes) will be fixed automatically. 117 | - Artists will be able to change (to a high degree) the asset names and/or folders on disk at any time without telling the coders (just because artists are always that consistent). 118 | - Programmers will be able to transcode files to different formats without telling their technical directors: like going from .etc1 (desktop) to .pvr (iOS) (just because programmers are always that efficient). 119 | - Technical directors will be able to encrypt, repack and compress the filesystem into a completely different filesystem 2 hours before the release without telling the CEO (just because directors are always that security concerned). 120 | - And CEOs can still take that lovely bath on the beach with no interruptions. 121 | - This is actually John's wrapper: 122 | 123 | ```c++ 124 | #include <cassert> 125 | #include <map> 126 | #include "unify.hpp" 127 | 128 | // Simple filesystem dictionary 129 | struct disk { 130 | std::map< std::string, std::string > map; 131 | void add( const std::string &uri ) { 132 | map[ unify(uri) ] = uri; 133 | }; 134 | std::string lookup( const std::string &uid_or_uri ) const { 135 | auto find = map.find( unify(uid_or_uri) ); 136 | return find == map.end() ? 
std::string() : find->second; 137 | }; 138 | }; 139 | 140 | int main() { 141 | disk d; 142 | d.add("./local/file.txt"); 143 | d.add("./data/game/icon.png"); 144 | d.add("./songs/main_theme.ogg"); 145 | d.add("./game.zip/json #win32/inventory.json"); 146 | d.add("./game.zip/logos #win32/big.webp"); 147 | d.add("./game.zip/logos #mobile/small.png"); 148 | /* pseudocode: 149 | for( all mounted filesystems ) { 150 | for( all monitored files in subdirs ) { 151 | d.add( file.full_path ); 152 | } 153 | } */ 154 | // these virtual<->physical uris are now equivalent 155 | assert( d.lookup("local/file") == "./local/file.txt" ); 156 | assert( d.lookup("local-file") == "./local/file.txt" ); 157 | assert( d.lookup("file-local") == "./local/file.txt" ); 158 | assert( d.lookup("../file/local") == "./local/file.txt" ); 159 | assert( d.lookup("game/icon") == "./data/game/icon.png" ); 160 | assert( d.lookup("game-icon") == "./data/game/icon.png" ); 161 | assert( d.lookup("icon/game") == "./data/game/icon.png" ); 162 | assert( d.lookup("icon-game") == "./data/game/icon.png" ); 163 | assert( d.lookup("songs/main-theme") == "./songs/main_theme.ogg" ); 164 | assert( d.lookup("inventory-json") == "./game.zip/json #win32/inventory.json" ); 165 | assert( d.lookup("logos-big") == "./game.zip/logos #win32/big.webp" ); 166 | assert( d.lookup("logos-big") == "./game.zip/logos #win32/big.webp" ); 167 | } 168 | ``` 169 | 170 | - As a bonus side, John packages and updates DLCs in a breeze: 171 | ``` 172 | tools/7za a -tzip common.zip *#all* 173 | tools/7za a -tzip win32.zip *#w32* 174 | tools/7za a -tzip iphones.zip *#iphone* 175 | tools/7za a -tzip xmas.zip *#xmas* 176 | tools/7za a -tzip halloween.zip *#halloween* 177 | ``` 178 | 179 | - John's DLC updating code is roughly similar to: 180 | ``` 181 | wget -N http://website.com/dlc/common.zip 182 | wget -N http://website.com/dlc/xmas.zip 183 | wget -N http://website.com/dlc/halloween.zip 184 | [ "$PROFILE" == "WIN32" ] && wget -N 
http://website.com/dlc/win32.zip 185 | [ "$PROFILE" == "IPHONE" ] && wget -N http://website.com/dlc/iphones.zip 186 | ``` 187 | 188 | - John compares asset optimizations from rev3 to rev4 189 | ``` 190 | dir *#3* && dir *#4* 191 | ``` 192 | 193 | - John checks how big all textures are on the iphone5 build 194 | ``` 195 | dir *#iphone5*#textures* 196 | ``` 197 | 198 | ## Appendix: On transformation 199 | The reference implementation performs the transformation as follows: 200 | 201 | 1. Latinization (utf8, not implemented yet) 202 | 2. Remove diacritics (utf8, currently disabled) 203 | 3. Unescape URL (utf8, not implemented yet) 204 | 4. Remove url options (if any) 205 | 5. Lowercase contents 206 | 6. Strip tags in `#tag-123`, `#tag_456` or `#xbox360=yes` format. 207 | 7. Split path up to 2nd level. 208 | 8. Trim extensions and punctuators (if any). 209 | 9. Replace whitespaces with `_` underscores. 210 | 10. Split string into tokens (with `_` underscore separator). 211 | 11. Sort tokens array. 212 | 12. For every token, fix aos/soa plural (if any). 213 | 13. Join stems with `_` underscore separator. 214 | 215 | ## Appendix: Full tagging proposal 216 | ``` 217 | name (#platforms)(#factories)(#contexts)(#alias)(#type)(#version) 218 | ``` 219 | 220 | A possible proposal for a family of optional tags for any UID, that would be: 221 | - applied to `#platforms` device targets, and 222 | - loaded into `#factories` program consumers, and 223 | - used in `#contexts` program scenarios, and 224 | - referred to as `#aliases` identification aliases, and 225 | - kind of `#type` file format, and 226 | - located in `#versions` revision numbers. 227 | 228 | Note: there are no reserved keywords in **Unify**. Name tags are **application/project/company dependent** and have to be defined in advance. 229 | 230 | ## Changelog 231 | - v2.0.0 (2016/02/01): Switch to underscore separator, as opposed to hyphen 232 | - v1.0.1 (2015/11/21): Disabled diacritics for now. 
Also, x18 times faster 233 | - v1.0.0 (2015/08/18): Initial commit 234 | -------------------------------------------------------------------------------- /demo.cc: -------------------------------------------------------------------------------- 1 | #include "unify.hpp" 2 | 3 | // UID, hypothetical sugar class: a std::string that runs unify() on whatever it is built from or compared against { 4 | struct UID : public std::string { 5 | UID() : std::string() 6 | {} 7 | template 8 | UID( const T &t ) : std::string(unify(t)) 9 | {} 10 | UID( const UID &t ) : std::string(t) 11 | {} 12 | template 13 | bool operator<( const T &t ) const { 14 | const std::string &self = *this; 15 | return self < (std::string)UID(t); 16 | } 17 | template 18 | bool operator==( const T &t ) const { 19 | const std::string &self = *this; 20 | return self == (std::string)UID(t); 21 | } 22 | template 23 | bool operator!=( const T &t ) const { 24 | const std::string &self = *this; 25 | return self != (std::string)UID(t); 26 | } 27 | operator const std::string() const { 28 | return *this; 29 | } 30 | operator std::string() { 31 | return *this; 32 | } 33 | operator const char*() const { 34 | return this->c_str(); 35 | } 36 | }; 37 | // } 38 | 39 | #include 40 | 41 | int main() { 42 | // demo: transparent UID conversion 43 | // UID stands for Unified Identifier. 44 | // UID is always immutable in code, even if changed on disk. 
45 | // UID can be converted from paths, URLs, URIs, and IDs 46 | UID test = "game\\logo.bmp"; 47 | puts(test); 48 | test = "game_logo"; 49 | puts(test); 50 | test = "logo/game"; 51 | puts(test); 52 | test = "game_logo"; 53 | puts(test); 54 | test = "~home/game/folder/asset.jpg"; 55 | puts(test); 56 | test = "~user/game1/folder/asset.jpg"; 57 | puts(test); 58 | test = "~mark/game2/folder/asset.jpg"; 59 | puts(test); 60 | test = "~john/game3/data/folder/asset.jpg"; 61 | puts(test); 62 | test = "../folder/asset.jpg"; 63 | puts(test); 64 | test = "C:\\data\\folder\\asset.jpg"; 65 | puts(test); 66 | test = "C:/game/data/folder/asset.jpg"; 67 | puts(test); 68 | test = "data.zip/data/folder/asset.jpg"; 69 | puts(test); 70 | test = "virtual.rar/folder/asset.jpg"; 71 | puts(test); 72 | test = "http://web.domain.com%20/folder/asset.jpg?blabla=123&abc=123#qwe"; 73 | puts(test); 74 | } -------------------------------------------------------------------------------- /tests.cxx: -------------------------------------------------------------------------------- 1 | #define UNIFY_BUILD_TESTS 2 | #include "unify.cpp" 3 | -------------------------------------------------------------------------------- /unify.cpp: -------------------------------------------------------------------------------- 1 | #include "unify.hpp" 2 | -------------------------------------------------------------------------------- /unify.hpp: -------------------------------------------------------------------------------- 1 | /* Unify is a C++11 function to normalize resource identifiers. 2 | - rlyeh, zlib/libpng licensed. 3 | 4 | Unify transforms any physical resource string to a unified string, 5 | called UID (Unified ID). Any absolute, relative, virtual and/or network 6 | paths, URI, URL or ID will transform to a UID. 
Basically `unify(src)` does 7 | a string transformation from the given string to a sorted `[a-z0-9_]+` pattern, 8 | which is guaranteed to remain immutable (to a high degree) in code, even if 9 | the physical source is altered externally. 10 | 11 | ## Features 12 | - Unified folder/asset separators. 13 | - Unified absolute, relative, virtual and remote paths. 14 | - Unified uppercases, lowercases, whitespaces and hyphens. 15 | - Unified extensions. 16 | - Unified typos on double extensions and double punctuations. 17 | - Unified typos on many diacritics (disabled since v1.0.1). 18 | - Unified AoS (OO) and SoA (ECS) disk layouts. 19 | - Unified plurals as well (if using English words). 20 | - Unified SOV, SVO, VSO, VOS, OVS, OSV subject/verb/object language topologies. 21 | - Unified tagging (useful when globbing and deploying files and/or directories) 22 | - Unified consistency. Reunification as a lossless process. 23 | - Unify is header-only, cross-platform and standalone. 24 | - Unify is ZLIB/LibPNG licensed. 25 | 26 | ## Public API 27 | // Convert anything to a UID. Additionally, if tags != null push back all parsed tags found 28 | string unify( const string &uri, vector<string> *tags = 0 ); 29 | 30 | ## Samples 31 | You had better inspect the test suite at the bottom of this file. 32 | */ 33 | 34 | #pragma once 35 | 36 | // Public API 37 | 38 | #include 39 | #include 40 | 41 | #define UNIFY_VERSION "2.0.0" /* (2016/02/01) Switch to underscore separator, as opposed to hyphen 42 | #define UNIFY_VERSION "1.0.1" // (2015/11/21) Disabled diacritics for now. Also, x18 times faster 43 | #define UNIFY_VERSION "1.0.0" // (2015/08/18) Initial version */ 44 | 45 | // Public API following 46 | 47 | // Convert anything to a UID. 
// Additionally, if tags != null, push back all parsed tags found.
std::string unify( const std::string &uri, std::vector<std::string> *tags = nullptr );

// Private API following, and tests at bottom of file

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <deque>
#include <map>
#include <string>
#include <vector>

/// @brief Normalizes a path, URI, URL or identifier into a UID (Unified ID).
///
/// Pipeline: drop everything from the first '?', lowercase, strip "#tags",
/// keep the last two path levels, cut each level at its first '.', turn
/// whitespace/punctuation separators into '_', split into stems, drop one
/// trailing plural 's' per stem, sort the stems and join them with '_'.
///
/// @param uri   Any resource string (may be empty).
/// @param tags  Optional output. When non-null, every tag found in `uri` is
///              appended as "#name", lowercased, in order of appearance.
/// @return The UID. Empty when `uri` holds no usable stem.
///
/// Fixes over the previous revision:
///  - an empty result no longer indexes past the end of a string;
///  - lowercasing casts through `unsigned char` before calling `std::tolower`;
///  - a tag that runs to the end of the input is reported too;
///  - plurals are stripped *before* sorting, so "sprites/sprite2" and
///    "sprite/sprite2" yield the same UID and re-unifying that UID is stable;
///  - stems left empty after plural stripping are dropped instead of emitting
///    a stray '_';
///  - no lazily initialised static lookup tables are used any more.
///
/// Known limitation (unchanged): only one trailing 's' is removed per pass,
/// so a stem ending in "ss" loses another 's' every time it is re-unified.
inline
std::string unify( const std::string &uri, std::vector<std::string> *tags ) {
    // Function-local helpers, so that the header exports nothing but unify().
    struct detail {
        // Splits `text` on any character of `delimiters`. Never yields empty tokens.
        static std::vector<std::string> tokenize( const std::string &text, const std::string &delimiters ) {
            std::vector<std::string> tokens;
            std::string::size_type begin = text.find_first_not_of( delimiters );
            while( begin != std::string::npos ) {
                const std::string::size_type end = text.find_first_of( delimiters, begin );
                tokens.push_back( text.substr( begin, end - begin ) );
                begin = text.find_first_not_of( delimiters, end );
            }
            return tokens;
        }
        // Characters that are treated exactly like the '_' stem separator.
        static bool is_separator( char ch ) {
            switch( ch ) {
                case ' ': case '-': case ',': case '|': case ';':
                case ':': case '(': case ')': case '[': case ']':
                    return true;
                default:
                    return false;
            }
        }
        // Characters allowed inside a tag name (input is already lowercased).
        static bool is_tag_char( char ch ) {
            return ch == '-' || ch == '=' || ch == '_'
                || ( ch >= '0' && ch <= '9' )
                || ( ch >= 'a' && ch <= 'z' );
        }
    };

    // 1) @todo latinization/romanization here (proper utf8)
    // 2) @todo remove diacritics here (proper utf8)
    // 3) @todo unescape url here

    // 4) remove url options (if any); substr() keeps everything when there is no '?'
    std::string text = uri.substr( 0, uri.find( '?' ) );

    // 5) lowercase contents; the unsigned char cast keeps bytes >= 0x80 well-defined
    for( char &ch : text ) {
        ch = static_cast<char>( std::tolower( static_cast<unsigned char>( ch ) ) );
    }

    // 6) strip tags like #tag-123 #tag_456 or #xbox360=yes
    //    The character that terminates a tag is kept as regular content.
    std::string stripped;
    stripped.reserve( text.size() );
    for( std::string::size_type pos = 0; pos < text.size(); ) {
        if( text[pos] != '#' ) {
            stripped += text[pos++];
            continue;
        }
        std::string::size_type end = pos + 1;
        while( end < text.size() && detail::is_tag_char( text[end] ) ) {
            ++end;
        }
        if( tags ) {
            tags->push_back( text.substr( pos, end - pos ) ); // includes the leading '#'
        }
        pos = end;
    }

    // 7) split path and keep its last two levels only
    const std::vector<std::string> levels = detail::tokenize( stripped, "\\/" );
    const std::size_t first = levels.size() > 2 ? levels.size() - 2 : 0;

    // 8) trim extension, whitespaces and punctuations
    // 9) split into stems and fix aos/soa plural
    std::vector<std::string> stems;
    for( std::size_t i = first; i < levels.size(); ++i ) {
        std::string level = levels[i].substr( 0, levels[i].find( '.' ) );
        for( char &ch : level ) {
            if( detail::is_separator( ch ) ) ch = '_';
        }
        std::vector<std::string> words = detail::tokenize( level, "_" );
        for( std::string &word : words ) {
            if( !word.empty() && word.back() == 's' ) {
                word.pop_back();
            }
            if( !word.empty() ) {
                stems.push_back( word );
            }
        }
    }

    // 10) sort the singular stems, so word order and plurals cannot change the UID
    std::sort( stems.begin(), stems.end() );

    // 11) join stems; built without a leading separator, so nothing to trim afterwards
    std::string uid;
    for( const std::string &stem : stems ) {
        if( !uid.empty() ) uid += '_';
        uid += stem;
    }
    return uid;
}

#ifdef UNIFY_BUILD_TESTS

// UID, hypothetical sugar class {
// A std::string that unifies whatever it is constructed from or compared against.
struct UID : public std::string {
    UID() : std::string()
    {}
    template<typename T>
    UID( const T &t ) : std::string(unify(t))
    {}
    UID( const UID &t ) : std::string(t)
    {}
    template<typename T>
    bool operator<( const T &t ) const {
        const std::string &self = *this;
        return self < (std::string)UID(t);
    }
    template<typename T>
    bool operator==( const T &t ) const {
        const std::string &self = *this;
        return self == (std::string)UID(t);
    }
    template<typename T>
    bool operator!=( const T &t ) const {
        const std::string &self = *this;
        return self != (std::string)UID(t);
    }
    operator const std::string() const {
        return *this;
    }
    operator std::string() {
        return *this;
    }
    operator const char*() const {
        return this->c_str();
    }
};
// }

// Simple filesystem dictionary, hypothetical sugar class {
// Maps the UID of every added uri back to the uri as it was given.
struct disk {
    std::map< std::string, std::string > map;
    void add( const std::string &uri ) {
        map[ unify(uri) ] = uri;
    };
    std::string lookup( const std::string &uid_or_uri ) const {
        auto find = map.find( unify(uid_or_uri) );
        return find == map.end() ? std::string() : find->second;
    };
};
// }

// unittest suite
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define suite(...) if(printf("------ " __VA_ARGS__),puts(""),true)
#define test(...)  (errno=0,++tst,err+=!(ok=!!(__VA_ARGS__))),printf("[%s] %d %s (%s)\n",ok?" OK ":"FAIL",__LINE__,#__VA_ARGS__,strerror(errno))
unsigned tst=0,err=0,ok=atexit([]{ suite("summary"){ printf("[%s] %d tests, %d passed, %d errors\n",err?"FAIL":" OK ",tst,tst-err,err); }});

int main() {

    suite( "unified folder/asset separators" ) {
        std::string item = unify("folder\\asset");
        test( item == unify("folder/asset") );
        test( item == unify("folder-asset") );
        test( item == unify("folder_asset") );
        test( item == unify("folder|asset") );
        test( item == unify("folder:asset") );
        test( item == unify("folder;asset") );
        test( item == unify("folder,asset") );
        test( item == unify("[folder]asset") );
        test( item == unify("asset(folder)") );
        // -> asset_folder
    }

    suite( "unified absolute, relative, virtual and remote paths" ) {
        std::string item = unify("~home/game/folder/asset.jpg");
        test( item == unify("~user/game1/folder/asset.jpg") );
        test( item == unify("~mark/game2/folder/asset.jpg") );
        test( item == unify("~john/game3/data/folder/asset.jpg") );
        test( item == unify("../folder/asset.jpg") );
        test( item == unify("C:\\data\\folder\\asset.jpg") );
        test( item == unify("C:/game/data/folder/asset.jpg") );
        test( item == unify("data.zip/data/folder/asset.jpg") );
        test( item == unify("virtual.rar/folder/asset.jpg") );
        test( item == unify("http://web.domain.com%20/folder/asset.jpg?blabla=123&abc=123#qwe") );
        // -> asset_folder
    }

    suite( "unified uppercases, lowercases, whitespaces and hyphens" ) {
        test( unify("mesh/main-character") == "character_main_mesh" );
        test( unify("mesh/main_character") == "character_main_mesh" );
        test( unify("mesh/Main Character") == "character_main_mesh" );
        test( unify("mesh / Main character ") == "character_main_mesh" );
        // -> character_main_mesh
    }

    suite( "unified extensions" ) {
        test( unify("music/theme.ogg") == "music_theme" );
        test( unify("music/theme.wav") == "music_theme" );
        test( unify("ui/logo.png") == "logo_ui" );
        test( unify("ui/logo.webp") == "logo_ui" );
        // -> music_theme, -> logo_ui
    }

    suite( "unified typos on double extensions and double punctuations" ) {
        test( unify("game/logo.bmp.png") == unify("game/logo.bmp") );
        test( unify("game/logo.png") == unify("game/logo..png") );
        // -> game_logo
    }

    suite( "unified typos on many diacritics" ) {
        // @todo: diacritics need proper utf8 support. might be much slower though.
        // test( unify("âñimátïón/wàlk") == unify("animation/walk") );
        // -> animation_walk
    }

    suite( "unified AoS (OO) and SoA (ECS) disk layouts. "
           "unified plurals as well (if using English words)" ) {
        test( unify("sounds/kid") == unify("kid/sound") );
        test( unify("sprites/kid") == unify("kid/sprite") );
        test( unify("sounds/car") == unify("car/sound") );
        test( unify("sprites/car") == unify("car/sprite") );
        // -> car_sound, car_sprite, kid_sound, kid_sprite
    }

    suite( "unified SOV, SVO, VSO, VOS, OVS, OSV subject/verb/object language topologies" ) {
        std::string item = unify("player-joins-scene.intro");
        test( item == unify("player-scene-join.intro") );
        test( item == unify("join-player-scene.intro") );
        test( item == unify("join-scene-player.intro") );
        test( item == unify("scene-join-player.intro") );
        test( item == unify("scene-player-join.intro") );
        // -> join_player_scene
    }

    suite( "unified tagging (useful when globbing and deploying files and/or directories)" ) {
        std::string item = unify("splash/logo");
        test( unify("/splash/#win32/logo") == item );
        test( unify("splash #mobile/logo #win32=always.png") == item );
        // -> logo_splash
    }

    suite( "unified consistency. reunification as a lossless process" ) {
        test( unify( unify("roses-are-red") ) == unify("roses-are-red") );
        // -> are_red_rose
    }

    suite( "robustness: empty input, trailing tags and plural-aware ordering" ) {
        test( unify("") == "" );
        std::vector<std::string> found;
        test( unify("splash #mobile/logo #win32", &found) == unify("splash/logo") );
        test( found.size() == 2 && found[0] == "#mobile" && found[1] == "#win32" );
        test( unify("sprites/sprite2") == unify("sprite/sprite2") );
        test( unify( unify("sprites/sprite2") ) == unify("sprites/sprite2") );
    }

    suite( "demo: transparent UID conversion" ) {
        // UID stands for Unified Identifier.
        // UID is always immutable in code, even if changed on disk.
        // UID can be converted from paths, URLs, URIs, and IDs
        UID item = "game\\logo.bmp";
        test( item == "game_logo" );
        item = "logo/game";
        test( item == "game_logo" );
        item = "~home/game/folder/asset.jpg";
        test( item == "~user/game1/folder/asset.jpg" );
        test( item == "~mark/game2/folder/asset.jpg" );
        test( item == "~john/game3/data/folder/asset.jpg" );
        test( item == "../folder/asset.jpg" );
        test( item == "C:\\data\\folder\\asset.jpg" );
        test( item == "C:/game/data/folder/asset.jpg" );
        test( item == "data.zip/data/folder/asset.jpg" );
        test( item == "virtual.rar/folder/asset.jpg" );
        test( item == "http://web.domain.com%20/folder/asset.jpg?blabla=123&abc=123#qwe" );
    }

    suite( "demo: insertion & lookup from a virtual filesystem" ) {
        disk d;
        d.add("./local/file.txt");
        d.add("./data/game/icon.png");
        d.add("./songs/main_theme.ogg");
        d.add("./game.zip/json #win32/inventory.json");
        d.add("./game.zip/logos #win32/big.webp");
        d.add("./game.zip/logos #mobile/small.png");
        /* pseudocode :
        for( all mounted filesystems ) {
            for( all monitored files in subdirs ) {
                d.add( file.full_path );
            }
        } */
        test( d.lookup("local/file") == "./local/file.txt" );
        test( d.lookup("local-file") == "./local/file.txt" );
        test( d.lookup("file-local") == "./local/file.txt" );
        test( d.lookup("../file/local") == "./local/file.txt" );
        test( d.lookup("game/icon") == "./data/game/icon.png" );
        test( d.lookup("game-icon") == "./data/game/icon.png" );
        test( d.lookup("icon/game") == "./data/game/icon.png" );
        test( d.lookup("icon-game") == "./data/game/icon.png" );
        test( d.lookup("songs/main-theme") == "./songs/main_theme.ogg" );
        test( d.lookup("inventory-json") == "./game.zip/json #win32/inventory.json" );
        test( d.lookup("logos-big") == "./game.zip/logos #win32/big.webp" );
        test( d.lookup("logos-big") == "./game.zip/logos #win32/big.webp" );
    }
}

#endif