├── AVLtree.c ├── AVLtree.cpp ├── AVLtree.h ├── AVLtree.hpp ├── COPYING ├── COPYING.LESSER ├── Makefile ├── README ├── atomic-compat.c ├── atomic-compat.h ├── autoconfigure.c ├── example.c ├── segalloc.c ├── segalloc.cpp ├── segalloc.h ├── stm.c ├── stm.h ├── stmalloc.c └── stmalloc.h /AVLtree.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | AVLtree.c 4 | 5 | Implementation of AVL trees, a form of balanced binary tree. 6 | 7 | Copyright 2009 Shel Kaphan 8 | 9 | This file is part of stmmap. 10 | 11 | stmmap is free software: you can redistribute it and/or modify 12 | it under the terms of the GNU Lesser General Public License as published by 13 | the Free Software Foundation, either version 3 of the License, or 14 | (at your option) any later version. 15 | 16 | stmmap is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU Lesser General Public License for more details. 20 | 21 | You should have received a copy of the GNU Lesser General Public License 22 | along with stmmap. If not, see . 23 | 24 | */ 25 | 26 | 27 | #include 28 | 29 | #include "AVLtree.h" 30 | 31 | 32 | void (*AVLuserHook)(AVLtreeNode*); 33 | 34 | /* 35 | Set the depth of a tree node, assuming the child nodes have correct depths. 36 | */ 37 | static void setDepth(AVLtreeNode* t) { 38 | int ldepth = t->left? t->left->depth : 0; 39 | int rdepth = t->right? t->right->depth : 0; 40 | 41 | t->depth = (((ldepth > rdepth)? ldepth : rdepth) + 1); 42 | if (AVLuserHook) 43 | (*AVLuserHook)(t); 44 | if (t->parent) 45 | setDepth(t->parent); 46 | } 47 | 48 | 49 | /* 50 | A utility routine which adds a node "new" in place of node "old" immediately under 51 | node "t". If "t" is null, the tree is re-rooted at "new". 
52 | */ 53 | static void newSubTree(AVLtreeNode* t, AVLtreeNode** tree, 54 | AVLtreeNode* old, AVLtreeNode* new) 55 | { 56 | if (t) { 57 | if (t->left==old) 58 | t->left=new; 59 | else if (t->right==old) 60 | t->right=new; 61 | } else 62 | *tree = new; 63 | 64 | if (new) 65 | new->parent=t; 66 | } 67 | 68 | 69 | 70 | 71 | /* 72 | Rotate the tree right at a particular node. Used in rebalancing. 73 | */ 74 | static void rotateRight(AVLtreeNode* t, AVLtreeNode** tree) { 75 | AVLtreeNode* l = t->left; 76 | AVLtreeNode* lr = l->right; 77 | AVLtreeNode* p; 78 | l->right = t; 79 | t->left = lr; 80 | if (lr) 81 | lr->parent = t; 82 | p = t->parent; 83 | t->parent = l; 84 | newSubTree(p, tree,t,l); 85 | setDepth(t); 86 | } 87 | 88 | 89 | /* 90 | Rotate the tree left at a particular node. Used in rebalancing. 91 | */ 92 | static void rotateLeft(AVLtreeNode* t, AVLtreeNode** tree) { 93 | AVLtreeNode* r = t->right; 94 | AVLtreeNode* rl = r->left; 95 | AVLtreeNode* p; 96 | r->left = t; 97 | t->right = rl; 98 | if (rl) 99 | rl->parent = t; 100 | p = t->parent; 101 | t->parent = r; 102 | newSubTree(p, tree,t,r); 103 | setDepth(t); 104 | } 105 | 106 | 107 | 108 | /* 109 | Compute the balance factor at a node "t". Negative result indicates 110 | left-heavy, positive result indicates right-heavy. 111 | */ 112 | static int balance(AVLtreeNode* t) { 113 | int ldepth = t->left? t->left->depth : 0; 114 | int rdepth = t->right? t->right->depth : 0; 115 | return (rdepth - ldepth); 116 | } 117 | 118 | 119 | 120 | 121 | /* 122 | Re-balance a tree starting at node "t" and working upward if necessary. This 123 | is where the "AVL" double rotation algorithm is used. 
124 | */ 125 | 126 | static void rebalance(AVLtreeNode* t, AVLtreeNode** tree) { 127 | 128 | int b = balance(t); 129 | if (b == 2) { 130 | if (balance(t->right) == -1) 131 | rotateRight(t->right, tree); 132 | rotateLeft(t, tree); 133 | } else if (b == -2) { 134 | if (balance(t->left) == 1) 135 | rotateLeft(t->left, tree); 136 | rotateRight(t, tree); 137 | } 138 | 139 | if (t && t->parent) 140 | rebalance(t->parent, tree); 141 | } 142 | 143 | /* 144 | Add a node "i" to the tree "tree". This is recursive. 145 | t should start out the same as *tree, but is used on each recursion to 146 | find the correct branch to insert into. The tree is rebalanced, and possibly 147 | re-rooted after the insertion. 148 | */ 149 | static void __addToTree(AVLtreeNode* i, AVLtreeNode** tree, AVLtreeNode* t, 150 | int (*cmp)(void*,void*), void*(*getKey)(void*)) 151 | { 152 | 153 | if (t) { 154 | if ((*cmp)((*getKey)(i), (*getKey)(t)) < 0) { 155 | if (t->left) 156 | __addToTree(i, tree, t->left, cmp, getKey); 157 | else { 158 | t->left=i; 159 | i->parent = t; 160 | setDepth(i); 161 | rebalance(i, tree); 162 | } 163 | } else { 164 | if (t->right) 165 | __addToTree(i, tree, t->right, cmp, getKey); 166 | else { 167 | t->right = i; 168 | i->parent = t; 169 | setDepth(i); 170 | rebalance(i, tree); 171 | } 172 | } 173 | } else { 174 | *tree = i; 175 | i->parent = NULL; 176 | // i->depth = 1; 177 | setDepth(i); 178 | } 179 | } 180 | 181 | 182 | void AVLaddToTree(AVLtreeNode* i, AVLtreeNode** tree, int (*cmp)(void*,void*), void*(*getKey)(void*)) { 183 | i->parent = i->left = i->right = NULL; 184 | i->depth = 0; 185 | __addToTree(i, tree, *tree, cmp, getKey); 186 | } 187 | 188 | 189 | 190 | 191 | /* 192 | Removes a node "t" from a tree. This is where things get hairy, since 193 | rebalancing is a pain. But it works. 
194 | */ 195 | void AVLremoveFromTree(AVLtreeNode* t, AVLtreeNode** tree) { 196 | AVLtreeNode* moved = t->parent; 197 | AVLtreeNode* s; 198 | if (t->left) { 199 | if (t->right) { 200 | /* there are two subtrees. */ 201 | 202 | if (t->left->depth >= t->right->depth) { 203 | /* tree is left-heavy (or balanced) */ 204 | s = t->left->right; 205 | if (s) { 206 | while (s->right) s=s->right; 207 | moved = s->parent; 208 | s->parent->right=s->left; 209 | if (s->left) { 210 | s->left->parent=s->parent; 211 | } 212 | s->left=t->left; 213 | t->left->parent = s; 214 | // s->depth = t->depth; 215 | 216 | } else { 217 | moved = s = t->left; 218 | } 219 | s->right = t->right; 220 | t->right->parent = s; 221 | 222 | newSubTree(t->parent, tree,t,s); 223 | 224 | } else { 225 | /* tree is right-heavy */ 226 | s = t->right->left; 227 | if (s) { 228 | while (s->left) s=s->left; 229 | moved = s->parent; 230 | s->parent->left=s->right; 231 | if (s->right) { 232 | s->right->parent=s->parent; 233 | } 234 | s->right=t->right; 235 | t->right->parent = s; 236 | // s->depth = t->depth; 237 | 238 | } else { 239 | moved = s = t->right; 240 | } 241 | s->left = t->left; 242 | t->left->parent = s; 243 | 244 | newSubTree(t->parent, tree,t,s); 245 | 246 | } 247 | } else { 248 | /* left subtree only */ 249 | newSubTree(t->parent, tree,t,t->left); 250 | } 251 | } else if (t->right) { 252 | /* right subtree only */ 253 | newSubTree(t->parent, tree,t,t->right); 254 | 255 | } else { 256 | /* no subtrees */ 257 | newSubTree(t->parent, tree,t,NULL); 258 | } 259 | if(moved) { 260 | setDepth(moved); 261 | rebalance(moved, tree); 262 | } 263 | } 264 | 265 | 266 | 267 | AVLtreeNode* AVLsearch(AVLtreeNode* t, void* key, int (*cmp)(void*,void*), void* (*getKey)(void*)) 268 | { 269 | int x; 270 | if ((x = (*cmp)((*getKey)(t),key)) == 0) { 271 | return t; 272 | } else if (x < 0) { 273 | if (t->right) 274 | return AVLsearch(t->right, key, cmp, getKey); 275 | else 276 | return NULL; 277 | } else { 278 | if (t->left) 
279 | return AVLsearch(t->left, key, cmp, getKey); 280 | else 281 | return NULL; 282 | } 283 | } 284 | 285 | 286 | 287 | static long treesize(AVLtreeNode* t) { 288 | return (1 + (t->left? treesize(t->left) : 0) + (t->right? treesize(t->right) : 0)); 289 | 290 | 291 | } 292 | 293 | 294 | -------------------------------------------------------------------------------- /AVLtree.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | AVLtree.cpp 4 | 5 | Position-independent, multi-threading compatible implementation of AVL trees, 6 | a form of balanced binary tree. 7 | 8 | The trees are assumed to be in a mapped, shared segment, so we use offset_ptrs instead 9 | of regular pointers, to make the tree structures position-independent. 10 | 11 | Copyright 2009 Shel Kaphan 12 | 13 | This file is part of stmmap. 14 | 15 | stmmap is free software: you can redistribute it and/or modify 16 | it under the terms of the GNU Lesser General Public License as published by 17 | the Free Software Foundation, either version 3 of the License, or 18 | (at your option) any later version. 19 | 20 | stmmap is distributed in the hope that it will be useful, 21 | but WITHOUT ANY WARRANTY; without even the implied warranty of 22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 23 | GNU Lesser General Public License for more details. 24 | 25 | You should have received a copy of the GNU Lesser General Public License 26 | along with stmmap. If not, see . 27 | 28 | */ 29 | 30 | 31 | #include 32 | 33 | #include "AVLtree.hpp" 34 | 35 | #if 0 36 | // There's a built-in "placement" operator new that does this already. 37 | void *AVLtreeNode::operator new(size_t size, void *base) 38 | { 39 | return base; 40 | } 41 | #endif 42 | 43 | void (*AVLuserHook)(AVLtreeNode *); 44 | 45 | /* 46 | Set the depth of a tree node, assuming the child nodes have correct depths. 
47 | */ 48 | 49 | #if 0 50 | void AVLtreeNode::setDepth() 51 | { 52 | int ldepth = left? left->depth : 0; 53 | int rdepth = right? right->depth : 0; 54 | 55 | depth = (((ldepth > rdepth)? ldepth : rdepth) + 1); 56 | if (AVLuserHook) 57 | (*AVLuserHook)(this); 58 | if (parent) 59 | parent.get()->setDepth(); 60 | } 61 | #endif 62 | 63 | 64 | static void setDepth(offset_ptr t) { 65 | 66 | int ldepth = t->left? t->left->depth : 0; 67 | int rdepth = t->right? t->right->depth : 0; 68 | 69 | t->depth = (((ldepth > rdepth)? ldepth : rdepth) + 1); 70 | if (AVLuserHook) 71 | (*AVLuserHook)(t.get()); 72 | if (t->parent) 73 | setDepth(t->parent); 74 | 75 | } 76 | 77 | /* 78 | A utility routine which adds a node "new" in place of node "old" immediately under 79 | node "t". If "t" is null, the tree is re-rooted at "new". 80 | */ 81 | static void newSubTree(offset_ptr t, offset_ptr *tree_addr, 82 | offset_ptr old, offset_ptr _new) 83 | { 84 | if (t) { 85 | if (t->left==old) 86 | t->left= _new; 87 | else if (t->right==old) 88 | t->right= _new; 89 | } else { 90 | *tree_addr = _new; 91 | } 92 | if ( _new) 93 | _new->parent=t; 94 | } 95 | 96 | 97 | 98 | 99 | /* 100 | Rotate the tree right at a particular node. Used in rebalancing. 101 | */ 102 | static void rotateRight(offset_ptr t, offset_ptr* tree_addr) { 103 | offset_ptr l = t->left; 104 | offset_ptr lr = l->right; 105 | offset_ptr p; 106 | l->right = t; 107 | t->left = lr; 108 | if (lr) 109 | lr->parent = t; 110 | p = t->parent; 111 | t->parent = l; 112 | newSubTree(p, tree_addr,t,l); 113 | setDepth(t); 114 | } 115 | 116 | 117 | /* 118 | Rotate the tree left at a particular node. Used in rebalancing. 
119 | */ 120 | static void rotateLeft(offset_ptr t, offset_ptr* tree_addr) { 121 | offset_ptr r = t->right; 122 | offset_ptr rl = r->left; 123 | offset_ptr p; 124 | r->left = t; 125 | t->right = rl; 126 | if (rl) 127 | rl->parent = t; 128 | p = t->parent; 129 | t->parent = r; 130 | newSubTree(p, tree_addr,t,r); 131 | setDepth(t); 132 | } 133 | 134 | 135 | 136 | /* 137 | Compute the balance factor at a node "t". Negative result indicates 138 | left-heavy, positive result indicates right-heavy. 139 | */ 140 | static int balance(offset_ptr t) { 141 | int ldepth = t->left? t->left->depth : 0; 142 | int rdepth = t->right? t->right->depth : 0; 143 | return (rdepth - ldepth); 144 | } 145 | 146 | 147 | 148 | 149 | /* 150 | Re-balance a tree starting at node "t" and working upward if necessary. This 151 | is where the "AVL" double rotation algorithm is used. 152 | */ 153 | 154 | static void rebalance(offset_ptr t, offset_ptr* tree_addr) { 155 | 156 | int b = balance(t); 157 | if (b == 2) { 158 | if (balance(t->right) == -1) 159 | rotateRight(t->right, tree_addr); 160 | rotateLeft(t, tree_addr); 161 | } else if (b == -2) { 162 | if (balance(t->left) == 1) 163 | rotateLeft(t->left, tree_addr); 164 | rotateRight(t, tree_addr); 165 | } 166 | 167 | if (t && t->parent) 168 | rebalance(t->parent, tree_addr); 169 | } 170 | 171 | /* 172 | Add a node "i" to the tree "tree_addr". This is recursive. 173 | t should start out the same as *tree_addr, but is used on each recursion to 174 | find the correct branch to insert into. The tree is rebalanced, and possibly 175 | re-rooted after the insertion. 
176 | */ 177 | static void __addToTree(offset_ptr i, offset_ptr *tree_addr, offset_ptr t, 178 | int (*cmp)(void*,void*), void*(*getKey)(void*)) 179 | { 180 | 181 | if (t) { 182 | if ((*cmp)((*getKey)((void*)i.get()), (*getKey)((void*)t.get())) < 0) { 183 | if (t->left) 184 | __addToTree(i, tree_addr, t->left, cmp, getKey); 185 | else { 186 | t->left=i; 187 | i->parent = t; 188 | setDepth(i); 189 | rebalance(i, tree_addr); 190 | } 191 | } else { 192 | if (t->right) 193 | __addToTree(i, tree_addr, t->right, cmp, getKey); 194 | else { 195 | t->right = i; 196 | i->parent = t; 197 | setDepth(i); 198 | rebalance(i, tree_addr); 199 | } 200 | } 201 | } else { 202 | *tree_addr = i; 203 | i->parent = NULL; 204 | // i->depth = 1; 205 | setDepth(i); 206 | } 207 | } 208 | 209 | 210 | void AVLaddToTree(AVLtreeNode* i, offset_ptr* tree_addr, 211 | int (*cmp)(void*,void*), void*(*getKey)(void*)) { 212 | i->parent = i->left = i->right = NULL; 213 | i->depth = 0; 214 | 215 | __addToTree(i, tree_addr, (*tree_addr), cmp, getKey); 216 | } 217 | 218 | 219 | 220 | 221 | /* 222 | Removes a node "t" from a tree. This is where things get hairy, since 223 | rebalancing is a pain. But it works. 224 | */ 225 | void AVLremoveFromTree(AVLtreeNode* t, offset_ptr* tree_addr) { 226 | offset_ptr moved = t->parent; 227 | offset_ptr s; 228 | if (t->left) { 229 | if (t->right) { 230 | /* there are two subtrees. 
*/ 231 | 232 | if (t->left->depth >= t->right->depth) { 233 | /* tree is left-heavy (or balanced) */ 234 | s = t->left->right; 235 | if (s) { 236 | while (s->right) s=s->right; 237 | moved = s->parent; 238 | s->parent->right=s->left; 239 | if (s->left) { 240 | s->left->parent=s->parent; 241 | } 242 | s->left=t->left; 243 | t->left->parent = s; 244 | // s->depth = t->depth; 245 | 246 | } else { 247 | moved = s = t->left; 248 | } 249 | s->right = t->right; 250 | t->right->parent = s; 251 | 252 | newSubTree(t->parent, tree_addr,t,s); 253 | 254 | } else { 255 | /* tree is right-heavy */ 256 | s = t->right->left; 257 | if (s) { 258 | while (s->left) s=s->left; 259 | moved = s->parent; 260 | s->parent->left=s->right; 261 | if (s->right) { 262 | s->right->parent=s->parent; 263 | } 264 | s->right=t->right; 265 | t->right->parent = s; 266 | // s->depth = t->depth; 267 | 268 | } else { 269 | moved = s = t->right; 270 | } 271 | s->left = t->left; 272 | t->left->parent = s; 273 | 274 | newSubTree(t->parent, tree_addr,t,s); 275 | 276 | } 277 | } else { 278 | /* left subtree only */ 279 | newSubTree(t->parent, tree_addr,t,t->left); 280 | } 281 | } else if (t->right) { 282 | /* right subtree only */ 283 | newSubTree(t->parent, tree_addr,t,t->right); 284 | 285 | } else { 286 | /* no subtrees */ 287 | newSubTree(t->parent, tree_addr,t,NULL); 288 | } 289 | if(moved) { 290 | setDepth(moved); 291 | rebalance(moved, tree_addr); 292 | } 293 | } 294 | 295 | 296 | 297 | AVLtreeNode* AVLsearch(AVLtreeNode *t, void* key, int (*cmp)(void*,void*), void* (*getKey)(void*)) 298 | { 299 | int x; 300 | if ((x = (*cmp)((*getKey)((void*)t),key)) == 0) { 301 | return t; 302 | } else if (x < 0) { 303 | if (t->right) 304 | return AVLsearch(t->right.get(), key, cmp, getKey); 305 | else 306 | return NULL; 307 | } else { 308 | if (t->left) 309 | return AVLsearch(t->left.get(), key, cmp, getKey); 310 | else 311 | return NULL; 312 | } 313 | } 314 | 315 | 316 | 317 | #if 0 318 | AVLtreeNode* 
AVLsearch(AVLtreeNode *t, void* key) 319 | { 320 | int x; 321 | if ((x = t->compareToKey(key)) == 0) { 322 | return t; 323 | } else if (x < 0) { 324 | if (t->right) 325 | return AVLsearch(t->right.get(), key); 326 | else 327 | return NULL; 328 | } else { 329 | if (t->left) 330 | return AVLsearch(t->left.get(), key); 331 | else 332 | return NULL; 333 | } 334 | } 335 | #endif 336 | 337 | 338 | 339 | static long treesize(offset_ptr t) { 340 | return (1 + (t->left? treesize(t->left) : 0) + (t->right? treesize(t->right) : 0)); 341 | 342 | 343 | } 344 | 345 | 346 | -------------------------------------------------------------------------------- /AVLtree.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | AVLtree.h 4 | 5 | Interface to implementation of AVL trees, a form of balanced binary trees. 6 | This is a low level implementation which does not depend on anything else 7 | in stmmap in any way. 8 | 9 | Copyright 2009 Shel Kaphan 10 | 11 | This file is part of stmmap. 12 | 13 | stmmap is free software: you can redistribute it and/or modify 14 | it under the terms of the GNU Lesser General Public License as published by 15 | the Free Software Foundation, either version 3 of the License, or 16 | (at your option) any later version. 17 | 18 | stmmap is distributed in the hope that it will be useful, 19 | but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | GNU Lesser General Public License for more details. 22 | 23 | You should have received a copy of the GNU Lesser General Public License 24 | along with stmmap. If not, see . 25 | 26 | */ 27 | 28 | 29 | typedef struct AVLtreeNode 30 | { 31 | struct AVLtreeNode* parent; 32 | struct AVLtreeNode* left; 33 | struct AVLtreeNode* right; 34 | int depth; 35 | } AVLtreeNode; 36 | 37 | 38 | /* 39 | Add a node "i" to the tree "*tree". 
The tree is rebalanced, and possibly 40 | re-rooted after the insertion (that's why the pointer-to-pointer is passed). 41 | */ 42 | void AVLaddToTree(AVLtreeNode* i, AVLtreeNode** tree, int (*cmp)(void*,void*), void*(*getKey)(void*)); 43 | 44 | /* 45 | Removes a node "t" from a tree. 46 | */ 47 | void AVLremoveFromTree(AVLtreeNode* t, AVLtreeNode** tree); 48 | 49 | /* 50 | Search for a node in the tree using a user supplied comparison function, and key extractor. 51 | */ 52 | AVLtreeNode* AVLsearch(AVLtreeNode* t, void* key, int (*cmp)(void*,void*), void* (*getKey)(void*)); 53 | 54 | /* 55 | For "subtypes" of AVLtreeNode, there's a hook which, if set, is called on each node when the 56 | node's depth is being calculated. 57 | */ 58 | extern void (*AVLuserHook)(AVLtreeNode*); 59 | -------------------------------------------------------------------------------- /AVLtree.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | AVLtree.hpp 4 | 5 | Interface to position-independent implementation of AVL trees, a form of balanced 6 | binary trees. This is a low level implementation which does not depend on anything else 7 | in stmmap in any way. 8 | 9 | Copyright 2009 Shel Kaphan 10 | 11 | This file is part of stmmap. 12 | 13 | stmmap is free software: you can redistribute it and/or modify 14 | it under the terms of the GNU Lesser General Public License as published by 15 | the Free Software Foundation, either version 3 of the License, or 16 | (at your option) any later version. 17 | 18 | stmmap is distributed in the hope that it will be useful, 19 | but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | GNU Lesser General Public License for more details. 22 | 23 | You should have received a copy of the GNU Lesser General Public License 24 | along with stmmap. If not, see . 
25 | 26 | */ 27 | 28 | #include 29 | 30 | using namespace boost::interprocess; 31 | 32 | 33 | class AVLtreeNode 34 | { 35 | public: 36 | offset_ptr parent; 37 | offset_ptr left; 38 | offset_ptr right; 39 | int depth; 40 | 41 | // virtual int compareToKey(void *key) = 0; 42 | // virtual void* nodeKey() = 0; 43 | 44 | // void *operator new(size_t size, void *base); 45 | 46 | AVLtreeNode() { 47 | parent = left = right = NULL; 48 | }; 49 | 50 | }; 51 | 52 | 53 | /* 54 | Add a node "i" to the tree "*tree". The tree is rebalanced, and possibly 55 | re-rooted after the insertion (that's why the pointer-to-pointer is passed). 56 | */ 57 | void AVLaddToTree(AVLtreeNode* i, offset_ptr* tree_addr, int (*cmp)(void*,void*), void*(*getKey)(void*)); 58 | 59 | /* 60 | Removes a node "t" from a tree. 61 | */ 62 | void AVLremoveFromTree(AVLtreeNode* t, offset_ptr* tree_addr); 63 | 64 | /* 65 | Search for a node in the tree using a user supplied comparison function, and key extractor. 66 | */ 67 | AVLtreeNode *AVLsearch(AVLtreeNode* t, void* key, int (*cmp)(void*,void*), void* (*getKey)(void*)); 68 | 69 | /* 70 | For "subtypes" of AVLtreeNode, there's a hook which, if set, is called on each node when the 71 | node's depth is being calculated. 72 | */ 73 | extern void (*AVLuserHook)(AVLtreeNode *); 74 | 75 | 76 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 
12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <http://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <http://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /COPYING.LESSER: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions.
14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | # You need to install the boost library from www.boost.org. 3 | IDIR =/usr/local/boost_1_41_0 4 | #IDIR = ../boost_1_41_0 5 | 6 | CC=gcc 7 | CPP=g++ 8 | 9 | THREADFLAGS = -DTHREADS 10 | 11 | CFLAGS = $(shell ./autoconfigure) 12 | 13 | CPLUSPLUSFLAGS = $(CFLAGS) -I$(IDIR) 14 | 15 | 16 | NLIBSTEM = stm 17 | THLIBSTEM = stm-th 18 | 19 | NLIB = lib$(NLIBSTEM).a 20 | THLIB = lib$(THLIBSTEM).a 21 | 22 | LIBDIR = -L. 23 | 24 | LIBS=-lpthread 25 | THLIBS = -l$(THLIBSTEM) $(LIBS) 26 | NLIBS = -l$(NLIBSTEM) $(LIBS) 27 | 28 | 29 | OBJ = stm.o stmalloc.o atomic-compat.o 30 | 31 | NOBJ = AVLtree.o segalloc.o example.o 32 | 33 | THOBJ = segalloc.th.o AVLtree.th.o example.th.o 34 | 35 | TARGETS = autoconfigure stmtest1 stmtest2 36 | 37 | all: $(TARGETS) 38 | 39 | 40 | # this is a single-threaded test program that uses absolute pointers 41 | # in the shared segment. Uses pthreads library for thread local storage 42 | # which is the same in both single- and multi-threaded versions. 43 | # 44 | stmtest1: autoconfigure example.o $(NLIB) 45 | $(CC) -o $@ example.o $(LIBDIR) $(NLIBS) 46 | 47 | 48 | # this is a multi-threading test program that uses position-independent 49 | # relative pointers in the shared segment. 50 | # 51 | stmtest2: autoconfigure example.th.o $(THLIB) 52 | $(CPP) -o $@ example.th.o $(LIBDIR) $(THLIBS) 53 | 54 | %.o: %.c Makefile 55 | $(CC) -c $(CFLAGS) $< -o $@ 56 | 57 | # The two following rules must appear in the order they appear here. 
58 | %.th.o: %.cpp Makefile 59 | $(CPP) -c $(CPLUSPLUSFLAGS) $(THREADFLAGS) $< -o $@ 60 | 61 | %.th.o: %.c Makefile 62 | $(CC) -c $(CFLAGS) $(THREADFLAGS) $< -o $@ 63 | 64 | %: %.c 65 | $(CC) -o $@ $@.c 66 | 67 | $(NLIB): autoconfigure $(NLIB)($(OBJ) $(NOBJ)) 68 | ranlib $(NLIB) 69 | 70 | $(THLIB): autoconfigure $(THLIB)($(OBJ) $(THOBJ)) 71 | ranlib $(THLIB) 72 | 73 | .PHONY: clean 74 | 75 | clean: 76 | rm -f *.o *~ core $(TARGETS) $(NLIB) $(THLIB) 77 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | README 4 | 5 | Copyright 2009 Shel Kaphan 6 | 7 | This file is part of stmmap. 8 | 9 | stmmap is free software: you can redistribute it and/or modify 10 | it under the terms of the GNU Lesser General Public License as published by 11 | the Free Software Foundation, either version 3 of the License, or 12 | (at your option) any later version. 13 | 14 | stmmap is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU Lesser General Public License for more details. 18 | 19 | You should have received a copy of the GNU Lesser General Public License 20 | along with stmmap. If not, see . 21 | 22 | */ 23 | 24 | 25 | INSTALLATION 26 | ============ 27 | 28 | The primary product here is a library you can use to get Software Transactional Memory in your 29 | C and C++ programs. Actually it is two libraries, the difference being in the optional 30 | memory allocator that is included. One of them, libstm.a, supports transactions in only 31 | one thread per process. Processes can share memory, with transactions, between them. 32 | The other one, libstm-th.a, supports transactions in many threads per process (and still 33 | allows multiple processes too). 
If you want to use regular pointers in the objects in shared 34 | memory, you should stick to libstm.a, because you can control the location where the one 35 | and only shared memory region will live. If you use libstm-th.a, the shared memory region 36 | will be mapped at a different address for each thread doing transactions. So you will either 37 | need to avoid storing pointers in shared objects, or you will need to use a smart pointer 38 | class like offset_ptr, as supplied by the Boost C++ libraries (www.boost.org). Libstm-th.a 39 | makes use of offset_ptr. Because of offset_ptr, libstm-th.a needs to use some C++, 40 | so if you don't want any C++ in your program, you must stick to libstm.a. 41 | Note, though, that the C++ doesn't escape stmmap's internals, so though it requires 42 | a bit of C++ at compile-time, no APIs are affected and no runtime libraries are required. 43 | 44 | There is a Makefile that builds the libraries and two tests: stmtest1 and stmtest2. 45 | These are, respectively, a single-threaded and a multi-threading-capable version of the same thing. 46 | They run the test case in example.c, which is just a test, not part of the core package. 47 | Example.c also shows how to set up and call the stm package. To test stmtest1, you need to 48 | run multiple copies of it at the same time, in different processes. 49 | 50 | 51 | Here is a manifest of the files and what they do: 52 | 53 | Essentials: 54 | 55 | stm.[ch] core STM functionality 56 | atomic-compat.[ch] atomic operations needed by stm.c. Currently uses atomic-builtins. 57 | 58 | Support: (you can use stm.c without any of this if you want) 59 | 60 | stmalloc.[ch] memory allocator suitable for stmmap's shared segments. 
61 | segalloc.[ch] C implementation of low level memory allocator used by stmalloc.c 62 | segalloc.{cpp,h} C++ implementation of low level memory allocator used by stmalloc.c 63 | AVLtree.[ch] C AVL tree implementation that supports memory allocator 64 | AVLtree.{cpp,hpp} C++ AVL tree implementation that supports memory allocator 65 | 66 | Tests & Configuration: 67 | 68 | Makefile 69 | autoconfigure.c The Makefile uses this 70 | 71 | To use libstm-th.a and the C++ versions of the memory allocator, you will need the Boost C++ 72 | library available at www.boost.org. The only thing from there that is used is offset_ptr, and 73 | it is self-contained as a header file. No libraries are required. Offset_ptr provides 74 | position-independent "smart pointers" that make it possible to have the same object mapped 75 | into the address space in multiple locations and still have its pointers work correctly. This 76 | is used in the multi-threading version of stmmap. It is only used in the memory allocator. 77 | 78 | The header files your program will need are stm.h and stmalloc.h. Stmalloc.h is only needed if you 79 | are using the included memory allocator. 80 | 81 | You may need to port atomic-compat.[ch] to your OS environment, as different 82 | OS versions have different atomic primitives. (Please send me the changes!) 83 | Right now it uses the atomic builtins as specified by Intel and implemented in gcc. 84 | Or you can use the set that Apple provides. 85 | 86 | Example.c gives a very simple example of how to use this package. In fact, 87 | this is what I used to debug it. 88 | 89 | 90 | THEORY OF OPERATION 91 | =================== 92 | 93 | The purpose of this package is to provide "Software Transactional Memory" 94 | functionality in C and C++, without requiring much extra complexity for the programmer. 95 | 96 | It is built on top of file-mapping -- in particular Unix's mmap() system call. 
97 | One or more shared memory areas are opened by each thread or process that uses 98 | this package. When no transaction is in progress, each shared area is potentially visible 99 | to any processes that access that segment of mapped memory. 100 | 101 | When a transaction is in progress, the shared area is access-protected, and the first 102 | access by the process performing the transaction to any page is trapped. Other 103 | processes are not affected. A signal handler keeps track of the accessed pages. 104 | Upon first access in a transaction, a private copy of each page is made so that 105 | any modifications are not visible to any other process. If, on first access to a page 106 | within a transaction, it can be determined that another process has modified that page 107 | since the transaction started, the transaction is aborted. When a transaction is 108 | committed, there is an attempt to establish ownership of all modified pages and then 109 | re-map those private pages as shared. 110 | 111 | This package uses "optimistic locking" in that the modified pages 112 | are only locked at the end of the transaction, during the commit process. At commit 113 | time, there is another check to see if any other processes have modified the pages that 114 | have been accessed during the transaction, by means of a transaction ID associated 115 | with each page. If they have, the transaction is aborted and all changes discarded. 116 | Though this system works using mapped files, no I/O needs to occur normally -- 117 | the writes just affect the mapped file's memory buffers. 118 | 119 | This mechanism provides read consistency in that if a transaction A succeeds, it 120 | is guaranteed that no other transaction B will have modified the pages accessed by 121 | transaction A. However, there is no guarantee the transaction will 122 | succeed. Transactions that are automatically aborted are also automatically retried, 123 | with a backoff mechanism, until they succeed. 
124 | 125 | The access control on shared segments between (not during) transactions can be 126 | specified for each shared segment. If access is permitted, reads and writes to 127 | shared segments not during transactions are unchecked and there is no 128 | guarantee of consistency (so this is generally not recommended!). 129 | 130 | Transactions are composable -- that is, they can be nested. This is so that 131 | complex transactions can be built up out of simpler ones. 132 | 133 | Processes can have a number of shared memory areas, limited by the number of file 134 | descriptors and virtual memory available. 135 | 136 | A recent (December '09) addition to stmmap is that it now works at both the thread and process 137 | level. If you use it to communicate between single-threaded processes, you have the 138 | option of setting things up so that the shared segment is mapped at the same 139 | virtual address in any process that accesses it. This means you can use ordinary 140 | pointers within the shared segment and it will be correct in all processes that map 141 | that segment at the same virtual address. 142 | 143 | When you use this package in a multi-threaded mode, each thread that maps a shared segment 144 | will map it at a different virtual address. The process of which the threads are part 145 | therefore has the same shared segment open multiple times, by multiple threads. This 146 | implies that you cannot use pointers in the ordinary way within a shared segment. 147 | Although the STM core mechanism is the same in single- and multi-threaded mode, the 148 | memory allocator that is provided is different in each case. In the multi-threaded version, 149 | it uses the Boost C++ Library's "offset_ptr", which provides pointer-like functionality but will 150 | work correctly no matter where the object of which they are a part is mapped in virtual 151 | space. If you use the multi-threaded version of stmmap, you may find this, or something 152 | like it, useful. 
153 | 154 | However, in either single- or multi-threaded usage, these shared data structures should 155 | not refer outside the shared area to objects that only exist in the private parts 156 | of a process's address space! 157 | 158 | You can mix and match, and have multiple threads in multiple processes all sharing 159 | the same memory, safely, by using stmmap. 160 | 161 | Another limitation of this package is that it operates on the OS page level. 162 | That is, only one process can write to a page during a transaction, and any other 163 | thread's transaction that accesses the same page will have to be aborted and 164 | retried. This may not be as bad as it sounds if you code your transactions to be 165 | relatively short. The arbitration between processes seeking to own a page during 166 | a transaction is on a first-come first-served basis. Since pages are always 167 | locked in order of virtual address, there can be no deadlock. As soon as a 168 | transaction tries to obtain a page that has been modified by another process's 169 | transaction, it aborts. 170 | 171 | Another restriction is that this package will only work on systems where the 172 | contents of shared, mapped files are immediately visible to all processes that 173 | have them mapped shared. This could possibly fail on some systems without 174 | sufficient cache coherency, for example. This package does *not* depend on 175 | private mappings being kept up to date with the current contents of a file. 176 | This occurs in some OS versions but not others. It does depend on writes into 177 | private pages not being visible to other mappings, and it uses "copy-on-write" 178 | semantics of private mapping to ensure that private pages are private and will 179 | not be arbitrarily overwritten with data from another process or thread. 
180 | 181 | Warning: since transactions will be retried until they succeed, any variables 182 | outside the explicitly shared memory segment(s) that are referenced within a 183 | transaction must be handled with care. In particular, you should not modify 184 | anything (outside the shared memory area) that you access earlier in the same 185 | transaction. If the transaction is retried, the reference will pick up the 186 | value set in a previous try. You can set and then use a variable in the same 187 | transaction, but you can't read a variable that was initialized prior to a 188 | transaction, then set it to a new value, and expect that to work right if 189 | retries are necessary. It is helpful to think of the code in a transaction as 190 | being the body of a loop, that you don't know how many times is going to be 191 | executed. Only information in the shared segments is managed transactionally. 192 | 193 | 194 | ISSUES 195 | ====== 196 | 197 | 198 | Is Private Mapping Private? 199 | --------------------------- 200 | 201 | stmmap depends heavily on mmap(). Some features of mmap() are implemented differently 202 | on different systems. Of particular concern is the behavior of private mapping, which 203 | you get with the MAP_PRIVATE flag. stmmap uses this when a transaction accesses 204 | pages in memory. Some systems treat MAP_PRIVATE as a true private mapping, whether 205 | reading or writing. Most, on the other hand, treat the mapping as if it were shared 206 | until there is a write, at which time they make a private copy. Here, for example, 207 | is a quote from http://docs.hp.com/en/5992-3373/ch10s03.html: 208 | 209 | "In case of mmap(2), if MAP_PRIVATE mapping is created for a file for which MAP_SHARED 210 | exists, a separate copy of the page is created for MAP_PRIVATE only when it first 211 | writes to the page. As long as MAP_PRIVATE reads, it shares the page with MAP_SHARED 212 | mapping. 
That is, updates made by shared mapping will be visible to private mapping 213 | until private mapping writes. This change makes HP-UX mmap(2) compliant with industry 214 | standard, thus helping application portability." 215 | 216 | Also, from Understanding the Linux Kernel, 3rd Edition, by Daniel P. Bovet and Marco Cesati 217 | http://my.safaribooksonline.com/0596005652: 218 | 219 | "[...] private mapping is more efficient than shared mapping. 220 | But each write operation on a privately mapped page will cause it to stop 221 | mapping the page in the file. Thus, a write does not change the file on disk, nor 222 | is the change visible to any other processes that access the same file. However, 223 | pages of a private memory mapping that have not been modified by the process 224 | are affected by file updates performed by other processes." 225 | 226 | However, experimentally, Mac OSX does not behave this way. Private mappings are 227 | actually private on Mac OSX even when only reading a privately mapped file. 228 | 229 | The Makefile autoconfigures itself to compile the source files the right way depending 230 | on the specific behavior of the system it is compiling on. The relevant compiler flag is 231 | -DPRIVATE_MAPPING_IS_PRIVATE, which must be defined for systems that behave like Mac OSX does, 232 | but may be undefined for others (Linux, FreeBSD, etc.) 233 | 234 | 235 | Debugging 236 | --------- 237 | 238 | stmmap makes liberal use of the SIGBUS signal. This can make it difficult to debug 239 | programs which use stmmap unless you can get your debugger to pass through the SIGBUS 240 | signals without getting all confused about it. gdb, in particular, requires some 241 | coaxing. There is a bit of code in example.c that was needed to debug while using 242 | stmmap on Mac OS X. It is likely to be needed on any Mach system. 
Also, when using 243 | gdb on any system, you should type this line before your code calls on anything in stmmap: 244 | 245 | handle SIGBUS nostop noprint pass 246 | 247 | 248 | 249 | 250 | 251 | PROS AND CONS 252 | ============= 253 | 254 | Here are some pros and cons of this approach: 255 | 256 | Pros 257 | ---- 258 | * Very easy to use 259 | * Avoids accidental memory sharing if used in single-threaded process mode. 260 | * No extra code executed on memory reads and writes, except the first access to 261 | each page during a transaction. 262 | * Enables STM in C-like languages 263 | 264 | 265 | Cons 266 | ---- 267 | * Can be a little tricky to do the right thing with non-transactionally controlled 268 | data in the body of a transaction -- until you "get it." 269 | * Coarse grain approach -- pages supported by OS -- leads to more retried transactions 270 | because of more contention. 271 | * System call overhead in mmap() is non-trivial. 272 | * Requires language such as C/C++ in which memory and file-mapped pages are directly 273 | accessible. 274 | * Debuggers may have a hard time dealing with a program that depends on SIGBUS to 275 | work at all! 276 | 277 | 278 | HOPES AND DREAMS 279 | ================ 280 | 281 | 282 | Right now mmap() allows you to map a given region of memory to a region in a file. 283 | It would be useful if there were a call that did the complementary operation -- 284 | taking the contents of memory as the source and associating that to a region of a file 285 | (basically what pwrite() does...). 286 | 287 | Alternatively, simply the ability to re-map a region (a set of pages) of virtual 288 | memory to some other virtual address would be very useful. Either way, this could 289 | avoid some memory-to-memory copying in the stmmap implementation. 290 | 291 | Shared memory mapping that is backed only by swap space and not by a file could 292 | also be a good thing, but there needs to be some way to name it. (Mac OS has 293 | this). 
294 | 295 | Someone should fix the standard behavior of mmap() so that there is a way to get 296 | either copy-on-write or private mapping, and not to confuse the two as appears to 297 | be the present situation. 298 | 299 | 300 | ACKNOWLEDGEMENTS 301 | ================ 302 | 303 | Thanks to Bryan Woods who convinced me a multi-threaded version of stmmap could work 304 | and showed me offset_ptr, and who also came up with a way to make it possible to use 305 | gdb to debug programs that use stmmap. 306 | 307 | 308 | Shel Kaphan, Oct. 17, 2009 309 | 310 | -------------------------------------------------------------------------------- /atomic-compat.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | atomic-compat.c 4 | 5 | This is a compatibility package for whatever OS-supplied atomic operators are to be found on your 6 | platform. It was originally implemented using MAC OS X so the API is very similar to that. 7 | This file and atomic-compat.h can be conditionalized to support additional OS versions. 8 | 9 | Copyright 2009 Shel Kaphan 10 | 11 | This file is part of stmmap. 12 | 13 | stmmap is free software: you can redistribute it and/or modify 14 | it under the terms of the GNU Lesser General Public License as published by 15 | the Free Software Foundation, either version 3 of the License, or 16 | (at your option) any later version. 17 | 18 | stmmap is distributed in the hope that it will be useful, 19 | but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | GNU Lesser General Public License for more details. 22 | 23 | You should have received a copy of the GNU Lesser General Public License 24 | along with stmmap. If not, see . 
25 | 26 | */ 27 | 28 | 29 | #include "atomic-compat.h" 30 | 31 | 32 | 33 | int32_t atomic_increment_32(int32_t *addr) { 34 | 35 | #ifdef USE_ATOMIC_BUILTINS 36 | return __sync_add_and_fetch (addr, 1); 37 | #else 38 | return OSAtomicIncrement32Barrier(addr); 39 | #endif 40 | 41 | } 42 | 43 | int32_t atomic_decrement_32(int32_t *addr) { 44 | #ifdef USE_ATOMIC_BUILTINS 45 | return __sync_add_and_fetch (addr, -1); 46 | #else 47 | return OSAtomicDecrement32Barrier(addr); 48 | #endif 49 | } 50 | 51 | int32_t atomic_compare_and_swap_32(int32_t oldval, int32_t newval, int32_t *addr) { 52 | #ifdef USE_ATOMIC_BUILTINS 53 | return __sync_bool_compare_and_swap (addr, oldval, newval); 54 | #else 55 | return OSAtomicCompareAndSwap32Barrier(oldval, newval, addr); 56 | #endif 57 | } 58 | 59 | 60 | void atomic_spin_lock_lock(atomic_lock *lock) { 61 | #ifdef USE_ATOMIC_BUILTINS 62 | __sync_lock_test_and_set (lock, 1); 63 | #else 64 | OSSpinLockLock(lock); 65 | #endif 66 | } 67 | 68 | void atomic_spin_lock_unlock(atomic_lock *lock) { 69 | #ifdef USE_ATOMIC_BUILTINS 70 | __sync_lock_release (lock); 71 | 72 | #else 73 | OSSpinLockUnlock(lock); 74 | #endif 75 | 76 | } 77 | -------------------------------------------------------------------------------- /atomic-compat.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | atomic-compat.h 4 | 5 | This is the API for a compatibility package for whatever OS-supplied atomic operators are to be found on your 6 | platform. It was originally implemented using MAC OS X so the API is very similar to that. 7 | This file and atomic-compat.c can be conditionalized to support additional OS versions. 8 | 9 | Copyright 2009 Shel Kaphan 10 | 11 | This file is part of stmmap. 
12 | 13 | stmmap is free software: you can redistribute it and/or modify 14 | it under the terms of the GNU Lesser General Public License as published by 15 | the Free Software Foundation, either version 3 of the License, or 16 | (at your option) any later version. 17 | 18 | stmmap is distributed in the hope that it will be useful, 19 | but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | GNU Lesser General Public License for more details. 22 | 23 | You should have received a copy of the GNU Lesser General Public License 24 | along with stmmap. If not, see . 25 | 26 | */ 27 | 28 | #define USE_ATOMIC_BUILTINS 29 | 30 | 31 | #ifdef USE_ATOMIC_BUILTINS 32 | 33 | #include 34 | 35 | typedef int32_t atomic_lock; 36 | 37 | #else 38 | 39 | #include 40 | typedef OSSpinLock atomic_lock; 41 | 42 | #endif 43 | 44 | 45 | 46 | 47 | void atomic_spin_lock_lock(atomic_lock *lock); 48 | 49 | void atomic_spin_lock_unlock(atomic_lock *lock); 50 | 51 | int32_t atomic_increment_32(int32_t *addr); 52 | 53 | int32_t atomic_decrement_32(int32_t *addr); 54 | 55 | int32_t atomic_compare_and_swap_32(int32_t oldval, int32_t newval, int32_t *addr); 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /autoconfigure.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | void *open_and_map_file(char *filename, size_t length, int flags, int prot, int *fdp) { 12 | int fd; 13 | void *status; 14 | struct stat sbuf; 15 | 16 | if ((fd = open(filename, O_RDWR|O_CREAT, 0777)) < 0) { 17 | fprintf(stderr, "could not open file %s: %s\n", filename, strerror(errno)); 18 | exit(-1); 19 | } 20 | fstat(fd, &sbuf); 21 | if ((sbuf.st_mode & S_IFMT) != S_IFREG) { 22 | perror("bad filetype"); 23 | exit(-1); 24 | } 25 | if (ftruncate(fd, 
length) == -1) { 26 | perror("ftruncate failed"); 27 | exit(-1); 28 | } 29 | 30 | status = mmap(0, length, prot, flags, fd, 0); 31 | if (status == (void*)-1) { 32 | perror("mmap failed"); 33 | exit(-1); 34 | } 35 | *fdp = fd; 36 | return status; 37 | 38 | } 39 | 40 | 41 | char *seg1; 42 | char *seg2; 43 | 44 | static void sigbus_handler(int sig, siginfo_t *si, void *foo) { 45 | printf("-DPAGE_ACCESS_SIGNAL=SIGBUS"); 46 | mprotect(seg1, 0x1000, PROT_READ|PROT_WRITE); 47 | 48 | } 49 | 50 | static void sigsegv_handler(int sig, siginfo_t *si, void *foo) { 51 | printf("-DPAGE_ACCESS_SIGNAL=SIGSEGV"); 52 | mprotect(seg1, 0x1000, PROT_READ|PROT_WRITE); 53 | } 54 | 55 | 56 | 57 | 58 | int main (int argc, const char * argv[]) { 59 | 60 | int status; 61 | void *statusp; 62 | 63 | char *filename = "/tmp/test_mmap"; 64 | int fd1, fd2; 65 | 66 | int prot = PROT_READ|PROT_WRITE; 67 | 68 | int page_size = getpagesize(); 69 | size_t length = page_size; 70 | 71 | struct sigaction sa1; 72 | 73 | struct sigaction sa2; 74 | 75 | sa1.sa_flags = SA_SIGINFO; 76 | sigemptyset(&sa1.sa_mask); 77 | sa1.sa_sigaction = sigbus_handler; 78 | 79 | if ((status = sigaction(SIGBUS, &sa1, NULL)) != 0) { 80 | perror("sigaction failed"); 81 | } 82 | 83 | sa2.sa_flags = SA_SIGINFO; 84 | sigemptyset(&sa2.sa_mask); 85 | sa2.sa_sigaction = sigsegv_handler; 86 | 87 | if ((status = sigaction(SIGSEGV, &sa2, NULL)) != 0) { 88 | perror("sigaction failed"); 89 | exit(-1); 90 | } 91 | 92 | 93 | seg1 = open_and_map_file(filename, length, MAP_SHARED, PROT_NONE, &fd1); 94 | 95 | seg1[0] = 1; 96 | 97 | close(fd1); 98 | 99 | // now open one shared and one private mapping 100 | seg1 = open_and_map_file(filename, length, MAP_PRIVATE, prot, &fd1); 101 | // printf("fd1 = %d, seg1 = %lx\n", fd1, (unsigned long)seg1); 102 | 103 | seg2 = open_and_map_file(filename, length, MAP_SHARED, prot, &fd2); 104 | // printf("fd2 = %d, seg2 = %lx\n", fd2, (unsigned long)seg2); 105 | 106 | seg2[0] = 2; 107 | 108 | if (seg1[0] == 1) 109 | 
printf(" -DPRIVATE_MAPPING_IS_PRIVATE\n"); 110 | else 111 | printf("\n"); 112 | 113 | close(fd1); 114 | close(fd2); 115 | 116 | return 0; 117 | } 118 | -------------------------------------------------------------------------------- /example.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include // for getpid() 3 | #include 4 | #include 5 | 6 | 7 | #include "stm.h" 8 | #include "segalloc.h" // just for seg_verify_tree_integrity() 9 | #include "stmalloc.h" 10 | 11 | // This is normally supplied, when needed from the Makefile. 12 | // #define THREADS 13 | 14 | #ifdef __APPLE__ 15 | 16 | #include 17 | #include 18 | 19 | 20 | // This makes it possible to use gdb to debug this, to an extent. 21 | // 22 | static void disable_gdb_nosiness() __attribute__ ((constructor)); 23 | 24 | static void disable_gdb_nosiness() 25 | { 26 | // kern_return_t success = 27 | task_set_exception_ports(mach_task_self(), 28 | EXC_MASK_BAD_ACCESS, 29 | MACH_PORT_NULL, 30 | EXCEPTION_STATE_IDENTITY, 31 | MACHINE_THREAD_STATE); 32 | // assert(success == KERN_SUCCESS); 33 | } 34 | 35 | #endif 36 | 37 | 38 | #define array_size 128 39 | 40 | void alloc_test(struct shared_segment *seg, int n_iterations) { 41 | int i, j, size; 42 | size_t size_mask = 0xffff; 43 | void *allocated[array_size]; 44 | 45 | memset(allocated, 0, sizeof(allocated)); 46 | srandom(getpid()); 47 | 48 | for(i=0; i. 
24 | 25 | */ 26 | 27 | #include "AVLtree.h" 28 | #include "segalloc.h" 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | typedef struct segalloc_node { 36 | AVLtreeNode avlnode; 37 | size_t size; 38 | size_t size_mask; 39 | } segalloc_node; 40 | 41 | 42 | static void set_size_mask(AVLtreeNode *a) { 43 | segalloc_node *l, *r; 44 | segalloc_node *n = (segalloc_node *)a; 45 | 46 | n->size_mask = n->size; 47 | 48 | l = (segalloc_node *)a->left; 49 | r = (segalloc_node *)a->right; 50 | if (l) 51 | n->size_mask |= l->size_mask; 52 | 53 | if (r) 54 | n->size_mask |= r->size_mask; 55 | 56 | } 57 | 58 | static void set_size_mask_r(AVLtreeNode *a) { 59 | AVLtreeNode *p; 60 | set_size_mask(a); 61 | p = a->parent; 62 | if (p) 63 | set_size_mask_r(p); 64 | } 65 | 66 | 67 | 68 | 69 | static void *nodekey(void *n) { 70 | return n; 71 | } 72 | 73 | static int nodecmp(void* a, void* b) { 74 | 75 | if (a==b) 76 | return 0; 77 | else if (a= the size requested. 97 | 98 | return remaining_bits; 99 | } 100 | 101 | size_t seg_block_size_for (size_t size) { 102 | static size_t t = 0; 103 | 104 | if (t == 0) t = least_power_of_2_ge(sizeof(segalloc_node)); 105 | 106 | if (size <= t) 107 | return t; 108 | else 109 | return least_power_of_2_ge(size); 110 | } 111 | 112 | 113 | // here x is a bitmask of powers of two, and size is a number with a single bit (a power of two) 114 | // find the least power of 2 in x that is at least as large as size. 115 | // 116 | static size_t least_power_of_2_gt_in (size_t x, size_t size) { 117 | x &= -size; // just keep the bits to the left of the size bit. 118 | return x & -x; // finds the low order bit of what's left. 
119 | } 120 | 121 | 122 | static size_t greatest_power_of_2_le (size_t size) { 123 | size_t remaining_bits, low_bit; 124 | 125 | remaining_bits = size; 126 | while ((low_bit = (remaining_bits & -remaining_bits)) != remaining_bits) 127 | remaining_bits ^= low_bit; 128 | 129 | return low_bit; 130 | 131 | } 132 | 133 | 134 | static void split_node(segalloc_node* t, size_t size, segalloc_node **free_list) { 135 | segalloc_node *new_node; 136 | while (t->size > size) { 137 | t->size >>= 1; 138 | set_size_mask_r((AVLtreeNode *)t); 139 | 140 | new_node = (void*)t + t->size; 141 | new_node->size = t->size; 142 | 143 | #if 0 144 | if (AVLsearch((AVLtreeNode*)*free_list, (AVLtreeNode*)new_node, nodecmp, nodekey) != NULL) { 145 | fprintf(stderr, "Already in tree!\n"); 146 | } 147 | #endif 148 | 149 | AVLaddToTree((AVLtreeNode*)new_node, (AVLtreeNode**)free_list, nodecmp, nodekey); 150 | } 151 | } 152 | 153 | // Find the best-fitting block on the free-list for a block of size "size" below. 154 | // Size must be a power of two. 155 | // 156 | static AVLtreeNode* segalloc_search(AVLtreeNode* t, size_t size, segalloc_node **free_list) { 157 | AVLtreeNode* result = NULL; 158 | size_t tsize; 159 | 160 | if (t == NULL) 161 | return NULL; 162 | 163 | if ((tsize = ((segalloc_node*)t)->size) == size) 164 | return t; 165 | 166 | size_t left_smallest = t->left? least_power_of_2_gt_in (((segalloc_node*)t->left)->size_mask, size) : 0; 167 | size_t right_smallest = t->right? 
least_power_of_2_gt_in (((segalloc_node*)t->right)->size_mask, size) : 0; 168 | 169 | if (size > tsize) { 170 | // current node won't do -- just decide between left & right branches 171 | 172 | if (left_smallest == 0) { 173 | if (right_smallest == 0) { 174 | result = NULL; 175 | } else { 176 | result = segalloc_search(t->right, size, free_list); 177 | } 178 | } else { 179 | if (right_smallest == 0) { 180 | result = segalloc_search(t->left, size, free_list); 181 | } else { 182 | if (left_smallest < right_smallest) 183 | result = segalloc_search(t->left, size, free_list); 184 | else 185 | result = segalloc_search(t->right, size, free_list); 186 | } 187 | } 188 | } else { 189 | // current node is usable. 190 | 191 | if (left_smallest && left_smallest < tsize) { 192 | if (right_smallest && right_smallest < tsize) { 193 | // both left and right branches usable and better than current node 194 | if (left_smallest > right_smallest) 195 | result = segalloc_search(t->right, size, free_list); 196 | else 197 | result = segalloc_search(t->left, size, free_list); 198 | } else { 199 | // only left branch usable 200 | result = segalloc_search(t->left, size, free_list); 201 | } 202 | } else { 203 | if (right_smallest && right_smallest < tsize) { 204 | // only right branch usable 205 | result = segalloc_search(t->right, size, free_list); 206 | } else { 207 | // neither left or right branches are better than current node 208 | result = t; 209 | // check here to split the node, since it is too big. 
210 | split_node((segalloc_node*)t, size, free_list); 211 | } 212 | 213 | } 214 | 215 | } 216 | return result; 217 | 218 | } 219 | 220 | void *seg_alloc(size_t size, void *free_list) { 221 | 222 | AVLtreeNode **ptr_to_free_list_ptr = (AVLtreeNode**)free_list; 223 | 224 | AVLtreeNode* result; 225 | size_t real_size; 226 | 227 | result = segalloc_search(*ptr_to_free_list_ptr, seg_block_size_for(size), free_list); 228 | if (result) { 229 | real_size = ((segalloc_node *)result)->size; 230 | AVLremoveFromTree(result, (AVLtreeNode**)free_list); 231 | memset(result, 0, real_size); 232 | } 233 | 234 | 235 | return ((void*)result); 236 | 237 | } 238 | 239 | 240 | static size_t find_potential_buddy(off_t offset, size_t buddy_size) { 241 | 242 | size_t buddy_lowbits = ~(-buddy_size); 243 | if (offset & buddy_lowbits) 244 | return -1; 245 | else 246 | return (offset ^ buddy_size); 247 | } 248 | 249 | static void merge_with_buddies(void *base_va, segalloc_node *freed_object, segalloc_node **free_list) { 250 | 251 | off_t offset, buddy_offset; 252 | segalloc_node *buddy_block; 253 | 254 | 255 | // while (freed_object->size <= MAX_BLOCK_SIZE) { 256 | 257 | while (1) { 258 | 259 | offset = (void*)freed_object - base_va; 260 | 261 | if ((buddy_offset = find_potential_buddy(offset, freed_object->size)) == -1) 262 | break; 263 | 264 | if ((buddy_block = (segalloc_node *)AVLsearch((AVLtreeNode*)*free_list, 265 | base_va + buddy_offset, nodecmp, nodekey)) != NULL && 266 | buddy_block->size == freed_object->size) { 267 | 268 | size_t fsize = freed_object->size; 269 | 270 | if (buddy_block > freed_object) { 271 | AVLremoveFromTree((AVLtreeNode *)buddy_block, (AVLtreeNode**)free_list); 272 | } else { 273 | AVLremoveFromTree((AVLtreeNode *)freed_object, (AVLtreeNode**)free_list); 274 | freed_object = buddy_block; 275 | } 276 | freed_object->size = fsize << 1; 277 | set_size_mask_r((AVLtreeNode*)freed_object); 278 | 279 | } else { 280 | // there is no buddy. 
281 | return; 282 | } 283 | } 284 | } 285 | 286 | static int nodes_overlap_cmp(void* a, void* b) { 287 | 288 | if ((a <= b && b < a + ((segalloc_node*)a)->size) || 289 | (b <= a && a < b + ((segalloc_node*)b)->size)) 290 | return 0; 291 | else if (asize = block_size; 312 | AVLaddToTree((AVLtreeNode*)object_va, (AVLtreeNode**)free_list, nodecmp, nodekey); 313 | merge_with_buddies(base_va, (segalloc_node*)object_va, free_list); 314 | 315 | } 316 | 317 | void *seg_alloc_init(void *base_va, size_t size, int mode) { 318 | 319 | AVLuserHook = set_size_mask; 320 | 321 | if (mode == 1) { 322 | int first_time = 1; 323 | void *va = base_va; 324 | size_t allocated_size; 325 | size_t remaining_size = size; 326 | size_t min_block_size; 327 | segalloc_node *n; 328 | segalloc_node *tmp_free_list = NULL; 329 | 330 | min_block_size = least_power_of_2_ge(sizeof(segalloc_node)); 331 | 332 | while (remaining_size >= min_block_size) { 333 | allocated_size = greatest_power_of_2_le(remaining_size); 334 | n = va; 335 | n->size = allocated_size; 336 | 337 | if (first_time) { 338 | AVLaddToTree((AVLtreeNode*)n, (AVLtreeNode**)&tmp_free_list, nodecmp, nodekey); 339 | if (seg_alloc(min_block_size, &tmp_free_list) != base_va) { 340 | fprintf(stderr, "seg_alloc_init: initial allocation != base_va\n"); 341 | exit(-1); 342 | } 343 | *(segalloc_node**)base_va = tmp_free_list; // move the tree root to the base of the segment 344 | first_time = 0; 345 | } else { 346 | AVLaddToTree((AVLtreeNode*)n, (AVLtreeNode**)base_va, nodecmp, nodekey); 347 | } 348 | 349 | va += allocated_size; 350 | remaining_size -= allocated_size; 351 | } 352 | } 353 | 354 | return (segalloc_node **)base_va; 355 | } 356 | 357 | static int verify_tree_integrity(AVLtreeNode *tt, AVLtreeNode* parent, void* lower_bound, void* upper_bound) { 358 | 359 | segalloc_node *t; 360 | size_t size_mask; 361 | int ldepth, rdepth, depth, bal; 362 | int result = 0; 363 | 364 | t = (segalloc_node *)tt; 365 | 366 | if (lower_bound && ((void*)t < 
lower_bound)) { 367 | fprintf(stderr, "overlapping nodes: node %lx < lower bound %lx\n", 368 | (unsigned long)t, (unsigned long)lower_bound); 369 | result++; 370 | } 371 | 372 | if (upper_bound && (((void*)t + t->size) > upper_bound)) { 373 | fprintf(stderr, "overlapping nodes: node %lx[%lx] > upper bound %lx\n", 374 | (unsigned long)t, t->size, (unsigned long)upper_bound); 375 | result++; 376 | } 377 | 378 | 379 | if (tt->parent != parent) { 380 | fprintf(stderr, "bad parent: node %lx, parent is %lx, should be %lx\n", 381 | (unsigned long)tt, (unsigned long)tt->parent, (unsigned long)parent); 382 | result++; 383 | } 384 | size_mask = t->size | (tt->right? ((segalloc_node*)tt->right)->size_mask : 0) 385 | | (tt->left? ((segalloc_node*)tt->left)->size_mask : 0); 386 | 387 | if (size_mask != t->size_mask) { 388 | fprintf(stderr, "Node %lx, size mask is %lx, should be %lx. size=%lx, lmask=%lx, rmask=%lx\n", 389 | (unsigned long)t, t->size_mask, size_mask, t->size, 390 | tt->left? ((segalloc_node*)tt->left)->size_mask:0, 391 | tt->right? ((segalloc_node*)tt->right)->size_mask:0); 392 | result++; 393 | } 394 | 395 | ldepth = tt->left? tt->left->depth : 0; 396 | rdepth = tt->right? tt->right->depth : 0; 397 | depth = (((ldepth > rdepth)? 
ldepth : rdepth) + 1); 398 | 399 | if (depth != tt->depth) { 400 | fprintf(stderr, "depth is %d, should be %d\n", tt->depth, depth); 401 | result++; 402 | } 403 | 404 | bal = ldepth - rdepth; 405 | 406 | if (bal < -1 || bal > 1) { 407 | fprintf(stderr, "tree out of balance: %d\n", bal); 408 | result++; 409 | } 410 | 411 | if (tt->left && tt->left >= tt) { 412 | fprintf(stderr, "left branch %lx not to left of its parent %lx\n", 413 | (unsigned long)tt->left, (unsigned long)tt); 414 | result++; 415 | } 416 | 417 | if (tt->right && tt->right <= tt) { 418 | fprintf(stderr, "right branch %lx not to right of its parent %lx\n", 419 | (unsigned long)tt->right, (unsigned long)tt); 420 | result++; 421 | } 422 | 423 | 424 | if (tt->left) 425 | result += verify_tree_integrity(tt->left, tt, lower_bound, tt); 426 | 427 | if (tt->right) 428 | result += verify_tree_integrity(tt->right, tt, (void*)t + t->size, upper_bound); 429 | 430 | return result; 431 | } 432 | 433 | int seg_verify_tree_integrity(segalloc_node *free_list) { 434 | return verify_tree_integrity((AVLtreeNode*)free_list, NULL, NULL, NULL); 435 | } 436 | 437 | 438 | 439 | static void __overlap_check(segalloc_node *t, void *base, size_t size, void* lower_bound, void* upper_bound) { 440 | 441 | AVLtreeNode *tt = (AVLtreeNode *)t; 442 | if (lower_bound && (base < lower_bound)) { 443 | fprintf(stderr, "overlapping nodes\n"); 444 | } 445 | 446 | if (upper_bound && ((base + size) > upper_bound)) { 447 | fprintf(stderr, "overlapping nodes\n"); 448 | } 449 | 450 | if (base <= (void*)t) { 451 | if (tt->left) 452 | __overlap_check((segalloc_node*)tt->left, base, size, lower_bound, tt); 453 | 454 | } 455 | if (base >= (void*)t) { 456 | if (tt->right) 457 | __overlap_check((segalloc_node*)tt->right, base, size, (void*)t + t->size, upper_bound); 458 | } 459 | } 460 | 461 | 462 | void overlap_check(segalloc_node *t, void *base, size_t size) { 463 | __overlap_check(t, base, size, NULL, NULL); 464 | } 465 | 466 | 467 | void 
seg_print_free_list(segalloc_node *t) { 468 | AVLtreeNode *a = (AVLtreeNode *)t; 469 | 470 | if (a->left) 471 | seg_print_free_list((segalloc_node*)a->left); 472 | 473 | printf("[ %lx, %lx ] %lx\n", (unsigned long)t, (unsigned long)t+t->size, (unsigned long)t->size); 474 | 475 | if (a->right) 476 | seg_print_free_list((segalloc_node*)a->right); 477 | } 478 | 479 | 480 | // If this seems ridiculous, it is because the alternative C++ implementation for the 481 | // multi-threaded version of stmmap does a bit more work here. 482 | struct segalloc_node* seg_free_list_from_free_list_addr(void *free_list_addr) 483 | { 484 | return *(segalloc_node **)free_list_addr; 485 | } 486 | 487 | 488 | 489 | 490 | 491 | -------------------------------------------------------------------------------- /segalloc.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | segalloc.cpp 4 | 5 | This is the multi-threading compatible implementation of a low-level memory 6 | allocator for memory segments. It knows nothing of the STM package or any of 7 | its objects. It is used by stmalloc.c. 8 | 9 | Copyright 2009 Shel Kaphan 10 | 11 | This file is part of stmmap. 12 | 13 | stmmap is free software: you can redistribute it and/or modify 14 | it under the terms of the GNU Lesser General Public License as published by 15 | the Free Software Foundation, either version 3 of the License, or 16 | (at your option) any later version. 17 | 18 | stmmap is distributed in the hope that it will be useful, 19 | but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 | GNU Lesser General Public License for more details. 22 | 23 | You should have received a copy of the GNU Lesser General Public License 24 | along with stmmap. If not, see . 
25 | 26 | */ 27 | 28 | #include "AVLtree.hpp" 29 | #include "segalloc.h" 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | typedef char voidish; 37 | 38 | 39 | class segalloc_node : public AVLtreeNode { 40 | public: 41 | size_t size; 42 | size_t size_mask; 43 | 44 | // virtual int compareToKey(void *key); 45 | // virtual void* nodeKey(); 46 | 47 | segalloc_node(size_t _size) : AVLtreeNode() { 48 | size = _size; 49 | }; 50 | 51 | }; 52 | 53 | static void set_size_mask(segalloc_node *n) { 54 | segalloc_node *l, *r; 55 | 56 | AVLtreeNode *a = n; 57 | n->size_mask = n->size; 58 | 59 | l = (segalloc_node *)a->left.get(); 60 | r = (segalloc_node *)a->right.get(); 61 | if (l) 62 | n->size_mask |= l->size_mask; 63 | 64 | if (r) 65 | n->size_mask |= r->size_mask; 66 | 67 | } 68 | 69 | static void set_size_mask_r(segalloc_node *a) { 70 | segalloc_node *p; 71 | set_size_mask(a); 72 | p = (segalloc_node *)a->parent.get(); 73 | if (p) 74 | set_size_mask_r(p); 75 | } 76 | 77 | #if 0 78 | void* segalloc_node::nodeKey() { 79 | return this; 80 | } 81 | 82 | int segalloc_node::compareToKey(void *key) { 83 | 84 | if (this == key) 85 | return 0; 86 | else if ((voidish*)this < (voidish*)key) 87 | return -1; 88 | else { 89 | return 1; 90 | } 91 | } 92 | #endif 93 | 94 | static void *nodekey(void *n) { 95 | return n; 96 | } 97 | 98 | static int nodecmp(void* a, void* b) { 99 | 100 | if (a==b) 101 | return 0; 102 | else if ((voidish*)a < (voidish*)b) 103 | return -1; 104 | else { 105 | return 1; 106 | } 107 | 108 | } 109 | 110 | 111 | // here size is a number, and we want the smallest power of two that is at least as large as size 112 | // 113 | static size_t least_power_of_2_ge (size_t size) { 114 | size_t remaining_bits, lowbit; 115 | 116 | remaining_bits = size; 117 | while ((lowbit = (remaining_bits & -remaining_bits)) != remaining_bits) 118 | remaining_bits ^= lowbit; 119 | 120 | if (size != remaining_bits) 121 | remaining_bits <<= 1; // This is now the smallest 
power of 2 >= the size requested. 122 | 123 | return remaining_bits; 124 | } 125 | 126 | size_t seg_block_size_for (size_t size) { 127 | static size_t t = 0; 128 | 129 | if (t == 0) t = least_power_of_2_ge(sizeof(segalloc_node)); 130 | 131 | if (size <= t) 132 | return t; 133 | else 134 | return least_power_of_2_ge(size); 135 | } 136 | 137 | 138 | // here x is a bitmask of powers of two, and size is a number with a single bit (a power of two) 139 | // find the least power of 2 in x that is at least as large as size. 140 | // 141 | static size_t least_power_of_2_gt_in (size_t x, size_t size) { 142 | x &= -size; // just keep the bits to the left of the size bit. 143 | return x & -x; // finds the low order bit of what's left. 144 | } 145 | 146 | 147 | static size_t greatest_power_of_2_le (size_t size) { 148 | size_t remaining_bits, low_bit; 149 | 150 | remaining_bits = size; 151 | while ((low_bit = (remaining_bits & -remaining_bits)) != remaining_bits) 152 | remaining_bits ^= low_bit; 153 | 154 | return low_bit; 155 | 156 | } 157 | 158 | 159 | static void split_node(segalloc_node* t, size_t size, offset_ptr* free_list_addr) { 160 | segalloc_node *new_node; 161 | while (t->size > size) { 162 | t->size >>= 1; 163 | set_size_mask_r(t); 164 | 165 | new_node = (segalloc_node*)((voidish*)t + t->size); 166 | new(new_node) segalloc_node (t->size); // initialize node in place 167 | #if 0 168 | if (AVLsearch(free_list_addr->get(), new_node, nodecmp, nodekey) != NULL) { 169 | fprintf(stderr, "Already in tree!\n"); 170 | } 171 | #endif 172 | 173 | AVLaddToTree(new_node, (offset_ptr*)free_list_addr, nodecmp, nodekey); 174 | } 175 | } 176 | 177 | // Find the best-fitting block on the free-list for a block of size "size" below. 178 | // Size must be a power of two. 
179 | // 180 | static segalloc_node* segalloc_search(AVLtreeNode* t, size_t size, offset_ptr* free_list_addr) { 181 | segalloc_node* result = NULL; 182 | size_t tsize; 183 | 184 | if (t == NULL) 185 | return NULL; 186 | 187 | if ((tsize = ((segalloc_node*)t)->size) == size) 188 | return (segalloc_node*)t; 189 | 190 | size_t left_smallest = t->left? least_power_of_2_gt_in (((segalloc_node*)t->left.get())->size_mask, size) : 0; 191 | size_t right_smallest = t->right? least_power_of_2_gt_in (((segalloc_node*)t->right.get())->size_mask, size) : 0; 192 | 193 | if (size > tsize) { 194 | // current node won't do -- just decide between left & right branches 195 | 196 | if (left_smallest == 0) { 197 | if (right_smallest == 0) { 198 | result = NULL; 199 | } else { 200 | result = segalloc_search(t->right.get(), size, free_list_addr); 201 | } 202 | } else { 203 | if (right_smallest == 0) { 204 | result = segalloc_search(t->left.get(), size, free_list_addr); 205 | } else { 206 | if (left_smallest < right_smallest) 207 | result = segalloc_search(t->left.get(), size, free_list_addr); 208 | else 209 | result = segalloc_search(t->right.get(), size, free_list_addr); 210 | } 211 | } 212 | } else { 213 | // current node is usable. 
214 | 215 | if (left_smallest && left_smallest < tsize) { 216 | if (right_smallest && right_smallest < tsize) { 217 | // both left and right branches usable and better than current node 218 | if (left_smallest > right_smallest) 219 | result = segalloc_search(t->right.get(), size, free_list_addr); 220 | else 221 | result = segalloc_search(t->left.get(), size, free_list_addr); 222 | } else { 223 | // only left branch usable 224 | result = segalloc_search(t->left.get(), size, free_list_addr); 225 | } 226 | } else { 227 | if (right_smallest && right_smallest < tsize) { 228 | // only right branch usable 229 | result = segalloc_search(t->right.get(), size, free_list_addr); 230 | } else { 231 | // neither left or right branches are better than current node 232 | result = (segalloc_node*)t; 233 | // check here to split the node, since it is too big. 234 | split_node((segalloc_node*)t, size, free_list_addr); 235 | } 236 | 237 | } 238 | 239 | } 240 | return result; 241 | 242 | } 243 | 244 | void *seg_alloc(size_t size, void* free_list_addr) { 245 | 246 | offset_ptr *ptr_to_free_list_ptr = (offset_ptr*)free_list_addr; 247 | 248 | AVLtreeNode* result; 249 | size_t real_size; 250 | 251 | result = segalloc_search(ptr_to_free_list_ptr->get(), seg_block_size_for(size), ptr_to_free_list_ptr); 252 | if (result) { 253 | real_size = ((segalloc_node *)result)->size; 254 | AVLremoveFromTree(result, (offset_ptr*) free_list_addr); 255 | memset(result, 0, real_size); 256 | } 257 | 258 | 259 | return ((void*)result); 260 | 261 | } 262 | 263 | 264 | static size_t find_potential_buddy(off_t offset, size_t buddy_size) { 265 | 266 | size_t buddy_lowbits = ~(-buddy_size); 267 | if (offset & buddy_lowbits) 268 | return -1; 269 | else 270 | return (offset ^ buddy_size); 271 | } 272 | 273 | static void merge_with_buddies(void *base_va, segalloc_node *freed_object, 274 | offset_ptr* free_list_addr) { 275 | 276 | off_t offset, buddy_offset; 277 | segalloc_node *buddy_block; 278 | 279 | 280 | // while 
(freed_object->size <= MAX_BLOCK_SIZE) { 281 | 282 | while (1) { 283 | 284 | offset = (voidish*)freed_object - (voidish*)base_va; 285 | 286 | if ((buddy_offset = find_potential_buddy(offset, freed_object->size)) == -1) 287 | break; 288 | 289 | offset_ptr *ptr_to_free_list_ptr = (offset_ptr*)free_list_addr; 290 | 291 | if ((buddy_block = (segalloc_node *)AVLsearch(ptr_to_free_list_ptr->get(), 292 | (voidish*)base_va + buddy_offset, nodecmp, nodekey)) != NULL && 293 | buddy_block->size == freed_object->size) { 294 | 295 | size_t fsize = freed_object->size; 296 | 297 | if (buddy_block > freed_object) { 298 | AVLremoveFromTree(buddy_block, (offset_ptr*)free_list_addr); 299 | } else { 300 | AVLremoveFromTree(freed_object, (offset_ptr*)free_list_addr); 301 | freed_object = buddy_block; 302 | } 303 | freed_object->size = fsize << 1; 304 | set_size_mask_r(freed_object); 305 | 306 | } else { 307 | // there is no buddy. 308 | return; 309 | } 310 | } 311 | } 312 | 313 | static int nodes_overlap_cmp(void* aa, void* bb) { 314 | voidish *a = (voidish*)aa; 315 | voidish *b = (voidish*)bb; 316 | 317 | if ((a <= b && b < a + ((segalloc_node*)a)->size) || 318 | (b <= a && a < b + ((segalloc_node*)b)->size)) 319 | return 0; 320 | else if (a *ptr_to_free_list_ptr = (offset_ptr*)free_list_addr; 333 | 334 | if (AVLsearch(ptr_to_free_list_ptr->get(), object_va, nodes_overlap_cmp, nodekey) != NULL) { 335 | fprintf(stderr, "seg_free: node 0x%lx already in free list!\n", (unsigned long)object_va); 336 | return; 337 | } 338 | 339 | new(object_va) segalloc_node(block_size); // initialize node in place 340 | AVLaddToTree((AVLtreeNode*)object_va, (offset_ptr*)free_list_addr, nodecmp, nodekey); 341 | merge_with_buddies(base_va, (segalloc_node*)object_va, ptr_to_free_list_ptr); 342 | 343 | } 344 | 345 | void *seg_alloc_init(void *base_va, size_t size, int mode) { 346 | 347 | AVLuserHook = (void (*)(AVLtreeNode*))set_size_mask; 348 | 349 | if (mode == 1) { 350 | int first_time = 1; 351 | void *va 
= base_va; 352 | size_t allocated_size; 353 | size_t remaining_size = size; 354 | size_t min_block_size; 355 | offset_ptr tmp_free_list = NULL; 356 | 357 | min_block_size = least_power_of_2_ge(sizeof(segalloc_node)); 358 | 359 | // printf("min_block_size = %d, sizeof(segalloc_node) = %d\n", min_block_size, sizeof(segalloc_node)); 360 | 361 | while (remaining_size >= min_block_size) { 362 | allocated_size = greatest_power_of_2_le(remaining_size); 363 | 364 | new(va) segalloc_node(allocated_size); // initialize node in place 365 | 366 | if (first_time) { 367 | AVLaddToTree((segalloc_node*)va, &tmp_free_list, nodecmp, nodekey); 368 | if (seg_alloc(min_block_size, &tmp_free_list) != base_va) { 369 | fprintf(stderr, "seg_alloc_init: initial allocation != base_va\n"); 370 | exit(-1); 371 | } 372 | 373 | *(offset_ptr *)base_va = tmp_free_list; // move the tree root to the base of the segment 374 | first_time = 0; 375 | } else { 376 | AVLaddToTree((segalloc_node*)va, (offset_ptr*)base_va, nodecmp, nodekey); 377 | } 378 | 379 | va = (voidish*)va + allocated_size; 380 | remaining_size -= allocated_size; 381 | } 382 | } 383 | 384 | return base_va; // this is now the address of the offset_ptr that points to the free list. 
385 | } 386 | 387 | static int verify_tree_integrity(AVLtreeNode *tt, AVLtreeNode* parent, void* lower_bound, void* upper_bound) { 388 | 389 | segalloc_node *t; 390 | size_t size_mask; 391 | int ldepth, rdepth, depth, bal; 392 | int result = 0; 393 | 394 | t = (segalloc_node *)tt; 395 | 396 | if (lower_bound && ((void*)t < lower_bound)) { 397 | fprintf(stderr, "overlapping nodes: node %lx < lower bound %lx\n", 398 | (unsigned long)t, (unsigned long)lower_bound); 399 | result++; 400 | } 401 | 402 | if (upper_bound && (((voidish*)t + t->size) > upper_bound)) { 403 | fprintf(stderr, "overlapping nodes: node %lx[%lx] > upper bound %lx\n", 404 | (unsigned long)t, t->size, (unsigned long)upper_bound); 405 | result++; 406 | } 407 | 408 | 409 | if (tt->parent.get() != parent) { 410 | fprintf(stderr, "bad parent: node %lx, parent is %lx, should be %lx\n", 411 | (unsigned long)tt, (unsigned long)tt->parent.get(), (unsigned long)parent); 412 | result++; 413 | } 414 | size_mask = t->size | (tt->right? ((segalloc_node*)tt->right.get())->size_mask : 0) 415 | | (tt->left? ((segalloc_node*)tt->left.get())->size_mask : 0); 416 | 417 | if (size_mask != t->size_mask) { 418 | fprintf(stderr, "Node %lx, size mask is %lx, should be %lx. size=%lx, lmask=%lx, rmask=%lx\n", 419 | (unsigned long)t, t->size_mask, size_mask, t->size, 420 | tt->left? ((segalloc_node*)tt->left.get())->size_mask:0, 421 | tt->right? ((segalloc_node*)tt->right.get())->size_mask:0); 422 | result++; 423 | } 424 | 425 | ldepth = tt->left? tt->left->depth : 0; 426 | rdepth = tt->right? tt->right->depth : 0; 427 | depth = (((ldepth > rdepth)? 
ldepth : rdepth) + 1); 428 | 429 | if (depth != tt->depth) { 430 | fprintf(stderr, "depth is %d, should be %d\n", tt->depth, depth); 431 | result++; 432 | } 433 | 434 | bal = ldepth - rdepth; 435 | 436 | if (bal < -1 || bal > 1) { 437 | fprintf(stderr, "tree out of balance: %d\n", bal); 438 | result++; 439 | } 440 | 441 | if (tt->left && tt->left.get() >= tt) { 442 | fprintf(stderr, "left branch %lx not to left of its parent %lx\n", 443 | (unsigned long)tt->left.get(), (unsigned long)tt); 444 | result++; 445 | } 446 | 447 | if (tt->right && tt->right.get() <= tt) { 448 | fprintf(stderr, "right branch %lx not to right of its parent %lx\n", 449 | (unsigned long)tt->right.get(), (unsigned long)tt); 450 | result++; 451 | } 452 | 453 | 454 | if (tt->left) 455 | result += verify_tree_integrity(tt->left.get(), tt, lower_bound, tt); 456 | 457 | if (tt->right) 458 | result += verify_tree_integrity(tt->right.get(), tt, (voidish*)t + t->size, upper_bound); 459 | 460 | return result; 461 | } 462 | 463 | int seg_verify_tree_integrity(segalloc_node *free_list) { 464 | return verify_tree_integrity(free_list, NULL, NULL, NULL); 465 | } 466 | 467 | 468 | 469 | static void __overlap_check(segalloc_node *t, void* base, size_t size, void* lower_bound, void* upper_bound) { 470 | 471 | AVLtreeNode *tt = t; 472 | if (lower_bound && (base < lower_bound)) { 473 | fprintf(stderr, "overlapping nodes\n"); 474 | } 475 | 476 | if (upper_bound && (((voidish*)base + size) > upper_bound)) { 477 | fprintf(stderr, "overlapping nodes\n"); 478 | } 479 | 480 | if (base <= (void*)t) { 481 | if (tt->left) 482 | __overlap_check((segalloc_node*)tt->left.get(), base, size, lower_bound, tt); 483 | 484 | } 485 | if (base >= (void*)t) { 486 | if (tt->right) 487 | __overlap_check((segalloc_node*)tt->right.get(), base, size, (voidish*)t + t->size, upper_bound); 488 | } 489 | } 490 | 491 | 492 | void overlap_check(segalloc_node *t, void *base, size_t size) { 493 | __overlap_check(t, base, size, NULL, NULL); 494 | 
} 495 | 496 | 497 | void seg_print_free_list(segalloc_node *t) { 498 | AVLtreeNode *a = t; 499 | 500 | if (a->left) 501 | seg_print_free_list((segalloc_node*)a->left.get()); 502 | 503 | printf("[ %lx, %lx ] %lx\n", (unsigned long)t, (unsigned long)t+t->size, (unsigned long)t->size); 504 | 505 | if (a->right) 506 | seg_print_free_list((segalloc_node*)a->right.get()); 507 | } 508 | 509 | 510 | struct segalloc_node* seg_free_list_from_free_list_addr(void *free_list_addr) 511 | { 512 | offset_ptr *ptr_to_free_list_ptr = (offset_ptr*)free_list_addr; 513 | return ptr_to_free_list_ptr->get(); 514 | } 515 | 516 | 517 | 518 | 519 | 520 | 521 | -------------------------------------------------------------------------------- /segalloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | segalloc.h 4 | 5 | This is the interface to the low-level memory allocator for memory segments. 6 | It knows nothing of the STM package or any of its objects. It is used by stmalloc.c. 7 | You probably won't need to access this API. 8 | 9 | 10 | Copyright 2009 Shel Kaphan 11 | 12 | This file is part of stmmap. 13 | 14 | stmmap is free software: you can redistribute it and/or modify 15 | it under the terms of the GNU Lesser General Public License as published by 16 | the Free Software Foundation, either version 3 of the License, or 17 | (at your option) any later version. 18 | 19 | stmmap is distributed in the hope that it will be useful, 20 | but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 | GNU Lesser General Public License for more details. 23 | 24 | You should have received a copy of the GNU Lesser General Public License 25 | along with stmmap. If not, see . 
26 | 27 | */ 28 | 29 | 30 | #include 31 | 32 | #ifdef __cplusplus 33 | extern "C" { 34 | #endif 35 | 36 | struct segalloc_node; 37 | 38 | void *seg_alloc_init(void *base_va, size_t size, int mode); 39 | 40 | void *seg_alloc(size_t size, void *free_list_addr); 41 | 42 | void seg_free(void *object_va, size_t size, void *base_va, void *free_list_addr); 43 | 44 | size_t seg_block_size_for(size_t size); 45 | 46 | // some diagnostic routines: 47 | 48 | int seg_verify_tree_integrity(struct segalloc_node *free_list); 49 | 50 | void seg_print_free_list(struct segalloc_node*); 51 | 52 | struct segalloc_node* seg_free_list_from_free_list_addr(void *free_list_addr); 53 | 54 | #ifdef __cplusplus 55 | }; 56 | #endif 57 | 58 | -------------------------------------------------------------------------------- /stm.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | stm.c 4 | 5 | This is the implementation of the Software Transactional Memory system. 6 | The corresponding API is in stm.h. 7 | 8 | Copyright 2009 Shel Kaphan 9 | 10 | This file is part of stmmap. 11 | 12 | stmmap is free software: you can redistribute it and/or modify 13 | it under the terms of the GNU Lesser General Public License as published by 14 | the Free Software Foundation, either version 3 of the License, or 15 | (at your option) any later version. 16 | 17 | stmmap is distributed in the hope that it will be useful, 18 | but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | GNU Lesser General Public License for more details. 21 | 22 | You should have received a copy of the GNU Lesser General Public License 23 | along with stmmap. If not, see . 24 | 25 | */ 26 | 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include // absolutely need this for pwrite(). (Just spent an hour chasing this...) 
34 | // (leaving it in even though I'm not using pwrite() right now...) 35 | #include 36 | 37 | #include "atomic-compat.h" 38 | #include "stm.h" 39 | 40 | 41 | /* 42 | 43 | There are at least two versions of the semantics of mmap() out there. 44 | PRIVATE_MAPPING_IS_PRIVATE should be defined on systems where when we use mmap() 45 | with MAP_PRIVATE, our version of the file will not reflect any modifications made by 46 | other processes or threads. 47 | 48 | On systems where writes by other processes or threads are visible in pages mapped 49 | with MAP_PRIVATE that have not been written to, PRIVATE_MAPPING_IS_PRIVATE should NOT 50 | be defined. 51 | 52 | On all systems, MAP_PRIVATE implies that mapped pages we write to are private at 53 | least from the first write onward. 54 | 55 | If PRIVATE_MAPPING_IS_PRIVATE is defined: 56 | 57 | - shared memory segments are mapped with MAP_SHARED 58 | - when transactions start the memory is protected with PROT_NONE (no access). 59 | - when pages are touched in transactions, they are mapped MAP_PRIVATE and 60 | PROT_READ|PROT_WRITE (any access allowed). 61 | - on commit the shared segment is mapped MAP_SHARED and the modified pages 62 | are copied into it. 63 | - then the shared segment is protected with the user-specified protection between 64 | transactions. 65 | 66 | If PRIVATE_MAPPING_IS_PRIVATE is NOT defined: 67 | 68 | - "shared" memory segments are mapped with MAP_PRIVATE 69 | - when transactions start the memory is protected with PROT_NONE (no access). 70 | - when pages are touched in transactions, they are not remapped, but are 71 | protected with PROT_READ|PROT_WRITE (any access allowed). Because of 72 | copy-on-write semantics with MAP_PRIVATE, we now have a private copy of the 73 | written page that will not reflect changes made by other processes. 74 | - on commit the shared segment is mapped MAP_SHARED and the modified pages 75 | are copied into it. 
    - then the shared segment is mapped MAP_PRIVATE and protected with the
      user-specified protection between transactions.

*/

#ifdef __APPLE__
#ifndef PRIVATE_MAPPING_IS_PRIVATE
#define PRIVATE_MAPPING_IS_PRIVATE
#endif
#endif

// Signal that is delivered when a protected page is touched.
// On some systems this will be SIGSEGV.  On MacOS, for example, it must be SIGBUS.
// Defaults to SIGSEGV unless the build has already defined it.
//
#ifndef PAGE_ACCESS_SIGNAL
#define PAGE_ACCESS_SIGNAL SIGSEGV
#endif




// Capacity of the active-transaction tables (both the one in the metadata file and the
// per-segment snapshot of it).  add_active_transaction() exits the process when no slot
// below this limit can be claimed.
#define MAX_ACTIVE_TRANSACTIONS 100


// The structs used by stm.c are defined here and not in the header file, so they are opaque to other programs.
// To the extent necessary and useful, an access API is defined here and in stm.h.

//
// There's just one of these at the start of the metadata file associated with each shared segment.
// It lives in the MAP_SHARED metadata mapping, so the field layout must not be changed casually.
//
typedef struct transaction_data {
    transaction_id_t transaction_counter;     // global counter for transaction IDs in each segment
    atomic_lock transaction_lock;             // spin lock held while a transaction ID is allocated and registered
    int active_transaction_high_water;        // number of slots of active_transactions[] that may be in use
    transaction_id_t active_transactions[MAX_ACTIVE_TRANSACTIONS];   // IDs of running transactions; 0 marks a free slot

} transaction_data;


//
// There is an array of these in the metadata file, one per page of the shared segment.  It starts
// at the first page boundary after transaction_data (stm_open_shared_segment rounds the header up
// to a whole number of pages).  Each element holds the ID of the transaction currently modifying
// the page (if any), and the ID of the most recent transaction to have modified the page.
//
typedef struct page_table_element {
    transaction_id_t current_transaction;     // used to establish ownership of each page during commit
    transaction_id_t completed_transaction;   // used to keep a record of the last transaction to modify each page
} page_table_element;

//
// This represents a snapshot of a single page.  We take this snapshot on first access (read or write)
// within a transaction.  These are kept in a list sorted by the page's virtual address, so that
// it is easy to lock them in a known order during commit.
//
typedef struct snapshot_list_element {
    struct snapshot_list_element *next;
    void *original_page_va;                   // The virtual address where the "real" copy of this page lives
    void *original_page_snapshot;             // copy of the unmodified page, on first access.
                                              // (lock_segment_pages re-uses this buffer to hold the modified page.)
    int page_dirty;                           // during commit, we set this if we have modified the page.
    transaction_id_t snapshot_transaction_id; // the most recent transaction to have affected the page,
                                              // at the time the snapshot is taken.
} snapshot_list_element;

//
// There is a per-thread stack that keeps track of nested transactions.  We only really commit changes when we commit
// the outermost transaction (the last one on the stack).
//
typedef struct transaction_stack_element {
    struct transaction_stack_element *next;
    char *transaction_name;                   // caller's pointer, stored as-is (not copied)
} transaction_stack_element;


//
// This data structure represents a shared memory area and the metadata that goes with it.
// They are kept on a list (one list head per thread) that is sorted by inode, to facilitate
// locking in a known order to prevent deadlocks.
//
typedef struct shared_segment {
    struct shared_segment *next;

    char *filename;                           // Filename of file backing shared memory area
    int fd;                                   // File descriptor for above file
    ino_t inode;                              // inode of above file.
    char *metadata_filename;                  // "metadata" file for above file - contains control
                                              // info and page table with transaction info
    int metadata_fd;                          // file descriptor for metadata file

    int default_prot_flags;                   // protection flags (PROT_READ, PROT_WRITE, PROT_NONE)
                                              // for use on shared memory area *between* transactions
    size_t page_size;                         // cached value of operating system page size

    size_t shared_seg_size;                   // size of the shared memory area
    void *shared_base_va;                     // first virtual address of the shared memory area

    size_t transaction_data_size;             // size of the metadata area in memory
                                              // (page-rounded header plus one page_table_element per page)
    struct transaction_data *segment_transaction_data;  // the "control" information for all transactions on this
                                              // shared segment.
    struct page_table_element *segment_page_table;      // the page table describing transactions on this segment

    transaction_id_t transaction_id;          // current transaction ID, if any (0 means none)
    struct snapshot_list_element *snapshot_list;  // list of snapshotted pages accessed during a transaction
    struct snapshot_list_element *snapshot_pool;  // place to put snapshot list elements we're done with instead
                                              // of freeing and reallocating later.

    int n_prior_active_transactions;          // number of transactions active at the time the current one
                                              // started.
    transaction_id_t prior_active_transactions[MAX_ACTIVE_TRANSACTIONS];
                                              // array of transaction IDs of transactions active at the time
                                              // the current one started.

    void *free_list_addr;                     // if stmalloc is in use, this points to the free list header
} shared_segment;


// Bit mask selecting diagnostics on stderr.  As used in this file: bit 1 guards error messages,
// bit 2 guards collision messages, bit 4 guards per-transaction page traces.
static int stm_verbose;

// There used to be more globals, but now they are in thread-local storage
//
// static shared_segment *shared_segment_list;
// static transaction_stack_element *transaction_stack;
// jmp_buf stm_jmp_buf;
// int stm_errno;


// Keys for the per-thread values that replaced the globals listed above.
static pthread_key_t shared_segment_list_key;
static pthread_key_t transaction_stack_key;
static pthread_key_t stm_jmp_buf_key;
static pthread_key_t stm_errno_key;



// Head of the calling thread's list of open shared segments (NULL if none).
static shared_segment *shared_segment_list() {
    return (shared_segment*)pthread_getspecific(shared_segment_list_key);
}

// Replace the head of the calling thread's segment list.
static void set_shared_segment_list(shared_segment *seg) {
    pthread_setspecific(shared_segment_list_key, seg);
}


// Top of the calling thread's transaction stack (NULL when no transaction is open).
static transaction_stack_element *transaction_stack() {
    return (transaction_stack_element *)pthread_getspecific(transaction_stack_key);
}

// Replace the top of the calling thread's transaction stack.
static void set_transaction_stack(transaction_stack_element *trans) {
    pthread_setspecific(transaction_stack_key, trans);
}

// This one has to be global scope so clients can use it.
225 | jmp_buf *stm_jmp_buf() { 226 | return (jmp_buf *)pthread_getspecific(stm_jmp_buf_key); 227 | } 228 | 229 | 230 | void set_stm_jmp_buf(jmp_buf *jb) { 231 | pthread_setspecific(stm_jmp_buf_key, jb); 232 | } 233 | 234 | 235 | int stm_errno() { 236 | return (long int)pthread_getspecific(stm_errno_key); 237 | } 238 | 239 | static void set_stm_errno(int err) { 240 | long int lerr = err; 241 | pthread_setspecific(stm_errno_key, (void*)lerr); 242 | } 243 | 244 | 245 | static void create_thread_keys() { 246 | pthread_key_create(&shared_segment_list_key, NULL); 247 | pthread_key_create(&transaction_stack_key, NULL); 248 | pthread_key_create(&stm_jmp_buf_key, NULL); 249 | pthread_key_create(&stm_errno_key, NULL); 250 | 251 | } 252 | 253 | // Must be called in a thread, except the main thread, before doing any transactions 254 | void stm_init_thread_locals() 255 | { 256 | set_shared_segment_list(NULL); 257 | set_transaction_stack(NULL); 258 | set_stm_errno(0); 259 | 260 | set_stm_jmp_buf(calloc(1, sizeof(jmp_buf))); 261 | 262 | } 263 | 264 | 265 | // 266 | // The next few routines manage a shared list of active transaction IDs in the metadata segment. 
267 | // 268 | static void add_active_transaction(shared_segment *seg) { 269 | int i, high_water; 270 | transaction_data *td; 271 | 272 | td = seg->segment_transaction_data; 273 | for (high_water = td->active_transaction_high_water; 274 | high_water < MAX_ACTIVE_TRANSACTIONS; 275 | high_water = atomic_increment_32(&td->active_transaction_high_water)) { 276 | 277 | if (high_water >= MAX_ACTIVE_TRANSACTIONS) 278 | break; // could happen despite 'for' condition because of multiple processes accessing concurrently 279 | 280 | 281 | for (i = high_water - 1; i >= 0; i--) { 282 | if (atomic_compare_and_swap_32(0, seg->transaction_id, 283 | (int32_t*)&(td->active_transactions[i]))) { 284 | return; 285 | } 286 | } 287 | 288 | } 289 | 290 | if (stm_verbose & 1) 291 | fprintf(stderr, "add_active_transaction: Too many active transactions; recompile for larger number!\n"); 292 | exit(-1); 293 | // *** Is there a more graceful way to handle this case? There must be, but it eludes me. 294 | 295 | } 296 | 297 | static void delete_active_transaction(shared_segment *seg) { 298 | int i; 299 | transaction_data *td; 300 | 301 | td = seg->segment_transaction_data; 302 | for (i = 0; i < td->active_transaction_high_water; i++) { 303 | if (td->active_transactions[i] == seg->transaction_id) { 304 | td->active_transactions[i] = 0; 305 | 306 | #if 0 307 | // *** Functionally this is not necessary. If not decremented it will truly be the 308 | // "high water" mark and will just contain some empty elements. 
309 | 310 | if (i == td->active_transaction_high_water - 1) { 311 | atomic_decrement_32(&td->active_transaction_high_water); 312 | } 313 | 314 | #endif 315 | return; 316 | } 317 | } 318 | } 319 | 320 | 321 | 322 | static void snapshot_active_transactions(shared_segment *seg) { 323 | int i; 324 | transaction_data *td; 325 | 326 | td = seg->segment_transaction_data; 327 | seg->n_prior_active_transactions = 0; 328 | 329 | for (i = 0; i < td->active_transaction_high_water; i++) { 330 | if (td->active_transactions[i] != 0 && td->active_transactions[i] != seg->transaction_id) { 331 | seg->prior_active_transactions[seg->n_prior_active_transactions++] = td->active_transactions[i]; 332 | } 333 | } 334 | } 335 | 336 | static int find_prior_active_transaction(shared_segment *seg, transaction_id_t trans) { 337 | int i; 338 | for (i = 0; i < seg->n_prior_active_transactions; i++) { 339 | if (seg->prior_active_transactions[i] == trans) { 340 | return 1; 341 | } 342 | } 343 | return 0; 344 | } 345 | 346 | void print_snapshot_active_transactions(shared_segment *seg) { 347 | int i; 348 | for (i=0; in_prior_active_transactions; i++) { 349 | if (seg->prior_active_transactions[i]) 350 | printf("+ %d\n", seg->prior_active_transactions[i]); 351 | } 352 | } 353 | 354 | 355 | 356 | static int check_file_length(int fd, size_t length, ino_t *inode) { 357 | struct stat sbuf; 358 | fstat(fd, &sbuf); 359 | if (inode) *inode = sbuf.st_ino; 360 | if ((sbuf.st_mode & S_IFMT) != S_IFREG) { 361 | if (stm_verbose & 1) 362 | fprintf(stderr, "check_file_length: bad filetype"); 363 | set_stm_errno(STM_FILETYPE_ERROR); 364 | return -1; 365 | } else if (length > sbuf.st_size) { 366 | // fprintf(stderr, "file too short\n"); 367 | if (ftruncate(fd, length) == -1) { 368 | if (stm_verbose & 1) 369 | perror("check_file_length: ftruncate failed"); 370 | set_stm_errno(STM_FILESIZE_ERROR); 371 | return -1; 372 | } 373 | } 374 | return 0; 375 | } 376 | 377 | 378 | 379 | shared_segment 
*stm_open_shared_segment(char *filename, size_t segment_size, void *requested_va, int prot_flags) { 380 | void *status; 381 | int mmap_flags; 382 | int metadata_size; 383 | shared_segment *s, *prev; 384 | static const char *metadata_suffix = ".metadata"; 385 | 386 | shared_segment *seg; 387 | if ((seg = calloc(1, sizeof(shared_segment))) == NULL) { 388 | set_stm_errno(STM_ALLOC_ERROR); 389 | return NULL; 390 | } 391 | if ((seg->filename = calloc(1, strlen(filename) + 1)) == NULL) { 392 | set_stm_errno(STM_ALLOC_ERROR); 393 | stm_close_shared_segment(seg); 394 | return NULL; 395 | } 396 | strcpy(seg->filename, filename); 397 | 398 | if ((seg->fd = open(seg->filename, O_RDWR|O_CREAT, 0777)) < 0) { 399 | if (stm_verbose & 1) 400 | fprintf(stderr, "stm_open_shared_segment: could not open file %s: %s\n", seg->filename, strerror(errno)); 401 | set_stm_errno(STM_OPEN_ERROR); 402 | seg->fd = 0; 403 | stm_close_shared_segment(seg); 404 | return NULL; 405 | } 406 | 407 | seg->shared_seg_size = segment_size; 408 | 409 | if (check_file_length(seg->fd, seg->shared_seg_size, &seg->inode) != 0) { 410 | stm_close_shared_segment(seg); 411 | return NULL; 412 | } 413 | 414 | seg->metadata_filename = calloc(1, strlen(filename) + strlen(metadata_suffix) + 1); 415 | strcpy(seg->metadata_filename, filename); 416 | strcat(seg->metadata_filename, metadata_suffix); 417 | 418 | seg->page_size = getpagesize(); 419 | 420 | metadata_size = seg->page_size; 421 | while (metadata_size < sizeof(transaction_data)) 422 | metadata_size += seg->page_size; 423 | seg->transaction_data_size = ((segment_size/seg->page_size) * sizeof(page_table_element)) + metadata_size; 424 | 425 | if ((seg->metadata_fd = open(seg->metadata_filename, O_RDWR|O_CREAT, 0777)) < 0) { 426 | if (stm_verbose & 1) 427 | fprintf(stderr, "stm_open_shared_segment: could not open metadata file %s: %s\n", 428 | seg->filename, strerror(errno)); 429 | set_stm_errno(STM_OPEN_ERROR); 430 | seg->metadata_fd = 0; 431 | 
stm_close_shared_segment(seg); 432 | return NULL; 433 | } 434 | 435 | if (check_file_length(seg->metadata_fd, seg->transaction_data_size, NULL)) { 436 | stm_close_shared_segment(seg); 437 | return NULL; 438 | } 439 | 440 | seg->default_prot_flags = prot_flags; 441 | 442 | 443 | #ifdef PRIVATE_MAPPING_IS_PRIVATE 444 | mmap_flags = MAP_SHARED; 445 | #else 446 | mmap_flags = MAP_PRIVATE; 447 | #endif 448 | 449 | if (requested_va != NULL) 450 | mmap_flags |= MAP_FIXED; 451 | 452 | status = mmap(requested_va, seg->shared_seg_size, seg->default_prot_flags, mmap_flags, seg->fd, (off_t)0); 453 | 454 | if (status != (void*)-1) { 455 | seg->shared_base_va = status; 456 | // fprintf(stderr, "shared base va = %x\n", status); 457 | } else { 458 | if (stm_verbose & 1) 459 | perror("stm_open_shared_segment: error mapping shared segment"); 460 | set_stm_errno(STM_MMAP_ERROR); 461 | stm_close_shared_segment(seg); 462 | return NULL; 463 | } 464 | 465 | 466 | status = mmap(0, seg->transaction_data_size, PROT_READ|PROT_WRITE, MAP_SHARED, seg->metadata_fd, (off_t)0); 467 | 468 | if (status != (void*)-1) { 469 | seg->segment_transaction_data = (transaction_data *)status; 470 | seg->segment_page_table = (page_table_element*)((void*)seg->segment_transaction_data + metadata_size); 471 | } else { 472 | if (stm_verbose & 1) 473 | perror("stm_open_shared_segment: error mapping shared metadata segment"); 474 | set_stm_errno(STM_MMAP_ERROR); 475 | stm_close_shared_segment(seg); 476 | return NULL; 477 | } 478 | 479 | // Don't link this onto the segment list until the end, so we don't have to undo it if there is an error 480 | // above. And insert it into the segment list in ascending inode order. Inodes should be unique and stable, 481 | // so each process using a set of mapped files will be able to list them in the same order, avoiding livelocks 482 | // during commit. 
483 | 484 | for(s = shared_segment_list(), prev=NULL; s; prev = s, s = s->next) { 485 | if (seg->inode < s->inode) { 486 | break; 487 | } 488 | } 489 | 490 | seg->next = s; // either item to insert before, or NULL if no list or we ran off end 491 | if (prev) 492 | prev->next = seg; 493 | else 494 | set_shared_segment_list(seg); 495 | 496 | 497 | 498 | 499 | return seg; 500 | } 501 | 502 | 503 | 504 | 505 | #define n_histo_buckets 9 506 | int collision_histo[n_histo_buckets]; 507 | 508 | void print_collision_histo() { 509 | int i; 510 | printf("collision histogram:\n"); 511 | for (i=0; isnapshot_list; sl; prev = sl, sl = sl->next) { 523 | sl->original_page_va = NULL; 524 | sl->snapshot_transaction_id = 0; 525 | sl->page_dirty = 0; 526 | } 527 | 528 | if (prev != NULL) 529 | prev->next = seg->snapshot_pool; 530 | seg->snapshot_pool = seg->snapshot_list; 531 | seg->snapshot_list = NULL; 532 | 533 | } 534 | 535 | static void free_snapshot_pool(shared_segment *seg) { 536 | snapshot_list_element *sl; 537 | for ( ; seg->snapshot_pool; seg->snapshot_pool = sl) { 538 | sl = seg->snapshot_pool->next; 539 | if (seg->snapshot_pool->original_page_va) 540 | free(seg->snapshot_pool->original_page_snapshot); 541 | free(seg->snapshot_pool); 542 | } 543 | } 544 | 545 | 546 | static void abort_transaction_on_segment(shared_segment *seg) { 547 | snapshot_list_element *sl; 548 | size_t page_num; 549 | page_table_element *page_table_elt; 550 | void *status; 551 | 552 | if (seg->transaction_id == 0) { 553 | if (stm_verbose & 2) 554 | fprintf(stderr, "Aborting transaction but transaction_id is already 0\n"); 555 | return; 556 | } 557 | 558 | if (stm_verbose & 4) 559 | fprintf(stderr, "Aborting Transaction %d [", seg->transaction_id); 560 | 561 | delete_active_transaction(seg); 562 | 563 | for(sl = seg->snapshot_list; sl; sl = sl->next) { 564 | 565 | page_num = (sl->original_page_va - seg->shared_base_va)/seg->page_size; 566 | page_table_elt = &(seg->segment_page_table[page_num]); 567 | 
568 | if (stm_verbose & 4) { 569 | int dirty = memcmp(sl->original_page_va, sl->original_page_snapshot, seg->page_size); 570 | fprintf(stderr, " %s%lx", dirty? "*":"", page_num); 571 | } 572 | 573 | if (page_table_elt->current_transaction == seg->transaction_id) { 574 | // Only release pages owned by this transaction! 575 | // Pages that were only read and not modified by this transaction will not be marked as 576 | // associated with this transaction under optimistic locking. They may even be 577 | // associated with another transaction. 578 | 579 | page_table_elt->current_transaction = 0; 580 | } 581 | } 582 | 583 | if (stm_verbose & 4) 584 | fprintf(stderr, " ]\n"); 585 | 586 | free_snapshot_list(seg); 587 | 588 | // reprotect *all* pages with the default inter-transaction protection. 589 | 590 | 591 | #ifdef PRIVATE_MAPPING_IS_PRIVATE 592 | if (seg->default_prot_flags == PROT_NONE) { 593 | status = (void*)(long)mprotect(seg->shared_base_va, seg->shared_seg_size, PROT_NONE); 594 | } else 595 | #endif // ifdef PRIVATE_MAPPING_IS_PRIVATE 596 | 597 | { 598 | int mmap_flags; 599 | #ifdef PRIVATE_MAPPING_IS_PRIVATE 600 | mmap_flags = MAP_FIXED|MAP_SHARED; 601 | #else 602 | mmap_flags = MAP_FIXED|MAP_PRIVATE; 603 | #endif 604 | status = mmap(seg->shared_base_va, seg->shared_seg_size, seg->default_prot_flags, mmap_flags, seg->fd, 605 | (off_t)0); 606 | } 607 | if (status == (void*)-1) 608 | perror("abort_transaction_on_segment: mmap error"); 609 | 610 | seg->transaction_id = 0; 611 | 612 | } 613 | 614 | 615 | 616 | int _stm_transaction_stack_empty() { 617 | return (transaction_stack() == NULL); 618 | } 619 | 620 | static int push_transaction_stack(char *trans_name) { 621 | transaction_stack_element *trans; 622 | 623 | 624 | if ((trans = calloc(1, sizeof(transaction_stack_element))) == NULL) { 625 | set_stm_errno(STM_ALLOC_ERROR); 626 | return -1; 627 | } 628 | 629 | #if 0 630 | if (trans_name) { 631 | if ((trans->transaction_name = calloc(1, strlen(trans_name)+1)) == 
null) { 632 | set_stm_errno(STM_ALLOC_ERROR); 633 | free(trans); 634 | return -1; 635 | } 636 | strcpy(trans->transaction_name, trans_name); 637 | } 638 | #endif 639 | 640 | trans->transaction_name = trans_name; 641 | trans->next = transaction_stack(); 642 | set_transaction_stack(trans); 643 | 644 | #if 0 645 | printf("> "); 646 | for (trans = transaction_stack(); trans; trans = trans->next) 647 | printf("%s ", trans->transaction_name); 648 | printf("\n"); 649 | #endif 650 | 651 | return 0; 652 | } 653 | 654 | 655 | static void pop_transaction_stack() { 656 | transaction_stack_element *trans; 657 | 658 | #if 0 659 | printf("< "); 660 | for (trans = transaction_stack(); trans; trans = trans->next) 661 | printf("%s ", trans->transaction_name); 662 | printf("\n"); 663 | #endif 664 | 665 | trans = transaction_stack(); 666 | if (trans) { 667 | // if (trans->transaction_name) free(trans->transaction_name); 668 | set_transaction_stack(trans->next); 669 | free(trans); 670 | } 671 | } 672 | 673 | static void stm_abort_transaction() { 674 | shared_segment *seg; 675 | 676 | for(seg = shared_segment_list(); seg; seg = seg->next) { 677 | abort_transaction_on_segment(seg); 678 | } 679 | while (transaction_stack()) 680 | pop_transaction_stack(); 681 | 682 | } 683 | 684 | static void transaction_error_exit(int error_code, int return_value) { 685 | if (error_code) 686 | set_stm_errno(error_code); 687 | stm_abort_transaction(); 688 | longjmp(*stm_jmp_buf(), return_value); 689 | 690 | } 691 | 692 | 693 | 694 | static int insert_into_snapshot_list(shared_segment *seg, void *va, transaction_id_t trans_id) { 695 | 696 | snapshot_list_element *new_elt, *sl, *prev; 697 | 698 | // fprintf(stderr, "inserting into snapshot list %x\n", va); 699 | 700 | if (va < seg->shared_base_va || seg->shared_base_va + seg->shared_seg_size <= va) { 701 | if (stm_verbose & 1) 702 | fprintf(stderr, "insert_into_snapshot_list: va %lx not in segment\n", (unsigned long)va); 703 | 
set_stm_errno(STM_ACCESS_ERROR); 704 | return -1; 705 | } 706 | 707 | if ((new_elt = seg->snapshot_pool) != NULL) { 708 | seg->snapshot_pool = new_elt->next; 709 | new_elt->next = NULL; 710 | 711 | } else { 712 | if ((new_elt = calloc(1, sizeof(snapshot_list_element))) == NULL) { 713 | set_stm_errno(STM_ALLOC_ERROR); 714 | return -1; 715 | } 716 | 717 | if ((new_elt->original_page_snapshot = malloc(seg->page_size)) == NULL) { 718 | set_stm_errno(STM_ALLOC_ERROR); 719 | return -1; 720 | } 721 | } 722 | 723 | new_elt->original_page_va = va; 724 | new_elt->page_dirty = 0; 725 | new_elt->snapshot_transaction_id = trans_id; 726 | 727 | memcpy(new_elt->original_page_snapshot, va, seg->page_size); 728 | 729 | for(sl = seg->snapshot_list, prev=NULL; sl; prev = sl, sl = sl->next) { 730 | if (va < sl->original_page_va) { 731 | break; 732 | } else if (va == sl->original_page_va) { 733 | if (stm_verbose & 1) 734 | fprintf(stderr, "insert_into_snapshot_list: duplicate page at %lx\n", (unsigned long)va); 735 | } 736 | } 737 | 738 | new_elt->next = sl; // either item to insert before, or NULL if no list or we ran off end 739 | if (prev) 740 | prev->next = new_elt; 741 | else 742 | seg->snapshot_list = new_elt; 743 | 744 | return 0; 745 | 746 | } 747 | 748 | 749 | static int defeat_optimizer(volatile int *foo) { 750 | return *foo; 751 | } 752 | 753 | shared_segment *stm_find_shared_segment(void *va) { 754 | shared_segment *seg; 755 | for (seg = shared_segment_list(); seg; seg = seg->next) { 756 | if (seg->shared_base_va <= va && 757 | va < seg->shared_base_va + seg->shared_seg_size) 758 | return seg; 759 | } 760 | return NULL; 761 | } 762 | 763 | void **stm_free_list_addr(shared_segment *seg) { 764 | return seg->free_list_addr; 765 | } 766 | 767 | void stm_set_free_list_addr(shared_segment *seg, void **free_list_addr) { 768 | seg->free_list_addr = free_list_addr; 769 | } 770 | 771 | void *stm_segment_base(shared_segment *seg) { 772 | return seg->shared_base_va; 773 | } 774 | 775 | 
size_t stm_segment_size(shared_segment *seg) { 776 | return seg->shared_seg_size; 777 | } 778 | 779 | size_t stm_page_size(shared_segment *seg) { 780 | return seg->page_size; 781 | } 782 | 783 | int stm_segment_fd(shared_segment *seg) { 784 | return seg->fd; 785 | } 786 | 787 | 788 | // signal_handler is invoked when there is a read or write access to a shared segment during a transaction. 789 | // It remaps the page accessed to be private, with read and write access allowed. But it also makes a snapshot 790 | // of the page before it is allowed to be modified. This allows the commit mechanism to detect dirty pages 791 | // that need to be written. 792 | 793 | static void signal_handler(int sig, siginfo_t *si, void *foo) { 794 | void *page_base; 795 | void *status; 796 | shared_segment *seg; 797 | page_table_element *page_table_elt; 798 | transaction_id_t completed_transaction; 799 | size_t page_num; 800 | 801 | struct sigaction sa; 802 | 803 | sa.sa_flags = 0; 804 | sigemptyset(&sa.sa_mask); 805 | sa.sa_handler = SIG_DFL; 806 | 807 | if (transaction_stack() == NULL) { 808 | if (stm_verbose & 1) 809 | fprintf(stderr, "signal_handler: virtual address %lx referenced outside transaction\n", 810 | (unsigned long)si->si_addr); 811 | sigaction(PAGE_ACCESS_SIGNAL, &sa, 0); 812 | transaction_error_exit(STM_ACCESS_ERROR, -1); 813 | return; 814 | } 815 | 816 | seg = stm_find_shared_segment(si->si_addr); 817 | 818 | if (seg == NULL) { 819 | if (stm_verbose & 1) 820 | fprintf(stderr, "signal_handler: virtual address %lx not found in shared segment\n", 821 | (unsigned long)si->si_addr); 822 | sigaction(PAGE_ACCESS_SIGNAL, &sa, 0); 823 | transaction_error_exit(STM_ACCESS_ERROR, -1); 824 | return; 825 | } 826 | 827 | if (seg->transaction_id == 0) { 828 | if (stm_verbose & 1) 829 | fprintf(stderr, "signal_handler: signal received outside transaction\n"); 830 | sigaction(PAGE_ACCESS_SIGNAL, &sa, 0); 831 | transaction_error_exit(STM_ACCESS_ERROR, -1); 832 | } 833 | 834 | page_base = 
(void*)((long)si->si_addr & ~(seg->page_size-1)); 835 | page_num = (page_base - seg->shared_base_va)/seg->page_size; 836 | page_table_elt = &(seg->segment_page_table[page_num]); 837 | completed_transaction = page_table_elt->completed_transaction; 838 | 839 | #define OPTIMISTIC_LOCKING 840 | 841 | #ifdef OPTIMISTIC_LOCKING 842 | 843 | if (page_table_elt->current_transaction != 0) { 844 | if (seg->transaction_id != page_table_elt->current_transaction) { 845 | 846 | if (stm_verbose & 2) 847 | fprintf(stderr, "Transaction %d owns page %lx while transaction %d is snapshotting it.\n", 848 | page_table_elt->current_transaction, page_num, seg->transaction_id); 849 | collision_histo[0]++; 850 | transaction_error_exit(STM_COLLISION_ERROR, 1); 851 | return; 852 | } else { 853 | if (stm_verbose & 1) 854 | fprintf(stderr, "Transaction %d already owns page %lx\n", 855 | page_table_elt->current_transaction, page_num); 856 | transaction_error_exit(STM_OWNERSHIP_ERROR, -1); 857 | } 858 | } 859 | 860 | #else 861 | 862 | if (atomic_compare_and_swap_32(0, seg->transaction_id, 863 | (int32_t*)&(page_table_elt->current_transaction))) { 864 | // fprintf(stderr, "succeeded in locking page %x\n", page_num); 865 | } else { 866 | if (stm_verbose & 2) 867 | fprintf(stderr,"Transaction %d owns page %x while transaction %d is snapshotting it.\n", 868 | page_table_elt->current_transaction, page_num, seg->transaction_id); 869 | transaction_error_exit(STM_COLLISION_ERROR, 1); 870 | return; 871 | } 872 | 873 | #endif 874 | 875 | if ((int32_t)completed_transaction - (int32_t)seg->transaction_id > 0) { 876 | 877 | if (stm_verbose & 2) 878 | fprintf(stderr, "On page %lx, current transaction %d is before page's completed transaction %d\n", 879 | page_num, seg->transaction_id, completed_transaction); 880 | 881 | collision_histo[1]++; 882 | transaction_error_exit(STM_COLLISION_ERROR, 1); 883 | return; 884 | } 885 | 886 | if (find_prior_active_transaction(seg, completed_transaction)) { 887 | if 
(stm_verbose & 2) 888 | fprintf(stderr, "On page %lx, completed transaction %d was active when transaction %d started\n", 889 | page_num, completed_transaction, seg->transaction_id); 890 | collision_histo[2]++; 891 | transaction_error_exit(STM_COLLISION_ERROR, 1); 892 | return; 893 | } 894 | 895 | 896 | 897 | #ifdef PRIVATE_MAPPING_IS_PRIVATE 898 | // Change from shared to private mapping, and make the page readable and writable. 899 | // 900 | 901 | status = mmap(page_base, seg->page_size, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, seg->fd, 902 | (off_t)(page_base - seg->shared_base_va)); 903 | 904 | if (status == (void*)-1) { 905 | if (stm_verbose & 1) 906 | perror("signal_handler: mmap error in sig handler"); 907 | transaction_error_exit(STM_MMAP_ERROR, -1); 908 | return; 909 | 910 | } 911 | 912 | #else 913 | // Private mapping is NOT private, so we have the whole segment mapped private. 914 | // By writing into a page we get a private copy of it which is all we need. 915 | 916 | status = (void*)(long)mprotect(page_base, seg->page_size, PROT_READ|PROT_WRITE); 917 | if (status == (void*)-1) { 918 | if (stm_verbose & 1) 919 | perror("signal_handler: mprotect error in sig handler"); 920 | transaction_error_exit(STM_MMAP_ERROR, -1); 921 | return; 922 | 923 | } 924 | 925 | // Some systems evidently allow changes by other processes to be reflected in private mappings. 926 | // To prevent that (hopefully!) we modify the page (without really changing anything) to 927 | // invoke the "copy-on-write" semantics and really make a private copy 928 | // 929 | *(volatile int*)page_base = defeat_optimizer((volatile int*)page_base); 930 | #endif 931 | 932 | if (insert_into_snapshot_list(seg, page_base, completed_transaction) != 0) { 933 | transaction_error_exit(0, -1); 934 | } 935 | 936 | // Double check to make sure that during the above, nobody grabbed this page. 
937 | 938 | if (page_table_elt->current_transaction != 0) { 939 | if (seg->transaction_id != page_table_elt->current_transaction) { 940 | 941 | if (stm_verbose & 2) 942 | fprintf(stderr, "Transaction %d owns page %lx while transaction %d is snapshotting it. [2]\n", 943 | page_table_elt->current_transaction, page_num, seg->transaction_id); 944 | collision_histo[3]++; 945 | transaction_error_exit(STM_COLLISION_ERROR, 1); 946 | return; 947 | } else { 948 | #ifdef OPTIMISTIC_LOCKING 949 | if (stm_verbose & 1) 950 | fprintf(stderr, "Transaction %d already owns page %lx [2]\n", 951 | page_table_elt->current_transaction, page_num); 952 | transaction_error_exit(STM_OWNERSHIP_ERROR, -1); 953 | #endif 954 | } 955 | } 956 | 957 | if (completed_transaction != page_table_elt->completed_transaction) { 958 | if (stm_verbose & 2) { 959 | fprintf(stderr, "Transaction %d snuck in on transaction %d on page %lx during snapshot\n", 960 | page_table_elt->completed_transaction, completed_transaction, page_num); 961 | } 962 | collision_histo[4]++; 963 | transaction_error_exit(STM_COLLISION_ERROR, 1); 964 | return; 965 | } 966 | 967 | return; 968 | } 969 | 970 | 971 | static struct sigaction saved_sigaction; 972 | 973 | int stm_init(int verbose) { 974 | 975 | int status; 976 | struct sigaction sa; 977 | 978 | stm_verbose = verbose; 979 | set_stm_errno(0); 980 | 981 | sa.sa_flags = SA_SIGINFO; 982 | sigemptyset(&sa.sa_mask); 983 | sa.sa_sigaction = signal_handler; 984 | 985 | if ((status = sigaction(PAGE_ACCESS_SIGNAL, &sa, &saved_sigaction)) != 0) { 986 | if (stm_verbose & 1) 987 | fprintf(stderr, "sigaction status = %d\n", status); 988 | set_stm_errno(STM_SIGNAL_ERROR); 989 | } 990 | 991 | create_thread_keys(); 992 | stm_init_thread_locals(); 993 | 994 | return status; 995 | 996 | } 997 | 998 | static int start_transaction_on_segment(shared_segment *seg) { 999 | int status; 1000 | 1001 | 1002 | // There is a small interval between the time we allocate a transaction ID and the time we can 
register it as an active 1003 | // transaction so other transactions can know of its existence. So transaction startup has to be 1004 | // single threaded at least up until we add our new transaction ID to the active transactions list. 1005 | 1006 | atomic_spin_lock_lock(&seg->segment_transaction_data->transaction_lock); 1007 | 1008 | if ((seg->transaction_id = atomic_increment_32((int32_t*)&seg->segment_transaction_data->transaction_counter)) == 0) { 1009 | // unlikely we'll wrap, but if we do, skip 0. 1010 | seg->transaction_id = atomic_increment_32((int32_t*)&seg->segment_transaction_data->transaction_counter); 1011 | } 1012 | 1013 | snapshot_active_transactions(seg); 1014 | add_active_transaction(seg); 1015 | 1016 | atomic_spin_lock_unlock(&seg->segment_transaction_data->transaction_lock); 1017 | 1018 | if (seg->default_prot_flags != PROT_NONE) { 1019 | 1020 | status = mprotect(seg->shared_base_va, seg->shared_seg_size, PROT_NONE); 1021 | 1022 | // status = mprotect(seg->shared_base_va, seg->shared_seg_size, PROT_READ|PROT_WRITE); 1023 | 1024 | if (status == -1) { 1025 | if (stm_verbose & 1) 1026 | perror("start_transaction: mprotect error"); 1027 | set_stm_errno(STM_MMAP_ERROR); 1028 | return -1; 1029 | } 1030 | } 1031 | 1032 | return 0; 1033 | } 1034 | 1035 | int _stm_start_transaction(char *trans_name) { 1036 | shared_segment *seg; 1037 | 1038 | 1039 | set_stm_errno(0); // This is as good a place as any to re-initialize this error code to 0. 
1040 | 1041 | if (trans_name == NULL) { 1042 | if (stm_verbose & 1) 1043 | fprintf(stderr, "stm_start_transaction: tried to start transaction with NULL name\n"); 1044 | transaction_error_exit(STM_NULL_NAME_ERROR, -1); } 1045 | 1046 | 1047 | if (transaction_stack() == NULL) 1048 | for(seg = shared_segment_list(); seg; seg = seg->next) { 1049 | if (start_transaction_on_segment(seg) != 0) { 1050 | transaction_error_exit(0, -1); 1051 | } 1052 | } 1053 | 1054 | if (push_transaction_stack(trans_name) != 0) 1055 | transaction_error_exit(0, -1); 1056 | 1057 | return 0; 1058 | } 1059 | 1060 | // returns: 1061 | // 0 - success 1062 | // -1 - non-recoverable error 1063 | // 1 - collision error: should retry aborted transaction 1064 | // 1065 | static int lock_segment_pages(shared_segment *seg) { 1066 | snapshot_list_element *sl; 1067 | size_t page_num; 1068 | page_table_element *page_table_elt; 1069 | 1070 | if (seg->transaction_id == 0) { 1071 | if (stm_verbose & 1) 1072 | fprintf(stderr, "lock_segment_pages: segment should have active transaction, but doesn't\n"); 1073 | return -1; 1074 | } 1075 | 1076 | 1077 | for (sl = seg->snapshot_list; sl; sl = sl->next) { 1078 | 1079 | page_num = (sl->original_page_va - seg->shared_base_va)/seg->page_size; 1080 | page_table_elt = &(seg->segment_page_table[page_num]); 1081 | 1082 | // even if this transaction is just reading a page, if any other transaction is writing into it, 1083 | // or has written into it, that is enough to make us abort. 
In that case we know the information 1084 | // we are accessing is stale and therefore our results may be inconsistent with results of other transactions 1085 | 1086 | if (sl->snapshot_transaction_id != page_table_elt->completed_transaction) { 1087 | 1088 | if (stm_verbose & 2) 1089 | fprintf(stderr, "lock_segment_pages: Transaction %d modified page %lx!\n", 1090 | page_table_elt->completed_transaction, page_num); 1091 | collision_histo[5]++; 1092 | set_stm_errno(STM_COLLISION_ERROR); 1093 | return 1; 1094 | 1095 | } 1096 | 1097 | 1098 | if (page_table_elt->current_transaction != 0 && 1099 | page_table_elt->current_transaction != seg->transaction_id) { 1100 | if (stm_verbose & 2) 1101 | fprintf(stderr, "lock_segment_pages: Transaction %d is modifying page %lx!\n", 1102 | page_table_elt->current_transaction, page_num); 1103 | collision_histo[6]++; 1104 | set_stm_errno(STM_COLLISION_ERROR); 1105 | return 1; 1106 | } 1107 | 1108 | if (memcmp(sl->original_page_snapshot, sl->original_page_va, seg->page_size) == 0) 1109 | continue; 1110 | 1111 | sl->page_dirty = 1; 1112 | 1113 | // re-use the page snapshot buffer to temporarily keep a copy of the page so we can re-map the page as shared, 1114 | // then copy the new contents into it. 1115 | // *** If only there were page-remapping syscalls, I wouldn't have to do this copying - I could re-map the modified 1116 | // page out of the way, then re-map it into the right location in the file. Better yet, if there were a way to associate 1117 | // a file region with a memory region (so the file is written into as opposed to read from) that would solve this. 
1118 | 1119 | memcpy(sl->original_page_snapshot, sl->original_page_va, seg->page_size); 1120 | 1121 | 1122 | #ifdef OPTIMISTIC_LOCKING 1123 | 1124 | if (atomic_compare_and_swap_32(0, seg->transaction_id, 1125 | (int32_t*)&(page_table_elt->current_transaction))) { 1126 | // fprintf(stderr, "succeeded in locking page %x\n", page_num); 1127 | } else { 1128 | if (stm_verbose & 2) 1129 | fprintf(stderr, "lock_segment_pages: Race detected. Failed to lock page %lx\n", page_num); 1130 | collision_histo[7]++; 1131 | set_stm_errno(STM_COLLISION_ERROR); 1132 | return 1; 1133 | } 1134 | 1135 | #endif 1136 | 1137 | if (page_table_elt->current_transaction != seg->transaction_id) { 1138 | if (stm_verbose & 1) 1139 | fprintf(stderr, "lock_segment_pages: page %lx should already be locked by transaction %d, but is owned by %d\n", 1140 | page_num, seg->transaction_id, page_table_elt->current_transaction); 1141 | set_stm_errno(STM_OWNERSHIP_ERROR); 1142 | return -1; 1143 | } 1144 | 1145 | if (sl->snapshot_transaction_id != page_table_elt->completed_transaction) { 1146 | 1147 | if (stm_verbose & 2) 1148 | fprintf(stderr, "lock_segment_pages: Transaction %d modified page %lx!\n", 1149 | page_table_elt->completed_transaction, page_num); 1150 | collision_histo[8]++; 1151 | set_stm_errno(STM_COLLISION_ERROR); 1152 | return 1; 1153 | } 1154 | } 1155 | 1156 | return 0; 1157 | } 1158 | 1159 | 1160 | static int write_locked_segment_pages(shared_segment *seg) { 1161 | 1162 | snapshot_list_element *sl; 1163 | void *status; 1164 | size_t page_num; 1165 | int result = 0; 1166 | 1167 | page_table_element *page_table_elt; 1168 | 1169 | 1170 | // Re-map shared. 
1171 | 1172 | 1173 | status = mmap(seg->shared_base_va, seg->shared_seg_size, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, seg->fd, 1174 | (off_t)0); 1175 | if (status == (void*)-1) { 1176 | if (stm_verbose & 1) 1177 | perror("write_locked_pages: mmap error"); 1178 | set_stm_errno(STM_MMAP_ERROR); 1179 | return -1; 1180 | } 1181 | 1182 | if (stm_verbose & 4) 1183 | fprintf(stderr, "Transaction %d [", seg->transaction_id); 1184 | 1185 | // now copy the new versions of the dirty pages into the shared, mapped file. 1186 | 1187 | for (sl = seg->snapshot_list; sl; sl = sl->next) { 1188 | 1189 | page_num = (sl->original_page_va - seg->shared_base_va)/seg->page_size; 1190 | page_table_elt = &(seg->segment_page_table[page_num]); 1191 | 1192 | if (sl->page_dirty) { 1193 | 1194 | if (stm_verbose & 4) 1195 | fprintf(stderr, " %lx", page_num); 1196 | 1197 | page_table_elt->completed_transaction = seg->transaction_id; 1198 | 1199 | // copy the temporarily saved, modified pages back into the right places 1200 | // 1201 | memcpy(sl->original_page_va, sl->original_page_snapshot, seg->page_size); 1202 | 1203 | } 1204 | 1205 | if (page_table_elt->current_transaction == seg->transaction_id) { 1206 | // Only release pages owned by this transaction! 1207 | // Pages that were only read and not modified by this transaction will not be marked as 1208 | // associated with this transaction under optimistic locking. They may even be 1209 | // associated with another transaction. 
1210 | 1211 | page_table_elt->current_transaction = 0; 1212 | } 1213 | } 1214 | 1215 | if (stm_verbose & 4) 1216 | fprintf(stderr, " ]\n"); 1217 | 1218 | // re-protect the segment to be whatever it is supposed to be between transactions 1219 | 1220 | 1221 | #ifdef PRIVATE_MAPPING_IS_PRIVATE 1222 | if (seg->default_prot_flags != (PROT_READ|PROT_WRITE)) 1223 | status = (void*)(long)mprotect(seg->shared_base_va, seg->shared_seg_size, seg->default_prot_flags); 1224 | else 1225 | status = 0; 1226 | 1227 | #else 1228 | 1229 | status = mmap(seg->shared_base_va, seg->shared_seg_size, seg->default_prot_flags, MAP_FIXED|MAP_PRIVATE, seg->fd, 1230 | (off_t)0); 1231 | 1232 | #endif 1233 | if (status == (void*)-1) { 1234 | perror("write_locked_segment_pages: mmap error"); 1235 | set_stm_errno(STM_MMAP_ERROR); 1236 | result = -1; 1237 | } 1238 | 1239 | free_snapshot_list(seg); 1240 | 1241 | delete_active_transaction(seg); 1242 | seg->transaction_id = 0; 1243 | 1244 | return result; 1245 | 1246 | } 1247 | 1248 | 1249 | int stm_commit_transaction(char *trans_name) { 1250 | shared_segment *seg; 1251 | int result = 0; 1252 | 1253 | set_stm_errno(0); 1254 | 1255 | if (transaction_stack() == NULL) { 1256 | if (stm_verbose & 1) 1257 | fprintf(stderr, "stm_commit_transaction: empty transaction stack while trying to commit transaction \"%s\"\n", 1258 | trans_name); 1259 | transaction_error_exit(STM_TRANS_STACK_ERROR, -1); 1260 | } 1261 | 1262 | if (trans_name == NULL) { 1263 | if (stm_verbose & 1) 1264 | fprintf(stderr, "stm_commit_transaction: null transaction name\n"); 1265 | transaction_error_exit(STM_NULL_NAME_ERROR, -1); 1266 | } 1267 | 1268 | if (strcmp(transaction_stack()->transaction_name, trans_name) != 0) { 1269 | if (stm_verbose & 1) 1270 | fprintf(stderr, "stm_commit_transaction: \"%s\" is not the innermost transaction (\"%s\" is)\n", 1271 | trans_name, transaction_stack()->transaction_name); 1272 | 1273 | transaction_error_exit(STM_TRANS_STACK_ERROR, -1); 1274 | } 1275 | 
1276 | if (transaction_stack()->next == NULL) { 1277 | // only actually commit on outermost transaction. 1278 | 1279 | sigset_t blocked_signals; 1280 | sigset_t saved_signals; 1281 | 1282 | // We don't want to be interrupted or anything during the commit of the transaction. 1283 | sigfillset(&blocked_signals); 1284 | 1285 | // if (sigprocmask(SIG_SETMASK, &blocked_signals, &saved_signals) == -1) { 1286 | if (pthread_sigmask(SIG_SETMASK, &blocked_signals, &saved_signals) == -1) { 1287 | if (stm_verbose & 1) 1288 | perror("stm_commit_transaction: error blocking signals"); 1289 | transaction_error_exit(STM_SIGNAL_ERROR, -1); 1290 | } 1291 | 1292 | 1293 | for(seg = shared_segment_list(); seg; seg = seg->next) { 1294 | if ((result = lock_segment_pages(seg)) != 0) { 1295 | // if there is a failure on any shared segment, abort on all segments 1296 | transaction_error_exit(0, result); 1297 | } 1298 | } 1299 | 1300 | 1301 | 1302 | for(seg = shared_segment_list(); seg; seg = seg->next) { 1303 | if ((result = write_locked_segment_pages(seg)) != 0) { 1304 | // if there is a failure on any shared segment, abort on all segments 1305 | transaction_error_exit(0, result); 1306 | } 1307 | } 1308 | 1309 | // At this point, it's really too late to reverse anything... 
1310 | 1311 | // if (sigprocmask(SIG_SETMASK, &saved_signals, NULL) == -1) { 1312 | if (pthread_sigmask(SIG_SETMASK, &saved_signals, NULL) == -1) { 1313 | if (stm_verbose & 1) 1314 | perror("stm_commit_transaction: error unblocking signals"); 1315 | set_stm_errno(STM_SIGNAL_ERROR); 1316 | result = -1; 1317 | } 1318 | } 1319 | 1320 | pop_transaction_stack(); 1321 | 1322 | return result; 1323 | 1324 | } 1325 | 1326 | void stm_close_shared_segment(shared_segment *seg) { 1327 | shared_segment *s, *prev; 1328 | 1329 | if (seg->transaction_id) 1330 | abort_transaction_on_segment(seg); 1331 | 1332 | if (seg->shared_base_va) 1333 | munmap(seg->shared_base_va, seg->shared_seg_size); 1334 | 1335 | if (seg->segment_transaction_data) 1336 | munmap(seg->segment_transaction_data, seg->transaction_data_size); 1337 | 1338 | if (seg->fd) 1339 | close(seg->fd); 1340 | 1341 | if (seg->metadata_fd) 1342 | close(seg->metadata_fd); 1343 | 1344 | for (s = shared_segment_list(), prev=NULL; s; prev = s, s=s->next) { 1345 | if (s == seg) { 1346 | if (prev) 1347 | prev->next = s->next; 1348 | else 1349 | set_shared_segment_list(s->next); 1350 | break; 1351 | } 1352 | } 1353 | 1354 | if (seg->filename) free(seg->filename); 1355 | if (seg->metadata_filename) free(seg->metadata_filename); 1356 | free_snapshot_pool(seg); 1357 | 1358 | free(seg); 1359 | } 1360 | 1361 | void stm_close() 1362 | { 1363 | shared_segment *s; 1364 | while ((s = shared_segment_list()) != NULL) 1365 | stm_close_shared_segment(s); 1366 | sigaction(PAGE_ACCESS_SIGNAL, &saved_sigaction, 0); 1367 | } 1368 | 1369 | 1370 | 1371 | 1372 | -------------------------------------------------------------------------------- /stm.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | stm.h 4 | 5 | This is the main API for stmmap, a Software Transactional Memory system. 6 | All the functions and macros you need to use in your applications are declared here. 
/*

  Copyright 2009 Shel Kaphan

  This file is part of stmmap.

  stmmap is free software: you can redistribute it and/or modify
  it under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  stmmap is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with stmmap.  If not, see <http://www.gnu.org/licenses/>.

*/

/* Include guard: this header contains a typedef, so it must only be seen once per translation unit. */
#ifndef STMMAP_STM_H
#define STMMAP_STM_H

#include <stdlib.h>         // for size_t, exit (used by the stm_start_transaction macro)
#include <stdint.h>         // for uint32_t
#include <sys/mman.h>       // for PROT_NONE, PROT_READ, PROT_WRITE
#include <sys/types.h>      // for ino_t
#include <setjmp.h>         // for jmp_buf, setjmp (used by the stm_start_transaction macro)
#include <time.h>           // for struct timespec, nanosleep (used by the stm_start_transaction macro)


/* Opaque to applications; only pointers to it cross this API. */
struct shared_segment;

typedef uint32_t transaction_id_t;


/*
  Call stm_init() to initialize the STM package.  It sets up signal handlers and verbosity level for
  subsequent activity.  On Mac OS you get SIGBUS
  signals on page access faults.  You might need to change it to handle SIGSEGV instead on some systems.

  Args:
    verbose     Controls whether to print out errors on stderr.  This is a bit mask
                Bit   Meaning
                1     report errors
                2     report conflicts that cause transactions to abort
                4     report pages modified during each transaction commit or abort

  Return value:
    0           success
    -1          failure, stm_errno() returns the error code.
*/
int stm_init(int verbose);

/*
  stm_init_thread_locals() must be called to initialize thread-local variables.
  This is done automatically in stm_init() for the main thread.
*/
void stm_init_thread_locals(void);


/*
  Call stm_open_shared_segment() to open a shared memory segment in each process that wants to access it.
  You can have as many shared areas as you like.  You specify a file that is shared among all
  processes that want to communicate through each shared area.  If it is important to control the
  virtual address at which the shared area exists in your address space, you can specify it here
  (or pass in NULL to let mmap decide where to map it).  The shared file, as well as a metadata file
  with the same name but ending in ".metadata", will be created if it does not exist, and will be grown
  to the requested length if it is not long enough.

  Args:
    filename      Pathname of file to back this shared segment.  Does not need to exist - will be created.
                  Also, <filename>.metadata will be created to hold transaction metadata.
    size          Length in bytes of the shared segment you need.  File will be grown if it is not this large already.
    requested_va  If NULL, the shared segment will be allocated at will.  If specified, the address where you would
                  like your shared segment.
    prot_flags    Either PROT_NONE, or the binary OR of PROT_READ and PROT_WRITE (or just one of them).
                  Controls access to shared segment between transactions.

  Return value:
    NULL          failure; stm_errno() returns the error code.
    non-NULL      pointer to a shared_segment object.  This is an opaque object (the struct internals are not exposed).
                  The API for accessing it is defined in this file.

*/
struct shared_segment *stm_open_shared_segment(char *filename, size_t size, void *requested_va, int prot_flags);

/*
  Until you start a transaction, you have full unsynchronized read-write access to the shared area
  (depending on the protection you set when the shared segment was opened).
  Once you call stm_start_transaction(), your writes will be private and not seen by other processes until
  you commit the transaction.  In addition, you are guaranteed either read consistency (no other process will
  have written into any page you read from during the transaction) or else the transaction will abort
  and retry.  (You should design transactions to be short, have no other side effects (like I/O), and be restartable!).
  Transactions can be nested.  Only the outermost transaction actually commits changes when it is done.
  This allows transactions to be built up of other transactions.
  The name you provide must match the name in the matching commit.

  Argument:
    trans_name    name-tag for this transaction.  Cannot be NULL.  Must match name in corresponding commit.

  Result:
    stm_start_transaction() is a statement macro and yields no value.  The internal function it
    calls, _stm_start_transaction(), is declared to return int (0 success, -1 failure with the code
    available from stm_errno()), but the macro discards that result.
*/

/*
  Call this macro, not the internal function it calls.  This is what enables transaction restarts.

  When no transaction is open yet, the macro records a restart point with setjmp().  Control that comes
  back to that point with a positive status sleeps for _delay_ nanoseconds (starting at STM_MIN_DELAY)
  and then starts the transaction again; a negative status calls exit(-1).

  NOTE(review): the setjmp() call is the operand of an assignment inside a comparison.  ISO C only
  defines setjmp() in a small set of contexts (a whole controlling expression, one operand of a
  comparison with an integer constant, the operand of !, or a whole expression statement) - confirm
  this form is acceptable on every supported compiler.
  NOTE(review): _delay_ is a non-volatile automatic variable that is modified after setjmp(); ISO C
  makes its value indeterminate after a longjmp() back to this point, so the intended 25% growth of
  the back-off may not survive across retries - confirm.
  NOTE(review): the expansion is a bare { } block rather than do { } while (0), so
  "if (c) stm_start_transaction(n); else ..." does not compile.
*/
#define stm_start_transaction(trans_name)               \
    { if (_stm_transaction_stack_empty()) {\
            int _status_, _delay_ = STM_MIN_DELAY;\
            struct timespec _ts_;\
            if ((_status_ = setjmp(*stm_jmp_buf())) > 0) {\
                _ts_.tv_sec = 0;\
                _ts_.tv_nsec = _delay_;\
                nanosleep(&_ts_, NULL);\
                _delay_ += _delay_>>2;\
            } else if (_status_ < 0) {\
                exit (-1);\
            }\
        }\
        _stm_start_transaction(trans_name);\
    }


// Things needed when the above macro expands:
//
#define STM_MIN_DELAY 10        // first retry delay, in nanoseconds (it is stored into tv_nsec)
jmp_buf *stm_jmp_buf(void);




/*
  Call stm_commit_transaction() when you are ready to commit the changes you have made during a transaction
  to the shared area(s).  If any other processes have modified the pages you accessed during the transaction
  your transaction will fail and you should retry the transaction.

  This should be in the same scope as the corresponding stm_start_transaction().

  Arguments:
    trans_name    matching name-tag to name given in stm_start_transaction
  Return values:
    0             Success
    -1            Serious error other than conflict with another process, which causes a retry.
                  Do not retry the transaction, figure out the error.
*/
int stm_commit_transaction(char *trans_name);


/*
  When you are done with a shared segment you can close it with stm_close_shared_segment().  You pass it
  the object that was returned by stm_open_shared_segment().  Any transactions in progress will abort their
  changes to this segment.  You do not normally need to do this - you can call stm_close() to close everything instead.

  Arguments:
    seg           pointer to shared_segment, as provided by stm_open_shared_segment
*/
void stm_close_shared_segment(struct shared_segment *seg);


/*
  Call this to release all resources and virtual memory mappings associated with the STM manager when you
  are done with it.  Also restores the signal handling that was saved for the page-access signal.
*/
void stm_close(void);



/*
  Returns the shared_segment associated with a virtual address, or NULL.  Used by stmalloc.c.
*/
struct shared_segment *stm_find_shared_segment(void *va);


/*
  Returns the first virtual address within a shared memory segment.
*/
void *stm_segment_base(struct shared_segment *seg);

/*
  Returns the size in bytes of a shared memory segment.
*/
size_t stm_segment_size(struct shared_segment *seg);

/*
  Returns the page size in bytes of a shared memory segment (should be the same for all segments!)
*/
size_t stm_page_size(struct shared_segment *seg);


/*
  Returns the file descriptor associated with an open shared memory segment.
*/
int stm_segment_fd(struct shared_segment *seg);

/*
  On errors, stm_errno() returns one of the codes below.
  This is a function, not a global, because the value is per-thread.
*/
int stm_errno(void);

/*
  Possible values of stm_errno().
*/

#define STM_COLLISION_ERROR 1
#define STM_FILETYPE_ERROR 2
#define STM_FILESIZE_ERROR 3
#define STM_ALLOC_ERROR 4
#define STM_OPEN_ERROR 5
#define STM_MMAP_ERROR 6
#define STM_ACCESS_ERROR 7
#define STM_SIGNAL_ERROR 8
#define STM_NULL_NAME_ERROR 9
#define STM_WRITE_ERROR 10
#define STM_TRANS_STACK_ERROR 11
#define STM_OWNERSHIP_ERROR 12



/*
  Private functions that your program should not use.
*/

/*
  The following function is for use by stmalloc.c
  Returns a pointer to the head of the free list for a shared_segment.  Note the head of the
  free list is *in* the segment because it is shared by all processes that use the segment!
*/
void **stm_free_list_addr(struct shared_segment *seg);

/*
  The following function is for use by stmalloc.c
  Records the head of the free list into the shared_segment object.
*/
void stm_set_free_list_addr(struct shared_segment *seg, void **free_list_addr);


/*
  These are really private functions so do not call them directly.  They have to be exposed for use by the
  stm_start_transaction() macro.
*/
int _stm_transaction_stack_empty(void);
int _stm_start_transaction(char *trans_name);

#endif /* STMMAP_STM_H */




/*
--------------------------------------------------------------------------------
/stmalloc.c:
--------------------------------------------------------------------------------
*/
/*

  stmalloc.c

  This is the implementation of an optional memory allocator for shared memory segments that
  works under stmmap, a Software Transactional Memory system.  It need not be used, but if you don't
  want to write your own allocator, it might help.

  Copyright 2009 Shel Kaphan

  This file is part of stmmap.

  stmmap is free software: you can redistribute it and/or modify
  it under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  stmmap is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with stmmap.  If not, see <http://www.gnu.org/licenses/>.
*/
25 | 26 | */ 27 | 28 | #include 29 | 30 | 31 | #include "stm.h" 32 | #include "segalloc.h" 33 | #include "stmalloc.h" 34 | 35 | 36 | void stm_alloc_init(struct shared_segment *seg, int mode) { 37 | stm_start_transaction("alloc.init"); 38 | 39 | stm_set_free_list_addr(seg, seg_alloc_init(stm_segment_base(seg), stm_segment_size(seg), mode)); 40 | 41 | stm_commit_transaction("alloc.init"); 42 | 43 | } 44 | 45 | void stm_free(void *va) { 46 | struct shared_segment *seg; 47 | size_t size; 48 | 49 | stm_start_transaction("alloc.free"); 50 | seg = stm_find_shared_segment(va); 51 | if (seg) { 52 | size = *(((size_t*)va)-1); 53 | seg_free(va - sizeof(size_t), size, stm_segment_base(seg), stm_free_list_addr(seg)); 54 | } 55 | stm_commit_transaction("alloc.free"); 56 | } 57 | 58 | void *stm_alloc(struct shared_segment *seg, size_t size) { 59 | void *result; 60 | size_t real_size = seg_block_size_for(size + sizeof(size_t)); 61 | 62 | stm_start_transaction("alloc.new"); 63 | result = seg_alloc(real_size, stm_free_list_addr(seg)); 64 | if (result) { 65 | *(size_t*)result = real_size; 66 | } 67 | stm_commit_transaction("alloc.new"); 68 | 69 | if (result == NULL) { 70 | fprintf(stderr, "Failed to allocate size %ld\n", size); 71 | return NULL; 72 | } 73 | return result + sizeof(size_t); 74 | } 75 | 76 | // This apparently trivial function causes the offset_ptr which is the free list 77 | // to be converted into a regular pointer for regular programs to work with. 78 | struct segalloc_node *stm_free_list(struct shared_segment *seg) { 79 | return seg_free_list_from_free_list_addr(stm_free_list_addr(seg)); 80 | } 81 | -------------------------------------------------------------------------------- /stmalloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | stmalloc.h 4 | 5 | This is the API for the STM package's optional memory allocator. 6 | 7 | Copyright 2009 Shel Kaphan 8 | 9 | This file is part of stmmap. 
/*

  stmmap is free software: you can redistribute it and/or modify
  it under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  stmmap is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with stmmap.  If not, see <http://www.gnu.org/licenses/>.

*/

#ifndef STMMAP_STMALLOC_H
#define STMMAP_STMALLOC_H

#include <stdlib.h>     // for size_t

struct shared_segment;

/*
  Initialize the memory allocator to work on a particular shared memory segment, which should already have
  been opened with stm_open_shared_segment().

  Args:
    seg     shared_segment that was returned by stm_open_shared_segment().
    mode    0 = use existing free-list in segment
            1 = initialize free-list (you have to know, somehow, that your process is the first to access
                the shared_segment)
*/
void stm_alloc_init(struct shared_segment *seg, int mode);


/*
  Allocate size bytes out of the shared memory segment seg.  Seg is the object previously returned to you by
  stm_open_shared_segment().
*/
void *stm_alloc(struct shared_segment *seg, size_t size);

/*
  Free a block of memory that was previously allocated with stm_alloc()
*/
void stm_free(void *va);

/*
  Returns the segment's free list as an ordinary pointer.
*/
struct segalloc_node *stm_free_list(struct shared_segment *seg);

#endif /* STMMAP_STMALLOC_H */


/*
--------------------------------------------------------------------------------
*/