├── .gitignore
├── CMakeLists.txt
├── README.md
├── src
│   ├── GPL-3.txt
│   ├── README.txt
│   ├── alexport.def
│   ├── compiler.h
│   ├── gcc.h
│   ├── install_dyn.txt
│   ├── install_stt.txt
│   ├── ll_alloc.c
│   ├── ll_asm.h
│   ├── ll_list.h
│   ├── makefile
│   ├── ming.h
│   ├── vs.h
│   └── win_stub.cc
└── tests
    ├── calloc.c
    ├── malloc.c
    └── realloc.c
/.gitignore:
--------------------------------------------------------------------------------
1 | /test_*
2 | lib*.a
3 | *.lib
4 | Testing
5 | CTestTestfile.cmake
6 | Makefile
7 | CMakeCache.txt
8 | cmake_install.cmake
9 | CMakeFiles
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.6)
2 | project(lockless C CXX)
3 |
4 | option(LOCKLESS_DEBUG_ALLOC "Debug allocator" OFF)
5 |
6 | if("${CMAKE_C_COMPILER_ID}" MATCHES "clang" OR "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
7 | set(CLANG 1)
8 | endif("${CMAKE_C_COMPILER_ID}" MATCHES "clang" OR "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
9 |
10 | set(CMAKE_C_FLAGS "")
11 | set(CMAKE_CXX_FLAGS "")
12 | set(CMAKE_C_FLAGS_RELEASE "-g")
13 | set(CMAKE_CXX_FLAGS_RELEASE "-g")
14 | set(CMAKE_C_FLAGS_DEBUG "-ggdb")
15 | set(CMAKE_CXX_FLAGS_DEBUG "-ggdb")
16 |
17 | if(LOCKLESS_DEBUG_ALLOC)
18 | add_definitions(-DDEBUG_ALLOC -DDEBUG_ALLOC_SLOW)
19 | endif()
20 |
21 | if(CMAKE_COMPILER_IS_GNUCC OR CLANG OR MINGW)
22 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -fomit-frame-pointer -Wall -Wno-attributes -std=gnu99 -O3 -fno-builtin-malloc -fno-builtin-realloc -fno-builtin-calloc -fno-builtin-free -fno-builtin-memalign")
23 | endif()
24 |
25 | if(CLANG)
26 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-ignored-attributes")
27 | endif()
28 |
29 | if(MINGW)
30 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fomit-frame-pointer -O3 -fno-builtin-malloc -fno-builtin-realloc -fno-builtin-calloc -fno-builtin-free -fno-builtin-memalign")
31 | add_definitions(-DWINVER=0x0600 -D_WIN32_WINNT=0x0600)
32 | endif()
33 |
34 | if(MSVC)
35 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /MD")
36 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /MD")
37 | endif()
38 |
39 | if(NOT WIN32)
40 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
41 | set(PLATFORM_EXE_SUFFIX "")
42 | add_library(lockless STATIC ${CMAKE_CURRENT_SOURCE_DIR}/src/ll_alloc.c)
43 | else()
44 | set(PLATFORM_EXE_SUFFIX ".exe")
45 | add_library(lockless STATIC ${CMAKE_CURRENT_SOURCE_DIR}/src/ll_alloc.c ${CMAKE_CURRENT_SOURCE_DIR}/src/win_stub.cc)
46 | endif()
47 |
48 | enable_testing()
49 | add_executable(test_malloc tests/malloc.c)
50 | add_executable(test_calloc tests/calloc.c)
51 | add_executable(test_realloc tests/realloc.c)
52 | target_link_libraries(test_malloc lockless)
53 | target_link_libraries(test_calloc lockless)
54 | target_link_libraries(test_realloc lockless)
55 | if(NOT CMAKE_CROSSCOMPILING)
56 | add_test(test_malloc ${CMAKE_CURRENT_BINARY_DIR}/test_malloc${PLATFORM_EXE_SUFFIX})
57 | add_test(test_calloc ${CMAKE_CURRENT_BINARY_DIR}/test_calloc${PLATFORM_EXE_SUFFIX})
58 | add_test(test_realloc ${CMAKE_CURRENT_BINARY_DIR}/test_realloc${PLATFORM_EXE_SUFFIX})
59 | add_custom_target(tests ALL ctest)
60 | endif()
--------------------------------------------------------------------------------
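For reference, a typical build with the CMakeLists.txt above, run from the repository root (an in-source build, matching the .gitignore entries; shown for Linux with the default Makefile generator):

> cmake .
> make
> ctest

On Windows the same steps apply, with the generator of your choice (MinGW Makefiles, Visual Studio, etc.).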
/README.md:
--------------------------------------------------------------------------------
1 | About this allocator
2 | -----------------
3 | This is the Lockless Allocator, derived from http://locklessinc.com/
4 |
5 | Original sources located in ./src/
6 |
7 | Please, look at the terms of usage: http://locklessinc.com/products/
8 |
9 | What I did
10 | -----------------
11 | Since the original allocator was released under the GPL-3, I slightly modified the sources to make them buildable with both VS and GCC. This was tested with VS 2013 and GCC 4.8 (both the Linux and Windows builds work fine). It is now possible to build the allocator with all modern compilers, including
12 | * Clang >=3.4
13 | * GCC >=4.8 (including MinGW)
14 | * and VS 2013
15 |
16 | Both x86 (32bit) and x86-64 (64bit) builds should work without any problems for all listed compilers, but I haven't tested all of the possible combinations.
17 |
18 | In addition, I added CMake support, so the allocator builds smoothly on both Linux and Windows (no need for POSIX emulation/Automake anymore).
19 |
20 | TODO
21 | -----------------
22 | I want to add Mac OS X and FreeBSD support, but I don't need it right now. Maybe I'll do this in the future.
--------------------------------------------------------------------------------
/src/GPL-3.txt:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year>  <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <http://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program>  Copyright (C) <year>  <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/src/README.txt:
--------------------------------------------------------------------------------
1 | To compile on Linux, use the makefile.
2 |
3 | "make staticlib" will create the static version of the Lockless memory allocator. You can also simply add the ll_alloc.c file to the list of other source files in your project.
4 |
5 | "make dynamiclib" will create the dynamic version of the Lockless memory allocator. This is the version you want if you want to use LD_PRELOAD to add the use of the library to already compiled applications.
6 |
7 |
8 | Compiling for Microsoft Windows is much more complex. First, you will need to cross-compile with mingw-w64 to create the object file for ll_alloc.c. Second, log onto Windows and compile win_stub.cc with MS Visual C++. Finally, link the two objects together using Microsoft's linker to create the library. (This complex build method is due to mingw-w64 not supporting thread-local variables correctly, and MSVC not supporting inline asm. If either compiler changes, this could be simplified.) An illustrative command sequence is sketched after this file.
9 |
10 | If you want to create the dynamic (dll) version, use the -DUSE_DLL and /DUSE_DLL command line switches, as shown in the example compilation instructions within the makefile.
11 |
12 |
13 | Remember, this library is licensed under the GPL version 3 (or any later version). If you wish to redistribute programs linked to this library, all source code must be redistributed as well. If you wish to create closed-source proprietary software using the Lockless Memory allocator, other licenses are available. Contact us at locklessinc.com for details.
14 |
--------------------------------------------------------------------------------
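To make the Windows build procedure described in the README above concrete, here is an illustrative command sequence. The exact flags live in the makefile; the x86-64 target triple and the output names used here are assumptions.

Cross-compile the allocator object with mingw-w64 (on Linux):
> x86_64-w64-mingw32-gcc -c -O3 src/ll_alloc.c -o ll_alloc.obj

Then, on Windows, compile the stub and create the static library with Microsoft's tools:
> cl /c /O2 src\win_stub.cc
> lib /OUT:llalloc.lib ll_alloc.obj win_stub.obj

For the dll version, add -DUSE_DLL and /DUSE_DLL to the two compile lines and link with:
> link /DLL /DEF:src\alexport.def /OUT:llalloc.dll ll_alloc.obj win_stub.obj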
/src/alexport.def:
--------------------------------------------------------------------------------
1 | LIBRARY llalloc
2 | EXPORTS
3 | llallocfree
4 | cfree
5 | llallocmalloc
6 | llalloccalloc
7 | llallocrealloc
8 | memalign
9 | posix_memalign
10 | valloc
11 | pvalloc
12 | llalloc_msize
13 | mallinfo
14 | malloc_trim
15 | mallopt
16 | independent_calloc
17 | independent_comalloc
18 | llalloc_expand
19 | llalloc_free_nolock
20 | llalloc_realloc_nolock
21 | llalloc_calloc_nolock
22 | llalloc_msize_nolock
23 | llalloc_use
24 | lldebug_hook
25 |
--------------------------------------------------------------------------------
/src/compiler.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009, 2010, 2011 Lockless Inc., Steven Von Fuerst.
3 | *
4 | * This library is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This library is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 | */
17 |
18 | /* Pick compiler specific details */
19 | #if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)))
20 | #include "gcc.h"
21 | #else
22 | #ifdef _MSC_VER
23 | #include "vs.h"
24 | #else
25 | #include "ming.h"
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/src/gcc.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009, 2010, 2011 Lockless Inc., Steven Von Fuerst.
3 | *
4 | * This library is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This library is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 | */
17 |
18 | /* Compiler specific details - gcc */
19 | #ifndef LL_GCC_H
20 | #define LL_GCC_H
21 | #define _GNU_SOURCE
22 | #include
23 |
24 | #define __thread__ __thread __attribute__ ((tls_model ("initial-exec")))
25 |
26 | #define HAVE_PTHREADS
27 | #define HAVE_SYS_SELECT
28 | #define HAVE_SYS_SOCKET
29 | #define HAVE_NETINET_IN
30 | #define HAVE_SYS_MMAN
31 |
32 | #define cache_align __attribute__((aligned(64)))
33 | #define noinline __attribute__((__noinline__))
34 | #define prefetch(x) __builtin_prefetch(x)
35 | #define barrier() asm volatile("": : :"memory")
36 | #define forget(v) asm volatile("": "=m"(v) :"m"(v))
37 | #define mfence(v) asm volatile("mfence\n": : :"memory")
38 | #define cpu_relax() asm volatile("rep; nop\n": : :"memory")
39 | #define likely(x) __builtin_expect(!!(x), 1)
40 | #define unlikely(x) __builtin_expect(!!(x), 0)
41 | #define gcc_used __attribute__((used))
42 | #define _hide_ __attribute__((visibility("hidden")))
43 | #define __pure __attribute__((pure))
44 |
45 | #define GCC_ASM
46 |
47 | #include
48 |
49 | #endif /* LL_GCC_H */
50 |
--------------------------------------------------------------------------------
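As a usage note, the macros defined in gcc.h combine in the standard spin-wait pattern. A minimal sketch, assuming gcc.h is included; the lock flag and the function are hypothetical, not part of the allocator:

/* Spin until *lock drops to zero */
static void spin_wait(int *lock)
{
	while (unlikely(*lock))
	{
		cpu_relax();	/* "rep; nop" - tell the core we are busy-waiting */
		barrier();	/* compiler barrier - forces *lock to be reloaded */
	}
}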
/src/install_dyn.txt:
--------------------------------------------------------------------------------
1 | To install the Lockless memory allocator, you must first copy it to the correct location in your file system.
2 |
3 | This location depends on which Linux distribution you use, and whether or not you use a 64bit machine.
4 |
5 | The correct location is:
6 |
7 | Debian or Ubuntu based systems:
8 | 32bit: /usr/lib
9 | 64bit: /usr/lib
10 | 32bit library in a 64bit multilib install: /usr/lib32
11 |
12 | Redhat, Suse or Gentoo based systems:
13 | 32bit: /usr/lib
14 | 64bit: /usr/lib64
15 | 32bit library in a 64bit multilib install: /usr/lib32 or /usr/lib, depending on whether /usr/lib is a symlink to /usr/lib64.
16 |
17 |
18 | Become root
19 | > su
20 | "root password"
21 |
22 | Copy the library to the correct location (here we assume /usr/lib)
23 | > cp libllalloc.so.1.3 /usr/lib
24 |
25 | Go to that location (here we assume /usr/lib)
26 | > cd /usr/lib
27 |
28 | Make the two symlinks to properly install the library
29 | > ln -sf libllalloc.so.1.3 libllalloc.so
30 | > ln -sf libllalloc.so.1.3 libllalloc.so.1
31 |
32 | Regenerate your library cache for the directory you are in (assuming /usr/lib)
33 | > ldconfig -n /usr/lib
34 |
35 | Done - all installed
36 | > exit
37 |
38 |
39 |
40 |
41 |
42 | Once installed, the Lockless memory allocator can be used in several ways.
43 | 1) Firstly, you can link it to programs you compile. Just add "-lllalloc" to the link command line to add the library to your program. You can check that this has worked by running ldd on the compiled program.
44 | > ldd my_program
45 |
46 | Make sure libllalloc.so is listed as a library in the output from ldd. If not, check your library search paths in /etc/ld.so.conf to make sure the directory you installed the library in is listed. Another possible error is that you didn't put "-lllalloc" far enough along in the link command line. The linker only adds a library to an executable if it satisfies dependencies for currently unseen external symbols as it scans the command line from left to right. If the object files that use e.g. malloc() or free() are placed after -lllalloc on the link command line, the linker won't know that the library is needed. The easiest way to fix this is to put "-lllalloc" last (see the example link line after this file).
47 |
48 | 2) If you wish to have a program use the library, and do not wish to recompile it, you can use the LD_PRELOAD feature of glibc. Again assuming the library is installed in /usr/lib
49 |
50 | If your shell is bash or ksh you can use:
51 | > export LD_PRELOAD=/usr/lib/libllalloc.so
52 |
53 | If your shell is csh or tcsh you can use:
54 | > setenv LD_PRELOAD /usr/lib/libllalloc.so
55 |
56 | This will cause all programs launched from that shell to use the Lockless memory allocator instead of the default one.
57 |
58 | 3) If you want all your programs to run using the memory allocator add the above lines to your shell's init file.
59 |
60 | E.g., if your shell is bash, edit the .bashrc in your home directory and add "export LD_PRELOAD=/usr/lib/libllalloc.so" to it.
61 |
62 | Note that it is probably not wise to let root run programs linked with the Lockless memory allocator. The allocator is designed for speed, not security.
63 |
--------------------------------------------------------------------------------
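For example, a complete link line with "-lllalloc" placed last, as recommended in point 1 above (the program and object file names are hypothetical):

> gcc -o my_program main.o util.o -lllalloc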
/src/install_stt.txt:
--------------------------------------------------------------------------------
1 | To use the static version of the Lockless memory allocator, you may want to first copy it to somewhere where your linker will find it. Do this if you wish to avoid adding "-L" directory search options to your linker command line.
2 |
3 | First determine the correct location for the library in your filesystem. This location depends on which Linux distribution you use, and whether or not you use a 64bit machine.
4 |
5 | The correct location is:
6 |
7 | Debian or Ubuntu based systems:
8 | 32bit: /usr/lib
9 | 64bit: /usr/lib
10 | 32bit library in a 64bit multilib install: /usr/lib32
11 |
12 | Redhat, Suse or Gentoo based systems:
13 | 32bit: /usr/lib
14 | 64bit: /usr/lib64
15 | 32bit library in a 64bit multilib install: /usr/lib32 or /usr/lib, depending on whether /usr/lib is a symlink to /usr/lib64.
16 |
17 |
18 | Become root
19 | > su
20 | "root password"
21 |
22 | Copy the library to the correct location (here we assume /usr/lib)
23 | >cp libllalloc.a /usr/lib
24 |
25 | Done - all installed
26 | > exit
27 |
28 | If you do this, then linking with "-lllalloc" will include the static library in your executable. Note that if you have also purchased and installed the dynamic library, you will also need to include the "-static" compile option to select static linking rather than the dynamic library.
29 |
30 |
31 | If you do not wish to install the library in the standard location, you can link your programs by adding "/some/path/to/library/libllalloc.a" to the end of your link command line. This method allows you to install the library wherever you wish (see the example after this file).
32 |
33 | Note that it is probably not wise to let root run programs linked with the Lockless memory allocator. The allocator is designed for speed, not security.
34 |
--------------------------------------------------------------------------------
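An example of the direct-path method from the last paragraph above (the library path is hypothetical):

> gcc -o my_program main.o /home/user/llalloc/libllalloc.a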
/src/ll_alloc.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009, 2010, 2011 Lockless Inc., Steven Von Fuerst.
3 | *
4 | * This library is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This library is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 | */
17 |
18 | /*
19 | * Implement a lockfree allocator based upon lockless queues
20 | * that communicate between processors, and btrees to hold the
21 | * unallocated memory.
22 | */
23 |
24 | #define _GNU_SOURCE
25 |
26 | #include "compiler.h"
27 | #include "ll_asm.h"
28 | #include "ll_list.h"
29 | #include
30 | #include
31 | #include
32 | #include
33 |
34 | #ifndef WINDOWS
35 | #include
36 | #include
37 | #include
38 | #include
39 | #endif /* !WINDOWS */
40 |
41 | #include
42 | #include
43 | #include
44 | #include
45 | #include
46 | #include
47 |
48 | /* Debugging */
49 | //#define DEBUG_ALLOC
50 |
51 | /* Extra checking */
52 | //#define DEBUG_ALLOC_SLOW
53 |
54 | /* Memory leak debugging */
55 | //#define DEBUG_LEAK
56 | //#define DEBUG_LEAK_DISP 0
57 |
58 | /* For Windows and valgrind */
59 | //#define EMU_SBRK
60 | //#define EMU_SBRK_VG
61 |
62 | /* Turn off slab usage - useful for debugging btree and small alloc code */
63 | //#define DEBUG_NO_SLAB
64 |
65 | /* Turn on home-made profiler */
66 | //#define DEBUG_PROFILE
67 |
68 | #ifdef DEBUG_PROFILE
69 | #include "prof.h"
70 | #else
71 | #define DECL_PROF_FUNC
72 | #endif
73 |
74 |
75 | #ifndef WINDOWS
76 | #define PAGESIZE 4096UL
77 | #else
78 | #define PAGESIZE ((size_t) 65536)
79 | #endif
80 |
81 | /* Separator between allocations */
82 | #define SEPSIZE 16
83 | #define PTRSIZE 8
84 |
85 | #define HEADERSIZE 32
86 |
87 | #define ADDR_SIZE 27
88 |
89 | #define SLABSIZE ((uintptr_t) (1 << 17))
90 | //#define SLABBMAX ((SLABSIZE / 8) - 2)
91 | #define SLABBMAX 64 /* About 4M per thread */
92 |
93 | /* Slab sizes 0 to 512 bytes in steps of 16 */
94 | #define NUM_SB 33
95 | #define SB_MAX ((NUM_SB - 1) * 16)
96 |
97 | /* Maximum size of medium allocations */
98 | #define BTMALLOC ((1L << ADDR_SIZE) - HEADERSIZE)
99 |
100 | #define TOP_SIZE (-(PAGESIZE * 2))
101 |
102 | /* Minimum size to allocate at a time */
103 | #define MINALLOC (1L << 21)
104 |
105 | /* 64 queues */
106 | #define NUM_QS 64
107 | #define QS_MAX (NUM_QS * 16 - SEPSIZE)
108 |
109 | /* Only check four fast bins */
110 | #define FAST_MASK 0x0fULL
111 |
112 | /* Clear the fast lists at least this often on free() */
113 | #define FREE_FAST ((1 << 16) - 1)
114 |
115 | /* The biggest size that can reasonably be stored in the fast lists */
116 | #ifdef __x86_64__
117 | #define FAST_64_BIN 67108863
118 | #else
119 | #define FAST_64_BIN 3669975
120 | #endif
121 |
122 |
123 | #ifdef __x86_64__
124 | #define MYSIZE_TO_PTR(T, N) ((dlist *) (((char *) T) + offsetof(atls, qs) + N - SEPSIZE))
125 | #else
126 | #define MYSIZE_TO_PTR(T, N) ((dlist *) (((char *) T) + offsetof(atls, qs) + N/2 - PTRSIZE))
127 | #endif
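| /* Size N (a multiple of 16) maps to the queue head &qs[N/16 - 1]; N == 16 lands on the q8 small-list overlap */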
128 |
129 | /* Fast-lists */
130 | #define NUM_FL 64
131 |
132 | /* btree size */
133 | #define BT_MAX 16
134 |
135 | /* 64bit mask type */
136 | typedef unsigned long long u64b;
137 |
138 | /* Pre declare */
139 | typedef struct btree btree;
140 |
141 | typedef struct sep sep;
142 | struct sep
143 | {
144 | btree *left;
145 |
146 | #ifndef __x86_64__
147 | int pad;
148 | #endif
149 |
150 | __extension__ union
151 | {
152 | __extension__ struct
153 | {
154 | unsigned bs_offset;
155 | unsigned size;
156 | };
157 | uintptr_t prev;
158 | };
159 | };
160 |
161 | struct btree
162 | {
163 | 	/* Separator */
164 | sep s;
165 |
166 | __extension__ union
167 | {
168 | slist list;
169 | dlist list2;
170 | void *data;
171 |
172 | __extension__ struct
173 | {
174 | btree *parent;
175 | unsigned bsize[BT_MAX + 1];
176 | char prev[BT_MAX + 1];
177 | btree *ptr[BT_MAX];
178 | };
179 | };
180 | #ifndef __x86_64__
181 | unsigned pad;
182 | #endif
183 | };
184 |
185 | #ifdef WINDOWS
186 | /* For documentation purposes only */
187 | struct mallinfo
188 | {
189 | /* Total space allocated with sbrk in all threads */
190 | int arena;
191 |
192 | /* Number of ordinary (non-slab and non-mmap) allocations */
193 | int ordblks;
194 |
195 | /* Number of blocks in this threads slab */
196 | int smblks;
197 |
198 | /* Number of mmaped chunks in our thread */
199 | int hblks;
200 |
201 | /* Number of btree nodes for our thread */
202 | int hblkhd;
203 |
204 | /* Total (possibly partially) used slab blocks */
205 | int usmblks;
206 |
207 | /* Total number of free slab blocks */
208 | int fsmblks;
209 |
210 | /* Total allocated space for this thread including overhead */
211 | int uordblks;
212 |
213 | /* Total free space for this thread in ordinary mmap region */
214 | int fordblks;
215 |
216 | /* zero */
217 | int keepcost;
218 | };
219 | #endif
220 |
221 |
222 | /* Separator bitflags */
223 | #define FLG_UNUSED 0x01
224 | #define FLG_LUNUSED 0x02
225 | #define FLG_LSIZE8 0x04
226 | #define FLG_SIZE8 0x08
227 |
228 | static int b_leaf(btree *b);
229 |
230 | #define SEP_INDEX(b, loc, v) (((unsigned char *) &((b)->bsize[loc]))[v])
231 |
232 | /* Index into the zeroth int in bsize[] */
233 | #define b_start(b) SEP_INDEX(b, 0, 0)
234 | #define b_pindex(b) SEP_INDEX(b, 0, 1)
235 | #define b_mask(b) (*(unsigned short*) &SEP_INDEX(b, 0, 2))
236 |
237 | #define b_next(b, loc) SEP_INDEX(b, loc, 0)
238 | #define b_prev(b, loc) (b->prev[loc])
239 | #define b_last(b) b_prev(b, 0)
240 | #define b_ptr(b, loc) (b->ptr[(loc) - 1])
241 |
242 | typedef union mealloc mealloc;
243 | union mealloc
244 | {
245 | __extension__ struct
246 | {
247 | slist **tail;
248 | dlist m_list;
249 | };
250 |
251 | __extension__ struct
252 | {
253 | char pad[16];
254 | btree b;
255 | };
256 |
257 | /* Prevent compiler warning "no named members" */
258 | void *dummy;
259 | };
260 |
261 | typedef struct sbheader sbheader;
262 | struct sbheader
263 | {
264 | __extension__ union
265 | {
266 | __extension__ struct
267 | {
268 | slist **tail;
269 |
270 | dlist list;
271 |
272 | uintptr_t max;
273 |
274 | unsigned size;
275 |
276 | };
277 |
278 | /* First cache line is mostly read-only */
279 | char pad[64];
280 | };
281 |
282 | /* Second cache line is read-write */
283 | uintptr_t first;
284 |
285 | unsigned used;
286 |
287 | #ifndef __x86_64__
288 | u64b dummy; /* padding to get right alignment */
289 | #endif
290 |
291 | /* This needs to be 16 byte aligned */
292 | void *data;
293 | };
294 |
295 | /* 64k block of pointers to free blocks */
296 | typedef struct freesb freesb;
297 | struct freesb
298 | {
299 | freesb *next;
300 | unsigned count;
301 |
302 | sbheader *blocks[SLABBMAX];
303 | };
304 |
305 | typedef struct atls atls;
306 | struct atls
307 | {
308 | slist fl[NUM_FL];
309 | u64b f_mask;
310 |
311 | #ifndef __x86_64__
312 | unsigned dummy; /* padding to get right q8 alignment */
313 | #endif
314 |
315 | 	/* Note that qs[0] is a misaligned btree pointer! */
316 | dlist qs[NUM_QS];
317 |
318 | __extension__ union
319 | {
320 | __extension__ struct
321 | {
322 | 			/* Overlap with the separator in bheap */
323 | slist btree_freenode;
324 | unsigned b_hgt;
325 | unsigned b_cnt;
326 | };
327 |
328 | btree bheap;
329 | };
330 |
331 | u64b q_mask;
332 |
333 | /* Partially full slabs */
334 | dlist slab[NUM_SB];
335 |
336 | dlist slab_full;
337 |
338 | freesb *slab_chunk;
339 |
340 | size_t percpu_hash;
341 |
342 | size_t a_alloced;
343 | size_t s_wanted;
344 |
345 | slist *head;
346 |
347 | dlist bl;
348 |
349 | #ifdef DEBUG_LEAK
350 | int leak_fd;
351 | #endif
352 |
353 | /* Hazard list */
354 | dlist h_list;
355 | void *hazard;
356 |
357 | /* Deleted list */
358 | atls *d_list;
359 |
360 | int fcount;
361 |
362 | char callocable;
363 |
364 | char dummy3[59];
365 |
366 | /* Off by itself to reduce false sharing */
367 | slist *tail;
368 | };
369 |
370 | #ifdef USE_DLL
371 | #define PREFIX(X) llalloc##X
372 | #else
373 | #ifdef USE_PREFIX
374 | #define PREFIX(X) llalloc##X
375 | #else
376 | #define PREFIX(X) X
377 | #endif
378 | #endif
379 |
380 | #ifndef DEBUG_ALLOC
381 | #ifdef _MSC_VER
382 | #define always_inline __forceinline
383 | #else
384 | #define always_inline inline __attribute__((always_inline))
385 | #endif
386 | #else /* DEBUG_ALLOC */
387 | #define always_inline
388 | #endif
389 |
390 | /* This ISO C11 function might not have a prototype in older headers */
391 | void *aligned_alloc(size_t alignment, size_t size);
392 |
393 | #ifdef WINDOWS
394 |
395 | void llmutex_lock(void *l);
396 | void llmutex_unlock(void *l);
397 | int llmutex_trylock(void *l);
398 |
399 | typedef void * mutex_t;
400 | #define mutex_lock llmutex_lock
401 | #define mutex_unlock llmutex_unlock
402 | #define mutex_trylock llmutex_trylock
403 | #define MUTEX_INITIALIZER {0}
404 |
405 | #ifndef EMU_SBRK
406 | #define EMU_SBRK
407 | #endif
408 |
409 | #define set_enomem() _set_errno(ENOMEM)
410 |
411 | /* Other functions that need prototypes */
412 | #if defined USE_DLL || defined USE_PREFIX
413 | void llallocfree(void *p);
414 | void *llallocmalloc(size_t size);
415 | void *llalloccalloc(size_t size, size_t n);
416 | void *llallocrealloc(void *p, size_t size);
417 | size_t llalloc_msize(void *p);
418 | void *llalloc_expand(void *p, size_t size);
419 |
420 | /* Hack - indirect calls to crt functions */
421 | int (* __callnewh)(size_t);
422 | int (* __newmode)(void);
423 | #endif /* USE_DLL */
424 |
425 | void cfree(void *p);
426 | void *memalign(size_t align, size_t size);
427 | int posix_memalign(void **p, size_t align, size_t size);
428 | void *valloc(size_t size);
429 | void *pvalloc(size_t size);
430 | struct mallinfo mallinfo(void);
431 | int malloc_trim(size_t pad);
432 | int mallopt(int param, int val);
433 | void *PREFIX(_calloc_impl)(size_t n, size_t size, int *errno_tmp);
434 | void PREFIX(_free_nolock)(void *p);
435 | void *PREFIX(_realloc_nolock)(void *p, size_t size);
436 | void *PREFIX(_calloc_nolock)(size_t n, size_t size);
437 | size_t PREFIX(_msize_nolock)(void *p);
438 | static size_t malloc_usable_size(void *p);
439 |
440 | void __tlregdtor(void (*)(void *));
441 |
442 | #else /* WINDOWS */
443 |
444 | static int sys_futex(void *addr1, int op, int val1, struct timespec *timeout, void *addr2, int val3)
445 | {
446 | return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
447 | }
448 |
449 | #define cmpxchg(P, O, N) __sync_val_compare_and_swap((P), (O), (N))
450 |
451 | typedef union mutex_t mutex_t;
452 |
453 | union mutex_t
454 | {
455 | unsigned u;
456 | struct
457 | {
458 | unsigned char locked;
459 | unsigned char contended;
460 | } b;
461 | };
462 |
463 | static void mutex_init(mutex_t *m)
464 | {
465 | m->u = 0;
466 | }
467 |
468 | static void mutex_lock(mutex_t *m)
469 | {
470 | int i;
471 |
472 | /* Try to grab lock */
473 | for (i = 0; i < 100; i++)
474 | {
475 | if (!xchg_8(&m->b.locked, 1)) return;
476 |
477 | cpu_relax();
478 | }
479 |
480 | /* Have to sleep */
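| 	/* 257 == 0x0101 sets both the locked and contended bytes in one 32-bit exchange (little-endian) */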
481 | while (xchg_32(&m->u, 257) & 1)
482 | {
483 | sys_futex(m, FUTEX_WAIT_PRIVATE, 257, NULL, NULL, 0);
484 | }
485 | }
486 |
487 | static void mutex_unlock(mutex_t *m)
488 | {
489 | DECL_PROF_FUNC;
490 |
491 | int i;
492 |
493 | /* Locked and not contended */
494 | if ((m->u == 1) && (cmpxchg(&m->u, 1, 0) == 1)) return;
495 |
496 | /* Unlock */
497 | m->b.locked = 0;
498 |
499 | barrier();
500 |
501 | /* Spin and hope someone takes the lock */
502 | for (i = 0; i < 200; i++)
503 | {
504 | if (m->b.locked) return;
505 |
506 | cpu_relax();
507 | }
508 |
509 | /* We need to wake someone up */
510 | m->b.contended = 0;
511 |
512 | sys_futex(m, FUTEX_WAKE_PRIVATE, 1, NULL, NULL, 0);
513 | }
514 |
515 | static int mutex_trylock(mutex_t *m)
516 | {
517 | unsigned c;
518 |
519 | if (m->b.locked) return EBUSY;
520 | c = xchg_8(&m->b.locked, 1);
521 | if (!c) return 0;
522 | return EBUSY;
523 | }
524 |
525 | #define MUTEX_INITIALIZER {0}
526 |
527 | /* Interface for hooks, if needed */
528 | #ifdef USE_ALLOC_HOOK
529 | #include "ll_alloc_hook.c"
530 | #endif
531 |
532 |
533 | static void malloc_stats_aux(int show_nodes);
534 |
535 | static gcc_used char dummy1[64];
536 | static pthread_once_t init_once = PTHREAD_ONCE_INIT;
537 | static pthread_key_t death_key;
538 |
539 | /*
540 | * If pthread isn't linked in,
541 | * have weak replacements for the single-threaded case
542 | */
543 | #pragma weak pthread_atfork
544 | #pragma weak pthread_key_create
545 | #pragma weak pthread_setspecific
546 | #pragma weak pthread_once
547 |
548 | #define set_enomem() (errno = ENOMEM)
549 |
550 | #endif /* WINDOWS */
551 |
552 | typedef union percpu_list percpu_list;
553 | union percpu_list
554 | {
555 | __extension__ struct
556 | {
557 | mutex_t m;
558 | freesb *list;
559 | };
560 | char pad[64];
561 | };
562 |
563 | /* Global thread hazard list */
564 | static cache_align mutex_t h_lock = MUTEX_INITIALIZER;
565 | static dlist h_list = DLIST_INIT(h_list);
566 |
567 | static cache_align mutex_t d_lock = MUTEX_INITIALIZER;
568 | static atls *d_list = NULL;
569 |
570 | /* List of freed slab blocks */
571 | static cache_align percpu_list *pc_slab;
572 | static size_t cpu_total;
573 |
574 | /* sbrk information */
575 | #ifndef HOOK_SBRK
576 | static cache_align mutex_t sb_lock = MUTEX_INITIALIZER;
577 | static cache_align uintptr_t sbrk_start = 0;
578 | static uintptr_t sbrk_size = 0;
579 | static int sbrk_oom = 0;
580 | #endif /* HOOK_SBRK */
581 | static unsigned sltotal[NUM_SB];
582 |
583 | #ifdef DEBUG_LEAK
584 | #define LEAK_MAX 1024
585 | typedef struct bigallocs bigallocs;
586 | struct bigallocs
587 | {
588 | void *p;
589 | size_t size;
590 | };
591 |
592 | static bigallocs big_leak[LEAK_MAX] = {{NULL, 0}};
593 | static mutex_t l_lock = MUTEX_INITIALIZER;
594 |
595 | #endif
596 |
597 | /* Hack */
598 | #define BUILD_ASSERT(C) do {switch (0){case 0:; case (C):;}} while (0)
599 |
600 | /* Pre-declares */
601 | static always_inline void *split_node(atls *tl, btree *b, size_t t_size, size_t size);
602 | static void merge_node(atls *tl, void *p);
603 | static int init_sldata(void);
604 | static void slab_free(atls *tl, void *p);
605 | static void local_free(atls *tl, void *p);
606 | static void *local_alloc(atls *tl, size_t size);
607 | static void *slab_alloc_safe(atls *tl, size_t size);
608 | static always_inline void *fast_alloc(atls *tl, size_t size);
609 | static void *slow_alloc(atls *tl, size_t size);
610 | static void atls_merge(atls *tl1, atls *tl2);
611 | static void test_all(atls *tl);
612 | static void *zalloc(atls *tl, size_t size);
613 | void **independent_calloc(size_t n, size_t size, void **chunks);
614 | void **independent_comalloc(size_t n, size_t *sizes, void **chunks);
615 |
616 | static inline btree *small_next(btree *b)
617 | {
618 | return b->data;
619 | }
620 |
621 | static inline void set_small_next(btree *b, btree *next)
622 | {
623 | b->data = next;
624 | }
625 |
626 | #ifdef __x86_64__
627 | static inline btree *small_prev(btree *b)
628 | {
629 | return (btree *) (b->s.prev & ~15);
630 | }
631 |
632 | static inline void set_small_prev(btree *b, btree *prev)
633 | {
634 | uintptr_t p = b->s.prev & 15;
635 | b->s.prev = p + (uintptr_t) prev;
636 | }
637 | #else
638 | static inline btree *small_prev(btree *b)
639 | {
640 | return (btree *) b->s.size;
641 | }
642 |
643 | static inline void set_small_prev(btree *b, btree *prev)
644 | {
645 | b->s.size = (unsigned) prev;
646 | }
647 |
648 | #endif
649 |
650 | static inline void *shift(void *p, size_t s)
651 | {
652 | return &(((char *)p)[s]);
653 | }
654 |
655 | /* Unfortunately, TLS support with mingw is totally broken... so we need to emulate it */
656 | #ifdef WINDOWS
657 | static DWORD tls_index = TLS_OUT_OF_INDEXES;
658 | static atls *get_tls(void)
659 | {
660 | if (tls_index == TLS_OUT_OF_INDEXES) return NULL;
661 |
662 | return TlsGetValue(tls_index);
663 | }
664 |
665 | static void set_tls(atls *tls)
666 | {
667 | TlsSetValue(tls_index, tls);
668 | }
669 | #else
670 | #ifndef USE_ALLOC_HOOK
671 | static __thread__ atls *tls = NULL;
672 | #define get_tls() tls
673 | #define set_tls(T) (tls = (T))
674 | #endif
675 | #endif
676 |
677 | static size_t cpu_num(void)
678 | {
679 | #ifdef WINDOWS
680 | SYSTEM_INFO info;
681 |
682 | GetSystemInfo(&info);
683 | return info.dwNumberOfProcessors;
684 | #else
685 | #ifdef SYS_MACOSX
686 | int num;
687 | size_t len = sizeof(num);
688 | if (sysctlbyname("hw.ncpu", &num, &len, NULL, 0)) num = 1;
689 | return num;
690 | #else
691 | return sysconf(_SC_NPROCESSORS_ONLN);
692 | #endif /* SYS_MACOSX */
693 | #endif /* WINDOWS */
694 | }
695 |
696 | /*
697 | * Emulate sbrk()
698 | * Assumes we are called under a lock */
699 | #ifdef EMU_SBRK
700 |
701 | #ifdef EMU_SBRK_VG
702 | #define SBRK_SIZE (1ULL << 30)
703 | #else /* EMU_SBRK_VG */
704 |
705 | #ifdef __x86_64__
706 |
707 | /* Default to 32GiB of sbrk space */
708 | #define SBRK_SIZE (1ULL << 37)
709 | #else /* __x86_64__ */
710 |
711 | /* Default to 1GiB of sbrk space */
712 | #define SBRK_SIZE (1ULL << 30)
713 | #endif /* __x86_64__ */
714 | #endif /* EMU_SBRK_VG */
715 |
716 |
717 | static void *sbrk_mmap_base = NULL;
718 | static void *sbrk_mmap_end = 0;
719 | static void init_sbrk(void)
720 | {
721 | DECL_PROF_FUNC;
722 |
723 | size_t size = SBRK_SIZE;
724 |
725 | while (1)
726 | {
727 | #ifndef WINDOWS
728 | sbrk_mmap_base = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0);
729 | #else
730 | /* Allocate address space - but no memory */
731 | sbrk_mmap_base = VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_READWRITE);
732 | #endif
733 | if (sbrk_mmap_base == MAP_FAILED)
734 | {
735 | sbrk_mmap_base = NULL;
736 | size = size / 2;
737 | if (size < 65536) return;
738 | }
739 | else
740 | {
741 | sbrk_mmap_end = shift(sbrk_mmap_base, size);
742 |
743 | return;
744 | }
745 | }
746 | }
747 |
748 | static void *emu_sbrk(size_t size)
749 | {
750 | DECL_PROF_FUNC;
751 |
752 | void *out;
753 |
754 | /* Hack - initialize if required */
755 | if (!size) init_sbrk();
756 | if (!sbrk_mmap_base) return MAP_FAILED;
757 |
758 | out = sbrk_mmap_base;
759 | sbrk_mmap_base = shift(sbrk_mmap_base, size);
760 | if (sbrk_mmap_base >= sbrk_mmap_end)
761 | {
762 | sbrk_mmap_base = out;
763 |
764 | return MAP_FAILED;
765 | }
766 |
767 | #ifdef WINDOWS
768 | /* Enable memory */
769 | VirtualAlloc(out, size, MEM_COMMIT, PAGE_READWRITE);
770 | #endif
771 |
772 | return out;
773 | }
774 |
775 | #define sbrk(S) emu_sbrk(S)
776 | #endif
777 |
778 | #ifndef HOOK_SBRK
779 | static inline void init_sbrk_start(void)
780 | {
781 | void *v = sbrk(0);
782 | sbrk_start = (uintptr_t) v;
783 | }
784 | #endif
785 |
786 | static inline int is_slab(void *p)
787 | {
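| 	/* One unsigned comparison tests sbrk_start <= p < sbrk_start + sbrk_size, relying on wraparound */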
788 | return ((uintptr_t) p - sbrk_start < sbrk_size);
789 | }
790 |
791 | static inline void *page_start(void *p)
792 | {
793 | return (void *) (-PAGESIZE & (uintptr_t) p);
794 | }
795 |
796 | static inline size_t page_align(size_t s)
797 | {
798 | return -PAGESIZE & (s + PAGESIZE - 1);
799 | }
800 |
801 | static inline size_t sep_align(size_t s)
802 | {
803 | /*
804 | * We want to align on 16byte boundaries
805 | *
806 | * 16 -> 16
807 | * 24 -> 16
808 | * 25 -> 32
809 | * 32 -> 32
810 | */
811 | /*
812 | * Then we want to include the extra 8 bytes of last ptr that are free.
813 | * Finally, we want to include the following sep data.
814 | */
815 | /*
816 | * 0 -> 16
817 | * 8 -> 16
818 | * 9 -> 32
819 | * 16 -> 32
820 | * 24 -> 32
821 | * 25 -> 48
822 | * 32 -> 48
823 | * 40 -> 48
824 | */
825 |
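| 	/* Equivalent to ((s + 7) & ~15) + 16: round (s - 8) up to a 16-byte multiple, then add the 16-byte separator */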
826 | return (s + 7 + 16) & ~15;
827 | }
828 |
829 | static inline int un_used(btree *b)
830 | {
831 | return (b->s.bs_offset & FLG_UNUSED);
832 | }
833 |
834 | static inline int left_unused(btree *b)
835 | {
836 | return b->s.bs_offset & FLG_LUNUSED;
837 | }
838 |
839 | static inline void set_unused(btree *b, btree *br)
840 | {
841 | br->s.bs_offset |= FLG_LUNUSED;
842 | br->s.left = b;
843 |
844 | b->s.bs_offset |= FLG_UNUSED;
845 | }
846 |
847 | static inline void set_used(btree *b, size_t size)
848 | {
849 | btree *br = shift(b, size);
850 | br->s.bs_offset &= ~FLG_LUNUSED;
851 |
852 | #ifdef DEBUG_ALLOC_SLOW
853 | 	if (size != b->s.size) errx(1, "size mismatch\n");
854 | #endif
855 |
856 | b->s.bs_offset &= ~FLG_UNUSED;
857 | }
858 |
859 | static inline void set_size8(btree *b)
860 | {
861 | btree *br = shift(b, 16);
862 | br->s.bs_offset |= FLG_LSIZE8;
863 | b->s.bs_offset |= FLG_SIZE8;
864 | }
865 |
866 | static inline void unset_size8(btree *b)
867 | {
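| 	/* Rebuild a full separator for the 16-byte block, recovering its block-start offset from the neighbour at +16 */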
868 | btree *br = shift(b, 16);
869 | br->s.bs_offset &= ~FLG_LSIZE8;
870 | b->s.bs_offset &= ~FLG_SIZE8;
871 | b->s.size = 16;
872 | b->s.bs_offset &= 15;
873 | b->s.bs_offset += (br->s.bs_offset & ~15) - 16;
874 | }
875 |
876 | #ifdef __x86_64__
877 | static inline btree *get_q8(atls *tl)
878 | {
879 | 	/* Mega hack - align so that the returned btree pointer lines up with the correct memory locations */
880 | return shift(&tl->qs[0], -(uintptr_t)8);
881 | }
882 | #else
883 | static inline btree *get_q8(atls *tl)
884 | {
885 | 	/* Mega hack - align so that the returned btree pointer lines up with the correct memory locations */
886 | return shift(&tl->qs[0], -(uintptr_t)12);
887 | }
888 | #endif
889 |
890 | static inline btree *read_left(btree *b)
891 | {
892 | if (!left_unused(b)) return NULL;
893 | if (b->s.bs_offset & FLG_LSIZE8) return shift(b, -(uintptr_t)16);
894 | return b->s.left;
895 | }
896 |
897 | static inline mealloc *read_bs(btree *b)
898 | {
899 | uintptr_t s = b->s.bs_offset & ~15;
900 |
901 | #ifdef DEBUG_ALLOC_SLOW
902 | void *v = shift(b, -s);
903 | if ((PAGESIZE - 1) & (uintptr_t) v) errx(1, "mealloc misaligned\n");
904 | #endif
905 |
906 | return (mealloc *) shift(b, -s);
907 | }
908 |
909 | static void btree_init(btree *b)
910 | {
911 | /* Init header */
912 | //b_start(b) = 0;
913 | b_mask(b) = -1;
914 | //b_pindex(b) = 0;
915 | //b_last(b) = 0;
916 | }
917 |
918 | static inline void set_sep(btree *b, int size, btree *bo)
919 | {
920 | unsigned offset = bo->s.bs_offset & ~15;
921 |
922 | /* Store split block offset + size + used indicators */
923 | b->s.bs_offset = offset + ((uintptr_t) b - (uintptr_t) bo);
924 | b->s.size = size;
925 | }
926 |
927 | #ifdef DEBUG_ALLOC
928 | static void check_sep(btree *b)
929 | {
930 | btree *br = shift(b, b->s.size);
931 |
932 | if (((uintptr_t) b) & 15) errx(1, "btree misaligned\n");
933 |
934 | if (is_slab(&b->data)) errx(1, "btree slab overlap\n");
935 |
936 | /* Test unused bit */
937 | if (un_used(b))
938 | {
939 | if (b->s.bs_offset & FLG_SIZE8)
940 | {
941 | br = shift(b, 16);
942 |
943 | 			if (!(br->s.bs_offset & FLG_LSIZE8)) errx(1, "size8 bit mismatch\n");
944 | }
945 | else
946 | {
947 | if (b->s.size & 15) errx(1, "mysize misaligned\n");
948 |
949 | 			if ((b->s.size == 16) || (br->s.bs_offset & FLG_LSIZE8)) errx(1, "size8 bit mismatch\n");
950 | if (read_left(br) != b) errx(1, "left pointer wrong\n");
951 | }
952 |
953 | if (!left_unused(br)) errx(1, "Unused flag conflict\n");
954 | }
955 | else
956 | {
957 | if (b->s.size & 15) errx(1, "mysize misaligned\n");
958 | if (left_unused(br)) errx(1, "Unused flag conflict\n");
959 | }
960 | }
961 | #else
962 | #define check_sep(B) ((void) sizeof(B))
963 | #endif
964 |
965 | #ifndef WINDOWS
966 | static __attribute__((format (gnu_printf, 2, 3))) void leak_print(atls *tl, const char *format, ...)
967 | {
968 | char buf[1024];
969 |
970 | va_list ap;
971 |
972 | va_start(ap, format);
973 | vsnprintf(buf, 1024, format, ap);
974 | va_end(ap);
975 |
976 | #ifdef DEBUG_LEAK
977 | /* Need tls and leak_fd initialized */
978 | if (tl)
979 | {
980 | if (tl->leak_fd == -1)
981 | {
982 | char buf[1024];
983 | int pid = getpid();
984 | int tid = syscall(SYS_gettid);
985 |
986 | snprintf(buf, 1024, "/tmp/leak-%d:%d.txt", pid, tid);
987 |
988 | tl->leak_fd = open(buf, O_WRONLY | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
989 | }
990 |
991 | if (tl->leak_fd != -1)
992 | {
993 | int len = strlen(buf);
994 | char *c = buf;
995 |
996 | while (len)
997 | {
998 | int out = write(tl->leak_fd, c, len);
999 |
1000 | /* Interrupted - try again */
1001 | if (out == -1) continue;
1002 |
1003 | /* Device is full - stop writing */
1004 | if (!out) return;
1005 |
1006 | len -= out;
1007 | c += out;
1008 | }
1009 | }
1010 | }
1011 | #else
1012 | /* Shut up compiler warning */
1013 | (void) tl;
1014 |
1015 | /* Otherwise output to stderr */
1016 | fprintf(stderr, "%s", buf);
1017 |
1018 | #endif
1019 | }
1020 | #else
1021 | #define leak_print(...)
1022 | #define malloc_stats_aux(...)
1023 | #endif
1024 |
1025 | #ifdef DEBUG_LEAK
1026 | static void big_alloced(void *p, size_t size)
1027 | {
1028 | int i;
1029 |
1030 | mutex_lock(&l_lock);
1031 | for (i = 0; i < LEAK_MAX; i++)
1032 | {
1033 | if (big_leak[i].p) continue;
1034 |
1035 | big_leak[i].p = p;
1036 | big_leak[i].size = size;
1037 | mutex_unlock(&l_lock);
1038 | leak_print(get_tls(), "Big alloc %p %llu\n", p, (unsigned long long) size);
1039 | return;
1040 | }
1041 | mutex_unlock(&l_lock);
1042 |
1043 | errx(1, "debug leak oom, increase LEAK_MAX\n");
1044 | }
1045 |
1046 | static void big_freed(void *p, size_t size)
1047 | {
1048 | int i;
1049 |
1050 | mutex_lock(&l_lock);
1051 | for (i = 0; i < LEAK_MAX; i++)
1052 | {
1053 | if (big_leak[i].p != p) continue;
1054 |
1055 | 		if (big_leak[i].size != size) errx(1, "big alloc size mismatch\n");
1056 | big_leak[i].p = NULL;
1057 | mutex_unlock(&l_lock);
1058 | leak_print(get_tls(), "Big free %p %llu\n", p, (unsigned long long) size);
1059 | return;
1060 | }
1061 | mutex_unlock(&l_lock);
1062 |
1063 | errx(1, "freeing unknown large block %p\n", p);
1064 | }
1065 |
1066 | static size_t big_block_size(void *p)
1067 | {
1068 | int i;
1069 |
1070 | mutex_lock(&l_lock);
1071 | for (i = 0; i < LEAK_MAX; i++)
1072 | {
1073 | if (big_leak[i].p != p) continue;
1074 | mutex_unlock(&l_lock);
1075 | return big_leak[i].size;
1076 | }
1077 |
1078 | mutex_unlock(&l_lock);
1079 |
1080 | errx(1, "freeing unknown large block %p\n", p);
1081 | }
1082 |
1083 | static void test_leak_aux(void)
1084 | {
1085 | atls *tl = get_tls();
1086 | if (!tl) return;
1087 | malloc_stats_aux(3);
1088 | leak_print(tl, "Done\n");
1089 | close(tl->leak_fd);
1090 | }
1091 |
1092 | static void test_leak(void)
1093 | {
1094 | static int count = 0;
1095 |
1096 | /* Display turned off? */
1097 | if (!DEBUG_LEAK_DISP) return;
1098 |
1099 | /* Don't bother to be thread safe - it doesn't matter much */
1100 | if (count++ == DEBUG_LEAK_DISP)
1101 | {
1102 | count = 0;
1103 | malloc_stats_aux(3);
1104 | }
1105 | }
1106 |
1107 |
1108 | #else
1109 | #define big_alloced(p, size) ((void) (sizeof(p) + sizeof(size)))
1110 | #define big_freed(p, size) ((void)(sizeof(p) + sizeof(size)))
1111 | #define test_leak_aux()
1112 | #define test_leak()
1113 | #define big_block_size(p) (sizeof(p) * 0)
1114 | #endif
1115 |
1116 |
1117 | /* Big allocations */
1118 | static void *big_alloc_aux(size_t size)
1119 | {
1120 | DECL_PROF_FUNC;
1121 |
1122 | /* Get memory */
1123 | #ifndef WINDOWS
1124 | void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
1125 | #else
1126 | void *p = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
1127 | #endif
1128 |
1129 | /* Out of memory */
1130 | if (p == MAP_FAILED) return NULL;
1131 |
1132 | big_alloced(p, size);
1133 |
1134 | /* Done */
1135 | return p;
1136 | }
1137 |
1138 | #ifdef WINDOWS
1139 | int handle_oom(int size);
1140 | #else
1141 | #define handle_oom(S) ((errno = ENOMEM), 0)
1142 | #endif
1143 |
1144 |
1145 | static noinline void *big_alloc(atls *tl, size_t size)
1146 | {
1147 | DECL_PROF_FUNC;
1148 |
1149 | size_t psize;
1150 |
1151 | size_t *p;
1152 |
1153 | 	/* This argument prevents register problems in the fast path */
1154 | (void) tl;
1155 |
1156 | if (size > TOP_SIZE) goto nomem;
1157 |
1158 | /* Get real size to allocate */
1159 | psize = page_align(size + SEPSIZE);
1160 |
1161 | p = big_alloc_aux(psize);
1162 |
1163 | if (p)
1164 | {
1165 | *p = psize;
1166 | return shift(p, SEPSIZE);
1167 | }
1168 |
1169 | nomem:
1170 |
1171 | if (handle_oom(size)) return big_alloc(tl, size);
1172 |
1173 | return NULL;
1174 | }
1175 |
1176 | #ifdef WINDOWS
1177 | static noinline void big_free_aux(size_t *p)
1178 | {
1179 | DECL_PROF_FUNC;
1180 |
1181 | big_freed(p, *p);
1182 |
1183 | VirtualFree(p, 0, MEM_RELEASE);
1184 | }
1185 | #else
1186 | static inline void big_free_aux(size_t *p)
1187 | {
1188 | big_freed(p, *p);
1189 |
1190 | munmap(p, *p);
1191 | }
1192 | #endif
1193 |
1194 |
1195 | #ifdef DEBUG_ALLOC_SLOW
1196 | static void test_queue(atls *tl)
1197 | {
1198 | slist *q;
1199 |
1200 | btree *b;
1201 |
1202 | /* Scan incoming queue, looking for corruption */
1203 | for (q = tl->head; q; q = q->next)
1204 | {
1205 | /* Ignore slab nodes */
1206 | if (is_slab(q)) continue;
1207 |
1208 | if (((uintptr_t) q) & 15) errx(1, "incoming queue corrupted\n");
1209 |
1210 | b = CONTAINER(btree, data, q);
1211 |
1212 | if (un_used(b)) errx(1, "queue element marked as unused\n");
1213 | }
1214 | }
1215 | #else
1216 | #define test_queue(T) ((void) sizeof(T))
1217 | #endif
1218 |
1219 | #ifdef __x86_64__
1220 | /* Magic code that converts size to entry in fast-list array */
1221 | static always_inline size_t size2fl(size_t size)
1222 | {
1223 | size_t n = (size / 32);
1224 |
1225 | /* Make sure we don't overflow */
1226 | if (size == 16) return 0;
1227 | if (size > FAST_64_BIN) return NUM_FL - 1;
1228 |
1229 | n = n * n * n;
1230 |
1231 | return flsq(n);
1232 | }
1233 | #else
1234 | /* Magic code that converts size to entry in fast-list array */
1235 | static inline size_t size2fl(size_t size)
1236 | {
1237 | size_t n;
1238 |
1239 | /* 32 bit version uses old floating point instructions */
1240 | union di
1241 | {
1242 | double d;
1243 | unsigned u2[2];
1244 | } di;
1245 |
1246 | di.d = size + 40;
1247 |
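| 	/* The high word >> 18 gives (biased exponent << 2) | top 2 mantissa bits - a log scale with 4 bins per power of two; the +40 bias makes size 16 give exactly 4115, i.e. bin 0 */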
1248 | n = (di.u2[1] >> 18) - 4115;
1249 |
1250 | /* Make sure we don't overflow */
1251 | if (n >= NUM_FL) n = NUM_FL - 1;
1252 |
1253 | return n;
1254 | }
1255 | #endif
1256 |
1257 |
1258 | /* Add to previous list - but don't set flag */
1259 | static always_inline void fast_add(atls *tl, btree *b, size_t n)
1260 | {
1261 | slist_add(&tl->fl[n], &b->list);
1262 | tl->f_mask |= 1ULL << n;
1263 | }
1264 |
1265 | /* Add to free lists */
1266 | static always_inline void fast_free(atls *tl, btree *b, size_t ms)
1267 | {
1268 | size_t n = size2fl(ms);
1269 |
1270 | #ifdef DEBUG_ALLOC_SLOW
1271 | if (un_used(b)) errx(1, "fast_free() needs used node\n");
1272 | if (b->s.size != ms) errx(1, "fast_free size wrong\n");
1273 | #endif
1274 |
1275 | fast_add(tl, b, n);
1276 | }
1277 |
1278 | static int scan_queue(atls *tl, slist **qh, size_t wanted)
1279 | {
1280 | DECL_PROF_FUNC;
1281 |
1282 | slist *q, *qn, *qp = NULL;
1283 |
1284 | btree *b;
1285 |
1286 | size_t msize;
1287 | int flag = 0;
1288 |
1289 | /* Size wanted */
1290 | tl->s_wanted = wanted;
1291 |
1292 | /* Scan incoming queue, freeing as we go */
1293 | for (q = *qh; q; q = qn)
1294 | {
1295 | #ifdef DEBUG_ALLOC_SLOW
1296 | if (!is_slab(q))
1297 | {
1298 | if (((uintptr_t) q) & 15) errx(1, "incoming queue corrupted\n");
1299 | }
1300 | #endif
1301 |
1302 | qn = q->next;
1303 | qp = q;
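| 		/* Only free nodes that have a successor - the last one may still be the tail other threads append to */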
1304 | if (qn)
1305 | {
1306 | if (is_slab(q))
1307 | {
1308 | slab_free(tl, q);
1309 | }
1310 | else
1311 | {
1312 | merge_node(tl, q);
1313 | }
1314 |
1315 | flag = 1;
1316 | }
1317 | }
1318 |
1319 | *qh = qp;
1320 |
1321 | /* Reset size wanted */
1322 | tl->s_wanted = 0;
1323 |
1324 | /*
1325 | * Make sure the last node isn't taking up too much room.
1326 | 	 * Note that a slab node can only take up a max of SB_MAX bytes.
1327 | * (They aren't splittable anyway)
1328 | */
1329 | if (is_slab(qp)) return flag;
1330 |
1331 | b = CONTAINER(btree, data, qp);
1332 |
1333 | msize = b->s.size;
1334 |
1335 | /* Don't split if too small */
1336 | if (msize <= (1 << 16)) return flag;
1337 |
1338 | /* Make the head node take up less room. Also, size 32 is faster than 16. */
1339 | split_node(tl, b, msize, 32);
1340 |
1341 | return 1;
1342 | }
1343 |
1344 |
1345 | #ifdef DEBUG_ALLOC_SLOW
1346 |
1347 | static void test_node(atls *tl, btree *b)
1348 | {
1349 | mealloc *m = read_bs(b);
1350 |
1351 | if (tl != get_tls()) errx(1, "tls incorrect\n");
1352 | if (m->tail != &tl->tail) errx(1, "node owner wrong\n");
1353 | }
1354 |
1355 | /* Test fast list constraints */
1356 | static void test_fast_lists(atls *tl)
1357 | {
1358 | int i, j;
1359 |
1360 | //if (tl->fl[63].next) errx(1, "fast list overflow\n");
1361 |
1362 | for (i = 0; i < NUM_FL; i++)
1363 | {
1364 | slist *p = &tl->fl[i];
1365 | slist *f;
1366 |
1367 | scan_slist(p, f)
1368 | {
1369 | btree *b = CONTAINER(btree, list, f);
1370 |
1371 | /* Are we a slab node? */
1372 | if (is_slab(&b->data))
1373 | {
1374 | errx(1, "Slab node on fast list\n");
1375 | }
1376 |
1377 | test_node(tl, b);
1378 |
1379 | if (un_used(b)) errx(1, "Unused element in fast list\n");
1380 | check_sep(b);
1381 |
1382 | j = size2fl(b->s.size);
1383 | if ((i != j) && (i != j - 1)) errx(1, "Fast element in wrong bin\n");
1384 |
1385 |
1386 | if (!(((uintptr_t) b ^ (uintptr_t) tl) & ~(PAGESIZE - 1))) errx(1, "tls on fast list!\n");
1387 | //if (f == tl->head) errx(1, "queue head in fast list\n");
1388 |
1389 | if (f->next == f) errx(1, "fast list loop\n");
1390 | }
1391 | }
1392 | }
1393 | #else
1394 | #define test_fast_lists(T) ((void) sizeof(T))
1395 | #endif
1396 |
1397 | /* Clear fast-lists */
1398 | static void clear_fast(atls *tl)
1399 | {
1400 | DECL_PROF_FUNC;
1401 |
1402 | u64b mask = tl->f_mask;
1403 |
1404 | /* Anything to do? */
1405 | while (mask)
1406 | {
1407 | size_t n = ffsq(mask);
1408 |
1409 | slist *p = &tl->fl[n];
1410 |
1411 | 		/* Isolate the lowest set bit */
1412 | mask &= -mask;
1413 |
1414 | 		/* Invert it, giving a mask that clears just that bit */
1415 | mask = ~mask;
1416 |
1417 | /* Properly free everything in the list */
1418 | while (p->next)
1419 | {
1420 | merge_node(tl, slist_rem(p));
1421 | }
1422 |
1423 | /* Clear bottom bit */
1424 | tl->f_mask &= mask;
1425 | mask = tl->f_mask;
1426 | }
1427 | }
1428 |
1429 | /* Hack - same as clear_fast() but free nodes from tl2 into tl1 */
1430 | static void fast_merge(atls *tl1, atls *tl2)
1431 | {
1432 | size_t n;
1433 | //slist *p = tl2->fl;
1434 | slist *p;
1435 |
1436 | /* Anything to do? */
1437 | while (tl2->f_mask)
1438 | {
1439 | n = ffsq(tl2->f_mask);
1440 | p = &tl2->fl[n];
1441 |
1442 | /* Turn off bit in f_mask, as nothing will be left there */
1443 | tl2->f_mask &= tl2->f_mask - 1;
1444 |
1445 | /* Properly free everything in the list */
1446 | while (p->next)
1447 | {
1448 | void *l = slist_rem(p);
1449 |
1450 | merge_node(tl1, l);
1451 | }
1452 | }
1453 | }
1454 |
1455 | static noinline int reap_dead(atls *tl)
1456 | {
1457 | DECL_PROF_FUNC;
1458 |
1459 | dlist *d;
1460 |
1461 | atls *tl2, *tl3;
1462 |
1463 | /* Check without taking mutex */
1464 | if (!d_list) return 0;
1465 |
1466 | /* Try to get dead thread */
1467 | if (mutex_trylock(&d_lock)) return 0;
1468 |
1469 | if (!d_list)
1470 | {
1471 | /* Nothing there */
1472 | mutex_unlock(&d_lock);
1473 | return 0;
1474 | }
1475 |
1476 | /* Grab dead thread */
1477 | tl2 = d_list;
1478 | d_list = tl2->d_list;
1479 | mutex_unlock(&d_lock);
1480 |
1481 | mutex_lock(&h_lock);
1482 |
1483 | /* Remove from hazard list */
1484 | dlist_del(&tl2->h_list);
1485 |
1486 | /* Set flag so that memless free works */
1487 | tl2->h_list.next = NULL;
1488 |
1489 | /* Merge data + update tail pointers */
1490 | atls_merge(tl, tl2);
1491 |
1492 | /* Wait for all threads to not point to dead thread */
1493 | scan_list(&h_list, d)
1494 | {
1495 | tl3 = list_entry(atls, h_list, d);
1496 |
1497 | while (tl3->hazard == &tl2->tail) cpu_relax();
1498 | }
1499 |
1500 | mutex_unlock(&h_lock);
1501 |
1502 | /* Scan all final pending */
1503 | scan_queue(tl, &tl2->head, 0);
1504 |
1505 | /* Free head */
1506 | local_free(tl, tl2->head);
1507 |
1508 | /* Finally free tls data for dead thread */
1509 | #ifdef WINDOWS
1510 | VirtualFree(page_start(tl2), 0, MEM_RELEASE);
1511 | #else
1512 | munmap(page_start(tl2), PAGESIZE);
1513 | #endif
1514 |
1515 | test_all(tl);
1516 |
1517 | /* Try to free up memory */
1518 | return 1;
1519 | }
1520 |
1521 | static void prepend_queue(slist *p, atls *tl, slist ***bs)
1522 | {
1523 | DECL_PROF_FUNC;
1524 |
1525 | slist *tail;
1526 |
1527 | slist **btail = *bs;
1528 | slist **btold;
1529 |
1530 | do
1531 | {
1532 | btold = btail;
1533 |
1534 | /* Make sure we write to the hazard pointer */
1535 | xchg_ptr(&tl->hazard, btail);
1536 |
1537 | /* Has it changed while we were writing to our hazard pointer? */
1538 | btail = *bs;
1539 | }
1540 | while (btold != btail);
1541 |
1542 | p->next = NULL;
1543 | tail = xchg_ptr(btail, p);
1544 | tail->next = p;
1545 |
1546 | barrier();
1547 |
1548 | tl->hazard = NULL;
1549 | }
1550 |
1551 | static void destroy_tls(void *dummy)
1552 | {
1553 | DECL_PROF_FUNC;
1554 |
1555 | atls *tl = get_tls();
1556 |
1557 | (void) dummy;
1558 |
1559 | test_all(tl);
1560 |
1561 | test_leak_aux();
1562 |
1563 | /*
1564 | * Make sure that any recursion via signals or other
1565 | * pthread_key destructors will reset this handler.
1566 | */
1567 | #ifdef WINDOWS
1568 | set_tls((atls *) 1);
1569 | #else
1570 | set_tls(NULL);
1571 | #endif
1572 |
1573 | /* The above line isn't allowed to be moved inside the lock due to possible signals */
1574 | barrier();
1575 |
1576 | /* Add to dead list */
1577 | mutex_lock(&d_lock);
1578 | tl->d_list = d_list;
1579 | d_list = tl;
1580 | mutex_unlock(&d_lock);
1581 | }
1582 |
1583 |
1584 | /* Convert a pointer into a 32bit random number */
1585 | static unsigned rnd_ptr(void *p)
1586 | {
1587 | u64b rnd_seed = (uintptr_t) p;
1588 | rnd_seed *= 7319936632422683443ULL;
1589 | rnd_seed ^= rnd_seed >> 32;
1590 | rnd_seed *= 7319936632422683443ULL;
1591 | rnd_seed ^= rnd_seed >> 32;
1592 |
1593 | /* Truncate to 32 bits */
1594 | return rnd_seed;
1595 | }
1596 |
1597 | /*
1598 | * Pick a random offset from p into a region of size total
1599 | * to fit an object of size size.
1600 | *
1601 | * Return a pointer to the object
1602 | */
1603 | static void *rnd_offset(void *p, size_t total, size_t size)
1604 | {
1605 | u64b slack_space = total - size;
1606 |
1607 | unsigned rng = rnd_ptr(p);
1608 |
1609 | unsigned offset = (slack_space * rng) >> 32;
1610 |
1611 | /* Keep 16-byte alignment */
1612 | offset &= ~15;
1613 |
1614 | return shift(p, offset);
1615 | }
1616 |
1617 | static atls *init_atls(atls *tl)
1618 | {
1619 | int i;
1620 |
1621 | mealloc *m;
1622 | btree *b, *br;
1623 |
1624 | btree *q8;
1625 |
1626 | /* Init lists */
1627 | dlist_init(&tl->bl);
1628 |
1629 | /* queue 0 is taken by size 8 small-list */
1630 | for (i = 1; i < NUM_QS; i++)
1631 | {
1632 | dlist_init(&tl->qs[i]);
1633 | }
1634 |
1635 | /* Init small list */
1636 | q8 = get_q8(tl);
1637 |
1638 | #ifdef DEBUG_ALLOC_SLOW
1639 | /* Btree needs to be correctly aligned */
1640 | if (((uintptr_t) q8) & 15) errx(1, "q8 misaligned\n");
1641 | #endif
1642 |
1643 | set_small_next(q8, q8);
1644 | set_small_prev(q8, q8);
1645 |
1646 | /* Init slabs */
1647 | for (i = 0; i < NUM_SB; i++)
1648 | {
1649 | dlist_init(&tl->slab[i]);
1650 | }
1651 | dlist_init(&tl->slab_full);
1652 |
1653 | tl->percpu_hash = rnd_ptr(tl);
1654 |
1655 | /* Init btree */
1656 | btree_init(&tl->bheap);
1657 |
1658 | /* Need a maximum of 2 nodes at this point */
1659 | tl->b_hgt = 2;
1660 |
1661 | #ifdef DEBUG_LEAK
1662 | tl->leak_fd = -1;
1663 | #endif
1664 |
1665 | /* Grab initial allocation */
1666 | m = big_alloc_aux(PAGESIZE);
1667 | if (!m)
1668 | {
1669 | set_tls(NULL);
1670 | #ifdef WINDOWS
1671 | VirtualFree(page_start(tl), 0, MEM_RELEASE);
1672 | #else
1673 | munmap(page_start(tl), PAGESIZE);
1674 | #endif
1675 | return NULL;
1676 | }
1677 |
1678 | /* Keep track of total allocations */
1679 | tl->a_alloced = PAGESIZE;
1680 |
1681 | /* Fill in header */
1682 | dlist_add(&tl->bl, &m->m_list);
1683 |
1684 | m->tail = &tl->tail;
1685 |
1686 | b = &m->b;
1687 |
1688 | /* Create left seperator */
1689 | b->s.size = PAGESIZE - HEADERSIZE;
1690 | b->s.bs_offset = 16;
1691 |
1692 | /* Position of right seperator */
1693 | br = shift(b, b->s.size);
1694 |
1695 | /* Create right seperator */
1696 | br->s.bs_offset = PAGESIZE - SEPSIZE;
1697 | split_node(tl, b, b->s.size, SEPSIZE);
1698 |
1699 | /* Make queue */
1700 | tl->head = (void *) &b->data;
1701 | tl->tail = tl->head;
1702 | tl->head->next = NULL;
1703 |
1704 | /* Add to hazard list */
1705 | mutex_lock(&h_lock);
1706 | dlist_add(&h_list, &tl->h_list);
1707 | mutex_unlock(&h_lock);
1708 |
1709 | return tl;
1710 | }
1711 |
1712 | #ifndef WINDOWS
1713 |
1714 | static void prepare_fork(void)
1715 | {
1716 | size_t i;
1717 |
1718 | 	/* Stabilize slab */
1719 | for (i = 0; i < cpu_total; i++)
1720 | {
1721 | mutex_lock(&pc_slab[i].m);
1722 | }
1723 |
1724 | 	/* Stabilize hazard list */
1725 | mutex_lock(&h_lock);
1726 |
1727 | /* Stabilize dead list */
1728 | mutex_lock(&d_lock);
1729 |
1730 | 	/* Stabilize sbrk */
1731 | mutex_lock(&sb_lock);
1732 | }
1733 |
1734 | static void parent_fork(void)
1735 | {
1736 | size_t i;
1737 |
1738 | /* Done with sbrk */
1739 | mutex_unlock(&sb_lock);
1740 |
1741 | /* Done with dead list */
1742 | mutex_unlock(&d_lock);
1743 |
1744 | /* Done with hazard list */
1745 | mutex_unlock(&h_lock);
1746 |
1747 | /* Done with slab */
1748 | for (i = 0; i < cpu_total; i++)
1749 | {
1750 | mutex_unlock(&pc_slab[i].m);
1751 | }
1752 | }
1753 |
1754 | static void child_fork(void)
1755 | {
1756 | size_t i;
1757 |
1758 | /* Clean up sb_lock in child */
1759 | mutex_init(&sb_lock);
1760 |
1761 | /* Clean up d_lock in child */
1762 | mutex_init(&d_lock);
1763 |
1764 | /* Clean up h_lock in child */
1765 | mutex_init(&h_lock);
1766 |
1767 | /* Clean up slab locks in child */
1768 | for (i = 0; i < cpu_total; i++)
1769 | {
1770 | mutex_unlock(&pc_slab[i].m);
1771 | }
1772 |
1773 | /*
1774 | * Wipe hazard list as the other threads no longer exist
1775 | * This leaks memory, but we can't help it,
1776 | * as the other threads may be concurrently modifying internal
1777 | * data structures now.
1778 | */
1779 | dlist_init(&h_list);
1780 |
1781 | /* We are the only member */
1782 | dlist_add(&h_list, &get_tls()->h_list);
1783 | }
1784 |
1785 | /*
1786 | * Initialize things.
1787 | * Unfortunately, we don't have a failure return value so we must die instead.
1788 | */
1789 | static void init_handler(void)
1790 | {
1791 | int res;
1792 |
1793 | /* Init sbrk information */
1794 | init_sbrk_start();
1795 |
1796 | /* Add a fork handler */
1797 | if (pthread_atfork)
1798 | {
1799 | res = pthread_atfork(prepare_fork, parent_fork, child_fork);
1800 | if (res) errx(1, "pthread_atfork failed\n");
1801 | }
1802 |
1803 | /* Create thread death key */
1804 | if (pthread_key_create)
1805 | {
1806 | res = pthread_key_create(&death_key, destroy_tls);
1807 | if (res) errx(1, "pthread_key_create() failed\n");
1808 | }
1809 |
1810 | if (init_sldata())
1811 | {
1812 | errx(1, "Failed to allocate enough memory to initialize slab\n");
1813 | }
1814 |
1815 | #ifdef DEBUG_LEAK
1816 | atexit(test_leak_aux);
1817 | #endif
1818 | }
1819 |
1820 | static atls *init_tls(void)
1821 | {
1822 | DECL_PROF_FUNC;
1823 |
1824 | atls *tl;
1825 |
1826 | /* Can we use a dead thread's tls data? */
1827 | if (d_list)
1828 | {
1829 | mutex_lock(&d_lock);
1830 |
1831 | if (d_list)
1832 | {
1833 | /* Grab from death list */
1834 | tl = d_list;
1835 | d_list = tl->d_list;
1836 | mutex_unlock(&d_lock);
1837 |
1838 | set_tls(tl);
1839 |
1840 | /* Init my thread destructor */
1841 | if (pthread_setspecific) pthread_setspecific(death_key, tl);
1842 |
1843 | test_all(tl);
1844 |
1845 | /* Done */
1846 | return tl;
1847 | }
1848 | mutex_unlock(&d_lock);
1849 | }
1850 |
1851 | /* Hack - use a full page for it */
1852 | tl = mmap(NULL, PAGESIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
1853 |
1854 | /* Out of memory */
1855 | if (tl == MAP_FAILED) goto nomem;
1856 |
1857 | /* Randomly cache colour the tls data */
1858 | tl = rnd_offset(tl, PAGESIZE, sizeof(atls));
1859 |
1860 | /* Save pointer for later memory calls */
1861 | set_tls(tl);
1862 |
1863 | /* Make sure that we can always allocate two btree nodes from within itself */
1864 | BUILD_ASSERT(NUM_QS * 8 + 16 >= sizeof(btree) * 2);
1865 |
1866 | /* Make sure atls isn't too big */
1867 | BUILD_ASSERT(sizeof(atls) <= PAGESIZE);
1868 |
1869 | /* Make sure btree nodes fit in the slab */
1870 | BUILD_ASSERT(sizeof(btree) <= SB_MAX);
1871 |
1872 | /* Hack - we should use the rest of the space to init the heap... */
1873 | tl = init_atls(tl);
1874 | if (!tl) goto nomem;
1875 |
1876 | /*
1877 | * Init handler.
1878 | * Note that this can allocate memory, so needs to be done last
1879 | */
1880 | if (pthread_once)
1881 | {
1882 | pthread_once(&init_once, init_handler);
1883 | }
1884 | else
1885 | {
1886 | /* Since there are no threads... */
1887 | if (!sbrk_start) init_handler();
1888 | }
1889 |
1890 | /* Init my thread destructor */
1891 | if (pthread_setspecific) pthread_setspecific(death_key, tl);
1892 |
1893 | test_all(tl);
1894 |
1895 | return tl;
1896 |
1897 | nomem:
1898 | set_enomem();
1899 | return NULL;
1900 | }
1901 | #else /* WINDOWS */
1902 |
1903 |
1904 | #ifdef USE_DLL
1905 |
1906 | typedef struct patch patch;
1907 | struct patch
1908 | {
1909 | const char *name;
1910 | void *func;
1911 | };
1912 |
1913 |
1914 | #define PATCH_FUNC(X)\
1915 | {#X, (void *) ((uintptr_t) llalloc##X)}
1916 |
1917 | static patch patch_list[] =
1918 | {
1919 | PATCH_FUNC(free),
1920 | PATCH_FUNC(malloc),
1921 | PATCH_FUNC(calloc),
1922 | PATCH_FUNC(realloc),
1923 | PATCH_FUNC(_msize),
1924 | PATCH_FUNC(_expand),
1925 | PATCH_FUNC(_free_nolock),
1926 | PATCH_FUNC(_realloc_nolock),
1927 | PATCH_FUNC(_calloc_nolock),
1928 | PATCH_FUNC(_msize_nolock),
1929 | {NULL, NULL}
1930 | };
1931 |
1932 | #define JMP_OP 0xE9
1933 | #define CALL_OP 0xE8
1934 | #define JMP_OFFSET(P1, P2) (((uintptr_t)(P1)) - ((uintptr_t)(P2)) - 5)
1935 |
1936 | /* Follow a call to its target */
1937 | static void *follow_call(void *p)
1938 | {
1939 | int target;
1940 |
1941 | 	/* Are we pointing to a call? */
1942 | if ((*(unsigned char *)p) != CALL_OP) return NULL;
1943 |
1944 | target = *(int *) shift(p, 1);
1945 |
1946 | return (void *) shift(p, (uintptr_t) target + 5);
1947 | }
1948 |
1949 | /* Find a call in a dumb manner */
1950 | static void *find_call(void *p)
1951 | {
1952 | while ((*(unsigned char *) p) != CALL_OP)
1953 | {
1954 | p = shift(p, 1);
1955 | }
1956 |
1957 | return p;
1958 | }
1959 |
1960 | static void patch_function(void *func, void *my_func)
1961 | {
1962 | MEMORY_BASIC_INFORMATION mbi;
1963 |
1964 | /* Make code read/write */
1965 | VirtualQuery(func, &mbi, sizeof(mbi));
1966 | VirtualProtect(mbi.BaseAddress, mbi.RegionSize,
1967 | PAGE_EXECUTE_READWRITE, &mbi.Protect);
1968 |
1969 | /* Patch in a jmp to our routine */
1970 | *(unsigned char *) func = JMP_OP;
1971 | *(unsigned *) shift(func, 1) = JMP_OFFSET(my_func, func);
1972 |
1973 | /* Reset code permissions */
1974 | VirtualProtect(mbi.BaseAddress, mbi.RegionSize, mbi.Protect, &mbi.Protect);
1975 | }
1976 |
1977 | static void *init_crt_funcs(void)
1978 | {
1979 | FARPROC func_f;
1980 | patch *p;
1981 | void *f;
1982 |
1983 | HMODULE library = GetModuleHandle("MSVCR90.DLL");
1984 | if (!library) return NULL;
1985 |
1986 | func_f = GetProcAddress(library, "_callnewh");
1987 | if (!func_f) return NULL;
1988 | __callnewh = (typeof(__callnewh)) func_f;
1989 |
1990 | func_f = GetProcAddress(library, "?_query_new_mode@@YAHXZ");
1991 | if (!func_f) return NULL;
1992 | __newmode = (typeof(__newmode)) func_f;
1993 |
1994 | for (p = patch_list; p->name; p++)
1995 | {
1996 | func_f = GetProcAddress(library, p->name);
1997 | if (!func_f) continue;
1998 |
1999 | patch_function((void *) (uintptr_t) func_f, p->func);
2000 | }
2001 |
2002 | func_f = GetProcAddress(library, "calloc");
2003 | f = (void *) (uintptr_t) func_f;
2004 |
2005 | /* Not here... don't crash */
2006 | if (!f) goto out;
2007 |
2008 | /* Get pointer to _calloc_impl() */
2009 | f = find_call(f);
2010 | f = follow_call(f);
2011 |
2012 | /* Finally patch _calloc_impl */
2013 | patch_function(f, (void *) (uintptr_t) llalloc_calloc_impl);
2014 |
2015 | out:
2016 |
2017 | /* Success */
2018 | return (void*) 1;
2019 | }
2020 |
2021 |
2022 | #endif
2023 |
2024 | void lldebug_hook(void);
2025 | static atls *init_tls(void)
2026 | {
2027 | DECL_PROF_FUNC;
2028 |
2029 | atls *tl;
2030 |
2031 | static void *init = (void *) 1;
2032 | void *first = xchg_ptr(&init, NULL);
2033 |
2034 | if (!first)
2035 | {
2036 | /* We've already died - use free_nomem() */
2037 | if (get_tls() == (atls *) 1) return NULL;
2038 |
2039 | /* Can we use a dead thread's tls data? */
2040 | if (d_list)
2041 | {
2042 | mutex_lock(&d_lock);
2043 |
2044 | if (d_list)
2045 | {
2046 | /* Grab from death list */
2047 | tl = d_list;
2048 | d_list = tl->d_list;
2049 | mutex_unlock(&d_lock);
2050 |
2051 | set_tls(tl);
2052 |
2053 | test_all(tl);
2054 |
2055 | /* Undocumented crt function */
2056 | __tlregdtor(destroy_tls);
2057 |
2058 | /* Done */
2059 | return tl;
2060 | }
2061 | mutex_unlock(&d_lock);
2062 | }
2063 | }
2064 | else
2065 | {
2066 | /* Init slab */
2067 | if (init_sldata())
2068 | {
2069 | init = (void *) 1;
2070 | return NULL;
2071 | }
2072 |
2073 | tls_index = TlsAlloc();
2074 | if (tls_index == TLS_OUT_OF_INDEXES)
2075 | {
2076 | /* We can't handle this */
2077 | init = (void *) 1;
2078 | return NULL;
2079 | }
2080 |
2081 | #ifdef USE_DLL
2082 | /* Initialize function pointers */
2083 | if (!init_crt_funcs())
2084 | {
2085 | /* Doesn't work... fail and bail out of dll init */
2086 | init = (void *) 1;
2087 | return NULL;
2088 | }
2089 | #endif
2090 | /* Init sbrk information */
2091 | init_sbrk_start();
2092 | }
2093 |
2094 | /* Hack - use a full page for it */
2095 | tl = VirtualAlloc(NULL, PAGESIZE, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
2096 |
2097 | /* Out of memory */
2098 | if (!tl) goto nomem;
2099 |
2100 | /* Randomly cache colour the tls data */
2101 | tl = rnd_offset(tl, PAGESIZE, sizeof(atls));
2102 |
2103 | /* Save pointer for later memory calls */
2104 | set_tls(tl);
2105 |
2106 | /* Make sure that we can always allocate two btree nodes from within itself */
2107 | //BUILD_ASSERT(NUM_QS * 8 + 16 >= sizeof(btree) * 2);
2108 |
2109 | /* Hack - we should use the rest of the space to init the heap... */
2110 | tl = init_atls(tl);
2111 | if (!tl) goto nomem;
2112 |
2113 | /* Undocumented crt function */
2114 | __tlregdtor(destroy_tls);
2115 |
2116 | test_all(tl);
2117 |
2118 | return tl;
2119 |
2120 | nomem:
2121 | /* Try again if possible */
2122 | if (handle_oom(PAGESIZE * 2)) return init_tls();
2123 |
2124 | return NULL;
2125 | }
2126 |
2127 | #ifdef USE_DLL
2128 | BOOL DllMain(HINSTANCE h, DWORD reason, LPVOID reserved);
2129 | BOOL DllMain(HINSTANCE h, DWORD reason, LPVOID reserved)
2130 | {
2131 | /* Silence compiler warnings */
2132 | (void) h;
2133 | (void) reserved;
2134 |
2135 | /* Init the memory allocator */
2136 | if ((reason == DLL_PROCESS_ATTACH) || (reason == DLL_THREAD_ATTACH))
2137 | {
2138 | if (!init_tls()) return 0;
2139 | }
2140 | #ifdef DEBUG_PROFILE
2141 | else if(reason == DLL_PROCESS_DETACH)
2142 | {
2143 | ll_print_prof();
2144 | }
2145 | #endif
2146 |
2147 | return 1;
2148 | }
2149 | #endif /* USE_DLL */
2150 |
2151 | #endif /* WINDOWS */
2152 |
2153 | #ifdef DEBUG_ALLOC_SLOW
2154 |
2155 | /* Get node previous to loc in b */
2156 | static void test_btree_linked(btree *b, int loc)
2157 | {
2158 | int i, j = 0;
2159 |
2160 | if (b_leaf(b)) errx(1, "No previous!\n");
2161 |
2162 | for (i = b_start(b); i != loc; i = b_next(b, i))
2163 | {
2164 | j++;
2165 |
2166 | if (j > BT_MAX) errx(1, "Btree node loop!\n");
2167 | }
2168 | }
2169 |
2170 | static void test_in_btree(atls *tl, btree *b)
2171 | {
2172 | btree *bp;
2173 |
2174 | if (!b_leaf(b)) errx(1, "Unused btree object that is not a leaf\n");
2175 |
2176 | while (b->parent)
2177 | {
2178 | bp = b->parent;
2179 |
2180 | if (b_ptr(bp, b_pindex(b)) != b) errx(1, "Parent doesn't own %p\n", (void *) b);
2181 | if (!bp->bsize[b_pindex(b)]) errx(1, "Parent link broken\n");
2182 |
2183 | test_btree_linked(bp, b_pindex(b));
2184 |
2185 | b = bp;
2186 | }
2187 |
2188 | if (&tl->bheap != b) errx(1, "Heap doesn't own %p\n", (void *) b);
2189 | }
2190 |
2191 | #ifdef UNUSED_FUNC
2192 | static int is_fast_node(atls *tl, btree *b)
2193 | {
2194 | size_t bin = size2fl(b->s.size);
2195 | slist *f;
2196 |
2197 | scan_slist(&tl->fl[bin], f)
2198 | {
2199 | /* Found it? */
2200 | if (f == &b->list) return 1;
2201 | }
2202 |
2203 | /* Didn't find it */
2204 | return 0;
2205 | }
2206 | #endif /* UNUSED_FUNC */
2207 |
2208 | static void test_blocks(atls *tl)
2209 | {
2210 | mealloc *m;
2211 | dlist *d;
2212 |
2213 | btree *b;
2214 |
2215 | size_t size;
2216 |
2217 | if (tl->bl.next->prev != &tl->bl) errx(1, "Block list corrupt\n");
2218 |
2219 | /* Scan blocks */
2220 | scan_list(&tl->bl, d)
2221 | {
2222 | m = list_entry(mealloc, m_list, d);
2223 |
2224 | if (d->next->prev != d) errx(1, "Block list corrupt\n");
2225 |
2226 | /* Scan seps for this block */
2227 | for (b = &m->b;; b = shift(b, size))
2228 | {
2229 | if (b < &m->b) errx(1, "Node before block start!\n");
2230 |
2231 | if (b->s.bs_offset & FLG_SIZE8)
2232 | {
2233 | size = 16;
2234 | }
2235 | else
2236 | {
2237 | size = b->s.size;
2238 |
2239 | if (!size) break;
2240 |
2241 | if (shift(b, -(uintptr_t)(b->s.bs_offset & ~15)) != m) errx(1, "Block back link broken\n");
2242 |
2243 | if (read_bs(b) != m) errx(1, "Block start corrupted\n");
2244 | }
2245 |
2246 | check_sep(b);
2247 |
2248 | if ((size > QS_MAX) && un_used(b)) test_in_btree(tl, b);
2249 | }
2250 | }
2251 | }
2252 | #else
2253 | #define test_blocks(T) ((void) sizeof(T))
2254 | #endif
2255 |
2256 | /* Medium allocations */
2257 |
2258 | #ifdef DEBUG_ALLOC_SLOW
2259 |
2260 | static unsigned test_btree_aux(atls *tl, btree *b, unsigned lsize)
2261 | {
2262 | btree *bn;
2263 | int n = b_start(b);
2264 | int i = 0;
2265 | unsigned ssize = lsize;
2266 |
2267 | unsigned short msk = -1;
2268 |
2269 | /* Size of node can be incorrect if splitting not possible */
2270 | if (n && b->parent && !is_slab(b) && (b->s.size < sizeof(btree)))
2271 | {
2272 | errx(1, "Btree nodesize wrong\n");
2273 | }
2274 |
2275 | while (n)
2276 | {
2277 | bn = b_ptr(b, n);
2278 |
2279 | i++;
2280 |
2281 | if (bn->parent != b) errx(1, "Btree parent incorrect\n");
2282 | if (b_pindex(bn) != n) errx(1, "Btree p_index incorrect\n");
2283 |
2284 | if (b_mask(b) & (1 << (n - 1))) errx(1, "Used btree node marked free\n");
2285 | msk -= (1 << (n - 1));
2286 |
2287 | /* Scan lower */
2288 | ssize = test_btree_aux(tl, bn, ssize);
2289 |
2290 | if (b->bsize[n] < ssize) errx(1, "Btree size misordered\n");
2291 | ssize = b->bsize[n] & ~0xff;
2292 |
2293 | if (b_leaf(bn))
2294 | {
2295 | if (bn->s.size*16 != ssize) errx(1, "Btree leaf size wrong\n");
2296 |
2297 | if (!un_used(bn)) errx(1, "Btree leaf marked used!\n");
2298 | }
2299 | else if (!is_slab(bn) && un_used(bn)) errx(1, "Btree node marked unused!\n");
2300 |
2301 | if (b_prev(b, b_next(b, n)) != n) errx(1, "prev link broken\n");
2302 |
2303 | n = b_next(b, n);
2304 |
2305 | if (i > BT_MAX) errx(1, "Btree node loop!\n");
2306 | }
2307 |
2308 | /* Leaf node? */
2309 | if (!i) return ssize;
2310 |
2311 | 	if (msk != b_mask(b)) errx(1, "Btree free mask mismatch\n");
2312 |
2313 | if (b->parent && (i <= 3)) errx(1, "Btree has too few children\n");
2314 |
2315 | return ssize & ~0xff;
2316 | }
2317 |
2318 | static void test_btree(atls *tl)
2319 | {
2320 | if ((tl->b_hgt > 100) || (tl->b_cnt > 100)) errx(1, "btree height corrupt\n");
2321 |
2322 | test_btree_aux(tl, &tl->bheap, 0);
2323 | }
2324 | #else
2325 | #define test_btree(T) ((void) sizeof(T))
2326 | #endif
2327 |
2328 | static char btree_count(btree *b)
2329 | {
2330 | int x = b_mask(b);
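| 	/* b_mask() has a bit set for each free slot, so the used count is 16 minus the popcount */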
2331 |
2332 | /* See Wikipedia for this algorithm for popcount */
2333 | int m1 = 0x5555;
2334 | int m2 = 0x3333;
2335 | int m4 = 0x0f0f;
2336 |
2337 | /* Put counts into pairs of bits */
2338 | x -= (x >> 1) & m1;
2339 |
2340 | /* 4 bit counts */
2341 | x = (x & m2) + ((x >> 2) & m2);
2342 |
2343 | /* Make 8bit counts */
2344 | x = (x + (x >> 4)) & m4;
2345 |
2346 | return 16 - (x + (x >> 8));
2347 | }
2348 |
2349 | static inline int btree_alloc(btree *b)
2350 | {
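| 	/* Grab the lowest free slot; locations are 1-based, since 0 terminates the link lists */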
2351 | int loc = ffsu(b_mask(b));
2352 |
2353 | b_mask(b) &= ~(1 << loc);
2354 |
2355 | return loc + 1;
2356 | }
2357 |
2358 | static inline void btree_free(btree *b, int loc)
2359 | {
2360 | b_mask(b) |= 1 << (loc - 1);
2361 | }
2362 |
2363 | static int b_leaf(btree *b)
2364 | {
2365 | /* Check to see if there are no children */
2366 | return !b_start(b);
2367 | }
2368 |
2369 | static inline unsigned btree_ssize(btree *b, int loc)
2370 | {
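| 	/* The low byte of bsize[] is the next-index link; the high bits hold the size (scaled by 16) */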
2371 | return b->bsize[loc] & ~0xff;
2372 | }
2373 |
2374 | static inline void btree_update_daughter(btree *bp, btree *b, int loc)
2375 | {
2376 | b_ptr(bp, loc) = b;
2377 | b->parent = bp;
2378 | b_pindex(b) = loc;
2379 | }
2380 |
2381 |
2382 | /* Update my parent with my new size */
2383 | static void btree_update_psize(btree *b, unsigned ssize)
2384 | {
2385 | int bpi = b_pindex(b);
2386 | btree *bp;
2387 |
2388 | /* Update parent size value */
2389 | for (bp = b->parent; bp; bp = bp->parent)
2390 | {
2391 | bp->bsize[bpi] &= 0xff;
2392 | bp->bsize[bpi] += ssize;
2393 |
2394 | /* Are we done with the chain of updates? */
2395 | if (b_next(bp, bpi)) break;
2396 |
2397 | bpi = b_pindex(bp);
2398 | }
2399 | }
2400 |
2401 | /* Forward declare */
2402 | static void btree_node_del(atls *tl, btree *b, int loc);
2403 |
2404 | static void btree_merge_aux(atls *tl, btree *bl, btree *br)
2405 | {
2406 | int i, j, k;
2407 |
2408 | int ip, pi;
2409 |
2410 | int next;
2411 |
2412 | unsigned ssize;
2413 |
2414 | btree *bp;
2415 |
2416 | int bcl, bcr;
2417 |
2418 | #ifdef DEBUG_ALLOC_SLOW
2419 | if (!bl->parent || !br->parent) errx(1, "Trying to merge heap top\n");
2420 | if (bl->parent != br->parent) errx(1, "Trying to merge two nodes with different parents\n");
2421 | #endif
2422 |
2423 | bcl = btree_count(bl);
2424 | bcr = btree_count(br);
2425 |
2426 | /* Move some from neighbour to me */
2427 | if (bcr + bcl > BT_MAX)
2428 | {
2429 | if (bcr > bcl)
2430 | {
2431 | /* Silence compiler warning */
2432 | next = 0;
2433 |
2434 | /* Move some nodes from br to bl */
2435 | ip = b_last(bl);
2436 |
2437 | for (j = bcr / 3, k = b_start(br); j; k = next, j--)
2438 | {
2439 | next = b_next(br, k);
2440 | i = btree_alloc(bl);
2441 |
2442 | /* Add in new node */
2443 | bl->bsize[i] = br->bsize[k];
2444 | b_next(bl, ip) = i;
2445 | b_prev(bl, i) = ip;
2446 | btree_update_daughter(bl, b_ptr(br, k), i);
2447 | ip = i;
2448 |
2449 | /* Remove old node */
2450 | btree_free(br, k);
2451 | }
2452 | b_next(bl, ip) = 0;
2453 | ssize = bl->bsize[ip];
2454 | b_last(bl) = ip;
2455 |
2456 | b_start(br) = next;
2457 | b_prev(br, next) = 0;
2458 |
2459 | /* Notify parent of my new size */
2460 | btree_update_psize(bl, ssize);
2461 |
2462 | return;
2463 | }
2464 |
2465 | /* Scan 2/3rds of the way through bl */
2466 | for (j = bcl / 3, ip = b_last(bl); j; ip = b_prev(bl, ip), j--);
2467 |
2468 | k = b_start(br);
2469 | ssize = btree_ssize(bl, ip);
2470 | j = b_next(bl, ip);
2471 | b_next(bl, ip) = 0;
2472 | b_last(bl) = ip;
2473 |
2474 | /* Copy remainder to br, deleting as we go */
2475 | for (ip = 0; j; j = next)
2476 | {
2477 | next = b_next(bl, j);
2478 | i = btree_alloc(br);
2479 |
2480 | /* Add in new node */
2481 | br->bsize[i] = bl->bsize[j];
2482 | b_next(br, ip) = i;
2483 | b_prev(br, i) = ip;
2484 | ip = i;
2485 | btree_update_daughter(br, b_ptr(bl, j), i);
2486 |
2487 | /* Remove old node */
2488 | btree_free(bl, j);
2489 | }
2490 |
2491 | /* Link to the remainder of the nodes in br */
2492 | b_next(br, ip) = k;
2493 | b_prev(br, k) = ip;
2494 |
2495 | /* Notify parent of my new size */
2496 | btree_update_psize(bl, ssize);
2497 |
2498 | return;
2499 | }
2500 |
2501 | /* merge bl into br and delete bl */
2502 | ip = 0;
2503 | k = b_start(br);
2504 | for (j = b_start(bl); j; j = b_next(bl, j))
2505 | {
2506 | i = btree_alloc(br);
2507 |
2508 | /* Add in new node */
2509 | br->bsize[i] = bl->bsize[j];
2510 | b_next(br, ip) = i;
2511 | b_prev(br, i) = ip;
2512 | ip = i;
2513 |
2514 | btree_update_daughter(br, b_ptr(bl, j), i);
2515 | }
2516 |
2517 | #ifdef DEBUG_ALLOC_SLOW
2518 | if (!ip) errx(1, "Empty left node?\n");
2519 | #endif
2520 |
2521 | b_next(br, ip) = k;
2522 | b_prev(br, k) = ip;
2523 |
2524 | /* Save these so we can delete */
2525 | bp = bl->parent;
2526 | pi = b_pindex(bl);
2527 |
2528 | /* Delete this node when done */
2529 | local_free(tl, &bl->data);
2530 |
2531 | /* Delete bl */
2532 | btree_node_del(tl, bp, pi);
2533 |
2534 | /* Tail recursion */
2535 | }
2536 |
2537 | static noinline void btree_node_del_aux(atls *tl, btree *b, btree *bp)
2538 | {
2539 | size_t prev, next;
2540 |
2541 | int pi = b_pindex(b);
2542 |
2543 | int i;
2544 |
2545 | #ifdef DEBUG_ALLOC_SLOW
2546 | if (!pi) errx(1, "Corrupted leaf\n");
2547 | #endif
2548 |
2549 | /* Rebalance if possible */
2550 | next = b_next(bp, pi);
2551 | if (next)
2552 | {
2553 | /* Merge with next node */
2554 | btree_merge_aux(tl, b, b_ptr(bp, next));
2555 |
2556 | return;
2557 | }
2558 |
2559 | prev = b_prev(bp, pi);
2560 | if (prev)
2561 | {
2562 | /* Merge with previous node */
2563 | btree_merge_aux(tl, b_ptr(bp, prev), b);
2564 |
2565 | return;
2566 | }
2567 |
2568 | /* Just me here? */
2569 | #ifdef DEBUG_ALLOC_SLOW
2570 | if (bp != &tl->bheap) errx(1, "Invalid node count\n");
2571 | #endif
2572 |
2573 | /* Move my data to the top of the btree */
2574 | b_start(bp) = b_start(b);
2575 | b_last(bp) = b_last(b);
2576 | b_mask(bp) = b_mask(b);
2577 |
2578 | /* Init alloced list */
2579 | for (i = b_start(b); i; i = b_next(b, i))
2580 | {
2581 | bp->bsize[i] = b->bsize[i];
2582 | bp->prev[i] = b->prev[i];
2583 | btree_update_daughter(bp, b_ptr(b, i), i);
2584 | }
2585 |
2586 | /* Btree is shorter */
2587 | tl->b_hgt--;
2588 |
2589 | /* Delete this node when done */
2590 | local_free(tl, &b->data);
2591 |
2592 | /* Prevent having too many spare nodes which can cause fragmentation */
2593 | if (tl->b_hgt < tl->b_cnt)
2594 | {
2595 | /* Pop off the extra node */
2596 | void *st = slist_rem(&tl->btree_freenode);
2597 | b = list_entry(btree, data, st);
2598 |
2599 | /* Delete it */
2600 | local_free(tl, &b->data);
2601 | tl->b_cnt--;
2602 | }
2603 | }
2604 |
2605 | /* Delete node at location loc */
2606 | static void btree_node_del(atls *tl, btree *b, int loc)
2607 | {
2608 | size_t prev = b_prev(b, loc);
2609 | size_t next = b_next(b, loc);
2610 |
2611 | btree *bp = b->parent;
2612 |
2613 | b_next(b, prev) = next;
2614 | b_prev(b, next) = prev;
2615 |
2616 | /* Add to free list */
2617 | btree_free(b, loc);
2618 |
2619 | /* If top - am done */
2620 | if (!bp) return;
2621 |
2622 | /* Was last? */
2623 | if (!next)
2624 | {
2625 | /* Update parent size (we know there must be at least one other node) */
2626 | btree_update_psize(b, btree_ssize(b, prev));
2627 | }
2628 |
2629 | /* Still not empty enough? Equivalent to btree_count(b) > 3, but faster than a popcount */
2630 | if (b_next(b, b_next(b, b_next(b, b_start(b))))) return;
2631 |
2632 | btree_node_del_aux(tl, b, bp);
2633 | }
2634 |
2635 | static __pure inline btree *btree_search(atls *tl, unsigned ssize)
2636 | {
2637 | btree *b = &tl->bheap;
2638 | size_t i = b_start(b);
2639 |
2640 | while (i)
2641 | {
2642 | /* Scan level below? */
2643 | if (b->bsize[i] < ssize)
2644 | {
2645 | i = b_next(b, i);
2646 | }
2647 | else
2648 | {
2649 | b = b_ptr(b, i);
2650 | i = b_start(b);
2651 | }
2652 | }
2653 |
2654 | return b;
2655 | }
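     |
     | /*
     |  * Best-fit descent, sketched: at each level the walk steps right
     |  * past entries whose recorded size is below ssize, then descends
     |  * into the first entry that is big enough. E.g. with entry sizes
     |  * (32, 96, 256) and ssize = 100 it skips 32 and 96 and descends
     |  * into the 256 subtree. If even the top level has nothing large
     |  * enough, the loop exits with b still pointing at &tl->bheap, which
     |  * btree_remove() below treats as "not found".
     |  */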
2656 |
2657 | /* Remove and return a node of size at least ssize, if possible */
2658 | static btree *btree_remove(atls *tl, unsigned ssize)
2659 | {
2660 | btree *b = btree_search(tl, ssize);
2661 |
2662 | /* Nothing? */
2663 | if (b == &tl->bheap) return NULL;
2664 |
2665 | /* Disconnect it */
2666 | btree_node_del(tl, b->parent, b_pindex(b));
2667 |
2668 | return b;
2669 | }
2670 |
2671 | /* Find space for node of size ssize */
2672 | static btree *btree_find(atls *tl, unsigned ssize, int *ipv)
2673 | {
2674 | btree *b = btree_search(tl, ssize);
2675 | btree *bp = &tl->bheap;
2676 |
2677 | if (b != bp)
2678 | {
2679 | bp = b->parent;
2680 | *ipv = b_prev(bp, b_pindex(b));
2681 | return bp;
2682 | }
2683 |
2684 | /* Nothing in btree? */
2685 | if (b_leaf(b)) return bp;
2686 |
2687 | /* We are larger than anything */
2688 | do
2689 | {
2690 | /* Scan level below */
2691 | b = b_ptr(b, (int) b_last(b));
2692 | }
2693 | while (!b_leaf(b));
2694 |
2695 | *ipv = b_pindex(b);
2696 |
2697 | return b->parent;
2698 | }
2699 |
2700 | /* Cleanup - make sure we have enough temp nodes */
2701 | static noinline void btree_cleanup(atls *tl)
2702 | {
2703 | /* First try to use slab allocations to prevent fragmentation */
2704 | while (tl->b_hgt > tl->b_cnt)
2705 | {
2706 | slist *s = slab_alloc_safe(tl, sizeof(btree) - SEPSIZE);
2707 |
2708 | /* Fall back to in-btree allocations */
2709 | if (!s) goto use_btree;
2710 |
2711 | slist_add(&tl->btree_freenode, s);
2712 | tl->b_cnt++;
2713 | }
2714 |
2715 | return;
2716 |
2717 | use_btree:
2718 |
2719 | /* In-btree allocation by manual memory manipulation */
2720 | while (tl->b_hgt > tl->b_cnt)
2721 | {
2722 | size_t num, msize;
2723 |
2724 | unsigned i;
2725 |
2726 | btree *br;
2727 |
2728 | unsigned offset;
2729 |
2730 | /* Get smallest allocation in btree */
2731 | btree *b = btree_remove(tl, 0);
2732 |
2733 | msize = b->s.size;
2734 |
2735 | /* How many nodes can fit? */
2736 | num = msize / sizeof(btree);
2737 |
2738 | /* But no more than required */
2739 | if (num > tl->b_hgt - tl->b_cnt) num = tl->b_hgt - tl->b_cnt;
2740 |
2741 | /* Prevent recursion by always adding at least one node */
2742 | if (num < 1) num = 1;
2743 |
2744 | /* We are using this */
2745 | set_used(b, msize);
2746 | offset = b->s.bs_offset & ~15;
2747 |
2748 | for (i = 0; i < num; i++)
2749 | {
2750 | br = shift(b, sizeof(btree));
2751 | b->s.size = sizeof(btree);
2752 | offset += sizeof(btree);
2753 | if (i != num - 1) br->s.bs_offset = offset;
2754 | msize -= sizeof(btree);
2755 |
2756 | slist_add(&tl->btree_freenode, &b->list);
2757 | b = br;
2758 | }
2759 |
2760 | tl->b_cnt += num;
2761 |
2762 | /* Nothing left over? */
2763 | if (!msize) continue;
2764 |
2765 | /* Free remaining fragment */
2766 | b->s.size = msize;
2767 | b->s.bs_offset = offset;
2768 | fast_free(tl, b, msize);
2769 | }
2770 | }
2771 |
2772 | static void btree_node_insert(atls *tl, btree *b, int loc, unsigned ssize, btree *node);
2773 |
2774 | /* Split myself */
2775 | static noinline void btree_node_insert_aux(atls *tl, btree *b, int loc, unsigned ssize, btree *node)
2776 | {
2777 | btree *tmp, *tmp2, *bp;
2778 |
2779 | unsigned bsize = 0, tsize;
2780 |
2781 | int i, j, bn;
2782 |
2783 | void *st;
2784 |
2785 | size_t new;
2786 | int inserted = 0;
2787 | size_t next;
2788 |
2789 | /* New node */
2790 | st = slist_rem(&tl->btree_freenode);
2791 | tmp = list_entry(btree, data, st);
2792 | tl->b_cnt--;
2793 |
2794 | /* Clear it */
2795 | memset(&tmp->data, 0, offsetof(btree, prev) - SEPSIZE);
2796 |
2797 | #if 0
2798 | /* Hack - get daughter testing to work */
2799 | tmp->parent = (btree *) 1;
2800 | #endif
2801 |
2802 | /* Special case - insert at start? */
2803 | if (!loc)
2804 | {
2805 | /* Insert at the beginning */
2806 | tmp->bsize[1] = ssize + 2;
2807 | tmp->prev[1] = 0;
2808 | btree_update_daughter(tmp, node, 1);
2809 | inserted = 1;
2810 |
2811 | /* Copy things below median here */
2812 | for (i = 2, j = b_start(b); i <= BT_MAX/2; i++, j = bn)
2813 | {
2814 | bn = b_next(b, j);
2815 |
2816 | tmp->bsize[i] = btree_ssize(b, j) + i + 1;
2817 | tmp->prev[i] = i - 1;
2818 | btree_update_daughter(tmp, b_ptr(b, j), i);
2819 |
2820 | btree_free(b, j);
2821 | }
2822 | }
2823 | else
2824 | {
2825 | /* Copy things below median here */
2826 | for (i = 1, j = b_start(b); i <= BT_MAX/2; i++, j = bn)
2827 | {
2828 | bn = b_next(b, j);
2829 |
2830 | tmp->bsize[i] = btree_ssize(b, j) + i + 1;
2831 | tmp->prev[i] = i - 1;
2832 | btree_update_daughter(tmp, b_ptr(b, j), i);
2833 |
2834 | btree_free(b, j);
2835 |
2836 | /* Need to insert new node? */
2837 | if (j == loc)
2838 | {
2839 | i++;
2840 | tmp->bsize[i] = ssize + i + 1;
2841 | tmp->prev[i] = i - 1;
2842 | btree_update_daughter(tmp, node, i);
2843 | inserted = 1;
2844 | }
2845 | }
2846 | }
2847 | b_start(b) = j;
2848 | b_prev(b, j) = 0;
2849 |
2850 | /* Finish initialization of new node */
2851 | b_start(tmp) = 1;
2852 | b_last(tmp) = i - 1;
2853 | b_next(tmp, i - 1) = 0;
2854 | tsize = tmp->bsize[i - 1];
2855 | b_mask(tmp) = -(1 << (i - 1));
2856 |
2857 | /* Need to insert in remainder? */
2858 | if (!inserted)
2859 | {
2860 | next = b_next(b, loc);
2861 |
2862 | /* We have space - add it */
2863 | new = btree_alloc(b);
2864 |
2865 | b->bsize[new] = ssize + next;
2866 | b_prev(b, next) = new;
2867 | b_next(b, loc) = new;
2868 | b_prev(b, new) = loc;
2869 |
2870 | /* Am I last? Need to update parents */
2871 | if (!next) btree_update_psize(b, ssize);
2872 |
2873 | btree_update_daughter(b, node, new);
2874 | }
2875 |
2876 | bp = b->parent;
2877 | if (bp)
2878 | {
2879 | /* Get node previous to myself above */
2880 | size_t ip = b_prev(bp, b_pindex(b));
2881 |
2882 | /* Easy - just insert into the parent, tail recurse */
2883 | btree_node_insert(tl, bp, ip, tsize, tmp);
2884 |
2885 | return;
2886 | }
2887 |
2888 | /* I'm the top node */
2889 |
2890 | /* New node */
2891 | st = slist_rem(&tl->btree_freenode);
2892 | tmp2 = list_entry(btree, data, st);
2893 | tl->b_cnt--;
2894 |
2895 | /* btree is taller */
2896 | tl->b_hgt++;
2897 |
2898 | /* Copy b into this (shouldn't be needed - could use an allocated root instead) */
2899 | memcpy(&tmp2->data, &b->data, sizeof(btree) - SEPSIZE);
2900 |
2901 | for (i = b_start(b); i; i = b_next(b, i))
2902 | {
2903 | b_ptr(b, i)->parent = tmp2;
2904 | bsize = b->bsize[i];
2905 | }
2906 |
2907 | /* Init b */
2908 | b->bsize[1] = tsize + 2;
2909 | b->bsize[2] = bsize & ~0xff;
2910 | b_ptr(b, 1) = tmp;
2911 | b_ptr(b, 2) = tmp2;
2912 | b_prev(b, 0) = 2;
2913 | b_prev(b, 1) = 0;
2914 | b_prev(b, 2) = 1;
2915 | b_start(b) = 1;
2916 | b_mask(b) = -4;
2917 | b->parent = NULL;
2918 |
2919 | /* Make links */
2920 | tmp->parent = b;
2921 | tmp2->parent = b;
2922 | b_pindex(tmp) = 1;
2923 | b_pindex(tmp2) = 2;
2924 | }
2925 |
2926 | static void btree_node_insert(atls *tl, btree *b, int loc, unsigned ssize, btree *node)
2927 | {
2928 | size_t new;
2929 | size_t next;
2930 |
2931 | #ifdef DEBUG_ALLOC_SLOW
2932 | if (ssize & 0xff) errx(1, "ssize not clean\n");
2933 | #endif
2934 |
2935 | if (!b_mask(b))
2936 | {
2937 | btree_node_insert_aux(tl, b, loc, ssize, node);
2938 |
2939 | return;
2940 | }
2941 |
2942 | /* We have space - add it */
2943 | new = btree_alloc(b);
2944 |
2945 | next = b_next(b, loc);
2946 | b->bsize[new] = ssize + next;
2947 | b_prev(b, next) = new;
2948 | b_next(b, loc) = new;
2949 | b_prev(b, new) = loc;
2950 |
2951 | /* Am I last? Need to update parents */
2952 | if (!next) btree_update_psize(b, ssize);
2953 |
2954 | btree_update_daughter(b, node, new);
2955 | }
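     |
     | /*
     |  * Note on the "ssize + next" arithmetic above: bsize[] entries pack
     |  * two fields - the size in the upper 24 bits (ssize = size * 16 is
     |  * always a multiple of 256, as the DEBUG check asserts) and the
     |  * next-slot index in the low 8 bits. A single addition therefore
     |  * stores the size and links the entry at once, and btree_ssize()
     |  * masks with ~0xff to read the size back.
     |  */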
2956 |
2957 | static void btree_insert(atls *tl, btree *n, size_t size)
2958 | {
2959 | int ip = 0;
2960 |
2961 | /* Convert to internal size (the upper 24 bits of the 32-bit bsize) */
2962 | unsigned ssize = size * 16;
2963 |
2964 | /* First find where to put it, splitting to make room */
2965 | btree *b = btree_find(tl, ssize, &ip);
2966 |
2967 | #ifdef DEBUG_ALLOC_SLOW
2968 | if (!un_used(n)) errx(1, "inserting a used node\n");
2969 | if (size != n->s.size) errx(1, "size mismatch\n");
2970 | #endif
2971 |
2972 | /* Make a leaf node */
2973 | //b_start(n) = 0;
2974 |
2975 | /* Hack - do the above more efficiently */
2976 | n->bsize[0] = 0;
2977 |
2978 | /* Insert it */
2979 | btree_node_insert(tl, b, ip, ssize, n);
2980 |
2981 | btree_cleanup(tl);
2982 | }
2983 |
2984 | static noinline btree *btree_get(atls *tl, unsigned size)
2985 | {
2986 | DECL_PROF_FUNC;
2987 |
2988 | unsigned ssize = size * 16;
2989 | btree *b;
2990 |
2991 | b = btree_remove(tl, ssize);
2992 |
2993 | if (b)
2994 | {
2995 | /* Do not try to merge with me - I'm already taken! */
2996 | set_used(b, b->s.size);
2997 | }
2998 |
2999 | return b;
3000 | }
3001 |
3002 | /* Simple O(n log n) merge - avoids recursion though */
3003 | static void btree_merge(atls *tl1, atls *tl2)
3004 | {
3005 | btree *b;
3006 |
3007 | slist *s, *sn;
3008 |
3009 | while ((b = btree_remove(tl2, 0)))
3010 | {
3011 | btree_insert(tl1, b, b->s.size);
3012 | }
3013 |
3014 | /* Update allocated size */
3015 | tl1->a_alloced += tl2->a_alloced;
3016 |
3017 | /* Free the old extra btree nodes */
3018 | scan_slist_safe(&tl2->btree_freenode, s, sn)
3019 | {
3020 | b = list_entry(btree, data, s);
3021 |
3022 | /* Delete it */
3023 | local_free(tl1, &b->data);
3024 | }
3025 |
3026 | tl2->b_cnt = 0;
3027 | }
3028 |
3029 | /* Count number of nodes + leaves in the btree recursively */
3030 | static __pure int count_btree(btree *b)
3031 | {
3032 | int i, count = 1;
3033 |
3034 | for (i = b_start(b); i; i = b_next(b, i))
3035 | {
3036 | count += count_btree(b_ptr(b, i));
3037 | }
3038 |
3039 | return count;
3040 | }
3041 |
3042 | static __pure int count_btree_space(btree *b)
3043 | {
3044 | int i, count = 0;
3045 |
3046 | if (b_leaf(b)) return b->s.size - PTRSIZE;
3047 |
3048 | for (i = b_start(b); i; i = b_next(b, i))
3049 | {
3050 | count += count_btree_space(b_ptr(b, i));
3051 | }
3052 |
3053 | return count;
3054 | }
3055 |
3056 |
3057 | #ifdef DEBUG_ALLOC_SLOW
3058 |
3059 | /* Check double list constraints */
3060 | static void test_double_lists(atls *tl)
3061 | {
3062 | btree *b;
3063 | dlist *d, *dn;
3064 |
3065 | unsigned i;
3066 |
3067 | for (i = 1; i < NUM_QS; i++)
3068 | {
3069 | if (tl->qs[i].next->prev != &tl->qs[i]) errx(1, "First double link broken\n");
3070 |
3071 | scan_list_safe(&tl->qs[i], d, dn)
3072 | {
3073 | b = list_entry(btree, list, d);
3074 | check_sep(b);
3075 |
3076 | if (!un_used(b)) errx(1, "False used\n");
3077 | if (b->s.size != (i+1)*16) errx(1, "Wrong sized double link\n");
3078 | if (b->s.bs_offset & FLG_SIZE8) errx(1, "flag size wrong\n");
3079 |
3080 | if (dn->prev != d) errx(1, "Back-link broken\n");
3081 | }
3082 | }
3083 |
3084 | if (tl->q_mask & (1ULL << 63)) errx(1, "double list last bit set\n");
3085 | }
3086 |
3087 | #else
3088 | #define test_double_lists(T) ((void) sizeof(T))
3089 | #endif
3090 |
3091 |
3092 | #ifdef DEBUG_ALLOC_SLOW
3093 | /* Test small list constraints */
3094 | static void test_small_list(atls *tl)
3095 | {
3096 | btree *b, *bn;
3097 |
3098 | btree *q8 = get_q8(tl);
3099 |
3100 | if (small_prev(small_next(q8)) != q8) errx(1, "First link broken\n");
3101 |
3102 | for (b = small_next(q8); b != q8; b = bn)
3103 | {
3104 | check_sep(b);
3105 | bn = small_next(b);
3106 |
3107 | if (!(b->s.bs_offset & FLG_SIZE8)) errx(1, "Wrong sized small link\n");
3108 | if (!un_used(b)) errx(1, "False used\n");
3109 | if (small_prev(bn) != b) errx(1, "Back-link broken\n");
3110 | }
3111 | }
3112 | #else
3113 | #define test_small_list(T) ((void) sizeof(T))
3114 | #endif
3115 |
3116 | /* Add to end of small list */
3117 | static void small_insert(atls *tl, btree *b)
3118 | {
3119 | btree *q8 = get_q8(tl);
3120 | btree *qp;
3121 |
3122 | /* Set flag */
3123 | set_size8(b);
3124 |
3125 | qp = small_prev(q8);
3126 |
3127 | set_small_prev(b, qp);
3128 | set_small_next(qp, b);
3129 |
3130 | set_small_next(b, q8);
3131 | set_small_prev(q8, b);
3132 | }
3133 |
3134 | static void small_remove(btree *b)
3135 | {
3136 | btree *qn = small_next(b);
3137 | btree *qp = small_prev(b);
3138 |
3139 | set_small_next(qp, qn);
3140 | set_small_prev(qn, qp);
3141 |
3142 | /* Clear flag */
3143 | unset_size8(b);
3144 | }
3145 |
3146 | static btree *small_del_first(atls *tl)
3147 | {
3148 | btree *q8 = get_q8(tl);
3149 | btree *b = small_next(q8);
3150 | btree *qn = small_next(b);
3151 |
3152 | /* List is empty */
3153 | if (b == q8) return NULL;
3154 |
3155 | /* Dequeue b */
3156 | set_small_next(q8, qn);
3157 | set_small_prev(qn, q8);
3158 |
3159 | /* Clear flag */
3160 | unset_size8(b);
3161 |
3162 | return b;
3163 | }
3164 |
3165 | static void small_merge(atls *tl1, atls *tl2)
3166 | {
3167 | btree *q81 = get_q8(tl1);
3168 | btree *q82 = get_q8(tl2);
3169 |
3170 | btree *q1p = small_prev(q81);
3171 | btree *q2n = small_next(q82);
3172 | btree *q2p = small_prev(q82);
3173 |
3174 | /* Don't need to do anything if adding an empty list */
3175 | if (q2n == q82) return;
3176 |
3177 | set_small_next(q1p, q2n);
3178 | set_small_prev(q2n, q1p);
3179 |
3180 | set_small_prev(q81, q2p);
3181 | set_small_next(q2p, q81);
3182 | }
3183 |
3184 | /* Slab implementation */
3185 |
3186 | static sbheader *slab_start(void *p)
3187 | {
3188 | return (sbheader *) (-SLABSIZE & (uintptr_t) p);
3189 | }
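     |
     | /*
     |  * slab_start() relies on slabs being naturally aligned: -SLABSIZE
     |  * is a mask with the low log2(SLABSIZE) bits clear, so ANDing it
     |  * with any pointer into a slab rounds down to the slab header.
     |  * E.g. for a 64KiB SLABSIZE, 0x12345678 & -0x10000 = 0x12340000.
     |  */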
3190 |
3191 | #ifdef DEBUG_ALLOC_SLOW
3192 |
3193 | static void test_slab(atls *tl)
3194 | {
3195 | int i;
3196 |
3197 | dlist *d, *dn;
3198 |
3199 | for (i = 0; i < NUM_SB; i++)
3200 | {
3201 | scan_list_safe(&tl->slab[i], d, dn)
3202 | {
3203 | if (dn->prev != d) errx(1, "Back-link broken\n");
3204 | }
3205 | }
3206 |
3207 | scan_list_safe(&tl->slab_full, d, dn)
3208 | {
3209 | if (dn->prev != d) errx(1, "Back-link broken\n");
3210 | }
3211 | }
3212 | #else
3213 | #define test_slab(T) ((void) sizeof(T))
3214 | #endif
3215 |
3216 |
3217 | static freesb *slab_alloc_chunk(atls *tl)
3218 | {
3219 | DECL_PROF_FUNC;
3220 |
3221 | freesb *fsb;
3222 |
3223 | size_t alloc_amount = SLABSIZE * (SLABBMAX + 1);
3224 | size_t sbrk_end;
3225 |
3226 | unsigned i;
3227 | unsigned alloced = SLABBMAX;
3228 |
3229 | /* Handle oom more efficiently */
3230 | if (sbrk_oom) return NULL;
3231 |
3232 | /* Make sure percpu value isn't too big */
3233 | if (tl->percpu_hash > cpu_total)
3234 | {
3235 | tl->percpu_hash %= cpu_total;
3236 | }
3237 |
3238 | /* Find an unlocked list with something in it */
3239 | for (i = tl->percpu_hash; i < cpu_total; i++)
3240 | {
3241 | if (pc_slab[i].list && !mutex_trylock(&pc_slab[i].m))
3242 | {
3243 | if (pc_slab[i].list)
3244 | {
3245 | fsb = pc_slab[i].list;
3246 | pc_slab[i].list = fsb->next;
3247 |
3248 | mutex_unlock(&pc_slab[i].m);
3249 | #ifdef WINDOWS
3250 | /* Reallow use of pages */
3251 | for (i = 0; i < fsb->count; i++)
3252 | {
3253 | VirtualAlloc(fsb->blocks[i], SLABSIZE, MEM_COMMIT, PAGE_READWRITE);
3254 | }
3255 | #endif
3256 |
3257 | return fsb;
3258 | }
3259 |
3260 | mutex_unlock(&pc_slab[i].m);
3261 | }
3262 | }
3263 |
3264 | for (i = 0; i < tl->percpu_hash; i++)
3265 | {
3266 | if (pc_slab[i].list && !mutex_trylock(&pc_slab[i].m))
3267 | {
3268 | if (pc_slab[i].list)
3269 | {
3270 | fsb = pc_slab[i].list;
3271 | pc_slab[i].list = fsb->next;
3272 |
3273 | mutex_unlock(&pc_slab[i].m);
3274 | #ifdef WINDOWS
3275 | /* Reallow use of pages */
3276 | for (i = 0; i < fsb->count; i++)
3277 | {
3278 | VirtualAlloc(fsb->blocks[i], SLABSIZE, MEM_COMMIT, PAGE_READWRITE);
3279 | }
3280 | #endif
3281 |
3282 | return fsb;
3283 | }
3284 |
3285 | mutex_unlock(&pc_slab[i].m);
3286 | }
3287 | }
3288 |
3289 | mutex_lock(&sb_lock);
3290 |
3291 | sbrk_end = sbrk_start + sbrk_size;
3292 |
3293 | /* Try to realign with SLABSIZE boundaries */
3294 | if (sbrk_end & (SLABSIZE - 1))
3295 | {
3296 | alloc_amount += SLABSIZE - (sbrk_end & (SLABSIZE - 1));
3297 | }
3298 |
3299 | fsb = sbrk(alloc_amount);
3300 |
3301 | if (fsb == MAP_FAILED)
3302 | {
3303 | /* Too much to allocate - fall back on mmap */
3304 | sbrk_oom = 1;
3305 | mutex_unlock(&sb_lock);
3306 |
3307 | return NULL;
3308 | }
3309 |
3310 | /* Update sbrk information */
3311 | sbrk_size = alloc_amount + (uintptr_t) fsb - sbrk_start;
3312 |
3313 | mutex_unlock(&sb_lock);
3314 |
3315 | /* Are we improperly aligned? */
3316 | if ((SLABSIZE - 1) & (uintptr_t) fsb)
3317 | {
3318 | /* Realign myself (wastes memory) */
3319 | freesb *fsb_new = (freesb *) slab_start(shift(fsb, SLABSIZE - 1));
3320 |
3321 | /* Did we shift too much? */
3322 | if ((uintptr_t) fsb_new - (uintptr_t) fsb > alloc_amount - SLABSIZE * (SLABBMAX + 1))
3323 | {
3324 | alloced--;
3325 | }
3326 |
3327 | fsb = fsb_new;
3328 | }
3329 |
3330 | /* Fill in details */
3331 | for (i = 0; i < alloced; i++)
3332 | {
3333 | fsb->blocks[i] = shift(fsb, SLABSIZE * (i + 1));
3334 | }
3335 | fsb->count = alloced;
3336 |
3337 | return fsb;
3338 | }
3339 |
3340 | static noinline void slab_free_chunk(atls *tl, freesb *fsb)
3341 | {
3342 | DECL_PROF_FUNC;
3343 |
3344 | unsigned i;
3345 |
3346 | /* Mark memory as unused */
3347 | for (i = 0; i < fsb->count; i++)
3348 | {
3349 | #ifdef WINDOWS
3350 | VirtualFree(fsb->blocks[i], SLABSIZE, MEM_DECOMMIT);
3351 | #else
3352 | madvise(fsb->blocks[i], SLABSIZE, MADV_DONTNEED);
3353 | #endif
3354 | }
3355 |
3356 | /* Make sure percpu value isn't too big */
3357 | if (tl->percpu_hash > cpu_total)
3358 | {
3359 | tl->percpu_hash %= cpu_total;
3360 | }
3361 |
3362 | /* First trylock everything, to find something that works */
3363 | for (i = tl->percpu_hash; i < cpu_total; i++)
3364 | {
3365 | if (!mutex_trylock(&pc_slab[i].m))
3366 | {
3367 | tl->percpu_hash = i;
3368 | goto success;
3369 | }
3370 | }
3371 |
3372 | for (i = 0; i < tl->percpu_hash; i++)
3373 | {
3374 | if (!mutex_trylock(&pc_slab[i].m))
3375 | {
3376 | tl->percpu_hash = i;
3377 | goto success;
3378 | }
3379 | }
3380 |
3381 | /* Failure - too much contention, just use our current value */
3382 | i = tl->percpu_hash;
3383 | mutex_lock(&pc_slab[i].m);
3384 |
3385 | success:
3386 | tl->percpu_hash = i;
3387 | fsb->next = pc_slab[i].list;
3388 | pc_slab[i].list = fsb;
3389 | mutex_unlock(&pc_slab[i].m);
3390 | }
3391 |
3392 |
3393 | static sbheader *slab_alloc_block(atls *tl)
3394 | {
3395 | freesb *fsb;
3396 | sbheader *sb;
3397 |
3398 | /* Make sure we have empty blocks */
3399 | if (!tl->slab_chunk)
3400 | {
3401 | tl->slab_chunk = slab_alloc_chunk(tl);
3402 | if (!tl->slab_chunk) return NULL;
3403 | }
3404 |
3405 | fsb = tl->slab_chunk;
3406 |
3407 | if (!fsb->count)
3408 | {
3409 | sb = (sbheader *) fsb;
3410 |
3411 | tl->slab_chunk = NULL;
3412 |
3413 | /* Prevent reuse of this overwritten block */
3414 | sb->size = 0;
3415 |
3416 | return sb;
3417 | }
3418 |
3419 | fsb->count--;
3420 | sb = fsb->blocks[fsb->count];
3421 |
3422 | return sb;
3423 | }
3424 |
3425 | static void slab_free_block(atls *tl, sbheader *sb)
3426 | {
3427 | freesb *ofsb = tl->slab_chunk;
3428 | freesb *fsb = (freesb *) sb;
3429 |
3430 | if (ofsb)
3431 | {
3432 | if (ofsb->count < SLABBMAX - 1)
3433 | {
3434 | ofsb->blocks[ofsb->count] = sb;
3435 | ofsb->count++;
3436 | return;
3437 | }
3438 |
3439 | /* Simplest case - no chunk yet */
3440 | fsb->count = 0;
3441 | tl->slab_chunk = fsb;
3442 |
3443 | slab_free_chunk(tl, ofsb);
3444 |
3445 | return;
3446 | }
3447 |
3448 | /* Simplest case - no chunk yet */
3449 | fsb->count = 0;
3450 | tl->slab_chunk = fsb;
3451 | }
3452 |
3453 | static int init_sldata(void)
3454 | {
3455 | unsigned i;
3456 |
3457 | /* Init total number of cpus */
3458 | cpu_total = cpu_num();
3459 |
3460 | /* Init per-cpu free slab lists */
3461 | pc_slab = big_alloc_aux(page_align(cpu_total * 64));
3462 | if (!pc_slab) return 1;
3463 |
3464 | /*
3465 | * Initialized mutexes have state zero, and initialized lists are NULL
3466 | * so we don't have to do anything to pc_slab to finish initializing it.
3467 | */
3468 |
3469 | /* Calculate slab total sizes so we avoid a division later */
3470 | for (i = 1; i < NUM_SB; i++)
3471 | {
3472 | unsigned size = i * 16;
3473 |
3474 | /* total size taken by all blocks */
3475 | sltotal[i] = ((SLABSIZE - offsetof(sbheader, data))/size) * size;
3476 | }
3477 |
3478 | return 0;
3479 | }
3480 |
3481 | static sbheader *slab_create(atls *tl, dlist *slab, unsigned size)
3482 | {
3483 | DECL_PROF_FUNC;
3484 |
3485 | unsigned index = size/16;
3486 | unsigned total = sltotal[index];
3487 |
3488 | uintptr_t offset;
3489 |
3490 | sbheader *sb = slab_alloc_block(tl);
3491 | if (!sb) return NULL;
3492 |
3493 | /* Normalize size */
3494 | size = index * 16;
3495 |
3496 | /* Fill in details */
3497 | sb->tail = &tl->tail;
3498 |
3499 | dlist_add(slab, &sb->list);
3500 |
3501 | /* Already initialized? */
3502 | if (sb->size == size) return sb;
3503 |
3504 | sb->used = 0;
3505 | sb->size = size;
3506 |
3507 | /* Calculate offset */
3508 | if ((size == 64) || (size == 256))
3509 | {
3510 | /* Make cacheline-aligned */
3511 | offset = (uintptr_t) sb + 128 + 1;
3512 | }
3513 | else
3514 | {
3515 | void *tmp;
3516 |
3517 | /* Start of region */
3518 | offset = (-16 & (uintptr_t) &sb->data);
3519 |
3520 | /* Randomize offset */
3521 | tmp = rnd_offset((void *) offset, (uintptr_t) sb + SLABSIZE - (uintptr_t) offset, total);
3522 |
3523 | offset = (uintptr_t) tmp + 1;
3524 | }
3525 |
3526 | #ifdef DEBUG_ALLOC
3527 | if (offset - 1 + total - (uintptr_t) sb > SLABSIZE) errx(1, "slab overflow\n");
3528 | #endif
3529 |
3530 | sb->first = offset;
3531 | sb->max = (uintptr_t) sb + SLABSIZE - sb->size;
3532 |
3533 | return sb;
3534 | }
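     |
     | /*
     |  * The "+ 1" in the offset calculations above appears to tag
     |  * sb->first with its low bit set while the slab is still in linear
     |  * (bump-pointer) mode. slab_alloc_aux() below tests (p & 1): set
     |  * means "advance first by size", clear means "pop an explicit free
     |  * list entry" - real object addresses are 16-byte aligned here, so
     |  * they never have the low bit set.
     |  */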
3535 |
3536 | static void slab_free(atls *tl, void *p)
3537 | {
3538 | DECL_PROF_FUNC;
3539 |
3540 | sbheader *sb = slab_start(p);
3541 |
3542 | /* Do I own this? */
3543 | if (unlikely(sb->tail != &tl->tail))
3544 | {
3545 | /* Hack wrt mealloc */
3546 | prepend_queue(p, tl, &sb->tail);
3547 |
3548 | return;
3549 | }
3550 |
3551 | /* Add to this slab's free list */
3552 | *(uintptr_t *)p = sb->first;
3553 | sb->first = (uintptr_t) p;
3554 |
3555 | sb->used--;
3556 | if (!sb->used)
3557 | {
3558 | /* If I am the only one in the partial list, don't bother to delete */
3559 | if (sb->list.next == sb->list.prev) return;
3560 |
3561 | dlist_del(&sb->list);
3562 |
3563 | /* Free it */
3564 | slab_free_block(tl, sb);
3565 | }
3566 | }
3567 |
3568 | static void *slab_alloc(atls *tl, size_t size);
3569 | static noinline void *slab_alloc_nolist(size_t size, dlist *slab, atls *tl)
3570 | {
3571 | DECL_PROF_FUNC;
3572 |
3573 | void *res;
3574 |
3575 | /* Out of line zero-check */
3576 | if (!size)
3577 | {
3578 | size++;
3579 | }
3580 | else
3581 | {
3582 | /* Scan queue if we have nothing left */
3583 | if (!tl->slab_chunk)
3584 | {
3585 | scan_queue(tl, &tl->head, 0);
3586 | }
3587 |
3588 | /* Still nothing? */
3589 | if (dlist_empty(slab))
3590 | {
3591 | if (!slab_create(tl, slab, size + 15))
3592 | {
3593 | goto again;
3594 | }
3595 | }
3596 | }
3597 |
3598 | /* We have something to use */
3599 | res = slab_alloc(tl, size);
3600 | if (res) return res;
3601 |
3602 | again:
3603 |
3604 | size = sep_align(size);
3605 | return local_alloc(tl, size);
3606 | }
3607 |
3608 | static void *slab_alloc_aux(atls *tl, dlist *slab)
3609 | {
3610 | DECL_PROF_FUNC;
3611 |
3612 | /* Get sbheader */
3613 | sbheader *sb = list_entry(sbheader, list, slab->next);
3614 |
3615 | uintptr_t p = sb->first;
3616 |
3617 | sb->used++;
3618 |
3619 | if (!(p & 1))
3620 | {
3621 | sb->first = *(uintptr_t *) (void *) p;
3622 |
3623 | if (!sb->first) goto done;
3624 | }
3625 | else
3626 | {
3627 | p--;
3628 | sb->first += sb->size;
3629 | if (sb->first > sb->max)
3630 | {
3631 | sb->first = 0;
3632 |
3633 | goto done;
3634 | }
3635 | }
3636 |
3637 | return (void *) p;
3638 |
3639 | done:
3640 | /* Move to full list */
3641 | dlist_del(&sb->list);
3642 | dlist_add(&tl->slab_full, &sb->list);
3643 |
3644 | return (void *) p;
3645 | }
3646 |
3647 | static void *slab_alloc(atls *tl, size_t size)
3648 | {
3649 | dlist *slab;
3650 |
3651 | size_t nsize = size + 15;
3652 |
3653 | #ifdef DEBUG_NO_SLAB
3654 | size = sep_align(size);
3655 | return local_alloc(tl, size);
3656 | #endif
3657 |
3658 | /* Get slab */
3659 | #ifdef __x86_64__
3660 | slab = shift(&tl->slab[0], nsize & ~15);
3661 | #else
3662 | slab = shift(&tl->slab[0], (nsize & ~15) / 2);
3663 | #endif
3664 |
3665 | if (dlist_empty(slab)) return slab_alloc_nolist(size, slab, tl);
3666 |
3667 | return slab_alloc_aux(tl, slab);
3668 | }
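     |
     | /*
     |  * The shift(&tl->slab[0], ...) above indexes the per-size-class
     |  * dlist array without a divide by using the rounded-up size
     |  * directly as a byte offset. This assumes sizeof(dlist) is 16 on
     |  * x86_64 (two 8-byte pointers), which is presumably why 32-bit
     |  * targets, where a dlist is 8 bytes, halve the offset.
     |  */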
3669 |
3670 | /* Same as above, but fail if we can't quickly allocate */
3671 | static void *slab_alloc_safe(atls *tl, size_t size)
3672 | {
3673 | dlist *slab;
3674 |
3675 | #ifdef DEBUG_NO_SLAB
3676 | return NULL;
3677 | #endif
3678 |
3679 | size += 15;
3680 |
3681 | /* Get slab */
3682 | #ifdef __x86_64__
3683 | slab = shift(&tl->slab[0], size & ~15);
3684 | #else
3685 | slab = shift(&tl->slab[0], (size & ~15) / 2);
3686 | #endif
3687 |
3688 | /* Fail if we can't quickly allocate (don't call scan_queue) */
3689 | if (dlist_empty(slab) && !slab_create(tl, slab, size)) return NULL;
3690 |
3691 | return slab_alloc_aux(tl, slab);
3692 | }
3693 |
3694 | static noinline void *slab_zalloc(atls *tl, size_t size)
3695 | {
3696 | void *p = slab_alloc(tl, size);
3697 | if (p) return memset(p, 0, size);
3698 |
3699 | size = sep_align(size);
3700 |
3701 | p = fast_alloc(tl, size);
3702 | if (!p)
3703 | {
3704 | tl->callocable = 0;
3705 | p = slow_alloc(tl, size);
3706 |
3707 | /* No need to memset? */
3708 | if (!p || tl->callocable)
3709 | {
3710 | return p;
3711 | }
3712 | }
3713 |
3714 | /* Success */
3715 | return memset(p, 0, size - 8);
3716 | }
3717 |
3718 | static void slab_merge(atls *tl1, atls *tl2)
3719 | {
3720 | int i;
3721 | dlist *d, *dn;
3722 |
3723 | /* Merge partial slabs */
3724 | for (i = 0; i < NUM_SB; i++)
3725 | {
3726 | /* Update all the tail pointers */
3727 | scan_list_safe(&tl2->slab[i], d, dn)
3728 | {
3729 | sbheader *sb = list_entry(sbheader, list, d);
3730 | sb->tail = &tl1->tail;
3731 |
3732 | /* There may be one empty slab in this slot */
3733 | if (!sb->used)
3734 | {
3735 | /* Move to full list */
3736 | dlist_del(&sb->list);
3737 | dlist_add(&tl1->slab_full, &sb->list);
3738 | }
3739 | }
3740 |
3741 | dlist_merge(&tl1->slab[i], &tl2->slab[i]);
3742 | }
3743 |
3744 | /* Merge full and empty slabs */
3745 |
3746 | /* Update all the tail pointers */
3747 | scan_list(&tl2->slab_full, d)
3748 | {
3749 | sbheader *sb = list_entry(sbheader, list, d);
3750 | sb->tail = &tl1->tail;
3751 | }
3752 |
3753 | dlist_merge(&tl1->slab_full, &tl2->slab_full);
3754 |
3755 | /* Get rid of empty pages */
3756 | if (tl2->slab_chunk) slab_free_chunk(tl1, tl2->slab_chunk);
3757 | }
3758 |
3759 | static void local_free(atls *tl, void *p)
3760 | {
3761 | if (is_slab(p))
3762 | {
3763 | slab_free(tl, p);
3764 | }
3765 | else
3766 | {
3767 | btree *b = CONTAINER(btree, data, p);
3768 | fast_free(tl, b, b->s.size);
3769 | }
3770 | }
3771 |
3772 | static void *local_alloc(atls *tl, size_t size)
3773 | {
3774 | DECL_PROF_FUNC;
3775 |
3776 | void *p = fast_alloc(tl, size);
3777 | if (p) return p;
3778 |
3779 | return slow_alloc(tl, size);
3780 | }
3781 |
3782 | static void test_all(atls *tl)
3783 | {
3784 | test_fast_lists(tl);
3785 | test_double_lists(tl);
3786 | test_small_list(tl);
3787 | test_btree(tl);
3788 | test_queue(tl);
3789 | test_slab(tl);
3790 | test_blocks(tl);
3791 | }
3792 |
3793 | static void block_list_merge(atls *tl1, atls *tl2)
3794 | {
3795 | mealloc *m;
3796 | dlist *d;
3797 |
3798 | /* Scan block list, and update all tail pointers */
3799 | scan_list(&tl2->bl, d)
3800 | {
3801 | m = list_entry(mealloc, m_list, d);
3802 | m->tail = &tl1->tail;
3803 | }
3804 |
3805 | dlist_merge(&tl1->bl, &tl2->bl);
3806 | }
3807 |
3808 | static void atls_merge(atls *tl1, atls *tl2)
3809 | {
3810 | int i;
3811 |
3812 | /* Merge block lists so others know about us */
3813 | block_list_merge(tl1, tl2);
3814 |
3815 | /* Then merge the btrees */
3816 | btree_merge(tl1, tl2);
3817 |
3818 | /* Merge the fast lists */
3819 | fast_merge(tl1, tl2);
3820 |
3821 | /* Merge the slabs */
3822 | slab_merge(tl1, tl2);
3823 |
3824 | /* Then the double links */
3825 | for (i = 1; i < NUM_QS; i++)
3826 | {
3827 | dlist_merge(&tl1->qs[i], &tl2->qs[i]);
3828 | }
3829 |
3830 | /* Finally the small list */
3831 | small_merge(tl1, tl2);
3832 |
3833 | test_all(tl1);
3834 | }
3835 |
3836 | static btree *split_node_rhs(atls *tl, btree *b, size_t t_size, size_t msize)
3837 | {
3838 | size_t r_size = t_size - msize;
3839 |
3840 | btree *bm = shift(b, msize);
3841 |
3842 | #ifdef DEBUG_ALLOC_SLOW
3843 | if (t_size != b->s.size) errx(1, "size mismatch\n");
3844 | if (msize > t_size) errx(1, "too big to fit in split\n");
3845 | check_sep(b);
3846 | #endif
3847 |
3848 | /* Nothing left over? Then just return the whole node */
3849 | if (!r_size) return b;
3850 |
3851 | /* Update local size */
3852 | b->s.size = msize;
3853 |
3854 | /* Create middle separator */
3855 | set_sep(bm, r_size, b);
3856 |
3857 | check_sep(bm);
3858 |
3859 | /* Make sure to try to use remainder */
3860 | fast_free(tl, b, msize);
3861 |
3862 | /* Paranoia */
3863 | check_sep(b);
3864 | check_sep(bm);
3865 |
3866 | /* Used for when right node is used */
3867 | return bm;
3868 | }
3869 |
3870 | static always_inline void *split_node(atls *tl, btree *b, size_t t_size, size_t msize)
3871 | {
3872 | size_t r_size = t_size - msize;
3873 |
3874 | btree *bm = shift(b, msize);
3875 |
3876 | #ifdef DEBUG_ALLOC_SLOW
3877 | if (t_size != b->s.size) errx(1, "size mismatch\n");
3878 | if (msize > t_size) errx(1, "too big to fit in split\n");
3879 | check_sep(b);
3880 | #endif
3881 |
3882 | /* Remainder too small to split off profitably? */
3883 | if (r_size * 4 < msize)
3884 | {
3885 | /* Used this whole */
3886 | return &b->data;
3887 | }
3888 |
3889 | /* Update local size */
3890 | b->s.size = msize;
3891 |
3892 | /* Create middle separator */
3893 | set_sep(bm, r_size, b);
3894 |
3895 | check_sep(bm);
3896 |
3897 | /* Make sure to try to use remainder */
3898 | fast_free(tl, bm, r_size);
3899 |
3900 | /* Paranoia */
3901 | check_sep(b);
3902 | check_sep(bm);
3903 |
3904 | return &b->data;
3905 | }
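     |
     | /*
     |  * Split policy, by example: for a 256-byte node and a 208-byte
     |  * request the remainder is 48 bytes, and 48 * 4 < 208, so the
     |  * whole node is handed out rather than leaving a sliver behind.
     |  * For a 112-byte request the 144-byte remainder is worth keeping:
     |  * a separator is written at b + 112 and the tail is freed for
     |  * reuse.
     |  */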
3906 |
3907 | static void node_insert(atls *tl, btree *b)
3908 | {
3909 | size_t size = b->s.size;
3910 |
3911 | if (size > QS_MAX)
3912 | {
3913 | /* Insert new segment into btree */
3914 | btree_insert(tl, b, size);
3915 |
3916 | return;
3917 | }
3918 |
3919 | if (size == 16)
3920 | {
3921 | small_insert(tl, b);
3922 |
3923 | return;
3924 | }
3925 |
3926 | dlist_add(MYSIZE_TO_PTR(tl, size), &b->list2);
3927 | tl->q_mask |= 1ULL << ((size - 8) / 16);
3928 | }
3929 |
3930 | /* Complete merge */
3931 | static void merge_node_aux(atls *tl, btree *bl)
3932 | {
3933 | DECL_PROF_FUNC;
3934 |
3935 | size_t msize = bl->s.size;
3936 |
3937 | /* Are we the only element in the allocation? */
3938 | btree *br = shift(bl, msize);
3939 |
3940 | mealloc *m;
3941 |
3942 | /* Only unmap a block that is whole, empty, and a small fraction of our total */
3943 | if ((bl->s.bs_offset >= HEADERSIZE) || br->s.size || (msize * 4 > tl->a_alloced)) goto save;
3944 |
3945 | /* Save a block, if it is big enough to use for a pending allocation */
3946 | if (tl->s_wanted && (tl->s_wanted <= bl->s.size))
3947 | {
3948 | /* No longer wanted */
3949 | tl->s_wanted = 0;
3950 |
3951 | goto save;
3952 | }
3953 |
3954 | /* Get header */
3955 | m = page_start(bl);
3956 |
3957 | /* Remove from the list */
3958 | dlist_del(&m->m_list);
3959 |
3960 | /* Size of block */
3961 | msize = m->b.s.size + HEADERSIZE;
3962 |
3963 | #ifdef DEBUG_ALLOC_SLOW
3964 | if (msize & (PAGESIZE - 1)) errx(1, "big block size incorrect\n");
3965 | #endif
3966 |
3967 | tl->a_alloced -= msize;
3968 |
3969 | big_freed(m, msize);
3970 |
3971 | #ifndef WINDOWS
3972 | munmap(m, msize);
3973 | #else
3974 | VirtualFree(m, 0, MEM_RELEASE);
3975 | #endif
3976 | return;
3977 |
3978 | save:
3979 | /* element is unallocated */
3980 | set_unused(bl, br);
3981 |
3982 | /* Insert into correct data structure */
3983 | node_insert(tl, bl);
3984 | }
3985 |
3986 | /* Merge a node with unallocated neighbours + insert into free list */
3987 | static void merge_node(atls *tl, void *p)
3988 | {
3989 | DECL_PROF_FUNC;
3990 |
3991 | btree *b = CONTAINER(btree, data, p);
3992 | btree *bl = b, *br = shift(b, b->s.size);
3993 |
3994 | size_t tsize;
3995 |
3996 | #ifdef DEBUG_ALLOC_SLOW
3997 | if (un_used(b)) errx(1, "merging unused node");
3998 | #endif
3999 |
4000 | /* Test right */
4001 | if (un_used(br))
4002 | {
4003 | if (br->s.bs_offset & FLG_SIZE8)
4004 | {
4005 | small_remove(br);
4006 | tsize = 16;
4007 | }
4008 | else
4009 | {
4010 | tsize = br->s.size;
4011 |
4012 | if (tsize > QS_MAX)
4013 | {
4014 | btree_node_del(tl, br->parent, b_pindex(br));
4015 | }
4016 | else
4017 | {
4018 | dlist_del(&br->list2);
4019 | }
4020 | }
4021 |
4022 | /* Fixup sizes */
4023 | b->s.size += tsize;
4024 | }
4025 |
4026 | /* Test left */
4027 | if (left_unused(b))
4028 | {
4029 | if (b->s.bs_offset & FLG_LSIZE8)
4030 | {
4031 | bl = shift(b, -(uintptr_t)16);
4032 |
4033 | small_remove(bl);
4034 | }
4035 | else
4036 | {
4037 | bl = b->s.left;
4038 |
4039 | if (bl->s.size > QS_MAX)
4040 | {
4041 | btree_node_del(tl, bl->parent, b_pindex(bl));
4042 | }
4043 | else
4044 | {
4045 | dlist_del(&bl->list2);
4046 | }
4047 | }
4048 |
4049 | /* Fixup sizes */
4050 | bl->s.size += b->s.size;
4051 | }
4052 |
4053 | merge_node_aux(tl, bl);
4054 | }
4055 |
4056 | #ifdef __x86_64__
4057 |
4058 | #ifdef _MSC_VER
4059 |
4060 | #define INIT_REG(N) u64b xmm##N
4061 | #define SAVE_REG(N, V) xmm##N = V
4062 | #define RESTORE_REG(N, V) V = xmm##N
4063 |
4064 | #else
4065 |
4066 | #define INIT_REG(N)
4067 |
4068 | #define SAVE_REG(N, V)\
4069 | asm volatile ("movq %0, %%xmm" #N :: "mr" (V))
4070 |
4071 | #define RESTORE_REG(N, V)\
4072 | asm volatile ("movq %%xmm" #N ", %0" : "=r" (V))
4073 |
4074 | #endif
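     |
     | /*
     |  * These macros stash a live value in an SSE register rather than
     |  * spilling it to the stack or pinning another general-purpose
     |  * register across the inner loop of fast_alloc() below - see the
     |  * note there about keeping %rbx free. The MSVC variant simply
     |  * falls back to a plain local variable.
     |  */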
4075 |
4076 | static always_inline void *fast_alloc(atls *tl, size_t size)
4077 | {
4078 | DECL_PROF_FUNC;
4079 |
4080 | size_t n;
4081 | u64b mask, tmask;
4082 | slist *p;
4083 |
4084 | btree *b;
4085 | size_t rsize;
4086 |
4087 | if (unlikely(size > FAST_64_BIN)) return NULL;
4088 |
4089 | n = size2fl(size);
4090 | mask = FAST_MASK << n;
4091 | tmask = tl->f_mask & mask;
4092 |
4093 | /* Anything to do? */
4094 | while (tmask)
4095 | {
4096 | INIT_REG(0);
4097 | n = ffsq(tmask);
4098 | p = &tl->fl[n];
4099 |
4100 | SAVE_REG(0, tmask);
4101 |
4102 | while (p->next)
4103 | {
4104 | slist *s = slist_rem(p);
4105 | b = CONTAINER(btree, list, s);
4106 |
4107 | rsize = b->s.size;
4108 |
4109 | check_sep(b);
4110 |
4111 | /* Found a match? */
4112 | if (likely(rsize >= size)) return &b->data;
4113 |
4114 | RESTORE_REG(0, tmask);
4115 |
4116 | /* Move to lower bin */
4117 | //fast_add(tl, b, n - 1);
4118 |
4119 | /* Inlined version of the above so %rbx isn't used */
4120 | slist_add(&p[-1], &b->list);
4121 | tmask = tmask / 2;
4122 | tl->f_mask |= tmask & (-tmask);
4123 | }
4124 |
4125 | RESTORE_REG(0, tmask);
4126 |
4127 | /*
4128 | * Turn off least significant bit in tmask, as nothing is left there
4129 | */
4130 | mask = (tmask - 1) | ~tmask;
4131 | tmask &= mask;
4132 | tl->f_mask &= mask;
4133 | }
4134 |
4135 | return NULL;
4136 | }
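     |
     | /*
     |  * The mask update at the bottom of the loop above clears the
     |  * lowest set bit of tmask in both tmask and tl->f_mask at once.
     |  * Example with tmask = 0b10100:
     |  *
     |  *   (tmask - 1) | ~tmask = 0b10011 | ...01011 = ...11011
     |  *
     |  * ANDing that into tmask leaves 0b10000, and ANDing it into
     |  * tl->f_mask records that this size bin is now known to be empty.
     |  */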
4137 |
4138 | static void *slow_alloc_aux(atls *tl, size_t size)
4139 | {
4140 | DECL_PROF_FUNC;
4141 |
4142 | size_t n;
4143 | u64b mask, tmask;
4144 |
4145 | btree *b;
4146 | dlist *d;
4147 | size_t rsize;
4148 |
4149 | /* Special case empty allocations */
4150 | if (size == 16)
4151 | {
4152 | b = small_del_first(tl);
4153 | if (b)
4154 | {
4155 | set_used(b, 16);
4156 |
4157 | return &b->data;
4158 | }
4159 |
4160 | n = 1;
4161 | }
4162 | else
4163 | {
4164 | n = (size / 16) - 1;
4165 | }
4166 |
4167 | mask = (~0ULL) << n;
4168 | tmask = tl->q_mask & mask;
4169 |
4170 | /* Are there nodes big enough in the queues? */
4171 | while (tmask)
4172 | {
4173 | /* Ignore if bit unset */
4174 | n = ffsq(tmask);
4175 | d = &tl->qs[n];
4176 |
4177 | /* Found something? */
4178 | if (d->next != d)
4179 | {
4180 | b = list_entry(btree, list2, d->next);
4181 |
4182 | /* Paranoia */
4183 | check_sep(b);
4184 |
4185 | dlist_del(&b->list2);
4186 |
4187 | rsize = (n + 1) * 16;
4188 | set_used(b, rsize);
4189 | return split_node(tl, b, rsize, size);
4190 | }
4191 |
4192 | /*
4193 | * Turn off least significant bit in tmask, as nothing is left there
4194 | */
4195 | mask = (tmask - 1) | ~tmask;
4196 | tmask &= mask;
4197 | tl->q_mask &= mask;
4198 | }
4199 |
4200 | return NULL;
4201 | }
4202 |
4203 | #else
4204 |
4205 | /* Versions optimized for 32bit */
4206 | static always_inline void *fast_alloc(atls *tl, size_t size)
4207 | {
4208 | size_t n = size2fl(size);
4209 |
4210 | unsigned tmask;
4211 |
4212 | slist *p;
4213 |
4214 | btree *b;
4215 | size_t rsize;
4216 |
4217 | if (n < 32)
4218 | {
4219 | tmask = tl->f_mask & (FAST_MASK << n);
4220 |
4221 | /* Anything to do? */
4222 | while (tmask)
4223 | {
4224 | n = ffsu(tmask);
4225 | p = &tl->fl[n];
4226 |
4227 | while (p->next)
4228 | {
4229 | slist *s = slist_rem(p);
4230 | b = CONTAINER(btree, list, s);
4231 |
4232 | rsize = b->s.size;
4233 |
4234 | check_sep(b);
4235 |
4236 | /* Found a match? */
4237 | if (likely(rsize >= size)) return &b->data;
4238 |
4239 | /* Move to lower bin */
4240 | fast_add(tl, b, n - 1);
4241 | }
4242 |
4243 | /*
4244 | * Turn off least significant bit in tmask, as nothing is left there
4245 | */
4246 | tmask &= tmask - 1;
4247 | tl->f_mask &= ~(1ULL << n);
4248 | }
4249 |
4250 | tmask = (tl->f_mask >> 32) & (FAST_MASK >> (32 - n));
4251 | }
4252 | else
4253 | {
4254 | tmask = (tl->f_mask >> 32) & (FAST_MASK << (n - 32));
4255 | }
4256 |
4257 | if (unlikely(size > FAST_64_BIN)) return NULL;
4258 |
4259 | /* Anything to do? */
4260 | while (tmask)
4261 | {
4262 | n = ffsu(tmask) + 32;
4263 | p = &tl->fl[n];
4264 |
4265 | while (p->next)
4266 | {
4267 | slist *s = slist_rem(p);
4268 | b = CONTAINER(btree, list, s);
4269 |
4270 | rsize = b->s.size;
4271 |
4272 | check_sep(b);
4273 |
4274 | /* Found a match */
4275 | if (likely(rsize >= size)) return &b->data;
4276 |
4277 | /* Move to lower bin */
4278 | fast_add(tl, b, n - 1);
4279 | }
4280 |
4281 | /*
4282 | * Turn off least significant bit in tmask, as nothing is left there
4283 | */
4284 | tmask &= tmask - 1;
4285 | tl->f_mask &= ~(1ULL << n);
4286 | }
4287 |
4288 | return NULL;
4289 | }
4290 |
4291 | static void *slow_alloc_aux(atls *tl, size_t size)
4292 | {
4293 | size_t n;
4294 | unsigned tmask;
4295 |
4296 | btree *b;
4297 | dlist *d;
4298 | size_t rsize;
4299 |
4300 | /* Special case empty allocations */
4301 | if (size == 16)
4302 | {
4303 | b = small_del_first(tl);
4304 | if (b)
4305 | {
4306 | set_used(b, 16);
4307 | return &b->data;
4308 | }
4309 |
4310 | n = 1;
4311 | }
4312 | else
4313 | {
4314 | n = (size / 16) - 1;
4315 | }
4316 |
4317 | if (n < 32)
4318 | {
4319 | tmask = tl->q_mask & (~0 << n);
4320 |
4321 | /* Are there nodes big enough in the queues? */
4322 | while (tmask)
4323 | {
4324 | /* Ignore if bit unset */
4325 | n = ffsu(tmask);
4326 | d = &tl->qs[n];
4327 |
4328 | /* Found something? */
4329 | if (d->next != d)
4330 | {
4331 | b = list_entry(btree, list2, d->next);
4332 |
4333 | /* Paranoia */
4334 | check_sep(b);
4335 |
4336 | dlist_del(&b->list2);
4337 |
4338 | rsize = (n + 1) * 16;
4339 | set_used(b, rsize);
4340 |
4341 | return split_node(tl, b, rsize, size);
4342 | }
4343 |
4344 | /*
4345 | * Turn off least significant bit in tmask, as nothing is left there
4346 | */
4347 | tmask &= tmask - 1;
4348 | tl->q_mask &= ~(1ULL << n);
4349 | }
4350 |
4351 | tmask = tl->q_mask >> 32;
4352 | }
4353 | else
4354 | {
4355 | tmask = (tl->q_mask >> 32) & (~0 << (n - 32));
4356 | }
4357 |
4358 | /* Are there nodes big enough in the queues? */
4359 | while (tmask)
4360 | {
4361 | /* Ignore if bit unset */
4362 | n = ffsu(tmask) + 32;
4363 | d = &tl->qs[n];
4364 |
4365 | /* Found something? */
4366 | if (d->next != d)
4367 | {
4368 | b = list_entry(btree, list2, d->next);
4369 |
4370 | /* Paranoia */
4371 | check_sep(b);
4372 |
4373 | dlist_del(&b->list2);
4374 |
4375 | rsize = (n + 1) * 16;
4376 | set_used(b, rsize);
4377 |
4378 | return split_node(tl, b, rsize, size);
4379 | }
4380 |
4381 | /*
4382 | * Turn off least significant bit in tmask, as nothing is left there
4383 | */
4384 | tmask &= tmask - 1;
4385 | tl->q_mask &= ~(1ULL << n);
4386 | }
4387 |
4388 | /* Failed */
4389 | return NULL;
4390 | }
4391 |
4392 | #endif
4393 |
4394 | static void *block_alloc_aux(atls *tl, size_t size)
4395 | {
4396 | DECL_PROF_FUNC;
4397 |
4398 | btree *b, *br;
4399 | mealloc *ma;
4400 | size_t rsize, tasize;
4401 |
4402 | /* Make overhead 1/4th of total allocated after this allocation */
4403 | tasize = size + (size + tl->a_alloced) / 3;
4404 | tasize = page_align(tasize);
4405 |
4406 | /* Clip to BTMALLOC */
4407 | if (tasize > BTMALLOC) tasize = BTMALLOC;
4408 |
4409 | /* Must be more than MINALLOC */
4410 | if (tasize < MINALLOC) tasize = MINALLOC;
4411 |
4412 | ma = big_alloc_aux(tasize);
4413 |
4414 | if (!ma)
4415 | {
4416 | /* Try with smaller alloc */
4417 | tasize = page_align(size + HEADERSIZE);
4418 |
4419 | ma = big_alloc_aux(tasize);
4420 |
4421 | if (!ma)
4422 | {
4423 | /* Try again if new handler works */
4424 | if (handle_oom(size)) return slow_alloc(tl, size);
4425 |
4426 | return NULL;
4427 | }
4428 | }
4429 |
4430 | rsize = tasize - HEADERSIZE;
4431 |
4432 | /* Keep track of total allocations */
4433 | tl->a_alloced += tasize;
4434 |
4435 | /* Fill in header */
4436 | dlist_add(&tl->bl, &ma->m_list);
4437 |
4438 | ma->tail = &tl->tail;
4439 |
4440 | b = &ma->b;
4441 |
4442 | /* Create left separator */
4443 | b->s.size = rsize;
4444 | b->s.bs_offset = 16;
4445 |
4446 | /* Position of right seperator */
4447 | br = shift(b, rsize);
4448 |
4449 | /* Create right separator */
4450 | br->s.bs_offset = tasize - SEPSIZE;
4451 |
4452 | tl->callocable = 1;
4453 |
4454 | return split_node(tl, b, rsize, size);
4455 | }
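     |
     | /*
     |  * Sanity check of the overhead formula above: with a_alloced = 900
     |  * and size = 100, tasize = 100 + (100 + 900) / 3 = 433 before page
     |  * alignment and clamping, so roughly 333 of the new 1333 total -
     |  * one quarter - is slack available for future requests.
     |  */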
4456 |
4457 | static void *block_alloc(atls *tl, size_t size)
4458 | {
4459 | DECL_PROF_FUNC;
4460 |
4461 | btree *b;
4462 | void *p;
4463 |
4464 | if (size >= BTMALLOC)
4465 | {
4466 | tl->callocable = 1;
4467 | return big_alloc(tl, size);
4468 | }
4469 |
4470 | if (size <= QS_MAX)
4471 | {
4472 | p = slow_alloc_aux(tl, size);
4473 | if (p) return p;
4474 |
4475 | /* Clear fast lists */
4476 | clear_fast(tl);
4477 |
4478 | p = slow_alloc_aux(tl, size);
4479 | if (p) return p;
4480 | }
4481 | else
4482 | {
4483 | /* Clear fast lists */
4484 | clear_fast(tl);
4485 | }
4486 |
4487 | /* Try to alloc on the btree */
4488 | b = btree_get(tl, size);
4489 | if (b) return split_node(tl, b, b->s.size, size);
4490 |
4491 | /* Try to grab space from a dead thread */
4492 | if (reap_dead(tl)) return local_alloc(tl, size);
4493 |
4494 | /* We need more space, so try to free memory. */
4495 | if (scan_queue(tl, &tl->head, size)) return slow_alloc(tl, size);
4496 |
4497 | #ifdef DEBUG_LEAK
4498 | leak_print(tl, "Trying to allocate %llu (%p) but cannot\n", (unsigned long long) size, (void *) size);
4499 | malloc_stats_aux(3);
4500 | #endif
4501 |
4502 | /* Failure - make a big alloc, and add to the btree */
4503 | return block_alloc_aux(tl, size);
4504 | }
4505 |
4506 |
4507 | static void *slow_alloc(atls *tl, size_t size)
4508 | {
4509 | DECL_PROF_FUNC;
4510 |
4511 | /* Fast allocation failed - try normal data structures */
4512 | if (size <= QS_MAX)
4513 | {
4514 | void *res = slow_alloc_aux(tl, size);
4515 | if (res) return res;
4516 | }
4517 |
4518 | return block_alloc(tl, size);
4519 | }
4520 |
4521 | /* Free without allocating any memory (for when we have no TLS) */
4522 | static void free_nomem(void *p)
4523 | {
4524 | DECL_PROF_FUNC;
4525 |
4526 | btree *b = CONTAINER(btree, data, p);
4527 |
4528 | mealloc *m;
4529 |
4530 | slist *v, *tail;
4531 |
4532 | #ifdef DEBUG_ALLOC
4533 | /* Check for double-free errors */
4534 | if (un_used(b)) errx(1, "Double free with %p\n", p);
4535 | #endif
4536 |
4537 | /* Get block start */
4538 | m = read_bs(b);
4539 |
4540 | /* Treat node as a list now */
4541 | v = &b->list;
4542 |
4543 | v->next = NULL;
4544 |
4545 | /*
4546 | * Prevent other threads from dying because we have no hazard pointer
4547 | * This protects the dereference of m->tail
4548 | */
4549 | mutex_lock(&h_lock);
4550 |
4551 | /* Prepend the data */
4552 | tail = xchg_ptr(m->tail, v);
4553 |
4554 | /* Done */
4555 | mutex_unlock(&h_lock);
4556 |
4557 | tail->next = v;
4558 | }
4559 |
4560 | static noinline void free_aux(void *p)
4561 | {
4562 | DECL_PROF_FUNC;
4563 |
4564 | atls *tl = init_tls();
4565 | if (!tl)
4566 | {
4567 | free_nomem(p);
4568 | return;
4569 | }
4570 |
4571 | PREFIX(free)(p);
4572 | }
4573 |
4574 | static noinline void free_clear(atls *tl)
4575 | {
4576 | clear_fast(tl);
4577 | }
4578 |
4579 | void PREFIX(free)(void *p)
4580 | {
4581 | DECL_PROF_FUNC;
4582 |
4583 | btree *b;
4584 | size_t size;
4585 |
4586 | atls *tl;
4587 |
4588 | if (!p) return;
4589 |
4590 | tl = get_tls();
4591 |
4592 | if (!tl)
4593 | {
4594 | free_aux(p);
4595 | return;
4596 | }
4597 |
4598 | test_all(tl);
4599 |
4600 | if (likely(is_slab(p)))
4601 | {
4602 | slab_free(tl, p);
4603 |
4604 | test_all(tl);
4605 |
4606 | return;
4607 | }
4608 |
4609 | b = CONTAINER(btree, data, p);
4610 | size = b->s.size;
4611 |
4612 | #ifdef DEBUG_ALLOC
4613 | /* Check for double-free errors */
4614 | if (un_used(b)) errx(1, "Double free with %p\n", p);
4615 | #endif
4616 |
4617 | if (size)
4618 | {
4619 | /* Get block start */
4620 | mealloc *m = read_bs(b);
4621 |
4622 | /* My tail = a local node */
4623 | if (unlikely(m->tail != &tl->tail))
4624 | {
4625 |
4626 | /* Add to their queue, and let them deal with it */
4627 | prepend_queue(p, tl, &m->tail);
4628 |
4629 | return;
4630 | }
4631 |
4632 | /* Inlined version of fast_free() */
4633 | size = size2fl(size);
4634 | tl->f_mask |= 1ULL << size;
4635 | slist_add(&tl->fl[size], p);
4636 |
4637 | tl->fcount++;
4638 | if (!(tl->fcount & FREE_FAST)) free_clear(tl);
4639 |
4640 | test_all(tl);
4641 | }
4642 | else
4643 | {
4644 | big_free_aux(page_start(b));
4645 | }
4646 | }
4647 |
4648 | void cfree(void *p)
4649 | {
4650 | PREFIX(free)(p);
4651 | }
4652 |
4653 | static noinline void *malloc_aux(size_t size)
4654 | {
4655 | DECL_PROF_FUNC;
4656 |
4657 | atls *tl = init_tls();
4658 | if (!tl) return NULL;
4659 | return PREFIX(malloc)(size);
4660 | }
4661 |
4662 | void *PREFIX(malloc)(size_t size)
4663 | {
4664 | DECL_PROF_FUNC;
4665 |
4666 | void *res;
4667 | atls *tl;
4668 |
4669 | test_leak();
4670 |
4671 | /* Init local data if required */
4672 | tl = get_tls();
4673 |
4674 | if (!tl) return malloc_aux(size);
4675 |
4676 | test_all(tl);
4677 |
4678 | if (likely(size <= SB_MAX)) return slab_alloc(tl, size);
4679 |
4680 | /* Prevent overflow bug in sep_align() below */
4681 | if (unlikely(size > BTMALLOC)) return big_alloc(tl, size);
4682 |
4683 | size = sep_align(size);
4684 | res = fast_alloc(tl, size);
4685 | if (res) return res;
4686 |
4687 | return slow_alloc(tl, size);
4688 | }
4689 |
4690 | #ifdef DEBUG_ALLOC_SLOW
4691 | static void test_wiped(void *p, size_t len)
4692 | {
4693 | char *endy;
4694 | char *y;
4695 |
4696 | if (!len) return;
4697 | endy = &(((char *)p)[len - 8]);
4698 | for (y = p; y < endy; y++)
4699 | {
4700 | if (*y) errx(1, "found non-zero\n");
4701 | }
4702 | }
4703 | #else
4704 | #define test_wiped(P, L) ((void) (sizeof(P) + sizeof(L)))
4705 | #endif
4706 |
4707 | static noinline void *zalloc_aux(size_t size)
4708 | {
4709 | atls *tl = init_tls();
4710 | if (!tl) return NULL;
4711 | return zalloc(tl, size);
4712 | }
4713 |
4714 | static void *zalloc(atls *tl, size_t size)
4715 | {
4716 | void *p;
4717 |
4718 | test_leak();
4719 | test_all(tl);
4720 |
4721 | if (likely(size <= SB_MAX)) return slab_zalloc(tl, size);
4722 |
4723 | /* Prevent overflow bug in sep_align() below */
4724 | if (unlikely(size > BTMALLOC)) return big_alloc(tl, size);
4725 |
4726 | size = sep_align(size);
4727 |
4728 | p = fast_alloc(tl, size);
4729 |
4730 | if (!p)
4731 | {
4732 | tl->callocable = 0;
4733 | p = slow_alloc(tl, size);
4734 |
4735 | /* No need to memset? */
4736 | if (!p || tl->callocable)
4737 | {
4738 | test_wiped(p, size);
4739 |
4740 | return p;
4741 | }
4742 | }
4743 |
4744 | test_all(tl);
4745 |
4746 | return memset(p, 0, size - 8);
4747 | }
4748 |
4749 | static size_t safemul(size_t n, size_t size)
4750 | {
4751 | #ifdef __x86_64__
4752 | #ifdef _MSC_VER
4753 | size_t highproduct;
4754 | size_t rsize = _umul128(n, size, &highproduct);
4755 | if (highproduct)
4756 | return TOP_SIZE + 1;
4757 | #else /* _MSC_VER */
4758 | /* 64 bit */
4759 | __uint128_t dn = n;
4760 | __uint128_t dsize = size;
4761 | __uint128_t drsize = dn*dsize;
4762 | size_t rsize = drsize;
4763 | if (drsize >> 64)
4764 | {
4765 | /* Overflow */
4766 | return TOP_SIZE + 1;
4767 | }
4768 | #endif /* _MSC_VER */
4769 | #else
4770 |
4771 | /* 32 bit */
4772 | u64b dn = n;
4773 | u64b dsize = size;
4774 | u64b drsize = dn*dsize;
4775 | size_t rsize = drsize;
4776 |
4777 | if (drsize >> 32)
4778 | {
4779 | /* Overflow */
4780 | return TOP_SIZE + 1;
4781 | }
4782 | #endif
4783 |
4784 | return rsize;
4785 | }
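     |
     | /*
     |  * Usage sketch: safemul() widens the multiply and returns the
     |  * sentinel TOP_SIZE + 1 on overflow, which downstream size checks
     |  * treat as an unsatisfiable request. So calloc(SIZE_MAX, 2) fails
     |  * cleanly instead of allocating a wrapped-around small size.
     |  */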
4786 |
4787 | void *PREFIX(calloc)(size_t n, size_t size)
4788 | {
4789 | DECL_PROF_FUNC;
4790 |
4791 | /* Init local data if required */
4792 | atls *tl = get_tls();
4793 |
4794 | size = safemul(n, size);
4795 |
4796 | test_leak();
4797 |
4798 | if (!tl) return zalloc_aux(size);
4799 |
4800 | return zalloc(tl, size);
4801 | }
4802 |
4803 | #ifdef WINDOWS
4804 | void *PREFIX(_calloc_impl)(size_t n, size_t size, int *errno_tmp)
4805 | {
4806 | DECL_PROF_FUNC;
4807 |
4808 | void *ret;
4809 | atls *tl;
4810 |
4811 | int errno_orig;
4812 | if (!errno_tmp) return PREFIX(calloc)(n, size);
4813 |
4814 | /* Init local data if required */
4815 | tl = get_tls();
4816 |
4817 | size = safemul(n, size);
4818 |
4819 | test_leak();
4820 |
4821 | if (!tl) return zalloc_aux(size);
4822 |
4823 | _get_errno(&errno_orig);
4824 |
4825 | ret = zalloc(tl, size); /* size already went through safemul() above */
4826 | _get_errno(errno_tmp);
4827 | _set_errno(errno_orig);
4828 |
4829 | return ret;
4830 | }
4831 | #endif
4832 |
4833 | static noinline void *realloc_aux(void *p, size_t size)
4834 | {
4835 | atls *tl = init_tls();
4836 |
4837 | /* Cannot allocate anything */
4838 | if (!tl) return NULL;
4839 |
4840 | return PREFIX(realloc)(p, size);
4841 | }
4842 |
4843 | static noinline void *realloc_aux2(void *p, size_t size, atls *tl)
4844 | {
4845 | btree *b = CONTAINER(btree, data, p);
4846 | size_t msize = b->s.size;
4847 |
4848 | size_t old_size;
4849 |
4850 | #ifdef DEBUG_ALLOC
4851 | if (un_used(b)) errx(1, "Realloc of unmalloced pointer %p\n", p);
4852 | #endif
4853 |
4854 | /* Was a big block? */
4855 | if (!msize)
4856 | {
4857 | #ifndef WINDOWS
4858 | size_t *np;
4859 | #endif
4860 | size_t *ps = page_start(b);
4861 |
4862 | size_t offset = (char *) b - (char *) ps;
4863 |
4864 | /* Get old size */
4865 | old_size = *ps;
4866 |
4867 | /* Don't bother resizing shrinks that are more than half the allocated size */
4868 | if ((old_size - offset <= size * 2) && (old_size - offset >= size)) return p;
4869 |
4870 | /* Resize to new big block if possible */
4871 | if (size >= BTMALLOC)
4872 | {
4873 | /* Align */
4874 | size = page_align(size + offset + offsetof(btree, data));
4875 |
4876 | #ifndef WINDOWS
4877 | /* Use (nonportable) mremap */
4878 | np = mremap(ps, old_size, size, MREMAP_MAYMOVE);
4879 |
4880 | /* Success? */
4881 | if (np != MAP_FAILED)
4882 | {
4883 | /* Save new size */
4884 | *np = size;
4885 |
4886 | /* Return new pointer */
4887 | return shift(np, offset + offsetof(btree, data));
4888 | }
4889 | #endif
4890 |
4891 | if (size < old_size)
4892 | {
4893 | #ifndef WINDOWS
4894 | if (!munmap(shift(ps, size), old_size - size))
4895 | {
4896 | /* Update size */
4897 | *ps = size;
4898 | }
4899 | #else
4900 | /*
4901 | * Say we no longer want the memory....
4902 | * But it is still mapped into our address space taking up room!
4903 | */
4904 | if (VirtualAlloc(shift(ps, size), old_size - size, MEM_RESET,
4905 | PAGE_NOACCESS))
4906 | {
4907 | /* Update size */
4908 | *ps = size;
4909 | }
4910 | #endif
4911 |
4912 | return p;
4913 | }
4914 | }
4915 | }
4916 | else
4917 | {
4918 | mealloc *m;
4919 |
4920 | /* Get old size */
4921 | old_size = msize;
4922 |
4923 | size = sep_align(size);
4924 |
4925 | /* Don't bother resizing shrinks that are more than half the allocated size */
4926 | if ((old_size <= size * 2) && (old_size >= size)) return p;
4927 |
4928 | m = read_bs(b);
4929 |
4930 | /* Local node? */
4931 | if (m->tail == &tl->tail)
4932 | {
4933 | btree *br;
4934 |
4935 | /* Easy case */
4936 | if (size <= msize) return split_node(tl, b, msize, size);
4937 |
4938 | /* Make sure adjacent nodes are in the btree */
4939 | clear_fast(tl);
4940 |
4941 | /* Medium or small size - try to merge */
4942 | br = shift(b, msize);
4943 | if (un_used(br))
4944 | {
4945 | if (br->s.bs_offset & FLG_SIZE8)
4946 | {
4947 | small_remove(br);
4948 |
4949 | /* Fixup sizes */
4950 | b->s.size += 16;
4951 | msize += 16;
4952 |
4953 | br = shift(br, 16);
4954 |
4955 | /* Set it as used */
4956 | br->s.bs_offset &= ~FLG_LUNUSED;
4957 | }
4958 | else
4959 | {
4960 | size_t rsize = br->s.size;
4961 | if (rsize)
4962 | {
4963 | if (rsize > QS_MAX)
4964 | {
4965 | btree_node_del(tl, br->parent, b_pindex(br));
4966 | }
4967 | else
4968 | {
4969 | dlist_del(&br->list2);
4970 | }
4971 |
4972 | /* Fixup sizes */
4973 | b->s.size += rsize;
4974 | msize += rsize;
4975 |
4976 | br = shift(br, rsize);
4977 |
4978 | /* Set it as used */
4979 | br->s.bs_offset &= ~FLG_LUNUSED;
4980 | }
4981 | }
4982 | }
4983 |
4984 | /* Region fits? */
4985 | if (size <= msize) return split_node(tl, b, msize, size);
4986 | }
4987 | else
4988 | {
4989 | /* We can only shrink a foreign node */
4990 | if (size <= msize)
4991 | {
4992 | /* Open coded split node */
4993 | btree *bm = shift(b, size);
4994 |
4995 | /* Update my size */
4996 | b->s.size = size;
4997 |
4999 | 				/* Create middle separator */
4999 | set_sep(bm, old_size - size, b);
5000 |
5001 | /* Free the foreign excess */
5002 | prepend_queue((void *) &bm->data, tl, &m->tail);
5003 |
5004 | return p;
5005 | }
5006 | }
5007 | }
5008 |
5009 | /* Failure */
5010 | return NULL;
5011 | }
5012 |
5013 | /* Same as realloc(), but takes the extra parameters 'actual_old_size' and 'copy_offset' to eliminate overcopying */
5014 | void *PREFIX(exrealloc)(void *p, size_t size, size_t actual_old_size, size_t copy_offset)
5015 | {
5016 | DECL_PROF_FUNC;
5017 |
5018 | void *np;
5019 |
5020 | /* Init local data if required */
5021 | atls *tl = get_tls();
5022 |
5023 | int old_errno;
5024 |
5025 | test_leak();
5026 |
5027 | if (!tl) return realloc_aux(p, size);
5028 |
5029 | test_all(tl);
5030 |
5031 | /* realloc(p, 0) is the same as free(p) */
5032 | if (!size)
5033 | {
5034 | PREFIX(free)(p);
5035 |
5036 | return NULL;
5037 | }
5038 |
5039 | 	/* realloc(NULL, size) is the same as malloc(size) */
5040 | if (!p) return PREFIX(malloc)(size);
5041 |
5042 | /* Too large to allocate */
5043 | if (size > TOP_SIZE) goto nomem;
5044 |
5045 | if (!is_slab(p))
5046 | {
5047 | /* See if merging will work */
5048 | np = realloc_aux2(p, size, tl);
5049 |
5050 | if (np) return np;
5051 | }
5052 |
5053 | 	/* Have to allocate and copy manually - save errno so we can restore it */
5054 | #ifdef WINDOWS
5055 | 	_get_errno(&old_errno);
5056 | #else
5057 | 	old_errno = errno;
5058 | #endif
5059 | np = PREFIX(malloc)(size);
5060 | if (!np)
5061 | {
5062 | /* Is original allocation still okay? */
5063 | if (size <= malloc_usable_size(p))
5064 | {
5065 | /* Don't set errno to be ENOMEM */
5066 | #ifdef WINDOWS
5067 | _set_errno(old_errno);
5068 | #else
5069 | errno = old_errno;
5070 | #endif
5071 |
5072 | /* Return old allocation */
5073 | return p;
5074 | }
5075 | goto nomem;
5076 | }
5077 |
5078 | /* Copy data */
5079 | if (size > actual_old_size) size = actual_old_size;
5080 | memcpy((unsigned char *) np + copy_offset, (unsigned char *) p + copy_offset, size - copy_offset);
5081 |
5082 | PREFIX(free)(p);
5083 |
5084 | /* Done */
5085 | return np;
5086 |
5087 | nomem:
5088 | set_enomem();
5089 | return NULL;
5090 | }
5091 |
5092 | void *PREFIX(realloc)(void *p, size_t size)
5093 | {
5094 | DECL_PROF_FUNC;
5095 |
5096 | void *np;
5097 |
5098 | size_t old_size;
5099 |
5100 | /* Init local data if required */
5101 | atls *tl = get_tls();
5102 |
5103 | int old_errno;
5104 |
5105 | test_leak();
5106 |
5107 | if (!tl) return realloc_aux(p, size);
5108 |
5109 | test_all(tl);
5110 |
5111 | /* realloc(p, 0) is the same as free(p) */
5112 | if (!size)
5113 | {
5114 | PREFIX(free)(p);
5115 |
5116 | return NULL;
5117 | }
5118 |
5119 | 	/* realloc(NULL, size) is the same as malloc(size) */
5120 | if (!p) return PREFIX(malloc)(size);
5121 |
5122 | /* Too large to allocate */
5123 | if (size > TOP_SIZE) goto nomem;
5124 |
5125 | if (!is_slab(p))
5126 | {
5127 | /* See if merging will work */
5128 | np = realloc_aux2(p, size, tl);
5129 |
5130 | if (np) return np;
5131 | }
5132 |
5133 | old_size = malloc_usable_size(p);
5134 |
5135 | 	/* Have to allocate and copy manually - save errno so we can restore it */
5136 | #ifdef WINDOWS
5137 | 	_get_errno(&old_errno);
5138 | #else
5139 | 	old_errno = errno;
5140 | #endif
5141 |
5142 | np = PREFIX(malloc)(size);
5143 | if (!np)
5144 | {
5145 | /* Is original allocation still okay? */
5146 | if (size <= old_size)
5147 | {
5148 | /* Don't set errno to be ENOMEM */
5149 | #ifdef WINDOWS
5150 | _set_errno(old_errno);
5151 | #else
5152 | errno = old_errno;
5153 | #endif
5154 |
5155 | /* Return old allocation */
5156 | return p;
5157 | }
5158 | goto nomem;
5159 | }
5160 |
5161 | /* Copy data */
5162 | if (size > old_size) size = old_size;
5163 | memcpy(np, p, size);
5164 |
5165 | PREFIX(free)(p);
5166 |
5167 | /* Done */
5168 | return np;
5169 |
5170 | nomem:
5171 | set_enomem();
5172 | return NULL;
5173 | }
5174 |
5175 | #ifndef WINDOWS
5176 | static void unmap_range(void *x1, void *x2)
5177 | {
5178 | if (x1 != x2)
5179 | {
5180 | munmap(x1, (char *) x2 - (char *) x1);
5181 | }
5182 | }
5183 | #endif
5184 |
5185 | #ifdef DEBUG_ALLOC_SLOW
5186 | static void test_align(size_t align, void *p)
5187 | {
5188 | uintptr_t x = (uintptr_t) p;
5189 | if (align && (x & (align - 1))) errx(1, "Incorrect alignment of pointer\n");
5190 | }
5191 | #else
5192 | #define test_align(a, p) ((void) (sizeof(a) + sizeof(p)))
5193 | #endif
5194 |
5195 | static noinline void *aligned_alloc_aux(size_t align, size_t size)
5196 | {
5197 | atls *tl = init_tls();
5198 |
5199 | /* Cannot allocate anything! */
5200 | if (!tl) return NULL;
5201 |
5202 | return aligned_alloc(align, size);
5203 | }
5204 |
5205 | #ifdef WINDOWS
5206 | static void *aligned_alloc_aux2(size_t align, size_t size, size_t rsize)
5207 | {
5208 | size_t psize = page_align(rsize + PAGESIZE);
5209 | size_t *ps;
5210 |
5211 | (void) size;
5212 |
5213 | if (align > PAGESIZE) goto nomem;
5214 | if (rsize > TOP_SIZE) goto nomem;
5215 |
5216 | 	ps = VirtualAlloc(NULL, psize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
5217 | if (ps == MAP_FAILED) goto nomem;
5218 |
5219 | /* Small alignment */
5220 | *ps = psize;
5221 | ps = shift(ps, PAGESIZE);
5222 |
5223 | test_align(align, ps);
5224 |
5225 | return ps;
5226 |
5227 | nomem:
5228 | set_enomem();
5229 | return NULL;
5230 | }
5231 | #else
5232 | static void *aligned_alloc_aux2(size_t align, size_t size, size_t rsize)
5233 | {
5234 | size_t pssize = page_align(size + PAGESIZE);
5235 | size_t psize = page_align(rsize + PAGESIZE);
5236 | size_t lsize;
5237 | int flags = MAP_PRIVATE | MAP_ANONYMOUS;
5238 |
5239 | size_t *lstart, *lend;
5240 |
5241 | size_t *ps;
5242 | void *p;
5243 |
5244 | if (rsize > TOP_SIZE) goto nomem;
5245 |
5246 | /*
5247 | * Hack - large alignments require no reservation,
5248 | * otherwise we run out of memory
5249 | */
5250 | if (align > size) flags |= MAP_NORESERVE;
5251 |
5252 | ps = mmap(NULL, psize, PROT_READ | PROT_WRITE, flags, -1, 0);
5253 |
5254 | /* Out of memory */
5255 | if (ps == MAP_FAILED) goto nomem;
5256 |
5257 | /* Small alignment */
5258 | if (align <= PAGESIZE)
5259 | {
5260 | *ps = psize;
5261 | p = shift(ps, PAGESIZE);
5262 |
5263 | test_align(align, p);
5264 |
5265 | return p;
5266 | }
5267 |
5268 | /* Large alignment */
5269 | lstart = ps;
5270 | lsize = (-(uintptr_t) ps) & (align - 1);
5271 |
5272 | /* Already aligned - need to shift to get sep+size at beginning */
5273 | if (!lsize)
5274 | {
5275 | ps = shift(ps, align - PAGESIZE);
5276 |
5277 | /* Fragment at beginning to unmap */
5278 | unmap_range(lstart, ps);
5279 |
5280 | *ps = pssize;
5281 | p = shift(ps, PAGESIZE);
5282 |
5283 | test_align(align, p);
5284 | return p;
5285 | }
5286 |
5287 | lend = shift(ps, rsize);
5288 | ps = shift(ps, lsize - PAGESIZE);
5289 |
5290 | /* Fragment at beginning to unmap */
5291 | unmap_range(lstart, ps);
5292 | *ps = pssize;
5293 | p = shift(ps, PAGESIZE);
5294 |
5295 | lstart = shift(p, pssize);
5296 |
5297 | /* Fragment at end to unmap */
5298 | unmap_range(lstart, lend);
5299 |
5300 | test_align(align, p);
5301 | return p;
5302 |
5303 | nomem:
5304 | set_enomem();
5305 | return NULL;
5306 | }
5307 | #endif
5308 |
5309 | void *aligned_alloc(size_t align, size_t size)
5310 | {
5311 | DECL_PROF_FUNC;
5312 |
5313 | size_t rsize, lsize;
5314 |
5315 | void *p;
5316 |
5317 | btree *b;
5318 |
5319 | atls *tl = get_tls();
5320 |
5321 | test_leak();
5322 |
5323 | /* Too large to allocate */
5324 | if (size > TOP_SIZE) goto nomem;
5325 |
5326 | if (align <= SEPSIZE) return PREFIX(malloc)(size);
5327 |
5328 | if (!tl) return aligned_alloc_aux(align, size);
5329 |
5330 | /* Try to cache-line align via slab special case */
5331 | if ((size <= 64) && (align <= 64))
5332 | {
5333 | p = slab_alloc(tl, 64);
5334 | if (p)
5335 | {
5336 | 			/* Double-check alignment, as slab_alloc may fall back in low-memory conditions */
5337 | if (!(63 & (uintptr_t) p)) return p;
5338 | local_free(tl, p);
5339 | }
5340 | }
5341 |
5342 | size = sep_align(size);
5343 | rsize = sep_align(size + align);
5344 |
5345 | /* Check for overflow */
5346 | if ((rsize <= size) || (rsize <= align)) goto nomem;
5347 |
5348 | #ifdef WINDOWS
5349 | /* Large allocations are special */
5350 | if (rsize >= BTMALLOC)
5351 | {
5352 | return aligned_alloc_aux2(align, size, rsize);
5353 | }
5354 | #else
5355 | /* Large allocations are special */
5356 | if (rsize >= BTMALLOC)
5357 | {
5358 | return aligned_alloc_aux2(align, size, rsize);
5359 | }
5360 | #endif
5361 |
5362 | while (1)
5363 | {
5364 | p = fast_alloc(tl, rsize);
5365 | if (p) break;
5366 |
5367 | if (rsize < QS_MAX)
5368 | {
5369 | p = slow_alloc_aux(tl, rsize);
5370 | if (p) break;
5371 |
5372 | /* Clear fast lists */
5373 | clear_fast(tl);
5374 |
5375 | p = slow_alloc_aux(tl, rsize);
5376 | if (p) break;
5377 | }
5378 | else
5379 | {
5380 | /* Clear fast lists */
5381 | clear_fast(tl);
5382 | }
5383 |
5384 | /* Try to alloc on the btree */
5385 | b = btree_get(tl, rsize);
5386 | if (b)
5387 | {
5388 | p = split_node(tl, b, b->s.size, rsize);
5389 | break;
5390 | }
5391 |
5392 | /* Try to grab space from a dead thread */
5393 | if (reap_dead(tl)) continue;
5394 |
5395 | /* We need more space, so try to free memory. */
5396 | if (scan_queue(tl, &tl->head, rsize)) continue;
5397 |
5398 | /* Everything failed - fall back to large allocation */
5399 | return aligned_alloc_aux2(align, size, rsize);
5400 | }
5401 |
5402 | lsize = (-(uintptr_t) p) & (align - 1);
5403 |
5404 | b = CONTAINER(btree, data, p);
5405 |
5406 | #ifdef DEBUG_ALLOC_SLOW
5407 | if (rsize > b->s.size) errx(1, "node received is too small\n");
5408 | #endif
5409 |
5410 | /* get real size allocated */
5411 | rsize = b->s.size;
5412 |
5413 | /* Already aligned? */
5414 | if (!lsize)
5415 | {
5416 | test_align(align, &b->data);
5417 |
5418 | /* Split off part we need */
5419 | return split_node(tl, b, rsize, size);
5420 | }
5421 |
5422 | b = split_node_rhs(tl, b, rsize, lsize);
5423 | test_align(align, &b->data);
5424 |
5425 | return split_node(tl, b, rsize - lsize, size);
5426 |
5427 | nomem:
5428 | set_enomem();
5429 | return NULL;
5430 | }
5431 |
5432 | /* This is now just a wrapper around the ISO C11 aligned_alloc() function */
5433 | void *memalign(size_t align, size_t size)
5434 | {
5435 | return aligned_alloc(align, size);
5436 | }
5437 |
5438 | int posix_memalign(void **p, size_t align, size_t size)
5439 | {
5440 | 	/* Make sure align is a power of two and a multiple of sizeof(void *) */
5441 | #ifdef __x86_64__
5442 | if (align & ((align - 1) | 7))
5443 | {
5444 | *p = NULL;
5445 | return EINVAL;
5446 | }
5447 | #else
5448 | if (align & ((align - 1) | 3))
5449 | {
5450 | *p = NULL;
5451 | return EINVAL;
5452 | }
5453 | #endif
5454 |
5455 | *p = aligned_alloc(align, size);
5456 |
5457 | if (!*p) return ENOMEM;
5458 |
5459 | return 0;
5460 | }
5461 |
5462 | void *valloc(size_t size)
5463 | {
5464 | return aligned_alloc(PAGESIZE, size);
5465 | }
5466 |
5467 | void *pvalloc(size_t size)
5468 | {
5469 | return aligned_alloc(PAGESIZE, page_align(size));
5470 | }
5471 |
5472 | #ifdef WINDOWS
5473 | static
5474 | #endif
5475 | size_t malloc_usable_size(void *p)
5476 | {
5477 | size_t offset;
5478 | size_t *ps;
5479 | size_t size;
5480 |
5481 | DECL_PROF_FUNC;
5482 |
5483 | btree *b = CONTAINER(btree, data, p);
5484 |
5485 | /* Don't crash on a NULL pointer */
5486 | if (!p) return 0;
5487 |
5488 | /* Handle slab allocations */
5489 | if (is_slab(p))
5490 | {
5491 | sbheader *sb = slab_start(p);
5492 | return sb->size;
5493 | }
5494 |
5495 | size = b->s.size;
5496 |
5497 | /* Small allocation */
5498 | if (size) return size - PTRSIZE;
5499 |
5500 | /* Large allocation */
5501 | ps = page_start(b);
5502 | offset = (uintptr_t) &b->data - (uintptr_t) ps;
5503 |
5504 | return *ps - offset;
5505 | }
5506 |
5507 | #ifdef WINDOWS
5508 | #ifdef PREFIX
5509 | size_t __pure PREFIX(_msize)(void *p)
5510 | #else /* !PREFIX */
5511 | __attribute__((dllimport)) size_t _msize(void *p)
5512 | #endif /* PREFIX */
5513 | {
5514 | return malloc_usable_size(p);
5515 | }
5516 | #endif
5517 |
5518 | struct mallinfo mallinfo(void)
5519 | {
5520 | atls *tl = get_tls();
5521 | struct mallinfo mi = {0,0,0,0,0,0,0,0,0,0};
5522 |
5523 | dlist *d;
5524 | slist *s;
5525 |
5526 | int i;
5527 |
5528 | btree *b;
5529 |
5530 | size_t size;
5531 |
5532 | mi.arena = sbrk_size;
5533 |
5534 | if (!tl)
5535 | {
5536 | tl = init_tls();
5537 |
5538 | /* Cannot allocate anything, just return arena count */
5539 | if (!tl) return mi;
5540 | }
5541 |
5542 | /* Scan slab */
5543 | for (i = 0; i < NUM_SB; i++)
5544 | {
5545 | scan_list(&tl->slab[i], d)
5546 | {
5547 | mi.smblks++;
5548 | mi.usmblks++;
5549 | }
5550 | }
5551 |
5552 | scan_list(&tl->slab_full, d)
5553 | {
5554 | mi.smblks++;
5555 | mi.usmblks++;
5556 | }
5557 |
5558 | if (tl->slab_chunk)
5559 | {
5560 | mi.fsmblks = 1 + tl->slab_chunk->count;
5561 | mi.smblks += mi.fsmblks;
5562 | }
5563 |
5564 | /* Scan dlists */
5565 | for (i = 1; i < NUM_QS; i++)
5566 | {
5567 | scan_list(&tl->qs[i], d)
5568 | {
5569 | mi.ordblks++;
5570 |
5571 | b = CONTAINER(btree, list, d);
5572 | size = b->s.size - PTRSIZE;
5573 | mi.fordblks += size;
5574 | }
5575 | }
5576 |
5577 | /* Add in results from small list */
5578 | for (b = small_next((btree *) &tl->qs[0]); b != (btree *) &tl->qs[0]; b = small_next(b))
5579 | {
5580 | mi.ordblks++;
5581 | mi.fordblks += 8;
5582 | }
5583 |
5584 | /* Scan fastlists */
5585 | for (i = 0; i < NUM_FL; i++)
5586 | {
5587 | scan_slist(&tl->fl[i], s)
5588 | {
5589 | mi.ordblks++;
5590 |
5591 | b = CONTAINER(btree, list, s);
5592 | size = b->s.size - PTRSIZE;
5593 | mi.fordblks += size;
5594 | }
5595 | }
5596 |
5597 | /* Count memory blocks */
5598 | scan_list(&tl->bl, d)
5599 | {
5600 | mi.hblks++;
5601 | }
5602 |
5603 | /* Count btree nodes */
5604 | mi.hblkhd = count_btree(&tl->bheap);
5605 |
5606 | /* Count btree space */
5607 | mi.fordblks += count_btree_space(&tl->bheap);
5608 |
5609 | 	/* Total allocated space (including overhead of separators and atls) */
5610 | mi.uordblks = tl->a_alloced - mi.fordblks + PAGESIZE;
5611 |
5612 | /* Total easily callocable region */
5613 | mi.keepcost = 0;
5614 |
5615 | /* Done */
5616 | return mi;
5617 | }
5618 |
5619 | int malloc_trim(size_t pad)
5620 | {
5621 | atls *tl = get_tls();
5622 |
5623 | /* Nothing allocated - do nothing */
5624 | if (!tl) return 1;
5625 |
5626 | /* Clear incoming frees */
5627 | scan_queue(tl, &tl->head, 0);
5628 |
5629 | /* Hack - ignore pad - and just free as much as possible */
5630 | clear_fast(tl);
5631 |
5632 | (void) pad;
5633 |
5634 | /* Always return success */
5635 | return 1;
5636 | }
5637 |
5638 | int mallopt(int param, int val)
5639 | {
5640 | /* Ignore parameters */
5641 | (void) param;
5642 | (void) val;
5643 |
5644 | /* Just return success - we don't have any parameters to modify */
5645 | return 1;
5646 | }
5647 |
5648 | #ifdef DEBUG_LEAK
5649 | static int btree_print(atls *tl, btree *b)
5650 | {
5651 | int i;
5652 | btree *bc;
5653 |
5654 | int count = 1;
5655 |
5656 | if (b_leaf(b))
5657 | {
5658 | leak_print(tl, "%u\n", b->s.size);
5659 | return 0;
5660 | }
5661 |
5662 | leak_print(tl, "Btree: %p\n", (void *) b);
5663 |
5664 | for (i = b_start(b); i; i = b_next(b, i))
5665 | {
5666 | bc = b_ptr(b, i);
5667 | leak_print(tl, "link %p\n", (void *) bc);
5668 | count += btree_print(tl, bc);
5669 | }
5670 |
5671 | return count;
5672 | }
5673 | #endif
5674 |
5675 | #ifndef WINDOWS
5676 |
5677 | static void mem_slab(void)
5678 | {
5679 | int i;
5680 | int count;
5681 | dlist *d;
5682 |
5683 | atls *tl = get_tls();
5684 | if (!tl) return;
5685 |
5686 | leak_print(tl, "Total Slab Virtual: %llu\n", (unsigned long long) sbrk_size);
5687 |
5688 | for (i = 0; i < NUM_SB; i++)
5689 | {
5690 | if (dlist_empty(&tl->slab[i])) continue;
5691 |
5692 | count = 0;
5693 | scan_list(&tl->slab[i], d)
5694 | {
5695 | count++;
5696 | }
5697 | leak_print(tl, "Partial slab %d used: %lld\n", i * 16, count * 65536ULL);
5698 | }
5699 |
5700 | if (!dlist_empty(&tl->slab_full))
5701 | {
5702 | count = 0;
5703 | scan_list(&tl->slab_full, d)
5704 | {
5705 | count++;
5706 | }
5707 | leak_print(tl, "Full slab used: %lld\n", count * 65536ULL);
5708 | }
5709 |
5710 | if (tl->slab_chunk)
5711 | {
5712 | leak_print(tl, "Local free slabs: %lld\n", (tl->slab_chunk->count + 1) * 65536LL);
5713 | }
5714 | else
5715 | {
5716 | leak_print(tl, "Local free slabs: 0\n");
5717 | }
5718 | }
5719 |
5720 | #ifdef DEBUG_LEAK
5721 | static void mem_big(void)
5722 | {
5723 | int i;
5724 |
5725 | /* If vsnprintf allocates, we may have a problem... */
5726 | mutex_lock(&l_lock);
5727 |
5728 | for (i = 0; i < LEAK_MAX; i++)
5729 | {
5730 | if (big_leak[i].p)
5731 | {
5732 | leak_print(get_tls(), "big block %p: %llu\n", big_leak[i].p, (unsigned long long) big_leak[i].size);
5733 | }
5734 | }
5735 |
5736 | mutex_unlock(&l_lock);
5737 | }
5738 | #endif
5739 |
5740 | static void malloc_stats_aux(int show_nodes)
5741 | {
5742 | atls *tl = get_tls();
5743 |
5744 | dlist *d;
5745 | btree *b;
5746 |
5747 | size_t size;
5748 | size_t tsize = 0;
5749 | size_t asize = 0;
5750 |
5751 | /* Nothing allocated - print nothing */
5752 | if (!tl) return;
5753 |
5754 | clear_fast(tl);
5755 |
5756 | scan_list(&tl->bl, d)
5757 | {
5758 | mealloc *m = list_entry(mealloc, m_list, d);
5759 |
5760 | size = big_block_size(m);
5761 |
5762 | if (size)
5763 | {
5764 | leak_print(tl, "Block: %p %llu\n", (void *) m, (unsigned long long) size);
5765 | }
5766 |
5767 | /* Scan seps for this block */
5768 | for (b = &m->b;; b = shift(b, size))
5769 | {
5770 | if (b->s.bs_offset & FLG_SIZE8)
5771 | {
5772 | size = 16;
5773 | }
5774 | else
5775 | {
5776 | size = b->s.size;
5777 | }
5778 |
5779 | if (!size) break;
5780 |
5781 | tsize += size;
5782 |
5783 | if (un_used(b))
5784 | {
5785 | if (show_nodes) leak_print(tl, " %p\n", (void *) size);
5786 | }
5787 | else
5788 | {
5789 | if (show_nodes) leak_print(tl, "* %p\n", (void *) size);
5790 | asize += size;
5791 | }
5792 | }
5793 | }
5794 |
5795 | leak_print(tl, "Total in btree %llu, total alloced %llu\n", (unsigned long long) tsize, (unsigned long long) asize);
5796 |
5797 | #ifdef DEBUG_LEAK
5798 | if (show_nodes & 2)
5799 | {
5800 | int count = btree_print(tl, &tl->bheap);
5801 |
5802 | leak_print(tl, "b_cnt = %d, b_hgt = %d, total = %d\n", tl->b_cnt, tl->b_hgt, count);
5803 | }
5804 |
5805 | mutex_lock(&h_lock);
5806 | size = 0;
5807 | scan_list(&h_list, d)
5808 | {
5809 | size++;
5810 | }
5811 | mutex_unlock(&h_lock);
5812 | leak_print(tl, "%d threads\n", (int) size);
5813 |
5814 | mem_big();
5815 | #endif
5816 | mem_slab();
5817 | }
5818 |
5819 | void malloc_stats(void)
5820 | {
5821 | malloc_stats_aux(0);
5822 | }
5823 | #endif
5824 |
5825 | static void **ialloc_fallback(atls *tl, size_t n, size_t *sizes, void **chunks, int clear)
5826 | {
5827 | size_t i;
5828 | void **out;
5829 |
5830 | /* Get storage for pointers */
5831 | if (!chunks)
5832 | {
5833 | out = local_alloc(tl, sep_align(sizeof(void *) * n));
5834 | if (!out) return NULL;
5835 | }
5836 | else
5837 | {
5838 | out = chunks;
5839 | }
5840 |
5841 | /* Do it manually */
5842 | if (clear)
5843 | {
5844 | for (i = 0; i < n; i++)
5845 | {
5846 | out[i] = zalloc(tl, sizes[0]);
5847 | if (!out[i]) goto fail;
5848 | }
5849 | }
5850 | else
5851 | {
5852 | for (i = 0; i < n; i++)
5853 | {
5854 | out[i] = local_alloc(tl, sizes[i]);
5855 | if (!out[i]) goto fail;
5856 | }
5857 | }
5858 |
5859 | return out;
5860 |
5861 | fail:
5862 | for (n = 0; n < i; n++)
5863 | {
5864 | PREFIX(free)(out[n]);
5865 | }
5866 |
5867 | if (!chunks) PREFIX(free)(out);
5868 |
5869 | return NULL;
5870 | }
5871 |
5872 | static void **ialloc(atls *tl, size_t n, size_t *sizes, void **chunks, int clear)
5873 | {
5874 | size_t i;
5875 |
5876 | size_t nsize;
5877 | size_t total_size = 0;
5878 |
5879 | void *p;
5880 | btree *b, *br;
5881 | unsigned offset;
5882 |
5883 | void **out;
5884 |
5885 | test_all(tl);
5886 |
5887 | test_leak();
5888 |
5889 | /* Zero sized array? */
5890 | if (!n)
5891 | {
5892 | if (chunks) return chunks;
5893 |
5894 | return PREFIX(malloc)(0);
5895 | }
5896 |
5897 | /* Get total size to allocate */
5898 | if (clear)
5899 | {
5900 | total_size = safemul(sep_align(sizes[0]), n);
5901 |
5902 | /* Overflow */
5903 | if (total_size >= TOP_SIZE)
5904 | {
5905 | set_enomem();
5906 | return NULL;
5907 | }
5908 | }
5909 | else
5910 | {
5911 | for (i = 0; i < n; i++)
5912 | {
5913 | nsize = sep_align(sizes[i]);
5914 | total_size += nsize;
5915 |
5916 | /* Overflow */
5917 | if (total_size < nsize)
5918 | {
5919 | set_enomem();
5920 | return NULL;
5921 | }
5922 | }
5923 | }
5924 |
5925 | if (clear) tl->callocable = 0;
5926 |
5927 | while (1)
5928 | {
5929 | p = fast_alloc(tl, total_size);
5930 | if (p) break;
5931 |
5932 | if (total_size < QS_MAX) p = slow_alloc(tl, total_size);
5933 | if (p) break;
5934 |
5935 | /* Too large to allocate normally */
5936 | if (total_size >= BTMALLOC) return ialloc_fallback(tl, n, sizes, chunks, clear);
5937 |
5938 | /* Clear fast lists */
5939 | clear_fast(tl);
5940 |
5941 | /* Try to alloc on the btree */
5942 | b = btree_get(tl, total_size);
5943 | if (b)
5944 | {
5945 | p = split_node(tl, b, b->s.size, total_size);
5946 | break;
5947 | }
5948 |
5949 | /* Try to grab space from a dead thread */
5950 | if (reap_dead(tl)) continue;
5951 |
5952 | /* We need more space, so try to free memory. */
5953 | if (scan_queue(tl, &tl->head, total_size)) continue;
5954 |
5955 | /* Try to allocate a new block */
5956 | p = block_alloc_aux(tl, total_size);
5957 | if (p) break;
5958 |
5959 | /* Everything failed - fall back to individual allocations */
5960 | return ialloc_fallback(tl, n, sizes, chunks, clear);
5961 | }
5962 |
5963 | b = CONTAINER(btree, data, p);
5964 |
5965 | /* Get real total size */
5966 | total_size = b->s.size;
5967 | offset = b->s.bs_offset & ~15;
5968 |
5969 | /* Do we need to clear it? */
5970 | if (clear && !tl->callocable) memset(p, 0, total_size - 8);
5971 |
5972 | /* Get storage for pointers */
5973 | if (!chunks)
5974 | {
5975 | out = local_alloc(tl, sep_align(sizeof(void *) * n));
5976 |
5977 | if (!out)
5978 | {
5979 | PREFIX(free)(p);
5980 | return NULL;
5981 | }
5982 | }
5983 | else
5984 | {
5985 | out = chunks;
5986 | }
5987 |
5988 | for (i = 0; i < n; i++)
5989 | {
5990 | out[i] = p;
5991 |
5992 | if (clear)
5993 | {
5994 | nsize = sep_align(sizes[0]);
5995 | }
5996 | else
5997 | {
5998 | nsize = sep_align(sizes[i]);
5999 | }
6000 | total_size -= nsize;
6001 |
6002 | /* Update local size */
6003 | b->s.size = nsize;
6004 |
6005 | p = shift(p, nsize);
6006 | br = CONTAINER(btree, data, p);
6007 |
6008 | 		/* Create offset part of right separator */
6009 | offset += nsize;
6010 | if (i != n - 1) br->s.bs_offset = offset;
6011 |
6012 | b = br;
6013 | }
6014 |
6015 | 	/* Nothing left, so we are done */
6016 | if (!total_size)
6017 | {
6018 | test_all(tl);
6019 | return out;
6020 | }
6021 |
6022 | /* Resize last element to have the slack */
6023 | p = out[n - 1];
6024 | b = CONTAINER(btree, data, p);
6025 |
6026 | b->s.size += total_size;
6027 | check_sep(b);
6028 |
6029 | /* How big is last allocation? */
6030 | if (clear)
6031 | {
6032 | nsize = sep_align(sizes[0]);
6033 | }
6034 | else
6035 | {
6036 | nsize = sep_align(sizes[n - 1]);
6037 | }
6038 |
6039 | /* Split off excess if too much */
6040 | split_node(tl, b, b->s.size, nsize);
6041 |
6042 | test_all(tl);
6043 | return out;
6044 | }
6045 |
6046 | static noinline void **ialloc_aux(size_t n, size_t *sizes, void **chunks, int clear)
6047 | {
6048 | atls *tl = init_tls();
6049 | if (!tl) return NULL;
6050 |
6051 | return ialloc(tl, n, sizes, chunks, clear);
6052 | }
6053 |
6054 | void **independent_calloc(size_t n, size_t size, void **chunks)
6055 | {
6056 | atls *tl = get_tls();
6057 |
6058 | if (!tl) return ialloc_aux(n, &size, chunks, 1);
6059 |
6060 | return ialloc(tl, n, &size, chunks, 1);
6061 | }
6062 |
6063 | void **independent_comalloc(size_t n, size_t *sizes, void **chunks)
6064 | {
6065 | atls *tl = get_tls();
6066 |
6067 | if (!tl) return ialloc_aux(n, sizes, chunks, 0);
6068 |
6069 | return ialloc(tl, n, sizes, chunks, 0);
6070 | }
6071 |
6072 | #ifndef WINDOWS
6073 | void *malloc_get_state(void)
6074 | {
6075 | abort();
6076 |
6077 | return NULL;
6078 | }
6079 |
6080 | int malloc_set_state(void *p)
6081 | {
6082 | (void) p;
6083 | abort();
6084 |
6085 | return 0;
6086 | }
6087 | #endif
6088 |
6089 |
6090 | #ifdef WINDOWS
6091 | #ifdef PREFIX
6092 | void *PREFIX(_expand)(void *p, size_t size)
6093 | #else /* PREFIX */
6094 | __attribute__((dllimport)) void *_expand(void *p, size_t size)
6095 | #endif /* PREFIX */
6096 | {
6097 | DECL_PROF_FUNC;
6098 |
6099 | atls *tl = get_tls();
6100 |
6101 | /* paranoia */
6102 | if (!p) return NULL;
6103 |
6104 | 	/* Already big enough? Then the existing allocation satisfies the request */
6105 | 	if (malloc_usable_size(p) >= size) return p;
6106 |
6107 | /* Don't handle slab allocations */
6108 | if (is_slab(p)) return NULL;
6109 |
6110 | /* Cannot expand a block created by someone else */
6111 | if (!tl) goto nomem;
6112 |
6113 | p = realloc_aux2(p, size, tl);
6114 |
6115 | /* Did it work? */
6116 | if (malloc_usable_size(p) >= size) return p;
6117 |
6118 | nomem:
6119 | set_enomem();
6120 | return NULL;
6121 | }
6122 |
6123 | /* Nolock functions call their normal functions */
6124 | void PREFIX(_free_nolock)(void *p)
6125 | {
6126 | PREFIX(free)(p);
6127 | }
6128 |
6129 | void *PREFIX(_realloc_nolock)(void *p, size_t size)
6130 | {
6131 | return PREFIX(realloc)(p, size);
6132 | }
6133 |
6134 | void *PREFIX(_calloc_nolock)(size_t n, size_t size)
6135 | {
6136 | return PREFIX(calloc)(n, size);
6137 | }
6138 |
6139 | size_t __pure PREFIX(_msize_nolock)(void *p)
6140 | {
6141 | return malloc_usable_size(p);
6142 | }
6143 |
6144 | #endif
6145 |
--------------------------------------------------------------------------------
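
Usage sketch (a minimal example, not a file from the tree): the batch entry
points above follow the dlmalloc conventions - when chunks == NULL,
independent_comalloc() also heap-allocates the returned pointer array, which
the caller must free. This assumes an empty PREFIX, so the names are bare.

    #include <stdlib.h>

    /* Declarations matching ll_alloc.c above (normally via a header) */
    void **independent_comalloc(size_t n, size_t *sizes, void **chunks);
    int posix_memalign(void **p, size_t align, size_t size);

    int main(void)
    {
    	size_t sizes[3] = {16, 100, 4000};
    	size_t i;
    	void *ap;

    	/* One underlying block carved into three separately freeable chunks */
    	void **out = independent_comalloc(3, sizes, NULL);
    	if (!out) return 1;
    	for (i = 0; i < 3; i++) free(out[i]);
    	free(out);

    	/* align must be a power of two and a multiple of sizeof(void *) */
    	if (posix_memalign(&ap, 64, 1000)) return 1;
    	free(ap);

    	return 0;
    }
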
/src/ll_asm.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009, 2010, 2011 Lockless Inc., Steven Von Fuerst.
3 | *
4 | * This library is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This library is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
16 |  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 | */
17 |
18 | /*
19 | * Functions that require asm for efficiency, or to work at all...
20 | */
21 |
22 | #ifndef LL_ASM_H
23 | #define LL_ASM_H
24 | #include "compiler.h"
25 | #ifdef GCC_ASM
26 | #include <stdint.h>
27 | #define atomic_or(P, V) __sync_or_and_fetch((P), (V))
28 | #define atomic_and(P, V) __sync_and_and_fetch((P), (V))
29 | #define atomic_add(P, V) __sync_add_and_fetch((P), (V))
30 | #define atomic_xadd(P, V) __sync_fetch_and_add((P), (V))
31 | #define atomic_cmpxchg_bool(P, O, N) __sync_bool_compare_and_swap((P), (O), (N))
32 | #define atomic_access(V) (*(volatile typeof(V) *)&(V))
33 |
34 | static inline unsigned char bts(volatile void *mem, size_t offset)
35 | {
36 | unsigned char result;
37 | asm (
38 | "lock; bts %2, %1; setc %0;"
39 | : "=r" (result), "+m" (* (volatile long *) mem)
40 | : "r" (offset)
41 | : "cc");
42 | return result;
43 | }
44 |
45 | static inline unsigned char btr(volatile void *mem, size_t offset)
46 | {
47 | unsigned char result;
48 | asm (
49 | "lock; btr %2, %1; setc %0;"
50 | : "=r" (result), "+m" (* (volatile long *) mem)
51 | : "r" (offset)
52 | : "cc");
53 | return result;
54 | }
55 |
56 | static inline int ffsu(unsigned x)
57 | {
58 | int result;
59 |
60 | asm ("bsf %[x], %[result]"
61 | : [result] "=r" (result)
62 | : [x] "mr" (x)
63 | :"cc");
64 |
65 | return result;
66 | }
67 |
68 | static inline size_t flsu(unsigned x)
69 | {
70 | size_t result;
71 |
72 | asm ("bsr %[x], %[result]"
73 | : [result] "=r" (result)
74 | : [x] "mr" (x)
75 | :"cc");
76 |
77 | return result;
78 | }
79 |
80 | #ifdef __x86_64__
81 | static inline size_t ffsq(size_t x)
82 | {
83 | size_t result;
84 |
85 | asm ("bsfq %[x], %[result]"
86 | : [result] "=r" (result)
87 | : [x] "mr" (x)
88 | :"cc");
89 |
90 | return result;
91 | }
92 |
93 | static inline size_t flsq(size_t x)
94 | {
95 | size_t result;
96 |
97 | asm ("bsrq %[x], %[result]"
98 | : [result] "=r" (result)
99 | : [x] "mr" (x)
100 | :"cc");
101 |
102 | return result;
103 | }
104 |
105 | #else
106 | static inline size_t ffsq(unsigned long long x)
107 | {
108 | size_t result;
109 |
110 | unsigned xlo = x & 0xffffffff;
111 | unsigned xhi = x >> 32;
112 |
113 | unsigned tmp;
114 |
115 | asm ("bsfl %[xhi], %[tmp]\n"
116 | "addl $0x20, %[tmp]\n"
117 | "bsfl %[xlo], %[result]\n"
118 | "cmove %[tmp], %[result]\n"
119 | :[result] "=r" (result), [tmp] "=&r" (tmp)
120 | :[xlo] "rm" (xlo), [xhi] "rm" (xhi)
121 | :"cc");
122 |
123 | return result;
124 | }
125 |
126 | static inline size_t flsq(unsigned long long x)
127 | {
128 | size_t result;
129 |
130 | unsigned xlo = x & 0xffffffff;
131 | unsigned xhi = x >> 32;
132 | unsigned tmp;
133 |
134 | asm ("bsrl %[xlo], %[tmp]\n"
135 | "addl $-0x20, %[tmp]\n"
136 | "bsrl %[xhi], %[result]\n"
137 | "cmove %[tmp], %[result]\n"
138 | "addl $0x20, %[result]\n"
139 | :[result] "=r" (result), [tmp] "=&r" (tmp)
140 | :[xlo] "rm" (xlo), [xhi] "rm" (xhi)
141 | :"cc");
142 |
143 | return result;
144 | }
145 |
146 | #endif
147 |
148 | static inline unsigned char xchg_8(void *ptr, unsigned char x)
149 | {
150 | asm volatile("xchgb %0,%1"
151 | :"=r" (x)
152 | :"m" (*(volatile unsigned char *)ptr), "0" (x)
153 | :"memory");
154 |
155 | return x;
156 | }
157 |
158 | static inline unsigned short xchg_16(void *ptr, unsigned short x)
159 | {
160 | asm volatile("xchgw %0,%1"
161 | :"=r" (x)
162 | :"m" (*(volatile unsigned short *)ptr), "0" (x)
163 | :"memory");
164 |
165 | return x;
166 | }
167 |
168 |
169 | static inline unsigned xchg_32(void *ptr, unsigned x)
170 | {
171 | asm volatile("xchgl %0,%1"
172 | :"=r" (x)
173 | :"m" (*(volatile unsigned *)ptr), "0" (x)
174 | :"memory");
175 |
176 | return x;
177 | }
178 |
179 | #ifdef __x86_64__
180 | static inline unsigned long long xchg_64(void *ptr, unsigned long long x)
181 | {
182 | asm volatile("xchgq %0,%1"
183 | :"=r" (x)
184 | :"m" (*(volatile unsigned long long *)ptr), "0" (x)
185 | :"memory");
186 |
187 | return x;
188 | }
189 |
190 | static inline void *xchg_ptr(void *ptr, void *x)
191 | {
192 | __asm__ __volatile__("xchgq %0,%1"
193 | :"=r" (x)
194 | :"m" (*(volatile uintptr_t *)ptr), "0" ((uintptr_t) x)
195 | :"memory");
196 |
197 | return x;
198 | }
199 | #else
200 | static inline void *xchg_ptr(void *ptr, void *x)
201 | {
202 | __asm__ __volatile__("xchgl %k0,%1"
203 | :"=r" (x)
204 | :"m" (*(volatile uintptr_t *)ptr), "0" ((uintptr_t) x)
205 | :"memory");
206 | return x;
207 | }
208 | #endif
209 |
210 | static inline unsigned long long rdtsc(void)
211 | {
212 | unsigned hi, lo;
213 | asm volatile ("rdtsc" : "=a"(lo), "=d"(hi));
214 | return lo + ((unsigned long long)hi << 32);
215 | }
216 |
217 | #else /* GCC_ASM */
218 |
219 | static inline int ffsu(unsigned x)
220 | {
221 | unsigned long result;
222 | __assume(x);
223 | _BitScanForward(&result, x);
224 |
225 | return result;
226 | }
227 |
228 | static inline int flsu(unsigned x)
229 | {
230 | unsigned long result;
231 | __assume(x);
232 | _BitScanReverse(&result, x);
233 |
234 | return result;
235 | }
236 |
237 | static inline size_t ffsq(unsigned long long x)
238 | {
239 | unsigned long result;
240 | __assume(x);
241 | _BitScanForward64(&result, x);
242 |
243 | return result;
244 | }
245 |
246 | static inline size_t flsq(unsigned long long x)
247 | {
248 | unsigned long result;
249 | __assume(x);
250 | _BitScanReverse64(&result, x);
251 |
252 | return result;
253 | }
254 |
255 | #ifdef __x86_64__
256 | static inline void *xchg_ptr(void *ptr, void *x)
257 | {
258 | 	return (void *) _InterlockedExchange64((volatile __int64 *) ptr, (__int64) x);
259 | }
260 | #else
261 | static inline void *xchg_ptr(void *ptr, void *x)
262 | {
263 | 	return (void *) _InterlockedExchange((volatile long *) ptr, (long) x);
264 | }
265 | #endif
266 |
267 |
268 | #endif /* GCC_ASM */
269 |
270 | #endif /* LL_ASM_H */
271 |
--------------------------------------------------------------------------------
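
Sketch (illustrative, not part of the library): the GCC_ASM primitives above
are the building blocks for lock-free structures. For example,
atomic_cmpxchg_bool() suffices for a Treiber-stack push; the node type and
push() below are made up for the example, and a matching pop would also need
ABA protection. Assumes gcc or clang for the __sync builtin.

    /* Self-contained copies of the two macros used, as defined above */
    #define atomic_cmpxchg_bool(P, O, N) __sync_bool_compare_and_swap((P), (O), (N))
    #define atomic_access(V) (*(volatile __typeof__(V) *)&(V))

    typedef struct node node;
    struct node { node *next; };

    static node *top;

    /* Lock-free push: retry until the CAS installs our node as the new top */
    static void push(node *n)
    {
    	node *old;
    	do
    	{
    		old = atomic_access(top);
    		n->next = old;
    	} while (!atomic_cmpxchg_bool(&top, old, n));
    }
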
/src/ll_list.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009, 2010, 2011 Lockless Inc., Steven Von Fuerst.
3 | *
4 | * This library is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This library is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
16 |  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 | */
17 |
18 | /* Header for intrinsic lists */
19 | #ifndef LL_LIST_H
20 | #define LL_LIST_H
21 |
22 | #include <stddef.h>
23 | #include <stdint.h>
24 |
25 | /* Go from a member * to its container */
26 | #define CONTAINER(T, S, P)\
27 | ((T *) (((uintptr_t) P) - offsetof(T, S)))
28 |
29 | /* List types */
30 | typedef struct slist slist;
31 | struct slist
32 | {
33 | slist *next;
34 | };
35 |
36 | typedef struct dlist dlist;
37 | struct dlist
38 | {
39 | dlist *next;
40 | dlist *prev;
41 | };
42 |
43 |
44 | #define list_entry(T, S, P) CONTAINER(T, S, P)
45 |
46 | #define scan_list(P, I)\
47 | for (I = (P)->next; I != (P); I = I->next)
48 |
49 | #define scan_list_safe(P, I, T)\
50 | for (I = (P)->next, T = I->next; I != (P); I = T, T = I->next)
51 |
52 | #define dlist_empty(T) ((T)->next == T)
53 |
54 | #define scan_slist(P, I)\
55 | for (I = (P)->next; I; I = I->next)
56 |
57 | #define scan_slist_safe(P, I, T)\
58 | for (I = (P)->next, I?(T = I->next):0; I; I = T, I?(T = I->next):0)
59 |
60 | /* Add an entry to the front of the list */
61 | static inline void slist_add(slist *s, slist *a)
62 | {
63 | a->next = s->next;
64 | s->next = a;
65 | }
66 |
67 | /* Remove first entry, and return it */
68 | static inline slist *slist_rem(slist *s)
69 | {
70 | slist *r = s->next;
71 | s->next = r->next;
72 |
73 | return r;
74 | }
75 |
76 | #define DLIST_INIT(X) {.next = &X, .prev = &X}
77 |
78 | static inline void dlist_init(dlist *d)
79 | {
80 | /* Point to self, so list deletion is faster */
81 | d->next = d;
82 | d->prev = d;
83 | }
84 |
85 | /* Adds "a" to start of dlist d */
86 | static inline void dlist_add(dlist *d, dlist *a)
87 | {
88 | dlist *dn = d->next;
89 |
90 | a->next = dn;
91 | a->prev = d;
92 | dn->prev = a;
93 | d->next = a;
94 | }
95 |
96 | /* Adds "a" to end of dlist d */
97 | static inline void dlist_add_end(dlist *d, dlist *a)
98 | {
99 | dlist *dp = d->prev;
100 |
101 | a->next = d;
102 | a->prev = dp;
103 | dp->next = a;
104 | d->prev = a;
105 | }
106 |
107 | /* Remove node "d" from the list */
108 | static inline void dlist_del(dlist *d)
109 | {
110 | dlist *dp = d->prev;
111 | dlist *dn = d->next;
112 |
113 | dn->prev = dp;
114 | dp->next = dn;
115 | }
116 |
117 | static inline dlist *dlist_rem_last(dlist *d)
118 | {
119 | dlist *dp = d->prev;
120 | if (dp == d) return NULL;
121 |
122 | dlist_del(dp);
123 |
124 | return dp;
125 | }
126 |
127 | /* Merge two dlists: d2 into d*/
128 | static inline void dlist_merge(dlist *d, dlist *d2)
129 | {
130 | dlist *dp = d->prev;
131 | dlist *d2n = d2->next;
132 | dlist *d2p = d2->prev;
133 |
134 | /* Don't need to do anything if adding an empty list */
135 | if (d2n == d2) return;
136 |
137 | dp->next = d2n;
138 | d2n->prev = dp;
139 |
140 | d->prev = d2p;
141 | d2p->next = d;
142 | }
143 |
144 | #endif /* LL_LIST_H */
145 |
--------------------------------------------------------------------------------
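
Sketch (illustrative, not a file from the tree): these lists are intrusive -
the links live inside the client structure, and CONTAINER() maps a link
pointer back to its owner. The item type below is made up for the example.

    #include <stdio.h>
    #include "ll_list.h"

    typedef struct item item;
    struct item
    {
    	int key;
    	dlist link;	/* The list node is embedded in the item */
    };

    int main(void)
    {
    	dlist head = DLIST_INIT(head);
    	item a = {1, {NULL, NULL}};
    	item b = {2, {NULL, NULL}};
    	dlist *d;

    	dlist_add(&head, &a.link);	/* Push at front */
    	dlist_add_end(&head, &b.link);	/* Append at back */

    	/* Prints 1 then 2 */
    	scan_list(&head, d)
    	{
    		printf("%d\n", CONTAINER(item, link, d)->key);
    	}

    	return 0;
    }
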
/src/makefile:
--------------------------------------------------------------------------------
1 | CFLAGS := -fomit-frame-pointer -Wcast-qual -Wmissing-format-attribute -Wlogical-op -Wstrict-aliasing -Wsign-compare -Wdeclaration-after-statement -Wnested-externs -Wdisabled-optimization -Winline -Wundef -Wimplicit -Wunused -Wfloat-equal -Winit-self -Wformat=2 -Wswitch -Wsequence-point -Wparentheses -Wimplicit -Wchar-subscripts -Wredundant-decls -Wstrict-prototypes -Wbad-function-cast -Wpointer-arith -Wwrite-strings -Wno-long-long -Wmissing-declarations -Wmissing-prototypes -Wextra -Wall -pedantic -ggdb3 -std=gnu99 -O3
2 | CPPFLAGS :=
3 | LIBS :=
4 | LDFLAGS :=
5 |
6 | # To compile the stub on windows, you'll need to set up these batch files to
7 | # call MSVC with the right (64bit) environment.
8 | CL_WIN := cl64.bat
9 | LINK_WIN := link64.bat
10 | # The main .c file is cross-compiled with mingw-w64
11 | CC_WIN := x86_64-w64-mingw32-gcc -mwin32
12 | WIN_CFLAGS = $(CFLAGS) -fno-leading-underscore
13 | WIN_STRIP = x86_64-w64-mingw32-strip --strip-debug --strip-unneeded
14 |
15 | ALLOCLIBMAJOR := 1
16 | ALLOCLIBMINOR := 3
17 |
18 | ALLOCSFX := .so.$(ALLOCLIBMAJOR).$(ALLOCLIBMINOR)
19 |
20 | ALLOCLIBM := libllalloc.so.$(ALLOCLIBMAJOR)
21 | ALLOCLIB := $(ALLOCLIBM).$(ALLOCLIBMINOR)
22 |
23 | AR := ar
24 | RANLIB := ranlib
25 | STRIP := strip
26 |
27 | # Expand dependencies one level
28 | dependless = %.o %.a %.d %.h
29 | expand = $($(var)) $(var) $(var).d
30 | depend_test = $(if $(filter $(dependless),$(var)),$(var),$(expand))
31 | depend = $(sort $(foreach var,$(1),$(depend_test)))
32 |
33 | & = $(filter-out %.h %.d,$^)
34 |
35 | include $(wildcard *.d)
36 |
37 | DEPEND = $(SHELL) -ec 'gcc -MM $(CPPFLAGS) $< | sed -n "H;$$ {g;s@.*:\(.*\)@$< := \$$\(wildcard\1\)\n$*.o $@: $$\($<\)@;p}" > $@'
38 |
39 | default: staticlib dynamiclib
40 |
41 | staticlib: libllalloc.a
42 |
43 | dynamiclib: libllalloc$(ALLOCSFX)
44 |
45 | %.S.d: %.S
46 | $(DEPEND)
47 |
48 | %.c.d: %.c
49 | $(DEPEND)
50 |
51 | libllalloc.o: $(call depend,ll_alloc.c)
52 | $(CC) $& $(CFLAGS) $(LDFLAGS) -fPIC -pthread -c -o $@ $(LIBS)
53 |
54 | libllalloc.a: libllalloc.o
55 | $(STRIP) -g $^
56 | $(AR) rcs $@ $^
57 | $(RANLIB) $@
58 |
59 | libllalloc$(ALLOCSFX): $(call depend,ll_alloc.c)
60 | $(CC) $& $(CFLAGS) $(LDFLAGS) -shared -fpic -Wl,-soname,libllalloc$(ALLOCSFX) -Wl,-z,interpose -o $@ $(LIBS)
61 | $(STRIP) $@
62 |
63 |
64 | # Create the object files for the windows version via cross-compiling
65 | llalloc.obj: $(call depend,ll_alloc.c)
66 | $(CC_WIN) $(WIN_CFLAGS) $& -c -o $@
67 | $(WIN_STRIP) $@
68 |
69 | lldalloc.obj: $(call depend,ll_alloc.c)
70 | $(CC_WIN) $(WIN_CFLAGS) $& -c -o $@ -DUSE_DLL
71 | $(WIN_STRIP) $@
72 |
73 | # Use these commands within windows to compile the stubs
74 | win_stub.obj: win_stub.cc
75 | $(CL_WIN) /nologo /O2 win_stub.cc -c /Fowin_stub.obj
76 |
77 | win_stubd.obj: win_stub.cc
78 | $(CL_WIN) /nologo /O2 win_stub.cc -c /Fowin_stubd.obj /DUSE_DLL
79 |
80 | # Link the stubs with the main object file to create the library
81 | llalloc.lib: llalloc.obj win_stub.obj
82 | $(LINK_WIN) /nologo /OUT:llalloc.lib llalloc.obj win_stub.obj
83 |
84 | llalloc.dll: lldalloc.obj win_stubd.obj
85 | 	$(LINK_WIN) /nologo /OUT:lldalloc.dll /DLL /DEF:alexport.def /BASE:0x63800000 lldalloc.obj win_stubd.obj
86 | mv lldalloc.dll llalloc.dll
87 |
88 | clean:
89 | rm -f *.o *.a *.so *.so.* *.d *.obj
90 |
91 |
--------------------------------------------------------------------------------
/src/ming.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009, 2010, 2011 Lockless Inc., Steven Von Fuerst.
3 | *
4 | * This library is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This library is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
16 |  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 | */
17 |
18 | /* Compiler specific details - mingw gcc */
19 | #ifndef LL_MING_H
20 | #define LL_MING_H
21 |
22 | #ifndef WINDOWS
23 | #define WINDOWS
24 | #endif
25 |
26 | #include <windows.h>
27 |
28 | void set_enomem(void);
29 |
30 | #define __thread__ __thread __attribute__ ((tls_model ("initial-exec")))
31 |
32 | /* #define HAVE_PTHREADS */
33 | /* #define HAVE_SYS_SELECT */
34 | /* #define HAVE_SYS_SOCKET */
35 | /* #define HAVE_NETINET_IN */
36 | /* #define HAVE_SYS_MMAN */
37 |
38 | #define cache_align __attribute__((aligned(64)))
39 | #define noinline __attribute__((__noinline__))
40 | #define prefetch(x) __builtin_prefetch(x)
41 | #define barrier() asm volatile("": : :"memory")
42 | #define forget(v) asm volatile("": "=m"(v) :"m"(v))
43 | #define cpu_relax() asm volatile("rep; nop\n": : :"memory")
44 | #define likely(x) __builtin_expect(!!(x), 1)
45 | #define unlikely(x) __builtin_expect(!!(x), 0)
46 | #define gcc_used __attribute__((used))
47 | #define _export_
48 | #define __pure __attribute__((pure))
49 |
50 | #define GCC_ASM
51 |
52 | #include <stdlib.h>
53 |
54 | struct iovec
55 | {
56 | void *iov_base;
57 | size_t iov_len;
58 | };
59 |
60 | #ifndef EBUSY
61 | #define EBUSY 1
62 | #endif
63 | #define pthread_mutex_lock EnterCriticalSection
64 | #define pthread_mutex_unlock LeaveCriticalSection
65 | #define pthread_mutex_t CRITICAL_SECTION
66 | static inline int pthread_mutex_trylock(pthread_mutex_t *m)
67 | {
68 | return TryEnterCriticalSection(m) ? 0 : EBUSY;
69 | }
70 | #define pthread_mutex_init(L, A) InitializeCriticalSection(L)
71 | #define pthread_mutex_destroy(L) DeleteCriticalSection(L)
72 | #define pthread_cond_init(C, A) InitializeConditionVariable(C)
73 | #define pthread_cond_signal(C) WakeConditionVariable(C)
74 | #define pthread_cond_broadcast(C) WakeAllConditionVariable(C)
75 | #define pthread_cond_wait(C, M) SleepConditionVariableCS((C), (M), INFINITE)
76 | #define pthread_cond_t CONDITION_VARIABLE
77 | #define pthread_key_create 1
78 |
79 | #define MAP_FAILED NULL
80 |
81 | #include <stdarg.h>
82 | #include <stdio.h>
83 |
84 | static __attribute__((format (ms_printf, 2, 3))) inline void errx(int ret, const char *fmt, ...)
85 | {
86 | va_list va;
87 |
88 | /* Get args */
89 | va_start(va, fmt);
90 |
91 | /* Print it */
92 | vfprintf(stderr, fmt, va);
93 |
94 | /* Make sure it is printed */
95 | fflush(NULL);
96 |
97 | /* Done */
98 | va_end(va);
99 |
100 | exit(ret);
101 | }
102 |
103 | #ifndef _CRT_ERRNO_DEFINED
104 | #define _CRT_ERRNO_DEFINED
105 | __cdecl int _set_errno(int err);
106 | __cdecl int _get_errno(int *err);
107 | #endif /* _CRT_ERRNO_DEFINED */
108 |
109 | #endif /* LL_MING_H */
110 |
--------------------------------------------------------------------------------
/src/vs.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2014 Vladislav Samsonov.
3 | *
4 | * This library is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This library is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
16 |  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 | */
17 |
18 | #ifndef LL_VS_H
19 | #define LL_VS_H
20 |
21 | #ifndef WINDOWS
22 | #define WINDOWS
23 | #endif
24 |
25 | #include <windows.h>
26 | #include <stddef.h>
27 | #include <intrin.h>
28 |
29 | #define inline __inline
30 | #define __extension__
31 | #define cache_align __declspec(align(64))
32 | #define noinline __declspec(noinline)
33 | #define __pure
34 |
35 | #ifdef _WIN64
36 | #define __x86_64__ 1
37 | #endif
38 |
39 | struct iovec
40 | {
41 | void *iov_base;
42 | size_t iov_len;
43 | };
44 |
45 | #ifndef EBUSY
46 | #define EBUSY 1
47 | #endif
48 | #define pthread_mutex_lock EnterCriticalSection
49 | #define pthread_mutex_unlock LeaveCriticalSection
50 | #define pthread_mutex_t CRITICAL_SECTION
51 | static inline int pthread_mutex_trylock(pthread_mutex_t *m)
52 | {
53 | return TryEnterCriticalSection(m) ? 0 : EBUSY;
54 | }
55 | #define pthread_mutex_init(L, A) InitializeCriticalSection(L)
56 | #define pthread_mutex_destroy(L) DeleteCriticalSection(L)
57 | #define pthread_cond_init(C, A) InitializeConditionVariable(C)
58 | #define pthread_cond_signal(C) WakeConditionVariable(C)
59 | #define pthread_cond_broadcast(C) WakeAllConditionVariable(C)
60 | #define pthread_cond_wait(C, M) SleepConditionVariableCS((C), (M), INFINITE)
61 | #define pthread_cond_t CONDITION_VARIABLE
62 | #define pthread_key_create 1
63 |
64 | #define MAP_FAILED NULL
65 |
66 | #ifndef _CRT_ERRNO_DEFINED
67 | #define _CRT_ERRNO_DEFINED
68 | int __cdecl _set_errno(int err);
69 | int __cdecl _get_errno(int *err);
70 | #endif /* _CRT_ERRNO_DEFINED */
71 |
72 | #endif
73 |
--------------------------------------------------------------------------------
/src/win_stub.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2009, 2010, 2011 Lockless Inc., Steven Von Fuerst.
3 | *
4 | * This library is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * This library is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
16 |  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 | */
17 |
18 | #include <stdlib.h>
19 | #include <stddef.h>
20 | #include "windows.h"
21 | #include "errno.h"
22 |
23 |
24 | /* Hack to use tls */
25 | #ifdef __GNUC__
26 | __thread void *ll_hack_tls;
27 | #else
28 | __declspec(thread) void *ll_hack_tls;
29 | #endif
30 |
31 | extern "C"
32 | {
33 | int llalloc_use = 0;
34 | void lldebug_hook(void);
35 | void *valloc(size_t size);
36 | int handle_oom(int size);
37 |
38 | void llmutex_lock(SRWLOCK *l);
39 | void llmutex_unlock(SRWLOCK *l);
40 | int llmutex_trylock(SRWLOCK *l);
41 | };
42 |
43 | #ifdef USE_DLL
44 |
45 | extern "C"
46 | {
47 | /* Hack - indirect calls to crt functions */
48 | extern int (* __callnewh)(size_t);
49 | extern int (* __newmode)(void);
50 | }
51 | #define _newmode (__newmode())
52 | #define _callnewh __callnewh
53 |
54 | #else /* USE_DLL */
55 | extern "C"
56 | {
57 | /* Undocumented functions that need declarations */
58 | int _callnewh(size_t size);
59 | extern int _newmode;
60 | }
61 | #endif /* USE_DLL */
62 |
63 | int handle_oom(int size)
64 | {
65 | #ifndef __GNUC__
66 | if (_newmode)
67 | {
68 | __try
69 | {
70 | if (_callnewh(size)) return 1;
71 | }
72 | __except(EXCEPTION_EXECUTE_HANDLER)
73 | {
74 | /* Do nothing, and fail with ENOMEM below */
75 | }
76 | }
77 | #endif
78 | _set_errno(ENOMEM);
79 | return 0;
80 | }
81 |
82 | void lldebug_hook(void)
83 | {
84 | void *a = valloc(0);
85 | free(a);
86 | }
87 |
88 | void llmutex_lock(SRWLOCK *l)
89 | {
90 | AcquireSRWLockExclusive(l);
91 | }
92 |
93 | void llmutex_unlock(SRWLOCK *l)
94 | {
95 | ReleaseSRWLockExclusive(l);
96 | }
97 |
98 | int llmutex_trylock(SRWLOCK *l)
99 | {
100 | /* Quick check for users */
101 | if (*(void **) l) return EBUSY;
102 |
103 | /* Try to grab lock if it has no users */
104 | if (!InterlockedCompareExchangePointer((void **) l, (void *) 1, NULL)) return 0;
105 |
106 | return EBUSY;
107 | }
108 |
109 |
--------------------------------------------------------------------------------
/tests/calloc.c:
--------------------------------------------------------------------------------
1 | #include <stdlib.h>
2 | #include <string.h>
3 |
4 | void * calloc(size_t, size_t);
5 |
6 | const size_t min_size = 1024 * 1;
7 | const size_t max_size = 1024 * 1024;
8 | #define num 100
9 |
10 | int main (void)
11 | {
12 | size_t i;
13 | char * ptrs[num];
14 | for (i = 0; i < num; i++)
15 | {
16 | size_t size = (rand() % (max_size - min_size)) + min_size;
17 | ptrs[i] = calloc(size, 1);
18 | memset(ptrs[i], 1, size);
19 | }
20 | for (i = 0; i < num; i++)
21 | free(ptrs[i]);
22 | return 0;
23 | }
24 |
--------------------------------------------------------------------------------
/tests/malloc.c:
--------------------------------------------------------------------------------
1 | #include <stdlib.h>
2 | #include <string.h>
3 |
4 | void * malloc(size_t);
5 |
6 | const size_t min_size = 1024 * 1;
7 | const size_t max_size = 1024 * 1024;
8 | #define num 100
9 |
10 | int main (void)
11 | {
12 | size_t i;
13 | char * ptrs[num];
14 | for (i = 0; i < num; i++)
15 | {
16 | size_t size = (rand() % (max_size - min_size)) + min_size;
17 | ptrs[i] = malloc(size);
18 | memset(ptrs[i], 1, size);
19 | }
20 | for (i = 0; i < num; i++)
21 | free(ptrs[i]);
22 | return 0;
23 | }
24 |
--------------------------------------------------------------------------------
/tests/realloc.c:
--------------------------------------------------------------------------------
1 | #include <stdlib.h>
2 | #include <string.h>
3 |
4 | void * realloc(void *, size_t);
5 |
6 | const size_t min_size = 1024 * 1;
7 | const size_t max_size = 1024 * 1024;
8 | #define num 100
9 |
10 | int main (void)
11 | {
12 | size_t i;
13 | char * ptrs[num];
14 | for (i = 0; i < num; i++)
15 | {
16 | size_t size = (rand() % (max_size - min_size)) + min_size;
17 | ptrs[i] = realloc(NULL, 1);
18 | ptrs[i] = realloc(ptrs[i], size);
19 | memset(ptrs[i], 1, size);
20 | }
21 | for (i = 0; i < num; i++)
22 | free(ptrs[i]);
23 | return 0;
24 | }
25 |
--------------------------------------------------------------------------------
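
A companion test for the aligned entry points could follow the same pattern
as the three tests above; this sketch is not part of the original tree and
assumes posix_memalign() from src/ll_alloc.c.

    #include <stdlib.h>
    #include <stdint.h>

    int posix_memalign(void **, size_t, size_t);

    #define num 100

    int main (void)
    {
    	size_t i;
    	void * ptrs[num];
    	for (i = 0; i < num; i++)
    	{
    		size_t align = (size_t) 8 << (i % 10);	/* 8 bytes up to 4096 */
    		if (posix_memalign(&ptrs[i], align, align * 2)) return 1;
    		if ((uintptr_t) ptrs[i] & (align - 1)) return 1;	/* Verify alignment */
    	}
    	for (i = 0; i < num; i++)
    		free(ptrs[i]);
    	return 0;
    }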