├── .travis.yml ├── LICENSE ├── README.md ├── README_zh.md ├── aco.c ├── aco.h ├── aco_assert_override.h ├── acosw.S ├── img ├── logo │ ├── icon-blue.svg │ ├── icon-white.svg │ ├── logo-blue.png │ ├── logo-blue.svg │ ├── logo-white.png │ └── logo-white.svg ├── proof_0.png ├── proof_1.png ├── proof_2.png ├── proof_3.png ├── qr_alipay.png ├── qr_wechat.png ├── thread_model_0.png ├── thread_model_1.png ├── thread_model_2.png └── thread_model_3.png ├── make.sh ├── test.sh ├── test_aco_benchmark.c ├── test_aco_synopsis.c ├── test_aco_tutorial_0.c ├── test_aco_tutorial_1.c ├── test_aco_tutorial_2.c ├── test_aco_tutorial_3.c ├── test_aco_tutorial_4.c ├── test_aco_tutorial_5.c └── test_aco_tutorial_6.c /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: required 3 | 4 | notifications: 5 | email: 6 | - 00hnes@gmail.com 7 | 8 | language: c 9 | 10 | script: 11 | - echo $TRAVIS_EVENT_TYPE 12 | - echo $TRAVIS_OS_NAME 13 | - echo $CC 14 | - cc --version 15 | - if [ "$TRAVIS_OS_NAME" = "osx" ] ; then brew update; brew install grep;export PATH="/usr/local/opt/grep/libexec/gnubin:$PATH"; fi 16 | - if [ "$TRAVIS_OS_NAME" = "osx" ] ; then brew install gnu-sed;export PATH="/usr/local/opt/gnu-sed/libexec/gnubin:$PATH"; fi 17 | - if [ "$TRAVIS_OS_NAME" = "linux" ] ; then sudo apt-get install -qq -y libreadline-dev libncurses5-dev libpcre3-dev libssl-dev perl make build-essential valgrind gcc-multilib g++-multilib libc6-dbg libc6-dbg:i386 ; fi 18 | - date 19 | - grep -V 20 | - sed --version 21 | - export EXTRA_CFLAGS="" 22 | - echo "" 23 | - bash test.sh version_check=1.2.4 24 | - echo "" 25 | - if [ "$TRAVIS_OS_NAME" = "linux" ] ; then mkdir output ; bash make.sh ; ls output ; ls output | wc -l ; cd output ; bash ../test.sh ; cd .. 
; rm -fr output; fi 26 | - echo "" 27 | - if [ "$TRAVIS_OS_NAME" = "linux" ] ; then mkdir output ; bash make.sh -o no-m32 ; ls output ; ls output | wc -l ; cd output ; bash ../test.sh ; cd .. ; rm -fr output; fi 28 | - echo "" 29 | - mkdir output ; bash make.sh -o no-valgrind ; ls output ; ls output | wc -l ; cd output ; bash ../test.sh ; cd .. ; rm -fr output 30 | - echo "" 31 | - if [ "$TRAVIS_OS_NAME" = "linux" ] ; then mkdir output ; bash make.sh -o no-valgrind -o no-m32 ; ls output ; ls output | wc -l ; cd output ; bash ../test.sh ; cd .. ; rm -fr output; fi 32 | - echo "" 33 | - export EXTRA_CFLAGS="--std=c99" 34 | - echo "" 35 | - if [ "$TRAVIS_OS_NAME" = "linux" ] ; then mkdir output ; bash make.sh ; ls output ; ls output | wc -l ; cd output ; bash ../test.sh ; cd .. ; rm -fr output; fi 36 | - echo "" 37 | - if [ "$TRAVIS_OS_NAME" = "linux" ] ; then mkdir output ; bash make.sh -o no-m32 ; ls output ; ls output | wc -l ; cd output ; bash ../test.sh ; cd .. ; rm -fr output; fi 38 | - echo "" 39 | - mkdir output ; bash make.sh -o no-valgrind ; ls output ; ls output | wc -l ; cd output ; bash ../test.sh ; cd .. ; rm -fr output 40 | - echo "" 41 | - if [ "$TRAVIS_OS_NAME" = "linux" ] ; then mkdir output ; bash make.sh -o no-valgrind -o no-m32 ; ls output ; ls output | wc -l ; cd output ; bash ../test.sh ; cd .. 
; rm -fr output; fi 42 | - echo "" ; date 43 | 44 | matrix: 45 | include: 46 | - os: linux 47 | compiler: gcc-4.9 48 | addons: 49 | apt: 50 | sources: 51 | - ubuntu-toolchain-r-test 52 | packages: 53 | - gcc-4.9 54 | - os: linux 55 | compiler: clang-3.5 56 | addons: 57 | apt: 58 | sources: 59 | - ubuntu-toolchain-r-test 60 | - llvm-toolchain-trusty-3.5 61 | packages: 62 | - clang-3.5 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Logo of libaco](https://github.com/libaco/logo/blob/main/web/libaco_logo_blue.svg)](https://libaco.org) 2 | 3 | libaco - A blazing fast and lightweight C asymmetric coroutine library. 4 | 5 | The code name of this project is Arkenstone 💎 6 | 7 | Asymmetric COroutine & Arkenstone is the reason why it's been named `aco`. 8 | 9 | Currently supports Sys V ABI of Intel386 and x86-64. 
10 | 11 | Here is a brief summary of this project: 12 | 13 | - Along with the implementation of a production-ready C coroutine library, this project provides detailed documentation on how to implement the *fastest* and *correct* coroutine library, together with a strict [mathematical proof](#proof-of-correctness); 14 | - It has no more than 700 LOC but has the full functionality which you may want from a coroutine library; 15 | - The [benchmark](#benchmark) part shows that a context switch between coroutines only takes about *10 ns* (in the case of standalone stack) on the AWS c5d.large machine; 16 | - Users can choose to create a new coroutine with a *standalone stack* or with a *shared stack* (which could be shared with others); 17 | - It is extremely memory efficient: running *10,000,000* coroutines simultaneously costs only *2.8 GB* of physical memory (run with tcmalloc, each coroutine has a *120B* copy-stack size configuration). 18 | 19 | The phrase "*fastest*" above means the fastest context switching implementation which complies with the Sys V ABI of Intel386 or AMD64. 
20 | 21 | [![Build Status Travis](https://img.shields.io/travis/hnes/libaco.svg?style=flat-square&&branch=master)](https://travis-ci.org/hnes/libaco) 22 | [![Releases](https://img.shields.io/github/release/hnes/libaco/all.svg?style=flat-square)](https://github.com/hnes/libaco/releases) 23 | [![LICENSE](https://img.shields.io/github/license/hnes/libaco.svg?style=flat-square)](https://github.com/hnes/libaco/blob/master/LICENSE) 24 | [![中文文档](https://img.shields.io/badge/doc-en%20+%20中文-blue.svg?style=flat-square)](https://github.com/hnes/libaco/blob/master/README_zh.md) 25 | [![Tweet](https://img.shields.io/twitter/url/http/shields.io.svg?style=social)](https://twitter.com/intent/tweet?text=libaco+-+A+blazing+fast+and+lightweight+C+asymmetric+coroutine+library&url=https://github.com/hnes/libaco&via=00hnes) 26 | 27 | Issues and PRs are welcome 🎉🎉🎉 28 | 29 | Note: Please use [releases][github-release] instead of the `master` to build the final binary. 30 | 31 | Besides this readme, you could also visit the documentation from [https://libaco.org/docs](https://libaco.org/docs). Please follow this readme if there are any differences because the documentation on the website may be lagging behind from this readme. 
32 | 33 | [github-release]: https://github.com/hnes/libaco/releases 34 | 35 | # Table of Contents 36 | 37 | * [Status](#status) 38 | * [Synopsis](#synopsis) 39 | * [Description](#description) 40 | * [Build and Test](#build-and-test) 41 | * [CFLAGS](#cflags) 42 | * [Build](#build) 43 | * [Test](#test) 44 | * [Tutorials](#tutorials) 45 | * [API](#api) 46 | * [aco_thread_init](#aco_thread_init) 47 | * [aco_share_stack_new](#aco_share_stack_new) 48 | * [aco_share_stack_new2](#aco_share_stack_new2) 49 | * [aco_share_stack_destroy](#aco_share_stack_destroy) 50 | * [aco_create](#aco_create) 51 | * [aco_resume](#aco_resume) 52 | * [aco_yield](#aco_yield) 53 | * [aco_get_co](#aco_get_co) 54 | * [aco_get_arg](#aco_get_arg) 55 | * [aco_exit](#aco_exit) 56 | * [aco_destroy](#aco_destroy) 57 | * [MACROS](#macros) 58 | * [Benchmark](#benchmark) 59 | * [Proof of Correctness](#proof-of-correctness) 60 | * [Running Model](#running-model) 61 | * [Mathematical Induction](#mathematical-induction) 62 | * [Miscellaneous](#miscellaneous) 63 | * [Red Zone](#red-zone) 64 | * [Stack Pointer](#stack-pointer) 65 | * [Best Practice](#best-practice) 66 | * [TODO](#todo) 67 | * [CHANGES](#changes) 68 | * [Donation](#donation) 69 | * [Thanks](#thanks) 70 | * [Copyright and License](#copyright-and-license) 71 | 72 | # Status 73 | 74 | Production ready. 75 | 76 | # Synopsis 77 | 78 | ```c 79 | #include "aco.h" 80 | #include <stdio.h> 81 | 82 | // this header would override the default C `assert`; 83 | // you may refer to the "API : MACROS" part for more details. 
84 | #include "aco_assert_override.h" 85 | 86 | void foo(int ct) { 87 | printf("co: %p: yield to main_co: %d\n", aco_get_co(), *((int*)(aco_get_arg()))); 88 | aco_yield(); 89 | *((int*)(aco_get_arg())) = ct + 1; 90 | } 91 | 92 | void co_fp0() { 93 | printf("co: %p: entry: %d\n", aco_get_co(), *((int*)(aco_get_arg()))); 94 | int ct = 0; 95 | while(ct < 6){ 96 | foo(ct); 97 | ct++; 98 | } 99 | printf("co: %p: exit to main_co: %d\n", aco_get_co(), *((int*)(aco_get_arg()))); 100 | aco_exit(); 101 | } 102 | 103 | int main() { 104 | aco_thread_init(NULL); 105 | 106 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 107 | aco_share_stack_t* sstk = aco_share_stack_new(0); 108 | 109 | int co_ct_arg_point_to_me = 0; 110 | aco_t* co = aco_create(main_co, sstk, 0, co_fp0, &co_ct_arg_point_to_me); 111 | 112 | int ct = 0; 113 | while(ct < 6){ 114 | assert(co->is_end == 0); 115 | printf("main_co: yield to co: %p: %d\n", co, ct); 116 | aco_resume(co); 117 | assert(co_ct_arg_point_to_me == ct); 118 | ct++; 119 | } 120 | printf("main_co: yield to co: %p: %d\n", co, ct); 121 | aco_resume(co); 122 | assert(co_ct_arg_point_to_me == ct); 123 | assert(co->is_end); 124 | 125 | printf("main_co: destroy and exit\n"); 126 | aco_destroy(co); 127 | co = NULL; 128 | aco_share_stack_destroy(sstk); 129 | sstk = NULL; 130 | aco_destroy(main_co); 131 | main_co = NULL; 132 | 133 | return 0; 134 | } 135 | ``` 136 | ```bash 137 | # default build 138 | $ gcc -g -O2 acosw.S aco.c test_aco_synopsis.c -o test_aco_synopsis 139 | $ ./test_aco_synopsis 140 | main_co: yield to co: 0x1887120: 0 141 | co: 0x1887120: entry: 0 142 | co: 0x1887120: yield to main_co: 0 143 | main_co: yield to co: 0x1887120: 1 144 | co: 0x1887120: yield to main_co: 1 145 | main_co: yield to co: 0x1887120: 2 146 | co: 0x1887120: yield to main_co: 2 147 | main_co: yield to co: 0x1887120: 3 148 | co: 0x1887120: yield to main_co: 3 149 | main_co: yield to co: 0x1887120: 4 150 | co: 0x1887120: yield to main_co: 4 151 | main_co: 
yield to co: 0x1887120: 5 152 | co: 0x1887120: yield to main_co: 5 153 | main_co: yield to co: 0x1887120: 6 154 | co: 0x1887120: exit to main_co: 6 155 | main_co: destroy and exit 156 | # i386 157 | $ gcc -g -m32 -O2 acosw.S aco.c test_aco_synopsis.c -o test_aco_synopsis 158 | # share fpu and mxcsr env 159 | $ gcc -g -D ACO_CONFIG_SHARE_FPU_MXCSR_ENV -O2 acosw.S aco.c test_aco_synopsis.c -o test_aco_synopsis 160 | # with valgrind friendly support 161 | $ gcc -g -D ACO_USE_VALGRIND -O2 acosw.S aco.c test_aco_synopsis.c -o test_aco_synopsis 162 | $ valgrind --leak-check=full --tool=memcheck ./test_aco_synopsis 163 | ``` 164 | 165 | For more information you may refer to the "[Build and Test](#build-and-test)" part. 166 | 167 | # Description 168 | 169 | ![thread_model_0](img/thread_model_0.png) 170 | 171 | There are 4 basic elements of an ordinary execution state: `{cpu_registers, code, heap, stack}`. 172 | 173 | Since the code information is indicated by the `({E|R})?IP` register, and the address of the memory allocated from heap is normally stored in the stack directly or indirectly, we could simplify the 4 elements into only 2 of them: `{cpu_registers, stack}`. 174 | 175 | ![thread_model_1](img/thread_model_1.png) 176 | 177 | We define the `main co` as the coroutine who monopolizes the default stack of the current thread. And since the main co is the only user of this stack, we only need to save/restore the necessary cpu registers' state of the main co when it is yielded-from/resumed-to (switched-out/switched-in). 178 | 179 | Next, the definition of the `non-main co` is the coroutine whose execution stack is a stack which is not the default stack of the current thread and may be shared with the other non-main co. Thus the non-main co must have a `private save stack` memory buffer to save/restore its execution stack when it is switched-out/switched-in (because the succeeding/preceding co will use/has used the share stack as its execution stack). 
180 | 181 | ![thread_model_2](img/thread_model_2.png) 182 | 183 | There is a special case of non-main co, which we call the `standalone non-main co` in libaco: the share stack of the non-main coroutine has only one co user. Thus there is no need to do the saving/restoring work with its private save stack when it is switched-out/switched-in since no other co will touch the execution stack of the standalone non-main co except itself. 184 | 185 | ![thread_model_3](img/thread_model_3.png) 186 | 187 | Finally, we get the big picture of libaco. 188 | 189 | There is a "[Proof of Correctness](#proof-of-correctness)" part you may find really helpful if you want to dive into the internals of libaco or want to implement your own coroutine library. 190 | 191 | It is also highly recommended to read the source code of the tutorials and benchmark next. The [benchmark](#benchmark) result is very impressive and enlightening too. 192 | 193 | # Build and Test 194 | 195 | ## CFLAGS 196 | 197 | * `-m32` 198 | 199 | The `-m32` option of gcc could help you to build the i386 application of libaco on an x86_64 machine. 200 | 201 | * C macro: `ACO_CONFIG_SHARE_FPU_MXCSR_ENV` 202 | 203 | You could define the global C macro `ACO_CONFIG_SHARE_FPU_MXCSR_ENV` to speed up the performance of context switching between coroutines slightly if none of your code would change the control words of FPU and MXCSR. If the macro is not defined, each co would maintain its own copy of the FPU and MXCSR control words. It is recommended to always define this macro globally since it is very rare that one function needs to set its own special env of FPU or MXCSR instead of using the default env defined by ISO C. But you may leave this macro undefined if you are not sure about it. 
204 | 205 | * C macro: `ACO_USE_VALGRIND` 206 | 207 | If you want to use the tool memcheck of valgrind to test the application, then you may need to define the global C macro `ACO_USE_VALGRIND` to enable the friendly support of valgrind in libaco. But it is not recommended to define this macro in the final release build for performance reasons. You may also need to install the valgrind headers (package name is "valgrind-devel" in centos for example) to build a libaco application with the C macro `ACO_USE_VALGRIND` defined. (The memcheck of valgrind only works well with the standalone co currently. In the case of the shared stack used by more than one non-main co, the memcheck of valgrind would generate many false positive reports. For more information you may refer to "[test_aco_tutorial_6.c](test_aco_tutorial_6.c)".) 208 | 209 | * C macro: `ACO_USE_ASAN` 210 | 211 | The global C macro `ACO_USE_ASAN` would enable the friendly support of [Address Sanitizer](https://en.wikipedia.org/wiki/AddressSanitizer) in libaco (supports both gcc and clang). 212 | 213 | ## Build 214 | 215 | To build the test suites of libaco: 216 | 217 | ```bash 218 | $ mkdir output 219 | $ bash make.sh 220 | ``` 221 | 222 | There are also some detailed options in make.sh: 223 | 224 | ```bash 225 | $ bash make.sh -h 226 | Usage: make.sh [-o <no-m32|no-valgrind>] [-h] 227 | 228 | Example: 229 | # default build 230 | bash make.sh 231 | # build without the i386 binary output 232 | bash make.sh -o no-m32 233 | # build without the valgrind supported binary output 234 | bash make.sh -o no-valgrind 235 | # build without the valgrind supported and i386 binary output 236 | bash make.sh -o no-valgrind -o no-m32 237 | ``` 238 | 239 | In short, use `-o no-valgrind` if you have no valgrind headers installed, and `-o no-m32` if you have no 32-bit gcc development tools installed on an AMD64 host. 
240 | 241 | On MacOS, you need to [replace](https://apple.stackexchange.com/questions/69223/how-to-replace-mac-os-x-utilities-with-gnu-core-utilities) the default `sed` and `grep` commands of MacOS with the GNU `sed` and `grep` to run `make.sh` and `test.sh` (this requirement will be removed in the future): 242 | 243 | ```bash 244 | $ brew install grep --with-default-names 245 | $ brew install gnu-sed --with-default-names 246 | ``` 247 | 248 | ## Test 249 | 250 | ```bash 251 | $ cd output 252 | $ bash ../test.sh 253 | ``` 254 | 255 | # Tutorials 256 | 257 | The `test_aco_tutorial_0.c` in this repository shows the basic usage of libaco. There is only one main co and one standalone non-main co in this tutorial. The comments in the source code are also very helpful. 258 | 259 | The `test_aco_tutorial_1.c` shows the usage of some statistics of non-main co. The data structure of `aco_t` is very clear and is defined in `aco.h`. 260 | 261 | There are one main co, one standalone non-main co and two non-main co (pointing to the same share stack) in `test_aco_tutorial_2.c`. 262 | 263 | The `test_aco_tutorial_3.c` shows how to use libaco in a multithreaded process. Basically, one instance of libaco is designed only to work inside one certain thread to gain the maximum performance of context switching between coroutines. If you want to use libaco in a multithreaded environment, simply create one instance of libaco in each of the threads. There is no data-sharing across threads inside libaco, and you have to deal with the data races among multiple threads yourself (like what `gl_race_aco_yield_ct` does in this tutorial). 
264 | 265 | One of the rules in libaco is to call `aco_exit()` to terminate the execution of the non-main co instead of the default direct C style `return`, otherwise libaco will treat such behaviour as illegal and trigger the default protector whose job is to log the error information about the offending co to stderr and abort the process immediately. The `test_aco_tutorial_4.c` shows such an "offending co" situation. 266 | 267 | You could also define your own protector to substitute the default one (to do some customized "last words" stuff). But in any case, the process will be aborted after the protector has executed. The `test_aco_tutorial_5.c` shows how to define the customized last word function. 268 | 269 | The last example is a simple coroutine scheduler in `test_aco_tutorial_6.c`. 270 | 271 | # API 272 | 273 | It would be very helpful to read the corresponding API implementation in the source code simultaneously when you are reading the following API description of libaco since the source code is pretty clear and easy to understand. And it is also recommended to read all the [tutorials](#tutorials) before reading the API document. 274 | 275 | It is strongly recommended to read the [Best Practice](#best-practice) part before starting to write the real application of libaco (in addition to describing how to truly release libaco's extreme performance in your application, there is also a notice about the programming of libaco). 276 | 277 | Note: The versioning of libaco follows the spec: [Semantic Versioning 2.0.0](https://semver.org/spec/v2.0.0.html). So the APIs in the following list have a compatibility guarantee. (Please note that there is no such guarantee for the APIs not in the list.) 278 | 279 | ## aco_thread_init 280 | 281 | ```c 282 | typedef void (*aco_cofuncp_t)(void); 283 | void aco_thread_init(aco_cofuncp_t last_word_co_fp); 284 | ``` 285 | 286 | Initializes the libaco environment in the current thread. 
287 | 288 | It will store the current control words of FPU and MXCSR into a thread-local global variable. 289 | 290 | * If the global macro `ACO_CONFIG_SHARE_FPU_MXCSR_ENV` is not defined, the saved control words would be used as a reference value to set up the control words of the new co's FPU and MXCSR (in `aco_create`) and each co would maintain its own copy of FPU and MXCSR control words during later context switching. 291 | * If the global macro `ACO_CONFIG_SHARE_FPU_MXCSR_ENV` is defined, then all the co share the same control words of FPU and MXCSR. You may refer to the "[Build and Test](#build-and-test)" part of this document for more information about this. 292 | 293 | And as described for `test_aco_tutorial_5.c` in the "[Tutorials](#tutorials)" part, when the 1st argument `last_word_co_fp` is not NULL then the function pointed to by `last_word_co_fp` will substitute the default protector to do some "last words" stuff about the offending co before the process is aborted. In such a last word function, you could use `aco_get_co` to get the pointer to the offending co. For more information, you may read `test_aco_tutorial_5.c`. 294 | 295 | ## aco_share_stack_new 296 | 297 | ```c 298 | aco_share_stack_t* aco_share_stack_new(size_t sz); 299 | ``` 300 | 301 | Equal to `aco_share_stack_new2(sz, 1)`. 302 | 303 | ## aco_share_stack_new2 304 | 305 | ```c 306 | aco_share_stack_t* aco_share_stack_new2(size_t sz, char guard_page_enabled); 307 | ``` 308 | 309 | Creates a new share stack with an advisory memory size of `sz` in bytes which, depending on the 2nd argument `guard_page_enabled`, may have a guard page (read-only) for the detection of stack overflow. 310 | 311 | The default size value (2MB) is used if the 1st argument `sz` equals 0. 
After some computation of alignment and reserve, this function will ensure the final valid length of the share stack in return: 312 | 313 | * `final_valid_sz >= 4096` 314 | * `final_valid_sz >= sz` 315 | * `final_valid_sz % page_size == 0` if `guard_page_enabled != 0` 316 | 317 | And as close to the value of `sz` as possible. 318 | 319 | When the value of the 2nd argument `guard_page_enabled` is 1, the share stack in return would have one read-only guard page for the detection of stack overflow while a value 0 of `guard_page_enabled` means no such guard page. 320 | 321 | This function will always return a valid share stack. 322 | 323 | ## aco_share_stack_destroy 324 | 325 | ```c 326 | void aco_share_stack_destroy(aco_share_stack_t* sstk); 327 | ``` 328 | 329 | Destroy the share stack `sstk`. 330 | 331 | Be sure that all the co whose share stack is `sstk` are already destroyed when you destroy the `sstk`. 332 | 333 | ## aco_create 334 | 335 | ```c 336 | typedef void (*aco_cofuncp_t)(void); 337 | aco_t* aco_create(aco_t* main_co, aco_share_stack_t* share_stack, 338 | size_t save_stack_sz, aco_cofuncp_t co_fp, void* arg); 339 | ``` 340 | 341 | Create a new co. 342 | 343 | If it is a main_co you want to create, just call: `aco_create(NULL, NULL, 0, NULL, NULL)`. Main co is a special standalone coroutine whose share stack is the default thread stack. In the thread, main co is the coroutine that should be created and started to execute before all the other non-main coroutines. 344 | 345 | Otherwise it is a non-main co you want to create: 346 | 347 | * The 1st argument `main_co` is the main co the co will `aco_yield` to in the future context switching. `main_co` must not be NULL; 348 | * The 2nd argument `share_stack` is the address of a share stack which the non-main co you want to create will use as its executing stack in the future. 
`share_stack` must not be NULL; 349 | * The 3rd argument `save_stack_sz` specifies the init size of the private save stack of this co. The unit is in bytes. A value of 0 means to use the default size 64 bytes. Since automatic resizing would happen when the private save stack is not big enough to hold the executing stack of the co when it has to yield the share stack it is occupying to another co, you usually should not worry about the value of `save_stack_sz` at all. But it will bring some performance impact to the memory allocator when a huge amount (say 10,000,000) of the co resizes their private save stack continuously, so it is very wise and highly recommended to set the `save_stack_sz` with a value equal to the maximum value of `co->save_stack.max_cpsz` when the co is running (You may refer to the "[Best Practice](#best-practice)" part of this document for more information about such optimization); 350 | * The 4th argument `co_fp` is the entry function pointer of the co. `co_fp` must not be NULL; 351 | * The last argument `arg` is a pointer value and will be set to `co->arg` of the co to create. It could be used as an input argument for the co. 352 | 353 | This function will always return a valid co. And we name the state of the co in return as "init" if it is a non-main co you want to create. 354 | 355 | ## aco_resume 356 | 357 | ```c 358 | void aco_resume(aco_t* co); 359 | ``` 360 | 361 | Yield from the caller main co and start or continue the execution of `co`. 362 | 363 | The caller of this function must be a main co and must be `co->main_co`. And the 1st argument `co` must be a non-main co. 364 | 365 | The first time you resume a `co`, it starts running the function pointed to by `co->fp`. If `co` has already been yielded, `aco_resume` restarts it and continues the execution. 366 | 367 | After the call of `aco_resume`, we name the state of the caller — main co as "yielded". 
368 | 369 | ## aco_yield 370 | 371 | ```c 372 | void aco_yield(); 373 | ``` 374 | 375 | Yield the execution of `co` and resume `co->main_co`. The caller of this function must be a non-main co. And `co->main_co` must not be NULL. 376 | 377 | After the call of `aco_yield`, we name the state of the caller — `co` as "yielded". 378 | 379 | ## aco_get_co 380 | 381 | ```c 382 | aco_t* aco_get_co(); 383 | ``` 384 | 385 | Return the pointer of the current non-main co. The caller of this function must be a non-main co. 386 | 387 | ## aco_get_arg 388 | 389 | ```c 390 | void* aco_get_arg(); 391 | ``` 392 | 393 | Equal to `(aco_get_co()->arg)`. And also, the caller of this function must be a non-main co. 394 | 395 | ## aco_exit 396 | 397 | ```c 398 | void aco_exit(); 399 | ``` 400 | 401 | In addition to doing the same as `aco_yield()`, `aco_exit()` also sets `co->is_end` to 1, thus marking the `co` with the status of "end". 402 | 403 | ## aco_destroy 404 | 405 | ```c 406 | void aco_destroy(aco_t* co); 407 | ``` 408 | 409 | Destroy the `co`. The argument `co` must not be NULL. The private save stack would also be destroyed if the `co` is a non-main co. 410 | 411 | ## MACROS 412 | 413 | ### Version 414 | 415 | ```c 416 | #define ACO_VERSION_MAJOR 1 417 | #define ACO_VERSION_MINOR 2 418 | #define ACO_VERSION_PATCH 4 419 | ``` 420 | 421 | These 3 macros are defined in the header `aco.h` and the value of them follows the spec: [Semantic Versioning 2.0.0](https://semver.org/spec/v2.0.0.html). 
422 | 423 | ### aco_assert_override.h 424 | 425 | ```c 426 | // provide the compiler with branch prediction information 427 | #define likely(x) aco_likely(x) 428 | #define unlikely(x) aco_unlikely(x) 429 | 430 | // override the default `assert` for convenience when coding 431 | #define assert(EX) aco_assert(EX) 432 | 433 | // equal to `assert((ptr) != NULL)` 434 | #define assertptr(ptr) aco_assertptr(ptr) 435 | 436 | // assert the successful return of memory allocation 437 | #define assertalloc_bool(b) aco_assertalloc_bool(b) 438 | #define assertalloc_ptr(ptr) aco_assertalloc_ptr(ptr) 439 | ``` 440 | 441 | You could choose to include the header `"aco_assert_override.h"` to override the default C "[assert](http://man7.org/linux/man-pages/man3/assert.3.html)" in the libaco application like [test_aco_synopsis.c](test_aco_synopsis.c) does (this header including should be at the last of the include directives list in the source file because the C "[assert](http://man7.org/linux/man-pages/man3/assert.3.html)" is a C macro definition too) and define the 5 other macros in the above. Please do not include this header in the application source file if you want to use the default C "[assert](http://man7.org/linux/man-pages/man3/assert.3.html)". 442 | 443 | For more details you may refer to the source file [aco_assert_override.h](aco_assert_override.h). 444 | 445 | # Benchmark 446 | 447 | Date: Sat Jun 30 UTC 2018. 448 | 449 | Machine: [c5d.large on AWS](https://aws.amazon.com/cn/blogs/aws/now-available-compute-intensive-c5-instances-for-amazon-ec2/). 450 | 451 | OS: RHEL-7.5 (Red Hat Enterprise Linux 7.5). 
452 | 453 | Here is a brief summary of the benchmark part: 454 | 455 | * One time of the context switching between coroutines takes only about **10.29 ns** (in the case of standalone stack, where x87 and mxcsr control words are shared between coroutines); 456 | * One time of the context switching between coroutines takes only about **10.38 ns** (in the case of standalone stack, where each coroutine maintains their own x87 and mxcsr control words); 457 | * It is extremely memory efficient: it only costs **2.8 GB** of physical memory to run **10,000,000** coroutines simultaneously (with tcmalloc, where each coroutine has a **120 bytes** copy-stack size configuration). 458 | 459 | ``` 460 | $ LD_PRELOAD=/usr/lib64/libtcmalloc_minimal.so.4 ./test_aco_benchmark..no_valgrind.shareFPUenv 461 | 462 | +build:x86_64 463 | +build:-DACO_CONFIG_SHARE_FPU_MXCSR_ENV 464 | +build:share fpu & mxcsr control words between coroutines 465 | +build:undefined ACO_USE_VALGRIND 466 | +build:without valgrind memcheck friendly support 467 | 468 | sizeof(aco_t)=152: 469 | 470 | 471 | comment task_amount all_time_cost ns_per_op speed 472 | 473 | aco_create/init_save_stk_sz=64B 1 0.000 s 230.00 ns/op 4347824.79 op/s 474 | aco_resume/co_amount=1/copy_stack_size=0B 20000000 0.412 s 20.59 ns/op 48576413.55 op/s 475 | -> acosw 40000000 0.412 s 10.29 ns/op 97152827.10 op/s 476 | aco_destroy 1 0.000 s 650.00 ns/op 1538461.66 op/s 477 | 478 | aco_create/init_save_stk_sz=64B 1 0.000 s 200.00 ns/op 5000001.72 op/s 479 | aco_resume/co_amount=1/copy_stack_size=0B 20000000 0.412 s 20.61 ns/op 48525164.25 op/s 480 | -> acosw 40000000 0.412 s 10.30 ns/op 97050328.50 op/s 481 | aco_destroy 1 0.000 s 666.00 ns/op 1501501.49 op/s 482 | 483 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.50 ns/op 15266771.53 op/s 484 | aco_resume/co_amount=2000000/copy_stack_size=8B 20000000 0.666 s 33.29 ns/op 30043022.64 op/s 485 | aco_destroy 2000000 0.066 s 32.87 ns/op 30425152.25 op/s 486 | 487 | 
aco_create/init_save_stk_sz=64B 2000000 0.130 s 65.22 ns/op 15332218.24 op/s 488 | aco_resume/co_amount=2000000/copy_stack_size=24B 20000000 0.675 s 33.75 ns/op 29630018.73 op/s 489 | aco_destroy 2000000 0.067 s 33.45 ns/op 29898311.36 op/s 490 | 491 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.42 ns/op 15286937.97 op/s 492 | aco_resume/co_amount=2000000/copy_stack_size=40B 20000000 0.669 s 33.45 ns/op 29891277.59 op/s 493 | aco_destroy 2000000 0.080 s 39.87 ns/op 25084242.29 op/s 494 | 495 | aco_create/init_save_stk_sz=64B 2000000 0.224 s 111.86 ns/op 8940010.49 op/s 496 | aco_resume/co_amount=2000000/copy_stack_size=56B 20000000 0.678 s 33.88 ns/op 29515473.53 op/s 497 | aco_destroy 2000000 0.067 s 33.42 ns/op 29922412.68 op/s 498 | 499 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.74 ns/op 15211896.70 op/s 500 | aco_resume/co_amount=2000000/copy_stack_size=120B 20000000 0.769 s 38.45 ns/op 26010724.94 op/s 501 | aco_destroy 2000000 0.088 s 44.11 ns/op 22669240.25 op/s 502 | 503 | aco_create/init_save_stk_sz=64B 10000000 1.240 s 123.97 ns/op 8066542.54 op/s 504 | aco_resume/co_amount=10000000/copy_stack_size=8B 40000000 1.327 s 33.17 ns/op 30143409.55 op/s 505 | aco_destroy 10000000 0.328 s 32.82 ns/op 30467658.05 op/s 506 | 507 | aco_create/init_save_stk_sz=64B 10000000 0.659 s 65.94 ns/op 15165717.02 op/s 508 | aco_resume/co_amount=10000000/copy_stack_size=24B 40000000 1.345 s 33.63 ns/op 29737708.53 op/s 509 | aco_destroy 10000000 0.337 s 33.71 ns/op 29666697.09 op/s 510 | 511 | aco_create/init_save_stk_sz=64B 10000000 0.654 s 65.38 ns/op 15296191.35 op/s 512 | aco_resume/co_amount=10000000/copy_stack_size=40B 40000000 1.348 s 33.71 ns/op 29663992.77 op/s 513 | aco_destroy 10000000 0.336 s 33.56 ns/op 29794574.96 op/s 514 | 515 | aco_create/init_save_stk_sz=64B 10000000 0.653 s 65.29 ns/op 15316087.09 op/s 516 | aco_resume/co_amount=10000000/copy_stack_size=56B 40000000 1.384 s 34.60 ns/op 28902221.24 op/s 517 | aco_destroy 10000000 0.337 s 
33.73 ns/op 29643682.93 op/s 518 | 519 | aco_create/init_save_stk_sz=64B 10000000 0.652 s 65.19 ns/op 15340872.40 op/s 520 | aco_resume/co_amount=10000000/copy_stack_size=120B 40000000 1.565 s 39.11 ns/op 25566255.73 op/s 521 | aco_destroy 10000000 0.443 s 44.30 ns/op 22574242.55 op/s 522 | 523 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.61 ns/op 15241722.94 op/s 524 | aco_resume/co_amount=2000000/copy_stack_size=136B 20000000 0.947 s 47.36 ns/op 21114212.05 op/s 525 | aco_destroy 2000000 0.125 s 62.35 ns/op 16039466.45 op/s 526 | 527 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.71 ns/op 15218784.72 op/s 528 | aco_resume/co_amount=2000000/copy_stack_size=136B 20000000 0.948 s 47.39 ns/op 21101216.29 op/s 529 | aco_destroy 2000000 0.125 s 62.73 ns/op 15941559.26 op/s 530 | 531 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.49 ns/op 15270258.18 op/s 532 | aco_resume/co_amount=2000000/copy_stack_size=152B 20000000 1.069 s 53.44 ns/op 18714275.17 op/s 533 | aco_destroy 2000000 0.122 s 61.05 ns/op 16378678.85 op/s 534 | 535 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 65.91 ns/op 15171336.62 op/s 536 | aco_resume/co_amount=2000000/copy_stack_size=232B 20000000 1.190 s 59.48 ns/op 16813230.99 op/s 537 | aco_destroy 2000000 0.123 s 61.26 ns/op 16324298.25 op/s 538 | 539 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.68 ns/op 15224361.30 op/s 540 | aco_resume/co_amount=2000000/copy_stack_size=488B 20000000 1.828 s 91.40 ns/op 10941133.56 op/s 541 | aco_destroy 2000000 0.145 s 72.56 ns/op 13781182.82 op/s 542 | 543 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 65.80 ns/op 15197461.34 op/s 544 | aco_resume/co_amount=2000000/copy_stack_size=488B 20000000 1.829 s 91.47 ns/op 10932139.32 op/s 545 | aco_destroy 2000000 0.149 s 74.70 ns/op 13387258.82 op/s 546 | 547 | aco_create/init_save_stk_sz=64B 1000000 0.067 s 66.63 ns/op 15007426.35 op/s 548 | aco_resume/co_amount=1000000/copy_stack_size=1000B 20000000 4.224 s 211.20 ns/op 4734744.76 
op/s 549 | aco_destroy 1000000 0.093 s 93.36 ns/op 10711651.49 op/s 550 | 551 | aco_create/init_save_stk_sz=64B 1000000 0.066 s 66.28 ns/op 15086953.73 op/s 552 | aco_resume/co_amount=1000000/copy_stack_size=1000B 20000000 4.222 s 211.12 ns/op 4736537.93 op/s 553 | aco_destroy 1000000 0.094 s 94.09 ns/op 10627664.78 op/s 554 | 555 | aco_create/init_save_stk_sz=64B 100000 0.007 s 70.72 ns/op 14139923.59 op/s 556 | aco_resume/co_amount=100000/copy_stack_size=1000B 20000000 4.191 s 209.56 ns/op 4771909.70 op/s 557 | aco_destroy 100000 0.010 s 101.21 ns/op 9880747.28 op/s 558 | 559 | aco_create/init_save_stk_sz=64B 100000 0.007 s 66.62 ns/op 15010433.00 op/s 560 | aco_resume/co_amount=100000/copy_stack_size=2024B 20000000 7.002 s 350.11 ns/op 2856228.03 op/s 561 | aco_destroy 100000 0.016 s 159.69 ns/op 6262129.35 op/s 562 | 563 | aco_create/init_save_stk_sz=64B 100000 0.007 s 65.76 ns/op 15205994.08 op/s 564 | aco_resume/co_amount=100000/copy_stack_size=4072B 20000000 11.918 s 595.90 ns/op 1678127.54 op/s 565 | aco_destroy 100000 0.019 s 186.32 ns/op 5367189.85 op/s 566 | 567 | aco_create/init_save_stk_sz=64B 100000 0.006 s 63.03 ns/op 15865531.37 op/s 568 | aco_resume/co_amount=100000/copy_stack_size=7992B 20000000 21.808 s 1090.42 ns/op 917079.11 op/s 569 | aco_destroy 100000 0.038 s 378.33 ns/op 2643225.42 op/s 570 | ``` 571 | 572 | ``` 573 | $ LD_PRELOAD=/usr/lib64/libtcmalloc_minimal.so.4 ./test_aco_benchmark..no_valgrind.standaloneFPUenv 574 | 575 | +build:x86_64 576 | +build:undefined ACO_CONFIG_SHARE_FPU_MXCSR_ENV 577 | +build:each coroutine maintain each own fpu & mxcsr control words 578 | +build:undefined ACO_USE_VALGRIND 579 | +build:without valgrind memcheck friendly support 580 | 581 | sizeof(aco_t)=160: 582 | 583 | 584 | comment task_amount all_time_cost ns_per_op speed 585 | 586 | aco_create/init_save_stk_sz=64B 1 0.000 s 273.00 ns/op 3663004.27 op/s 587 | aco_resume/co_amount=1/copy_stack_size=0B 20000000 0.415 s 20.76 ns/op 48173877.75 op/s 588 | -> 
acosw 40000000 0.415 s 10.38 ns/op 96347755.51 op/s 589 | aco_destroy 1 0.000 s 381.00 ns/op 2624672.26 op/s 590 | 591 | aco_create/init_save_stk_sz=64B 1 0.000 s 212.00 ns/op 4716980.43 op/s 592 | aco_resume/co_amount=1/copy_stack_size=0B 20000000 0.415 s 20.75 ns/op 48185455.26 op/s 593 | -> acosw 40000000 0.415 s 10.38 ns/op 96370910.51 op/s 594 | aco_destroy 1 0.000 s 174.00 ns/op 5747123.38 op/s 595 | 596 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.63 ns/op 15237386.02 op/s 597 | aco_resume/co_amount=2000000/copy_stack_size=8B 20000000 0.664 s 33.20 ns/op 30119155.82 op/s 598 | aco_destroy 2000000 0.065 s 32.67 ns/op 30604542.55 op/s 599 | 600 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.33 ns/op 15305975.29 op/s 601 | aco_resume/co_amount=2000000/copy_stack_size=24B 20000000 0.675 s 33.74 ns/op 29638360.61 op/s 602 | aco_destroy 2000000 0.067 s 33.31 ns/op 30016633.42 op/s 603 | 604 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.61 ns/op 15241767.78 op/s 605 | aco_resume/co_amount=2000000/copy_stack_size=40B 20000000 0.678 s 33.88 ns/op 29518648.08 op/s 606 | aco_destroy 2000000 0.079 s 39.74 ns/op 25163018.30 op/s 607 | 608 | aco_create/init_save_stk_sz=64B 2000000 0.221 s 110.73 ns/op 9030660.30 op/s 609 | aco_resume/co_amount=2000000/copy_stack_size=56B 20000000 0.684 s 34.18 ns/op 29253416.65 op/s 610 | aco_destroy 2000000 0.067 s 33.40 ns/op 29938840.64 op/s 611 | 612 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.60 ns/op 15244077.65 op/s 613 | aco_resume/co_amount=2000000/copy_stack_size=120B 20000000 0.769 s 38.43 ns/op 26021228.41 op/s 614 | aco_destroy 2000000 0.087 s 43.74 ns/op 22863987.42 op/s 615 | 616 | aco_create/init_save_stk_sz=64B 10000000 1.251 s 125.08 ns/op 7994958.59 op/s 617 | aco_resume/co_amount=10000000/copy_stack_size=8B 40000000 1.327 s 33.19 ns/op 30133654.80 op/s 618 | aco_destroy 10000000 0.329 s 32.85 ns/op 30439787.32 op/s 619 | 620 | aco_create/init_save_stk_sz=64B 10000000 0.674 s 67.37 ns/op 
14843796.57 op/s 621 | aco_resume/co_amount=10000000/copy_stack_size=24B 40000000 1.354 s 33.84 ns/op 29548523.05 op/s 622 | aco_destroy 10000000 0.339 s 33.90 ns/op 29494634.83 op/s 623 | 624 | aco_create/init_save_stk_sz=64B 10000000 0.672 s 67.19 ns/op 14882262.88 op/s 625 | aco_resume/co_amount=10000000/copy_stack_size=40B 40000000 1.361 s 34.02 ns/op 29393520.19 op/s 626 | aco_destroy 10000000 0.338 s 33.77 ns/op 29609577.59 op/s 627 | 628 | aco_create/init_save_stk_sz=64B 10000000 0.673 s 67.31 ns/op 14857716.02 op/s 629 | aco_resume/co_amount=10000000/copy_stack_size=56B 40000000 1.371 s 34.27 ns/op 29181897.80 op/s 630 | aco_destroy 10000000 0.339 s 33.85 ns/op 29540633.63 op/s 631 | 632 | aco_create/init_save_stk_sz=64B 10000000 0.672 s 67.24 ns/op 14873017.10 op/s 633 | aco_resume/co_amount=10000000/copy_stack_size=120B 40000000 1.548 s 38.71 ns/op 25835542.17 op/s 634 | aco_destroy 10000000 0.446 s 44.61 ns/op 22415961.64 op/s 635 | 636 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 66.01 ns/op 15148290.52 op/s 637 | aco_resume/co_amount=2000000/copy_stack_size=136B 20000000 0.944 s 47.22 ns/op 21177946.19 op/s 638 | aco_destroy 2000000 0.124 s 61.99 ns/op 16132721.97 op/s 639 | 640 | aco_create/init_save_stk_sz=64B 2000000 0.133 s 66.36 ns/op 15068860.85 op/s 641 | aco_resume/co_amount=2000000/copy_stack_size=136B 20000000 0.944 s 47.20 ns/op 21187541.38 op/s 642 | aco_destroy 2000000 0.124 s 62.21 ns/op 16073322.25 op/s 643 | 644 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.62 ns/op 15238955.93 op/s 645 | aco_resume/co_amount=2000000/copy_stack_size=152B 20000000 1.072 s 53.61 ns/op 18652789.74 op/s 646 | aco_destroy 2000000 0.121 s 60.42 ns/op 16551368.04 op/s 647 | 648 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 66.08 ns/op 15132547.65 op/s 649 | aco_resume/co_amount=2000000/copy_stack_size=232B 20000000 1.198 s 59.88 ns/op 16699389.91 op/s 650 | aco_destroy 2000000 0.121 s 60.71 ns/op 16471465.52 op/s 651 | 652 | 
aco_create/init_save_stk_sz=64B 2000000 0.133 s 66.50 ns/op 15036985.95 op/s 653 | aco_resume/co_amount=2000000/copy_stack_size=488B 20000000 1.853 s 92.63 ns/op 10796126.04 op/s 654 | aco_destroy 2000000 0.146 s 72.87 ns/op 13723559.36 op/s 655 | 656 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 66.14 ns/op 15118324.13 op/s 657 | aco_resume/co_amount=2000000/copy_stack_size=488B 20000000 1.855 s 92.75 ns/op 10781572.22 op/s 658 | aco_destroy 2000000 0.152 s 75.79 ns/op 13194130.51 op/s 659 | 660 | aco_create/init_save_stk_sz=64B 1000000 0.067 s 66.97 ns/op 14931921.56 op/s 661 | aco_resume/co_amount=1000000/copy_stack_size=1000B 20000000 4.218 s 210.90 ns/op 4741536.66 op/s 662 | aco_destroy 1000000 0.093 s 93.16 ns/op 10734691.98 op/s 663 | 664 | aco_create/init_save_stk_sz=64B 1000000 0.066 s 66.49 ns/op 15039274.31 op/s 665 | aco_resume/co_amount=1000000/copy_stack_size=1000B 20000000 4.216 s 210.81 ns/op 4743543.53 op/s 666 | aco_destroy 1000000 0.094 s 93.97 ns/op 10641539.58 op/s 667 | 668 | aco_create/init_save_stk_sz=64B 100000 0.007 s 70.95 ns/op 14094724.73 op/s 669 | aco_resume/co_amount=100000/copy_stack_size=1000B 20000000 4.190 s 209.52 ns/op 4772746.50 op/s 670 | aco_destroy 100000 0.010 s 100.99 ns/op 9902271.51 op/s 671 | 672 | aco_create/init_save_stk_sz=64B 100000 0.007 s 66.49 ns/op 15040038.84 op/s 673 | aco_resume/co_amount=100000/copy_stack_size=2024B 20000000 7.028 s 351.38 ns/op 2845942.55 op/s 674 | aco_destroy 100000 0.016 s 159.15 ns/op 6283444.42 op/s 675 | 676 | aco_create/init_save_stk_sz=64B 100000 0.007 s 65.73 ns/op 15214482.36 op/s 677 | aco_resume/co_amount=100000/copy_stack_size=4072B 20000000 11.879 s 593.95 ns/op 1683636.60 op/s 678 | aco_destroy 100000 0.018 s 184.23 ns/op 5428119.00 op/s 679 | 680 | aco_create/init_save_stk_sz=64B 100000 0.006 s 63.41 ns/op 15771072.16 op/s 681 | aco_resume/co_amount=100000/copy_stack_size=7992B 20000000 21.808 s 1090.42 ns/op 917081.56 op/s 682 | aco_destroy 100000 0.038 s 376.78 ns/op 
2654073.13 op/s 683 | ``` 684 | 685 | # Proof of Correctness 686 | 687 | It is essential to be very familiar with the standard of [Sys V ABI of intel386 and x86-64](https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI) before you start to implement or prove a coroutine library. 688 | 689 | The proof below has no direct description about the IP (instruction pointer), SP (stack pointer) and the saving/restoring between the private save stack and the share stack, since these things are pretty trivial and easy to understand when they are compared with the ABI constraints stuff. 690 | 691 | ## Running Model 692 | 693 | In the OS thread, the main coroutine `main_co` is the coroutine who should be created and started to execute first, before all the other non-main coroutines do. 694 | 695 | The next diagram is a simple example of the context switching between main_co and co. 696 | 697 | In this proof, we just assume that we are under Sys V ABI of intel386 since there is no fundamental differences between the Sys V ABI of intel386 and x86-64. We also assume that none of the code would change the control words of FPU and MXCSR. 698 | 699 | ![proof_0](img/proof_0.png) 700 | 701 | The next diagram is actually a symmetric coroutine's running model which has an unlimited number of non-main co-s and one main co. This is fine because the asymmetric coroutine is just a special case of the symmetric coroutine. To prove the correctness of the symmetric coroutine is a little more challenging than of the asymmetric coroutine and thus more fun it would become. (libaco only implemented the API of asymmetric coroutine currently because the semantic meaning of the asymmetric coroutine API is far more easy to understand and to use than the symmetric coroutine does.) 
702 | 703 | ![proof_1](img/proof_1.png) 704 | 705 | Since the main co is the 1st coroutine that starts to run, the 1st context switching in this OS thread must be in the form of `acosw(main_co, co)` where the 2nd argument `co` is a non-main co. 706 | 707 | ## Mathematical Induction 708 | 709 | It is easy to prove that there exist only two kinds of state transfer in the above diagram: 710 | 711 | * yielded state co → init state co 712 | * yielded state co → yielded state co 713 | 714 | To prove the correctness of `void* acosw(aco_t* from_co, aco_t* to_co)` implementation is equivalent to proving that all the co constantly comply to the constraints of Sys V ABI before and after the call of `acosw`. We assume that the other part of binary code (except `acosw`) in the co already complies with the ABI (they are normally generated by the compiler correctly). 715 | 716 | Here is a summary of the registers' constraints in the Function Calling Convention of Intel386 Sys V ABI: 717 | 718 | ``` 719 | Registers' usage in the calling convention of the Intel386 System V ABI: 720 | caller saved (scratch) registers: 721 | C1.0: EAX 722 | At the entry of a function call: 723 | could be any value 724 | After the return of `acosw`: 725 | hold the return value for `acosw` 726 | C1.1: ECX,EDX 727 | At the entry of a function call: 728 | could be any value 729 | After the return of `acosw`: 730 | could be any value 731 | C1.2: Arithmetic flags, x87 and mxcsr flags 732 | At the entry of a function call: 733 | could be any value 734 | After the return of `acosw`: 735 | could be any value 736 | C1.3: ST(0-7) 737 | At the entry of a function call: 738 | the stack of FPU must be empty 739 | After the return of `acosw`: 740 | the stack of FPU must be empty 741 | C1.4: Direction flag 742 | At the entry of a function call: 743 | DF must be 0 744 | After the return of `acosw`: 745 | DF must be 0 746 | C1.5: others: xmm*,ymm*,mm*,k*... 
747 | At the entry of a function call: 748 | could be any value 749 | After the return of `acosw`: 750 | could be any value 751 | callee saved registers: 752 | C2.0: EBX,ESI,EDI,EBP 753 | At the entry of a function call: 754 | could be any value 755 | After the return of `acosw`: 756 | must be the same as it is at the entry of `acosw` 757 | C2.1: ESP 758 | At the entry of a function call: 759 | must be a valid stack pointer 760 | (alignment of 16 bytes, retaddr and etc...) 761 | After the return of `acosw`: 762 | must be the same as it is before the call of `acosw` 763 | C2.2: control word of FPU & mxcsr 764 | At the entry of a function call: 765 | could be any configuration 766 | After the return of `acosw`: 767 | must be the same as it is before the call of `acosw` 768 | (unless the caller of `acosw` assume `acosw` may \ 769 | change the control words of FPU or MXCSR on purpose \ 770 | like `fesetenv`) 771 | ``` 772 | 773 | (For Intel386, the register usage is defined in the "P13 - Table 2.3: Register Usage" of [Sys V ABI Intel386 V1.1](https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI), and for AMD64 is in "P23 - Figure 3.4: Register Usage" of [Sys V ABI AMD64 V1.0](https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI).) 774 | 775 | **Proof:** 776 | 777 | 1. yielded state co -> init state co: 778 | 779 | ![proof_2](img/proof_2.png) 780 | 781 | The diagram above is for the 1st case: "yielded state co -> init state co". 782 | 783 | Constraints: C 1.0, 1.1, 1.2, 1.5 (*satisfied* ✓ ) 784 | 785 | The scratch registers below can hold any value at the entry of a function: 786 | 787 | ``` 788 | EAX,ECX,EDX 789 | XMM*,YMM*,MM*,K*... 790 | status bits of EFLAGS,FPU,MXCSR 791 | ``` 792 | 793 | Constraints: C 1.3, 1.4 (*satisfied* ✓ ) 794 | 795 | Since the stack of FPU must already be empty and the DF must already be 0 before `acosw(co, to_co)` was called (the binary code of co is already complied to the ABI), the constraint 1.3 and 1.4 is complied by `acosw`. 
796 | 797 | Constraints: C 2.0, 2.1, 2.2 (*satisfied* ✓ ) 798 | 799 | C 2.0 & 2.1 is already satisfied. Since we already assumed that nobody will change the control words of FPU and MXCSR, C 2.2 is satisfied too. 800 | 801 | 2. yielded state co -> yielded state co: 802 | 803 | ![proof_3](img/proof_3.png) 804 | 805 | The diagram above is for the 2nd case: yielded state co -> yielded state co. 806 | 807 | Constraints: C 1.0 (*satisfied* ✓ ) 808 | 809 | EAX already holding the return value when `acosw` returns back to to_co (resume). 810 | 811 | Constraints: C 1.1, 1.2, 1.5 (*satisfied* ✓ ) 812 | 813 | The scratch registers below can hold any value at the entry of a function and after the return of `acosw`: 814 | 815 | ``` 816 | ECX,EDX 817 | XMM*,YMM*,MM*,K*... 818 | status bits of EFLAGS,FPU,MXCSR 819 | ``` 820 | 821 | Constraints: C 1.3, 1.4 (*satisfied* ✓ ) 822 | 823 | Since the stack of FPU must already be empty and the DF must already be 0 before `acosw(co, to_co)` was called (the binary code of co is already complied to the ABI), the constraint 1.3 and 1.4 is complied by `acosw`. 824 | 825 | Constraints: C 2.0, 2.1, 2.2 (*satisfied* ✓ ) 826 | 827 | C 2.0 & 2.1 is satisfied because there is saving & restoring of the callee saved registers when `acosw` been called/returned. Since we already assumed that nobody will change the control words of FPU and MXCSR, C 2.2 is satisfied too. 828 | 829 | 3. Mathematical induction: 830 | 831 | The 1st `acosw` in the thread must be the 1st case: yielded state co -> init state co, and all the next `acosw` must be one of the 2 case above. Sequentially, we could prove that "all the co constantly comply to the constraints of Sys V ABI before and after the call of `acosw`". Thus, the proof is finished. 
832 | 833 | ## Miscellaneous 834 | 835 | ### Red Zone 836 | 837 | There is a new thing called [red zone](https://en.wikipedia.org/wiki/Red_zone_(computing)) in System V ABI x86-64: 838 | 839 | > The 128-byte area beyond the location pointed to by %rsp is considered to be reserved and shall not be modified by signal or interrupt handlers. Therefore, functions may use this area for temporary data that is not needed across function calls. In particular, leaf functions may use this area for their entire stack frame, rather than adjusting the stack pointer in the prologue and epilogue. This area is known as the red zone. 840 | 841 | Since the red zone is "not preserved by the callee", we just do not care about it at all in the context switching between coroutines (because the `acosw` is a leaf function). 842 | 843 | ### Stack Pointer 844 | 845 | > The end of the input argument area shall be aligned on a 16 (32 or 64, if \_\_m256 or \_\_m512 is passed on stack) byte boundary. In other words, the value (%esp + 4) is always a multiple of 16 (32 or 64) when control is transferred to the function entry point. The stack pointer, %esp, always points to the end of the latest allocated stack frame. 846 | > 847 | > — Intel386-psABI-1.1:2.2.2 The Stack Frame 848 | 849 | > The stack pointer, %rsp, always points to the end of the latest allocated stack frame. 850 | > 851 | > — Sys V ABI AMD64 Version 1.0:3.2.2 The Stack Frame 852 | 853 | Here is a [bug example](https://github.com/Tencent/libco/blob/v1.0/coctx_swap.S#L27) in Tencent's libco. The ABI states that the `(E|R)SP` should always point to the end of the latest allocated stack frame. But in file [coctx_swap.S](https://github.com/Tencent/libco/blob/v1.0/coctx_swap.S#L27) of libco, the `(E|R)SP` had been used to address the memory on the heap. 
854 | 855 | >**By default, the signal handler is invoked on the normal process stack.** It is possible to arrange that the signal handler uses an alternate stack; see sigaltstack(2) for a discussion of how to do this and when it might be useful. 856 | > 857 | >— man 7 signal : Signal dispositions 858 | 859 | Terrible things may happen if the `(E|R)SP` is pointing to the data structure on the heap when a signal arrives. (Using the `breakpoint` and `signal` commands of gdb could produce such bug conveniently. Although using `sigaltstack` to change the default signal stack could alleviate the problem, that kind of usage of `(E|R)SP` still violates the ABI.) 860 | 861 | # Best Practice 862 | 863 | In summary, if you want to gain the ultra performance of libaco, just keep the stack usage of the non-standalone non-main co at the point of calling `aco_yield` as small as possible. And be very careful if you want to pass the address of a local variable from one co to another co since the local variable is usually on the **share** stack. Allocating this kind of variables from the heap is always the wiser choice. 864 | 865 | In detail, there are 5 tips: 866 | 867 | ``` 868 | co_fp 869 | / \ 870 | / \ 871 | f1 f2 872 | / \ / \ 873 | / \ f4 \ 874 | yield f3 f5 875 | ``` 876 | 877 | 1. The stack usage of main co has no direct influence on the performance of context switching between coroutines (since it has a standalone execution stack); 878 | 2. The stack usage of standalone non-main co has no direct influence on the performance of context switching between coroutines. But a huge amount of standalone non-main co would cost too much of virtual memory (due to the standalone stack), so it is not recommended to create huge amount of standalone non-main co in one thread; 879 | 3. The stack usage of non-standalone (share stack with other coroutines) non-main co when it is being yielded (i.e. 
call `aco_yield` to yield back to main co) has a big impact on the performance of context switching between coroutines, as already indicated by the benchmark results. In the diagram above, the stack usage of functions f2, f3, f4 and f5 has no direct influence over the context switching performance since `aco_yield` is never called while they are executing, whereas the stack usage of co_fp and f1 dominates the value of `co->save_stack.max_cpsz` and has a big influence over the context switching performance. 880 | 881 | The key to keeping the stack usage of a function as low as possible is to allocate the local variables (especially the big ones) on the heap and manage their lifecycle manually instead of allocating them on the stack by default. The `-fstack-usage` option of gcc is very helpful for this. 882 | 883 | ```c 884 | int* gl_ptr; 885 | 886 | void inc_p(int* p){ (*p)++; } 887 | 888 | void co_fp0() { 889 | int ct = 0; 890 | gl_ptr = &ct; // line 7 891 | aco_yield(); 892 | check(ct); 893 | int* ptr = &ct; 894 | inc_p(ptr); // line 11 895 | aco_exit(); 896 | } 897 | 898 | void co_fp1() { 899 | do_sth(gl_ptr); // line 16 900 | aco_exit(); 901 | } 902 | ``` 903 | 904 | 4. In the above code snippet, we assume that co_fp0 & co_fp1 share the same share stack (they are both non-main co) and that their running sequence is "co_fp0 -> co_fp1 -> co_fp0". Since they are sharing the same stack, the address held in `gl_ptr` in co_fp1 (line 16) has totally different semantics from the `gl_ptr` in line 7 of co_fp0, and that kind of code would probably corrupt the execution stack of co_fp1. But line 11 is fine because variable `ct` and function `inc_p` are in the same coroutine context. 
Allocating variables of that kind (those that need to be shared with other coroutines) on the heap simply solves such problems: 905 | 906 | ```c 907 | int* gl_ptr; 908 | 909 | void inc_p(int* p){ (*p)++; } 910 | 911 | void co_fp0() { 912 | int* ct_ptr = malloc(sizeof(int)); 913 | assert(ct_ptr != NULL); 914 | *ct_ptr = 0; 915 | gl_ptr = ct_ptr; 916 | aco_yield(); 917 | check(*ct_ptr); 918 | int* ptr = ct_ptr; 919 | inc_p(ptr); 920 | free(ct_ptr); 921 | gl_ptr = NULL; 922 | aco_exit(); 923 | } 924 | 925 | void co_fp1() { 926 | do_sth(gl_ptr); 927 | aco_exit(); 928 | } 929 | ``` 930 | 931 | # TODO 932 | 933 | New ideas are welcome! 934 | 935 | * Add a macro like `aco_mem_new` which is the combination of something like `p = malloc(sz); assertalloc_ptr(p)`. 936 | 937 | * Add a new API `aco_reset` to support the reusability of the coroutine objects. 938 | 939 | * Support other platforms (especially arm & arm64). 940 | 941 | # CHANGES 942 | 943 | ``` 944 | v1.2.4 Sun Jul 29 2018 945 | Changed `asm` to `__asm__` in aco.h to support compiler's `--std=c99` 946 | flag (Issue #16, proposed by Theo Schlossnagle @postwait). 947 | v1.2.3 Thu Jul 26 2018 948 | Added support for MacOS; 949 | Added support for shared library build of libaco (PR #10, proposed 950 | by Theo Schlossnagle @postwait); 951 | Added C macro ACO_REG_IDX_BP in aco.h (PR #15, proposed by 952 | Theo Schlossnagle @postwait); 953 | Added global C config macro ACO_USE_ASAN which could enable the 954 | friendly support of address sanitizer (both gcc and clang) (PR #14, 955 | proposed by Theo Schlossnagle @postwait); 956 | Added README_zh.md. 
957 | v1.2.2 Mon Jul 9 2018 958 | Added a new option `-o <no-m32|no-valgrind>` to make.sh; 959 | Corrected the value of macro ACO_VERSION_PATCH (issue #1 960 | kindly reported by Markus Elfring @elfring); 961 | Adjusted some noncompliant naming of identifiers (double underscore 962 | `__`) (issue #1, kindly proposed by Markus Elfring @elfring); 963 | Supported inclusion of the header files by C++ (issue #4, kindly 964 | proposed by Markus Elfring @elfring). 965 | v1.2.1 Sat Jul 7 2018 966 | Fixed some noncompliant include guards in two C header files ( 967 | issue #1 kindly reported by Markus Elfring @elfring); 968 | Removed the word "pure" from the "pure C" statement since the project 969 | contains assembly code (kindly reported by Peter Cawley 970 | @corsix); 971 | Many updates in the README.md document. 972 | v1.2.0 Tue Jul 3 2018 973 | Provided another header named `aco_assert_override.h` so users 974 | can choose whether to override the default `assert` or not; 975 | Added some macros about the version information. 976 | v1.1 Mon Jul 2 2018 977 | Removed the requirement on the GCC version (>= 5.0). 978 | v1.0 Sun Jul 1 2018 979 | The v1.0 release of libaco, cheers 🎉🎉🎉 980 | ``` 981 | 982 | # Donation 983 | 984 | I'm a full-time open source developer. Donations of any amount will be highly appreciated and would be a great encouragement to me. 985 | 986 | * Paypal 987 | 988 | [paypal.me link](https://www.paypal.me/00hnes) 989 | 990 | * Alipay (支付(宝|寶)) 991 | 992 | ![qr_alipay](img/qr_alipay.png) 993 | 994 | * Wechat (微信) 995 | 996 | ![qr_wechat](img/qr_wechat.png) 997 | 998 | # Thanks 999 | 1000 | The logo of libaco was generously donated by Peter Bech ([Peteck](https://github.com/Peteck)). The logo is licensed under [CC BY-ND 4.0](https://creativecommons.org/licenses/by-nd/4.0/). The website of [libaco.org](https://libaco.org) was also kindly contributed by Peter Bech ([Peteck](https://github.com/Peteck)). 
1001 | 1002 | # Copyright and License 1003 | 1004 | Copyright (C) 2018, by Sen Han [<00hnes@gmail.com>](mailto:00hnes@gmail.com). 1005 | 1006 | Under the Apache License, Version 2.0. 1007 | 1008 | See the [LICENSE](LICENSE) file for details. 1009 | -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- 1 | # Name 2 | 3 | libaco - 一个极速的、轻量级、C语言非对称协程库。 4 | 5 | 这个项目的代号是Arkenstone 💎 6 | 7 | Asymmetric COroutine 和 Arkenstone 是 aco 的名称来源。 8 | 9 | 当前支持Sys V ABI Intel386和Sys V ABI x86-64。 10 | 11 | 下面是这个项目的简要介绍: 12 | 13 | - 除了一个生产级别的C协程库实现,还包含了一个详细的文档描述了如何实现一个 *最快且正确* 的协程库以及其严格的数学证明; 14 | - 核心实现不超过 *700* 行代码,但包含了一个协程库应该有的全部功能; 15 | - 在AWS c5d.large机器上的性能测试结果指出,一次协程间上下文切换仅耗时 *10 ns* (独立执行栈); 16 | - 用户在创建新的协程时,可以选择其拥有一个独占的执行栈,或者是与其它任意数量的协程一起共享一个执行栈; 17 | - 拥有极致的内存使用效率:一千万个协程并发执行仅消耗2.8GB的物理内存(tcmalloc,每一个协程使用120B的复制栈)。 18 | 19 | 上文中的"最快"指的是在满足Sys V ABI Intel386或者AMD64约束下最快的上下文切换实现。 20 | 21 | [![Build Status Travis](https://img.shields.io/travis/hnes/libaco.svg?style=flat-square&&branch=master)](https://travis-ci.org/hnes/libaco) 22 | [![Releases](https://img.shields.io/github/release/hnes/libaco/all.svg?style=flat-square)](https://github.com/hnes/libaco/releases) 23 | [![LICENSE](https://img.shields.io/github/license/hnes/libaco.svg?style=flat-square)](https://github.com/hnes/libaco/blob/master/LICENSE) 24 | [![Tweet](https://img.shields.io/twitter/url/http/shields.io.svg?style=social)](https://twitter.com/intent/tweet?text=libaco+-+A+blazing+fast+and+lightweight+C+asymmetric+coroutine+library&url=https://github.com/hnes/libaco&via=00hnes) 25 | 26 | 热烈欢迎Issues和PRs 🎉🎉🎉 27 | 28 | 注意: 请使用Release而非Master分支进行最终的二进制程序构建。 29 | 30 | [github-release]: https://github.com/hnes/libaco/releases 31 | 32 | # Table of Contents 33 | 34 | * [Name](#name) 35 | * [Table of Contents](#table-of-contents) 36 | * [Status](#status) 37 | * [Synopsis](#synopsis) 38 | * 
[Description](#description) 39 | * [Build and Test](#build-and-test) 40 | * [CFLAGS](#cflags) 41 | * [Build](#build) 42 | * [Test](#test) 43 | * [Tutorials](#tutorials) 44 | * [API](#api) 45 | * [aco_thread_init](#aco_thread_init) 46 | * [aco_share_stack_new](#aco_share_stack_new) 47 | * [aco_share_stack_new2](#aco_share_stack_new2) 48 | * [aco_share_stack_destroy](#aco_share_stack_destroy) 49 | * [aco_create](#aco_create) 50 | * [aco_resume](#aco_resume) 51 | * [aco_yield](#aco_yield) 52 | * [aco_get_co](#aco_get_co) 53 | * [aco_get_arg](#aco_get_arg) 54 | * [aco_exit](#aco_exit) 55 | * [aco_destroy](#aco_destroy) 56 | * [MACROS](#macros) 57 | * [Benchmark](#benchmark) 58 | * [Proof of Correctness](#proof-of-correctness) 59 | * [Running Model](#running-model) 60 | * [Mathematical Induction](#mathematical-induction) 61 | * [Miscellaneous](#miscellaneous) 62 | * [Red Zone](#red-zone) 63 | * [Stack Pointer](#stack-pointer) 64 | * [Best Practice](#best-practice) 65 | * [TODO](#todo) 66 | * [CHANGES](#changes) 67 | * [Donation](#donation) 68 | * [Copyright and License](#copyright-and-license) 69 | 70 | # Status 71 | 72 | 可以用于生产环境。 73 | 74 | # Synopsis 75 | 76 | ```c 77 | #include "aco.h" 78 | #include <stdio.h> 79 | 80 | // this header would override the default C `assert`; 81 | // you may refer to the "API : MACROS" part for more details. 
82 | #include "aco_assert_override.h" 83 | 84 | void foo(int ct) { 85 | printf("co: %p: yield to main_co: %d\n", aco_get_co(), *((int*)(aco_get_arg()))); 86 | aco_yield(); 87 | *((int*)(aco_get_arg())) = ct + 1; 88 | } 89 | 90 | void co_fp0() { 91 | printf("co: %p: entry: %d\n", aco_get_co(), *((int*)(aco_get_arg()))); 92 | int ct = 0; 93 | while(ct < 6){ 94 | foo(ct); 95 | ct++; 96 | } 97 | printf("co: %p: exit to main_co: %d\n", aco_get_co(), *((int*)(aco_get_arg()))); 98 | aco_exit(); 99 | } 100 | 101 | int main() { 102 | aco_thread_init(NULL); 103 | 104 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 105 | aco_share_stack_t* sstk = aco_share_stack_new(0); 106 | 107 | int co_ct_arg_point_to_me = 0; 108 | aco_t* co = aco_create(main_co, sstk, 0, co_fp0, &co_ct_arg_point_to_me); 109 | 110 | int ct = 0; 111 | while(ct < 6){ 112 | assert(co->is_end == 0); 113 | printf("main_co: yield to co: %p: %d\n", co, ct); 114 | aco_resume(co); 115 | assert(co_ct_arg_point_to_me == ct); 116 | ct++; 117 | } 118 | printf("main_co: yield to co: %p: %d\n", co, ct); 119 | aco_resume(co); 120 | assert(co_ct_arg_point_to_me == ct); 121 | assert(co->is_end); 122 | 123 | printf("main_co: destroy and exit\n"); 124 | aco_destroy(co); 125 | co = NULL; 126 | aco_share_stack_destroy(sstk); 127 | sstk = NULL; 128 | aco_destroy(main_co); 129 | main_co = NULL; 130 | 131 | return 0; 132 | } 133 | ``` 134 | ```bash 135 | # default build 136 | $ gcc -g -O2 acosw.S aco.c test_aco_synopsis.c -o test_aco_synopsis 137 | $ ./test_aco_synopsis 138 | main_co: yield to co: 0x1887120: 0 139 | co: 0x1887120: entry: 0 140 | co: 0x1887120: yield to main_co: 0 141 | main_co: yield to co: 0x1887120: 1 142 | co: 0x1887120: yield to main_co: 1 143 | main_co: yield to co: 0x1887120: 2 144 | co: 0x1887120: yield to main_co: 2 145 | main_co: yield to co: 0x1887120: 3 146 | co: 0x1887120: yield to main_co: 3 147 | main_co: yield to co: 0x1887120: 4 148 | co: 0x1887120: yield to main_co: 4 149 | main_co: 
yield to co: 0x1887120: 5 150 | co: 0x1887120: yield to main_co: 5 151 | main_co: yield to co: 0x1887120: 6 152 | co: 0x1887120: exit to main_co: 6 153 | main_co: destroy and exit 154 | # i386 155 | $ gcc -g -m32 -O2 acosw.S aco.c test_aco_synopsis.c -o test_aco_synopsis 156 | # share fpu and mxcsr env 157 | $ gcc -g -D ACO_CONFIG_SHARE_FPU_MXCSR_ENV -O2 acosw.S aco.c test_aco_synopsis.c -o test_aco_synopsis 158 | # with valgrind friendly support 159 | $ gcc -g -D ACO_USE_VALGRIND -O2 acosw.S aco.c test_aco_synopsis.c -o test_aco_synopsis 160 | $ valgrind --leak-check=full --tool=memcheck ./test_aco_synopsis 161 | ``` 162 | 163 | 关于构建的更多信息请查阅"[Build and Test](#build-and-test)"部分。 164 | 165 | # Description 166 | 167 | ![thread_model_0](img/thread_model_0.png) 168 | 169 | 一个用户空间的执行状态(一般为OS线程)有四个基本要素:`{cpu_registers, code, heap, stack}`。 170 | 171 | 由于二进制程序的代码执行位置信息由`({E|R})?IP`寄存器决定,且从堆中分配出的内存地址信息一般会间接或者直接的保存在运行栈中,所以,我们可以将这个四个元素最终化简为`{cpu_registers, stack}`。 172 | 173 | ![thread_model_1](img/thread_model_1.png) 174 | 175 | 我们定义`main co`(主协程)为独占使用当前运行线程默认执行栈的协程。由于main co是这个执行栈的唯一用户,所以,在与main co相关的协程上下文切换中,我们仅需要对main co的某些必须的寄存器进行保存和恢复即可。 176 | 177 | 接着,我们定义`non-main co`(非主协程)为执行栈不是当前运行线程默认执行栈(而是它自己创建的,且有可能会与其他non-main co一起共享这个执行栈)的协程。所以,`non-main co`会有一个私有的保存栈,当它被切换进来(或者切换出去)时,会使用它的私有保存栈进行执行栈的恢复(或者保存),因为当它被切换进来(或者切换出去)时,之前的(或者之后的)运行协程可能已经使用了(或者可能将会使用)这个执行栈(在libaco实现中,私有保存栈的保存策略是惰性的最优方案,具体请参见aco_resume的源码实现细节)。 178 | 179 | ![thread_model_2](img/thread_model_2.png) 180 | 181 | 这是一个non-main co的特殊情况,在libaco中我们称之为`standalone non-main co`(独立非主协程),即独占一个执行栈的非主协程。在与standalone non-main co相关的上下文切换中,对其只需要进行一些必须寄存器的保存或恢复即可(因为它的执行栈是独占的,在它被切换出的时间里,它的执行栈的状态是不变的)。 182 | 183 | ![thread_model_3](img/thread_model_3.png) 184 | 185 | 最终,我们得到了libaco的全局鸟瞰图。 186 | 187 | 如果你想要实现自己的协程库或者更加深入的了解libaco的实现,"[Proof of Correctness](#proof-of-correctness)" 部分将会非常有用。 188 | 189 | 接下来,可以阅读[教程](#tutorials)或者性能测试部分。[性能测试的报告](#benchmark)令人印象深刻同时发人深省。 190 | 191 | # Build and Test 192 | 193 | ## CFLAGS 194 | 
195 | * `-m32` 196 | 197 | 编译器选项`-m32`能够帮助用户在AMD64平台上构建libaco的i386二进制程序。 198 | 199 | * C macro: `ACO_CONFIG_SHARE_FPU_MXCSR_ENV` 200 | 201 | 如果用户的程序在运行期间不会更改FPU和MXCSR的控制字,那么可以选择定义全局C宏 `ACO_CONFIG_SHARE_FPU_MXCSR_ENV` 以轻微地加快协程间上下文切换的速度。如果该宏没有被定义,每一个协程将会维护一份属于自己的独立FPU和MXCSR控制字环境。由于更改FPU或者MXCSR控制字的应用代码是非常少见的,用户可以选择总是全局定义该宏,但是如果并不能保证这个约束,用户应该选择不定义该宏。 202 | 203 | * C macro:`ACO_USE_VALGRIND` 204 | 205 | 如果用户想要使用valgrind的memcheck工具对libaco的应用程序进行测试,则需要在构建时定义全局C宏 `ACO_USE_VALGRIND` 以使能libaco对valgrind memcheck的支持。 由于性能的原因,在最终的生产二进制构建中并不推荐使用此宏。在全局定义了此宏的libaco应用构建之前,用户需要安装valgrind的C头文件(以CentOS为例,这个开发包的名称为"valgrind-devel")。valgrind的memcheck现在只支持拥有独立运行栈的协程,memcheck在对使用共享栈的协程进行检测时会输出很多的误报。更多的信息可以查看"[test_aco_tutorial_6.c](test_aco_tutorial_6.c)"。 206 | 207 | ## Build 208 | 209 | ```bash 210 | $ mkdir output 211 | $ bash make.sh 212 | ``` 213 | 214 | make.sh脚本中有一些更加详细的构建参数: 215 | 216 | ```bash 217 | $ bash make.sh -h 218 | Usage: make.sh [-o <no-m32|no-valgrind>] [-h] 219 | 220 | Example: 221 | # default build 222 | bash make.sh 223 | # build without the i386 binary output 224 | bash make.sh -o no-m32 225 | # build without the valgrind supported binary output 226 | bash make.sh -o no-valgrind 227 | # build without the valgrind supported and i386 binary output 228 | bash make.sh -o no-valgrind -o no-m32 229 | ``` 230 | 231 | 简而言之,如果系统中没有valgrind的C头文件,可以选择使用参数 `-o no-valgrind` 进行测试集的构建;如果系统为AMD64平台并且没有安装32位的C编译器开发工具链,可以选择使用参数 `-o no-m32` 进行测试集的构建。 232 | 233 | ## Test 234 | 235 | ```bash 236 | $ cd output 237 | $ bash ../test.sh 238 | ``` 239 | 240 | # Tutorials 241 | 242 | 文件`test_aco_tutorial_0.c`中包含了libaco的基本使用示例。在这个示例中,只包含了一个 main co 和一个 standalone non-main co,另外,代码中的注释也很有用。 243 | 244 | 文件`test_aco_tutorial_1.c`中包含了libaco协程的运行统计信息的使用示例。类型`aco_t`的定义在`aco.h`中并且清晰易懂。 245 | 246 | 在文件`test_aco_tutorial_2.c`中,包含了一个standalone non-main co和两个共享同一个执行栈的non-main co。 247 | 248 | 
文件`test_aco_tutorial_3.c`展示了如何在多线程环境中使用libaco。从根本上讲,为了获得最好的协程间上下文切换性能,在设计时一个libaco的运行实例应该仅仅工作在一个固定的线程中。这样,如果你想在多线程中使用libaco,只需要分别在各个线程中像在单线程中那样使用libaco一样使用它即可。在libaco内部没有任何的线程间数据共享;在多线程场景下,用户需要自己处理好自己的数据竞争问题(就像此实例中`gl_race_aco_yield_ct`线程间共享变量做的那样)。 249 | 250 | 在libaco中,请调用API `aco_exit()`来进行终结non-main co的执行,而不要直接使用默认的C关键字`return`进行返回(否则libaco会将这种行为当做异常事件并触发默认的protector流程:输出错误信息至stderr并立即调用`abort`来终结进程的执行)。源文件`test_aco_tutorial_4.c`中示范了一个违背了此规则的协程实例。 251 | 252 | 同时,用户也可以选择定制自己想要的protector处理逻辑(比如去做一些自定义的"last words"即“遗嘱”任务)。但是无论如何,当protector被执行完毕后,当前进程一定会被`abort`。源文件`test_aco_tutorial_5.c`中描述了如何自定义protector。 253 | 254 | 源文件`test_aco_tutorial_6.c`中示范了一个简单的协程调度器的实例。 255 | 256 | # API 257 | 258 | 在阅读下面的API文档时,建议也可以同时阅读对应源码中的实现,因为源码非常的清晰易读。同时,在阅读API文档之前,推荐先阅读[教程](#tutorials)部分。 259 | 260 | 另外,在开始写libaco的应用之前,强烈建议先进行阅读[最佳实践](#best-practice)章节,此章节中除了描述如何应用libaco以让其性能发挥到极致,也描述了一些libaco编程时的注意事项。 261 | 262 | 注意:libaco的版本控制遵从[Semantic Versioning 2.0.0](https://semver.org/spec/v2.0.0.html)标准。所以,下面列出的所有API均有标准中所描述的兼容性保证(请注意,没有在下面API列表中的函数调用则没有如此的保证)。 263 | 264 | ## aco_thread_init 265 | 266 | ```c 267 | typedef void (*aco_cofuncp_t)(void); 268 | void aco_thread_init(aco_cofuncp_t last_word_co_fp); 269 | ``` 270 | 271 | 在当前运行线程中初始化libaco的执行环境。 272 | 273 | 此API会将当前FPU与MXCSR的控制字保存到一个TLS全局变量中。 274 | 275 | * 如果全局C宏 `ACO_CONFIG_SHARE_FPU_MXCSR_ENV` 没有被定义,保存的控制字接下来会被用来初始化新协程(`aco_create`)的FPU与MXCSR的控制字,然后每一个协程都将会在以后的协程上下文切换中独立维护这一份属于自己的FPU与MXCSR的控制字配置。 276 | * 如果全局C宏 `ACO_CONFIG_SHARE_FPU_MXCSR_ENV` 被定义了,所有的协程将会共享同一份FPU与MXCSR的控制字配置。如果在这方面想了解更多,请查阅 "[Build and Test](#build-and-test)" 部分。 277 | 278 | 就像在 "[Tutorials](#tutorials)" 中关于 `test_aco_tutorial_5.c` 部分所陈述的那样,API的第一个入参`last_word_co_fp`为用户自定义的 "last words" 函数指针, 如果它的值非NULL,将会取代默认的protector handler(在进程abort之前做一些 "last words" 相关的事情)。在这样的 "last word" 函数中,用户可以调用API `aco_get_co` 以获得当前协程的指针。可以通过阅读源文件`test_aco_tutorial_5.c`以获得与此相关的更多信息。 279 | 280 | ## aco_share_stack_new 281 | 282 | ```c 283 | aco_share_stack_t* 
aco_share_stack_new(size_t sz); 284 | ``` 285 | 286 | 等价于调用`aco_share_stack_new2(sz, 1)`。 287 | 288 | ## aco_share_stack_new2 289 | 290 | ```c 291 | aco_share_stack_t* aco_share_stack_new2(size_t sz, char guard_page_enabled); 292 | ``` 293 | 294 | 创建一个新的执行栈,入参`sz`是对要创建执行栈的大小的一个建议性字节值,入参`guard_page_enabled`决定了要创建的执行栈是否会拥有一个只读的 "guard page" (可以用来检测执行栈的溢出)。 295 | 296 | 当第一入参`sz`为0时,表示选择使用默认的大小值(2MB)。经过一系列关于内存对齐和保留的运算后,该API保证最终创建出的执行栈满足下列所有条件: 297 | 298 | * `final_valid_sz >= 4096` 299 | * `final_valid_sz >= sz` 300 | * `final_valid_sz % page_size == 0 if the guard_page_enabled != 0` 301 | 302 | 并且尽可能的接近入参`sz`的值。 303 | 304 | 当第二入参`guard_page_enabled`的值为1时,创建的执行栈将会拥有一个只读的用来检测执行栈溢出的 "guard page",为0时则不会拥有这样的 "guard page" 。 305 | 306 | 此函数总是成功地返回一个可用的执行栈。 307 | 308 | ## aco_share_stack_destroy 309 | 310 | ```c 311 | void aco_share_stack_destroy(aco_share_stack_t* sstk); 312 | ``` 313 | 314 | 销毁执行栈`sstk`。 315 | 316 | 在销毁执行栈`sstk`之前,请确定所有使用这个执行栈的协程已经全部被销毁。 317 | 318 | ## aco_create 319 | 320 | ```c 321 | typedef void (*aco_cofuncp_t)(void); 322 | aco_t* aco_create(aco_t* main_co,aco_share_stack_t* share_stack, 323 | size_t save_stack_sz, aco_cofuncp_t co_fp, void* arg); 324 | ``` 325 | 326 | 创建一个新的协程。 327 | 328 | 如果想创建一个main co,直接调用:`aco_create(NULL, NULL, 0, NULL, NULL)`。Main co是一个特殊的standalone coroutine,它的执行栈是当前线程默认的执行栈。在一个线程中,main co 是被第一个创建并且是在所有其他non-main coroutine之前就已经开始运行了的协程。 329 | 330 | 如果想使用此API创建一个non-main co: 331 | 332 | * 第一个入参`main_co`指向当前线程中的main co,创建出的non-main co以后在调用API `aco_yield`时将会将执行流程转交给入参`main_co`指向的main co,入参`main co`必然非NULL; 333 | * 第二个入参`share_stack`指向要创建的non-main co以后要使用的执行栈。`share_stack` 必然非NULL。 334 | * 第三个入参`save_stack_sz`指定要创建的non-main co的私有保存栈的初始大小,其单位为字节。值0表示使用默认的初始大小64字节。由于在以后的non-main co执行过程中,如果其私有保存栈不够大时将会进行自动地大小调整,所以一般情况下,用户不需要担心它的值。但是,如果有巨量的协程(比如一千万个)相继的进行大小调整,将会给内存分配器带来一些性能冲击,所以一个更加明智的选择是,给入参`save_stack_sz`赋予一个协程运行期间保存栈需要的最大值(即`co->save_stack.max_cpsz`的值),查阅 "[最佳实践](#best-practice)" 部分以获得与此相关的更多优化信息。 335 | * 第四个入参`co_fp`是要创建non-main 
co的入口函数指针。`co_fp`必然非NULL。 336 | * 最后一个入参`arg`为一个指针值,将会设置为要创建non-main co的`co->arg`的值,`co->arg`一般用来作为协程的输入参数。 337 | 338 | 此API将会永远地成功返回一个可用的协程。同时,我们定义`aco_create`返回的non-main co处于 "init" 状态。 339 | 340 | ## aco_resume 341 | 342 | ```c 343 | void aco_resume(aco_t* co); 344 | ``` 345 | 346 | 从调用者处Yield出来并开始或者继续协程`co`的执行。 347 | 348 | 此API的调用者必须是main co并且必须是`co->main_co`,入参`co`必须是non-main co。 349 | 350 | 第一次Resume协程`co`时,将会开始`co`的执行(函数指针`co->fp`指向的函数)。如果协程`co`已经Yielded,`aco_resume`将会继续`co`的执行。 351 | 352 | 在API `aco_resume`被调用之后,我们定义调用者 -- main co 的状态为 "yielded" 。 353 | 354 | ## aco_yield 355 | 356 | ```c 357 | void aco_yield(); 358 | ``` 359 | 360 | 从调用者`co`处Yield出来并且Resume `co->main_co`的执行。 361 | 362 | 此API的调用者必须为non-main co,`co->main_co`必须非NULL。 363 | 364 | 在API `aco_yield`被调用之后,我们定义`co`的状态为 "yielded" 。 365 | 366 | ## aco_get_co 367 | 368 | ```c 369 | aco_t* aco_get_co(); 370 | ``` 371 | 372 | 返回当前non-main co的指针。此API的调用者必须是non-main co。 373 | 374 | ## aco_get_arg 375 | 376 | ```c 377 | void* aco_get_arg(); 378 | ``` 379 | 380 | 等价于`(aco_get_co()->arg)`。同样的,此API的调用者必须是non-main co。 381 | 382 | ## aco_exit 383 | 384 | ```c 385 | void aco_exit(); 386 | ``` 387 | 388 | 除了与`aco_yield()`一样的功能之外,`aco_exit()`会另外设置`co->is_end`为1,以标志`co`的状态为 "end" 。 389 | 390 | ## aco_destroy 391 | 392 | ```c 393 | void aco_destroy(aco_t* co); 394 | ``` 395 | 396 | 销毁协程`co`。入参`co`必须非NULL。如果`co`是一个non-main co,此API也会同时销毁`co`的私有保存栈。 397 | 398 | ## MACROS 399 | 400 | ### Version 401 | 402 | ```c 403 | #define ACO_VERSION_MAJOR 1 404 | #define ACO_VERSION_MINOR 2 405 | #define ACO_VERSION_PATCH 2 406 | ``` 407 | 408 | 这三个关于libaco版本值的宏定义在头文件`aco.h`中,它们的值遵守标准:[Semantic Versioning 2.0.0](https://semver.org/spec/v2.0.0.html)。 409 | 410 | ### aco_assert_override.h 411 | 412 | ```c 413 | // provide the compiler with branch prediction information 414 | #define likely(x) aco_likely(x) 415 | #define unlikely(x) aco_unlikely(x) 416 | 417 | // override the default `assert` for convenience when coding 418 | #define 
assert(EX) aco_assert(EX) 419 | 420 | // equal to `assert((ptr) != NULL)` 421 | #define assertptr(ptr) aco_assertptr(ptr) 422 | 423 | // assert the successful return of memory allocation 424 | #define assertalloc_bool(b) aco_assertalloc_bool(b) 425 | #define assertalloc_ptr(ptr) aco_assertalloc_ptr(ptr) 426 | ``` 427 | 428 | 像源文件[test_aco_synopsis.c](test_aco_synopsis.c) 所做的那样,用户可以选择在自己的应用源码中include头文件`"aco_assert_override.h"`来替换掉C默认的 "[assert](http://man7.org/linux/man-pages/man3/assert.3.html)" 以及定义除了`assert`之外的其它五个宏(如上所示)。因为C的 "[assert](http://man7.org/linux/man-pages/man3/assert.3.html)" 也是一个宏定义,所以在include头文件 `"aco_assert_override.h"` 时,应该将它放到源文件中所有include指令中的最后一个。如果在一个源文件中,用户想要在某个源文件中使用默认的C "[assert](http://man7.org/linux/man-pages/man3/assert.3.html)",请不要在其中include这个头文件。 429 | 430 | 阅读源文件[aco_assert_override.h](aco_assert_override.h)以获得关于此的更多信息。 431 | 432 | # Benchmark 433 | 434 | Date: Sat Jun 30 UTC 2018. 435 | 436 | Machine: [c5d.large on AWS](https://aws.amazon.com/cn/blogs/aws/now-available-compute-intensive-c5-instances-for-amazon-ec2/). 437 | 438 | OS: RHEL-7.5 (Red Hat Enterprise Linux 7.5). 
439 | 440 | 下面是关于性能测试部分的一个摘要描述: 441 | 442 | * 一次协程间上下文切换仅耗时 **10.29 ns** (协程拥有独立的运行栈,并且协程间共享FPU与MXCSR控制字配置的情况下); 443 | * 一次协程间上下文切换仅耗时 **10.38 ns** (协程拥有独立的运行栈,并且各协程均维护一份属于各自的FPU与MXCSR控制字配置的情况下); 444 | * 极致的内存使用率:一千万个协程并发执行仅消耗2.8GB的物理内存(tcmalloc,每一个协程使用120B的复制栈)。 445 | 446 | ``` 447 | $ LD_PRELOAD=/usr/lib64/libtcmalloc_minimal.so.4 ./test_aco_benchmark..no_valgrind.shareFPUenv 448 | 449 | +build:x86_64 450 | +build:-DACO_CONFIG_SHARE_FPU_MXCSR_ENV 451 | +build:share fpu & mxcsr control words between coroutines 452 | +build:undefined ACO_USE_VALGRIND 453 | +build:without valgrind memcheck friendly support 454 | 455 | sizeof(aco_t)=152: 456 | 457 | 458 | comment task_amount all_time_cost ns_per_op speed 459 | 460 | aco_create/init_save_stk_sz=64B 1 0.000 s 230.00 ns/op 4347824.79 op/s 461 | aco_resume/co_amount=1/copy_stack_size=0B 20000000 0.412 s 20.59 ns/op 48576413.55 op/s 462 | -> acosw 40000000 0.412 s 10.29 ns/op 97152827.10 op/s 463 | aco_destroy 1 0.000 s 650.00 ns/op 1538461.66 op/s 464 | 465 | aco_create/init_save_stk_sz=64B 1 0.000 s 200.00 ns/op 5000001.72 op/s 466 | aco_resume/co_amount=1/copy_stack_size=0B 20000000 0.412 s 20.61 ns/op 48525164.25 op/s 467 | -> acosw 40000000 0.412 s 10.30 ns/op 97050328.50 op/s 468 | aco_destroy 1 0.000 s 666.00 ns/op 1501501.49 op/s 469 | 470 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.50 ns/op 15266771.53 op/s 471 | aco_resume/co_amount=2000000/copy_stack_size=8B 20000000 0.666 s 33.29 ns/op 30043022.64 op/s 472 | aco_destroy 2000000 0.066 s 32.87 ns/op 30425152.25 op/s 473 | 474 | aco_create/init_save_stk_sz=64B 2000000 0.130 s 65.22 ns/op 15332218.24 op/s 475 | aco_resume/co_amount=2000000/copy_stack_size=24B 20000000 0.675 s 33.75 ns/op 29630018.73 op/s 476 | aco_destroy 2000000 0.067 s 33.45 ns/op 29898311.36 op/s 477 | 478 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.42 ns/op 15286937.97 op/s 479 | aco_resume/co_amount=2000000/copy_stack_size=40B 20000000 0.669 s 33.45 ns/op 29891277.59 op/s 
480 | aco_destroy 2000000 0.080 s 39.87 ns/op 25084242.29 op/s 481 | 482 | aco_create/init_save_stk_sz=64B 2000000 0.224 s 111.86 ns/op 8940010.49 op/s 483 | aco_resume/co_amount=2000000/copy_stack_size=56B 20000000 0.678 s 33.88 ns/op 29515473.53 op/s 484 | aco_destroy 2000000 0.067 s 33.42 ns/op 29922412.68 op/s 485 | 486 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.74 ns/op 15211896.70 op/s 487 | aco_resume/co_amount=2000000/copy_stack_size=120B 20000000 0.769 s 38.45 ns/op 26010724.94 op/s 488 | aco_destroy 2000000 0.088 s 44.11 ns/op 22669240.25 op/s 489 | 490 | aco_create/init_save_stk_sz=64B 10000000 1.240 s 123.97 ns/op 8066542.54 op/s 491 | aco_resume/co_amount=10000000/copy_stack_size=8B 40000000 1.327 s 33.17 ns/op 30143409.55 op/s 492 | aco_destroy 10000000 0.328 s 32.82 ns/op 30467658.05 op/s 493 | 494 | aco_create/init_save_stk_sz=64B 10000000 0.659 s 65.94 ns/op 15165717.02 op/s 495 | aco_resume/co_amount=10000000/copy_stack_size=24B 40000000 1.345 s 33.63 ns/op 29737708.53 op/s 496 | aco_destroy 10000000 0.337 s 33.71 ns/op 29666697.09 op/s 497 | 498 | aco_create/init_save_stk_sz=64B 10000000 0.654 s 65.38 ns/op 15296191.35 op/s 499 | aco_resume/co_amount=10000000/copy_stack_size=40B 40000000 1.348 s 33.71 ns/op 29663992.77 op/s 500 | aco_destroy 10000000 0.336 s 33.56 ns/op 29794574.96 op/s 501 | 502 | aco_create/init_save_stk_sz=64B 10000000 0.653 s 65.29 ns/op 15316087.09 op/s 503 | aco_resume/co_amount=10000000/copy_stack_size=56B 40000000 1.384 s 34.60 ns/op 28902221.24 op/s 504 | aco_destroy 10000000 0.337 s 33.73 ns/op 29643682.93 op/s 505 | 506 | aco_create/init_save_stk_sz=64B 10000000 0.652 s 65.19 ns/op 15340872.40 op/s 507 | aco_resume/co_amount=10000000/copy_stack_size=120B 40000000 1.565 s 39.11 ns/op 25566255.73 op/s 508 | aco_destroy 10000000 0.443 s 44.30 ns/op 22574242.55 op/s 509 | 510 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.61 ns/op 15241722.94 op/s 511 | aco_resume/co_amount=2000000/copy_stack_size=136B 
20000000 0.947 s 47.36 ns/op 21114212.05 op/s 512 | aco_destroy 2000000 0.125 s 62.35 ns/op 16039466.45 op/s 513 | 514 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.71 ns/op 15218784.72 op/s 515 | aco_resume/co_amount=2000000/copy_stack_size=136B 20000000 0.948 s 47.39 ns/op 21101216.29 op/s 516 | aco_destroy 2000000 0.125 s 62.73 ns/op 15941559.26 op/s 517 | 518 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.49 ns/op 15270258.18 op/s 519 | aco_resume/co_amount=2000000/copy_stack_size=152B 20000000 1.069 s 53.44 ns/op 18714275.17 op/s 520 | aco_destroy 2000000 0.122 s 61.05 ns/op 16378678.85 op/s 521 | 522 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 65.91 ns/op 15171336.62 op/s 523 | aco_resume/co_amount=2000000/copy_stack_size=232B 20000000 1.190 s 59.48 ns/op 16813230.99 op/s 524 | aco_destroy 2000000 0.123 s 61.26 ns/op 16324298.25 op/s 525 | 526 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.68 ns/op 15224361.30 op/s 527 | aco_resume/co_amount=2000000/copy_stack_size=488B 20000000 1.828 s 91.40 ns/op 10941133.56 op/s 528 | aco_destroy 2000000 0.145 s 72.56 ns/op 13781182.82 op/s 529 | 530 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 65.80 ns/op 15197461.34 op/s 531 | aco_resume/co_amount=2000000/copy_stack_size=488B 20000000 1.829 s 91.47 ns/op 10932139.32 op/s 532 | aco_destroy 2000000 0.149 s 74.70 ns/op 13387258.82 op/s 533 | 534 | aco_create/init_save_stk_sz=64B 1000000 0.067 s 66.63 ns/op 15007426.35 op/s 535 | aco_resume/co_amount=1000000/copy_stack_size=1000B 20000000 4.224 s 211.20 ns/op 4734744.76 op/s 536 | aco_destroy 1000000 0.093 s 93.36 ns/op 10711651.49 op/s 537 | 538 | aco_create/init_save_stk_sz=64B 1000000 0.066 s 66.28 ns/op 15086953.73 op/s 539 | aco_resume/co_amount=1000000/copy_stack_size=1000B 20000000 4.222 s 211.12 ns/op 4736537.93 op/s 540 | aco_destroy 1000000 0.094 s 94.09 ns/op 10627664.78 op/s 541 | 542 | aco_create/init_save_stk_sz=64B 100000 0.007 s 70.72 ns/op 14139923.59 op/s 543 | 
aco_resume/co_amount=100000/copy_stack_size=1000B 20000000 4.191 s 209.56 ns/op 4771909.70 op/s 544 | aco_destroy 100000 0.010 s 101.21 ns/op 9880747.28 op/s 545 | 546 | aco_create/init_save_stk_sz=64B 100000 0.007 s 66.62 ns/op 15010433.00 op/s 547 | aco_resume/co_amount=100000/copy_stack_size=2024B 20000000 7.002 s 350.11 ns/op 2856228.03 op/s 548 | aco_destroy 100000 0.016 s 159.69 ns/op 6262129.35 op/s 549 | 550 | aco_create/init_save_stk_sz=64B 100000 0.007 s 65.76 ns/op 15205994.08 op/s 551 | aco_resume/co_amount=100000/copy_stack_size=4072B 20000000 11.918 s 595.90 ns/op 1678127.54 op/s 552 | aco_destroy 100000 0.019 s 186.32 ns/op 5367189.85 op/s 553 | 554 | aco_create/init_save_stk_sz=64B 100000 0.006 s 63.03 ns/op 15865531.37 op/s 555 | aco_resume/co_amount=100000/copy_stack_size=7992B 20000000 21.808 s 1090.42 ns/op 917079.11 op/s 556 | aco_destroy 100000 0.038 s 378.33 ns/op 2643225.42 op/s 557 | ``` 558 | 559 | ``` 560 | $ LD_PRELOAD=/usr/lib64/libtcmalloc_minimal.so.4 ./test_aco_benchmark..no_valgrind.standaloneFPUenv 561 | 562 | +build:x86_64 563 | +build:undefined ACO_CONFIG_SHARE_FPU_MXCSR_ENV 564 | +build:each coroutine maintain each own fpu & mxcsr control words 565 | +build:undefined ACO_USE_VALGRIND 566 | +build:without valgrind memcheck friendly support 567 | 568 | sizeof(aco_t)=160: 569 | 570 | 571 | comment task_amount all_time_cost ns_per_op speed 572 | 573 | aco_create/init_save_stk_sz=64B 1 0.000 s 273.00 ns/op 3663004.27 op/s 574 | aco_resume/co_amount=1/copy_stack_size=0B 20000000 0.415 s 20.76 ns/op 48173877.75 op/s 575 | -> acosw 40000000 0.415 s 10.38 ns/op 96347755.51 op/s 576 | aco_destroy 1 0.000 s 381.00 ns/op 2624672.26 op/s 577 | 578 | aco_create/init_save_stk_sz=64B 1 0.000 s 212.00 ns/op 4716980.43 op/s 579 | aco_resume/co_amount=1/copy_stack_size=0B 20000000 0.415 s 20.75 ns/op 48185455.26 op/s 580 | -> acosw 40000000 0.415 s 10.38 ns/op 96370910.51 op/s 581 | aco_destroy 1 0.000 s 174.00 ns/op 5747123.38 op/s 582 | 583 | 
aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.63 ns/op 15237386.02 op/s 584 | aco_resume/co_amount=2000000/copy_stack_size=8B 20000000 0.664 s 33.20 ns/op 30119155.82 op/s 585 | aco_destroy 2000000 0.065 s 32.67 ns/op 30604542.55 op/s 586 | 587 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.33 ns/op 15305975.29 op/s 588 | aco_resume/co_amount=2000000/copy_stack_size=24B 20000000 0.675 s 33.74 ns/op 29638360.61 op/s 589 | aco_destroy 2000000 0.067 s 33.31 ns/op 30016633.42 op/s 590 | 591 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.61 ns/op 15241767.78 op/s 592 | aco_resume/co_amount=2000000/copy_stack_size=40B 20000000 0.678 s 33.88 ns/op 29518648.08 op/s 593 | aco_destroy 2000000 0.079 s 39.74 ns/op 25163018.30 op/s 594 | 595 | aco_create/init_save_stk_sz=64B 2000000 0.221 s 110.73 ns/op 9030660.30 op/s 596 | aco_resume/co_amount=2000000/copy_stack_size=56B 20000000 0.684 s 34.18 ns/op 29253416.65 op/s 597 | aco_destroy 2000000 0.067 s 33.40 ns/op 29938840.64 op/s 598 | 599 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.60 ns/op 15244077.65 op/s 600 | aco_resume/co_amount=2000000/copy_stack_size=120B 20000000 0.769 s 38.43 ns/op 26021228.41 op/s 601 | aco_destroy 2000000 0.087 s 43.74 ns/op 22863987.42 op/s 602 | 603 | aco_create/init_save_stk_sz=64B 10000000 1.251 s 125.08 ns/op 7994958.59 op/s 604 | aco_resume/co_amount=10000000/copy_stack_size=8B 40000000 1.327 s 33.19 ns/op 30133654.80 op/s 605 | aco_destroy 10000000 0.329 s 32.85 ns/op 30439787.32 op/s 606 | 607 | aco_create/init_save_stk_sz=64B 10000000 0.674 s 67.37 ns/op 14843796.57 op/s 608 | aco_resume/co_amount=10000000/copy_stack_size=24B 40000000 1.354 s 33.84 ns/op 29548523.05 op/s 609 | aco_destroy 10000000 0.339 s 33.90 ns/op 29494634.83 op/s 610 | 611 | aco_create/init_save_stk_sz=64B 10000000 0.672 s 67.19 ns/op 14882262.88 op/s 612 | aco_resume/co_amount=10000000/copy_stack_size=40B 40000000 1.361 s 34.02 ns/op 29393520.19 op/s 613 | aco_destroy 10000000 0.338 s 33.77 
ns/op 29609577.59 op/s 614 | 615 | aco_create/init_save_stk_sz=64B 10000000 0.673 s 67.31 ns/op 14857716.02 op/s 616 | aco_resume/co_amount=10000000/copy_stack_size=56B 40000000 1.371 s 34.27 ns/op 29181897.80 op/s 617 | aco_destroy 10000000 0.339 s 33.85 ns/op 29540633.63 op/s 618 | 619 | aco_create/init_save_stk_sz=64B 10000000 0.672 s 67.24 ns/op 14873017.10 op/s 620 | aco_resume/co_amount=10000000/copy_stack_size=120B 40000000 1.548 s 38.71 ns/op 25835542.17 op/s 621 | aco_destroy 10000000 0.446 s 44.61 ns/op 22415961.64 op/s 622 | 623 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 66.01 ns/op 15148290.52 op/s 624 | aco_resume/co_amount=2000000/copy_stack_size=136B 20000000 0.944 s 47.22 ns/op 21177946.19 op/s 625 | aco_destroy 2000000 0.124 s 61.99 ns/op 16132721.97 op/s 626 | 627 | aco_create/init_save_stk_sz=64B 2000000 0.133 s 66.36 ns/op 15068860.85 op/s 628 | aco_resume/co_amount=2000000/copy_stack_size=136B 20000000 0.944 s 47.20 ns/op 21187541.38 op/s 629 | aco_destroy 2000000 0.124 s 62.21 ns/op 16073322.25 op/s 630 | 631 | aco_create/init_save_stk_sz=64B 2000000 0.131 s 65.62 ns/op 15238955.93 op/s 632 | aco_resume/co_amount=2000000/copy_stack_size=152B 20000000 1.072 s 53.61 ns/op 18652789.74 op/s 633 | aco_destroy 2000000 0.121 s 60.42 ns/op 16551368.04 op/s 634 | 635 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 66.08 ns/op 15132547.65 op/s 636 | aco_resume/co_amount=2000000/copy_stack_size=232B 20000000 1.198 s 59.88 ns/op 16699389.91 op/s 637 | aco_destroy 2000000 0.121 s 60.71 ns/op 16471465.52 op/s 638 | 639 | aco_create/init_save_stk_sz=64B 2000000 0.133 s 66.50 ns/op 15036985.95 op/s 640 | aco_resume/co_amount=2000000/copy_stack_size=488B 20000000 1.853 s 92.63 ns/op 10796126.04 op/s 641 | aco_destroy 2000000 0.146 s 72.87 ns/op 13723559.36 op/s 642 | 643 | aco_create/init_save_stk_sz=64B 2000000 0.132 s 66.14 ns/op 15118324.13 op/s 644 | aco_resume/co_amount=2000000/copy_stack_size=488B 20000000 1.855 s 92.75 ns/op 10781572.22 op/s 
645 | aco_destroy 2000000 0.152 s 75.79 ns/op 13194130.51 op/s 646 | 647 | aco_create/init_save_stk_sz=64B 1000000 0.067 s 66.97 ns/op 14931921.56 op/s 648 | aco_resume/co_amount=1000000/copy_stack_size=1000B 20000000 4.218 s 210.90 ns/op 4741536.66 op/s 649 | aco_destroy 1000000 0.093 s 93.16 ns/op 10734691.98 op/s 650 | 651 | aco_create/init_save_stk_sz=64B 1000000 0.066 s 66.49 ns/op 15039274.31 op/s 652 | aco_resume/co_amount=1000000/copy_stack_size=1000B 20000000 4.216 s 210.81 ns/op 4743543.53 op/s 653 | aco_destroy 1000000 0.094 s 93.97 ns/op 10641539.58 op/s 654 | 655 | aco_create/init_save_stk_sz=64B 100000 0.007 s 70.95 ns/op 14094724.73 op/s 656 | aco_resume/co_amount=100000/copy_stack_size=1000B 20000000 4.190 s 209.52 ns/op 4772746.50 op/s 657 | aco_destroy 100000 0.010 s 100.99 ns/op 9902271.51 op/s 658 | 659 | aco_create/init_save_stk_sz=64B 100000 0.007 s 66.49 ns/op 15040038.84 op/s 660 | aco_resume/co_amount=100000/copy_stack_size=2024B 20000000 7.028 s 351.38 ns/op 2845942.55 op/s 661 | aco_destroy 100000 0.016 s 159.15 ns/op 6283444.42 op/s 662 | 663 | aco_create/init_save_stk_sz=64B 100000 0.007 s 65.73 ns/op 15214482.36 op/s 664 | aco_resume/co_amount=100000/copy_stack_size=4072B 20000000 11.879 s 593.95 ns/op 1683636.60 op/s 665 | aco_destroy 100000 0.018 s 184.23 ns/op 5428119.00 op/s 666 | 667 | aco_create/init_save_stk_sz=64B 100000 0.006 s 63.41 ns/op 15771072.16 op/s 668 | aco_resume/co_amount=100000/copy_stack_size=7992B 20000000 21.808 s 1090.42 ns/op 917081.56 op/s 669 | aco_destroy 100000 0.038 s 376.78 ns/op 2654073.13 op/s 670 | ``` 671 | 672 | # Proof of Correctness 673 | 674 | 首先,在开始实现或者证明一个协程库之前,必备的条件是要对[Sys V ABI of intel386 and x86-64](https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI)标准非常的熟悉,以及一些基础的汇编知识。 675 | 676 | 接下来的证明中并没有包含关于IP(指令指针),SP(堆栈指针)和协程的私有保存栈与共享执行栈之间的保存与恢复的直接描述,因为相比于ABI约束的保证,这些东西是相当微不足道且容易实现和理解的。 677 | 678 | ## Running Model 679 | 680 | 在一个OS线程中,主协程`main_co`是被第一个创建并且是在所有其他non-main coroutine之前就已经开始运行了的协程。 681 
| 682 | 下图是协程main co与co之间上下文切换的简单图示。 683 | 684 | 在这个证明中,我们假定我们的二进制程序要满足Sys V ABI intel386标准,因为Sys V ABI intel386与Sys V ABI x86-64之间没有根本的不同。为了简化描述,我们还假定二进制程序中没有会更改FPU或MXCSR控制字的代码存在。 685 | 686 | ![proof_0](img/proof_0.png) 687 | 688 | 下图实际上是对称协程的运行模型图(拥有不限量个non-main co和一个main co)。因为非对称协程仅仅是对称协程的一种特殊情况,所以我们如果证明了对称协程的正确性也就等于证明了非对称协程的正确性,如此会多些挑战性同时也会多些乐趣(libaco当前只实现了非对称协程的API,因为非对称协程的API语义远远比对称协程的API语义更容易理解和掌控)。 689 | 690 | ![proof_1](img/proof_1.png) 691 | 692 | 因为main co是在当前OS线程中第一个开始运行的协程,那么第一次协程间上下文切换一定是以`acosw(main_co, co)`这种形式存在的(这里,`acosw`的第二个入参`co`是一个non-main co)。 693 | 694 | ## Mathematical Induction 695 | 696 | 容易证明,在上图中只存在两类协程间的状态迁移: 697 | 698 | * yielded state co → init state co 699 | * yielded state co → yielded state co 700 | 701 | 要证明协程上下文切换函数`void* acosw(aco_t* from_co, aco_t* to_co)`的正确性,就等于要证明所有的协程在调用`acosw`前后都一直满足Sys V ABI规范的约束。我们假定协程中除了`acosw`之外的所有二进制均已经满足了ABI规范(它们一般是由编译器正确地生成的)。 702 | 703 | 下面是Sys V ABI Intel386函数调用约定中寄存器用法的总结: 704 | 705 | ``` 706 | Registers' usage in the calling convention of the Intel386 System V ABI: 707 | caller saved (scratch) registers: 708 | C1.0: EAX 709 | At the entry of a function call: 710 | could be any value 711 | After the return of `acosw`: 712 | hold the return value for `acosw` 713 | C1.1: ECX,EDX 714 | At the entry of a function call: 715 | could be any value 716 | After the return of `acosw`: 717 | could be any value 718 | C1.2: Arithmetic flags, x87 and mxcsr flags 719 | At the entry of a function call: 720 | could be any value 721 | After the return of `acosw`: 722 | could be any value 723 | C1.3: ST(0-7) 724 | At the entry of a function call: 725 | the stack of FPU must be empty 726 | After the return of `acosw`: 727 | the stack of FPU must be empty 728 | C1.4: Direction flag 729 | At the entry of a function call: 730 | DF must be 0 731 | After the return of `acosw`: 732 | DF must be 0 733 | C1.5: others: xmm*,ymm*,mm*,k*... 
734 | At the entry of a function call: 735 | could be any value 736 | After the return of `acosw`: 737 | could be any value 738 | callee saved registers: 739 | C2.0: EBX,ESI,EDI,EBP 740 | At the entry of a function call: 741 | could be any value 742 | After the return of `acosw`: 743 | must be the same as it is at the entry of `acosw` 744 | C2.1: ESP 745 | At the entry of a function call: 746 | must be a valid stack pointer 747 | (alignment of 16 bytes, retaddr and etc...) 748 | After the return of `acosw`: 749 | must be the same as it is before the call of `acosw` 750 | C2.2: control word of FPU & mxcsr 751 | At the entry of a function call: 752 | could be any configuration 753 | After the return of `acosw`: 754 | must be the same as it is before the call of `acosw` 755 | (unless the caller of `acosw` assume `acosw` may \ 756 | change the control words of FPU or MXCSR on purpose \ 757 | like `fesetenv`) 758 | ``` 759 | 760 | (对于Intel386,寄存器的用途定义在[Sys V ABI Intel386 V1.1](https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI)的 "P13 - Table 2.3: Register Usage" 表中,对于AMD64则定义在[Sys V ABI AMD64 V1.0](https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI)的 "P23 - Figure 3.4: Register Usage" 的图中。) 761 | 762 | **Proof:** 763 | 764 | 1. yielded state co -> init state co: 765 | 766 | ![proof_2](img/proof_2.png) 767 | 768 | 上图详细地描绘了第一类状态迁移的过程: "yielded state co -> init state co" . 769 | 770 | 约束: C 1.0, 1.1, 1.2, 1.5 (*满足* ✓ ) 771 | 772 | 下面列出的Scratch Registers在一个函数的入口点时其值可以为任意值: 773 | 774 | ``` 775 | EAX,ECX,EDX 776 | XMM*,YMM*,MM*,K*... 777 | status bits of EFLAGS,FPU,MXCSR 778 | ``` 779 | 780 | 约束: C 1.3, 1.4 (*满足* ✓ ) 781 | 782 | 由于在`acosw`被调用之前,FPU栈必然已空并且DF必然已为0(因为协程co的二进制代码已经满足ABI规范),所以,`acosw`满足约束C1.3和1.4。 783 | 784 | 约束: C 2.0, 2.1, 2.2 (*满足* ✓ ) 785 | 786 | 约束C2.0和2.1已经被满足。由于我们已假定FPU与MXCSR的控制字在程序运行过程中不会被更改,所以约束C2.2也已经被`acosw`满足。 787 | 788 | 2. 
yielded state co -> yielded state co: 789 | 790 | ![proof_3](img/proof_3.png) 791 | 792 | 上图详细地描绘了第二类状态迁移的过程: yielded state co -> yielded state co. 793 | 794 | 约束: C 1.0 (*满足* ✓ ) 795 | 796 | 很显然,当`acosw`返回到to_co时EAX中已经保存了预期的返回值。 797 | 798 | 约束: C 1.1, 1.2, 1.5 (*满足* ✓ ) 799 | 800 | 下面列出的Scratch Registers在一个函数的入口点时以及在`acosw`返回后其值皆可为任意值: 801 | 802 | ``` 803 | ECX,EDX 804 | XMM*,YMM*,MM*,K*... 805 | status bits of EFLAGS,FPU,MXCSR 806 | ``` 807 | 808 | 约束: C 1.3, 1.4 (*满足* ✓ ) 809 | 810 | 由于在`acosw`被调用之前,FPU栈必然已空并且DF必然已为0(因为协程co的二进制代码已经满足ABI规范),所以,`acosw`满足约束C1.3和1.4。 811 | 812 | 约束: C 2.0, 2.1, 2.2 (*满足* ✓ ) 813 | 814 | 从`acosw`调用者的角度来看,由于在`acosw`被调用(或返回)时,所有的callee saved registers都做了对应的保存(或恢复)工作,则约束C2.0与2.1被`acosw`满足。由于我们已假定FPU与MXCSR的控制字在程序运行过程中不会被更改,所以约束C2.2也已经被`acosw`满足。 815 | 816 | 3. Mathematical induction: 817 | 818 | 显然,在当前OS线程中,第一次`acosw`必然属于第一类状态迁移:yielded state co -> init state co,并且接下来的所有`acosw`必然属于这两类状态迁移的其中一类。顺序地用上面得到两个结论依次证明,最终得到“所有的协程在调用`acosw`前后都一直满足Sys V ABI规范的约束”结论。如此,证明结束。 819 | 820 | ## Miscellaneous 821 | 822 | ### Red Zone 823 | 824 | 在System V ABI x86-64中描述[red zone](https://en.wikipedia.org/wiki/Red_zone_(computing))的概念: 825 | 826 | > The 128-byte area beyond the location pointed to by %rsp is considered to be reserved and shall not be modified by signal or interrupt handlers. Therefore, functions may use this area for temporary data that is not needed across function calls. In particular, leaf functions may use this area for their entire stack frame, rather than adjusting the stack pointer in the prologue and epilogue. This area is known as the red zone. 827 | 828 | 由于red zone "not preserved by the callee" ,所以我们在协程的上下文切换的实现中无需考虑它(因为`acosw`是一个叶子函数,即leaf function)。 829 | 830 | ### Stack Pointer 831 | 832 | > The end of the input argument area shall be aligned on a 16 (32 or 64, if \_\_m256 or \_\_m512 is passed on stack) byte boundary. 
In other words, the value (%esp + 4) is always a multiple of 16 (32 or 64) when control is transferred to the function entry point. The stack pointer, %esp, always points to the end of the latest allocated stack frame. 833 | > 834 | > — Intel386-psABI-1.1:2.2.2 The Stack Frame 835 | 836 | > The stack pointer, %rsp, always points to the end of the latest allocated stack frame. 837 | > 838 | > — Sys V ABI AMD64 Version 1.0:3.2.2 The Stack Frame 839 | 840 | 这是腾讯libco中的一个[bug](https://github.com/Tencent/libco/blob/v1.0/coctx_swap.S#L27)。ABI规范中规定用户空间程序的栈指针必须时刻指到运行栈的[栈顶](https://zh.wikipedia.org/wiki/%E5%A0%86%E6%A0%88#%E6%93%8D%E4%BD%9C),而[coctx_swap.S](https://github.com/Tencent/libco/blob/v1.0/coctx_swap.S#L27)中却使用栈指针直接对位于堆中的数据结构进行寻址内存操作,这违反了ABI约定。 841 | 842 | >**By default, the signal handler is invoked on the normal process stack.** It is possible to arrange that the signal handler uses an alternate stack; see sigaltstack(2) for a discussion of how to do this and when it might be useful. 843 | > 844 | >— man 7 signal : Signal dispositions 845 | 846 | 当coctx_swap正在用栈指针对位于堆中的数据结构进行寻址内存操作时,若此时执行线程收到了一个信号,接着内核抢占了该执行线程并开始准备接下来用户空间线程的信号处理执行环境,由于在默认情况下,内核将会选择主栈作为信号处理函数的执行栈,但此时栈已经被指向了堆中(用户空间的程序违反ABI约定在先),那么信号处理函数的执行栈就会被错误地放置到堆中,这样,堆中的数据结构在接下来就极有可能会被破坏(更详细的bug复现请参见此[issue](https://github.com/Tencent/libco/issues/90))。 847 | 848 | # Best Practice 849 | 850 | 总的来说,如果你想把libaco的性能发挥到极致,一定要保证 "non-standalone non-main co" 在调用`aco_yield`时的执行栈使用尽可能地小。另外,当你想把一个协程的局部变量的地址传递到另一个协程时一定要非常小心,因为如果这个变量是在共享栈上时,将可能会发生内存数据混乱,因此,总是从堆中分配需要在协程间共享的内存是一个非常明智的选择。 851 | 852 | 详细地说,有五点建议: 853 | 854 | ``` 855 | co_fp 856 | / \ 857 | / \ 858 | f1 f2 859 | / \ / \ 860 | / \ f4 \ 861 | yield f3 f5 862 | ``` 863 | 864 | 1. Main co的执行栈使用大小对协程间上下文切换的性能没有直接影响(因为main co独占了线程的默认执行栈); 865 | 866 | 2. Standalone non-main co的执行栈使用大小对协程间上下文切换的性能没有直接影响(因为它独占了一个执行栈)。但是创建海量的standalone non-main co将会消耗海量的虚拟内存(因为海量执行栈的创建),因此,应用中并不推荐在一个线程中创建海量的standalone non-main co; 867 | 868 | 3.
Non-standalone non-main co(与其他协程共享执行栈的非主协程)在调用`aco_yield`时执行栈的使用大小将会对协程间上下文切换的性能产生直接的影响,性能测试部分已经清楚的展示了这一点。在上图中,函数f2,f3,f4与f5的栈使用量对上下文切换的性能没有影响,这是因为在它们执行的过程中并没有`aco_yield`函数的来中断它们。然而,函数co_fp与f1的栈使用量之和将会决定`co->save_stack.max_cpsz`(协程运行期间私有保存栈的最大保存大小)的值,同时会对上下文切换的性能产生直接的影响; 869 | 870 | 让一个函数拥有尽可能低的栈使用量的关键是尽可能地从堆中分配局部变量(尤其是占用内存较大的变量)并手动地管理它们的生命周期(malloc/free),而非默认地从堆栈上分配和自动释放它们。C编译器gcc的选项`-fstack-usage`对此非常有用。 871 | 872 | ```c 873 | int* gl_ptr; 874 | 875 | void inc_p(int* p){ (*p)++; } 876 | 877 | void co_fp0() { 878 | int ct = 0; 879 | gl_ptr = &ct; // line 7 880 | aco_yield(); 881 | check(ct); 882 | int* ptr = &ct; 883 | inc_p(ptr); // line 11 884 | aco_exit(); 885 | } 886 | 887 | void co_fp1() { 888 | do_sth(gl_ptr); // line 16 889 | aco_exit(); 890 | } 891 | ``` 892 | 893 | 4. 在上面的代码片段中,我们假定协程co_fp0与co_fp1共享同一个执行栈,它们均是non-main co,它们的执行顺序为 "co_fp0 -> co_fp1 -> co_fp0" 。因为它们共享同一个执行栈,在代码第16行`gl_ptr`中的指针值与代码第7行`gl_ptr`中的指针值二者的语义是不同的,这样的用法很可能会破坏协程co_fp1的执行栈。而代码第11行则是正确的,因为此时局部变量`ct`与函数`inc_p`的执行是在同一个协程上下文中的。从堆中分配需要在协程间共享的内存能够很简单地解决这类问题: 894 | 895 | ```c 896 | int* gl_ptr; 897 | 898 | void inc_p(int* p){ (*p)++; } 899 | 900 | void co_fp0() { 901 | int* ct_ptr = malloc(sizeof(int)); 902 | assert(ct_ptr != NULL); 903 | *ct_ptr = 0; 904 | gl_ptr = ct_ptr; 905 | aco_yield(); 906 | check(*ct_ptr); 907 | int* ptr = ct_ptr; 908 | inc_p(ptr); 909 | free(ct_ptr); 910 | gl_ptr = NULL; 911 | aco_exit(); 912 | } 913 | 914 | void co_fp1() { 915 | do_sth(gl_ptr); 916 | aco_exit(); 917 | } 918 | ``` 919 | 920 | # TODO 921 | 922 | New ideas are welcome! 923 | 924 | * Add a macro `aco_new` which is the combination of something like `p = malloc(sz); assertalloc_ptr(p)`. 925 | 926 | * Add a new API `aco_reset` to support the reusability of the coroutine objects. 927 | 928 | * Support other platforms (especially arm & arm64). 
929 | 930 | # CHANGES 931 | 932 | ``` 933 | v1.2.2 Mon Jul 9 2018 934 | Added a new option `-o <no-m32|no-valgrind>` to make.sh; 935 | Corrected the value of macro ACO_VERSION_PATCH (issue #1 936 | kindly reported by Markus Elfring @elfring); 937 | Adjusted some noncompliant naming of identifiers (double underscore 938 | `__`) (issue #1, kindly proposed by Markus Elfring @elfring); 939 | Supported including the header files from C++ (issue #4, kindly 940 | proposed by Markus Elfring @elfring). 941 | v1.2.1 Sat Jul 7 2018 942 | Fixed some noncompliant include guards in two C header files ( 943 | issue #1 kindly reported by Markus Elfring @elfring); 944 | Removed the "pure" word from "pure C" statement since it 945 | contains assembly code (kindly reported by Peter Cawley 946 | @corsix); 947 | Many updates in the README.md document. 948 | v1.2.0 Tue Jul 3 2018 949 | Provided another header named `aco_assert_override.h` so user 950 | could choose to override the default `assert` or not; 951 | Added some macros about the version information. 952 | v1.1 Mon Jul 2 2018 953 | Removed the requirement on the GCC version (>= 5.0). 954 | v1.0 Sun Jul 1 2018 955 | The v1.0 release of libaco, cheers 🎉🎉🎉 956 | ``` 957 | 958 | # Donation 959 | 960 | 我是一位自由的全职开源项目开发者,任何数量的捐赠对我都将会是莫大的鼓励 ;-) 961 | 962 | * Paypal 963 | 964 | [paypal.me link](https://www.paypal.me/00hnes) 965 | 966 | * Alipay (支付(宝|寶)) 967 | 968 | ![qr_alipay](img/qr_alipay.png) 969 | 970 | * Wechat (微信) 971 | 972 | ![qr_wechat](img/qr_wechat.png) 973 | 974 | # Copyright and License 975 | 976 | Copyright (C) 2018, by Sen Han [<00hnes@gmail.com>](mailto:00hnes@gmail.com). 977 | 978 | Under the Apache License, Version 2.0. 979 | 980 | See the [LICENSE](LICENSE) file for details.
981 | -------------------------------------------------------------------------------- /aco.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #define _GNU_SOURCE 16 | 17 | #include "aco.h" 18 | #include 19 | #include 20 | 21 | // this header including should be at the last of the `include` directives list 22 | #include "aco_assert_override.h" 23 | 24 | void aco_runtime_test(void){ 25 | #ifdef __i386__ 26 | _Static_assert(sizeof(void*) == 4, "require 'sizeof(void*) == 4'"); 27 | #elif __x86_64__ 28 | _Static_assert(sizeof(void*) == 8, "require 'sizeof(void*) == 8'"); 29 | _Static_assert(sizeof(__uint128_t) == 16, "require 'sizeof(__uint128_t) == 16'"); 30 | #else 31 | #error "platform no support yet" 32 | #endif 33 | _Static_assert(sizeof(int) >= 4, "require 'sizeof(int) >= 4'"); 34 | assert(sizeof(int) >= 4); 35 | _Static_assert(sizeof(int) <= sizeof(size_t), 36 | "require 'sizeof(int) <= sizeof(size_t)'"); 37 | assert(sizeof(int) <= sizeof(size_t)); 38 | } 39 | 40 | // assertptr(dst); assertptr(src); 41 | // assert((((uintptr_t)(src) & 0x0f) == 0) && (((uintptr_t)(dst) & 0x0f) == 0)); 42 | // assert((((sz) & 0x0f) == 0x08) && (((sz) >> 4) >= 0) && (((sz) >> 4) <= 8)); 43 | // sz = 16*n + 8 ( 0 <= n <= 8) 44 | 45 | // Note: dst and src must be valid address already 46 | #define 
aco_amd64_inline_short_aligned_memcpy_test_ok(dst, src, sz) \ 47 | ( \ 48 | (((uintptr_t)(src) & 0x0f) == 0) && (((uintptr_t)(dst) & 0x0f) == 0) \ 49 | && \ 50 | (((sz) & 0x0f) == 0x08) && (((sz) >> 4) >= 0) && (((sz) >> 4) <= 8) \ 51 | ) 52 | 53 | #define aco_amd64_inline_short_aligned_memcpy(dst, src, sz) do {\ 54 | __uint128_t xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7; \ 55 | switch((sz) >> 4){ \ 56 | case 0: \ 57 | break; \ 58 | case 1: \ 59 | xmm0 = *((__uint128_t*)(src) + 0); \ 60 | *((__uint128_t*)(dst) + 0) = xmm0; \ 61 | break; \ 62 | case 2: \ 63 | xmm0 = *((__uint128_t*)(src) + 0); \ 64 | xmm1 = *((__uint128_t*)(src) + 1); \ 65 | *((__uint128_t*)(dst) + 0) = xmm0; \ 66 | *((__uint128_t*)(dst) + 1) = xmm1; \ 67 | break; \ 68 | case 3: \ 69 | xmm0 = *((__uint128_t*)(src) + 0); \ 70 | xmm1 = *((__uint128_t*)(src) + 1); \ 71 | xmm2 = *((__uint128_t*)(src) + 2); \ 72 | *((__uint128_t*)(dst) + 0) = xmm0; \ 73 | *((__uint128_t*)(dst) + 1) = xmm1; \ 74 | *((__uint128_t*)(dst) + 2) = xmm2; \ 75 | break; \ 76 | case 4: \ 77 | xmm0 = *((__uint128_t*)(src) + 0); \ 78 | xmm1 = *((__uint128_t*)(src) + 1); \ 79 | xmm2 = *((__uint128_t*)(src) + 2); \ 80 | xmm3 = *((__uint128_t*)(src) + 3); \ 81 | *((__uint128_t*)(dst) + 0) = xmm0; \ 82 | *((__uint128_t*)(dst) + 1) = xmm1; \ 83 | *((__uint128_t*)(dst) + 2) = xmm2; \ 84 | *((__uint128_t*)(dst) + 3) = xmm3; \ 85 | break; \ 86 | case 5: \ 87 | xmm0 = *((__uint128_t*)(src) + 0); \ 88 | xmm1 = *((__uint128_t*)(src) + 1); \ 89 | xmm2 = *((__uint128_t*)(src) + 2); \ 90 | xmm3 = *((__uint128_t*)(src) + 3); \ 91 | xmm4 = *((__uint128_t*)(src) + 4); \ 92 | *((__uint128_t*)(dst) + 0) = xmm0; \ 93 | *((__uint128_t*)(dst) + 1) = xmm1; \ 94 | *((__uint128_t*)(dst) + 2) = xmm2; \ 95 | *((__uint128_t*)(dst) + 3) = xmm3; \ 96 | *((__uint128_t*)(dst) + 4) = xmm4; \ 97 | break; \ 98 | case 6: \ 99 | xmm0 = *((__uint128_t*)(src) + 0); \ 100 | xmm1 = *((__uint128_t*)(src) + 1); \ 101 | xmm2 = *((__uint128_t*)(src) + 2); \ 102 | xmm3 = 
*((__uint128_t*)(src) + 3); \ 103 | xmm4 = *((__uint128_t*)(src) + 4); \ 104 | xmm5 = *((__uint128_t*)(src) + 5); \ 105 | *((__uint128_t*)(dst) + 0) = xmm0; \ 106 | *((__uint128_t*)(dst) + 1) = xmm1; \ 107 | *((__uint128_t*)(dst) + 2) = xmm2; \ 108 | *((__uint128_t*)(dst) + 3) = xmm3; \ 109 | *((__uint128_t*)(dst) + 4) = xmm4; \ 110 | *((__uint128_t*)(dst) + 5) = xmm5; \ 111 | break; \ 112 | case 7: \ 113 | xmm0 = *((__uint128_t*)(src) + 0); \ 114 | xmm1 = *((__uint128_t*)(src) + 1); \ 115 | xmm2 = *((__uint128_t*)(src) + 2); \ 116 | xmm3 = *((__uint128_t*)(src) + 3); \ 117 | xmm4 = *((__uint128_t*)(src) + 4); \ 118 | xmm5 = *((__uint128_t*)(src) + 5); \ 119 | xmm6 = *((__uint128_t*)(src) + 6); \ 120 | *((__uint128_t*)(dst) + 0) = xmm0; \ 121 | *((__uint128_t*)(dst) + 1) = xmm1; \ 122 | *((__uint128_t*)(dst) + 2) = xmm2; \ 123 | *((__uint128_t*)(dst) + 3) = xmm3; \ 124 | *((__uint128_t*)(dst) + 4) = xmm4; \ 125 | *((__uint128_t*)(dst) + 5) = xmm5; \ 126 | *((__uint128_t*)(dst) + 6) = xmm6; \ 127 | break; \ 128 | case 8: \ 129 | xmm0 = *((__uint128_t*)(src) + 0); \ 130 | xmm1 = *((__uint128_t*)(src) + 1); \ 131 | xmm2 = *((__uint128_t*)(src) + 2); \ 132 | xmm3 = *((__uint128_t*)(src) + 3); \ 133 | xmm4 = *((__uint128_t*)(src) + 4); \ 134 | xmm5 = *((__uint128_t*)(src) + 5); \ 135 | xmm6 = *((__uint128_t*)(src) + 6); \ 136 | xmm7 = *((__uint128_t*)(src) + 7); \ 137 | *((__uint128_t*)(dst) + 0) = xmm0; \ 138 | *((__uint128_t*)(dst) + 1) = xmm1; \ 139 | *((__uint128_t*)(dst) + 2) = xmm2; \ 140 | *((__uint128_t*)(dst) + 3) = xmm3; \ 141 | *((__uint128_t*)(dst) + 4) = xmm4; \ 142 | *((__uint128_t*)(dst) + 5) = xmm5; \ 143 | *((__uint128_t*)(dst) + 6) = xmm6; \ 144 | *((__uint128_t*)(dst) + 7) = xmm7; \ 145 | break; \ 146 | }\ 147 | *((uint64_t*)((uintptr_t)(dst) + (sz) - 8)) = *((uint64_t*)((uintptr_t)(src) + (sz) - 8)); \ 148 | } while(0) 149 | 150 | // Note: dst and src must be valid address already 151 | #define aco_amd64_optimized_memcpy_drop_in(dst, src, sz) do {\ 
152 | if(aco_amd64_inline_short_aligned_memcpy_test_ok((dst), (src), (sz))){ \ 153 | aco_amd64_inline_short_aligned_memcpy((dst), (src), (sz)); \ 154 | }else{ \ 155 | memcpy((dst), (src), (sz)); \ 156 | } \ 157 | } while(0) 158 | 159 | static void aco_default_protector_last_word(void){ 160 | aco_t* co = aco_get_co(); 161 | // do some log about the offending `co` 162 | fprintf(stderr,"error: aco_default_protector_last_word triggered\n"); 163 | fprintf(stderr, "error: co:%p should call `aco_exit()` instead of direct " 164 | "`return` in co_fp:%p to finish its execution\n", co, (void*)co->fp); 165 | assert(0); 166 | } 167 | 168 | // aco's Global Thread Local Storage variable `co` 169 | __thread aco_t* aco_gtls_co; 170 | static __thread aco_cofuncp_t aco_gtls_last_word_fp = aco_default_protector_last_word; 171 | 172 | #ifdef __i386__ 173 | static __thread void* aco_gtls_fpucw_mxcsr[2]; 174 | #elif __x86_64__ 175 | static __thread void* aco_gtls_fpucw_mxcsr[1]; 176 | #else 177 | #error "platform no support yet" 178 | #endif 179 | 180 | void aco_thread_init(aco_cofuncp_t last_word_co_fp){ 181 | aco_save_fpucw_mxcsr(aco_gtls_fpucw_mxcsr); 182 | 183 | if((void*)last_word_co_fp != NULL) 184 | aco_gtls_last_word_fp = last_word_co_fp; 185 | } 186 | 187 | // This function `aco_funcp_protector` should never be 188 | // called. If it's been called, that means the offending 189 | // `co` didn't call aco_exit(co) instead of `return` to 190 | // finish its execution. 
191 | void aco_funcp_protector(void){ 192 | if((void*)(aco_gtls_last_word_fp) != NULL){ 193 | aco_gtls_last_word_fp(); 194 | }else{ 195 | aco_default_protector_last_word(); 196 | } 197 | assert(0); 198 | } 199 | 200 | aco_share_stack_t* aco_share_stack_new(size_t sz){ 201 | return aco_share_stack_new2(sz, 1); 202 | } 203 | 204 | #define aco_size_t_safe_add_assert(a,b) do { \ 205 | assert((a)+(b) >= (a)); \ 206 | }while(0) 207 | 208 | aco_share_stack_t* aco_share_stack_new2(size_t sz, char guard_page_enabled){ 209 | if(sz == 0){ 210 | sz = 1024 * 1024 * 2; 211 | } 212 | if(sz < 4096){ 213 | sz = 4096; 214 | } 215 | assert(sz > 0); 216 | 217 | size_t u_pgsz = 0; 218 | if(guard_page_enabled != 0){ 219 | // although gcc's Built-in Functions to Perform Arithmetic with 220 | // Overflow Checking is better, but it would require gcc >= 5.0 221 | long pgsz = sysconf(_SC_PAGESIZE); 222 | // pgsz must be > 0 && a power of two 223 | assert(pgsz > 0 && (((pgsz - 1) & pgsz) == 0)); 224 | u_pgsz = (size_t)((unsigned long)pgsz); 225 | // it should be always true in real life 226 | assert(u_pgsz == (unsigned long)pgsz && ((u_pgsz << 1) >> 1) == u_pgsz); 227 | if(sz <= u_pgsz){ 228 | sz = u_pgsz << 1; 229 | } else { 230 | size_t new_sz; 231 | if((sz & (u_pgsz - 1)) != 0){ 232 | new_sz = (sz & (~(u_pgsz - 1))); 233 | assert(new_sz >= u_pgsz); 234 | aco_size_t_safe_add_assert(new_sz, (u_pgsz << 1)); 235 | new_sz = new_sz + (u_pgsz << 1); 236 | assert(sz / u_pgsz + 2 == new_sz / u_pgsz); 237 | } else { 238 | aco_size_t_safe_add_assert(sz, u_pgsz); 239 | new_sz = sz + u_pgsz; 240 | assert(sz / u_pgsz + 1 == new_sz / u_pgsz); 241 | } 242 | sz = new_sz; 243 | assert((sz / u_pgsz > 1) && ((sz & (u_pgsz - 1)) == 0)); 244 | } 245 | } 246 | 247 | aco_share_stack_t* p = (aco_share_stack_t*)malloc(sizeof(aco_share_stack_t)); 248 | assertalloc_ptr(p); 249 | memset(p, 0, sizeof(aco_share_stack_t)); 250 | 251 | if(guard_page_enabled != 0){ 252 | p->real_ptr = mmap( 253 | NULL, sz, 
PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 254 | ); 255 | assertalloc_bool(p->real_ptr != MAP_FAILED); 256 | p->guard_page_enabled = 1; 257 | assert(0 == mprotect(p->real_ptr, u_pgsz, PROT_READ)); 258 | 259 | p->ptr = (void*)(((uintptr_t)p->real_ptr) + u_pgsz); 260 | p->real_sz = sz; 261 | assert(sz >= (u_pgsz << 1)); 262 | p->sz = sz - u_pgsz; 263 | } else { 264 | //p->guard_page_enabled = 0; 265 | p->sz = sz; 266 | p->ptr = malloc(sz); 267 | assertalloc_ptr(p->ptr); 268 | } 269 | 270 | p->owner = NULL; 271 | #ifdef ACO_USE_VALGRIND 272 | p->valgrind_stk_id = VALGRIND_STACK_REGISTER( 273 | p->ptr, (void*)((uintptr_t)p->ptr + p->sz) 274 | ); 275 | #endif 276 | #if defined(__i386__) || defined(__x86_64__) 277 | uintptr_t u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr); 278 | u_p = (u_p >> 4) << 4; 279 | p->align_highptr = (void*)u_p; 280 | p->align_retptr = (void*)(u_p - sizeof(void*)); 281 | *((void**)(p->align_retptr)) = (void*)(aco_funcp_protector_asm); 282 | assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*))); 283 | p->align_limit = p->sz - 16 - (sizeof(void*) << 1); 284 | #else 285 | #error "platform no support yet" 286 | #endif 287 | return p; 288 | } 289 | 290 | void aco_share_stack_destroy(aco_share_stack_t* sstk){ 291 | assert(sstk != NULL && sstk->ptr != NULL); 292 | #ifdef ACO_USE_VALGRIND 293 | VALGRIND_STACK_DEREGISTER(sstk->valgrind_stk_id); 294 | #endif 295 | if(sstk->guard_page_enabled){ 296 | assert(0 == munmap(sstk->real_ptr, sstk->real_sz)); 297 | sstk->real_ptr = NULL; 298 | sstk->ptr = NULL; 299 | } else { 300 | free(sstk->ptr); 301 | sstk->ptr = NULL; 302 | } 303 | free(sstk); 304 | } 305 | 306 | aco_t* aco_create( 307 | aco_t* main_co, aco_share_stack_t* share_stack, 308 | size_t save_stack_sz, aco_cofuncp_t fp, void* arg 309 | ){ 310 | 311 | aco_t* p = (aco_t*)malloc(sizeof(aco_t)); 312 | assertalloc_ptr(p); 313 | memset(p, 0, sizeof(aco_t)); 314 | 315 | if(main_co != NULL){ // non-main co 316 | 
assertptr(share_stack); 317 | p->share_stack = share_stack; 318 | #ifdef __i386__ 319 | // POSIX.1-2008 (IEEE Std 1003.1-2008) - General Information - Data Types - Pointer Types 320 | // http://pubs.opengroup.org/onlinepubs/9699919799.2008edition/functions/V2_chap02.html#tag_15_12_03 321 | p->reg[ACO_REG_IDX_RETADDR] = (void*)fp; 322 | // push retaddr 323 | p->reg[ACO_REG_IDX_SP] = p->share_stack->align_retptr; 324 | #ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV 325 | p->reg[ACO_REG_IDX_FPU] = aco_gtls_fpucw_mxcsr[0]; 326 | p->reg[ACO_REG_IDX_FPU + 1] = aco_gtls_fpucw_mxcsr[1]; 327 | #endif 328 | #elif __x86_64__ 329 | p->reg[ACO_REG_IDX_RETADDR] = (void*)fp; 330 | p->reg[ACO_REG_IDX_SP] = p->share_stack->align_retptr; 331 | #ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV 332 | p->reg[ACO_REG_IDX_FPU] = aco_gtls_fpucw_mxcsr[0]; 333 | #endif 334 | #else 335 | #error "platform no support yet" 336 | #endif 337 | p->main_co = main_co; 338 | p->arg = arg; 339 | p->fp = fp; 340 | if(save_stack_sz == 0){ 341 | save_stack_sz = 64; 342 | } 343 | p->save_stack.ptr = malloc(save_stack_sz); 344 | assertalloc_ptr(p->save_stack.ptr); 345 | p->save_stack.sz = save_stack_sz; 346 | #if defined(__i386__) || defined(__x86_64__) 347 | p->save_stack.valid_sz = 0; 348 | #else 349 | #error "platform no support yet" 350 | #endif 351 | return p; 352 | } else { // main co 353 | p->main_co = NULL; 354 | p->arg = arg; 355 | p->fp = fp; 356 | p->share_stack = NULL; 357 | p->save_stack.ptr = NULL; 358 | return p; 359 | } 360 | assert(0); 361 | } 362 | 363 | aco_attr_no_asan 364 | void aco_resume(aco_t* resume_co){ 365 | assert(resume_co != NULL && resume_co->main_co != NULL 366 | && resume_co->is_end == 0 367 | ); 368 | if(resume_co->share_stack->owner != resume_co){ 369 | if(resume_co->share_stack->owner != NULL){ 370 | aco_t* owner_co = resume_co->share_stack->owner; 371 | assert(owner_co->share_stack == resume_co->share_stack); 372 | #if defined(__i386__) || defined(__x86_64__) 373 | assert( 374 | ( 375 | 
(uintptr_t)(owner_co->share_stack->align_retptr) 376 | >= 377 | (uintptr_t)(owner_co->reg[ACO_REG_IDX_SP]) 378 | ) 379 | && 380 | ( 381 | (uintptr_t)(owner_co->share_stack->align_highptr) 382 | - 383 | (uintptr_t)(owner_co->share_stack->align_limit) 384 | <= 385 | (uintptr_t)(owner_co->reg[ACO_REG_IDX_SP]) 386 | ) 387 | ); 388 | owner_co->save_stack.valid_sz = 389 | (uintptr_t)(owner_co->share_stack->align_retptr) 390 | - 391 | (uintptr_t)(owner_co->reg[ACO_REG_IDX_SP]); 392 | if(owner_co->save_stack.sz < owner_co->save_stack.valid_sz){ 393 | free(owner_co->save_stack.ptr); 394 | owner_co->save_stack.ptr = NULL; 395 | while(1){ 396 | owner_co->save_stack.sz = owner_co->save_stack.sz << 1; 397 | assert(owner_co->save_stack.sz > 0); 398 | if(owner_co->save_stack.sz >= owner_co->save_stack.valid_sz){ 399 | break; 400 | } 401 | } 402 | owner_co->save_stack.ptr = malloc(owner_co->save_stack.sz); 403 | assertalloc_ptr(owner_co->save_stack.ptr); 404 | } 405 | // TODO: optimize the performance penalty of memcpy function call 406 | // for very short memory span 407 | if(owner_co->save_stack.valid_sz > 0) { 408 | #ifdef __x86_64__ 409 | aco_amd64_optimized_memcpy_drop_in( 410 | owner_co->save_stack.ptr, 411 | owner_co->reg[ACO_REG_IDX_SP], 412 | owner_co->save_stack.valid_sz 413 | ); 414 | #else 415 | memcpy( 416 | owner_co->save_stack.ptr, 417 | owner_co->reg[ACO_REG_IDX_SP], 418 | owner_co->save_stack.valid_sz 419 | ); 420 | #endif 421 | owner_co->save_stack.ct_save++; 422 | } 423 | if(owner_co->save_stack.valid_sz > owner_co->save_stack.max_cpsz){ 424 | owner_co->save_stack.max_cpsz = owner_co->save_stack.valid_sz; 425 | } 426 | owner_co->share_stack->owner = NULL; 427 | owner_co->share_stack->align_validsz = 0; 428 | #else 429 | #error "platform no support yet" 430 | #endif 431 | } 432 | assert(resume_co->share_stack->owner == NULL); 433 | #if defined(__i386__) || defined(__x86_64__) 434 | assert( 435 | resume_co->save_stack.valid_sz 436 | <= 437 | 
resume_co->share_stack->align_limit - sizeof(void*) 438 | ); 439 | // TODO: optimize the performance penalty of memcpy function call 440 | // for very short memory span 441 | if(resume_co->save_stack.valid_sz > 0) { 442 | #ifdef __x86_64__ 443 | aco_amd64_optimized_memcpy_drop_in( 444 | (void*)( 445 | (uintptr_t)(resume_co->share_stack->align_retptr) 446 | - 447 | resume_co->save_stack.valid_sz 448 | ), 449 | resume_co->save_stack.ptr, 450 | resume_co->save_stack.valid_sz 451 | ); 452 | #else 453 | memcpy( 454 | (void*)( 455 | (uintptr_t)(resume_co->share_stack->align_retptr) 456 | - 457 | resume_co->save_stack.valid_sz 458 | ), 459 | resume_co->save_stack.ptr, 460 | resume_co->save_stack.valid_sz 461 | ); 462 | #endif 463 | resume_co->save_stack.ct_restore++; 464 | } 465 | if(resume_co->save_stack.valid_sz > resume_co->save_stack.max_cpsz){ 466 | resume_co->save_stack.max_cpsz = resume_co->save_stack.valid_sz; 467 | } 468 | resume_co->share_stack->align_validsz = resume_co->save_stack.valid_sz + sizeof(void*); 469 | resume_co->share_stack->owner = resume_co; 470 | #else 471 | #error "platform no support yet" 472 | #endif 473 | } 474 | aco_gtls_co = resume_co; 475 | acosw(resume_co->main_co, resume_co); 476 | aco_gtls_co = resume_co->main_co; 477 | } 478 | 479 | void aco_destroy(aco_t* co){ 480 | assertptr(co); 481 | if(aco_is_main_co(co)){ 482 | free(co); 483 | } else { 484 | if(co->share_stack->owner == co){ 485 | co->share_stack->owner = NULL; 486 | co->share_stack->align_validsz = 0; 487 | } 488 | free(co->save_stack.ptr); 489 | co->save_stack.ptr = NULL; 490 | free(co); 491 | } 492 | } 493 | -------------------------------------------------------------------------------- /aco.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Public interface of libaco: the coroutine / share-stack data types, the
// assertion helpers, and the yield/exit macros used inside coroutines.

#ifndef ACO_H
#define ACO_H

#include <string.h>
#include <stdint.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <sys/mman.h>

#ifdef ACO_USE_VALGRIND
    #include <valgrind/valgrind.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

#define ACO_VERSION_MAJOR 1
#define ACO_VERSION_MINOR 2
#define ACO_VERSION_PATCH 4

// Indexes into `aco_t.reg[]`. They must stay in sync with the save/restore
// offsets hard-coded in acosw.S (index * sizeof(void*) == byte offset).
#ifdef __i386__
    #define ACO_REG_IDX_RETADDR 0
    #define ACO_REG_IDX_SP 1
    #define ACO_REG_IDX_BP 2
    #define ACO_REG_IDX_FPU 6
#elif defined(__x86_64__)
    #define ACO_REG_IDX_RETADDR 4
    #define ACO_REG_IDX_SP 5
    #define ACO_REG_IDX_BP 7
    #define ACO_REG_IDX_FPU 8
#else
    #error "platform no support yet"
#endif

// Private save stack of one coroutine.
typedef struct {
    void*  ptr;
    size_t sz;
    size_t valid_sz;
    // max copy size in bytes
    size_t max_cpsz;
    // copy from share stack to this save stack
    size_t ct_save;
    // copy from this save stack to share stack
    size_t ct_restore;
} aco_save_stack_t;

struct aco_s;
typedef struct aco_s aco_t;

// Execution stack that may be shared by several coroutines; `owner` is the
// coroutine currently occupying it (cleared by aco_exit1).
typedef struct {
    void*  ptr;
    size_t sz;
    void*  align_highptr;
    void*  align_retptr;
    size_t align_validsz;
    size_t align_limit;
    aco_t* owner;

    char guard_page_enabled;
    void* real_ptr;
    size_t real_sz;

#ifdef ACO_USE_VALGRIND
    unsigned long valgrind_stk_id;
#endif
} aco_share_stack_t;

// Signature of a coroutine entry function.
typedef void (*aco_cofuncp_t)(void);

struct aco_s{
    // cpu registers' state
    // NOTE: `reg` must remain the first member, acosw.S addresses it at
    // offset 0 of the aco_t; the two extra slots of the non-shared build
    // hold the fpu control word and mxcsr.
#ifdef __i386__
    #ifdef ACO_CONFIG_SHARE_FPU_MXCSR_ENV
        void*  reg[6];
    #else
        void*  reg[8];
    #endif
#elif defined(__x86_64__)
    #ifdef ACO_CONFIG_SHARE_FPU_MXCSR_ENV
        void*  reg[8];
    #else
        void*  reg[9];
    #endif
#else
    #error "platform no support yet"
#endif
    aco_t* main_co;
    void*  arg;
    char   is_end;

    aco_cofuncp_t fp;

    aco_save_stack_t  save_stack;
    aco_share_stack_t* share_stack;
};

#define aco_likely(x) (__builtin_expect(!!(x), 1))

#define aco_unlikely(x) (__builtin_expect(!!(x), 0))

// Unlike <assert.h>, these checks are never compiled out by NDEBUG.
#define aco_assert(EX)  ((aco_likely(EX))?((void)0):(abort()))

#define aco_assertptr(ptr)  ((aco_likely((ptr) != NULL))?((void)0):(abort()))

// Aborts with a diagnostic on stderr when the allocation check `b` is false.
#define aco_assertalloc_bool(b)  do {  \
    if(aco_unlikely(!(b))){    \
        fprintf(stderr, "Aborting: failed to allocate memory: %s:%d:%s\n", \
            __FILE__, __LINE__, __PRETTY_FUNCTION__);    \
        abort();    \
    }   \
} while(0)

// Pointer flavour of aco_assertalloc_bool: same message, same abort.
#define aco_assertalloc_ptr(ptr)  aco_assertalloc_bool((ptr) != NULL)

// `aco_attr_no_asan` expands to the no-sanitize attribute only when
// ACO_USE_ASAN is requested and the compiler is really instrumenting with
// AddressSanitizer; otherwise it expands to nothing.
#if defined(aco_attr_no_asan)
    #error "aco_attr_no_asan already defined"
#endif
#if defined(ACO_USE_ASAN)
    #if defined(__has_feature)
        #if __has_feature(__address_sanitizer__)
            #define aco_attr_no_asan \
                __attribute__((__no_sanitize_address__))
        #endif
    #endif
    #if defined(__SANITIZE_ADDRESS__) && !defined(aco_attr_no_asan)
        #define aco_attr_no_asan \
            __attribute__((__no_sanitize_address__))
    #endif
#endif
#ifndef aco_attr_no_asan
    #define aco_attr_no_asan
#endif

extern void aco_runtime_test(void);

extern void aco_thread_init(aco_cofuncp_t last_word_co_fp);

// Context switch implemented in acosw.S.
extern void* acosw(aco_t* from_co, aco_t* to_co) __asm__("acosw"); // asm

// Stores the fpu control word at p and mxcsr at p+4 (acosw.S).
extern void aco_save_fpucw_mxcsr(void* p) __asm__("aco_save_fpucw_mxcsr");  // asm

extern void aco_funcp_protector_asm(void) __asm__("aco_funcp_protector_asm"); // asm

extern void aco_funcp_protector(void);

extern aco_share_stack_t* aco_share_stack_new(size_t sz);

extern aco_share_stack_t* aco_share_stack_new2(size_t sz, char guard_page_enabled);

extern void aco_share_stack_destroy(aco_share_stack_t* sstk);

extern aco_t* aco_create(
        aco_t* main_co,
        aco_share_stack_t* share_stack,
        size_t save_stack_sz,
        aco_cofuncp_t fp, void* arg
    );

// aco's Global Thread Local Storage variable `co`
extern __thread aco_t* aco_gtls_co;

aco_attr_no_asan
extern void aco_resume(aco_t* resume_co);

//extern void aco_yield1(aco_t* yield_co);
// Switches from `yield_co` back to its main_co. The argument is evaluated
// exactly once, so expressions with side effects are safe to pass.
#define aco_yield1(yield_co) do {                               \
    aco_t* aco_yield1_co_ = (yield_co);                         \
    aco_assertptr(aco_yield1_co_);                              \
    aco_assertptr(aco_yield1_co_->main_co);                     \
    acosw(aco_yield1_co_, aco_yield1_co_->main_co);             \
} while(0)

#define aco_yield() do {        \
    aco_yield1(aco_gtls_co);    \
} while(0)

#define aco_get_arg() (aco_gtls_co->arg)

// The statement expression keeps the result an rvalue, so the thread-local
// cannot be assigned through these accessors.
#define aco_get_co() ({(void)0; aco_gtls_co;})

#define aco_co() ({(void)0; aco_gtls_co;})

extern void aco_destroy(aco_t* co);

// A coroutine without a main_co is itself a main coroutine.
#define aco_is_main_co(co) (((co)->main_co) == NULL)

// Marks `co` finished, releases its share stack and yields for the last
// time; control must never come back (hence the trailing aco_assert(0)).
// The argument is evaluated exactly once.
#define aco_exit1(co) do {                                              \
    aco_t* aco_exit1_co_ = (co);                                        \
    aco_exit1_co_->is_end = 1;                                          \
    aco_assert(aco_exit1_co_->share_stack->owner == aco_exit1_co_);     \
    aco_exit1_co_->share_stack->owner = NULL;                           \
    aco_exit1_co_->share_stack->align_validsz = 0;                      \
    aco_yield1(aco_exit1_co_);                                          \
    aco_assert(0);                                                      \
} while(0)

#define aco_exit() do {       \
    aco_exit1(aco_gtls_co); \
} while(0)

#ifdef __cplusplus
}
#endif

#endif
-------------------------------------------------------------------------------- /aco_assert_override.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef ACO_ASSERT_OVERRIDE_H 16 | #define ACO_ASSERT_OVERRIDE_H 17 | 18 | #include "aco.h" 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | #define likely(x) aco_likely(x) 25 | #define unlikely(x) aco_unlikely(x) 26 | #define assert(EX) aco_assert(EX) 27 | #define assertptr(ptr) aco_assertptr(ptr) 28 | #define assertalloc_bool(b) aco_assertalloc_bool(b) 29 | #define assertalloc_ptr(ptr) aco_assertalloc_ptr(ptr) 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /acosw.S: -------------------------------------------------------------------------------- 1 | .text 2 | .globl acosw 3 | #if defined(__APPLE__) 4 | #else 5 | .type acosw, @function 6 | #endif 7 | .intel_syntax noprefix 8 | acosw: 9 | /* 10 | extern void acosw(aco_t* from_co, aco_t* to_co); 11 | 12 | struct aco_t { 13 | void* reg[X]; 14 | // ... 
15 | } 16 | 17 | reference: 18 | https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI 19 | 20 | pitfall: 21 | http://man7.org/linux/man-pages/man7/signal.7.html 22 | http://man7.org/linux/man-pages/man2/sigaltstack.2.html 23 | 24 | > $ man 7 signal 25 | > ... 26 | > By default, the signal handler is invoked on the normal process 27 | > stack. It is possible to arrange that the signal handler 28 | > uses an alternate stack; see sigaltstack(2) for a discussion of 29 | > how to do this and when it might be useful. 30 | > ... 31 | 32 | This is a BUG example: 33 | https://github.com/Tencent/libco/blob/v1.0/coctx_swap.S#L27 34 | 35 | proof of correctness: 36 | https://github.com/hnes/libaco 37 | 38 | mxcsr & fpu: 39 | fnstcw * m2byte 40 | Store FPU control word to m2byte without checking for 41 | pending unmasked floating-point exceptions. 42 | 43 | fldcw m2byte 44 | Load FPU control word from m2byte. 45 | 46 | stmxcsr m32 47 | Store contents of MXCSR register to m32 48 | 49 | ldmxcsr m32 50 | Load MXCSR register from m32. 
51 | */ 52 | /* 53 | 0x00 --> 0xff 54 | eip esp ebp edi esi ebx fpucw16 mxcsr32 55 | 0 4 8 c 10 14 18 1c 56 | */ 57 | #ifdef __i386__ 58 | mov eax,DWORD PTR [esp+0x4] // from_co 59 | mov edx,DWORD PTR [esp] // retaddr 60 | lea ecx,[esp+0x4] // esp 61 | mov DWORD PTR [eax+0x8],ebp //esp 73 | mov ebp,DWORD PTR [ecx+0x8] //>ebp 74 | mov eax,DWORD PTR [ecx+0x0] //>retaddr 75 | mov edi,DWORD PTR [ecx+0xc] //>edi 76 | mov esi,DWORD PTR [ecx+0x10] //>esi 77 | mov ebx,DWORD PTR [ecx+0x14] //>ebx 78 | #ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV 79 | fldcw WORD PTR [ecx+0x18] //>fpucw 80 | ldmxcsr DWORD PTR [ecx+0x1c] //>mxcsr 81 | #endif 82 | xor ecx,ecx 83 | mov esp,edx 84 | xor edx,edx 85 | jmp eax 86 | #elif __x86_64__ 87 | /* 88 | 0x00 --> 0xff 89 | r12 r13 r14 r15 rip rsp rbx rbp fpucw16 mxcsr32 90 | 0 8 10 18 20 28 30 38 40 44 91 | */ 92 | // rdi - from_co | rsi - to_co 93 | mov rdx,QWORD PTR [rsp] // retaddr 94 | lea rcx,[rsp+0x8] // rsp 95 | mov QWORD PTR [rdi+0x0], r12 96 | mov QWORD PTR [rdi+0x8], r13 97 | mov QWORD PTR [rdi+0x10],r14 98 | mov QWORD PTR [rdi+0x18],r15 99 | mov QWORD PTR [rdi+0x20],rdx // retaddr 100 | mov QWORD PTR [rdi+0x28],rcx // rsp 101 | mov QWORD PTR [rdi+0x30],rbx 102 | mov QWORD PTR [rdi+0x38],rbp 103 | #ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV 104 | fnstcw WORD PTR [rdi+0x40] 105 | stmxcsr DWORD PTR [rdi+0x44] 106 | #endif 107 | mov r12,QWORD PTR [rsi+0x0] 108 | mov r13,QWORD PTR [rsi+0x8] 109 | mov r14,QWORD PTR [rsi+0x10] 110 | mov r15,QWORD PTR [rsi+0x18] 111 | mov rax,QWORD PTR [rsi+0x20] // retaddr 112 | mov rcx,QWORD PTR [rsi+0x28] // rsp 113 | mov rbx,QWORD PTR [rsi+0x30] 114 | mov rbp,QWORD PTR [rsi+0x38] 115 | #ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV 116 | fldcw WORD PTR [rsi+0x40] 117 | ldmxcsr DWORD PTR [rsi+0x44] 118 | #endif 119 | mov rsp,rcx 120 | jmp rax 121 | #else 122 | #error "platform not support" 123 | #endif 124 | 125 | .globl aco_save_fpucw_mxcsr 126 | #if defined(__APPLE__) 127 | #else 128 | .type aco_save_fpucw_mxcsr, 
// External C symbols called below; Mach-O (Apple) prefixes C names with '_'.
#if defined(__APPLE__)
    .globl _abort
    .globl _aco_funcp_protector
#else
    .globl abort
    .globl aco_funcp_protector
#endif

// aco_funcp_protector_asm: stub that realigns the stack pointer and then
// calls the C function aco_funcp_protector, followed by abort.
// NOTE(review): presumably installed by aco.c as the return address of a
// coroutine's entry function - confirm against aco_create.
.globl aco_funcp_protector_asm
#if defined(__APPLE__)
#else
    .type  aco_funcp_protector_asm, @function
#endif
.intel_syntax noprefix
aco_funcp_protector_asm:
#ifdef __i386__
            // clear the low 4 bits: esp rounded down to a 16-byte boundary
            and     esp,0xfffffff0
    #if defined(__APPLE__)
            call    _aco_funcp_protector
            call    _abort
    #else
        // position-independent builds go through the PLT
        #if defined(__pic__) || defined(__PIC__)
            call    aco_funcp_protector@PLT
            call    abort@PLT
        #else
            call    aco_funcp_protector
            call    abort
        #endif
    #endif
            // only reached if abort returns
            ret
#elif __x86_64__
            // clear the low 4 bits: rsp rounded down to a 16-byte boundary
            and     rsp,0xfffffffffffffff0
    #if defined(__APPLE__)
            call    _aco_funcp_protector
            call    _abort
    #else
        // position-independent builds go through the PLT
        #if defined(__pic__) || defined(__PIC__)
            call    aco_funcp_protector@PLT
            call    abort@PLT
        #else
            call    aco_funcp_protector
            call    abort
        #endif
    #endif
            // only reached if abort returns
            ret
#else
    #error "platform not support"
#endif
/img/logo/logo-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/logo/logo-blue.png -------------------------------------------------------------------------------- /img/logo/logo-blue.svg: -------------------------------------------------------------------------------- 1 | Logo blue -------------------------------------------------------------------------------- /img/logo/logo-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/logo/logo-white.png -------------------------------------------------------------------------------- /img/logo/logo-white.svg: -------------------------------------------------------------------------------- 1 | Logo white -------------------------------------------------------------------------------- /img/proof_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/proof_0.png -------------------------------------------------------------------------------- /img/proof_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/proof_1.png -------------------------------------------------------------------------------- /img/proof_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/proof_2.png -------------------------------------------------------------------------------- /img/proof_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/proof_3.png -------------------------------------------------------------------------------- /img/qr_alipay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/qr_alipay.png -------------------------------------------------------------------------------- /img/qr_wechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/qr_wechat.png -------------------------------------------------------------------------------- /img/thread_model_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/thread_model_0.png -------------------------------------------------------------------------------- /img/thread_model_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/thread_model_1.png -------------------------------------------------------------------------------- /img/thread_model_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/thread_model_2.png -------------------------------------------------------------------------------- /img/thread_model_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hnes/libaco/d00631a9e143a8711c0a6e7b603a72b1e379b661/img/thread_model_3.png -------------------------------------------------------------------------------- /make.sh: 
# Copyright 2018 Sen Han <00hnes@gmail.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds every test program of $app_list once per build configuration.
# Run with bash from the repository root; "$OUTPUT_DIR" must already exist.
# $CC selects the compiler, $EXTRA_CFLAGS is appended to every build.

OUTPUT_DIR="./output"
CFLAGS="-g -O2 -Wall -Werror"
#EXTRA_CFLAGS=""
OUTPUT_SUFFIX=""
makecc="cc"
if [ "$CC" ]
then
    makecc="$CC"
fi

# one program per line: <source name without .c> [extra link flags]
app_list='
test_aco_tutorial_0
test_aco_tutorial_1
test_aco_tutorial_2
test_aco_tutorial_3 -lpthread
test_aco_tutorial_4
test_aco_tutorial_5
test_aco_tutorial_6
test_aco_synopsis
test_aco_benchmark
'

gl_opt_no_m32=""
gl_opt_no_valgrind=""

# normalize the path (drops trailing slashes)
OUTPUT_DIR="$OUTPUT_DIR""//file"
OUTPUT_DIR=`dirname "$OUTPUT_DIR"`

gl_trap_str=""

# Prints its arguments to stderr, prefixed with "error: ".
function error(){
    >&2 echo "error: $*"
}

# Must be called right after the command to check: when that command
# failed, reports "$*" and exits the (sub)shell with status 1.
function assert(){
    if [ "0" -ne "$?" ]
    then
        error "$0:""$*"
        exit 1
    fi
}

# Appends the shell snippet $1 to the SIGINT handler; the handler always
# ends with `exit 1`. Snippets are joined with newlines, so they compose
# whether or not they carry a trailing ';'.
function tra(){
    gl_trap_str="$gl_trap_str""$1"$'\n'
    trap "$gl_trap_str""exit 1" INT
    assert "$LINENO:trap failed:$gl_trap_str:$1"
}

# Restores the default SIGINT handling.
function untra(){
    trap - INT
    assert "$LINENO:untrap failed:$gl_trap_str:$1"
}

# Succeeds when $OUTPUT_SUFFIX contains $1 as a whole word, returns 1 when
# it does not, and exits the calling (sub)shell when grep itself fails.
function build_f_suffix_has(){
    declare grep_ret
    echo "$OUTPUT_SUFFIX" | grep -P "\b$1\b" &>/dev/null
    grep_ret=$?
    if [ "$grep_ret" -gt "1" ]
    then
        error "grep failed: $grep_ret"
        exit $grep_ret
    fi
    return $grep_ret
}

# Builds every program of $app_list for one configuration.
# Inputs (environment): ACO_EXTRA_CFLAGS - flags of this configuration,
#                       OUTPUT_SUFFIX    - suffix of the produced binaries.
# A configuration excluded by `-o no-m32` / `-o no-valgrind` is only echoed.
function build_f(){
    declare file
    declare cflags
    declare build_cmd
    declare skip_flag
    echo "OUTPUT_DIR:       $OUTPUT_DIR"
    echo "CFLAGS:           $CFLAGS"
    echo "EXTRA_CFLAGS:     $EXTRA_CFLAGS"
    echo "ACO_EXTRA_CFLAGS: $ACO_EXTRA_CFLAGS"
    echo "OUTPUT_SUFFIX:    $OUTPUT_SUFFIX"
    # the loop runs in a pipeline subshell: an `exit` inside only leaves
    # the subshell, the `assert "exit"` after `done` propagates the failure
    echo "$app_list" | grep -Po '.+$' | while read read_in
    do
        file=`echo $read_in | grep -Po "^[^\s]+"`
        cflags=`echo $read_in | sed -r 's/^\s*([^ ]+)(.*)$/\2/'`
        if [ -z "$file" ]
        then
            continue
        fi
        #echo "<$file>:<$cflags>:$OUTPUT_DIR:$CFLAGS:$EXTRA_CFLAGS:$OUTPUT_SUFFIX"
        build_cmd="$makecc $CFLAGS $ACO_EXTRA_CFLAGS $EXTRA_CFLAGS acosw.S aco.c $file.c $cflags -o $OUTPUT_DIR/$file$OUTPUT_SUFFIX"
        skip_flag=""
        if [ "$gl_opt_no_m32" ] && build_f_suffix_has "m32"
        then
            skip_flag="true"
        fi
        if [ "$gl_opt_no_valgrind" ] && build_f_suffix_has "valgrind"
        then
            skip_flag="true"
        fi
        if [ "$skip_flag" ]
        then
            echo "skip    $build_cmd"
        else
            echo "        $build_cmd"
            # deliberately unquoted: the command line is split into words here
            $build_cmd
            assert "build fail"
        fi
    done
    assert "exit"
}

# Prints the command line help to stderr.
function usage() {
    echo "Usage: $0 [-o <no-m32|no-valgrind>] [-h]" 1>&2
    echo '
Example:
    # default build
    bash make.sh
    # build without the i386 binary output
    bash make.sh -o no-m32
    # build without the valgrind supported binary output
    bash make.sh -o no-valgrind
    # build without the valgrind supported and i386 binary output
    bash make.sh -o no-valgrind -o no-m32
' 1>&2
}

gl_opt_value=""
while getopts ":o:h" o; do
    case "${o}" in
        o)
            gl_opt_value=${OPTARG}
            case "$gl_opt_value" in
                no-m32)
                    gl_opt_no_m32="true"
                    ;;
                no-valgrind)
                    gl_opt_no_valgrind="true"
                    ;;
                *)
                    usage
                    error "unknown option value of '-o': $gl_opt_value"
                    exit 1
                    ;;
            esac
            ;;
        h)
            usage
            exit 0
            ;;
        *)
            usage
            error unknown option
            exit 1
            ;;
    esac
done
shift $((OPTIND-1))

#echo "o = $gl_opt_value"
#echo "gl_opt_no_valgrind:$gl_opt_no_valgrind"
#echo "gl_opt_no_m32:$gl_opt_no_m32"

if [ ! -e "$OUTPUT_DIR" ]
then
    error "directory \"$OUTPUT_DIR\" doesn't exist"
    exit 1
fi
if [ ! -d "$OUTPUT_DIR" ]
then
    error "\"$OUTPUT_DIR\" is not a directory"
    exit 1
fi

tra "echo;echo build has been interrupted"

# the matrix of the build config for later testing
# -m32 -DACO_CONFIG_SHARE_FPU_MXCSR_ENV -DACO_USE_VALGRIND
# 0 0 0
ACO_EXTRA_CFLAGS="" OUTPUT_SUFFIX="..no_valgrind.standaloneFPUenv" build_f 201 | # 0 0 1 202 | ACO_EXTRA_CFLAGS="-DACO_USE_VALGRIND" OUTPUT_SUFFIX="..valgrind.standaloneFPUenv" build_f 203 | # 0 1 0 204 | ACO_EXTRA_CFLAGS="-DACO_CONFIG_SHARE_FPU_MXCSR_ENV" OUTPUT_SUFFIX="..no_valgrind.shareFPUenv" build_f 205 | # 0 1 1 206 | ACO_EXTRA_CFLAGS="-DACO_CONFIG_SHARE_FPU_MXCSR_ENV -DACO_USE_VALGRIND" OUTPUT_SUFFIX="..valgrind.shareFPUenv" build_f 207 | # 1 0 0 208 | ACO_EXTRA_CFLAGS="-m32" OUTPUT_SUFFIX="..m32.no_valgrind.standaloneFPUenv" build_f 209 | # 1 0 1 210 | ACO_EXTRA_CFLAGS="-m32 -DACO_USE_VALGRIND" OUTPUT_SUFFIX="..m32.valgrind.standaloneFPUenv" build_f 211 | # 1 1 0 212 | ACO_EXTRA_CFLAGS="-m32 -DACO_CONFIG_SHARE_FPU_MXCSR_ENV" OUTPUT_SUFFIX="..m32.no_valgrind.shareFPUenv" build_f 213 | # 1 1 1 214 | ACO_EXTRA_CFLAGS="-m32 -DACO_CONFIG_SHARE_FPU_MXCSR_ENV -DACO_USE_VALGRIND" OUTPUT_SUFFIX="..m32.valgrind.shareFPUenv" build_f 215 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2018 Sen Han <00hnes@gmail.com> 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http:#www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | #ulimit -c unlimited 18 | 19 | gl_trap_str="" 20 | 21 | function error(){ 22 | >&2 echo "$*" 23 | } 24 | 25 | function assert(){ 26 | if [ "0" -ne "$?" 
# Judges the exit status of one finished test binary.
#   $1  file name of the binary, "<program>..<build suffix>"
#   $2  its exit status (a non-negative integer)
# test_aco_tutorial_4 and test_aco_tutorial_5 are expected to abort
# (status 134); every other program must exit with 0. On a verdict of
# "failed" the current (sub)shell is exited with a non-zero status.
function test_f_handle_exit_code(){
    declare infile
    declare errc
    declare main_name
    declare intended_to_abort
    infile=$1
    errc=$2
    if [ -z "$infile" ]
    then
        error test_f_handle_exit_code illegal input
        exit 1
    fi
    # reject an empty, negative or non-numeric status up front; the later
    # numeric comparisons would otherwise error out and be skipped silently
    case "$errc" in
        ''|*[!0-9]*)
            error test_f_handle_exit_code illegal input
            exit 1
            ;;
    esac
    # program name = everything before the last ".."
    main_name=`echo "$infile" | sed -r "s|(.*)\.\.(.*)|\1|"`
    intended_to_abort=""
    if [ "$main_name" = "test_aco_tutorial_4" ] || [ "$main_name" = "test_aco_tutorial_5" ]
    then
        intended_to_abort="true"
    fi
    if [ "$intended_to_abort" ] && [ "$errc" -ne "134" ]
    then
        echo ""
        echo test $infile intended to abort failed:$errc
        # a clean exit of a must-abort test is still a failure: never
        # leave with status 0 here
        if [ "$errc" -eq "0" ]
        then
            exit 1
        fi
        exit $errc
    fi
    if [ -z "$intended_to_abort" ] && [ "$errc" -ne "0" ]
    then
        echo ""
        echo test $infile failed:$errc
        exit $errc
    fi
    if [ "$intended_to_abort" ]
    then
        echo test $infile intended to abort success:$errc
    else
        echo test $infile success
    fi
}
grep -Po '^[^:]+' | while read infile 112 | do 113 | test_f_is_exclude_app "$infile" 114 | if [ "0" -eq "$?" ] 115 | then 116 | echo "----" $infile is in the exclude app list, bypass its test 117 | echo 118 | continue 119 | fi 120 | valgrind_support=`echo "$infile" | grep -Po '.*\.\.(.*)' | sed -r "s|(.*)\.\.(.*)|\2|" | grep -Po '\bvalgrind\b'` 121 | if [ -z "$valgrind_support" ] 122 | then 123 | echo "----" $infile start":" 124 | time ./$infile 125 | errc="$?" 126 | test_f_handle_exit_code $infile $errc 127 | else 128 | echo "----" $infile memcheck start":" 129 | time valgrind --leak-check=full --error-exitcode=2 --tool=memcheck ./$infile 130 | errc="$?" 131 | test_f_handle_exit_code $infile $errc 132 | fi 133 | echo 134 | done 135 | errc="$?" 136 | if [ "$errc" -ne "0" ] 137 | then 138 | exit "$errc" 139 | fi 140 | if [ "$test_ct" -ne "0" ] 141 | then 142 | echo all the "$test_ct" tests had passed, OK and cheers! 143 | else 144 | echo no test need to do in current directory: "`pwd`" 145 | fi 146 | } 147 | 148 | tra "echo;echo test had been interrupted;exit 0;" 149 | 150 | version_check_flag=`echo $1 | grep -Po "\bversion_check\b="` 151 | version_to_check=`echo $1 | grep -Po "[0-9]+\.[0-9]+\.[0-9]+" | head -1` 152 | version_major=`echo $version_to_check | grep -Po "^[0-9]+(?=\.)"` 153 | version_minor=`echo $version_to_check | grep -Po "(?<=\.)[0-9]+(?=\.)"` 154 | version_patch=`echo $version_to_check | grep -Po "(?<=\.)[0-9]+$"` 155 | echo "$version_check_flag |$version_to_check|" 156 | echo "|$version_major|$version_minor|$version_patch|" 157 | 158 | makecc="cc" 159 | if [ "$CC" ] 160 | then 161 | makecc="$CC" 162 | fi 163 | 164 | if [ "$version_check_flag" ] 165 | then 166 | if [ "$version_major" -lt 0 ] || [ "$version_minor" -lt 0 ] || [ "$version_patch" -lt 0 ] 167 | then 168 | error "synatx error: version_to_check: $version_to_check" 169 | exit 1 170 | fi 171 | version_check_tmpdir=`mktemp -d` 172 | version_check_tmpfile="$version_check_tmpdir"/tmp.c 173 | 
echo ''' #include "aco.h" 174 | #include 175 | #include "aco_assert_override.h" 176 | 177 | int main() {''' > $version_check_tmpfile 178 | echo " assert(ACO_VERSION_MAJOR == $version_major);" \ 179 | >> $version_check_tmpfile 180 | echo " assert(ACO_VERSION_MINOR == $version_minor);" \ 181 | >> $version_check_tmpfile 182 | echo " assert(ACO_VERSION_PATCH == $version_patch);" \ 183 | >> $version_check_tmpfile 184 | echo " return 0;" >> $version_check_tmpfile 185 | echo " }" >> $version_check_tmpfile 186 | echo "$version_check_tmpfile:" 187 | cat $version_check_tmpfile 188 | $makecc -I. -g -O2 acosw.S aco.c -o "$version_check_tmpfile".bin $version_check_tmpfile 189 | "$version_check_tmpfile".bin 190 | assert "error: version_check failed: $version_to_check" 191 | rm -fr "$version_check_tmpdir" 192 | exit 0 193 | fi 194 | 195 | # test loop 196 | while true 197 | do 198 | echo "---- time:"`date` 199 | test_f 200 | errc="$?" 201 | if [ "$errc" -ne 0 ] 202 | then 203 | exit $errc 204 | fi 205 | if [ "$1" != "loop" ] 206 | then 207 | exit 0 208 | fi 209 | echo "" 210 | echo "----" start all tests again 211 | sleep 1 212 | done 213 | -------------------------------------------------------------------------------- /test_aco_benchmark.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #define _GNU_SOURCE 16 | 17 | #include "aco.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include "aco_assert_override.h" 23 | 24 | aco_cofuncp_t gl_co_fp; 25 | 26 | #define PRINT_BUF_SZ 64 27 | char gl_benchmark_print_str_buf[64]; 28 | 29 | void co_fp_alloca(){ 30 | size_t sz = (size_t)((uintptr_t)aco_get_arg()); 31 | uint8_t* ptr = NULL; 32 | assert(sz > 0); 33 | ptr = alloca(sz); 34 | assertptr(ptr); 35 | memset(ptr, 0, sz); 36 | while(1){ 37 | aco_yield(); 38 | } 39 | aco_exit(); 40 | } 41 | 42 | void co_fp_stksz_128(){ 43 | int ip[28]; 44 | memset(ip, 1, sizeof(ip)); 45 | while(1){ 46 | aco_yield(); 47 | } 48 | aco_exit(); 49 | } 50 | 51 | void co_fp_stksz_64(){ 52 | int ip[12]; 53 | memset(ip, 1, sizeof(ip)); 54 | while(1){ 55 | aco_yield(); 56 | } 57 | aco_exit(); 58 | } 59 | 60 | void co_fp_stksz_40(){ 61 | int ip[8]; 62 | memset(ip, 1, sizeof(ip)); 63 | while(1){ 64 | aco_yield(); 65 | } 66 | aco_exit(); 67 | } 68 | 69 | void co_fp_stksz_24(){ 70 | int ip[4]; 71 | memset(ip, 1, sizeof(ip)); 72 | while(1){ 73 | aco_yield(); 74 | } 75 | aco_exit(); 76 | } 77 | 78 | void co_fp_stksz_8(){ 79 | while(1){ 80 | aco_yield(); 81 | } 82 | aco_exit(); 83 | } 84 | 85 | void co_fp0(){ 86 | while(1){ 87 | aco_yield(); 88 | } 89 | aco_exit(); 90 | } 91 | 92 | void benchmark_copystack(size_t co_amount,size_t stksz, size_t loopct){ 93 | struct timespec tstart={0,0}, tend={0,0}; 94 | int print_sz = 0; 95 | double delta_t; 96 | // create co 97 | assert(co_amount > 0); 98 | assertptr((void*)gl_co_fp); 99 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 100 | aco_share_stack_t* sstk = aco_share_stack_new(0); 101 | // NOTE: size_t_safe_mul 102 | aco_t** coarray = (aco_t**) malloc(sizeof(void*) * co_amount); 103 | assertptr(coarray); 104 | memset(coarray, 0, sizeof(void*) * co_amount); 105 | size_t ct = 0; 106 | assert(0 == clock_gettime(CLOCK_MONOTONIC, &tstart)); 107 | while(ct < co_amount){ 108 | coarray[ct] = aco_create( 109 | main_co, 
sstk, 0, gl_co_fp, 110 | (void*)((uintptr_t)stksz) 111 | ); 112 | ct++; 113 | } 114 | assert(0 == clock_gettime(CLOCK_MONOTONIC, &tend)); 115 | delta_t = ((double)tend.tv_sec + 1.0e-9*tend.tv_nsec) - 116 | ((double)tstart.tv_sec + 1.0e-9*tstart.tv_nsec); 117 | //aco_create/init_save_stk_sz=64B 10000000 140.43 ns/op 7126683.67 op/s 118 | print_sz = snprintf( 119 | gl_benchmark_print_str_buf, PRINT_BUF_SZ, 120 | "aco_create/init_save_stk_sz=64B" 121 | ); 122 | assert(print_sz > 0 && print_sz < PRINT_BUF_SZ); 123 | printf("%-50s %11zu %9.3f s %11.2f ns/op %13.2f op/s\n", 124 | gl_benchmark_print_str_buf, 125 | co_amount, delta_t, 126 | (1.0e+9) / (co_amount / delta_t), 127 | co_amount / delta_t); 128 | fflush(stdout); 129 | // warm-up 130 | ct = 0; 131 | while(ct < co_amount){ 132 | aco_resume(coarray[ct]); 133 | ct++; 134 | } 135 | // copystack ctxsw 136 | assert(0 == clock_gettime(CLOCK_MONOTONIC, &tstart)); 137 | size_t glct = 0; 138 | while(glct < loopct){ 139 | ct = 0; 140 | while(ct < co_amount){ 141 | aco_resume(coarray[ct]); 142 | ct++; 143 | glct++; 144 | } 145 | } 146 | assert(0 == clock_gettime(CLOCK_MONOTONIC, &tend)); 147 | delta_t = ((double)tend.tv_sec + 1.0e-9*tend.tv_nsec) - 148 | ((double)tstart.tv_sec + 1.0e-9*tstart.tv_nsec); 149 | //aco_resume/copy_stack_size=8B 20000000 36.23 ns/op 27614644.57 op/s 150 | print_sz = snprintf( 151 | gl_benchmark_print_str_buf, PRINT_BUF_SZ, 152 | "aco_resume/co_amount=%zu/copy_stack_size=%zuB", 153 | co_amount, coarray[0]->save_stack.max_cpsz 154 | ); 155 | assert(print_sz > 0 && print_sz < PRINT_BUF_SZ); 156 | printf("%-50s %11zu %9.3f s %11.2f ns/op %13.2f op/s\n", 157 | gl_benchmark_print_str_buf, glct, 158 | delta_t, (1.0e+9) / (glct / delta_t), 159 | glct / delta_t); 160 | if(co_amount == 1 && coarray[0]->save_stack.max_cpsz == 0){ 161 | printf("%-50s %11zu %9.3f s %11.2f ns/op %13.2f op/s\n", 162 | " -> acosw", glct*2, 163 | delta_t, (1.0e+9) / (glct*2 / delta_t), 164 | glct*2 / delta_t); 165 | } 166 | 
fflush(stdout); 167 | // co cleaning 168 | assert(0 == clock_gettime(CLOCK_MONOTONIC, &tstart)); 169 | ct = 0; 170 | while(ct < co_amount){ 171 | aco_destroy(coarray[ct]); 172 | coarray[ct] = NULL; 173 | ct++; 174 | } 175 | assert(0 == clock_gettime(CLOCK_MONOTONIC, &tend)); 176 | aco_share_stack_destroy(sstk); 177 | sstk = NULL; 178 | aco_destroy(main_co); 179 | main_co = NULL; 180 | free(coarray); 181 | delta_t = ((double)tend.tv_sec + 1.0e-9*tend.tv_nsec) - 182 | ((double)tstart.tv_sec + 1.0e-9*tstart.tv_nsec); 183 | //aco_destroy 20000000 21.22 ns/op 47616496.16 op/s 184 | print_sz = snprintf( 185 | gl_benchmark_print_str_buf, PRINT_BUF_SZ, 186 | "aco_destroy" 187 | ); 188 | assert(print_sz > 0 && print_sz < PRINT_BUF_SZ); 189 | printf("%-50s %11zu %9.3f s %11.2f ns/op %13.2f op/s\n\n", 190 | gl_benchmark_print_str_buf, 191 | co_amount, delta_t, 192 | (1.0e+9) / (co_amount / delta_t), 193 | co_amount / delta_t); 194 | fflush(stdout); 195 | } 196 | 197 | int main() { 198 | #ifdef ACO_USE_VALGRIND 199 | if(1){ 200 | printf("%s doesn't have valgrind test yet, " 201 | "so bypass this test right now.\n",__FILE__ 202 | ); 203 | exit(0); 204 | } 205 | #endif 206 | 207 | aco_thread_init(NULL); 208 | 209 | printf("warm-up:\n"); 210 | gl_co_fp = co_fp_stksz_8; 211 | benchmark_copystack(200*10000, 10, 20000000); 212 | 213 | #ifdef __i386__ 214 | printf("+build:i386\n"); 215 | #elif __x86_64__ 216 | printf("+build:x86_64\n"); 217 | #endif 218 | 219 | #ifdef ACO_CONFIG_SHARE_FPU_MXCSR_ENV 220 | printf("+build:-DACO_CONFIG_SHARE_FPU_MXCSR_ENV\n"); 221 | printf("+build:share fpu & mxcsr control words between coroutines\n"); 222 | #else 223 | printf("+build:undefined ACO_CONFIG_SHARE_FPU_MXCSR_ENV\n"); 224 | printf("+build:each coroutine maintain each own fpu & mxcsr control words\n"); 225 | #endif 226 | #ifdef ACO_USE_VALGRIND 227 | printf("+build:-DACO_USE_VALGRIND\n"); 228 | printf("+build:valgrind memcheck friendly support enabled\n"); 229 | #else 230 | 
printf("+build:undefined ACO_USE_VALGRIND\n"); 231 | printf("+build:without valgrind memcheck friendly support\n"); 232 | #endif 233 | 234 | printf("\nsizeof(aco_t)=%zu:\n\n", sizeof(aco_t)); 235 | 236 | printf("\nstart-test:\n\n"); 237 | printf("%-50s %15s %15s %15s %15s\n\n", 238 | "comment", "task_amount", "all_time_cost", "ns_per_op", "speed" 239 | ); 240 | 241 | gl_co_fp = co_fp_stksz_8; 242 | benchmark_copystack(1, 10, 20000000); 243 | 244 | gl_co_fp = co_fp_stksz_8; 245 | benchmark_copystack(1, 10, 20000000); 246 | 247 | gl_co_fp = co_fp_stksz_8; 248 | benchmark_copystack(200*10000, 10, 20000000); 249 | gl_co_fp = co_fp_stksz_24; 250 | benchmark_copystack(200*10000, 10, 20000000); 251 | gl_co_fp = co_fp_stksz_40; 252 | benchmark_copystack(200*10000, 10, 20000000); 253 | gl_co_fp = co_fp_stksz_64; 254 | benchmark_copystack(200*10000, 10, 20000000); 255 | gl_co_fp = co_fp_stksz_128; 256 | benchmark_copystack(200*10000, 10, 20000000); 257 | 258 | gl_co_fp = co_fp_alloca; 259 | benchmark_copystack(200*10000, 150 - 64, 20000000); 260 | 261 | gl_co_fp = co_fp_alloca; 262 | benchmark_copystack(200*10000, 158 - 64, 20000000); 263 | 264 | gl_co_fp = co_fp_alloca; 265 | benchmark_copystack(200*10000, 166 - 64, 20000000); 266 | 267 | gl_co_fp = co_fp_alloca; 268 | benchmark_copystack(200*10000, 256 - 64, 20000000); 269 | 270 | gl_co_fp = co_fp_alloca; 271 | benchmark_copystack(200*10000, 512 - 64, 20000000); 272 | 273 | gl_co_fp = co_fp_alloca; 274 | benchmark_copystack(200*10000, 512 - 64, 20000000); 275 | 276 | gl_co_fp = co_fp_alloca; 277 | benchmark_copystack(100*10000, 1024 - 64, 20000000); 278 | 279 | gl_co_fp = co_fp_alloca; 280 | benchmark_copystack(100*10000, 1024 - 64, 20000000); 281 | 282 | gl_co_fp = co_fp_alloca; 283 | benchmark_copystack(10*10000, 1024 - 64, 20000000); 284 | 285 | gl_co_fp = co_fp_alloca; 286 | benchmark_copystack(10*10000, 2048 - 64, 20000000); 287 | 288 | gl_co_fp = co_fp_alloca; 289 | benchmark_copystack(10*10000, 4096 - 64, 20000000); 
290 | 291 | gl_co_fp = co_fp_alloca; 292 | benchmark_copystack(10*10000, 8012 - 64, 20000000); 293 | 294 | return 0; 295 | } 296 | -------------------------------------------------------------------------------- /test_aco_synopsis.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "aco.h" 16 | #include 17 | #include "aco_assert_override.h" 18 | 19 | void foo(int ct) { 20 | printf("co: %p: yield to main_co: %d\n", aco_get_co(), *((int*)(aco_get_arg()))); 21 | aco_yield(); 22 | *((int*)(aco_get_arg())) = ct + 1; 23 | } 24 | 25 | void co_fp0() { 26 | printf("co: %p: entry: %d\n", aco_get_co(), *((int*)(aco_get_arg()))); 27 | int ct = 0; 28 | while(ct < 6){ 29 | foo(ct); 30 | ct++; 31 | } 32 | printf("co: %p: exit to main_co: %d\n", aco_get_co(), *((int*)(aco_get_arg()))); 33 | aco_exit(); 34 | } 35 | 36 | int main() { 37 | aco_thread_init(NULL); 38 | 39 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 40 | aco_share_stack_t* sstk = aco_share_stack_new(0); 41 | 42 | int co_ct_arg_point_to_me = 0; 43 | aco_t* co = aco_create(main_co, sstk, 0, co_fp0, &co_ct_arg_point_to_me); 44 | 45 | int ct = 0; 46 | while(ct < 6){ 47 | assert(co->is_end == 0); 48 | printf("main_co: yield to co: %p: %d\n", co, ct); 49 | aco_resume(co); 50 | assert(co_ct_arg_point_to_me == ct); 51 | 
ct++; 52 | } 53 | printf("main_co: yield to co: %p: %d\n", co, ct); 54 | aco_resume(co); 55 | assert(co_ct_arg_point_to_me == ct); 56 | assert(co->is_end); 57 | 58 | printf("main_co: destroy and exit\n"); 59 | aco_destroy(co); 60 | co = NULL; 61 | aco_share_stack_destroy(sstk); 62 | sstk = NULL; 63 | aco_destroy(main_co); 64 | main_co = NULL; 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /test_aco_tutorial_0.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Hello aco demo. 16 | 17 | #include "aco.h" 18 | #include 19 | #include 20 | #include 21 | #include "aco_assert_override.h" 22 | 23 | void co_fp0() { 24 | // Get co->arg. The caller of `aco_get_arg()` must be a non-main co. 25 | int *iretp = (int *)aco_get_arg(); 26 | // Get current co. The caller of `aco_get_co()` must be a non-main co. 27 | aco_t* this_co = aco_get_co(); 28 | int ct = 0; 29 | while(ct < 6){ 30 | printf( 31 | "co:%p save_stack:%p share_stack:%p yield_ct:%d\n", 32 | this_co, this_co->save_stack.ptr, 33 | this_co->share_stack->ptr, ct 34 | ); 35 | // Yield the execution of current co and resume the execution of 36 | // `co->main_co`. The caller of `aco_yield()` must be a non-main co. 
37 | aco_yield(); 38 | (*iretp)++; 39 | ct++; 40 | } 41 | printf( 42 | "co:%p save_stack:%p share_stack:%p co_exit()\n", 43 | this_co, this_co->save_stack.ptr, 44 | this_co->share_stack->ptr 45 | ); 46 | // In addition do the same as `aco_yield()`, `aco_exit()` also set 47 | // `co->is_end` to `1` thus to mark the `co` at the status of "END". 48 | aco_exit(); 49 | } 50 | 51 | int main() { 52 | #ifdef ACO_USE_VALGRIND 53 | if(0){ 54 | printf("%s doesn't have valgrind test yet, " 55 | "so bypass this test right now.\n",__FILE__ 56 | ); 57 | exit(0); 58 | } 59 | #endif 60 | // Initialize the aco environment in the current thread. 61 | aco_thread_init(NULL); 62 | 63 | // Create a main coroutine whose "share stack" is the default stack 64 | // of the current thread. And it doesn't need any private save stack 65 | // since it is definitely a standalone coroutine (which coroutine 66 | // monopolizes it's share stack). 67 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 68 | 69 | // Create a share stack with the default size of 2MB and also with a 70 | // read-only guard page for the detection of stack overflow. 71 | aco_share_stack_t* sstk = aco_share_stack_new(0); 72 | 73 | int co_ct_arg_point_to_me = 0; 74 | // Create a non-main coroutine whose share stack is `sstk` and has a 75 | // default 64 bytes size private save stack. The entry function of the 76 | // coroutine is `co_fp0`. Set `co->arg` to the address of the int 77 | // variable `co_ct_arg_point_to_me`. 78 | aco_t* co = aco_create(main_co, sstk, 0, co_fp0, &co_ct_arg_point_to_me); 79 | 80 | int ct = 0; 81 | while(ct < 6){ 82 | assert(co->is_end == 0); 83 | // Start or continue the execution of `co`. The caller of this function 84 | // must be main_co. 85 | aco_resume(co); 86 | // Check whether the co has completed the job it promised. 
87 | assert(co_ct_arg_point_to_me == ct); 88 | printf("main_co:%p\n", main_co); 89 | ct++; 90 | } 91 | aco_resume(co); 92 | assert(co_ct_arg_point_to_me == ct); 93 | // The value of `co->is_end` must be `1` now since it just suspended 94 | // itself by calling `aco_exit()`. 95 | assert(co->is_end); 96 | 97 | printf("main_co:%p\n", main_co); 98 | 99 | // Destroy co and its private save stack. 100 | aco_destroy(co); 101 | co = NULL; 102 | // Destroy the share stack sstk. 103 | aco_share_stack_destroy(sstk); 104 | sstk = NULL; 105 | // Destroy the main_co. 106 | aco_destroy(main_co); 107 | main_co = NULL; 108 | 109 | return 0; 110 | } 111 | -------------------------------------------------------------------------------- /test_aco_tutorial_1.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Some statistics of the coroutine. 
16 | 17 | #include "aco.h" 18 | #include 19 | #include 20 | #include 21 | #include "aco_assert_override.h" 22 | 23 | void co_fp0(){ 24 | int *iretp = (int *)aco_get_arg(); 25 | aco_t* this_co = aco_get_co(); 26 | assert(!aco_is_main_co(this_co)); 27 | assert(this_co->fp == (void*)co_fp0); 28 | assert(this_co->is_end == 0); 29 | int ct = 0; 30 | while(ct < 6){ 31 | printf( 32 | "co:%p save_stack:%p share_stack:%p yield_ct:%d\n", 33 | this_co, this_co->save_stack.ptr, 34 | this_co->share_stack->ptr, ct 35 | ); 36 | aco_yield(); 37 | (*iretp)++; 38 | ct++; 39 | } 40 | printf( 41 | "co:%p save_stack:%p share_stack:%p co_exit()\n", 42 | this_co, this_co->save_stack.ptr, 43 | this_co->share_stack->ptr 44 | ); 45 | aco_exit(); 46 | assert(0); 47 | } 48 | 49 | int main() { 50 | #ifdef ACO_USE_VALGRIND 51 | if(0){ 52 | printf("%s doesn't have valgrind test yet, " 53 | "so bypass this test right now.\n",__FILE__ 54 | ); 55 | exit(0); 56 | } 57 | #endif 58 | 59 | aco_thread_init(NULL); 60 | 61 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 62 | assertptr(main_co); 63 | 64 | aco_share_stack_t* sstk = aco_share_stack_new(0); 65 | assertptr(sstk); 66 | 67 | int co_ct_arg_point_to_me = 0; 68 | aco_t* co = aco_create(main_co, sstk, 0, co_fp0, &co_ct_arg_point_to_me); 69 | assertptr(co); 70 | 71 | int ct = 0; 72 | while(ct < 6){ 73 | assert(co->is_end == 0); 74 | aco_resume(co); 75 | assert(co_ct_arg_point_to_me == ct); 76 | printf("main_co:%p\n", main_co); 77 | ct++; 78 | } 79 | aco_resume(co); 80 | assert(co_ct_arg_point_to_me == ct); 81 | assert(co->is_end); 82 | 83 | printf("main_co:%p\n", main_co); 84 | 85 | printf( 86 | "\ncopy-stack co:%p:\n max stack copy size:%zu\n" 87 | " save (from share stack to save stack) counter of the private save stack:%zu\n" 88 | " restore (from save stack to share stack) counter of the private save stack:%zu\n", 89 | co, co->save_stack.max_cpsz, 90 | co->save_stack.ct_save, 91 | co->save_stack.ct_restore 92 | ); 93 | printf("\n(Since 
the share stack used by the co has only one user `co`, " 94 | "so there is no need to save/restore the stack every time during resume &" 95 | " yield execution, thus you can call it a co has 'standalone stack' " 96 | "which just is a very special case of copy-stack.)\n"); 97 | 98 | aco_destroy(co); 99 | co = NULL; 100 | aco_share_stack_destroy(sstk); 101 | sstk = NULL; 102 | aco_destroy(main_co); 103 | main_co = NULL; 104 | 105 | return 0; 106 | } 107 | -------------------------------------------------------------------------------- /test_aco_tutorial_2.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "aco.h" 16 | #include 17 | #include 18 | #include 19 | #include "aco_assert_override.h" 20 | 21 | void foo(int ct){ 22 | printf( 23 | "co:%p save_stack:%p share_stack:%p yield_ct:%d\n", 24 | aco_get_co(), aco_get_co()->save_stack.ptr, 25 | aco_get_co()->share_stack->ptr, ct 26 | ); 27 | aco_yield(); 28 | (*((int*)(aco_get_arg())))++; 29 | } 30 | 31 | void co_fp0() 32 | { 33 | aco_t* this_co = aco_get_co(); 34 | assert(!aco_is_main_co(this_co)); 35 | assert(this_co->fp == (void*)co_fp0); 36 | assert(this_co->is_end == 0); 37 | int ct = 0; 38 | while(ct < 6){ 39 | foo(ct); 40 | ct++; 41 | } 42 | printf( 43 | "co:%p save_stack:%p share_stack:%p co_exit()\n", 44 | this_co, this_co->save_stack.ptr, 45 | this_co->share_stack->ptr 46 | ); 47 | aco_exit(); 48 | assert(0); 49 | } 50 | 51 | int main() { 52 | #ifdef ACO_USE_VALGRIND 53 | if(1){ 54 | printf("%s doesn't have valgrind test yet, " 55 | "so bypass this test right now.\n",__FILE__ 56 | ); 57 | exit(0); 58 | } 59 | #endif 60 | 61 | aco_thread_init(NULL); 62 | 63 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 64 | assertptr(main_co); 65 | 66 | aco_share_stack_t* sstk = aco_share_stack_new(0); 67 | assertptr(sstk); 68 | aco_share_stack_t* sstk2 = aco_share_stack_new(0); 69 | assertptr(sstk2); 70 | 71 | int co_ct_arg_point_to_me = 0; 72 | int co2_ct_arg_point_to_me = 0; 73 | int co3_ct_arg_point_to_me = 0; 74 | aco_t* co = aco_create(main_co, sstk, 0, co_fp0, &co_ct_arg_point_to_me); 75 | assertptr(co); 76 | aco_t* co2 = aco_create(main_co, sstk2, 0, co_fp0, &co2_ct_arg_point_to_me); 77 | aco_t* co3 = aco_create(main_co, sstk2, 0, co_fp0, &co3_ct_arg_point_to_me); 78 | assertptr(co2); 79 | assertptr(co3); 80 | 81 | int ct = 0; 82 | while(ct < 6){ 83 | assert(co->is_end == 0); 84 | aco_resume(co); 85 | assert(co_ct_arg_point_to_me == ct); 86 | 87 | assert(co2->is_end == 0); 88 | aco_resume(co2); 89 | assert(co2_ct_arg_point_to_me == ct); 90 | 91 | assert(co3->is_end == 0); 92 | 
aco_resume(co3); 93 | assert(co3_ct_arg_point_to_me == ct); 94 | 95 | printf("main_co:%p\n", main_co); 96 | ct++; 97 | } 98 | aco_resume(co); 99 | assert(co_ct_arg_point_to_me == ct); 100 | assert(co->is_end); 101 | 102 | aco_resume(co2); 103 | assert(co2_ct_arg_point_to_me == ct); 104 | assert(co2->is_end); 105 | 106 | aco_resume(co3); 107 | assert(co3_ct_arg_point_to_me == ct); 108 | assert(co3->is_end); 109 | 110 | printf("main_co:%p\n", main_co); 111 | 112 | printf( 113 | "\ncopy-stack co:%p:\n max stack copy size:%zu\n" 114 | " save (from share stack to save stack) counter of the private save stack:%zu\n" 115 | " restore (from save stack to share stack) counter of the private save stack:%zu\n", 116 | co, co->save_stack.max_cpsz, 117 | co->save_stack.ct_save, 118 | co->save_stack.ct_restore 119 | ); 120 | printf("\n(Since the share stack used by the co has only one user `co`, " 121 | "so there is no need to save/restore the stack every time during resume &" 122 | " yield execution, thus you can call it a co has 'standalone stack' " 123 | "which just is a very special case of copy-stack.)\n"); 124 | 125 | printf( 126 | "\ncopy-stack co2:%p:\n max stack copy size:%zu\n" 127 | " save (from share stack to save stack) counter of the private save stack:%zu\n" 128 | " restore (from save stack to share stack) counter of the private save stack:%zu\n", 129 | co2, co2->save_stack.max_cpsz, 130 | co2->save_stack.ct_save, 131 | co2->save_stack.ct_restore 132 | ); 133 | printf( 134 | "\ncopy-stack co3:%p:\n max stack copy size:%zu\n" 135 | " save (from share stack to save stack) counter of the private save stack:%zu\n" 136 | " restore (from save stack to share stack) counter of the private save stack:%zu\n", 137 | co3, co3->save_stack.max_cpsz, 138 | co3->save_stack.ct_save, 139 | co3->save_stack.ct_restore 140 | ); 141 | 142 | printf("\n(The co2 & co3 share the share stack sstk2, thus it is " 143 | "necessary to save/restore the stack every time during resume &" 144 | " 
yield execution, thus it is a ordinary case of copy-stack.)\n"); 145 | 146 | aco_destroy(co); 147 | co = NULL; 148 | aco_destroy(co2); 149 | co2 = NULL; 150 | aco_destroy(co3); 151 | co3 = NULL; 152 | 153 | aco_share_stack_destroy(sstk); 154 | sstk = NULL; 155 | aco_share_stack_destroy(sstk2); 156 | sstk2 = NULL; 157 | 158 | aco_destroy(main_co); 159 | main_co = NULL; 160 | 161 | return 0; 162 | } 163 | -------------------------------------------------------------------------------- /test_aco_tutorial_3.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Use aco in multithread. 
16 | 17 | #include "aco.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "aco_assert_override.h" 24 | 25 | uint64_t gl_race_aco_yield_ct = 0; 26 | pthread_mutex_t gl_race_aco_yield_ct_mutex = PTHREAD_MUTEX_INITIALIZER; 27 | 28 | void foo(int ct){ 29 | printf( 30 | "co:%p save_stack:%p share_stack:%p yield_ct:%d\n", 31 | aco_get_co(), aco_get_co()->save_stack.ptr, 32 | aco_get_co()->share_stack->ptr, ct 33 | ); 34 | pthread_mutex_lock(&gl_race_aco_yield_ct_mutex); 35 | gl_race_aco_yield_ct++; 36 | pthread_mutex_unlock(&gl_race_aco_yield_ct_mutex); 37 | aco_yield(); 38 | (*((int*)(aco_get_arg())))++; 39 | } 40 | 41 | void co_fp0() 42 | { 43 | aco_t* this_co = aco_get_co(); 44 | assert(!aco_is_main_co(this_co)); 45 | assert(this_co->fp == (void*)co_fp0); 46 | assert(this_co->is_end == 0); 47 | int ct = 0; 48 | while(ct < 6){ 49 | foo(ct); 50 | ct++; 51 | } 52 | printf( 53 | "co:%p save_stack:%p share_stack:%p co_exit()\n", 54 | this_co, this_co->save_stack.ptr, 55 | this_co->share_stack->ptr 56 | ); 57 | pthread_mutex_lock(&gl_race_aco_yield_ct_mutex); 58 | gl_race_aco_yield_ct++; 59 | pthread_mutex_unlock(&gl_race_aco_yield_ct_mutex); 60 | aco_exit(); 61 | assert(0); 62 | } 63 | 64 | void* pmain(void* pthread_in_arg) { 65 | pthread_t t = pthread_self(); 66 | size_t idx = 0; 67 | assert(sizeof(t) > 0); 68 | printf("\ntid:0x"); 69 | while(idx < sizeof(t)){ 70 | printf("%02x", *(((uint8_t*)(&t)) + idx)); 71 | idx++; 72 | } 73 | printf("\n"); 74 | 75 | aco_thread_init(NULL); 76 | 77 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 78 | assertptr(main_co); 79 | 80 | aco_share_stack_t* sstk = aco_share_stack_new(0); 81 | assertptr(sstk); 82 | aco_share_stack_t* sstk2 = aco_share_stack_new(0); 83 | assertptr(sstk2); 84 | 85 | int co_ct_arg_point_to_me = 0; 86 | int co2_ct_arg_point_to_me = 0; 87 | int co3_ct_arg_point_to_me = 0; 88 | aco_t* co = aco_create(main_co, sstk, 0, co_fp0, &co_ct_arg_point_to_me); 89 | assertptr(co); 
90 | aco_t* co2 = aco_create(main_co, sstk2, 0, co_fp0, &co2_ct_arg_point_to_me); 91 | aco_t* co3 = aco_create(main_co, sstk2, 0, co_fp0, &co3_ct_arg_point_to_me); 92 | assertptr(co2); 93 | assertptr(co3); 94 | 95 | int ct = 0; 96 | while(ct < 6){ 97 | assert(co->is_end == 0); 98 | aco_resume(co); 99 | assert(co_ct_arg_point_to_me == ct); 100 | 101 | assert(co2->is_end == 0); 102 | aco_resume(co2); 103 | assert(co2_ct_arg_point_to_me == ct); 104 | 105 | assert(co3->is_end == 0); 106 | aco_resume(co3); 107 | assert(co3_ct_arg_point_to_me == ct); 108 | 109 | printf("main_co:%p\n", main_co); 110 | ct++; 111 | } 112 | aco_resume(co); 113 | assert(co_ct_arg_point_to_me == ct); 114 | assert(co->is_end); 115 | 116 | aco_resume(co2); 117 | assert(co2_ct_arg_point_to_me == ct); 118 | assert(co2->is_end); 119 | 120 | aco_resume(co3); 121 | assert(co3_ct_arg_point_to_me == ct); 122 | assert(co3->is_end); 123 | 124 | printf("main_co:%p\n", main_co); 125 | 126 | printf( 127 | "\ncopy-stack co:%p:\n max stack copy size:%zu\n" 128 | " save (from share stack to save stack) counter of the private save stack:%zu\n" 129 | " restore (from save stack to share stack) counter of the private save stack:%zu\n", 130 | co, co->save_stack.max_cpsz, 131 | co->save_stack.ct_save, 132 | co->save_stack.ct_restore 133 | ); 134 | printf("\n(Since the share stack used by the co has only one user `co`, " 135 | "so there is no need to save/restore the stack every time during resume &" 136 | " yield execution, thus you can call it a co has 'standalone stack' " 137 | "which just is a very special case of copy-stack.)\n"); 138 | 139 | printf( 140 | "\ncopy-stack co2:%p:\n max stack copy size:%zu\n" 141 | " save (from share stack to save stack) counter of the private save stack:%zu\n" 142 | " restore (from save stack to share stack) counter of the private save stack:%zu\n", 143 | co2, co2->save_stack.max_cpsz, 144 | co2->save_stack.ct_save, 145 | co2->save_stack.ct_restore 146 | ); 147 | printf( 148 | 
"\ncopy-stack co3:%p:\n max stack copy size:%zu\n" 149 | " save (from share stack to save stack) counter of the private save stack:%zu\n" 150 | " restore (from save stack to share stack) counter of the private save stack:%zu\n", 151 | co3, co3->save_stack.max_cpsz, 152 | co3->save_stack.ct_save, 153 | co3->save_stack.ct_restore 154 | ); 155 | 156 | printf("\n(The co2 & co3 share the share stack sstk2, thus it is " 157 | "necessary to save/restore the stack every time during resume &" 158 | " yield execution, thus it is a ordinary case of copy-stack.)\n"); 159 | 160 | uint64_t tmp_gl_ct; 161 | pthread_mutex_lock(&gl_race_aco_yield_ct_mutex); 162 | tmp_gl_ct = gl_race_aco_yield_ct; 163 | pthread_mutex_unlock(&gl_race_aco_yield_ct_mutex); 164 | printf("\ngl_race_aco_yield_ct:%" PRIu64"\n", tmp_gl_ct); 165 | 166 | aco_destroy(co); 167 | co = NULL; 168 | aco_destroy(co2); 169 | co2 = NULL; 170 | aco_destroy(co3); 171 | co3 = NULL; 172 | 173 | aco_share_stack_destroy(sstk); 174 | sstk = NULL; 175 | aco_share_stack_destroy(sstk2); 176 | sstk2 = NULL; 177 | 178 | aco_destroy(main_co); 179 | main_co = NULL; 180 | 181 | return 0; 182 | } 183 | 184 | int main(){ 185 | #ifdef ACO_USE_VALGRIND 186 | if(1){ 187 | printf("%s doesn't have valgrind test yet, " 188 | "so bypass this test right now.\n",__FILE__ 189 | ); 190 | exit(0); 191 | } 192 | #endif 193 | 194 | pthread_t t1,t2; 195 | 196 | assert(0 == pthread_create( 197 | &t1, NULL, pmain, NULL 198 | )); 199 | assert(0 == pthread_create( 200 | &t2, NULL, pmain, NULL 201 | )); 202 | 203 | assert(0 == pthread_join(t1, NULL)); 204 | assert(0 == pthread_join(t2, NULL)); 205 | 206 | return 0; 207 | } 208 | -------------------------------------------------------------------------------- /test_aco_tutorial_4.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not 
use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "aco.h" 16 | #include 17 | #include 18 | #include 19 | #include "aco_assert_override.h" 20 | 21 | void foo(int ct){ 22 | printf( 23 | "co:%p save_stack:%p share_stack:%p yield_ct:%d\n", 24 | aco_get_co(), aco_get_co()->save_stack.ptr, 25 | aco_get_co()->share_stack->ptr, ct 26 | ); 27 | aco_yield(); 28 | (*((int*)(aco_get_arg())))++; 29 | } 30 | 31 | void co_fp0() 32 | { 33 | aco_t* this_co = aco_get_co(); 34 | assert(!aco_is_main_co(this_co)); 35 | assert(this_co->fp == (void*)co_fp0); 36 | assert(this_co->is_end == 0); 37 | int ct = 0; 38 | while(ct < 6){ 39 | foo(ct); 40 | ct++; 41 | } 42 | printf( 43 | "co:%p save_stack:%p share_stack:%p !offending return!\n", 44 | this_co, this_co->save_stack.ptr, 45 | this_co->share_stack->ptr 46 | ); 47 | printf("Intended to Abort to test the aco protector :)\n"); 48 | // the offending `return` here it is 49 | // you should always call `aco_exit()` to finish the execution of a non-main co 50 | // instead of call `return` in the real application 51 | // this is a demo shows how protector works in libaco (intended to abort) 52 | return; 53 | aco_exit(); 54 | assert(0); 55 | } 56 | 57 | int main() { 58 | #ifdef ACO_USE_VALGRIND 59 | if(0){ 60 | printf("%s doesn't have valgrind test yet, " 61 | "so bypass this test right now.\n",__FILE__ 62 | ); 63 | exit(0); 64 | } 65 | #endif 66 | 67 | aco_thread_init(NULL); 68 | 69 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 70 | assertptr(main_co); 71 | 72 | 
aco_share_stack_t* sstk = aco_share_stack_new(0); 73 | assertptr(sstk); 74 | 75 | int co_ct_arg_point_to_me = 0; 76 | aco_t* co = aco_create(main_co, sstk, 0, co_fp0, &co_ct_arg_point_to_me); 77 | assertptr(co); 78 | 79 | int ct = 0; 80 | while(ct < 6){ 81 | assert(co->is_end == 0); 82 | aco_resume(co); 83 | assert(co_ct_arg_point_to_me == ct); 84 | 85 | printf("main_co:%p\n", main_co); 86 | ct++; 87 | } 88 | aco_resume(co); 89 | assert(co_ct_arg_point_to_me == ct); 90 | assert(co->is_end); 91 | 92 | printf("main_co:%p\n", main_co); 93 | 94 | printf( 95 | "\ncopy-stack co:%p:\n max stack copy size:%zu\n" 96 | " save (from share stack to save stack) counter of the private save stack:%zu\n" 97 | " restore (from save stack to share stack) counter of the private save stack:%zu\n", 98 | co, co->save_stack.max_cpsz, 99 | co->save_stack.ct_save, 100 | co->save_stack.ct_restore 101 | ); 102 | printf("\n(Since the share stack used by the co has only one user `co`, " 103 | "so there is no need to save/restore the stack every time during resume &" 104 | " yield execution, thus you can call it a co has 'standalone stack' " 105 | "which just is a very special case of copy-stack.)\n"); 106 | 107 | aco_destroy(co); 108 | co = NULL; 109 | 110 | aco_share_stack_destroy(sstk); 111 | sstk = NULL; 112 | 113 | aco_destroy(main_co); 114 | main_co = NULL; 115 | 116 | return 0; 117 | } 118 | -------------------------------------------------------------------------------- /test_aco_tutorial_5.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Test the customization of aco protector. 16 | 17 | #include "aco.h" 18 | #include 19 | #include 20 | #include 21 | 22 | #include "aco_assert_override.h" 23 | 24 | void foo(int ct){ 25 | printf( 26 | "co:%p save_stack:%p share_stack:%p yield_ct:%d\n", 27 | aco_get_co(), aco_get_co()->save_stack.ptr, 28 | aco_get_co()->share_stack->ptr, ct 29 | ); 30 | aco_yield(); 31 | (*((int*)(aco_get_arg())))++; 32 | } 33 | 34 | void co_fp0() 35 | { 36 | aco_t* this_co = aco_get_co(); 37 | assert(!aco_is_main_co(this_co)); 38 | assert(this_co->fp == (void*)co_fp0); 39 | assert(this_co->is_end == 0); 40 | int ct = 0; 41 | while(ct < 6){ 42 | foo(ct); 43 | ct++; 44 | } 45 | printf( 46 | "co:%p save_stack:%p share_stack:%p !offending return!\n", 47 | this_co, this_co->save_stack.ptr, 48 | this_co->share_stack->ptr 49 | ); 50 | printf("Intended to Abort to test the aco protector :)\n"); 51 | return; 52 | aco_exit(); 53 | assert(0); 54 | } 55 | 56 | static void co_protector_last_word(){ 57 | aco_t* co = aco_get_co(); 58 | // do some log about the offending `co` 59 | fprintf(stderr,"error: customized co_protector_last_word triggered \n"); 60 | fprintf(stderr, "error: co:%p should call `aco_exit(co)` instead of direct " 61 | "`return` in co_fp:%p to finish its execution\n", co, (void*)co->fp); 62 | assert(0); 63 | } 64 | 65 | int main() { 66 | #ifdef ACO_USE_VALGRIND 67 | if(0){ 68 | printf("%s doesn't have valgrind test yet, " 69 | "so bypass this test right now.\n",__FILE__ 70 | ); 71 | exit(0); 72 | } 73 | #endif 74 | 75 | 
aco_thread_init(co_protector_last_word); 76 | 77 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 78 | assertptr(main_co); 79 | 80 | aco_share_stack_t* sstk = aco_share_stack_new(0); 81 | assertptr(sstk); 82 | 83 | int co_ct_arg_point_to_me = 0; 84 | aco_t* co = aco_create(main_co, sstk, 0, co_fp0, &co_ct_arg_point_to_me); 85 | assertptr(co); 86 | 87 | int ct = 0; 88 | while(ct < 6){ 89 | assert(co->is_end == 0); 90 | aco_resume(co); 91 | assert(co_ct_arg_point_to_me == ct); 92 | 93 | printf("main_co:%p\n", main_co); 94 | ct++; 95 | } 96 | aco_resume(co); 97 | assert(co_ct_arg_point_to_me == ct); 98 | assert(co->is_end); 99 | 100 | printf("main_co:%p\n", main_co); 101 | 102 | printf( 103 | "\ncopy-stack co:%p:\n max stack copy size:%zu\n" 104 | " save (from share stack to save stack) counter of the private save stack:%zu\n" 105 | " restore (from save stack to share stack) counter of the private save stack:%zu\n", 106 | co, co->save_stack.max_cpsz, 107 | co->save_stack.ct_save, 108 | co->save_stack.ct_restore 109 | ); 110 | printf("\n(Since the share stack used by the co has only one user `co`, " 111 | "so there is no need to save/restore the stack every time during resume &" 112 | " yield execution, thus you can call it a co has 'standalone stack' " 113 | "which just is a very special case of copy-stack.)\n"); 114 | 115 | aco_destroy(co); 116 | co = NULL; 117 | 118 | aco_share_stack_destroy(sstk); 119 | sstk = NULL; 120 | 121 | aco_destroy(main_co); 122 | main_co = NULL; 123 | 124 | return 0; 125 | } 126 | -------------------------------------------------------------------------------- /test_aco_tutorial_6.c: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Sen Han <00hnes@gmail.com> 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // A naive and pretty simple scheduler demo. 16 | 17 | #include "aco.h" 18 | #include 19 | #include 20 | #include 21 | #include "aco_assert_override.h" 22 | 23 | void co_fp0(){ 24 | int ct = 0; 25 | int loop_ct = (int)((uintptr_t)(aco_get_co()->arg)); 26 | if(loop_ct < 0){ 27 | loop_ct = 0; 28 | } 29 | while(ct < loop_ct){ 30 | aco_yield(); 31 | ct++; 32 | } 33 | aco_exit(); 34 | } 35 | 36 | int main() { 37 | aco_thread_init(NULL); 38 | 39 | time_t seed_t = time(NULL); 40 | assert((time_t)-1 != seed_t); 41 | srand(seed_t); 42 | 43 | size_t co_amount = 100; 44 | 45 | // create co 46 | assert(co_amount > 0); 47 | aco_t* main_co = aco_create(NULL, NULL, 0, NULL, NULL); 48 | aco_share_stack_t* sstk = aco_share_stack_new(0); 49 | // NOTE: size_t_safe_mul 50 | aco_t** coarray = (aco_t**) malloc(sizeof(void*) * co_amount); 51 | assertptr(coarray); 52 | memset(coarray, 0, sizeof(void*) * co_amount); 53 | size_t ct = 0; 54 | while(ct < co_amount){ 55 | #ifdef ACO_USE_VALGRIND 56 | aco_share_stack_t* private_sstk = aco_share_stack_new2( 57 | 0, ct % 2 58 | ); 59 | coarray[ct] = aco_create( 60 | main_co, private_sstk, 0, co_fp0, 61 | (void*)((uintptr_t)rand() % 1000) 62 | ); 63 | private_sstk = NULL; 64 | #else 65 | coarray[ct] = aco_create( 66 | main_co, sstk, 0, co_fp0, 67 | (void*)((uintptr_t)rand() % 1000) 68 | ); 69 | #endif 70 | ct++; 71 | } 72 | 73 | // naive scheduler with very poor performance (only for demo and testing) 74 | printf("scheduler start: co_amount:%zu\n", co_amount); 75 | size_t null_ct = 0; 76 | 
while(1){ 77 | ct = 0; 78 | while(ct < co_amount){ 79 | if(coarray[ct] != NULL){ 80 | aco_resume(coarray[ct]); 81 | null_ct = 0; 82 | if(coarray[ct]->is_end != 0){ 83 | printf("aco_destroy: co:%zu\n", ct); 84 | #ifdef ACO_USE_VALGRIND 85 | aco_share_stack_t* private_sstk = coarray[ct]->share_stack; 86 | #endif 87 | aco_destroy(coarray[ct]); 88 | coarray[ct] = NULL; 89 | #ifdef ACO_USE_VALGRIND 90 | aco_share_stack_destroy(private_sstk); 91 | private_sstk = NULL; 92 | #endif 93 | } 94 | } else { 95 | null_ct++; 96 | if(null_ct >= co_amount){ 97 | goto END; 98 | } 99 | } 100 | ct++; 101 | } 102 | } 103 | // co cleaning 104 | END: 105 | ct = 0; 106 | while(ct < co_amount){ 107 | assert(coarray[ct] == NULL); 108 | ct++; 109 | } 110 | aco_share_stack_destroy(sstk); 111 | sstk = NULL; 112 | aco_destroy(main_co); 113 | main_co = NULL; 114 | free(coarray); 115 | 116 | printf("sheduler exit"); 117 | 118 | return 0; 119 | } 120 | --------------------------------------------------------------------------------