├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── ChangeLog ├── LICENSE ├── Makefile.am ├── NOTICE ├── README.md ├── configure.ac ├── m4 └── .gitignore ├── notes ├── coding_style.txt ├── draft.md ├── intel-ssd-320-specification.pdf ├── memcache.txt ├── performance.md └── spec.md ├── scripts └── memcache-check.sh └── src ├── Makefile.am ├── fc.c ├── fc_array.c ├── fc_array.h ├── fc_client.c ├── fc_client.h ├── fc_common.h ├── fc_connection.c ├── fc_connection.h ├── fc_core.c ├── fc_core.h ├── fc_event.c ├── fc_event.h ├── fc_item.c ├── fc_item.h ├── fc_itemx.c ├── fc_itemx.h ├── fc_log.c ├── fc_log.h ├── fc_mbuf.c ├── fc_mbuf.h ├── fc_memcache.c ├── fc_memcache.h ├── fc_message.c ├── fc_message.h ├── fc_queue.h ├── fc_request.c ├── fc_response.c ├── fc_server.c ├── fc_server.h ├── fc_settings.h ├── fc_sha1.c ├── fc_sha1.h ├── fc_signal.c ├── fc_signal.h ├── fc_slab.c ├── fc_slab.h ├── fc_stats.c ├── fc_stats.h ├── fc_string.c ├── fc_string.h ├── fc_time.c ├── fc_time.h ├── fc_util.c ├── fc_util.h └── stg_ins_test.c /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.lo 3 | *.o 4 | 5 | # Compiled Dynamic libraries 6 | *.so 7 | 8 | # Compiled Static libraries 9 | *.la 10 | *.a 11 | 12 | # Compiled misc 13 | *.dep 14 | *.gcda 15 | *.gcno 16 | *.gcov 17 | 18 | # Packages 19 | *.tar.gz 20 | *.tar.bz2 21 | 22 | # Logs 23 | *.log 24 | 25 | # Temporary 26 | *.swp 27 | *.~ 28 | *.project 29 | *.cproject 30 | 31 | # Core and executable 32 | core* 33 | fatcache 34 | 35 | # Autotools 36 | .deps 37 | .libs 38 | 39 | /aclocal.m4 40 | /autom4te.cache 41 | /stamp-h1 42 | /autoscan.log 43 | /libtool 44 | 45 | /config/compile 46 | /config/config.guess 47 | /config/config.sub 48 | /config/depcomp 49 | /config/install-sh 50 | /config/ltmain.sh 51 | /config/missing 52 | /config 53 | 54 | /config.h 55 | /config.h.in 56 | /config.h.in~ 57 | /config.log 58 | /config.status 59 | /configure.scan 60 | /configure 
61 | 62 | Makefile 63 | Makefile.in 64 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | script: CFLAGS="-ggdb3 -O0" autoreconf -fvi && ./configure --enable-debug=log && make && sudo make install 3 | 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Fatcache 2 | 3 | Looking to contribute something to fatcache? Here's how you can help. 4 | 5 | 6 | ## Bug reports 7 | 8 | A bug is a _demonstrable problem_ that is caused by the code in the 9 | repository. Good bug reports are extremely helpful - thank you! 10 | 11 | Guidelines for bug reports: 12 | 13 | 1. **Use the GitHub issue search** — check if the issue has already been 14 | reported. 15 | 16 | 2. **Check if the issue has been fixed** — try to reproduce it using the 17 | latest `master` or development branch in the repository. 18 | 19 | 3. **Isolate the problem** — ideally create a reduced test 20 | case and a live example. 21 | 22 | 4. Please try to be as detailed as possible in your report. Include specific 23 | information about the environment - operating system and version, browser 24 | and version, version of fatcache - and steps required to reproduce the issue. 25 | 26 | ## Feature requests & contribution enquiries 27 | 28 | Feature requests are welcome. But take a moment to find out whether your idea 29 | fits with the scope and aims of the project. It's up to *you* to make a strong 30 | case for the inclusion of your feature. Please provide as much detail and 31 | context as possible. 32 | 33 | Contribution enquiries should take place before any significant pull request, 34 | otherwise you risk spending a lot of time working on something that we might 35 | have good reasons for rejecting. 
36 | 37 | ## Pull requests 38 | 39 | Good pull requests - patches, improvements, new features - are a fantastic 40 | help. They should remain focused in scope and avoid containing unrelated 41 | commits. 42 | 43 | Make sure to adhere to the coding conventions used throughout the codebase 44 | (indentation, accurate comments, etc.) and any other requirements (such as test 45 | coverage). 46 | 47 | Please follow this process; it's the best way to get your work included in the 48 | project: 49 | 50 | 1. Create a new topic branch to contain your feature, change, or fix: 51 | 52 | 2. Commit your changes in logical chunks. Provide clear and explanatory commit 53 | messages. Use git's [interactive rebase](https://help.github.com/articles/interactive-rebase) 54 | feature to tidy up your commits before making them public. 55 | 56 | 3. Locally merge (or rebase) the upstream development branch into your topic branch: 57 | 58 | 4. Push your topic branch up to your fork: 59 | 60 | 5. [Open a Pull Request](http://help.github.com/send-pull-requests/) with a 61 | clear title and description. 62 | 63 | ## License 64 | 65 | By contributing your code, 66 | 67 | You agree to license your contribution under the terms of the Apache Version 2.0 License 68 | https://github.com/twitter/fatcache/blob/master/LICENSE 69 | 70 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2013-11-02 Manju Rajashekhar 2 | * fatcache: version 0.1.0 release 3 | fatcache is memcache on SSD. 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure config.h.in config.h.in~ stamp-h.in 2 | 3 | ACLOCAL_AMFLAGS = -I m4 4 | 5 | SUBDIRS = src 6 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | fatcache - memcache on ssd. 2 | Copyright (C) 2013 Twitter, Inc. 3 | 4 | Portions of fatcache were inspired from twemcache: https://github.com/twitter/twemcache and twemproxy: https://github.com/twitter/twemproxy 5 | 6 | The SHA-1 implementation was adopted from the FreeBSD kernel source tree 7 | 8 | /* 9 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 10 | * All rights reserved. 11 | * 12 | * Redistribution and use in source and binary forms, with or without 13 | * modification, are permitted provided that the following conditions 14 | * are met: 15 | * 1. Redistributions of source code must retain the above copyright 16 | * notice, this list of conditions and the following disclaimer. 17 | * 2. Redistributions in binary form must reproduce the above copyright 18 | * notice, this list of conditions and the following disclaimer in the 19 | * documentation and/or other materials provided with the distribution. 20 | * 3. Neither the name of the project nor the names of its contributors 21 | * may be used to endorse or promote products derived from this software 22 | * without specific prior written permission. 23 | * 24 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 | * SUCH DAMAGE. 35 | */ 36 | 37 | Portions of fatcache were also inspired from nginx: http://nginx.org/ 38 | 39 | /* 40 | * Copyright (C) 2002-2010 Igor Sysoev 41 | * 42 | * Redistribution and use in source and binary forms, with or without 43 | * modification, are permitted provided that the following conditions 44 | * are met: 45 | * 1. Redistributions of source code must retain the above copyright 46 | * notice, this list of conditions and the following disclaimer. 47 | * 2. Redistributions in binary form must reproduce the above copyright 48 | * notice, this list of conditions and the following disclaimer in the 49 | * documentation and/or other materials provided with the distribution. 50 | * 51 | * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 52 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 | * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 55 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 | * SUCH DAMAGE. 62 | */ 63 | 64 | The generic queue implementation comes from BSD 65 | 66 | /* 67 | * Copyright (c) 1991, 1993 68 | * The Regents of the University of California. All rights reserved. 69 | * 70 | * Redistribution and use in source and binary forms, with or without 71 | * modification, are permitted provided that the following conditions 72 | * are met: 73 | * 1. Redistributions of source code must retain the above copyright 74 | * notice, this list of conditions and the following disclaimer. 75 | * 2. Redistributions in binary form must reproduce the above copyright 76 | * notice, this list of conditions and the following disclaimer in the 77 | * documentation and/or other materials provided with the distribution. 78 | * 3. All advertising materials mentioning features or use of this software 79 | * must display the following acknowledgement: 80 | * This product includes software developed by the University of 81 | * California, Berkeley and its contributors. 82 | * 4. Neither the name of the University nor the names of its contributors 83 | * may be used to endorse or promote products derived from this software 84 | * without specific prior written permission. 85 | * 86 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 87 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 88 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 89 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 90 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 91 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 92 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 93 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 94 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 95 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 96 | * SUCH DAMAGE. 97 | */ 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fatcache 2 | 3 | [![status: retired](https://opensource.twitter.dev/status/retired.svg)](https://opensource.twitter.dev/status/#retired) 4 | [![Build Status](https://travis-ci.org/twitter/fatcache.png?branch=master)](https://travis-ci.org/twitter/fatcache) 5 | 6 | fatcache is no longer actively maintained. See [twitter/pelikan](https://github.com/twitter/pelikan) for our latest caching work. 7 | 8 | **fatcache** is memcache on SSD. Think of fatcache as a cache for your big data. 9 | 10 | ## Overview 11 | 12 | There are two ways to think of SSDs in system design. One is to think of SSD as an extension of disk, where it plays the role of making disks fast and the other is to think of them as an extension of memory, where it plays the role of making memory fat. The latter makes sense when persistence (non-volatility) is unnecessary and data is accessed over the network. Even though memory is thousand times faster than SSD, network connected SSD-backed memory makes sense, if we design the system in a way that network latencies dominate over the SSD latencies by a large factor. 13 | 14 | To understand why network connected SSD makes sense, it is important to understand the role distributed memory plays in large-scale web architecture. 
In recent years, terabyte-scale, distributed, in-memory caches have become a fundamental building block of any web architecture. In-memory indexes, hash tables, key-value stores and caches are increasingly incorporated for scaling throughput and reducing latency of persistent storage systems. However, power consumption, operational complexity and single node DRAM cost make horizontally scaling this architecture challenging. The current cost of DRAM per server increases dramatically beyond approximately 150 GB, and power cost scales similarly as DRAM density increases. 15 | 16 | Fatcache extends a volatile, in-memory cache by incorporating SSD-backed storage. 17 | 18 | SSD-backed memory presents a viable alternative for applications with large workloads that need to maintain high hit rate for high performance. SSDs have higher capacity per dollar and lower power consumption per byte, without degrading random read latency beyond network latency. 19 | 20 | Fatcache achieves performance comparable to an in-memory cache by focusing on two design criteria: 21 | 22 | - Minimize disk reads on cache hit 23 | - Eliminate small, random disk writes 24 | 25 | The latter is important due to SSDs' unique write characteristics. Writes and in-place updates to SSDs degrade performance due to an erase-and-rewrite penalty and garbage collection of dead blocks. Fatcache batches small writes to obtain consistent performance and increased disk lifetime. 26 | 27 | SSD reads happen at a page-size granularity, usually 4 KB. Single page read access times are approximately 50 to 70 usec and a single [commodity SSD](http://ark.intel.com/products/56569/Intel-SSD-320-Series-600GB-2_5in-SATA-3Gbs-25nm-ML) can sustain nearly 40K read IOPS at a 4 KB page size. 70 usec read latency dictates that disk latency will overtake typical network latency after a small number of reads. Fatcache reduces disk reads by maintaining an in-memory index for all on-disk data. 
28 | 29 | ## Batched Writes 30 | 31 | There have been attempts to use an SSD as a swap layer to implement SSD-backed memory. This method degrades write performance and SSD lifetime with many small, random writes. Similar issues occur when an SSD is simply mmaped. 32 | 33 | To minimize the number of small, random writes, fatcache treats the SSD as a log-structured object store. All writes are aggregated in memory and written to the end of the circular log in batches - usually multiples of 1 MB. 34 | 35 | By managing an SSD as a log-structured store, no disk updates are in-place and objects won't have a fixed address on disk. To locate an object, fatcache maintains an in-memory index. An on-disk object without an index entry is a candidate for garbage collection, which occurs during capacity-triggered eviction. 36 | 37 | ## In-memory index 38 | 39 | Fatcache maintains an in-memory index for all data stored on disk. An in-memory index serves two purposes: 40 | 41 | - Cheap object existence checks 42 | - On-disk object address storage 43 | 44 | An in-memory index is preferable to an on-disk index to minimize disk lookups to locate and read an object. Furthermore, in-place index updates become complicated by an SSD's unique write characteristics. An in-memory index avoids these shortcomings and ensures there are no disk accesses on cache miss and only a single disk access on cache hit. 45 | 46 | Maintaining an in-memory index of all on-disk data requires a compact representation of the index. 
The fatcache index has the following format: 47 | 48 | ```c 49 | struct itemx { 50 | STAILQ_ENTRY(itemx) tqe; /* link in index / free q */ 51 | uint8_t md[20]; /* sha1 message digest */ 52 | uint32_t sid; /* owner slab id */ 53 | uint32_t offset; /* item offset from owner slab base */ 54 | rel_time_t expiry; /* expiry in secs */ 55 | uint64_t cas; /* cas */ 56 | } __attribute__ ((__packed__)); 57 | ``` 58 | 59 | Each index entry contains both object-specific information (key name, &c.) and disk-related information (disk address, &c.). The entries are stored in a chained hash table. To avoid long hash bin traversals, the number of hash bins is fixed to the expected number of index entries. 60 | 61 | To further reduce the memory consumed by the index, we store the SHA-1 hash of the key in each index entry, instead of the key itself. The SHA-1 hash acts as the unique identifier for each object. The on-disk object format contains the complete object key and value. False positives from SHA-1 hash collisions are detected after object retrieval from the disk by comparison with the requested key. If there are collisions on the write path, new objects with the same hash key simply overwrite previous objects. 62 | 63 | The index entry (struct itemx) on a 64-bit system is 48 bytes in size. It is possible to further reduce index entry size to 32 bytes, if CAS is unsupported, MD5 hashing is used, and the next pointer is reduced to 4 bytes. 64 | 65 | At this point, it is instructive to consider the relative size of fatcache's index and the on-disk data. With a 48 byte index entry, an index consuming 48 MB of memory can address 1M objects. If the average object size is 1 KB, then a 48 MB index can address 1 GB of on-disk storage - a 21x memory overcommit. If the average object size is 500 bytes, then a 48 MB index can address 500 MB of SSD - a 10x memory overcommit. Index size and object size relate in this way to determine the addressable capacity of the SSD. 
66 | 67 | ## Build 68 | 69 | To build fatcache from a [distribution tarball](http://code.google.com/p/fatcache/downloads/list): 70 | 71 | $ ./configure 72 | $ make 73 | $ sudo make install 74 | 75 | To build fatcache from a [distribution tarball](http://code.google.com/p/fatcache/downloads/list) in _debug mode_: 76 | 77 | $ CFLAGS="-ggdb3 -O0" ./configure --enable-debug=full 78 | $ make 79 | $ sudo make install 80 | 81 | To build fatcache from source with _debug logs enabled_ and _assertions disabled_: 82 | 83 | $ git clone git@github.com:twitter/fatcache.git 84 | $ cd fatcache 85 | $ autoreconf -fvi 86 | $ ./configure --enable-debug=log 87 | $ make 88 | $ src/fatcache -h 89 | 90 | ## Help 91 | 92 | Usage: fatcache [-?hVdS] [-o output file] [-v verbosity level] 93 | [-p port] [-a addr] [-e hash power] 94 | [-f factor] [-n min item chunk size] [-I slab size] 95 | [-i max index memory] [-m max slab memory] 96 | [-z slab profile] [-D ssd device] [-s server id] 97 | 98 | Options: 99 | -h, --help : this help 100 | -V, --version : show version and exit 101 | -d, --daemonize : run as a daemon 102 | -S, --show-sizes : print slab, item and index sizes and exit 103 | -o, --output=S : set the logging file (default: stderr) 104 | -v, --verbosity=N : set the logging level (default: 6, min: 0, max: 11) 105 | -p, --port=N : set the port to listen on (default: 11211) 106 | -a, --addr=S : set the address to listen on (default: 0.0.0.0) 107 | -e, --hash-power=N : set the item index hash table size as a power of two (default: 20) 108 | -f, --factor=D : set the growth factor of slab item sizes (default: 1.25) 109 | -n, --min-item-chunk-size=N : set the minimum item chunk size in bytes (default: 84 bytes) 110 | -I, --slab-size=N : set slab size in bytes (default: 1048576 bytes) 111 | -i, --max-index-memory=N : set the maximum memory to use for item indexes in MB (default: 64 MB) 112 | -m, --max-slab-memory=N : set the maximum memory to use for slabs in MB (default: 64 MB) 113 | -z, 
--slab-profile=S : set the profile of slab item chunk sizes (default: n/a) 114 | -D, --ssd-device=S : set the path to the ssd device file (default: n/a) 115 | -s, --server-id=I/N : set fatcache instance to be I out of total N instances (default: 0/1) 116 | 117 | ## Performance 118 | 119 | - Initial performance results are available [here](https://github.com/twitter/fatcache/blob/master/notes/performance.md). 120 | 121 | ## Future Work 122 | 123 | - fatcache deals with two kinds of IOs - disk IO and network IO. Network IO in fatcache is async, but disk IO is sync. It is recommended to run multiple instances of fatcache on a single machine to exploit CPU and SSD parallelism. However, by making disk IO async (using libaio, perhaps), it would be possible for a single instance to completely exploit all available SSD device parallelism. 124 | - observability in fatcache through stats 125 | 126 | ## Issues and Support 127 | 128 | Have a bug or question? Please create an issue here on GitHub! 129 | 130 | https://github.com/twitter/fatcache/issues 131 | 132 | ## Contributors 133 | 134 | * Manju Rajashekhar ([@manju](https://twitter.com/manju)) 135 | * Yao Yue ([@thinkingfish](https://twitter.com/thinkingfish)) 136 | 137 | ## License 138 | 139 | Copyright 2013 Twitter, Inc. 
140 | 141 | Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 142 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # Define the package version numbers and the bug reporting address 2 | m4_define([FC_MAJOR], 0) 3 | m4_define([FC_MINOR], 1) 4 | m4_define([FC_PATCH], 1) 5 | m4_define([FC_BUGS], [manj@twitter.com]) 6 | 7 | # Initialize autoconf 8 | AC_PREREQ([2.64]) 9 | AC_INIT([fatcache], [FC_MAJOR.FC_MINOR.FC_PATCH], [FC_BUGS]) 10 | AC_CONFIG_SRCDIR([src/fc.c]) 11 | AC_CONFIG_AUX_DIR([config]) 12 | AC_CONFIG_HEADERS([config.h:config.h.in]) 13 | AC_CONFIG_MACRO_DIR([m4]) 14 | AC_CANONICAL_SYSTEM 15 | 16 | # Initialize automake 17 | AM_INIT_AUTOMAKE(1.9 foreign) 18 | 19 | # Define macro variables for the package version numbers 20 | AC_DEFINE(FC_VERSION_MAJOR, FC_MAJOR, [Define the major version number]) 21 | AC_DEFINE(FC_VERSION_MINOR, FC_MINOR, [Define the minor version number]) 22 | AC_DEFINE(FC_VERSION_PATCH, FC_PATCH, [Define the patch version number]) 23 | AC_DEFINE(FC_VERSION_STRING, "FC_MAJOR.FC_MINOR.FC_PATCH", [Define the version string]) 24 | 25 | # Checks for language 26 | AC_LANG([C]) 27 | 28 | # Checks for programs 29 | AC_PROG_CC 30 | AC_PROG_INSTALL 31 | AC_PROG_MAKE_SET 32 | AM_PROG_CC_C_O 33 | 34 | # Checks for typedefs, structures, and compiler characteristics 35 | AC_C_INLINE 36 | AC_C_CONST 37 | AC_TYPE_INT8_T 38 | AC_TYPE_INT16_T 39 | AC_TYPE_INT32_T 40 | AC_TYPE_INT64_T 41 | AC_TYPE_INTMAX_T 42 | AC_TYPE_INTPTR_T 43 | AC_TYPE_UINT8_T 44 | AC_TYPE_UINT16_T 45 | AC_TYPE_UINT32_T 46 | AC_TYPE_UINT64_T 47 | AC_TYPE_UINTMAX_T 48 | AC_TYPE_UINTPTR_T 49 | AC_TYPE_OFF_T 50 | AC_TYPE_PID_T 51 | AC_TYPE_SIZE_T 52 | AC_TYPE_SSIZE_T 53 | AC_TYPE_UID_T 54 | 55 | # Checks for header files 56 | AC_HEADER_STDBOOL 57 | AC_CHECK_HEADERS([fcntl.h float.h limits.h stddef.h stdlib.h string.h 
unistd.h]) 58 | AC_CHECK_HEADERS([inttypes.h stdint.h]) 59 | AC_CHECK_HEADERS([sys/ioctl.h sys/time.h sys/uio.h]) 60 | AC_CHECK_HEADERS([sys/socket.h sys/un.h netinet/in.h arpa/inet.h netdb.h]) 61 | AC_CHECK_HEADERS([sys/epoll.h], [], [AC_MSG_ERROR([required sys/epoll.h header file is missing])]) 62 | 63 | # Checks for library functions 64 | AC_FUNC_FORK 65 | AC_FUNC_MALLOC 66 | AC_FUNC_REALLOC 67 | AC_CHECK_FUNCS([dup2]) 68 | AC_CHECK_FUNCS([gethostname]) 69 | AC_CHECK_FUNCS([gettimeofday]) 70 | AC_CHECK_FUNCS([strerror]) 71 | AC_CHECK_FUNCS([socket]) 72 | AC_CHECK_FUNCS([memchr memmove memset]) 73 | AC_CHECK_FUNCS([strchr strndup strtol strtoul strtoull]) 74 | AC_CHECK_FUNCS([mlockall]) 75 | AC_CHECK_FUNCS([getpagesizes]) 76 | AC_CHECK_FUNCS([memcntl]) 77 | AC_CHECK_FUNCS([backtrace]) 78 | 79 | AC_CACHE_CHECK([if epoll works], [ac_cv_epoll_works], 80 | AC_TRY_RUN([ 81 | #include 82 | #include 83 | #include 84 | int 85 | main(int argc, char **argv) 86 | { 87 | int fd; 88 | 89 | fd = epoll_create(256); 90 | if (fd < 0) { 91 | perror("epoll_create:"); 92 | exit(1); 93 | } 94 | exit(0); 95 | } 96 | ], [ac_cv_epoll_works=yes], [ac_cv_epoll_works=no])) 97 | AS_IF([test "x$ac_cv_epoll_works" = "xyes"], [], [AC_MSG_FAILURE([Linux epoll(7) API is missing])]) 98 | 99 | # Search for library 100 | AC_SEARCH_LIBS([pthread_create], [pthread], [], 101 | [AC_MSG_ERROR([need posix thread library to be installed])]) 102 | 103 | # Check if we're a little-endian or a big-endian system 104 | AC_C_BIGENDIAN( 105 | [AC_DEFINE(HAVE_BIG_ENDIAN, 1, [Define to 1 if machine is big endian])], 106 | [AC_DEFINE(HAVE_LITTLE_ENDIAN, 1, [Define to 1 if machine is little endian])], 107 | [AC_MSG_ERROR([endianess of this machine is unknown])], 108 | [AC_MSG_ERROR([universial endianess not supported])] 109 | ) 110 | 111 | # Check whether to enable debug logs and asserts 112 | AC_MSG_CHECKING([whether to enable debug logs and asserts]) 113 | AC_ARG_ENABLE([debug], 114 | [AS_HELP_STRING( 115 | 
[--enable-debug=@<:@full|yes|log|no@:>@], 116 | [enable debug logs and asserts @<:@default=no@:>@]) 117 | ], 118 | [], 119 | [enable_debug=no]) 120 | AS_CASE([x$enable_debug], 121 | [xfull], [AC_DEFINE([HAVE_ASSERT_PANIC], [1], 122 | [Define to 1 if panic on an assert is enabled]) 123 | AC_DEFINE([HAVE_DEBUG_LOG], [1], [Define to 1 if debug log is enabled]) 124 | ], 125 | [xyes], [AC_DEFINE([HAVE_ASSERT_LOG], [1], 126 | [Define to 1 if log on an assert is enabled]) 127 | AC_DEFINE([HAVE_DEBUG_LOG], [1], [Define to 1 if debug log is enabled]) 128 | ], 129 | [xlog], [AC_DEFINE([HAVE_DEBUG_LOG], [1], [Define to 1 if debug log is enabled])], 130 | [xno], [], 131 | [AC_MSG_FAILURE([invalid value ${enable_debug} for --enable-debug])]) 132 | AC_MSG_RESULT([$enable_debug]) 133 | 134 | # Define Makefiles 135 | AC_CONFIG_FILES([Makefile 136 | src/Makefile]) 137 | 138 | # Generate the "configure" script 139 | AC_OUTPUT 140 | -------------------------------------------------------------------------------- /m4/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter/fatcache/fa0173a50f22db0dd30fa8d09ea3c9c045a2efc4/m4/.gitignore -------------------------------------------------------------------------------- /notes/coding_style.txt: -------------------------------------------------------------------------------- 1 | - No literal tabs. Expand tabs to 4 spaces. 2 | - Indentation is 4 spaces. 3 | - No more than 3 levels of indentation. 4 | - Make sure that your editor does not leave space at the end of the line. 5 | - snake_case for variable, function and file names. 6 | - Use your own judgement when naming variables and be as Spartan as possible. 7 | Eg: Using variable name like this_variable_is_a_temporary_counter will 8 | usually be frowned upon. 9 | 10 | - Don't use int, char, short, long. Instead use int8_t uint8_t, int16_t, 11 | uint16_t, int32_t, uint32_t, int64_t, uint64_t. 
You have to include <stdint.h> 12 | 13 | - Use bool for Boolean. You have to include <stdbool.h> 14 | - Always use size_t type when dealing with sizes of objects or memory ranges. 15 | 16 | 17 | - 80 column line limit. 18 | - If you have to wrap a long statement (> 80 column), put the operator at the 19 | end of the line and use 4 spaces to indent the next line. Eg: 20 | while (cnt < 20 && this_variable_name_is_too_long && 21 | ep != NULL) { 22 | z = a + really + long + statement + that + needs 23 | two + lines + gets + indented + four + spaces 24 | on + the + second + and + subsequent + lines; 25 | } 26 | 27 | and: 28 | 29 | int a = function(param_a, param_b, param_c, param_d, param_e, 30 | param_f, param_g, param_h, param_i, 31 | param_j, param_k, param_l); 32 | 33 | - Always use braces for all conditional blocks (if, switch, for, while, do). 34 | This holds good even for single statement conditional blocks. Eg: 35 | if (cond) { 36 | stmt; 37 | } 38 | - Placement of braces for non-function statement blocks - put opening brace 39 | last on the line and closing brace first. Eg: 40 | if (x is true) { 41 | we do y 42 | } 43 | - Placement of brace for functions - put the opening brace at the beginning 44 | of the next line and closing brace first. Eg: 45 | int 46 | function(int x) 47 | { 48 | body of the function 49 | } 50 | - Closing brace is empty on a line of its own, except in the cases where it is 51 | followed by a continuation of the same statement, i.e. a "while" in a 52 | do-statement or an "else" in an if-statement, like this: 53 | do { 54 | body of do-loop 55 | } while (condition); 56 | 57 | and, 58 | 59 | if (x == y) { 60 | .. 61 | } else if (x > y) { 62 | ... 63 | } else { 64 | .... 65 | } 66 | - column align switch keyword and the corresponding case/default keyword. Eg: 67 | switch (alphabet) { 68 | case 'a': 69 | case 'b': 70 | printf("I am a or b\n"); 71 | break; 72 | default: 73 | break; 74 | } 75 | - Forever loops are done with for, and not while.
Eg: 76 | for (;;) { 77 | stmt; 78 | } 79 | 80 | - Don't use a space after a function name. 81 | - Use space after keywords. Exceptions are sizeof, typeof, alignof and 82 | __attribute__, which look like functions. 83 | - Do not add spaces around (inside) parenthesized expressions. 84 | s = sizeof( sizeof(*p)) ); /* Bad eg */ 85 | s = sizeof(sizeof(*p)); /* Good eg */ 86 | - When declaring pointer data, use '*' adjacent to the data name and not 87 | adjacent to the type name. Eg: 88 | int 89 | function(int *p) 90 | { 91 | char *p; 92 | body of the function 93 | } 94 | - Use one space around (on each side of) most binary and ternary operators, 95 | such as any of these: 96 | = + - < > * / % | & ^ <= >= == != ? : 97 | but no space after unary operators: 98 | & * + - ~ ! sizeof typeof alignof __attribute__ defined 99 | no space before the postfix increment & decrement unary operators: 100 | ++ -- 101 | and no space around the '.' and "->" structure member operators. 102 | 103 | - Casts should not be followed by space. Eg: 104 | int q = *(int *)&p 105 | 106 | - Don't ever use typedef for structure types. Typedefs are problematic 107 | because they do not properly hide their underlying type; for example you 108 | need to know if the typedef is the structure itself or a pointer to the 109 | structure. In addition they must be declared exactly once, whereas an 110 | incomplete structure type can be mentioned as many times as necessary. 111 | Typedefs are difficult to use in stand-alone header files: the header 112 | that defines the typedef must be included before the header that uses it, 113 | or by the header that uses it (which causes namespace pollution), or 114 | there must be a back-door mechanism for obtaining the typedef. 115 | - The only exception for using a typedef is when defining a type for a 116 | function pointer. 117 | 118 | - Function local to a file should be static. 119 | - Function type should be on a line by itself preceding the function. 
Eg: 120 | static char * 121 | function(int a1, int a2, float fl, int a4) 122 | { 123 | ... 124 | - Separate two successive functions with one blank line. 125 | - Include parameter names with their datatypes in function declaration. Eg: 126 | void function(int param); 127 | - When using wrapper function, it is preferred that you name the wrapped 128 | function with the same name as wrapper function and prepend an '_' to 129 | the wrapped function name. Wrapped functions are usually static. Eg: 130 | static int 131 | _fib(int n) 132 | { 133 | ... 134 | } 135 | int 136 | fib(int n) 137 | { 138 | ... 139 | _fib(n); 140 | ... 141 | } 142 | - Functions should be short and sweet, and do just one thing. They should 143 | fit on one or two screenfuls of text (80x24 screen size), and do one 144 | thing and do that well. 145 | The maximum length of a function is inversely proportional to the 146 | complexity and indentation level of that function. So, if you have a 147 | conceptually simple function that is just one long (but simple) 148 | case-statement, where you have to do lots of small things for a lot of 149 | different cases, it's OK to have a longer function. 150 | Another measure of the function is the number of local variables. They 151 | shouldn't exceed 5-10, or you're doing something wrong. Re-think the 152 | function, and split it into smaller pieces. A human brain can 153 | generally easily keep track of about 7 different things, anything more 154 | and it gets confused. You know you're brilliant, but maybe you'd like 155 | to understand what you did 2 weeks from now. 156 | 157 | - Never use goto statements. 158 | 159 | - Don't use // for single line comments. Instead use /* ... */ style 160 | - For multi-line comments use the following style 161 | /* 162 | * This is the preferred style for multi-line 163 | * comments in the Linux kernel source code. 164 | * Please use it consistently.
165 | * 166 | * Description: A column of asterisks on the left side, 167 | * with beginning and ending almost-blank lines. 168 | */ 169 | - To comment out block of code spanning several lines use "#if 0 ... #endif" 170 | - All major functions should have comments describing what they do at the 171 | head of the function. Avoid putting comments in the function body unless 172 | absolutely needed. Eg: 173 | /* 174 | * Try to acquire a physical address lock while a pmap is locked. If we 175 | * fail to trylock we unlock and lock the pmap directly and cache the 176 | * locked pa in *locked. The caller should then restart their loop in case 177 | * the virtual to physical mapping has changed. 178 | */ 179 | int 180 | vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) 181 | { 182 | ... 183 | 184 | - Use just one data declaration per line (no commas for multiple data 185 | declarations). This leaves you room for a small comment on each 186 | item, explaining its use. 187 | 188 | - Macro name is always in UPPERCASE. 189 | - Use enums when defining more than one related constant. All enumeration 190 | values are in UPPERCASE. 191 | - Avoid macros as much as possible and use inline functions wherever you can. 192 | - For macros encapsulating compound statements, right justify the backslashes 193 | and enclose it in do { ... } while (0) 194 | - For parameterized macros, all the parameters used in the macro body must 195 | be surrounded by parentheses. Eg: 196 | #define ADD_1(x) ((x) + 1) 197 | 198 | - sizeof a struct. Eg 199 | char *p; 200 | p = malloc(sizeof(*p)) /* Good example */ 201 | p = malloc(sizeof(char)) /* Bad example */ 202 | 203 | - When declaring variables in structures, each of the variables should 204 | get its own line. Try to make the structure readable by aligning the 205 | member names using either tabs or spaces depending upon your judgment.
206 | You should use only one space or tab if it suffices to align at least 90% 207 | of the member names. Names following extremely long types should be 208 | separated by a single space. 209 | struct foo { 210 | struct foo *next; /* List of active foo. */ 211 | struct mumble amumble; /* Comment for mumble. */ 212 | int bar; /* Try to align the comments. */ 213 | struct verylongtypename *baz; /* Won't fit in 2 tabs. */ 214 | }; 215 | struct foo *foohead; /* Head of global foo list. */ 216 | - Major structures should be declared at the top of the file in which they 217 | are used, or in separate header files if they are used in multiple source 218 | files. Use of the structures should be by separate declarations and 219 | should be extern if they are declared in a header file. 220 | 221 | - Use NULL as the null pointer constant (instead of 0) 222 | - Test pointers against NULL, e.g., use: 223 | (p = f()) == NULL 224 | 225 | not: 226 | 227 | !(p = f()) 228 | 229 | - Do not use ! for tests unless it is a boolean, e.g. use: 230 | if (*p == '\0') 231 | 232 | not: 233 | 234 | if (!*p) 235 | - Always use const for function parameters if the pointer has no side effect. 236 | 237 | -------------------------------------------------------------------------------- /notes/draft.md: -------------------------------------------------------------------------------- 1 | # Large File Support 2 | 3 | - The "_FILE_OFFSET_BITS" #define constant, set to either 32 or 64, selects whether calls that operate with file offsets will use 32-bit or 64-bit numbers for these offsets. 64-bit offsets make it possible to operate on files larger than 2 GB. 4 | - This constant is not required and has no effect when compiling for a 64-bit system, as the offsets are always 64-bit in this case.
5 | 6 | # Stats to add 7 | 8 | - alloc_fail 9 | - slab_alloc 10 | - slab_free 11 | - slab_size 12 | - slab_create 13 | - slab_destroy 14 | - item_avail 15 | - item_inuse 16 | - item_max 17 | - item_total 18 | - item_size 19 | 20 | # Benchmarks 21 | 22 | + [mc-crusher](https://github.com/dormando/mc-crusher) 23 | + [twemperf](https://github.com/twitter/twemperf) 24 | 25 | # Tools 26 | 27 | - valgrind --tool=memcheck --leak-check=yes --show-reachable=yes example1 28 | - sudo dd if=/dev/sdb of=./loop_file_10MB bs=1024 count=10K 29 | - sudo /usr/sbin/smartctl --device=sat+cciss,3 -a /dev/sdb1 30 | 31 | 32 | -------------------------------------------------------------------------------- /notes/intel-ssd-320-specification.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter/fatcache/fa0173a50f22db0dd30fa8d09ea3c9c045a2efc4/notes/intel-ssd-320-specification.pdf -------------------------------------------------------------------------------- /notes/memcache.txt: -------------------------------------------------------------------------------- 1 | - ascii: 2 | 3 | - Storage Commands (set, add, replace, append, prepend, cas): 4 | 5 | set <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 6 | add <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 7 | replace <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 8 | append <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 9 | prepend <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 10 | 11 | cas <key> <flags> <expiry> <datalen> <cas> [noreply]\r\n<data>\r\n 12 | 13 | where, 14 | <flags> - uint32_t : data specific client side flags 15 | <expiry> - uint32_t : expiration time (in seconds) 16 | <datalen> - uint32_t : size of the data (in bytes) 17 | <data> - uint8_t[]: data block 18 | <cas> - uint64_t 19 | 20 | - Retrieval Commands (get, gets): 21 | 22 | get <key>\r\n 23 | get <key> [<key>]+\r\n 24 | 25 | gets <key>\r\n 26 | gets <key> [<key>]+\r\n 27 | 28 | - Delete Command (delete): 29 | 30 | delete <key> [noreply]\r\n 31 | 32 | - Arithmetic Commands (incr, decr): 33 | 34 | incr <key> <value> [noreply]\r\n 35 | decr <key> <value> [noreply]\r\n 36 | 37 | where, 38 | <value> - uint64_t 39 | 40 | - Misc Commands (quit) 41 | 42 | quit\r\n 43 | flush_all [<delay>] [noreply]\r\n 44 |
version\r\n 45 | verbosity <num> [noreply]\r\n 46 | 47 | - Statistics Commands 48 | 49 | stats\r\n 50 | stats <args>\r\n 51 | 52 | - Error Responses: 53 | 54 | ERROR\r\n 55 | CLIENT_ERROR [error]\r\n 56 | SERVER_ERROR [error]\r\n 57 | 58 | where, 59 | ERROR means client sent a non-existent command name 60 | CLIENT_ERROR means that command sent by the client does not conform to the protocol 61 | SERVER_ERROR means that there was an error on the server side that made processing of the command impossible 62 | 63 | - Storage Command Responses: 64 | 65 | STORED\r\n 66 | NOT_STORED\r\n 67 | EXISTS\r\n 68 | NOT_FOUND\r\n 69 | 70 | where, 71 | STORED indicates success. 72 | NOT_STORED indicates the data was not stored because condition for an add or replace wasn't met. 73 | EXISTS indicates that the item you are trying to store with a cas has been modified since you last fetched it. 74 | NOT_FOUND indicates that the item you are trying to store with a cas does not exist. 75 | 76 | - Delete Command Response: 77 | 78 | NOT_FOUND\r\n 79 | DELETED\r\n 80 | 81 | - Retrieval Responses: 82 | 83 | END\r\n 84 | VALUE <key> <flags> <datalen> [<cas>]\r\n<data>\r\nEND\r\n 85 | VALUE <key> <flags> <datalen> [<cas>]\r\n<data>\r\n[VALUE <key> <flags> <datalen> [<cas>]\r\n<data>]+\r\nEND\r\n 86 | 87 | - Arithmetic Responses: 88 | 89 | NOT_FOUND\r\n 90 | <value>\r\n 91 | 92 | where, 93 | <value> - uint64_t : new key value after incr or decr operation 94 | 95 | - Statistics Response 96 | [STAT <name> <value>\r\n]+END\r\n 97 | 98 | - Misc Response 99 | 100 | OK\r\n 101 | VERSION <version>\r\n 102 | 103 | - Notes: 104 | - set always creates mapping irrespective of whether it is present or not. 105 | - add, adds only if the mapping is not present 106 | - replace, only replaces if the mapping is present 107 | - append and prepend command ignore flags and expiry values 108 | - noreply instructs the server to not send the reply even if there is an error.
109 | - decr of 0 is 0, while incr of UINT64_MAX is 0 110 | - maximum length of the key is 250 characters 111 | - expiry of 0 means that item never expires, though it could be evicted from the cache 112 | - non-zero expiry is either unix time (# seconds since 01/01/1970) or, 113 | offset in seconds from the current time (< 60 x 60 x 24 x 30 seconds = 30 days) 114 | - expiry time is with respect to the server (not client) 115 | - <datalen> can be zero and when it is, the <data> block is empty. 116 | 117 | - Thoughts: 118 | - ascii protocol is easier to debug - think using strace or tcpdump to see 119 | protocol on the wire, Or using telnet or netcat or socat to build memcache 120 | requests and responses 121 | http://stackoverflow.com/questions/2525188/are-binary-protocols-dead 122 | 123 | - http://news.ycombinator.com/item?id=1712788 124 | -------------------------------------------------------------------------------- /notes/performance.md: -------------------------------------------------------------------------------- 1 | ## tl;dr 2 | 3 | - A single fatcache can do close to 100K set/sec for 100 byte item sizes. 4 | - A single fatcache can do close to 4.5K get/sec for 100 byte item sizes. 5 | - All the 8 fatcache instances in aggregate do 32K get/sec to a single 600 GB SSD. 6 | - We can scale the read iops by having multiple SSDs connected to a single machine. 7 | 8 | ## Setup 9 | 10 | - Machine-A (server). 11 | - Machine-B (client). 12 | - Machine-A is attached to a [600G SSD](https://github.com/twitter/fatcache/blob/master/notes/intel-ssd-320-specification.pdf). 13 | - 8 instances of fatcache running on Machine-A. 14 | - 8 instances of twemperf running on Machine-B where each instance generates load to one of fatcache instances on Machine-A. 15 | - 600G of SSD is evenly split among 8 fatcache instances and SSD is accessed through direct-io. 16 | - Slab size is 1 MB.
17 | 18 | ## Details 19 | 20 | ### fatcache (8 instances) 21 | 22 | $ sudo src/fatcache -D /dev/sdb -p 11211 -s 0/8 23 | $ sudo src/fatcache -D /dev/sdb -p 11212 -s 1/8 24 | $ sudo src/fatcache -D /dev/sdb -p 11213 -s 2/8 25 | $ sudo src/fatcache -D /dev/sdb -p 11214 -s 3/8 26 | $ sudo src/fatcache -D /dev/sdb -p 11215 -s 4/8 27 | $ sudo src/fatcache -D /dev/sdb -p 11216 -s 5/8 28 | $ sudo src/fatcache -D /dev/sdb -p 11217 -s 6/8 29 | $ sudo src/fatcache -D /dev/sdb -p 11218 -s 7/8 30 | 31 | ### Set 32 | 33 | $ ./mcperf --sizes=u100,100 --num-calls=10000 --num-conns=100 --call-rate=1000 --conn-rate=10000 --method=set --server= --port=11211 34 | Total: connections 100 requests 1000000 responses 1000000 test-duration 10.692 s 35 | 36 | Connection rate: 9.4 conn/s (106.9 ms/conn <= 100 concurrent connections) 37 | Connection time [ms]: avg 10581.9 min 10066.1 max 10688.3 stddev 141.80 38 | Connect time [ms]: avg 3.2 min 0.1 max 6.3 stddev 2.05 39 | 40 | Request rate: 93532.1 req/s (0.0 ms/req) 41 | Request size [B]: avg 129.0 min 129.0 max 129.0 stddev 0.00 42 | 43 | Response rate: 93532.1 rsp/s (0.0 ms/rsp) 44 | Response size [B]: avg 8.0 min 8.0 max 8.0 stddev 0.00 45 | Response time [ms]: avg 413.0 min 0.4 max 1430.4 stddev 0.30 46 | Response time [ms]: p25 156.0 p50 387.0 p75 570.0 47 | Response time [ms]: p95 963.0 p99 1140.0 p999 1304.0 48 | Response type: stored 1000000 not_stored 0 exists 0 not_found 0 49 | Response type: num 0 deleted 0 end 0 value 0 50 | Response type: error 0 client_error 0 server_error 0 51 | 52 | Errors: total 0 client-timo 0 socket-timo 0 connrefused 0 connreset 0 53 | Errors: fd-unavail 0 ftab-full 0 addrunavail 0 other 0 54 | 55 | CPU time [s]: user 2.07 system 6.58 (user 19.3% system 61.6% total 80.9%) 56 | Net I/O: bytes 130.7 MB rate 12513.6 KB/s (102.5*10^6 bps) 57 | 58 | - All writes are buffered in memory and flushed to disk in slab size granularity, which is 1 MB here. 
59 | - The buffering of writes allows us to achieve higher write iops compared to read iops when item sizes are fairly small (< 500 bytes). 60 | 61 | ### Get 62 | 63 | $ ./mcperf --sizes=u100,100 --num-calls=10000 --num-conns=100 --call-rate=40 --conn-rate=10000 --method=get --server= --port=11211 64 | 65 | Total: connections 100 requests 1000000 responses 1000000 test-duration 249.987 s 66 | 67 | Connection rate: 0.4 conn/s (2499.9 ms/conn <= 100 concurrent connections) 68 | Connection time [ms]: avg 249977.6 min 249977.1 max 249978.6 stddev 0.53 69 | Connect time [ms]: avg 0.9 min 0.1 max 1.9 stddev 0.42 70 | 71 | Request rate: 4000.2 req/s (0.2 ms/req) 72 | Request size [B]: avg 19.0 min 19.0 max 19.0 stddev 0.00 73 | 74 | Response rate: 4000.2 rsp/s (0.2 ms/rsp) 75 | Response size [B]: avg 133.0 min 133.0 max 133.0 stddev 0.00 76 | Response time [ms]: avg 595.6 min 0.1 max 7385.1 stddev 1.03 77 | Response time [ms]: p25 1.0 p50 130.0 p75 588.0 78 | Response time [ms]: p95 2728.0 p99 5191.0 p999 6501.0 79 | Response type: stored 0 not_stored 0 exists 0 not_found 0 80 | Response type: num 0 deleted 0 end 0 value 1000000 81 | Response type: error 0 client_error 0 server_error 0 82 | 83 | Errors: total 0 client-timo 0 socket-timo 0 connrefused 0 connreset 0 84 | Errors: fd-unavail 0 ftab-full 0 addrunavail 0 other 0 85 | 86 | CPU time [s]: user 136.26 system 110.11 (user 54.5% system 44.0% total 98.6%) 87 | Net I/O: bytes 145.0 MB rate 593.8 KB/s (4.9*10^6 bps) 88 | 89 | - The read throughput is bounded by the read throughput supported by the SSD. 90 | - In aggregate all the 8 instances do 32K get/sec for 100 byte item size.
91 | - We can scale read throughput by scaling number of SSD and fatcaches running on a given machine 92 | 93 | Snapshot of iostat when get workload was running 94 | 95 | $ iostat -d 2 -x -k sdb | grep --color 'Device.*' -A 1 96 | 97 | Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util 98 | sdb 0.00 0.00 31790.00 0.00 21614.25 0.00 1.36 5.83 0.18 0.03 100.05 99 | -- 100 | Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util 101 | sdb 0.00 0.00 32843.50 0.00 22318.25 0.00 1.36 6.15 0.19 0.03 99.95 102 | -- 103 | Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util 104 | sdb 0.00 0.00 31946.50 0.00 21715.00 0.00 1.36 6.09 0.19 0.03 100.05 105 | -- 106 | Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util 107 | sdb 0.00 0.00 30559.50 0.00 20758.00 0.00 1.36 5.20 0.17 0.03 99.80 108 | -- 109 | Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util 110 | sdb 0.00 0.00 32315.00 0.00 21961.00 0.00 1.36 5.92 0.18 0.03 100.00 111 | -- 112 | 113 | The above iostat numbers demonstrate that when 8 instances of fatcache were subjected to load, there were 5-6 requests pending in the queue. 
Each request had an average service time of 30 usec, with the queue wait time of 150 usec 114 | -------------------------------------------------------------------------------- /notes/spec.md: -------------------------------------------------------------------------------- 1 | ## Intel 320 Series SSD 2 | 3 | - [Source](http://ark.intel.com/products/56569/Intel-SSD-320-Series-600GB-2_5in-SATA-3Gbs-25nm-ML) 4 | - Summary: 5 | - Type : NAND Flash Memory with Multi-Level Cell (MLC) Technology 6 | - Sequential Read (queue depth 32) : 270 MB/s 7 | - Sequential Write (queue depth 32) : 220 MB/s 8 | - Random Read (100% Span) (queue depth 32) : 39500 IOPS 9 | - Latency Read : 75 µs 10 | - Latency Write : 90 µs 11 | - Capacity : 600 GB 12 | - Interface : SATA - 3.0 Gb/s 13 | 14 | ## Memory 15 | 16 | - Bandwidth: 15-25 GBps 17 | - Latency: 50-70 ns 18 | 19 | ## Rotational Disk 20 | 21 | - Seek time: 3-15 ms 22 | - Data transfer rate: ~1Gbps (130MBps) 23 | 24 | ## Bus 25 | 26 | - SATA bandwidth: 1.5/3/6 Gbps (v1/2/3) 27 | - PCI Express bandwidth per lane: 250/500/1000 MBps (v1/2/3) 28 | 29 | ## References 30 | 31 | - [Memory bandwidth/latency on CPUs since Nehalem](http://www.bit-tech.net/hardware/memory/2011/01/11/the-best-memory-for-sandy-bridge/8) 32 | - [Sandy Bridge cache and memory performance](http://www.anandtech.com/show/5091/intel-core-i7-3960x-sandy-bridge-e-review-keeping-the-high-end-alive/4) 33 | - [Magnetic disks](http://en.wikipedia.org/wiki/Hard_disk_drive_performance_characteristics) 34 | - [SATA](http://en.wikipedia.org/wiki/Serial_ATA) 35 | - [PCI Express](http://en.wikipedia.org/wiki/PCI_Express) 36 | -------------------------------------------------------------------------------- /scripts/memcache-check.sh: -------------------------------------------------------------------------------- 1 | printf "%b" "set key\n 0 0 3\r\nval\r\nget key val\r\n" | socat -v - TCP-CONNECT:localhost:22122 2 | printf "%b" "set key\000y 0 0 3\r\nval\r\nget key val\r\n" | socat 
-v - TCP-CONNECT:localhost:22122 3 | val="a" 4 | val2="a" 5 | val3="a" 6 | val4="a" 7 | val5="a" 8 | for i in `seq 1 19`; do 9 | val5=${val4} 10 | val4=${val3} 11 | val3=${val2} 12 | val2=${val} 13 | val=`printf "%s%s" "${val}" "${val}"` 14 | done 15 | 16 | #valx="a" 17 | #for i in `seq 1 10000`; do 18 | # valx=`printf "%s%s" "$valx" "a"` 19 | #done 20 | # 21 | #valy="b" 22 | #for i in `seq 1 2700`; do 23 | # valy=`printf "%s%s" "$valy" "a"` 24 | #done 25 | # 26 | #val0=`printf "%s%s%s%s%s%s%s%s%s" "$val" "$val2" "$val3" "$val4" "$val5" "$valx" "$valx" "$valx" "$valy"` 27 | 28 | val0=`printf "%s%s" "$val" "$val"` 29 | len=`printf $val0 | wc -c` 30 | echo $len 31 | 32 | key=`printf "key%s" ""` 33 | #printf "set ${key} 0 0 $len\r\n${val0}\r\n" | nc localhost 22121 34 | printf "set key 0 0 $len\r\n$val0\r\nget key\r\nset key 0 0 3\r\nval\r\n" | nc localhost 22121 35 | 36 | printf "get keyaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\r\n" | socat -d -t 100 - TCP:localhost:11211,shut-none 37 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | bin_PROGRAMS = fatcache stg_ins_test 2 | 3 | AM_CPPFLAGS = -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 4 | CFLAGS = -g 5 | AM_CFLAGS = -Wall 6 | AM_CFLAGS += -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls 7 | AM_CFLAGS += -fno-strict-aliasing 8 | AM_CFLAGS += -Wunused-function -Wunused-value -Wunused-variable 9 | 10 | LDFLAGS += -rdynamic 11 | 12 | fatcache_SOURCES = \ 13 | fc_core.c 
fc_core.h \ 14 | fc_connection.c fc_connection.h \ 15 | fc_server.c fc_server.h \ 16 | fc_client.c fc_client.h \ 17 | fc_slab.c fc_slab.h \ 18 | fc_item.c fc_item.h \ 19 | fc_itemx.c fc_itemx.h \ 20 | fc_memcache.c fc_memcache.h \ 21 | fc_message.c fc_message.h \ 22 | fc_request.c \ 23 | fc_response.c \ 24 | fc_mbuf.c fc_mbuf.h \ 25 | fc_signal.c fc_signal.h \ 26 | fc_event.c fc_event.h \ 27 | fc_time.c fc_time.h \ 28 | fc_sha1.c fc_sha1.h \ 29 | fc_log.c fc_log.h \ 30 | fc_string.c fc_string.h \ 31 | fc_array.c fc_array.h \ 32 | fc_util.c fc_util.h \ 33 | fc_stats.c fc_stats.h \ 34 | fc_queue.h \ 35 | fc.c 36 | 37 | stg_ins_test_SOURCES = \ 38 | fc_slab.c fc_slab.h \ 39 | fc_item.c fc_item.h \ 40 | fc_itemx.c fc_itemx.h \ 41 | fc_time.c fc_time.h \ 42 | fc_sha1.c fc_sha1.h \ 43 | fc_log.c fc_log.h \ 44 | fc_util.c fc_util.h \ 45 | fc_queue.h \ 46 | stg_ins_test.c 47 | -------------------------------------------------------------------------------- /src/fc_array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #include 19 | 20 | #include 21 | 22 | struct array * 23 | array_create(uint32_t n, size_t size) 24 | { 25 | struct array *a; 26 | 27 | ASSERT(n != 0 && size != 0); 28 | 29 | a = fc_alloc(sizeof(*a)); 30 | if (a == NULL) { 31 | return NULL; 32 | } 33 | 34 | a->elem = fc_alloc(n * size); 35 | if (a->elem == NULL) { 36 | fc_free(a); 37 | return NULL; 38 | } 39 | 40 | a->nelem = 0; 41 | a->size = size; 42 | a->nalloc = n; 43 | 44 | return a; 45 | } 46 | 47 | void 48 | array_destroy(struct array *a) 49 | { 50 | array_deinit(a); 51 | fc_free(a); 52 | } 53 | 54 | rstatus_t 55 | array_init(struct array *a, uint32_t n, size_t size) 56 | { 57 | ASSERT(n != 0 && size != 0); 58 | 59 | a->elem = fc_alloc(n * size); 60 | if (a->elem == NULL) { 61 | return FC_ENOMEM; 62 | } 63 | 64 | a->nelem = 0; 65 | a->size = size; 66 | a->nalloc = n; 67 | 68 | return FC_OK; 69 | } 70 | 71 | void 72 | array_deinit(struct array *a) 73 | { 74 | ASSERT(a->nelem == 0); 75 | 76 | if (a->elem != NULL) { 77 | fc_free(a->elem); 78 | } 79 | } 80 | 81 | uint32_t 82 | array_idx(struct array *a, void *elem) 83 | { 84 | uint8_t *p, *q; 85 | uint32_t off, idx; 86 | 87 | ASSERT(elem >= a->elem); 88 | 89 | p = a->elem; 90 | q = elem; 91 | off = (uint32_t)(q - p); 92 | 93 | ASSERT(off % (uint32_t)a->size == 0); 94 | 95 | idx = off / (uint32_t)a->size; 96 | 97 | return idx; 98 | } 99 | 100 | void * 101 | array_push(struct array *a) 102 | { 103 | void *elem, *new; 104 | size_t size; 105 | 106 | if (a->nelem == a->nalloc) { 107 | 108 | /* the array is full; allocate new array */ 109 | size = a->size * a->nalloc; 110 | new = fc_realloc(a->elem, 2 * size); 111 | if (new == NULL) { 112 | return NULL; 113 | } 114 | 115 | a->elem = new; 116 | a->nalloc *= 2; 117 | } 118 | 119 | elem = (uint8_t *)a->elem + a->size * a->nelem; 120 | a->nelem++; 121 | 122 | return elem; 123 | } 124 | 125 | void * 126 | array_pop(struct array *a) 127 | { 128 | void *elem; 129 | 130 | ASSERT(a->nelem != 0); 131 | 132 | 
a->nelem--; 133 | elem = (uint8_t *)a->elem + a->size * a->nelem; 134 | 135 | return elem; 136 | } 137 | 138 | void * 139 | array_get(struct array *a, uint32_t idx) 140 | { 141 | void *elem; 142 | 143 | ASSERT(a->nelem != 0); 144 | ASSERT(idx < a->nelem); 145 | 146 | elem = (uint8_t *)a->elem + (a->size * idx); 147 | 148 | return elem; 149 | } 150 | 151 | void * 152 | array_top(struct array *a) 153 | { 154 | ASSERT(a->nelem != 0); 155 | 156 | return array_get(a, a->nelem - 1); 157 | } 158 | 159 | void 160 | array_swap(struct array *a, struct array *b) 161 | { 162 | struct array tmp; 163 | 164 | tmp = *a; 165 | *a = *b; 166 | *b = tmp; 167 | } 168 | 169 | /* 170 | * Sort nelem elements of the array in ascending order based on the 171 | * compare comparator. 172 | * 173 | */ 174 | void 175 | array_sort(struct array *a, array_compare_t compare) 176 | { 177 | ASSERT(a->nelem != 0); 178 | 179 | qsort(a->elem, a->nelem, a->size, compare); 180 | } 181 | 182 | /* 183 | * Calls the func once for each element in the array as long as func returns 184 | * success. On failure short-circuits and returns the error status. 185 | */ 186 | rstatus_t 187 | array_each(struct array *a, array_each_t func, void *data) 188 | { 189 | uint32_t i, nelem; 190 | 191 | ASSERT(array_n(a) != 0); 192 | ASSERT(func != NULL); 193 | 194 | for (i = 0, nelem = array_n(a); i < nelem; i++) { 195 | void *elem = array_get(a, i); 196 | rstatus_t status; 197 | 198 | status = func(elem, data); 199 | if (status != FC_OK) { 200 | return status; 201 | } 202 | } 203 | 204 | return FC_OK; 205 | } 206 | -------------------------------------------------------------------------------- /src/fc_array.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _FC_ARRAY_H_ 19 | #define _FC_ARRAY_H_ 20 | 21 | #include 22 | 23 | typedef int (*array_compare_t)(const void *, const void *); 24 | typedef rstatus_t (*array_each_t)(void *, void *); 25 | 26 | struct array { 27 | uint32_t nelem; /* # element */ 28 | void *elem; /* element */ 29 | size_t size; /* element size */ 30 | uint32_t nalloc; /* # allocated element */ 31 | }; 32 | 33 | #define null_array { 0, NULL, 0, 0 } 34 | 35 | static inline void 36 | array_null(struct array *a) 37 | { 38 | a->nelem = 0; 39 | a->elem = NULL; 40 | a->size = 0; 41 | a->nalloc = 0; 42 | } 43 | 44 | static inline void 45 | array_set(struct array *a, void *elem, size_t size, uint32_t nalloc) 46 | { 47 | a->nelem = 0; 48 | a->elem = elem; 49 | a->size = size; 50 | a->nalloc = nalloc; 51 | } 52 | 53 | static inline uint32_t 54 | array_n(const struct array *a) 55 | { 56 | return a->nelem; 57 | } 58 | 59 | struct array *array_create(uint32_t n, size_t size); 60 | void array_destroy(struct array *a); 61 | rstatus_t array_init(struct array *a, uint32_t n, size_t size); 62 | void array_deinit(struct array *a); 63 | 64 | uint32_t array_idx(struct array *a, void *elem); 65 | void *array_push(struct array *a); 66 | void *array_pop(struct array *a); 67 | void *array_get(struct array *a, uint32_t idx); 68 | void *array_top(struct array *a); 69 | void array_swap(struct array *a, struct array *b); 70 | void array_sort(struct array *a, array_compare_t compare); 71 | rstatus_t array_each(struct array *a, array_each_t func, void *data); 72 | 
73 | #endif 74 | -------------------------------------------------------------------------------- /src/fc_client.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | bool 22 | client_active(struct conn *conn) 23 | { 24 | if (!TAILQ_EMPTY(&conn->omsg_q)) { 25 | log_debug(LOG_VVERB, "c %d is active", conn->sd); 26 | return true; 27 | } 28 | 29 | if (conn->rmsg != NULL) { 30 | log_debug(LOG_VVERB, "c %d is active", conn->sd); 31 | return true; 32 | } 33 | 34 | if (conn->smsg != NULL) { 35 | log_debug(LOG_VVERB, "c %d is active", conn->sd); 36 | return true; 37 | } 38 | 39 | log_debug(LOG_VVERB, "c %d is inactive", conn->sd); 40 | 41 | return false; 42 | } 43 | 44 | void 45 | client_close(struct context *ctx, struct conn *conn) 46 | { 47 | rstatus_t status; 48 | struct msg *msg, *nmsg; /* current and next message */ 49 | 50 | if (conn->sd < 0) { 51 | conn_put(conn); 52 | return; 53 | } 54 | 55 | msg = conn->rmsg; 56 | if (msg != NULL) { 57 | conn->rmsg = NULL; 58 | 59 | ASSERT(msg->peer == NULL); 60 | ASSERT(msg->request && !msg->done); 61 | 62 | log_debug(LOG_INFO, "close c %d discarding pending req %"PRIu64" len " 63 | "%"PRIu32" type %d", conn->sd, msg->id, msg->mlen, 64 | msg->type); 65 | 66 | req_put(msg); 67 | } 68 | 69 
| ASSERT(conn->smsg == NULL); 70 | 71 | for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) { 72 | nmsg = TAILQ_NEXT(msg, c_tqe); 73 | 74 | /* dequeue the message (request) from client outq */ 75 | req_dequeue_omsgq(ctx, conn, msg); 76 | 77 | if (msg->done) { 78 | log_debug(LOG_INFO, "close c %d discarding %s req %"PRIu64" len " 79 | "%"PRIu32" type %d", conn->sd, 80 | msg->error ? "error": "completed", msg->id, msg->mlen, 81 | msg->type); 82 | req_put(msg); 83 | } else { 84 | msg->swallow = 1; 85 | 86 | ASSERT(msg->request); 87 | ASSERT(msg->peer == NULL); 88 | 89 | log_debug(LOG_INFO, "close c %d schedule swallow of req %"PRIu64" " 90 | "len %"PRIu32" type %d", conn->sd, msg->id, msg->mlen, 91 | msg->type); 92 | } 93 | } 94 | ASSERT(TAILQ_EMPTY(&conn->omsg_q)); 95 | 96 | status = close(conn->sd); 97 | if (status < 0) { 98 | log_error("close c %d failed, ignored: %s", conn->sd, strerror(errno)); 99 | } 100 | conn->sd = -1; 101 | 102 | conn_put(conn); 103 | } 104 | -------------------------------------------------------------------------------- /src/fc_client.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _FC_CLIENT_H_ 19 | #define _FC_CLIENT_H_ 20 | 21 | #include 22 | 23 | bool client_active(struct conn *conn); 24 | void client_close(struct context *ctx, struct conn *conn); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/fc_common.h: -------------------------------------------------------------------------------- 1 | #ifndef _FC_COMMON_H_ 2 | #define _FC_COMMON_H_ 3 | 4 | #define FC_OK 0 5 | #define FC_ERROR -1 6 | #define FC_EAGAIN -2 7 | #define FC_ENOMEM -3 8 | 9 | typedef int rstatus_t; /* return type */ 10 | typedef int err_t; /* error type */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /src/fc_connection.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | extern struct settings settings; 23 | 24 | static uint32_t nalloc_conn; /* total conn num */ 25 | static uint32_t nfree_connq; /* # free conn q */ 26 | static struct conn_tqh free_connq; /* free conn q */ 27 | 28 | static void conn_free(struct conn *conn); 29 | 30 | void 31 | conn_init(void) 32 | { 33 | log_debug(LOG_DEBUG, "conn size %d", sizeof(struct conn)); 34 | nfree_connq = 0; 35 | nalloc_conn = 0; 36 | TAILQ_INIT(&free_connq); 37 | } 38 | 39 | void 40 | conn_deinit(void) 41 | { 42 | struct conn *conn, *nconn; /* current and next connection */ 43 | 44 | for (conn = TAILQ_FIRST(&free_connq); conn != NULL; 45 | conn = nconn, nfree_connq--) { 46 | ASSERT(nfree_connq > 0); 47 | nconn = TAILQ_NEXT(conn, tqe); 48 | conn_free(conn); 49 | } 50 | ASSERT(nfree_connq == 0); 51 | } 52 | 53 | ssize_t 54 | conn_recv(struct conn *conn, void *buf, size_t size) 55 | { 56 | ssize_t n; 57 | 58 | ASSERT(buf != NULL); 59 | ASSERT(size > 0); 60 | ASSERT(conn->recv_ready); 61 | 62 | for (;;) { 63 | n = fc_read(conn->sd, buf, size); 64 | 65 | log_debug(LOG_VERB, "recv on sd %d %zd of %zu", conn->sd, n, size); 66 | 67 | if (n > 0) { 68 | if (n < (ssize_t) size) { 69 | conn->recv_ready = 0; 70 | } 71 | conn->recv_bytes += (size_t)n; 72 | return n; 73 | } 74 | 75 | if (n == 0) { 76 | conn->recv_ready = 0; 77 | conn->eof = 1; 78 | log_debug(LOG_INFO, "recv on sd %d eof rb %zu sb %zu", conn->sd, 79 | conn->recv_bytes, conn->send_bytes); 80 | return n; 81 | } 82 | 83 | if (errno == EINTR) { 84 | log_debug(LOG_VERB, "recv on sd %d not ready - eintr", conn->sd); 85 | continue; 86 | } else if (errno == EAGAIN || errno == EWOULDBLOCK) { 87 | conn->recv_ready = 0; 88 | log_debug(LOG_VERB, "recv on sd %d not ready - eagain", conn->sd); 89 | return FC_EAGAIN; 90 | } else { 91 | conn->recv_ready = 0; 92 | conn->err = errno; 93 | log_error("recv on sd %d failed: %s", conn->sd, strerror(errno)); 94 | return FC_ERROR; 95 | } 96 | } 97 
| 98 | NOT_REACHED(); 99 | 100 | return FC_ERROR; 101 | } 102 | 103 | ssize_t 104 | conn_sendv(struct conn *conn, struct array *sendv, size_t nsend) 105 | { 106 | ssize_t n; 107 | 108 | ASSERT(array_n(sendv) > 0); 109 | ASSERT(nsend != 0); 110 | ASSERT(conn->send_ready); 111 | 112 | for (;;) { 113 | n = fc_writev(conn->sd, sendv->elem, sendv->nelem); 114 | 115 | log_debug(LOG_VERB, "sendv on sd %d %zd of %zu in %"PRIu32" buffers", 116 | conn->sd, n, nsend, sendv->nelem); 117 | 118 | if (n > 0) { 119 | if (n < (ssize_t) nsend) { 120 | conn->send_ready = 0; 121 | } 122 | conn->send_bytes += (size_t)n; 123 | return n; 124 | } 125 | 126 | if (n == 0) { 127 | log_warn("sendv on sd %d returned zero", conn->sd); 128 | conn->send_ready = 0; 129 | return 0; 130 | } 131 | 132 | if (errno == EINTR) { 133 | log_debug(LOG_VERB, "sendv on sd %d not ready - eintr", conn->sd); 134 | continue; 135 | } else if (errno == EAGAIN || errno == EWOULDBLOCK) { 136 | conn->send_ready = 0; 137 | log_debug(LOG_VERB, "sendv on sd %d not ready - eagain", conn->sd); 138 | return FC_EAGAIN; 139 | } else { 140 | conn->send_ready = 0; 141 | conn->err = errno; 142 | log_error("sendv on sd %d failed: %s", conn->sd, strerror(errno)); 143 | return FC_ERROR; 144 | } 145 | } 146 | 147 | NOT_REACHED(); 148 | 149 | return FC_ERROR; 150 | } 151 | 152 | static void 153 | conn_free(struct conn *conn) 154 | { 155 | log_debug(LOG_VVERB, "free conn %p", conn); 156 | fc_free(conn); 157 | } 158 | 159 | void 160 | conn_put(struct conn *conn) 161 | { 162 | ASSERT(conn->sd < 0); 163 | 164 | log_debug(LOG_VVERB, "put conn %p", conn); 165 | 166 | nfree_connq++; 167 | TAILQ_INSERT_HEAD(&free_connq, conn, tqe); 168 | } 169 | 170 | static struct conn * 171 | _conn_get(void) 172 | { 173 | struct conn *conn; 174 | 175 | if (!TAILQ_EMPTY(&free_connq)) { 176 | ASSERT(nfree_connq > 0); 177 | 178 | conn = TAILQ_FIRST(&free_connq); 179 | nfree_connq--; 180 | TAILQ_REMOVE(&free_connq, conn, tqe); 181 | } else { 182 | conn = 
fc_alloc(sizeof(*conn)); 183 | if (conn == NULL) { 184 | return NULL; 185 | } 186 | ++nalloc_conn; 187 | } 188 | 189 | /* extra stuff */ 190 | 191 | TAILQ_INIT(&conn->omsg_q); 192 | conn->rmsg = NULL; 193 | conn->smsg = NULL; 194 | 195 | conn->recv = NULL; 196 | conn->send = NULL; 197 | conn->close = NULL; 198 | conn->active = NULL; 199 | 200 | conn->send_bytes = 0; 201 | conn->recv_bytes = 0; 202 | 203 | conn->events = 0; 204 | conn->err = 0; 205 | conn->recv_active = 0; 206 | conn->recv_ready = 0; 207 | conn->send_active = 0; 208 | conn->send_ready = 0; 209 | 210 | conn->client = 0; 211 | conn->eof = 0; 212 | conn->done = 0; 213 | conn->noreply = 0; 214 | 215 | return conn; 216 | } 217 | 218 | struct conn * 219 | conn_get(int sd, bool client) 220 | { 221 | struct conn *c; 222 | 223 | c = _conn_get(); 224 | if (c == NULL) { 225 | return NULL; 226 | } 227 | c->sd = sd; 228 | c->client = client ? 1 : 0; 229 | 230 | if (client) { 231 | c->recv = msg_recv; 232 | c->send = msg_send; 233 | c->close = client_close; 234 | c->active = client_active; 235 | } else { 236 | c->recv = server_recv; 237 | c->send = NULL; 238 | c->close = NULL; 239 | c->active = NULL; 240 | } 241 | 242 | log_debug(LOG_VVERB, "get conn %p c %d", c, c->sd); 243 | 244 | return c; 245 | } 246 | 247 | uint32_t 248 | conn_total(void) 249 | { 250 | return nalloc_conn - 1; 251 | } 252 | 253 | uint32_t 254 | conn_nused(void) 255 | { 256 | return nalloc_conn - nfree_connq - 1; 257 | } 258 | 259 | uint32_t 260 | conn_nfree(void) 261 | { 262 | return nfree_connq; 263 | } 264 | -------------------------------------------------------------------------------- /src/fc_connection.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
#ifndef _FC_CONNECTION_H_
#define _FC_CONNECTION_H_

/* handlers stored in struct conn for reading from / writing to a connection */
typedef rstatus_t (*conn_recv_t)(struct context *, struct conn*);
typedef rstatus_t (*conn_send_t)(struct context *, struct conn*);

/* NOTE(review): no user of these two handler types is visible in this header */
typedef struct msg* (*conn_send_next_t)(struct context *, struct conn *);
typedef void (*conn_send_done_t)(struct context *, struct conn *, struct msg *);

/* handlers stored in struct conn for closing it / asking whether it is active */
typedef void (*conn_close_t)(struct context *, struct conn *);
typedef bool (*conn_active_t)(struct conn *);

/*
 * State kept for one socket: its descriptor, the messages in flight, the
 * handlers that drive it and its I/O readiness flags.
 */
struct conn {
    int                sd;            /* socket descriptor */
    TAILQ_ENTRY(conn)  tqe;           /* link in free q */

    struct msg_tqh     omsg_q;        /* outstanding request Q */
    struct msg         *rmsg;         /* current request being rcvd */
    struct msg         *smsg;         /* current response being sent */

    conn_recv_t        recv;          /* recv (read) handler */
    conn_send_t        send;          /* send (write) handler */
    conn_close_t       close;         /* close handler */
    conn_active_t      active;        /* active? handler */

    size_t             recv_bytes;    /* received (read) bytes */
    size_t             send_bytes;    /* sent (written) bytes */

    uint32_t           events;        /* connection io events */
    err_t              err;           /* connection errno */
    unsigned           recv_active:1; /* recv active? */
    unsigned           recv_ready:1;  /* recv ready? */
    unsigned           send_active:1; /* send active? */
    unsigned           send_ready:1;  /* send ready? */

    unsigned           client:1;      /* client? */
    unsigned           eof:1;         /* eof? aka passive close? */
    unsigned           done:1;        /* done? aka close? */
    unsigned           noreply:1;     /* noreply? */
};

/* queue head type for lists of connections linked through tqe */
TAILQ_HEAD(conn_tqh, conn);

/* set up / tear down the connection module */
void conn_init(void);
void conn_deinit(void);

/* socket I/O on a connection; both return a byte count or a negative status */
ssize_t conn_recv(struct conn *conn, void *buf, size_t size);
ssize_t conn_sendv(struct conn *conn, struct array *sendv, size_t nsend);

/* obtain a connection for socket sd / hand a connection back */
struct conn *conn_get(int sd, bool client);
void conn_put(struct conn *c);

/* connection counters */
uint32_t conn_total(void);
uint32_t conn_nused(void);
uint32_t conn_nfree(void);

#endif
16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | extern struct settings settings; 25 | 26 | rstatus_t 27 | core_init(void) 28 | { 29 | rstatus_t status; 30 | 31 | status = log_init(settings.verbose, settings.log_filename); 32 | if (status != FC_OK) { 33 | return status; 34 | } 35 | 36 | status = signal_init(); 37 | if (status != FC_OK) { 38 | return status; 39 | } 40 | 41 | status = time_init(); 42 | if (status != FC_OK) { 43 | return status; 44 | } 45 | 46 | status = itemx_init(); 47 | if (status != FC_OK) { 48 | return status; 49 | } 50 | 51 | conn_init(); 52 | 53 | mbuf_init(); 54 | 55 | msg_init(); 56 | 57 | item_init(); 58 | 59 | status = slab_init(); 60 | if (status != FC_OK) { 61 | return status; 62 | } 63 | 64 | return FC_OK; 65 | } 66 | 67 | void 68 | core_deinit(void) 69 | { 70 | } 71 | 72 | static rstatus_t 73 | core_recv(struct context *ctx, struct conn *conn) 74 | { 75 | rstatus_t status; 76 | 77 | status = conn->recv(ctx, conn); 78 | if (status != FC_OK) { 79 | log_debug(LOG_INFO, "recv on %c %d failed: %s", 80 | conn->client ? 'c' : 's', conn->sd, 81 | strerror(errno)); 82 | } 83 | 84 | return status; 85 | } 86 | 87 | static rstatus_t 88 | core_send(struct context *ctx, struct conn *conn) 89 | { 90 | rstatus_t status; 91 | 92 | status = conn->send(ctx, conn); 93 | if (status != FC_OK) { 94 | log_debug(LOG_INFO, "send on %c %d failed: %s", 95 | conn->client ? 'c' : 's', conn->sd, 96 | strerror(errno)); 97 | } 98 | 99 | return status; 100 | } 101 | 102 | static void 103 | core_close(struct context *ctx, struct conn *conn) 104 | { 105 | rstatus_t status; 106 | char type = conn->client ? 'c' : 's'; 107 | 108 | ASSERT(conn->sd > 0); 109 | 110 | log_debug(LOG_NOTICE, "close %c %d on event %04"PRIX32" eof %d done " 111 | "%d rb %zu sb %zu%c %s", type, conn->sd, conn->events, 112 | conn->eof, conn->done, conn->recv_bytes, conn->send_bytes, 113 | conn->err ? ':' : ' ', conn->err ? 
strerror(conn->err) : ""); 114 | 115 | status = event_del_conn(ctx->ep, conn); 116 | if (status < 0) { 117 | log_warn("event del conn e %d %c %d failed, ignored: %s", ctx->ep, 118 | type, conn->sd, strerror(errno)); 119 | } 120 | 121 | conn->close(ctx, conn); 122 | } 123 | 124 | static void 125 | core_error(struct context *ctx, struct conn *conn) 126 | { 127 | rstatus_t status; 128 | char type = conn->client ? 'c' : 's'; 129 | 130 | status = fc_get_soerror(conn->sd); 131 | if (status < 0) { 132 | log_warn("get soerr on %c %d failed, ignored: %s", type, conn->sd, 133 | strerror(errno)); 134 | } 135 | conn->err = errno; 136 | 137 | core_close(ctx, conn); 138 | } 139 | 140 | static void 141 | core_core(struct context *ctx, struct conn *conn, uint32_t events) 142 | { 143 | rstatus_t status; 144 | 145 | log_debug(LOG_VERB, "event %04"PRIX32" on %d", events, conn->sd); 146 | 147 | conn->events = events; 148 | 149 | /* error takes precedence over read | write */ 150 | if (events & EPOLLERR) { 151 | core_error(ctx, conn); 152 | return; 153 | } 154 | 155 | /* read takes precedence over write */ 156 | if (events & (EPOLLIN | EPOLLHUP)) { 157 | status = core_recv(ctx, conn); 158 | if (status != FC_OK || conn->done || conn->err) { 159 | core_close(ctx, conn); 160 | return; 161 | } 162 | } 163 | 164 | if (events & EPOLLOUT) { 165 | status = core_send(ctx, conn); 166 | if (status != FC_OK || conn->done || conn->err) { 167 | core_close(ctx, conn); 168 | return; 169 | } 170 | } 171 | } 172 | 173 | rstatus_t 174 | core_start(struct context *ctx) 175 | { 176 | rstatus_t status; 177 | 178 | ctx->ep = -1; 179 | ctx->nevent = 1024; 180 | ctx->max_timeout = -1; 181 | ctx->timeout = ctx->max_timeout; 182 | ctx->event = NULL; 183 | 184 | status = event_init(ctx, 1024); 185 | if (status != FC_OK) { 186 | return status; 187 | } 188 | 189 | status = server_listen(ctx); 190 | if (status != FC_OK) { 191 | return status; 192 | } 193 | 194 | return FC_OK; 195 | } 196 | 197 | void 198 | 
core_stop(struct context *ctx) 199 | { 200 | } 201 | 202 | rstatus_t 203 | core_loop(struct context *ctx) 204 | { 205 | int i, nsd; 206 | 207 | nsd = event_wait(ctx->ep, ctx->event, ctx->nevent, ctx->timeout); 208 | if (nsd < 0) { 209 | return nsd; 210 | } 211 | 212 | for (i = 0; i < nsd; i++) { 213 | struct epoll_event *ev = &ctx->event[i]; 214 | 215 | core_core(ctx, ev->data.ptr, ev->events); 216 | } 217 | 218 | return FC_OK; 219 | } 220 | 221 | -------------------------------------------------------------------------------- /src/fc_core.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _FC_CORE_H_ 19 | #define _FC_CORE_H_ 20 | 21 | #ifdef HAVE_CONFIG_H 22 | # include 23 | #endif 24 | 25 | #ifdef HAVE_DEBUG_LOG 26 | # define FC_DEBUG_LOG 1 27 | #else 28 | # define FC_DEBUG_LOG 0 29 | #endif 30 | 31 | #ifdef HAVE_ASSERT_PANIC 32 | # define FC_ASSERT_PANIC 1 33 | #else 34 | # define FC_ASSERT_PANIC 0 35 | #endif 36 | 37 | #ifdef HAVE_ASSERT_LOG 38 | # define FC_ASSERT_LOG 1 39 | #else 40 | # define FC_ASSERT_LOG 0 41 | #endif 42 | 43 | #ifdef HAVE_LITTLE_ENDIAN 44 | # define FC_LITTLE_ENDIAN 1 45 | #endif 46 | 47 | #ifdef HAVE_BACKTRACE 48 | #define FC_BACKTRACE 1 49 | #endif 50 | 51 | struct array; 52 | struct context; 53 | struct epoll_event; 54 | struct conn; 55 | struct conn_tqh; 56 | struct msg; 57 | struct msg_tqh; 58 | struct mbuf; 59 | struct mhdr; 60 | struct item; 61 | struct slab; 62 | struct slabclass; 63 | #include "fc_common.h" 64 | 65 | #include 66 | #include 67 | #include 68 | #include 69 | #include 70 | #include 71 | #include 72 | 73 | #include 74 | #include 75 | #include 76 | #include 77 | 78 | #include 79 | #include 80 | #include 81 | #include 82 | #include 83 | 84 | struct context { 85 | int ep; /* epoll device */ 86 | int nevent; /* # epoll event */ 87 | int max_timeout; /* epoll wait max timeout in msec */ 88 | int timeout; /* epoll wait timeout in msec */ 89 | struct epoll_event *event; /* epoll event */ 90 | }; 91 | 92 | #include "fc_settings.h" 93 | 94 | rstatus_t core_init(void); 95 | void core_deinit(void); 96 | 97 | rstatus_t core_start(struct context *ctx); 98 | void core_stop(struct context *ctx); 99 | rstatus_t core_loop(struct context *ctx); 100 | 101 | #endif 102 | -------------------------------------------------------------------------------- /src/fc_event.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 
4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | int 25 | event_init(struct context *ctx, int size) 26 | { 27 | int status, ep; 28 | struct epoll_event *event; 29 | 30 | ASSERT(ctx->ep < 0); 31 | ASSERT(ctx->nevent != 0); 32 | ASSERT(ctx->event == NULL); 33 | 34 | ep = epoll_create(size); 35 | if (ep < 0) { 36 | log_error("epoll create of size %d failed: %s", size, strerror(errno)); 37 | return -1; 38 | } 39 | 40 | event = fc_calloc(ctx->nevent, sizeof(*ctx->event)); 41 | if (event == NULL) { 42 | status = close(ep); 43 | if (status < 0) { 44 | log_error("close e %d failed, ignored: %s", ep, strerror(errno)); 45 | } 46 | return -1; 47 | } 48 | 49 | ctx->ep = ep; 50 | ctx->event = event; 51 | 52 | log_debug(LOG_INFO, "e %d with nevent %d timeout %d", ctx->ep, 53 | ctx->nevent, ctx->timeout); 54 | 55 | return 0; 56 | } 57 | 58 | void 59 | event_deinit(struct context *ctx) 60 | { 61 | int status; 62 | 63 | ASSERT(ctx->ep >= 0); 64 | 65 | fc_free(ctx->event); 66 | 67 | status = close(ctx->ep); 68 | if (status < 0) { 69 | log_error("close e %d failed, ignored: %s", ctx->ep, strerror(errno)); 70 | } 71 | ctx->ep = -1; 72 | } 73 | 74 | int 75 | event_add_out(int ep, struct conn *c) 76 | { 77 | int status; 78 | struct epoll_event event; 79 | 80 | ASSERT(ep > 0); 81 | ASSERT(c != NULL); 82 | ASSERT(c->sd > 0); 83 | ASSERT(c->recv_active); 84 | 85 | if 
(c->send_active) { 86 | return 0; 87 | } 88 | 89 | event.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); 90 | event.data.ptr = c; 91 | 92 | status = epoll_ctl(ep, EPOLL_CTL_MOD, c->sd, &event); 93 | if (status < 0) { 94 | log_error("epoll ctl on e %d sd %d failed: %s", ep, c->sd, 95 | strerror(errno)); 96 | } else { 97 | c->send_active = 1; 98 | } 99 | 100 | return status; 101 | } 102 | 103 | int 104 | event_del_out(int ep, struct conn *c) 105 | { 106 | int status; 107 | struct epoll_event event; 108 | 109 | ASSERT(ep > 0); 110 | ASSERT(c != NULL); 111 | ASSERT(c->sd > 0); 112 | ASSERT(c->recv_active); 113 | 114 | if (!c->send_active) { 115 | return 0; 116 | } 117 | 118 | event.events = (uint32_t)(EPOLLIN | EPOLLET); 119 | event.data.ptr = c; 120 | 121 | status = epoll_ctl(ep, EPOLL_CTL_MOD, c->sd, &event); 122 | if (status < 0) { 123 | log_error("epoll ctl on e %d sd %d failed: %s", ep, c->sd, 124 | strerror(errno)); 125 | } else { 126 | c->send_active = 0; 127 | } 128 | 129 | return status; 130 | } 131 | 132 | int 133 | event_add_conn(int ep, struct conn *c) 134 | { 135 | int status; 136 | struct epoll_event event; 137 | 138 | ASSERT(ep > 0); 139 | ASSERT(c != NULL); 140 | ASSERT(c->sd > 0); 141 | 142 | event.events = (uint32_t)(EPOLLIN | EPOLLOUT | EPOLLET); 143 | event.data.ptr = c; 144 | 145 | status = epoll_ctl(ep, EPOLL_CTL_ADD, c->sd, &event); 146 | if (status < 0) { 147 | log_error("epoll ctl on e %d sd %d failed: %s", ep, c->sd, 148 | strerror(errno)); 149 | } else { 150 | c->send_active = 1; 151 | c->recv_active = 1; 152 | } 153 | 154 | return status; 155 | } 156 | 157 | int 158 | event_del_conn(int ep, struct conn *c) 159 | { 160 | int status; 161 | 162 | ASSERT(ep > 0); 163 | ASSERT(c != NULL); 164 | ASSERT(c->sd > 0); 165 | 166 | status = epoll_ctl(ep, EPOLL_CTL_DEL, c->sd, NULL); 167 | if (status < 0) { 168 | log_error("epoll ctl on e %d sd %d failed: %s", ep, c->sd, 169 | strerror(errno)); 170 | } else { 171 | c->recv_active = 0; 172 | 
c->send_active = 0; 173 | } 174 | 175 | return status; 176 | } 177 | 178 | int 179 | event_wait(int ep, struct epoll_event *event, int nevent, int timeout) 180 | { 181 | int nsd; 182 | 183 | ASSERT(ep > 0); 184 | ASSERT(event != NULL); 185 | ASSERT(nevent > 0); 186 | 187 | for (;;) { 188 | nsd = epoll_wait(ep, event, nevent, timeout); 189 | if (nsd > 0) { 190 | return nsd; 191 | } 192 | 193 | if (nsd == 0) { 194 | if (timeout == -1) { 195 | log_error("epoll wait on e %d with %d events and %d timeout " 196 | "returned no events", ep, nevent, timeout); 197 | return -1; 198 | } 199 | 200 | return 0; 201 | } 202 | 203 | if (errno == EINTR) { 204 | continue; 205 | } 206 | 207 | log_error("epoll wait on e %d with %d events failed: %s", ep, nevent, 208 | strerror(errno)); 209 | 210 | return -1; 211 | } 212 | 213 | NOT_REACHED(); 214 | } 215 | -------------------------------------------------------------------------------- /src/fc_event.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _FC_EVENT_H_ 19 | #define _FC_EVENT_H_ 20 | 21 | #include 22 | 23 | int event_init(struct context *ctx, int size); 24 | void event_deinit(struct context *ctx); 25 | 26 | int event_add_out(int ep, struct conn *c); 27 | int event_del_out(int ep, struct conn *c); 28 | int event_add_conn(int ep, struct conn *c); 29 | int event_del_conn(int ep, struct conn *c); 30 | 31 | int event_wait(int ep, struct epoll_event *event, int nevent, int timeout); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/fc_item.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | extern struct settings settings; 24 | 25 | static uint64_t cas_id; 26 | 27 | /* 28 | * Return the owner slab of item it. 
29 | */ 30 | struct slab * 31 | item_to_slab(struct item *it) 32 | { 33 | struct slab *slab; 34 | 35 | ASSERT(it->magic == ITEM_MAGIC); 36 | ASSERT(it->offset < settings.slab_size); 37 | 38 | slab = (struct slab *)((uint8_t *)it - it->offset); 39 | 40 | ASSERT(slab->magic == SLAB_MAGIC); 41 | 42 | return slab; 43 | } 44 | 45 | uint8_t 46 | item_slabcid(uint8_t nkey, uint32_t ndata) 47 | { 48 | size_t ntotal; 49 | uint8_t cid; 50 | 51 | ntotal = item_ntotal(nkey, ndata); 52 | 53 | cid = slab_cid(ntotal); 54 | if (cid == SLABCLASS_INVALID_ID) { 55 | log_debug(LOG_NOTICE, "slab class id out of range with %"PRIu8" bytes " 56 | "key, %"PRIu32" bytes value and %zu item chunk size", nkey, 57 | ndata, ntotal); 58 | } 59 | 60 | return cid; 61 | } 62 | 63 | struct item * 64 | item_get(uint8_t *key, uint8_t nkey, uint8_t cid, uint32_t ndata, 65 | rel_time_t expiry, uint32_t flags, uint8_t *md, uint32_t hash) 66 | { 67 | struct item *it; 68 | 69 | ASSERT(slab_valid_id(cid)); 70 | 71 | it = slab_get_item(cid); 72 | if (it == NULL) { 73 | log_warn("server error on allocating item in slab %"PRIu8, cid); 74 | return NULL; 75 | } 76 | 77 | it->magic = ITEM_MAGIC; 78 | /* offset and sid are initialized by slab_get_item */ 79 | it->cid = cid; 80 | it->nkey = nkey; 81 | it->ndata = ndata; 82 | it->flags = flags; 83 | fc_memcpy(it->md, md, sizeof(it->md)); 84 | it->hash = hash; 85 | /* part of end[] that stores the key string is initialized here */ 86 | fc_memcpy(item_key(it), key, nkey); 87 | 88 | log_debug(LOG_VERB, "get it '%.*s' at offset %"PRIu32" with cid %"PRIu8 89 | " expiry %u", it->nkey, item_key(it), it->offset, it->cid, 90 | expiry); 91 | 92 | itemx_putx(it->hash, it->md, it->sid, it->offset, expiry, ++cas_id); 93 | 94 | return it; 95 | } 96 | 97 | void 98 | item_put(struct item *it) 99 | { 100 | ASSERT(it->magic == ITEM_MAGIC); 101 | 102 | slab_put_item(it); 103 | } 104 | 105 | void 106 | item_init(void) 107 | { 108 | cas_id = 0ULL; 109 | } 110 | 111 | void 112 | 
item_deinit(void) 113 | { 114 | } 115 | -------------------------------------------------------------------------------- /src/fc_item.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _FC_ITEM_H_ 19 | #define _FC_ITEM_H_ 20 | 21 | #include 22 | 23 | struct item { 24 | uint32_t magic; /* item magic (const) */ 25 | uint32_t offset; /* raw offset from owner slab base (const) */ 26 | uint32_t sid; /* slab id (const) */ 27 | uint8_t cid; /* slab class id (const) */ 28 | uint8_t unused[2]; /* unused */ 29 | uint8_t nkey; /* key length */ 30 | uint32_t ndata; /* date length */ 31 | uint32_t flags; /* flags opaque to the server */ 32 | uint8_t md[20]; /* key message digest */ 33 | uint32_t hash; /* key hash */ 34 | uint8_t end[1]; /* item data */ 35 | }; 36 | 37 | #define ITEM_MAGIC 0xfeedface 38 | #define ITEM_HDR_SIZE offsetof(struct item, end) 39 | 40 | /* 41 | * An item chunk is the portion of the memory carved out from the slab 42 | * for an item. An item chunk contains the item header followed by item 43 | * data. 44 | * 45 | * The smallest item data is actually a single byte key with a zero byte 46 | * value which internally is of sizeof("k"), as key is stored with 47 | * terminating '\0'. 
48 | * 49 | * The largest item data is actually the room left in the slab_size() 50 | * slab, after the item header has been factored out 51 | */ 52 | #define ITEM_MIN_PAYLOAD_SIZE (sizeof("k") + sizeof(uint64_t)) 53 | #define ITEM_MIN_CHUNK_SIZE \ 54 | FC_ALIGN(ITEM_HDR_SIZE + ITEM_MIN_PAYLOAD_SIZE, FC_ALIGNMENT) 55 | 56 | #define ITEM_PAYLOAD_SIZE 32 57 | #define ITEM_CHUNK_SIZE \ 58 | FC_ALIGN(ITEM_HDR_SIZE + ITEM_PAYLOAD_SIZE, FC_ALIGNMENT) 59 | 60 | static inline uint8_t * 61 | item_key(struct item *it) 62 | { 63 | ASSERT(it->magic == ITEM_MAGIC); 64 | 65 | return it->end; 66 | } 67 | 68 | static inline size_t 69 | item_ntotal(uint8_t nkey, uint32_t ndata) 70 | { 71 | return ITEM_HDR_SIZE + nkey + ndata; 72 | } 73 | 74 | static inline size_t 75 | item_size(struct item *it) 76 | { 77 | ASSERT(it->magic == ITEM_MAGIC); 78 | 79 | return item_ntotal(it->nkey, it->ndata); 80 | } 81 | 82 | static inline uint8_t * 83 | item_data(struct item *it) 84 | { 85 | ASSERT(it->magic == ITEM_MAGIC); 86 | 87 | return it->end + it->nkey; 88 | } 89 | 90 | struct slab *item_to_slab(struct item *it); 91 | uint8_t item_slabcid(uint8_t nkey, uint32_t ndata); 92 | 93 | struct item *item_get(uint8_t *key, uint8_t nkey, uint8_t cid, uint32_t ndata, rel_time_t expiry, uint32_t dataflags, uint8_t *md, uint32_t hash); 94 | void item_put(struct item *it); 95 | 96 | void item_init(void); 97 | void item_deinit(void); 98 | #endif 99 | -------------------------------------------------------------------------------- /src/fc_itemx.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | 20 | #define HASHSIZE(_n) (1ULL << (_n)) 21 | #define HASHMASK(_n) (HASHSIZE(_n) - 1) 22 | 23 | extern struct settings settings; 24 | 25 | static uint64_t nitx; /* # item index */ 26 | static uint64_t nitx_table; /* # item index table entries */ 27 | static struct itemx_tqh *itx_table; /* item index table */ 28 | 29 | static uint64_t nalloc_itemx; /* # nalloc itemx */ 30 | static uint64_t nfree_itemxq; /* # free itemx q */ 31 | static struct itemx_tqh free_itemxq; /* free itemx q */ 32 | 33 | static struct itemx *istart; /* itemx memory start */ 34 | static struct itemx *iend; /* itemx memory end */ 35 | 36 | /* 37 | * Return true if the itemx has expired, otherwise return false. Itemx 38 | * with expiry of 0 are considered as unexpirable. 39 | */ 40 | bool 41 | itemx_expired(struct itemx *itx) 42 | { 43 | uint32_t hash; 44 | 45 | ASSERT(itx != NULL); 46 | 47 | if(itx->expiry != 0 && itx->expiry < time_now()) { 48 | hash = sha1_hash(itx->md); 49 | itemx_removex(hash, itx->md); 50 | return true; 51 | } else { 52 | return false; 53 | } 54 | } 55 | 56 | /* 57 | * Returns true, if there are no free item indexes, otherwise 58 | * return false. 
59 | */ 60 | bool 61 | itemx_empty(void) 62 | { 63 | if (STAILQ_EMPTY(&free_itemxq)) { 64 | ASSERT(nfree_itemxq == 0); 65 | return true; 66 | } 67 | 68 | ASSERT(nfree_itemxq > 0); 69 | 70 | return false; 71 | } 72 | 73 | static struct itemx * 74 | itemx_get(void) 75 | { 76 | struct itemx *itx; 77 | 78 | ASSERT(!itemx_empty()); 79 | 80 | itx = STAILQ_FIRST(&free_itemxq); 81 | nfree_itemxq--; 82 | STAILQ_REMOVE_HEAD(&free_itemxq, tqe); 83 | 84 | STAILQ_NEXT(itx, tqe) = NULL; 85 | /* md[] is left uninitialized */ 86 | itx->sid = 0; 87 | itx->offset = 0; 88 | itx->cas = 0; 89 | 90 | log_debug(LOG_VVERB, "get itx %p", itx); 91 | 92 | return itx; 93 | } 94 | 95 | static void 96 | itemx_put(struct itemx *itx) 97 | { 98 | log_debug(LOG_VVERB, "put itx %p", itx); 99 | 100 | nfree_itemxq++; 101 | STAILQ_INSERT_HEAD(&free_itemxq, itx, tqe); 102 | } 103 | 104 | rstatus_t 105 | itemx_init(void) 106 | { 107 | struct itemx *itx; /* item index */ 108 | uint64_t n; /* # item index */ 109 | uint64_t i; /* item index iterator */ 110 | 111 | nitx = 0ULL; 112 | nitx_table = 0ULL; 113 | itx_table = NULL; 114 | 115 | nfree_itemxq = 0; 116 | STAILQ_INIT(&free_itemxq); 117 | 118 | istart = NULL; 119 | iend = NULL; 120 | 121 | /* init item index table */ 122 | nitx_table = HASHSIZE(settings.hash_power); 123 | itx_table = fc_alloc(sizeof(*itx_table) * nitx_table); 124 | if (itx_table == NULL) { 125 | return FC_ENOMEM; 126 | } 127 | for (i = 0ULL; i < nitx_table; i++) { 128 | STAILQ_INIT(&itx_table[i]); 129 | } 130 | 131 | n = settings.max_index_memory / sizeof(struct itemx); 132 | 133 | /* init item index memory */ 134 | itx = fc_mmap(settings.max_index_memory); 135 | if (itx == NULL) { 136 | return FC_ENOMEM; 137 | } 138 | istart = itx; 139 | iend = itx + n; 140 | 141 | for (itx = istart; itx < iend; itx++) { 142 | itemx_put(itx); 143 | } 144 | nalloc_itemx = n; 145 | 146 | return FC_OK; 147 | } 148 | 149 | void 150 | itemx_deinit(void) 151 | { 152 | struct itemx *itx; 153 | 154 | while 
(!STAILQ_EMPTY(&free_itemxq)) { 155 | ASSERT(nfree_itemxq > 0); 156 | 157 | itx = STAILQ_FIRST(&free_itemxq); 158 | nfree_itemxq--; 159 | STAILQ_REMOVE_HEAD(&free_itemxq, tqe); 160 | } 161 | ASSERT(nfree_itemxq == 0); 162 | 163 | if (istart != NULL) { 164 | fc_munmap(istart, settings.max_index_memory); 165 | } 166 | 167 | if (itx_table != NULL) { 168 | fc_free(itx_table); 169 | } 170 | } 171 | 172 | static struct itemx_tqh * 173 | itemx_bucket(uint32_t hash) 174 | { 175 | struct itemx_tqh *bucket; 176 | uint64_t idx; 177 | 178 | idx = hash & HASHMASK(settings.hash_power); 179 | bucket = &itx_table[idx]; 180 | 181 | return bucket; 182 | } 183 | 184 | struct itemx * 185 | itemx_getx(uint32_t hash, uint8_t *md) 186 | { 187 | struct itemx_tqh *bucket; 188 | struct itemx *itx; 189 | 190 | bucket = itemx_bucket(hash); 191 | 192 | STAILQ_FOREACH(itx, bucket, tqe) { 193 | if (memcmp(itx->md, md, sizeof(itx->md)) == 0) { 194 | break; 195 | } 196 | } 197 | 198 | return itx; 199 | } 200 | 201 | void 202 | itemx_putx(uint32_t hash, uint8_t *md, uint32_t sid, uint32_t offset, 203 | rel_time_t expiry, uint64_t cas) 204 | { 205 | struct itemx *itx; 206 | struct itemx_tqh *bucket; 207 | 208 | ASSERT(!itemx_empty()); 209 | 210 | itx = itemx_get(); 211 | itx->sid = sid; 212 | itx->offset = offset; 213 | itx->expiry = expiry; 214 | itx->cas = cas; 215 | fc_memcpy(itx->md, md, sizeof(itx->md)); 216 | 217 | ASSERT(itemx_getx(hash, md) == NULL); 218 | 219 | bucket = itemx_bucket(hash); 220 | nitx++; 221 | STAILQ_INSERT_HEAD(bucket, itx, tqe); 222 | slab_incr_chunks_by_sid(itx->sid, 1); 223 | } 224 | 225 | bool 226 | itemx_removex(uint32_t hash, uint8_t *md) 227 | { 228 | struct itemx_tqh *bucket; 229 | struct itemx *itx; 230 | 231 | itx = itemx_getx(hash, md); 232 | if (itx == NULL) { 233 | return false; 234 | } 235 | 236 | bucket = itemx_bucket(hash); 237 | nitx--; 238 | STAILQ_REMOVE(bucket, itx, itemx, tqe); 239 | slab_incr_chunks_by_sid(itx->sid, -1); 240 | 241 | itemx_put(itx); 242 
| 243 | return true; 244 | } 245 | 246 | uint64_t 247 | itemx_nalloc(void) 248 | { 249 | return nalloc_itemx; 250 | } 251 | 252 | uint64_t 253 | itemx_nfree(void) 254 | { 255 | return nfree_itemxq; 256 | } 257 | -------------------------------------------------------------------------------- /src/fc_itemx.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _FC_ITEMX_H_ 19 | #define _FC_ITEMX_H_ 20 | 21 | #define ITEMX_HASH_POWER 20 22 | 23 | struct itemx { 24 | STAILQ_ENTRY(itemx) tqe; /* link in index / free q */ 25 | uint8_t md[20]; /* sha1 message digest */ 26 | uint32_t sid; /* owner slab id */ 27 | uint32_t offset; /* item offset from owner slab base */ 28 | rel_time_t expiry; /* expiry in secs */ 29 | uint64_t cas; /* cas */ 30 | } __attribute__ ((__packed__)); 31 | 32 | STAILQ_HEAD(itemx_tqh, itemx); 33 | 34 | rstatus_t itemx_init(void); 35 | void itemx_deinit(void); 36 | 37 | bool itemx_empty(void); 38 | bool itemx_expired(struct itemx *itx); 39 | struct itemx *itemx_getx(uint32_t hash, uint8_t *md); 40 | void itemx_putx(uint32_t hash, uint8_t *md, uint32_t sid, uint32_t ioff, rel_time_t expiry, uint64_t cas); 41 | bool itemx_removex(uint32_t hash, uint8_t *md); 42 | 43 | uint64_t itemx_nalloc(void); 44 | uint64_t itemx_nfree(void); 45 | #endif 46 | -------------------------------------------------------------------------------- /src/fc_log.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | static struct logger logger; 28 | 29 | int 30 | log_init(int level, char *name) 31 | { 32 | struct logger *l = &logger; 33 | 34 | l->level = MAX(LOG_EMERG, MIN(level, LOG_PVERB)); 35 | l->name = name; 36 | if (name == NULL || !strlen(name)) { 37 | l->fd = STDERR_FILENO; 38 | } else { 39 | l->fd = open(name, O_WRONLY | O_APPEND | O_CREAT, 0644); 40 | if (l->fd < 0) { 41 | log_stderr("opening log file '%s' failed: %s", name, 42 | strerror(errno)); 43 | return -1; 44 | } 45 | } 46 | 47 | return 0; 48 | } 49 | 50 | void 51 | log_deinit(void) 52 | { 53 | struct logger *l = &logger; 54 | 55 | if (l->fd != STDERR_FILENO) { 56 | close(l->fd); 57 | } 58 | } 59 | 60 | void 61 | log_reopen(void) 62 | { 63 | struct logger *l = &logger; 64 | 65 | if (l->fd != STDERR_FILENO) { 66 | close(l->fd); 67 | l->fd = open(l->name, O_WRONLY | O_APPEND | O_CREAT, 0644); 68 | if (l->fd < 0) { 69 | log_stderr("reopening log file '%s' failed, ignored: %s", l->name, 70 | strerror(errno)); 71 | } 72 | } 73 | } 74 | 75 | void 76 | log_level_up(void) 77 | { 78 | struct logger *l = &logger; 79 | 80 | if (l->level < LOG_PVERB) { 81 | l->level++; 82 | loga("up log level to %d", l->level); 83 | } 84 | } 85 | 86 | void 87 | log_level_down(void) 88 | { 89 | struct logger *l = &logger; 90 | 91 | if (l->level > LOG_EMERG) { 92 | l->level--; 93 | loga("down log level to %d", l->level); 94 | } 95 | } 96 | 97 | void 98 | log_level_set(int level) 99 | { 100 | struct logger *l = &logger; 101 | 102 | l->level = MAX(LOG_EMERG, MIN(level, LOG_PVERB)); 103 | loga("set log level to %d", l->level); 104 | } 105 | 106 | int 107 | log_loggable(int level) 108 | { 109 | struct logger *l = &logger; 110 | 111 | if (level > l->level) { 112 | return 0; 113 | } 114 | 115 | return 1; 116 | } 117 | 118 | void 119 | _log(const char *file, int line, int panic, const char *fmt, ...) 
120 | { 121 | struct logger *l = &logger; 122 | int len, size, errno_save; 123 | char buf[LOG_MAX_LEN], *timestr; 124 | va_list args; 125 | struct tm *local; 126 | time_t t; 127 | ssize_t n; 128 | 129 | if (l->fd < 0) { 130 | return; 131 | } 132 | 133 | errno_save = errno; 134 | len = 0; /* length of output buffer */ 135 | size = LOG_MAX_LEN; /* size of output buffer */ 136 | 137 | t = time(NULL); 138 | local = localtime(&t); 139 | timestr = asctime(local); 140 | 141 | len += fc_scnprintf(buf + len, size - len, "[%.*s] %s:%d ", 142 | strlen(timestr) - 1, timestr, file, line); 143 | 144 | va_start(args, fmt); 145 | len += fc_vscnprintf(buf + len, size - len, fmt, args); 146 | va_end(args); 147 | 148 | buf[len++] = '\n'; 149 | 150 | n = write(l->fd, buf, len); 151 | if (n < 0) { 152 | l->nerror++; 153 | } 154 | 155 | errno = errno_save; 156 | 157 | if (panic) { 158 | abort(); 159 | } 160 | } 161 | 162 | void 163 | _log_stderr(const char *fmt, ...) 164 | { 165 | struct logger *l = &logger; 166 | int len, size, errno_save; 167 | char buf[4 * LOG_MAX_LEN]; 168 | va_list args; 169 | ssize_t n; 170 | 171 | errno_save = errno; 172 | len = 0; /* length of output buffer */ 173 | size = 4 * LOG_MAX_LEN; /* size of output buffer */ 174 | 175 | va_start(args, fmt); 176 | len += fc_vscnprintf(buf, size, fmt, args); 177 | va_end(args); 178 | 179 | buf[len++] = '\n'; 180 | 181 | n = write(STDERR_FILENO, buf, len); 182 | if (n < 0) { 183 | l->nerror++; 184 | } 185 | 186 | errno = errno_save; 187 | } 188 | 189 | /* 190 | * Hexadecimal dump in the canonical hex + ascii display 191 | * See -C option in man hexdump 192 | */ 193 | void 194 | _log_hexdump(char *data, int datalen) 195 | { 196 | struct logger *l = &logger; 197 | char buf[8 * LOG_MAX_LEN]; 198 | int i, off, len, size, errno_save; 199 | ssize_t n; 200 | 201 | if (l->fd < 0) { 202 | return; 203 | } 204 | 205 | /* log hexdump */ 206 | errno_save = errno; 207 | off = 0; /* data offset */ 208 | len = 0; /* length of output 
buffer */ 209 | size = 8 * LOG_MAX_LEN; /* size of output buffer */ 210 | 211 | while (datalen != 0 && (len < size - 1)) { 212 | char *save, *str; 213 | unsigned char c; 214 | int savelen; 215 | 216 | len += fc_scnprintf(buf + len, size - len, "%08x ", off); 217 | 218 | save = data; 219 | savelen = datalen; 220 | 221 | for (i = 0; datalen != 0 && i < 16; data++, datalen--, i++) { 222 | c = (unsigned char)(*data); 223 | str = (i == 7) ? " " : " "; 224 | len += fc_scnprintf(buf + len, size - len, "%02x%s", c, str); 225 | } 226 | for ( ; i < 16; i++) { 227 | str = (i == 7) ? " " : " "; 228 | len += fc_scnprintf(buf + len, size - len, " %s", str); 229 | } 230 | 231 | data = save; 232 | datalen = savelen; 233 | 234 | len += fc_scnprintf(buf + len, size - len, " |"); 235 | 236 | for (i = 0; datalen != 0 && i < 16; data++, datalen--, i++) { 237 | c = (unsigned char)(isprint(*data) ? *data : '.'); 238 | len += fc_scnprintf(buf + len, size - len, "%c", c); 239 | } 240 | len += fc_scnprintf(buf + len, size - len, "|\n"); 241 | 242 | off += 16; 243 | } 244 | 245 | n = fc_write(l->fd, buf, len); 246 | if (n < 0) { 247 | l->nerror++; 248 | } 249 | 250 | errno = errno_save; 251 | } 252 | -------------------------------------------------------------------------------- /src/fc_log.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
#ifndef _FC_LOG_H_
#define _FC_LOG_H_

/*
 * Logger state; a single instance lives in fc_log.c.
 */
struct logger {
    char *name;  /* log file name */
    int  level;  /* log level */
    int  fd;     /* log file descriptor */
    int  nerror; /* # log error */
};

#define LOG_EMERG   0   /* system is unusable */
#define LOG_ALERT   1   /* action must be taken immediately */
#define LOG_CRIT    2   /* critical conditions */
#define LOG_ERR     3   /* error conditions */
#define LOG_WARN    4   /* warning conditions */
#define LOG_NOTICE  5   /* normal but significant condition (default) */
#define LOG_INFO    6   /* informational */
#define LOG_DEBUG   7   /* debug messages */
#define LOG_VERB    8   /* verbose messages */
#define LOG_VVERB   9   /* verbose messages on crack */
#define LOG_VVVERB  10  /* verbose messages on ganga */
#define LOG_PVERB   11  /* periodic verbose messages on crack */

#define LOG_MAX_LEN 256 /* max length of log message */

/*
 * log_stderr   - log to stderr
 * loga         - log always
 * loga_hexdump - log hexdump always
 * log_error    - error log messages
 * log_warn     - warning log messages
 * log_panic    - log messages followed by a panic
 * ...
 * log_debug    - debug log messages based on a log level
 * log_hexdump  - hexdump -C of a log buffer
 *
 * log_debug and log_hexdump expand to nothing unless FC_DEBUG_LOG is
 * defined to 1, so their arguments are not evaluated in that case.
 */
#if defined FC_DEBUG_LOG && FC_DEBUG_LOG == 1

#define log_debug(_level, ...) do {                                         \
    if (log_loggable(_level) != 0) {                                        \
        _log(__FILE__, __LINE__, 0, __VA_ARGS__);                           \
    }                                                                       \
} while (0)

#define log_hexdump(_level, _data, _datalen, ...) do {                      \
    if (log_loggable(_level) != 0) {                                        \
        _log(__FILE__,__LINE__, 0, __VA_ARGS__);                            \
        _log_hexdump((char *)(_data), (int)(_datalen));                     \
    }                                                                       \
} while (0)

#else

#define log_debug(_level, ...)
#define log_hexdump(_level, _data, _datalen, ...)

#endif

#define log_stderr(...) do {                                                \
    _log_stderr(__VA_ARGS__);                                               \
} while (0)

#define loga(...) do {                                                      \
    _log(__FILE__, __LINE__, 0, __VA_ARGS__);                               \
} while (0)

/* no line continuation after the closing while (0) */
#define loga_hexdump(_data, _datalen, ...) do {                             \
    _log(__FILE__,__LINE__, 0, __VA_ARGS__);                                \
    _log_hexdump((char *)(_data), (int)(_datalen));                         \
} while (0)

/*
 * NOTE(review): log_error is gated on LOG_ALERT rather than LOG_ERR, so
 * errors are still emitted at levels 1 and 2 - confirm this is intended
 * before changing it.
 */
#define log_error(...) do {                                                 \
    if (log_loggable(LOG_ALERT) != 0) {                                     \
        _log(__FILE__, __LINE__, 0, __VA_ARGS__);                           \
    }                                                                       \
} while (0)

#define log_warn(...) do {                                                  \
    if (log_loggable(LOG_WARN) != 0) {                                      \
        _log(__FILE__, __LINE__, 0, __VA_ARGS__);                           \
    }                                                                       \
} while (0)

#define log_panic(...) do {                                                 \
    if (log_loggable(LOG_EMERG) != 0) {                                     \
        _log(__FILE__, __LINE__, 1, __VA_ARGS__);                           \
    }                                                                       \
} while (0)

int log_init(int level, char *filename);
void log_deinit(void);
void log_level_up(void);
void log_level_down(void);
void log_level_set(int level);
void log_reopen(void);
int log_loggable(int level);
void _log(const char *file, int line, int panic, const char *fmt, ...);
void _log_stderr(const char *fmt, ...);
void _log_hexdump(char *data, int datalen);

#endif
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | static uint32_t nfree_mbufq; /* # free mbuf */ 24 | static struct mhdr free_mbufq; /* free mbuf q */ 25 | 26 | static size_t mbuf_chunk_size; /* mbuf chunk size - header + data (const) */ 27 | static size_t mbuf_offset; /* mbuf offset in chunk (const) */ 28 | 29 | static struct mbuf * 30 | _mbuf_get(void) 31 | { 32 | struct mbuf *mbuf; 33 | uint8_t *buf; 34 | 35 | if (!STAILQ_EMPTY(&free_mbufq)) { 36 | ASSERT(nfree_mbufq > 0); 37 | 38 | mbuf = STAILQ_FIRST(&free_mbufq); 39 | nfree_mbufq--; 40 | STAILQ_REMOVE_HEAD(&free_mbufq, next); 41 | 42 | ASSERT(mbuf->magic == MBUF_MAGIC); 43 | goto done; 44 | } 45 | 46 | buf = fc_alloc(mbuf_chunk_size); 47 | if (buf == NULL) { 48 | return NULL; 49 | } 50 | 51 | /* 52 | * mbuf header is at the tail end of the mbuf. 
This enables us to catch 53 | * buffer overrun early by asserting on the magic value during get or 54 | * put operations 55 | * 56 | * <------------- mbuf_chunk_size -------------> 57 | * +-------------------------------------------+ 58 | * | mbuf data | mbuf header | 59 | * | (mbuf_offset) | (struct mbuf) | 60 | * +-------------------------------------------+ 61 | * ^ ^ ^ ^^ 62 | * | | | || 63 | * \ | | |\ 64 | * mbuf->start \ | | mbuf->end (one byte past valid bound) 65 | * mbuf->pos \ 66 | * \ mbuf 67 | * mbuf->last (one byte past valid byte) 68 | * 69 | */ 70 | mbuf = (struct mbuf *)(buf + mbuf_offset); 71 | mbuf->magic = MBUF_MAGIC; 72 | 73 | done: 74 | STAILQ_NEXT(mbuf, next) = NULL; 75 | return mbuf; 76 | } 77 | 78 | struct mbuf * 79 | mbuf_get(void) 80 | { 81 | struct mbuf *mbuf; 82 | uint8_t *buf; 83 | 84 | mbuf = _mbuf_get(); 85 | if (mbuf == NULL) { 86 | return NULL; 87 | } 88 | 89 | buf = (uint8_t *)mbuf - mbuf_offset; 90 | mbuf->start = buf; 91 | mbuf->end = buf + mbuf_offset; 92 | 93 | ASSERT(mbuf->end - mbuf->start == (int)mbuf_offset); 94 | ASSERT(mbuf->start < mbuf->end); 95 | 96 | mbuf->pos = mbuf->start; 97 | mbuf->last = mbuf->start; 98 | 99 | log_debug(LOG_VVERB, "get mbuf %p", mbuf); 100 | 101 | return mbuf; 102 | } 103 | 104 | static void 105 | mbuf_free(struct mbuf *mbuf) 106 | { 107 | uint8_t *buf; 108 | 109 | log_debug(LOG_VVERB, "put mbuf %p len %d", mbuf, mbuf->last - mbuf->pos); 110 | 111 | ASSERT(STAILQ_NEXT(mbuf, next) == NULL); 112 | ASSERT(mbuf->magic == MBUF_MAGIC); 113 | 114 | buf = (uint8_t *)mbuf - mbuf_offset; 115 | fc_free(buf); 116 | } 117 | 118 | void 119 | mbuf_put(struct mbuf *mbuf) 120 | { 121 | log_debug(LOG_VVERB, "put mbuf %p len %d", mbuf, mbuf->last - mbuf->pos); 122 | 123 | ASSERT(STAILQ_NEXT(mbuf, next) == NULL); 124 | ASSERT(mbuf->magic == MBUF_MAGIC); 125 | 126 | nfree_mbufq++; 127 | STAILQ_INSERT_HEAD(&free_mbufq, mbuf, next); 128 | } 129 | 130 | /* 131 | * Rewind the mbuf by discarding any of the read or unread 
data that it 132 | * might hold. 133 | */ 134 | void 135 | mbuf_rewind(struct mbuf *mbuf) 136 | { 137 | mbuf->pos = mbuf->start; 138 | mbuf->last = mbuf->start; 139 | } 140 | 141 | /* 142 | * Return the length of data in mbuf. Mbuf cannot contain more than 143 | * 2^32 bytes (4G). 144 | */ 145 | uint32_t 146 | mbuf_length(struct mbuf *mbuf) 147 | { 148 | ASSERT(mbuf->last >= mbuf->pos); 149 | 150 | return (uint32_t)(mbuf->last - mbuf->pos); 151 | } 152 | 153 | /* 154 | * Return the remaining space size for any new data in mbuf. Mbuf cannot 155 | * contain more than 2^32 bytes (4G). 156 | */ 157 | uint32_t 158 | mbuf_size(struct mbuf *mbuf) 159 | { 160 | ASSERT(mbuf->end >= mbuf->last); 161 | 162 | return (uint32_t)(mbuf->end - mbuf->last); 163 | } 164 | 165 | /* 166 | * Return the maximum available space size for data in any mbuf. Mbuf cannot 167 | * contain more than 2^32 bytes (4G). 168 | */ 169 | size_t 170 | mbuf_data_size(void) 171 | { 172 | return mbuf_offset; 173 | } 174 | 175 | /* 176 | * Returns true if mbuf contains non-null pointer p; otherwise return 177 | * false. 
178 | */ 179 | bool 180 | mbuf_contains(struct mbuf *mbuf, uint8_t *p) 181 | { 182 | ASSERT(p != NULL); 183 | 184 | if (p >= mbuf->start && p < mbuf->last) { 185 | ASSERT(p < mbuf->end); 186 | return true; 187 | } 188 | 189 | return false; 190 | } 191 | 192 | /* 193 | * Insert mbuf at the tail of the mhdr Q 194 | */ 195 | void 196 | mbuf_insert(struct mhdr *mhdr, struct mbuf *mbuf) 197 | { 198 | STAILQ_INSERT_TAIL(mhdr, mbuf, next); 199 | log_debug(LOG_VVERB, "insert mbuf %p len %d", mbuf, mbuf->last - mbuf->pos); 200 | } 201 | 202 | /* 203 | * Remove mbuf from the mhdr Q 204 | */ 205 | void 206 | mbuf_remove(struct mhdr *mhdr, struct mbuf *mbuf) 207 | { 208 | log_debug(LOG_VVERB, "remove mbuf %p len %d", mbuf, mbuf->last - mbuf->pos); 209 | 210 | STAILQ_REMOVE(mhdr, mbuf, mbuf, next); 211 | STAILQ_NEXT(mbuf, next) = NULL; 212 | } 213 | 214 | /* 215 | * Copy size bytes from memory area pos to mbuf. 216 | * 217 | * The memory areas should not overlap and the mbuf should have 218 | * enough space for size bytes. 219 | */ 220 | void 221 | mbuf_copy(struct mbuf *mbuf, uint8_t *pos, size_t size) 222 | { 223 | if (size == 0) { 224 | return; 225 | } 226 | 227 | /* mbuf has space for size bytes */ 228 | ASSERT(!mbuf_full(mbuf) && size <= mbuf_size(mbuf)); 229 | 230 | /* no overlapping copy */ 231 | ASSERT(pos < mbuf->start || pos >= mbuf->end); 232 | 233 | fc_memcpy(mbuf->last, pos, size); 234 | mbuf->last += size; 235 | } 236 | 237 | /* 238 | * Copy size bytes from memory area pos to tail mbuf of mhdr Q allocating 239 | * mbufs along the way, if needed. 
240 | */ 241 | rstatus_t 242 | mbuf_copy_from(struct mhdr *mhdr, uint8_t *pos, size_t size) 243 | { 244 | struct mbuf *mbuf; 245 | size_t n; 246 | 247 | if (size == 0) { 248 | return FC_OK; 249 | } 250 | 251 | STAILQ_FOREACH(mbuf, mhdr, next) { 252 | ASSERT(mbuf->magic == MBUF_MAGIC); 253 | } 254 | 255 | do { 256 | mbuf = STAILQ_LAST(mhdr, mbuf, next); 257 | if (mbuf == NULL || mbuf_full(mbuf)) { 258 | mbuf = mbuf_get(); 259 | if (mbuf == NULL) { 260 | return FC_ENOMEM; 261 | } 262 | STAILQ_INSERT_TAIL(mhdr, mbuf, next); 263 | } 264 | 265 | n = MIN(mbuf_size(mbuf), size); 266 | 267 | mbuf_copy(mbuf, pos, n); 268 | pos += n; 269 | size -= n; 270 | 271 | } while (size > 0); 272 | 273 | return FC_OK; 274 | } 275 | 276 | /* 277 | * Copy size bytes starting from mbuf of mhdr Q at marker position to 278 | * memory area at pos. 279 | */ 280 | void 281 | mbuf_copy_to(struct mhdr *mhdr, uint8_t *marker, uint8_t *pos, size_t size) 282 | { 283 | struct mbuf *mbuf; 284 | size_t n; 285 | 286 | if (size == 0) { 287 | return; 288 | } 289 | 290 | for (mbuf = STAILQ_FIRST(mhdr); mbuf != NULL; 291 | mbuf = STAILQ_NEXT(mbuf, next)) { 292 | 293 | if (mbuf_contains(mbuf, marker)) { 294 | n = MIN(size, mbuf->last - marker); 295 | 296 | fc_memcpy(pos, marker, n); 297 | pos += n; 298 | size -= n; 299 | break; 300 | } 301 | } 302 | 303 | ASSERT(mbuf != NULL); 304 | 305 | for (mbuf = STAILQ_NEXT(mbuf, next); mbuf != NULL && size > 0; 306 | mbuf = STAILQ_NEXT(mbuf, next)) { 307 | n = MIN(size, mbuf_length(mbuf)); 308 | 309 | fc_memcpy(pos, mbuf->pos, n); 310 | pos += n; 311 | size -= n; 312 | } 313 | } 314 | 315 | /* 316 | * Split mbuf h into h and t by copying data from h to t. Before 317 | * the copy, we invoke a precopy handler cb that will copy a predefined 318 | * string to the head of t. 319 | * 320 | * Return new mbuf t, if the split was successful. 
321 | */ 322 | struct mbuf * 323 | mbuf_split(struct mhdr *h, uint8_t *pos, mbuf_copy_t cb, void *cbarg) 324 | { 325 | struct mbuf *mbuf, *nbuf; 326 | size_t size; 327 | 328 | ASSERT(!STAILQ_EMPTY(h)); 329 | 330 | mbuf = STAILQ_LAST(h, mbuf, next); 331 | ASSERT(pos >= mbuf->pos && pos <= mbuf->last); 332 | 333 | nbuf = mbuf_get(); 334 | if (nbuf == NULL) { 335 | return NULL; 336 | } 337 | 338 | if (cb != NULL) { 339 | /* precopy nbuf */ 340 | cb(nbuf, cbarg); 341 | } 342 | 343 | /* copy data from mbuf to nbuf */ 344 | size = (size_t)(mbuf->last - pos); 345 | mbuf_copy(nbuf, pos, size); 346 | 347 | /* adjust mbuf */ 348 | mbuf->last = pos; 349 | 350 | log_debug(LOG_VVERB, "split into mbuf %p len %"PRIu32" and nbuf %p len " 351 | "%"PRIu32" copied %zu bytes", mbuf, mbuf_length(mbuf), nbuf, 352 | mbuf_length(nbuf), size); 353 | 354 | return nbuf; 355 | } 356 | 357 | void 358 | mbuf_init(void) 359 | { 360 | nfree_mbufq = 0; 361 | STAILQ_INIT(&free_mbufq); 362 | 363 | mbuf_chunk_size = MBUF_SIZE; 364 | mbuf_offset = mbuf_chunk_size - MBUF_HSIZE; 365 | 366 | log_debug(LOG_DEBUG, "mbuf hsize %d chunk size %zu offset %zu length %zu", 367 | MBUF_HSIZE, mbuf_chunk_size, mbuf_offset, mbuf_offset); 368 | } 369 | 370 | void 371 | mbuf_deinit(void) 372 | { 373 | while (!STAILQ_EMPTY(&free_mbufq)) { 374 | struct mbuf *mbuf = STAILQ_FIRST(&free_mbufq); 375 | mbuf_remove(&free_mbufq, mbuf); 376 | mbuf_free(mbuf); 377 | nfree_mbufq--; 378 | } 379 | ASSERT(nfree_mbufq == 0); 380 | } 381 | -------------------------------------------------------------------------------- /src/fc_mbuf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _FC_MBUF_H_ 19 | #define _FC_MBUF_H_ 20 | 21 | #include 22 | 23 | typedef void (*mbuf_copy_t)(struct mbuf *, void *); 24 | 25 | struct mbuf { 26 | uint32_t magic; /* mbuf magic (const) */ 27 | STAILQ_ENTRY(mbuf) next; /* next mbuf */ 28 | uint8_t *pos; /* send marker (read) */ 29 | uint8_t *last; /* recv marker (write) */ 30 | uint8_t *start; /* start of buffer (const) */ 31 | uint8_t *end; /* end of buffer (const) */ 32 | }; 33 | 34 | STAILQ_HEAD(mhdr, mbuf); 35 | 36 | #define MBUF_MAGIC 0xdeadbeef 37 | #define MBUF_MIN_SIZE 512 38 | #define MBUF_MAX_SIZE 65536 39 | #define MBUF_SIZE 8192 40 | #define MBUF_HSIZE sizeof(struct mbuf) 41 | 42 | static inline bool 43 | mbuf_empty(struct mbuf *mbuf) 44 | { 45 | return mbuf->pos == mbuf->last ? true : false; 46 | } 47 | 48 | static inline bool 49 | mbuf_full(struct mbuf *mbuf) 50 | { 51 | return mbuf->last == mbuf->end ? 
true : false; 52 | } 53 | 54 | void mbuf_init(void); 55 | void mbuf_deinit(void); 56 | struct mbuf *mbuf_get(void); 57 | void mbuf_put(struct mbuf *mbuf); 58 | void mbuf_rewind(struct mbuf *mbuf); 59 | uint32_t mbuf_length(struct mbuf *mbuf); 60 | uint32_t mbuf_size(struct mbuf *mbuf); 61 | size_t mbuf_data_size(void); 62 | bool mbuf_contains(struct mbuf *mbuf, uint8_t *p); 63 | void mbuf_insert(struct mhdr *mhdr, struct mbuf *mbuf); 64 | void mbuf_remove(struct mhdr *mhdr, struct mbuf *mbuf); 65 | void mbuf_copy(struct mbuf *mbuf, uint8_t *pos, size_t size); 66 | rstatus_t mbuf_copy_from(struct mhdr *mhdr, uint8_t *pos, size_t size); 67 | void mbuf_copy_to(struct mhdr *mhdr, uint8_t *marker, uint8_t *pos, size_t size); 68 | struct mbuf *mbuf_split(struct mhdr *h, uint8_t *pos, mbuf_copy_t cb, void *cbarg); 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /src/fc_memcache.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _FC_MEMCACHE_H_ 19 | #define _FC_MEMCACHE_H_ 20 | 21 | #include 22 | 23 | #define strcrlf(m) \ 24 | (*(m) == '\r' && *((m) + 1) == '\n') 25 | 26 | #ifdef FC_LITTLE_ENDIAN 27 | 28 | #define str4cmp(m, c0, c1, c2, c3) \ 29 | (*(uint32_t *) m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0)) 30 | 31 | #define str5cmp(m, c0, c1, c2, c3, c4) \ 32 | (str4cmp(m, c0, c1, c2, c3) && (m[4] == c4)) 33 | 34 | #define str6cmp(m, c0, c1, c2, c3, c4, c5) \ 35 | (str4cmp(m, c0, c1, c2, c3) && \ 36 | (((uint32_t *) m)[1] & 0xffff) == ((c5 << 8) | c4)) 37 | 38 | #define str7cmp(m, c0, c1, c2, c3, c4, c5, c6) \ 39 | (str6cmp(m, c0, c1, c2, c3, c4, c5) && (m[6] == c6)) 40 | 41 | #define str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \ 42 | (str4cmp(m, c0, c1, c2, c3) && \ 43 | (((uint32_t *) m)[1] == ((c7 << 24) | (c6 << 16) | (c5 << 8) | c4))) 44 | 45 | #define str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8) \ 46 | (str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) && m[8] == c8) 47 | 48 | #define str10cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9) \ 49 | (str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) && \ 50 | (((uint32_t *) m)[2] & 0xffff) == ((c9 << 8) | c8)) 51 | 52 | #define str11cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10) \ 53 | (str10cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9) && (m[10] == c10)) 54 | 55 | #define str12cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11) \ 56 | (str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) && \ 57 | (((uint32_t *) m)[2] == ((c11 << 24) | (c10 << 16) | (c9 << 8) | c8))) 58 | 59 | #else 60 | 61 | #define str4cmp(m, c0, c1, c2, c3) \ 62 | (m[0] == c0 && m[1] == c1 && m[2] == c2 && m[3] == c3) 63 | 64 | #define str5cmp(m, c0, c1, c2, c3, c4) \ 65 | (str4cmp(m, c0, c1, c2, c3) && (m[4] == c4)) 66 | 67 | #define str6cmp(m, c0, c1, c2, c3, c4, c5) \ 68 | (str5cmp(m, c0, c1, c2, c3, c4) && m[5] == c5) 69 | 70 | #define str7cmp(m, c0, c1, c2, c3, c4, c5, c6) \ 71 | (str6cmp(m, c0, c1, c2, c3, c4, c5) && m[6] == c6) 72 | 73 | #define 
str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \ 74 | (str7cmp(m, c0, c1, c2, c3, c4, c5, c6) && m[7] == c7) 75 | 76 | #define str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8) \ 77 | (str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) && m[8] == c8) 78 | 79 | #define str10cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9) \ 80 | (str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8) && m[9] == c9) 81 | 82 | #define str11cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10) \ 83 | (str10cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9) && m[10] == c10) 84 | 85 | #define str12cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11) \ 86 | (str11cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10) && m[11] == c11) 87 | 88 | #endif 89 | 90 | void memcache_parse_req(struct msg *r); 91 | void memcache_pre_splitcopy(struct mbuf *mbuf, void *arg); 92 | rstatus_t memcache_post_splitcopy(struct msg *r); 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /src/fc_message.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _FC_MESSAGE_H_ 19 | #define _FC_MESSAGE_H_ 20 | 21 | #include 22 | 23 | typedef void (*msg_parse_t)(struct msg *); 24 | 25 | #define MSG_CODEC(ACTION) \ 26 | ACTION( UNKNOWN, ""/* unknown */ ) \ 27 | ACTION( REQ_GET, "get " ) \ 28 | ACTION( REQ_GETS, "gets " ) \ 29 | ACTION( REQ_DELETE, "delete " ) \ 30 | ACTION( REQ_CAS, "cas " ) \ 31 | ACTION( REQ_SET, "set " ) \ 32 | ACTION( REQ_ADD, "add " ) \ 33 | ACTION( REQ_REPLACE, "replace " ) \ 34 | ACTION( REQ_APPEND, "append " ) \ 35 | ACTION( REQ_PREPEND, "prepend " ) \ 36 | ACTION( REQ_INCR, "incr " ) \ 37 | ACTION( REQ_DECR, "decr " ) \ 38 | ACTION( REQ_STATS, "stats " ) \ 39 | ACTION( REQ_VERSION, "version " ) \ 40 | ACTION( REQ_QUIT, "quit " ) \ 41 | ACTION( RSP_NUM, "" /* na */ ) \ 42 | ACTION( RSP_VALUE, "VALUE " ) \ 43 | ACTION( RSP_END, "END\r\n" ) \ 44 | ACTION( RSP_STORED, "STORED\r\n" ) \ 45 | ACTION( RSP_NOT_STORED, "NOT_STORED\r\n" ) \ 46 | ACTION( RSP_EXISTS, "EXISTS\r\n" ) \ 47 | ACTION( RSP_NOT_FOUND, "NOT_FOUND\r\n" ) \ 48 | ACTION( RSP_DELETED, "DELETED\r\n" ) \ 49 | ACTION( RSP_CLIENT_ERROR, "CLIENT_ERROR " ) \ 50 | ACTION( RSP_SERVER_ERROR, "SERVER_ERROR " ) \ 51 | ACTION( RSP_VERSION, "VERSION fatcache\r\n" ) \ 52 | ACTION( CRLF, "\r\n" /* empty */ ) \ 53 | ACTION( EMPTY, "" /* empty */ ) \ 54 | 55 | #define DEFINE_ACTION(_hash, _name) MSG_##_hash, 56 | typedef enum msg_type { 57 | MSG_CODEC( DEFINE_ACTION ) 58 | MSG_SENTINEL 59 | } msg_type_t; 60 | #undef DEFINE_ACTION 61 | 62 | typedef enum msg_parse_result { 63 | MSG_PARSE_OK, /* parsing ok */ 64 | MSG_PARSE_ERROR, /* parsing error */ 65 | MSG_PARSE_REPAIR, /* more to parse -> repair parsed & unparsed data */ 66 | MSG_PARSE_FRAGMENT, /* multi-vector request -> fragment */ 67 | MSG_PARSE_AGAIN, /* incomplete -> parse again */ 68 | } msg_parse_result_t; 69 | 70 | struct msg { 71 | TAILQ_ENTRY(msg) c_tqe; /* link in connection q */ 72 | TAILQ_ENTRY(msg) m_tqe; /* link in send q / free q */ 73 | 74 | uint64_t id; /* message 
id */ 75 | struct msg *peer; /* message peer */ 76 | struct conn *owner; /* message connection owner */ 77 | 78 | struct mhdr mhdr; /* message mbuf header */ 79 | uint32_t mlen; /* message length */ 80 | 81 | int state; /* current parser state */ 82 | uint8_t *pos; /* parser position marker */ 83 | uint8_t *token; /* token marker */ 84 | 85 | msg_parse_t parser; /* message parser */ 86 | msg_parse_result_t result; /* message parsing result */ 87 | 88 | msg_type_t type; /* message type */ 89 | 90 | uint8_t *key_start; /* key start */ 91 | uint8_t *key_end; /* key end */ 92 | 93 | uint32_t hash; /* key hash */ 94 | uint8_t md[20]; /* key message digest */ 95 | 96 | uint32_t flags; /* flags */ 97 | uint32_t expiry; /* expiry */ 98 | uint32_t vlen; /* value length */ 99 | uint32_t rvlen; /* running vlen used by parsing fsa */ 100 | uint8_t *value; /* value marker */ 101 | uint64_t cas; /* cas */ 102 | uint64_t num; /* number */ 103 | 104 | struct msg *frag_owner; /* owner of fragment message */ 105 | uint32_t nfrag; /* # fragment */ 106 | uint64_t frag_id; /* id of fragmented message */ 107 | 108 | err_t err; /* errno on error? */ 109 | unsigned error:1; /* error? */ 110 | unsigned request:1; /* request? or response? */ 111 | unsigned quit:1; /* quit request? */ 112 | unsigned noreply:1; /* noreply? */ 113 | unsigned done:1; /* done? */ 114 | unsigned first_fragment:1;/* first fragment? */ 115 | unsigned last_fragment:1; /* last fragment? */ 116 | unsigned swallow:1; /* swallow response? 
*/ 117 | }; 118 | 119 | TAILQ_HEAD(msg_tqh, msg); 120 | 121 | bool msg_empty(struct msg *msg); 122 | rstatus_t msg_recv(struct context *ctx, struct conn *conn); 123 | rstatus_t msg_send(struct context *ctx, struct conn *conn); 124 | 125 | struct msg *req_get(struct conn *conn); 126 | void req_put(struct msg *msg); 127 | struct msg *req_recv_next(struct context *ctx, struct conn *conn, bool alloc); 128 | 129 | struct msg *msg_get(struct conn *conn, bool request); 130 | void msg_put(struct msg *msg); 131 | 132 | void msg_init(void); 133 | void msg_deinit(void); 134 | 135 | struct msg *rsp_get(struct conn *conn); 136 | void rsp_put(struct msg *msg); 137 | 138 | bool req_done(struct conn *conn, struct msg *msg); 139 | struct msg *rsp_send_next(struct context *ctx, struct conn *conn); 140 | 141 | void req_enqueue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg); 142 | void req_dequeue_omsgq(struct context *ctx, struct conn *conn, struct msg *msg); 143 | void rsp_send_done(struct context *ctx, struct conn *conn, struct msg *msg); 144 | void req_recv_done(struct context *ctx, struct conn *conn, struct msg *msg, struct msg *nmsg); 145 | 146 | void req_process_error(struct context *ctx, struct conn *conn, struct msg *msg, int err); 147 | 148 | 149 | void rsp_send_status(struct context *ctx, struct conn *conn, struct msg *msg, msg_type_t rsp_type); 150 | void rsp_send_error(struct context *ctx, struct conn *conn, struct msg *msg, msg_type_t rsp_type, int err); 151 | void rsp_send_value(struct context *ctx, struct conn *conn, struct msg *msg, struct item *it, uint64_t cas); 152 | void rsp_send_num(struct context *ctx, struct conn *conn, struct msg *msg, struct item *it); 153 | void rsp_send_string(struct context *ctx, struct conn *conn, struct msg *msg, struct string *str); 154 | 155 | #endif 156 | -------------------------------------------------------------------------------- /src/fc_server.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | extern struct settings settings; 22 | 23 | #define SERVER_BACKLOG 1024 24 | 25 | static rstatus_t 26 | server_accept(struct context *ctx, struct conn *s) 27 | { 28 | rstatus_t status; 29 | struct conn *c; 30 | int sd; 31 | 32 | ASSERT(!s->client); 33 | ASSERT(s->sd > 0); 34 | ASSERT(s->recv_active && s->recv_ready); 35 | 36 | for (;;) { 37 | sd = accept(s->sd, NULL, NULL); 38 | if (sd < 0) { 39 | if (errno == EINTR) { 40 | log_debug(LOG_VERB, "accept on s %d not ready - eintr", s->sd); 41 | continue; 42 | } 43 | 44 | if (errno == EAGAIN || errno == EWOULDBLOCK) { 45 | log_debug(LOG_VERB, "accept on s %d not ready - eagain", s->sd); 46 | s->recv_ready = 0; 47 | return FC_OK; 48 | } 49 | 50 | log_error("accept on s %d failed: %s", s->sd, strerror(errno)); 51 | return FC_ERROR; 52 | } 53 | 54 | break; 55 | } 56 | 57 | c = conn_get(sd, true); 58 | if (c == NULL) { 59 | log_error("get conn for c %d from s %d failed: %s", sd, s->sd, 60 | strerror(errno)); 61 | status = close(sd); 62 | if (status < 0) { 63 | log_error("close c %d failed, ignored: %s", sd, strerror(errno)); 64 | } 65 | return FC_ENOMEM; 66 | } 67 | 68 | status = fc_set_nonblocking(sd); 69 | if (status < 0) { 70 | 
log_error("set nonblock on c %d failed: %s", sd, strerror(errno)); 71 | return FC_ERROR; 72 | } 73 | 74 | status = fc_set_tcpnodelay(c->sd); 75 | if (status < 0) { 76 | log_warn("set tcp nodely on c %d failed, ignored: %s", sd, 77 | strerror(errno)); 78 | } 79 | 80 | status = fc_set_keepalive(c->sd); 81 | if (status < 0) { 82 | log_warn("set tcp keepalive on c %d failed, ignored: %s", sd, 83 | strerror(errno)); 84 | } 85 | 86 | status = event_add_conn(ctx->ep, c); 87 | if (status < 0) { 88 | log_error("event add conn e %d c %d failed: %s", ctx->ep, sd, 89 | strerror(errno)); 90 | return FC_ERROR; 91 | } 92 | 93 | log_debug(LOG_NOTICE, "accepted c %d on s %d", c->sd, s->sd); 94 | 95 | return FC_OK; 96 | } 97 | 98 | rstatus_t 99 | server_recv(struct context *ctx, struct conn *conn) 100 | { 101 | rstatus_t status; 102 | 103 | ASSERT(!conn->client); 104 | ASSERT(conn->recv_active); 105 | 106 | conn->recv_ready = 1; 107 | do { 108 | status = server_accept(ctx, conn); 109 | if (status != FC_OK) { 110 | return status; 111 | } 112 | } while (conn->recv_ready); 113 | 114 | return FC_OK; 115 | } 116 | 117 | rstatus_t 118 | server_listen(struct context *ctx) 119 | { 120 | rstatus_t status; 121 | struct sockinfo si; 122 | struct string addrstr; 123 | int sd, family; 124 | socklen_t addrlen; 125 | struct sockaddr *addr; 126 | struct conn *s; 127 | 128 | string_set_raw(&addrstr, settings.addr); 129 | status = fc_resolve(&addrstr, settings.port, &si); 130 | if (status != FC_OK) { 131 | return FC_ERROR; 132 | } 133 | 134 | family = si.family; 135 | addrlen = si.addrlen; 136 | addr = (struct sockaddr *)&si.addr; 137 | 138 | sd = socket(family, SOCK_STREAM, 0); 139 | if (sd < 0) { 140 | log_error("socket failed: %s", strerror(errno)); 141 | return FC_ERROR; 142 | } 143 | 144 | status = fc_set_reuseaddr(sd); 145 | if (status != FC_OK) { 146 | log_error("reuse of sd %d failed: %s", sd, strerror(errno)); 147 | return FC_ERROR; 148 | } 149 | 150 | status = bind(sd, addr, addrlen); 151 | 
if (status < 0) { 152 | log_error("bind on sd %d failed: %s", sd, strerror(errno)); 153 | return FC_ERROR; 154 | } 155 | 156 | status = listen(sd, SERVER_BACKLOG); 157 | if (status < 0) { 158 | log_error("listen on sd %d failed: %s", sd, strerror(errno)); 159 | return FC_ERROR; 160 | } 161 | 162 | status = fc_set_nonblocking(sd); 163 | if (status != FC_OK) { 164 | log_error("set nonblock on sd %d failed: %s", sd, strerror(errno)); 165 | return FC_ERROR; 166 | } 167 | 168 | s = conn_get(sd, false); 169 | if (s == NULL) { 170 | log_error("get conn for s %d failed: %s", sd, strerror(errno)); 171 | status = close(sd); 172 | if (status < 0) { 173 | log_error("close s %d failed, ignored: %s", sd, strerror(errno)); 174 | } 175 | return FC_ENOMEM; 176 | } 177 | 178 | status = event_add_conn(ctx->ep, s); 179 | if (status < 0) { 180 | log_error("event add conn e %d s %d failed: %s", ctx->ep, sd, 181 | strerror(errno)); 182 | return FC_ERROR; 183 | } 184 | 185 | status = event_del_out(ctx->ep, s); 186 | if (status != FC_OK) { 187 | log_error("event del conn e %d s %d failed: %s", ctx->ep, sd, 188 | strerror(errno)); 189 | return status; 190 | } 191 | 192 | log_debug(LOG_NOTICE, "server listening on s %d", s->sd); 193 | 194 | return FC_OK; 195 | } 196 | -------------------------------------------------------------------------------- /src/fc_server.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _FC_SERVER_H_ 19 | #define _FC_SERVER_H_ 20 | 21 | #include 22 | 23 | rstatus_t server_recv(struct context *ctx, struct conn *conn); 24 | rstatus_t server_listen(struct context *ctx); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/fc_settings.h: -------------------------------------------------------------------------------- 1 | #ifndef _FC_SETTINGS_H_ 2 | #define _FC_SETTINGS_H_ 3 | struct settings { 4 | bool daemonize; /* daemonize? */ 5 | 6 | char *log_filename; /* log filename */ 7 | int verbose; /* log verbosity level */ 8 | 9 | int port; /* listening port */ 10 | char *addr; /* listening address */ 11 | 12 | int hash_power; /* index hash table size as power of 2 */ 13 | 14 | double factor; /* item chunk size growth factor */ 15 | size_t max_slab_memory; /* maximum memory allowed for slabs in bytes */ 16 | size_t max_index_memory; /* maximum memory allowed for in bytes */ 17 | size_t chunk_size; /* minimum item chunk size */ 18 | size_t max_chunk_size; /* maximum item chunk size */ 19 | size_t slab_size; /* slab size */ 20 | 21 | size_t profile[SLABCLASS_MAX_IDS]; /* slab profile */ 22 | uint8_t profile_last_id; /* last id in slab profile */ 23 | 24 | char *ssd_device; /* path to ssd device file */ 25 | 26 | uint32_t server_id; /* server id */ 27 | uint32_t server_n; /* # server */ 28 | }; 29 | #endif //_FC_SETTINGS_H_ 30 | -------------------------------------------------------------------------------- /src/fc_sha1.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /* 19 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 20 | * All rights reserved. 21 | * 22 | * Redistribution and use in source and binary forms, with or without 23 | * modification, are permitted provided that the following conditions 24 | * are met: 25 | * 1. Redistributions of source code must retain the above copyright 26 | * notice, this list of conditions and the following disclaimer. 27 | * 2. Redistributions in binary form must reproduce the above copyright 28 | * notice, this list of conditions and the following disclaimer in the 29 | * documentation and/or other materials provided with the distribution. 30 | * 3. Neither the name of the project nor the names of its contributors 31 | * may be used to endorse or promote products derived from this software 32 | * without specific prior written permission. 33 | * 34 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 35 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 37 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 38 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 39 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 40 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 41 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 42 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 43 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 44 | * SUCH DAMAGE. 45 | */ 46 | 47 | /* 48 | * FIPS pub 180-1: Secure Hash Algorithm (SHA-1) 49 | * based on: http://csrc.nist.gov/fips/fip180-1.txt 50 | * implemented by Jun-ichiro itojun Itoh 51 | */ 52 | 53 | #include 54 | 55 | /* constant table */ 56 | static uint32_t _K[] = { 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 }; 57 | 58 | #define K(t) _K[(t) / 20] 59 | 60 | #define F0(b, c, d) (((b) & (c)) | ((~(b)) & (d))) 61 | #define F1(b, c, d) (((b) ^ (c)) ^ (d)) 62 | #define F2(b, c, d) (((b) & (c)) | ((b) & (d)) | ((c) & (d))) 63 | #define F3(b, c, d) (((b) ^ (c)) ^ (d)) 64 | 65 | #define S(n, x) (((x) << (n)) | ((x) >> (32 - n))) 66 | 67 | #define H(n) (ctxt->h.b32[(n)]) 68 | #define COUNT (ctxt->count) 69 | #define BCOUNT (ctxt->c.b64[0] / 8) 70 | #define W(n) (ctxt->m.b32[(n)]) 71 | 72 | #define PUTBYTE(x) do { \ 73 | ctxt->m.b8[(COUNT % 64)] = (x); \ 74 | COUNT++; \ 75 | COUNT %= 64; \ 76 | ctxt->c.b64[0] += 8; \ 77 | if (COUNT % 64 == 0) { \ 78 | sha1_step(ctxt); \ 79 | } \ 80 | } while (0) 81 | 82 | #define PUTPAD(x) do { \ 83 | ctxt->m.b8[(COUNT % 64)] = (x); \ 84 | COUNT++; \ 85 | COUNT %= 64; \ 86 | if (COUNT % 64 == 0) { \ 87 | sha1_step(ctxt); \ 88 | } \ 89 | } while (0) 90 | 91 | static void 92 | sha1_step(struct sha1_ctxt *ctxt) 93 | { 94 | uint32_t a, b, c, d, e; 95 | size_t t, s; 96 | uint32_t tmp; 97 | 98 | #ifdef FC_LITTLE_ENDIAN 99 | struct sha1_ctxt tctxt; 100 | bcopy(&ctxt->m.b8[0], &tctxt.m.b8[0], 64); 101 | 
ctxt->m.b8[0] = tctxt.m.b8[3]; ctxt->m.b8[1] = tctxt.m.b8[2]; 102 | ctxt->m.b8[2] = tctxt.m.b8[1]; ctxt->m.b8[3] = tctxt.m.b8[0]; 103 | ctxt->m.b8[4] = tctxt.m.b8[7]; ctxt->m.b8[5] = tctxt.m.b8[6]; 104 | ctxt->m.b8[6] = tctxt.m.b8[5]; ctxt->m.b8[7] = tctxt.m.b8[4]; 105 | ctxt->m.b8[8] = tctxt.m.b8[11]; ctxt->m.b8[9] = tctxt.m.b8[10]; 106 | ctxt->m.b8[10] = tctxt.m.b8[9]; ctxt->m.b8[11] = tctxt.m.b8[8]; 107 | ctxt->m.b8[12] = tctxt.m.b8[15]; ctxt->m.b8[13] = tctxt.m.b8[14]; 108 | ctxt->m.b8[14] = tctxt.m.b8[13]; ctxt->m.b8[15] = tctxt.m.b8[12]; 109 | ctxt->m.b8[16] = tctxt.m.b8[19]; ctxt->m.b8[17] = tctxt.m.b8[18]; 110 | ctxt->m.b8[18] = tctxt.m.b8[17]; ctxt->m.b8[19] = tctxt.m.b8[16]; 111 | ctxt->m.b8[20] = tctxt.m.b8[23]; ctxt->m.b8[21] = tctxt.m.b8[22]; 112 | ctxt->m.b8[22] = tctxt.m.b8[21]; ctxt->m.b8[23] = tctxt.m.b8[20]; 113 | ctxt->m.b8[24] = tctxt.m.b8[27]; ctxt->m.b8[25] = tctxt.m.b8[26]; 114 | ctxt->m.b8[26] = tctxt.m.b8[25]; ctxt->m.b8[27] = tctxt.m.b8[24]; 115 | ctxt->m.b8[28] = tctxt.m.b8[31]; ctxt->m.b8[29] = tctxt.m.b8[30]; 116 | ctxt->m.b8[30] = tctxt.m.b8[29]; ctxt->m.b8[31] = tctxt.m.b8[28]; 117 | ctxt->m.b8[32] = tctxt.m.b8[35]; ctxt->m.b8[33] = tctxt.m.b8[34]; 118 | ctxt->m.b8[34] = tctxt.m.b8[33]; ctxt->m.b8[35] = tctxt.m.b8[32]; 119 | ctxt->m.b8[36] = tctxt.m.b8[39]; ctxt->m.b8[37] = tctxt.m.b8[38]; 120 | ctxt->m.b8[38] = tctxt.m.b8[37]; ctxt->m.b8[39] = tctxt.m.b8[36]; 121 | ctxt->m.b8[40] = tctxt.m.b8[43]; ctxt->m.b8[41] = tctxt.m.b8[42]; 122 | ctxt->m.b8[42] = tctxt.m.b8[41]; ctxt->m.b8[43] = tctxt.m.b8[40]; 123 | ctxt->m.b8[44] = tctxt.m.b8[47]; ctxt->m.b8[45] = tctxt.m.b8[46]; 124 | ctxt->m.b8[46] = tctxt.m.b8[45]; ctxt->m.b8[47] = tctxt.m.b8[44]; 125 | ctxt->m.b8[48] = tctxt.m.b8[51]; ctxt->m.b8[49] = tctxt.m.b8[50]; 126 | ctxt->m.b8[50] = tctxt.m.b8[49]; ctxt->m.b8[51] = tctxt.m.b8[48]; 127 | ctxt->m.b8[52] = tctxt.m.b8[55]; ctxt->m.b8[53] = tctxt.m.b8[54]; 128 | ctxt->m.b8[54] = tctxt.m.b8[53]; ctxt->m.b8[55] = tctxt.m.b8[52]; 129 | 
ctxt->m.b8[56] = tctxt.m.b8[59]; ctxt->m.b8[57] = tctxt.m.b8[58]; 130 | ctxt->m.b8[58] = tctxt.m.b8[57]; ctxt->m.b8[59] = tctxt.m.b8[56]; 131 | ctxt->m.b8[60] = tctxt.m.b8[63]; ctxt->m.b8[61] = tctxt.m.b8[62]; 132 | ctxt->m.b8[62] = tctxt.m.b8[61]; ctxt->m.b8[63] = tctxt.m.b8[60]; 133 | #endif 134 | 135 | a = H(0); b = H(1); c = H(2); d = H(3); e = H(4); 136 | 137 | for (t = 0; t < 20; t++) { 138 | s = t & 0x0f; 139 | if (t >= 16) { 140 | W(s) = S(1, W((s+13) & 0x0f) ^ W((s+8) & 0x0f) ^ W((s+2) & 0x0f) ^ W(s)); 141 | } 142 | tmp = S(5, a) + F0(b, c, d) + e + W(s) + K(t); 143 | e = d; d = c; c = S(30, b); b = a; a = tmp; 144 | } 145 | for (t = 20; t < 40; t++) { 146 | s = t & 0x0f; 147 | W(s) = S(1, W((s+13) & 0x0f) ^ W((s+8) & 0x0f) ^ W((s+2) & 0x0f) ^ W(s)); 148 | tmp = S(5, a) + F1(b, c, d) + e + W(s) + K(t); 149 | e = d; d = c; c = S(30, b); b = a; a = tmp; 150 | } 151 | for (t = 40; t < 60; t++) { 152 | s = t & 0x0f; 153 | W(s) = S(1, W((s+13) & 0x0f) ^ W((s+8) & 0x0f) ^ W((s+2) & 0x0f) ^ W(s)); 154 | tmp = S(5, a) + F2(b, c, d) + e + W(s) + K(t); 155 | e = d; d = c; c = S(30, b); b = a; a = tmp; 156 | } 157 | for (t = 60; t < 80; t++) { 158 | s = t & 0x0f; 159 | W(s) = S(1, W((s+13) & 0x0f) ^ W((s+8) & 0x0f) ^ W((s+2) & 0x0f) ^ W(s)); 160 | tmp = S(5, a) + F3(b, c, d) + e + W(s) + K(t); 161 | e = d; d = c; c = S(30, b); b = a; a = tmp; 162 | } 163 | 164 | H(0) = H(0) + a; 165 | H(1) = H(1) + b; 166 | H(2) = H(2) + c; 167 | H(3) = H(3) + d; 168 | H(4) = H(4) + e; 169 | 170 | bzero(&ctxt->m.b8[0], 64); 171 | } 172 | 173 | void 174 | sha1_init(struct sha1_ctxt *ctxt) 175 | { 176 | bzero(ctxt, sizeof(struct sha1_ctxt)); 177 | H(0) = 0x67452301; 178 | H(1) = 0xefcdab89; 179 | H(2) = 0x98badcfe; 180 | H(3) = 0x10325476; 181 | H(4) = 0xc3d2e1f0; 182 | } 183 | 184 | void 185 | sha1_pad(struct sha1_ctxt *ctxt) 186 | { 187 | size_t padlen; /* pad length in bytes */ 188 | size_t padstart; 189 | 190 | PUTPAD(0x80); 191 | 192 | padstart = COUNT % 64; 193 | padlen = 64 - 
padstart; 194 | if (padlen < 8) { 195 | bzero(&ctxt->m.b8[padstart], padlen); 196 | COUNT += padlen; 197 | COUNT %= 64; 198 | sha1_step(ctxt); 199 | padstart = COUNT % 64; /* should be 0 */ 200 | padlen = 64 - padstart; /* should be 64 */ 201 | } 202 | bzero(&ctxt->m.b8[padstart], padlen - 8); 203 | COUNT += (padlen - 8); 204 | COUNT %= 64; 205 | #ifdef FC_LITTLE_ENDIAN 206 | PUTPAD(ctxt->c.b8[7]); PUTPAD(ctxt->c.b8[6]); 207 | PUTPAD(ctxt->c.b8[5]); PUTPAD(ctxt->c.b8[4]); 208 | PUTPAD(ctxt->c.b8[3]); PUTPAD(ctxt->c.b8[2]); 209 | PUTPAD(ctxt->c.b8[1]); PUTPAD(ctxt->c.b8[0]); 210 | #else 211 | PUTPAD(ctxt->c.b8[0]); PUTPAD(ctxt->c.b8[1]); 212 | PUTPAD(ctxt->c.b8[2]); PUTPAD(ctxt->c.b8[3]); 213 | PUTPAD(ctxt->c.b8[4]); PUTPAD(ctxt->c.b8[5]); 214 | PUTPAD(ctxt->c.b8[6]); PUTPAD(ctxt->c.b8[7]); 215 | #endif 216 | } 217 | 218 | void 219 | sha1_loop(struct sha1_ctxt *ctxt, uint8_t *input, size_t len) 220 | { 221 | size_t gaplen; 222 | size_t gapstart; 223 | size_t off; 224 | size_t copysiz; 225 | 226 | off = 0; 227 | 228 | while (off < len) { 229 | gapstart = COUNT % 64; 230 | gaplen = 64 - gapstart; 231 | 232 | copysiz = (gaplen < len - off) ? 
gaplen : len - off; 233 | bcopy(&input[off], &ctxt->m.b8[gapstart], copysiz); 234 | COUNT += copysiz; 235 | COUNT %= 64; 236 | ctxt->c.b64[0] += copysiz * 8; 237 | if (COUNT % 64 == 0) 238 | sha1_step(ctxt); 239 | off += copysiz; 240 | } 241 | } 242 | 243 | void 244 | sha1_result(struct sha1_ctxt *ctxt, uint8_t *digest) 245 | { 246 | sha1_pad(ctxt); 247 | #ifdef FC_LITTLE_ENDIAN 248 | digest[0] = ctxt->h.b8[3]; digest[1] = ctxt->h.b8[2]; 249 | digest[2] = ctxt->h.b8[1]; digest[3] = ctxt->h.b8[0]; 250 | digest[4] = ctxt->h.b8[7]; digest[5] = ctxt->h.b8[6]; 251 | digest[6] = ctxt->h.b8[5]; digest[7] = ctxt->h.b8[4]; 252 | digest[8] = ctxt->h.b8[11]; digest[9] = ctxt->h.b8[10]; 253 | digest[10] = ctxt->h.b8[9]; digest[11] = ctxt->h.b8[8]; 254 | digest[12] = ctxt->h.b8[15]; digest[13] = ctxt->h.b8[14]; 255 | digest[14] = ctxt->h.b8[13]; digest[15] = ctxt->h.b8[12]; 256 | digest[16] = ctxt->h.b8[19]; digest[17] = ctxt->h.b8[18]; 257 | digest[18] = ctxt->h.b8[17]; digest[19] = ctxt->h.b8[16]; 258 | #else 259 | bcopy(&ctxt->h.b8[0], digest, 20); 260 | #endif 261 | } 262 | 263 | uint32_t 264 | sha1_hash(uint8_t *md) 265 | { 266 | return ((uint32_t) (md[3] & 0xff) << 24) | 267 | ((uint32_t) (md[2] & 0xff) << 16) | 268 | ((uint32_t) (md[1] & 0xff) << 8) | 269 | (md[0] & 0xff); 270 | } 271 | 272 | void 273 | sha1(uint8_t *d, size_t n, uint8_t *md) 274 | { 275 | struct sha1_ctxt ctxt; 276 | 277 | sha1_init(&ctxt); 278 | sha1_loop(&ctxt, d, n); 279 | sha1_result(&ctxt, md); 280 | } 281 | -------------------------------------------------------------------------------- /src/fc_sha1.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /* 19 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 20 | * All rights reserved. 21 | * 22 | * Redistribution and use in source and binary forms, with or without 23 | * modification, are permitted provided that the following conditions 24 | * are met: 25 | * 1. Redistributions of source code must retain the above copyright 26 | * notice, this list of conditions and the following disclaimer. 27 | * 2. Redistributions in binary form must reproduce the above copyright 28 | * notice, this list of conditions and the following disclaimer in the 29 | * documentation and/or other materials provided with the distribution. 30 | * 3. Neither the name of the project nor the names of its contributors 31 | * may be used to endorse or promote products derived from this software 32 | * without specific prior written permission. 33 | * 34 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 35 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 37 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * FIPS pub 180-1: Secure Hash Algorithm (SHA-1)
 * based on: http://csrc.nist.gov/fips/fip180-1.txt
 * implemented by Jun-ichiro itojun Itoh
 */

#ifndef _FC_SHA1_H_
#define _FC_SHA1_H_

/*
 * SHA-1 working state. Each union gives byte-wise and word-wise access to
 * the same storage. This header includes nothing itself, so the fixed-width
 * integer types and size_t must already be visible where it is included.
 */
struct sha1_ctxt {
    union {
        uint8_t  b8[20];
        uint32_t b32[5];
    } h;                /* hash state, five 32-bit words */
    union {
        uint8_t  b8[8];
        uint64_t b64[1];
    } c;                /* message length in bits */
    union {
        uint8_t  b8[64];
        uint32_t b32[16];
    } m;                /* current 64-byte message block */
    uint8_t count;      /* fill offset within m, kept modulo 64 */
};

void sha1_init(struct sha1_ctxt *ctxt);
void sha1_pad(struct sha1_ctxt *ctxt);
void sha1_loop(struct sha1_ctxt *ctxt, uint8_t *input, size_t len);
void sha1_result(struct sha1_ctxt *ctxt, uint8_t *digest);
void sha1(uint8_t *d, size_t n, uint8_t *md);
uint32_t sha1_hash(uint8_t *md);

#endif

--------------------------------------------------------------------------------
/src/fc_signal.c:
--------------------------------------------------------------------------------
/*
 * fatcache - memcache on ssd.
 * Copyright (C) 2013 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | static struct signal signals[] = { 24 | { SIGUSR1, "SIGUSR1", 0, signal_handler }, 25 | { SIGUSR2, "SIGUSR2", 0, signal_handler }, 26 | { SIGTTIN, "SIGTTIN", 0, signal_handler }, 27 | { SIGTTOU, "SIGTTOU", 0, signal_handler }, 28 | { SIGHUP, "SIGHUP", 0, signal_handler }, 29 | { SIGINT, "SIGINT", 0, signal_handler }, 30 | { SIGSEGV, "SIGSEGV", SA_RESETHAND, signal_handler }, 31 | { SIGPIPE, "SIGPIPE", 0, SIG_IGN }, 32 | { 0, NULL, 0, NULL } 33 | }; 34 | 35 | rstatus_t 36 | signal_init(void) 37 | { 38 | struct signal *sig; 39 | 40 | for (sig = signals; sig->signo != 0; sig++) { 41 | rstatus_t status; 42 | struct sigaction sa; 43 | 44 | memset(&sa, 0, sizeof(sa)); 45 | sa.sa_handler = sig->handler; 46 | sa.sa_flags = sig->flags; 47 | sigemptyset(&sa.sa_mask); 48 | 49 | status = sigaction(sig->signo, &sa, NULL); 50 | if (status < 0) { 51 | log_error("sigaction(%s) failed: %s", sig->signame, 52 | strerror(errno)); 53 | return FC_ERROR; 54 | } 55 | } 56 | 57 | return FC_OK; 58 | } 59 | 60 | void 61 | signal_deinit(void) 62 | { 63 | } 64 | 65 | void 66 | signal_handler(int signo) 67 | { 68 | struct signal *sig; 69 | void (*action)(void); 70 | char *actionstr; 71 | bool done; 72 | 73 | for (sig = signals; sig->signo != 0; sig++) { 74 | if (sig->signo == signo) { 75 | break; 76 | } 77 | } 78 | ASSERT(sig->signo != 0); 79 | 80 | actionstr = ""; 81 | action = NULL; 82 | done = false; 83 | 84 | switch (signo) { 85 | case SIGUSR1: 86 | break; 87 | 88 | case SIGUSR2: 
89 | break; 90 | 91 | case SIGTTIN: 92 | actionstr = ", up logging level"; 93 | action = log_level_up; 94 | break; 95 | 96 | case SIGTTOU: 97 | actionstr = ", down logging level"; 98 | action = log_level_down; 99 | break; 100 | 101 | case SIGHUP: 102 | actionstr = ", reopening log file"; 103 | action = log_reopen; 104 | break; 105 | 106 | case SIGINT: 107 | done = true; 108 | actionstr = ", exiting"; 109 | break; 110 | 111 | case SIGSEGV: 112 | fc_stacktrace(1); 113 | actionstr = ", core dumping"; 114 | raise(SIGSEGV); 115 | break; 116 | 117 | default: 118 | NOT_REACHED(); 119 | } 120 | 121 | log_debug(LOG_NOTICE, "signal %d (%s) received%s", signo, sig->signame, 122 | actionstr); 123 | 124 | if (action != NULL) { 125 | action(); 126 | } 127 | 128 | if (done) { 129 | exit(1); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/fc_signal.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
 */

#ifndef _FC_SIGNAL_H_
#define _FC_SIGNAL_H_

/*
 * One row of the signal table in fc_signal.c: which signal to catch and
 * how to install it.
 */
struct signal {
    int  signo;                 /* signal number; 0 terminates the table */
    char *signame;              /* printable name, used in log messages */
    int  flags;                 /* copied into sigaction.sa_flags */
    void (*handler)(int signo); /* copied into sigaction.sa_handler */
};

/* Install every table entry; returns FC_OK or FC_ERROR. */
rstatus_t signal_init(void);
/* Counterpart of signal_init(). */
void signal_deinit(void);
/* Handler shared by the table entries. */
void signal_handler(int signo);

#endif

--------------------------------------------------------------------------------
/src/fc_slab.h:
--------------------------------------------------------------------------------
/*
 * fatcache - memcache on ssd.
 * Copyright (C) 2013 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _FC_SLAB_H_
#define _FC_SLAB_H_

/*
 * Header that precedes the items stored in a slab. 'data' marks where the
 * payload begins; SLAB_HDR_SIZE is defined as its offset.
 */
struct slab {
    uint32_t  magic;     /* slab magic (const) */
    uint32_t  sid;       /* slab id */
    uint8_t   cid;       /* slab class id */
    uint8_t   unused[3]; /* unused */
    uint8_t   data[1];   /* opaque data */
};

#define SLAB_MAGIC      0xdeadbeef
#define SLAB_HDR_SIZE   offsetof(struct slab, data)
#define SLAB_MIN_SIZE   ((size_t) MB)
#define SLAB_SIZE       MB
#define SLAB_MAX_SIZE   ((size_t) (512 * MB))

/*
 * Bookkeeping record for one slab. The record is linked into exactly one
 * queue at a time through 'tqe'.
 */
struct slabinfo {
    uint32_t              sid;    /* slab id (const) */
    uint32_t              addr;   /* address as slab_size offset from memory / disk base */
    TAILQ_ENTRY(slabinfo) tqe;    /* link in free q / partial q / full q */
    uint32_t              nalloc; /* # item alloced (monotonic) */
    uint32_t              nfree;  /* # item freed (monotonic) */
    uint8_t               cid;    /* class id */
    unsigned              mem:1;  /* memory? */
};

TAILQ_HEAD(slabhinfo, slabinfo);

/*
 * Per-class description: geometry of the items of this class plus the
 * counters that fc_stats.c reports for the class.
 */
struct slabclass {
    uint32_t         nitem;           /* # item per slab (const) */
    size_t           size;            /* item size (const) */
    size_t           slack;           /* unusable slack space (const) */
    struct slabhinfo partial_msinfoq; /* partial slabinfo q */
    uint32_t         nmslab;          /* # memory slab */
    uint32_t         ndslab;          /* # disk slab */
    uint64_t         nevict;          /* # evictions (reported as "total_evict_time") */
    uint64_t         nused_item;      /* # used item */
};

/*
 * Class ids are uint8_t. UCHAR_MAX itself is reserved as the "no class"
 * marker, so valid ids run from SLABCLASS_MIN_ID to SLABCLASS_MAX_ID.
 */
#define SLABCLASS_MIN_ID        0
#define SLABCLASS_MAX_ID        (UCHAR_MAX - 1)
#define SLABCLASS_INVALID_ID    UCHAR_MAX
#define SLABCLASS_MAX_IDS       UCHAR_MAX

bool slab_valid_id(uint8_t cid);
size_t slab_data_size(void);
void slab_print(void);
uint8_t slab_cid(size_t size);

struct item *slab_get_item(uint8_t cid);
void slab_put_item(struct item *it);
struct item *slab_read_item(uint32_t sid, uint32_t addr);

rstatus_t slab_init(void);
void slab_deinit(void);

/* Counters read by the "stats" commands (see stats_server / stats_slabs). */
uint32_t slab_msinfo_nalloc(void);
uint32_t slab_msinfo_nfree(void);
uint32_t slab_msinfo_nfull(void);
uint32_t slab_msinfo_npartial(void);
uint32_t slab_dsinfo_nalloc(void);
uint32_t slab_dsinfo_nfree(void);
uint32_t slab_dsinfo_nfull(void);
uint64_t slab_nevict(void);
uint8_t slab_max_cid(void);
uint8_t slab_get_cid(uint32_t sid);
struct slabclass *slab_get_class_by_cid(uint8_t cid);
bool slab_incr_chunks_by_sid(uint32_t sid, int n);
#endif

--------------------------------------------------------------------------------
/src/fc_stats.c:
--------------------------------------------------------------------------------
/*
 * fatcache - memcache on ssd.
 * Copyright (C) 2013 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | static stats_info st_info; 24 | static stats_info sc_st_info[SLABCLASS_MAX_ID+1]; 25 | struct settings settings; 26 | 27 | buffer * 28 | stats_alloc_buffer(int n) 29 | { 30 | if (n <= 0) n = 128; 31 | 32 | buffer *buf = fc_alloc(sizeof(*buf)); 33 | buf->data = fc_alloc(n); 34 | buf->nused = 0; 35 | buf->nalloc = n; 36 | return buf; 37 | } 38 | 39 | void 40 | stats_dealloc_buffer(buffer *buf) 41 | { 42 | if (!buf) return; 43 | 44 | if (buf->data) fc_free(buf->data); 45 | fc_free(buf); 46 | } 47 | 48 | static void 49 | _stats_append(buffer *buf, uint8_t cid, const char *key, int nkey, const char *val, int nval) 50 | { 51 | int n, new_size, avail, bytes = 0; 52 | uint8_t *new; 53 | 54 | if (!buf) { 55 | return; 56 | } 57 | 58 | n = nkey + nval + 14; // +14 for "STAT :class \r\n" 59 | if (!buf->data) { 60 | buf->data = fc_alloc(n); 61 | buf->nused = 0; 62 | buf->nalloc = n; 63 | } else if (buf->nalloc - buf->nused < n) { 64 | new_size = buf->nalloc > n ? 
buf->nalloc * 2 : buf->nalloc + 2 * n; 65 | new = fc_realloc(buf->data, new_size); 66 | if (new == NULL) { 67 | return; 68 | } 69 | buf->data = new; 70 | buf->nalloc = new_size; 71 | } 72 | 73 | avail = buf->nalloc - buf->nused; 74 | if (nkey == 0 && nval == 0) { 75 | bytes = fc_snprintf(buf->data + buf->nused, avail - 1, "END\r\n"); 76 | } else if (nval == 0) { 77 | if (cid == SLABCLASS_INVALID_ID) { 78 | bytes = fc_snprintf(buf->data + buf->nused, avail - 1, "STAT %s\r\n", key); 79 | } else { 80 | bytes = fc_snprintf(buf->data + buf->nused, avail - 1, "STAT %u:%s\r\n", cid, key); 81 | } 82 | } else if (nkey > 0 && nval > 0) { 83 | if (cid == SLABCLASS_INVALID_ID) { 84 | bytes = fc_snprintf(buf->data + buf->nused, avail - 1, "STAT %s %s\r\n", key, val); 85 | } else { 86 | bytes = fc_snprintf(buf->data + buf->nused, avail - 1, "STAT %u:%s %s\r\n", cid, key, val); 87 | } 88 | } 89 | buf->nused += bytes; 90 | buf->data[buf->nused] = '\0'; 91 | } 92 | 93 | void 94 | stats_append(buffer *buf, uint8_t cid, const char*name, const char *fmt, ...) 95 | { 96 | int n; 97 | va_list ap; 98 | char val[128]; 99 | 100 | if (name && fmt) { 101 | va_start(ap, fmt); 102 | n = vsnprintf(val, sizeof(val) - 1, fmt, ap); 103 | va_end(ap); 104 | _stats_append(buf, cid, name, strlen(name), val, n); 105 | } else { 106 | _stats_append(buf, cid, NULL, 0, NULL, 0); 107 | } 108 | } 109 | 110 | uint64_t 111 | stats_get(uint8_t cid, msg_type_t type, int is_miss) 112 | { 113 | stats_info *info; 114 | 115 | info = cid == SLABCLASS_INVALID_ID ? &st_info : &sc_st_info[cid]; 116 | 117 | switch(type) { 118 | case MSG_REQ_GET: 119 | case MSG_REQ_GETS: 120 | return is_miss? info->get - info->get_hits : info->get; 121 | case MSG_REQ_SET: 122 | case MSG_REQ_ADD: 123 | case MSG_REQ_APPEND: 124 | case MSG_REQ_PREPEND: 125 | case MSG_REQ_REPLACE: 126 | return info->set; 127 | case MSG_REQ_DELETE: 128 | return is_miss ? info->del - info->del_hits : info->del; 129 | case MSG_REQ_INCR: 130 | return is_miss ? 
info->incr - info->incr_hits : info->incr; 131 | case MSG_REQ_DECR: 132 | return is_miss ? info->decr - info->decr_hits : info->decr; 133 | case MSG_REQ_CAS: 134 | return is_miss ? info->cas - info->cas_hits : info->cas; 135 | default: 136 | return 0; 137 | } 138 | } 139 | 140 | void 141 | stats_incr(uint8_t cid, msg_type_t type, int is_hit) 142 | { 143 | stats_info *info; 144 | 145 | info = cid == SLABCLASS_INVALID_ID ? &st_info : &sc_st_info[cid]; 146 | 147 | switch(type) { 148 | case MSG_REQ_GET: 149 | case MSG_REQ_GETS: 150 | is_hit ? info->get_hits++ : info->get++; 151 | break; 152 | case MSG_REQ_SET: 153 | case MSG_REQ_ADD: 154 | case MSG_REQ_APPEND: 155 | case MSG_REQ_PREPEND: 156 | case MSG_REQ_REPLACE: 157 | info->set++; 158 | break; 159 | case MSG_REQ_DELETE: 160 | is_hit ? info->del_hits++ : info->del++; 161 | break; 162 | case MSG_REQ_INCR: 163 | is_hit ? info->incr_hits++ : info->incr++; 164 | break; 165 | case MSG_REQ_DECR: 166 | is_hit ? info->decr_hits++ : info->decr++; 167 | break; 168 | case MSG_REQ_CAS: 169 | is_hit ? 
info->cas_hits++ : info->cas++; 170 | break; 171 | default: 172 | break; 173 | } 174 | } 175 | 176 | buffer* 177 | stats_server(void) 178 | { 179 | buffer *stats_buf; 180 | 181 | stats_buf = stats_alloc_buffer(1024); 182 | if (stats_buf == NULL) { 183 | return NULL; 184 | } 185 | 186 | APPEND_STAT(stats_buf, "pid", "%u", getpid()); 187 | APPEND_STAT(stats_buf, "uptime", "%u", time_started()); 188 | APPEND_STAT(stats_buf, "version", "%s", FC_VERSION_STRING); 189 | APPEND_STAT(stats_buf, "pointer_size", "%u", sizeof(void*)); 190 | APPEND_STAT(stats_buf, "curr_connection", "%u", conn_nused()); 191 | APPEND_STAT(stats_buf, "free_connection", "%u", conn_nfree()); 192 | APPEND_STAT(stats_buf, "total_connection", "%u", conn_total()); 193 | APPEND_STAT(stats_buf, "cmd_get", "%llu", STATS_GET(MSG_REQ_GET)); 194 | APPEND_STAT(stats_buf, "cmd_get_miss", "%llu", STATS_GET_MISS(MSG_REQ_GET)); 195 | APPEND_STAT(stats_buf, "cmd_set", "%llu", STATS_GET(MSG_REQ_SET)); 196 | APPEND_STAT(stats_buf, "cmd_del", "%llu", STATS_GET(MSG_REQ_DELETE)); 197 | APPEND_STAT(stats_buf, "cmd_del_miss", "%llu", STATS_GET_MISS(MSG_REQ_DELETE)); 198 | APPEND_STAT(stats_buf, "cmd_decr", "%llu", STATS_GET(MSG_REQ_DECR)); 199 | APPEND_STAT(stats_buf, "cmd_decr_miss", "%llu", STATS_GET_MISS(MSG_REQ_DECR)); 200 | APPEND_STAT(stats_buf, "cmd_incr", "%llu", STATS_GET(MSG_REQ_INCR)); 201 | APPEND_STAT(stats_buf, "cmd_incr_miss", "%llu", STATS_GET_MISS(MSG_REQ_INCR)); 202 | APPEND_STAT(stats_buf, "cmd_cas", "%llu", STATS_GET(MSG_REQ_CAS)); 203 | APPEND_STAT(stats_buf, "cmd_cas_miss", "%llu", STATS_GET_MISS(MSG_REQ_CAS)); 204 | APPEND_STAT(stats_buf, "alloc_itemx", "%llu", itemx_nalloc()); 205 | APPEND_STAT(stats_buf, "free_itemx", "%llu", itemx_nfree()); 206 | APPEND_STAT(stats_buf, "total_mem_slab", "%u", slab_msinfo_nalloc()); 207 | APPEND_STAT(stats_buf, "free_mem_slab", "%u", slab_msinfo_nfree()); 208 | APPEND_STAT(stats_buf, "full_mem_slab", "%u", slab_msinfo_nfull()); 209 | APPEND_STAT(stats_buf, 
"partial_mem_slab", "%u", slab_msinfo_npartial()); 210 | APPEND_STAT(stats_buf, "total_disk_slab", "%u", slab_dsinfo_nalloc()); 211 | APPEND_STAT(stats_buf, "free_disk_slab", "%u", slab_dsinfo_nfree()); 212 | APPEND_STAT(stats_buf, "full_disk_slab", "%u", slab_dsinfo_nfull()); 213 | APPEND_STAT(stats_buf, "evict_time", "%llu", slab_nevict()); 214 | APPEND_STAT_END(stats_buf); 215 | 216 | return stats_buf; 217 | } 218 | 219 | buffer* 220 | stats_slabs(void) 221 | { 222 | buffer *stats_buf; 223 | uint8_t cid, max_cid; 224 | uint64_t nget, nset, ndel, nincr, ndecr, ncas ; 225 | struct slabclass *sc; 226 | 227 | stats_buf = stats_alloc_buffer(512); 228 | if (stats_buf == NULL) { 229 | return NULL; 230 | } 231 | 232 | max_cid = slab_max_cid(); 233 | for (cid = SLABCLASS_MIN_ID; cid < max_cid; cid++) { 234 | sc = slab_get_class_by_cid(cid); 235 | if (!sc) continue; 236 | 237 | nget = SC_STATS_GET(cid, MSG_REQ_GET); 238 | nset = SC_STATS_GET(cid, MSG_REQ_SET); 239 | ndel = SC_STATS_GET(cid, MSG_REQ_DELETE); 240 | ndecr = SC_STATS_GET(cid, MSG_REQ_DECR); 241 | nincr = SC_STATS_GET(cid, MSG_REQ_INCR); 242 | ncas = SC_STATS_GET(cid, MSG_REQ_CAS); 243 | if (sc->nmslab == 0 && sc->ndslab == 0 && sc->nevict == 0 && nget == 0 244 | && nset == 0 && ndel == 0 && ndecr == 0 && nincr == 0 && ncas == 0) { 245 | continue; 246 | } 247 | SC_APPEND_STAT(stats_buf, cid, "used_chunks", "%u", sc->nused_item); 248 | SC_APPEND_STAT(stats_buf, cid, "chunk_size", "%u", sc->size); 249 | SC_APPEND_STAT(stats_buf, cid, "chunks_per_slab", "%u", sc->nitem); 250 | SC_APPEND_STAT(stats_buf, cid, "slack", "%u", sc->slack); 251 | SC_APPEND_STAT(stats_buf, cid, "total_mem_slab", "%u", sc->nmslab); 252 | SC_APPEND_STAT(stats_buf, cid, "total_disk_slab", "%u", sc->ndslab); 253 | SC_APPEND_STAT(stats_buf, cid, "total_evict_time", "%lu", sc->nevict); 254 | SC_APPEND_STAT(stats_buf, cid, "cmd_get", "%llu", nget); 255 | SC_APPEND_STAT(stats_buf, cid, "cmd_set", "%llu", nset); 256 | SC_APPEND_STAT(stats_buf, 
cid, "cmd_del", "%llu", ndel); 257 | SC_APPEND_STAT(stats_buf, cid, "cmd_decr", "%llu", ndecr); 258 | SC_APPEND_STAT(stats_buf, cid, "cmd_incr", "%llu", nincr); 259 | SC_APPEND_STAT(stats_buf, cid, "cmd_cas", "%llu", ncas); 260 | } 261 | APPEND_STAT_END(stats_buf); 262 | 263 | return stats_buf; 264 | } 265 | 266 | buffer* 267 | stats_settings(void) 268 | { 269 | buffer *stats_buf; 270 | 271 | stats_buf = stats_alloc_buffer(256); 272 | if (stats_buf == NULL) { 273 | return NULL; 274 | } 275 | 276 | APPEND_STAT(stats_buf, "addr", "%s", settings.addr); 277 | APPEND_STAT(stats_buf, "port", "%d", settings.port); 278 | APPEND_STAT(stats_buf, "hash_power", "%d", settings.hash_power); 279 | APPEND_STAT(stats_buf, "factor", "%f", settings.factor); 280 | APPEND_STAT(stats_buf, "max_slab_memory", "%u", settings.max_slab_memory); 281 | APPEND_STAT(stats_buf, "max_index_memory", "%u", settings.max_index_memory); 282 | APPEND_STAT(stats_buf, "chunk_size", "%u", settings.chunk_size); 283 | APPEND_STAT(stats_buf, "max_chunk_size", "%u", settings.max_chunk_size); 284 | APPEND_STAT(stats_buf, "slab_size", "%u", settings.slab_size); 285 | APPEND_STAT(stats_buf, "ssd_device", "%s", settings.ssd_device); 286 | APPEND_STAT(stats_buf, "server_id", "%u", settings.server_id); 287 | APPEND_STAT(stats_buf, "server_count", "%u", settings.server_n); 288 | APPEND_STAT_END(stats_buf); 289 | 290 | return stats_buf; 291 | } 292 | -------------------------------------------------------------------------------- /src/fc_stats.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _FC_STATS_H_
#define _FC_STATS_H_

/* NOTE(review): the header names of the #include lines below were lost when this dump was extracted. */
#include
#include
#include

/* Growable text buffer that the stats_*() builders fill and return. */
typedef struct {
    uint8_t *data;      /* heap storage for the reply text */
    uint32_t nused;     /* bytes of data currently in use */
    uint32_t nalloc;    /* bytes allocated for data */
} buffer;

/*
 * Command counters, kept once server-wide and once per slab class.
 * For commands with a *_hits member, stats_get() reports misses as the
 * plain counter minus the hits counter.
 */
typedef struct {
    uint64_t get;
    uint64_t get_hits;
    uint64_t set;
    uint64_t del;
    uint64_t del_hits;
    uint64_t incr;
    uint64_t incr_hits;
    uint64_t decr;
    uint64_t decr_hits;
    uint64_t cas;
    uint64_t cas_hits;
} stats_info;


/*
 * Counter updates: the plain variants address the server-wide counters
 * (class id SLABCLASS_INVALID_ID), the SC_ variants a given slab class.
 */
#define STATS_INCR(type)            stats_incr(SLABCLASS_INVALID_ID, type, 0)
#define SC_STATS_INCR(cid, type)    stats_incr(cid, type, 0)
#define STATS_HIT_INCR(type)        stats_incr(SLABCLASS_INVALID_ID, type, 1)

/* Counter reads; STATS_GET_MISS asks for the miss count. */
#define STATS_GET(type)             stats_get(SLABCLASS_INVALID_ID, type, 0)
#define SC_STATS_GET(cid, type)     stats_get(cid, type, 0)
#define STATS_GET_MISS(type)        stats_get(SLABCLASS_INVALID_ID, type, 1)

/*
 * Append one formatted "STAT" line to buffer b; APPEND_STAT_END appends
 * the closing "END" line (a NULL name makes stats_append() do that).
 */
#define APPEND_STAT(b, name, fmt, val) \
    stats_append(b, SLABCLASS_INVALID_ID, name, fmt, val)
#define SC_APPEND_STAT(b, cid, name, fmt, val) \
    stats_append(b, cid, name, fmt, val)
#define APPEND_STAT_END(b) \
    stats_append(b, SLABCLASS_INVALID_ID, NULL, 0, NULL, 0)

buffer *stats_alloc_buffer(int n);
void stats_dealloc_buffer(buffer *buf);
void stats_append(buffer *buf, uint8_t cid, const char*name, const char *fmt, ...);
void stats_incr(uint8_t cid, msg_type_t type, int is_hit);
uint64_t stats_get(uint8_t cid, msg_type_t type, int is_miss);

/* Reply builders; each returns a buffer to release with stats_dealloc_buffer(). */
buffer *stats_server(void);
buffer *stats_slabs(void);
buffer *stats_settings(void);
#endif

--------------------------------------------------------------------------------
/src/fc_string.c:
--------------------------------------------------------------------------------
/*
 * fatcache - memcache on ssd.
 * Copyright (C) 2013 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* NOTE(review): the header names of the #include lines below were lost when this dump was extracted. */
#include
#include

#include

/*
 * String (struct string) is a sequence of unsigned char objects terminated
 * by the null character '\0'. The length of the string is pre-computed and
 * made available explicitly as an additional field. This means that we don't
 * have to walk the entire character sequence until the null terminating
 * character every time the length of the String is requested.
 *
 * The only way to create a String is to initialize it using string_init()
 * and then duplicate an existing String - string_duplicate() - or copy an
 * existing raw sequence of character bytes - string_copy(). Such Strings
 * must be freed using string_deinit().
 *
 * We can also create a String as a reference to a raw string -
 * string_set_raw() - or to a text string - string_set_text() or string().
 * Such Strings don't have to be freed.
38 | */ 39 | 40 | void 41 | string_init(struct string *str) 42 | { 43 | str->len = 0; 44 | str->data = NULL; 45 | } 46 | 47 | void 48 | string_deinit(struct string *str) 49 | { 50 | ASSERT((str->len == 0 && str->data == NULL) || 51 | (str->len != 0 && str->data != NULL)); 52 | 53 | if (str->data != NULL) { 54 | fc_free(str->data); 55 | string_init(str); 56 | } 57 | } 58 | 59 | bool 60 | string_empty(const struct string *str) 61 | { 62 | ASSERT((str->len == 0 && str->data == NULL) || 63 | (str->len != 0 && str->data != NULL)); 64 | return str->len == 0 ? true : false; 65 | } 66 | 67 | rstatus_t 68 | string_duplicate(struct string *dst, const struct string *src) 69 | { 70 | ASSERT(dst->len == 0 && dst->data == NULL); 71 | ASSERT(src->len != 0 && src->data != NULL); 72 | 73 | dst->data = fc_strndup(src->data, src->len + 1); 74 | if (dst->data == NULL) { 75 | return FC_ENOMEM; 76 | } 77 | 78 | dst->len = src->len; 79 | dst->data[dst->len] = '\0'; 80 | 81 | return FC_OK; 82 | } 83 | 84 | rstatus_t 85 | string_copy(struct string *dst, const uint8_t *src, uint32_t srclen) 86 | { 87 | ASSERT(dst->len == 0 && dst->data == NULL); 88 | ASSERT(src != NULL && srclen != 0); 89 | 90 | dst->data = fc_strndup(src, srclen + 1); 91 | if (dst->data == NULL) { 92 | return FC_ENOMEM; 93 | } 94 | 95 | dst->len = srclen; 96 | dst->data[dst->len] = '\0'; 97 | 98 | return FC_OK; 99 | } 100 | 101 | int 102 | string_compare(const struct string *s1, const struct string *s2) 103 | { 104 | if (s1->len != s2->len) { 105 | return s1->len - s2->len > 0 ? 1 : -1; 106 | } 107 | 108 | return fc_strncmp(s1->data, s2->data, s1->len); 109 | } 110 | -------------------------------------------------------------------------------- /src/fc_string.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 
4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _FC_STRING_H_ 19 | #define _FC_STRING_H_ 20 | 21 | #include 22 | #include 23 | 24 | struct string { 25 | uint32_t len; /* string length */ 26 | uint8_t *data; /* string data */ 27 | }; 28 | 29 | #define string(_str) { sizeof(_str) - 1, (uint8_t *)(_str) } 30 | #define null_string { 0, NULL } 31 | 32 | #define string_set_text(_str, _text) do { \ 33 | (_str)->len = (uint32_t)(sizeof(_text) - 1);\ 34 | (_str)->data = (uint8_t *)(_text); \ 35 | } while (0); 36 | 37 | #define string_set_raw(_str, _raw) do { \ 38 | (_str)->len = (uint32_t)(fc_strlen(_raw)); \ 39 | (_str)->data = (uint8_t *)(_raw); \ 40 | } while (0); 41 | 42 | void string_init(struct string *str); 43 | void string_deinit(struct string *str); 44 | bool string_empty(const struct string *str); 45 | rstatus_t string_duplicate(struct string *dst, const struct string *src); 46 | rstatus_t string_copy(struct string *dst, const uint8_t *src, uint32_t srclen); 47 | int string_compare(const struct string *s1, const struct string *s2); 48 | 49 | /* 50 | * Wrapper around common routines for manipulating C character 51 | * strings 52 | */ 53 | #define fc_memcpy(_d, _c, _n) \ 54 | memcpy(_d, _c, (size_t)(_n)) 55 | 56 | #define fc_memmove(_d, _c, _n) \ 57 | memmove(_d, _c, (size_t)(_n)) 58 | 59 | #define fc_memchr(_d, _c, _n) \ 60 | memchr(_d, _c, (size_t)(_n)) 61 | 62 | #define fc_strlen(_s) \ 63 | 
strlen((char *)(_s)) 64 | 65 | #define fc_strncmp(_s1, _s2, _n) \ 66 | strncmp((char *)(_s1), (char *)(_s2), (size_t)(_n)) 67 | 68 | #define fc_strchr(_p, _l, _c) \ 69 | _fc_strchr((uint8_t *)(_p), (uint8_t *)(_l), (uint8_t)(_c)) 70 | 71 | #define fc_strrchr(_p, _s, _c) \ 72 | _fc_strrchr((uint8_t *)(_p),(uint8_t *)(_s), (uint8_t)(_c)) 73 | 74 | #define fc_strndup(_s, _n) \ 75 | (uint8_t *)strndup((char *)(_s), (size_t)(_n)); 76 | 77 | #define fc_snprintf(_s, _n, ...) \ 78 | snprintf((char *)(_s), (size_t)(_n), __VA_ARGS__) 79 | 80 | #define fc_scnprintf(_s, _n, ...) \ 81 | _scnprintf((char *)(_s), (size_t)(_n), __VA_ARGS__) 82 | 83 | #define fc_vscnprintf(_s, _n, _f, _a) \ 84 | _vscnprintf((char *)(_s), (size_t)(_n), _f, _a) 85 | 86 | static inline uint8_t * 87 | _fc_strchr(uint8_t *p, uint8_t *last, uint8_t c) 88 | { 89 | while (p < last) { 90 | if (*p == c) { 91 | return p; 92 | } 93 | p++; 94 | } 95 | 96 | return NULL; 97 | } 98 | 99 | static inline uint8_t * 100 | _fc_strrchr(uint8_t *p, uint8_t *start, uint8_t c) 101 | { 102 | while (p >= start) { 103 | if (*p == c) { 104 | return p; 105 | } 106 | p--; 107 | } 108 | 109 | return NULL; 110 | } 111 | 112 | #endif 113 | -------------------------------------------------------------------------------- /src/fc_time.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | 20 | extern struct settings settings; 21 | 22 | /* 23 | * From memcache protocol specification: 24 | * 25 | * Some commands involve a client sending some kind of expiration time 26 | * (relative to an item or to an operation requested by the client) to 27 | * the server. In all such cases, the actual value sent may either be 28 | * Unix time (number of seconds since January 1, 1970, as a 32-bit 29 | * value), or a number of seconds starting from current time. In the 30 | * latter case, this number of seconds may not exceed 60*60*24*30 (number 31 | * of seconds in 30 days); if the number sent by a client is larger than 32 | * that, the server will consider it to be real Unix time value rather 33 | * than an offset from current time. 34 | */ 35 | #define TIME_MAXDELTA (time_t)(60 * 60 * 24 * 30) 36 | 37 | /* 38 | * Time when process was started expressed as absolute unix timestamp 39 | * with a time_t type 40 | */ 41 | static time_t process_started; 42 | 43 | /* 44 | * We keep a cache of the current time of day in a global variable now 45 | * that is updated periodically by a timer event every second. This 46 | * saves us a bunch of time() system calls because we really only need 47 | * to get the time once a second, whereas there can be tens of thosands 48 | * of requests a second. 49 | * 50 | * Also keeping track of time as relative to server-start timestamp 51 | * instead of absolute unix timestamps gives us a space savings on 52 | * systems where sizeof(time_t) > sizeof(unsigned int) 53 | * 54 | * So, now actually holds 32-bit seconds since the server start time. 
55 | */ 56 | static volatile rel_time_t now; 57 | 58 | void 59 | time_update(void) 60 | { 61 | int status; 62 | struct timeval timer; 63 | 64 | status = gettimeofday(&timer, NULL); 65 | if (status < 0) { 66 | log_error("gettimeofday failed: %s", strerror(errno)); 67 | } 68 | now = (rel_time_t) (timer.tv_sec - process_started); 69 | 70 | log_debug(LOG_PVERB, "time updated to %u", now); 71 | } 72 | 73 | rel_time_t 74 | time_now(void) 75 | { 76 | return now; 77 | } 78 | 79 | time_t 80 | time_now_abs(void) 81 | { 82 | return process_started + (time_t)now; 83 | } 84 | 85 | time_t 86 | time_started(void) 87 | { 88 | return process_started; 89 | } 90 | 91 | /* 92 | * Given time value that's either unix time or delta from current unix 93 | * time, return the time relative to process start. 94 | */ 95 | rel_time_t 96 | time_reltime(time_t exptime) 97 | { 98 | if (exptime == 0) { /* 0 means never expire */ 99 | return 0; 100 | } 101 | 102 | if (exptime > TIME_MAXDELTA) { 103 | /* 104 | * If item expiration is at or before the server_started, give 105 | * it an expiration time of 1 second after the server started 106 | * becasue because 0 means don't expire. 
Without this, we would 107 | * underflow and wrap around to some large value way in the 108 | * future, effectively making items expiring in the past 109 | * really expiring never 110 | */ 111 | if (exptime <= process_started) { 112 | return (rel_time_t)1; 113 | } 114 | 115 | return (rel_time_t)(exptime - process_started); 116 | } else { 117 | return (rel_time_t)(exptime + now); 118 | } 119 | } 120 | 121 | static void * 122 | time_loop(void *arg) 123 | { 124 | struct epoll_event event; /* dummy event */ 125 | int ep; /* epoll descriptor */ 126 | int n; /* return status */ 127 | 128 | ep = epoll_create(10); 129 | if (ep < 0) { 130 | log_error("epoll create failed: %s", strerror(errno)); 131 | return NULL; 132 | } 133 | 134 | for (;;) { 135 | n = epoll_wait(ep, &event, 1, 1000); 136 | if (n < 0) { 137 | if (errno == EINTR) { 138 | continue; 139 | } 140 | log_error("epoll wait on e %d failed: %s", ep, strerror(errno)); 141 | break; 142 | } 143 | 144 | if (n == 0) { 145 | time_update(); 146 | continue; 147 | } 148 | } 149 | 150 | return NULL; 151 | } 152 | 153 | rstatus_t 154 | time_init(void) 155 | { 156 | int status; 157 | pthread_t tid; 158 | 159 | /* 160 | * Make the time we started always be 2 seconds before we really 161 | * did, so time_now(0) - time.started is never zero. If so, things 162 | * like 'settings.oldest_live' which act as booleans as well as 163 | * values are now false in boolean context. 
164 | */ 165 | process_started = time(NULL) - 2; 166 | 167 | log_debug(LOG_DEBUG, "process started at %"PRId64, (int64_t)process_started); 168 | 169 | status = pthread_create(&tid, NULL, time_loop, NULL); 170 | if (status != 0) { 171 | log_error("stats aggregator create failed: %s", strerror(status)); 172 | return FC_ERROR; 173 | } 174 | 175 | return FC_OK; 176 | } 177 | 178 | void 179 | time_deinit(void) 180 | { 181 | } 182 | -------------------------------------------------------------------------------- /src/fc_time.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _FC_TIME_H_ 19 | #define _FC_TIME_H_ 20 | 21 | #include 22 | 23 | /* 24 | * Time relative to server start time in seconds. 
25 | * 26 | * On systems where size(time_t) > sizeof(unsigned int), this gives 27 | * us space savings over tracking absolute unix time of type time_t 28 | */ 29 | typedef unsigned int rel_time_t; 30 | 31 | void time_update(void); 32 | rel_time_t time_now(void); 33 | time_t time_now_abs(void); 34 | time_t time_started(void); 35 | rel_time_t time_reltime(time_t exptime); 36 | 37 | rstatus_t time_init(void); 38 | void time_deinit(void); 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/fc_util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fatcache - memcache on ssd. 3 | * Copyright (C) 2013 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _FC_UTIL_H_ 19 | #define _FC_UTIL_H_ 20 | 21 | #include 22 | 23 | #define LF (uint8_t) 10 24 | #define CR (uint8_t) 13 25 | #define CRLF "\r\n" 26 | #define CRLF_LEN (uint32_t) (sizeof(CRLF) - 1) 27 | 28 | #define NELEMS(a) ((sizeof(a)) / sizeof((a)[0])) 29 | 30 | #define KB (1024) 31 | #define MB (1024 * KB) 32 | #define GB (1024 * MB) 33 | 34 | #define MIN(a, b) ((a) < (b) ? (a) : (b)) 35 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) 36 | 37 | #define SQUARE(d) ((d) * (d)) 38 | #define VAR(s, s2, n) (((n) < 2) ? 0.0 : ((s2) - SQUARE(s)/(n)) / ((n) - 1)) 39 | #define STDDEV(s, s2, n) (((n) < 2) ? 
0.0 : sqrt(VAR((s), (s2), (n)))) 40 | 41 | #define FC_INET4_ADDRSTRLEN (sizeof("255.255.255.255") - 1) 42 | #define FC_INET6_ADDRSTRLEN \ 43 | (sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255") - 1) 44 | #define FC_INET_ADDRSTRLEN MAX(FC_INET4_ADDRSTRLEN, FC_INET6_ADDRSTRLEN) 45 | #define FC_UNIX_ADDRSTRLEN \ 46 | (sizeof(struct sockaddr_un) - offsetof(struct sockaddr_un, sun_path)) 47 | 48 | #define FC_MAXHOSTNAMELEN 256 49 | 50 | /* 51 | * Length of 1 byte, 2 bytes, 4 bytes, 8 bytes and largest integral 52 | * type (uintmax_t) in ascii, including the null terminator '\0' 53 | * 54 | * From stdint.h, we have: 55 | * # define UINT8_MAX (255) 56 | * # define UINT16_MAX (65535) 57 | * # define UINT32_MAX (4294967295U) 58 | * # define UINT64_MAX (__UINT64_C(18446744073709551615)) 59 | */ 60 | #define FC_UINT8_MAXLEN (3 + 1) 61 | #define FC_UINT16_MAXLEN (5 + 1) 62 | #define FC_UINT32_MAXLEN (10 + 1) 63 | #define FC_UINT64_MAXLEN (20 + 1) 64 | #define FC_UINTMAX_MAXLEN FC_UINT64_MAXLEN 65 | 66 | /* timeval to seconds */ 67 | #define TV_TO_SEC(_tv) ((_tv)->tv_sec + (1e-6 * (_tv)->tv_usec)) 68 | 69 | /* 70 | * Make data 'd' or pointer 'p', n-byte aligned, where n is a power of 2 71 | * of 2. 72 | */ 73 | #define FC_ALIGNMENT sizeof(unsigned long) /* platform word */ 74 | #define FC_ALIGN(d, n) ((size_t)(((d) + (n - 1)) & ~(n - 1))) 75 | #define FC_ALIGN_PTR(p, n) \ 76 | (void *) (((uintptr_t) (p) + ((uintptr_t) n - 1)) & ~((uintptr_t) n - 1)) 77 | 78 | /* 79 | * Return 'x' rounded up to the nearest multiple of 'step'. Only valid 80 | * for x >= 0, step >= 1. 81 | */ 82 | #define ROUND_UP(x, step) (((x) + (step) - 1) / (step) * (step)) 83 | 84 | /* 85 | * Return 'x' rounded down to the nearest multiple of step. Only valid 86 | * for x >= 0, step >= 1. 87 | */ 88 | #define ROUND_DOWN(x, step) ((x) / (step) * (step)) 89 | 90 | /* 91 | * Memory allocation and free wrappers. 
92 | * 93 | * These wrappers enables us to loosely detect double free, dangling 94 | * pointer access and zero-byte alloc. 95 | */ 96 | #define fc_alloc(_s) \ 97 | _fc_alloc((size_t)(_s), __FILE__, __LINE__) 98 | 99 | #define fc_zalloc(_s) \ 100 | _fc_zalloc((size_t)(_s), __FILE__, __LINE__) 101 | 102 | #define fc_calloc(_n, _s) \ 103 | _fc_calloc((size_t)(_n), (size_t)(_s), __FILE__, __LINE__) 104 | 105 | #define fc_realloc(_p, _s) \ 106 | _fc_realloc(_p, (size_t)(_s), __FILE__, __LINE__) 107 | 108 | #define fc_free(_p) do { \ 109 | _fc_free(_p, __FILE__, __LINE__); \ 110 | (_p) = NULL; \ 111 | } while (0) 112 | 113 | #define fc_mmap(_s) \ 114 | _fc_mmap((size_t)(_s), __FILE__, __LINE__) 115 | 116 | #define fc_munmap(_p, _s) \ 117 | _fc_munmap(_p, (size_t)(_s), __FILE__, __LINE__) 118 | 119 | void *_fc_alloc(size_t size, const char *name, int line); 120 | void *_fc_zalloc(size_t size, const char *name, int line); 121 | void *_fc_calloc(size_t nmemb, size_t size, const char *name, int line); 122 | void *_fc_realloc(void *ptr, size_t size, const char *name, int line); 123 | void _fc_free(void *ptr, const char *name, int line); 124 | void *_fc_mmap(size_t size, const char *name, int line); 125 | int _fc_munmap(void *p, size_t size, const char *name, int line); 126 | 127 | /* 128 | * Wrapper to workaround well known, safe, implicit type conversion when 129 | * invoking system calls. 
130 | */ 131 | #define fc_gethostname(_name, _len) \ 132 | gethostname((char *)_name, (size_t)_len) 133 | 134 | #define fc_atoi(_line, _n) \ 135 | _fc_atoi((uint8_t *)_line, (size_t)_n) 136 | 137 | #define fc_atou32(_line, _n, _u32) \ 138 | _fc_atou32((uint8_t *)_line, (size_t)_n, _u32) 139 | 140 | #define fc_atou64(_line, _n, _u64) \ 141 | _fc_atou64((uint8_t *)_line, (size_t)_n, _u64) 142 | 143 | int _fc_atoi(uint8_t *line, size_t n); 144 | rstatus_t _fc_atou32(uint8_t *line, size_t n, uint32_t *u32); 145 | rstatus_t _fc_atou64(uint8_t *line, size_t n, uint64_t *u64); 146 | bool fc_valid_port(int n); 147 | 148 | int fc_set_blocking(int sd); 149 | int fc_set_nonblocking(int sd); 150 | int fc_set_directio(int fd); 151 | int fc_set_reuseaddr(int sd); 152 | int fc_set_tcpnodelay(int sd); 153 | int fc_set_keepalive(int sd); 154 | int fc_set_linger(int sd, int timeout); 155 | int fc_unset_linger(int sd); 156 | int fc_set_sndbuf(int sd, int size); 157 | int fc_set_rcvbuf(int sd, int size); 158 | int fc_get_soerror(int sd); 159 | int fc_get_sndbuf(int sd); 160 | int fc_get_rcvbuf(int sd); 161 | void fc_maximize_sndbuf(int sd); 162 | int64_t fc_usec_now(void); 163 | rstatus_t fc_device_size(const char *path, size_t *size); 164 | 165 | /* 166 | * Wrappers to read or write data to/from (multiple) buffers 167 | * to a file or socket descriptor. 168 | */ 169 | #define fc_read(_d, _b, _n) \ 170 | read(_d, _b, (size_t)(_n)) 171 | 172 | #define fc_readv(_d, _b, _n) \ 173 | readv(_d, _b, (int)(_n)) 174 | 175 | #define fc_write(_d, _b, _n) \ 176 | write(_d, _b, (size_t)(_n)) 177 | 178 | #define fc_writev(_d, _b, _n) \ 179 | writev(_d, _b, (int)(_n)) 180 | 181 | /* 182 | * Wrappers around strtoull, strtoll, strtoul, strtol that are safer and 183 | * easier to use. Returns true if conversion succeeds. 
184 | */ 185 | bool fc_strtoull(const char *str, uint64_t *out); 186 | bool fc_strtoll(const char *str, int64_t *out); 187 | bool fc_strtoul(const char *str, uint32_t *out); 188 | bool fc_strtol(const char *str, int32_t *out); 189 | bool fc_str2oct(const char *str, int32_t *out); 190 | 191 | /* 192 | * Wrappers for defining custom assert based on whether macro 193 | * FC_ASSERT_PANIC or FC_ASSERT_LOG was defined at the moment 194 | * ASSERT was called. 195 | */ 196 | #if defined FC_ASSERT_PANIC && FC_ASSERT_PANIC == 1 197 | 198 | #define ASSERT(_x) do { \ 199 | if (!(_x)) { \ 200 | fc_assert(#_x, __FILE__, __LINE__, 1); \ 201 | } \ 202 | } while (0) 203 | 204 | #define NOT_REACHED() ASSERT(0) 205 | 206 | #elif defined FC_ASSERT_LOG && FC_ASSERT_LOG == 1 207 | 208 | #define ASSERT(_x) do { \ 209 | if (!(_x)) { \ 210 | fc_assert(#_x, __FILE__, __LINE__, 0); \ 211 | } \ 212 | } while (0) 213 | 214 | #define NOT_REACHED() ASSERT(0) 215 | 216 | #else 217 | 218 | #define ASSERT(_x) 219 | 220 | #define NOT_REACHED() 221 | 222 | #endif 223 | 224 | void fc_stacktrace(int skip_count); 225 | void fc_assert(const char *cond, const char *file, int line, int panic); 226 | 227 | int _scnprintf(char *buf, size_t size, const char *fmt, ...); 228 | int _vscnprintf(char *buf, size_t size, const char *fmt, va_list args); 229 | 230 | /* 231 | * Address resolution for internet (ipv4 and ipv6) and unix domain 232 | * socket address. 
233 | */ 234 | struct sockinfo { 235 | int family; /* socket address family */ 236 | socklen_t addrlen; /* socket address length */ 237 | union { 238 | struct sockaddr_in in; /* ipv4 socket address */ 239 | struct sockaddr_in6 in6; /* ipv6 socket address */ 240 | struct sockaddr_un un; /* unix domain address */ 241 | } addr; 242 | }; 243 | 244 | int fc_resolve(struct string *name, int port, struct sockinfo *si); 245 | 246 | #endif 247 | -------------------------------------------------------------------------------- /src/stg_ins_test.c: -------------------------------------------------------------------------------- 1 | //#include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | struct settings settings; /* fatcache settings */ 20 | 21 | static void set_options(){ 22 | 23 | #define FC_CHUNK_SIZE ITEM_CHUNK_SIZE 24 | #define FC_SLAB_SIZE SLAB_SIZE 25 | 26 | #define FC_DAEMONIZE true 27 | 28 | //#define FC_LOG_FILE NULL 29 | #define FC_LOG_FILE "/home/yu/test/log2" 30 | #define FC_LOG_DEFAULT LOG_INFO 31 | #define FC_LOG_MIN LOG_EMERG 32 | #define FC_LOG_MAX LOG_PVERB 33 | 34 | #define FC_PORT 11211 35 | #define FC_ADDR "0.0.0.0" 36 | 37 | #define FC_HASH_POWER ITEMX_HASH_POWER 38 | 39 | #define FC_FACTOR 1.25 40 | 41 | #define FC_INDEX_MEMORY (64 * MB) 42 | #define FC_SLAB_MEMORY (64 * MB) 43 | 44 | #define FC_SERVER_ID 0 45 | #define FC_SERVER_N 1 46 | 47 | settings.daemonize = FC_DAEMONIZE; 48 | 49 | settings.log_filename = FC_LOG_FILE; 50 | settings.verbose = 6;//11;//FC_LOG_DEFAULT; 51 | 52 | settings.port = FC_PORT; 53 | settings.addr = FC_ADDR; 54 | settings.hash_power = FC_HASH_POWER; 55 | 56 | settings.factor = FC_FACTOR; 57 | settings.max_index_memory = FC_INDEX_MEMORY; 58 | settings.max_slab_memory = FC_SLAB_MEMORY; 59 | settings.chunk_size = FC_CHUNK_SIZE; 60 | settings.slab_size = FC_SLAB_SIZE; 61 | 62 | 
memset(settings.profile, 0, sizeof(settings.profile)); 63 | settings.profile_last_id = SLABCLASS_MAX_ID; 64 | 65 | settings.ssd_device = "/dev/sdc"; //NULL; 66 | 67 | settings.server_id = FC_SERVER_ID; 68 | settings.server_n = FC_SERVER_N; 69 | return; 70 | } 71 | 72 | static rstatus_t 73 | fc_generate_profile(void) 74 | { 75 | size_t *profile = settings.profile; /* slab profile */ 76 | uint8_t id; /* slab class id */ 77 | size_t item_sz, last_item_sz; /* current and last item chunk size */ 78 | size_t min_item_sz, max_item_sz; /* min and max item chunk size */ 79 | 80 | ASSERT(settings.chunk_size % FC_ALIGNMENT == 0); 81 | ASSERT(settings.chunk_size <= slab_data_size()); 82 | 83 | min_item_sz = settings.chunk_size; 84 | max_item_sz = slab_data_size(); 85 | id = SLABCLASS_MIN_ID; 86 | item_sz = min_item_sz; 87 | 88 | while (id < SLABCLASS_MAX_ID && item_sz < max_item_sz) { 89 | /* save the cur item chunk size */ 90 | last_item_sz = item_sz; 91 | profile[id] = item_sz; 92 | id++; 93 | 94 | /* get the next item chunk size */ 95 | item_sz *= settings.factor; 96 | if (item_sz == last_item_sz) { 97 | item_sz++; 98 | } 99 | item_sz = FC_ALIGN(item_sz, FC_ALIGNMENT); 100 | } 101 | 102 | /* last profile entry always has a 1 item/slab of maximum size */ 103 | profile[id] = max_item_sz; 104 | settings.profile_last_id = id; 105 | settings.max_chunk_size = max_item_sz; 106 | 107 | return FC_OK; 108 | } 109 | 110 | rstatus_t init(){ 111 | rstatus_t status; 112 | 113 | status = log_init(settings.verbose, settings.log_filename); 114 | if (status != FC_OK) { 115 | return status; 116 | } 117 | 118 | status = time_init(); 119 | if (status != FC_OK) { 120 | return status; 121 | } 122 | 123 | status = itemx_init(); 124 | if (status != FC_OK) { 125 | return status; 126 | } 127 | 128 | item_init(); 129 | 130 | status = slab_init(); 131 | if (status != FC_OK) { 132 | return status; 133 | } 134 | 135 | return FC_OK; 136 | } 137 | 138 | int put(char* key, int nkey, char* value, int vlen, 
int expiry, int flags){ 139 | uint8_t md[20]; 140 | uint32_t hash; 141 | 142 | uint8_t * tmp_key = key; 143 | sha1(tmp_key, nkey, md); 144 | hash = sha1_hash(md); 145 | uint8_t cid; 146 | struct item *it; 147 | 148 | cid = item_slabcid(nkey, vlen); 149 | if (cid == SLABCLASS_INVALID_ID) { 150 | return -1; 151 | } 152 | 153 | itemx_removex(hash, md); 154 | it = item_get(key, nkey, cid, vlen, time_reltime(expiry), 155 | flags, md, hash); 156 | if (it == NULL) { 157 | return -1; 158 | } 159 | memcpy(item_data(it), value, (size_t)(vlen)); 160 | return 0; 161 | } 162 | 163 | int get(char* key, int nkey, char* value){ 164 | uint8_t md[20]; 165 | uint32_t hash; 166 | 167 | struct itemx *itx; 168 | struct item *it; 169 | 170 | uint8_t * tmp_key = key; 171 | sha1(tmp_key, nkey, md); 172 | hash = sha1_hash(md); 173 | 174 | itx = itemx_getx(hash,md); 175 | if (itx == NULL) { 176 | return 1; 177 | } 178 | if (itemx_expired(itx)) { 179 | return 2; 180 | } 181 | it = slab_read_item(itx->sid, itx->offset); 182 | if (it == NULL) { 183 | // rsp_send_error(ctx, conn, msg, MSG_RSP_SERVER_ERROR, errno); 184 | return -1; 185 | } 186 | memcpy(value, item_data(it), it->ndata); 187 | return 0; 188 | } 189 | int delete(char* key, int nkey){ 190 | uint8_t md[20]; 191 | uint32_t hash; 192 | uint8_t * tmp_key = key; 193 | sha1(tmp_key, nkey, md); 194 | hash = sha1_hash(md); 195 | 196 | uint8_t cid; 197 | struct itemx *itx; 198 | 199 | itx = itemx_getx(hash, md); 200 | if (itx == NULL) { 201 | // rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_FOUND); 202 | return 0; 203 | } 204 | cid = slab_get_cid(itx->sid); 205 | itemx_removex(hash, md); 206 | return 0; 207 | } 208 | 209 | int num(char *src_key, int src_key_len, int num, int expiry, int flags){ 210 | rstatus_t status; 211 | uint8_t *pkey, nkey, cid; 212 | struct item *it; 213 | struct itemx *itx; 214 | uint64_t cnum; 215 | int64_t nnum; 216 | char numstr[FC_UINT64_MAXLEN]; 217 | int n; 218 | 219 | pkey = (uint8_t *)src_key; 220 | nkey = 
(uint8_t)(src_key_len); 221 | 222 | uint8_t md[20]; 223 | uint32_t hash; 224 | sha1(pkey, nkey, md); 225 | hash = sha1_hash(md); 226 | 227 | /* 1). look up existing itemx */ 228 | itx = itemx_getx(hash, md); 229 | if (itx == NULL || itemx_expired(itx)) { 230 | /* 2a). miss -> return NOT_FOUND */ 231 | //rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_FOUND); 232 | return -1; 233 | } 234 | 235 | /* 2b). hit -> read existing item into it */ 236 | it = slab_read_item(itx->sid, itx->offset); 237 | if (it == NULL) { 238 | //rsp_send_error(ctx, conn, msg, MSG_RSP_SERVER_ERROR, errno); 239 | return -2; 240 | } 241 | 242 | /* 3). sanity check item data to be a number */ 243 | status = fc_atou64(item_data(it), it->ndata, &cnum); 244 | if (status != FC_OK) { 245 | //rsp_send_error(ctx, conn, msg, MSG_RSP_CLIENT_ERROR, EINVAL); 246 | return -3; 247 | } 248 | 249 | /* 4). remove existing itemx of it */ 250 | itemx_removex(hash, md); 251 | 252 | /* 5). compute the new incr/decr number nnum and numstr */ 253 | nnum = cnum + num; 254 | if (nnum<0) 255 | nnum = 0; 256 | n = _scnprintf(numstr, sizeof(numstr), "%"PRIu64"", (uint64_t)nnum); 257 | 258 | /* 6). alloc new item that can hold n worth of bytes */ 259 | cid = item_slabcid(nkey, n); 260 | ASSERT(cid != SLABCLASS_INVALID_ID); 261 | 262 | it = item_get(pkey, nkey, cid, n, time_reltime(expiry), flags, 263 | md, hash); 264 | if (it == NULL) { 265 | //rsp_send_error(ctx, conn, msg, MSG_RSP_SERVER_ERROR, ENOMEM); 266 | return -2; 267 | } 268 | 269 | /* 7). 
copy numstr to it */ 270 | memcpy(item_data(it), numstr, n); 271 | return 0; 272 | } 273 | int main(int argc, char** argv){ 274 | set_options(); 275 | fc_generate_profile(); 276 | init(); 277 | char key[5]= "test1"; 278 | //char value[3]="100"; 279 | char *value=NULL; 280 | char *ret=NULL; 281 | int vl = 0; 282 | if (argc > 1){ 283 | printf("%u\n", strlen(argv[1])); 284 | vl = strlen(argv[1]); 285 | value = malloc((strlen(argv[1])+1) * sizeof(char)); 286 | ret = malloc((strlen(argv[1])+1) * sizeof(char)); 287 | memcpy(value, argv[1], strlen(argv[1])); 288 | }else{ 289 | ret = malloc(10 * sizeof(char)); 290 | } 291 | 292 | if (get(key, 5, ret) == 1){ 293 | printf("no data\n"); 294 | } 295 | if (put(key, 5, value, strlen(value), 0, 1) == 0){ 296 | printf("set data ok\n"); 297 | } 298 | if (get(key, 5, ret) == 0){ 299 | printf("get data %s\n", ret); 300 | } 301 | delete(key, 5); 302 | if (get(key, 5, ret) == 1){ 303 | printf("delete no data\n"); 304 | } 305 | if (put(key, 5, value, strlen(value), 0, 1) == 0){ 306 | printf("set data ok\n"); 307 | } 308 | if (get(key, 5, ret) == 1){ 309 | printf("error, no data\n"); 310 | }else{ 311 | printf("get data: %s\n", ret); 312 | } 313 | if (num(key, 5, 12, 0, 1) == 0){ 314 | printf("num data ok\n"); 315 | } 316 | if (get(key, 5, ret) == 1){ 317 | printf("get data: %s\n", ret); 318 | } 319 | 320 | 321 | return 0; 322 | } 323 | --------------------------------------------------------------------------------