├── mkfs.sh ├── rpc ├── jsl_log.cc ├── slock.h ├── jsl_log.h ├── thr_pool.h ├── thr_pool.cc ├── fifo.h ├── connection.h ├── pollmgr.h ├── method_thread.h ├── marshall.h ├── pollmgr.cc ├── connection.cc └── rpctest.cc ├── .gitignore ├── rsm_state_transfer.h ├── tprintf.h ├── gettime.h ├── lang ├── verify.h └── algorithm.h ├── stop.sh ├── rsmtest_client.h ├── lock_demo.cc ├── lock_client.h ├── lock_protocol.h ├── log.h ├── extent_server.h ├── rsmtest_client.cc ├── extent_smain.cc ├── extent_protocol.h ├── rsm_tester.cc ├── lock_server.h ├── extent_client.h ├── lock_client.cc ├── lock_server.cc ├── config.h ├── lock_smain.cc ├── lock_server_cache.h ├── README.md ├── handle.h ├── lock_client_cache.h ├── extent_server.cc ├── start.sh ├── rsm_protocol.h ├── paxos_protocol.h ├── rsm.h ├── handle.cc ├── paxos.h ├── log.cc ├── test-lab-2-a.pl ├── extent_client.cc ├── yfs_client.h ├── lock_server_cache.cc ├── gettime.cc ├── GNUmakefile ├── test-lab-2-b.pl ├── test-lab-3-a.pl ├── lock_tester.cc ├── yfs_client.cc ├── lock_client_cache.cc ├── test-lab-3-c.c ├── config.cc ├── paxos.cc └── test-lab-3-b.c /mkfs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | rm -rf ID/* 4 | mkdir -p ID 5 | echo -n > ID/0000000000000001 6 | -------------------------------------------------------------------------------- /rpc/jsl_log.cc: -------------------------------------------------------------------------------- 1 | #include "jsl_log.h" 2 | 3 | int JSL_DEBUG_LEVEL = 0; 4 | void 5 | jsl_set_debug(int level) { 6 | JSL_DEBUG_LEVEL = level; 7 | } 8 | 9 | 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.[oad] 2 | lock_demo 3 | lock_server 4 | lock_tester 5 | rpctest 6 | tags 7 | cscope.out 8 | cscope.po.out 9 | cscope.in.out 10 | extent_server 11 | yfs_client 12 | debug.txt 13 | yfs1/ 14 | yfs2/ 15 | *.log 16 | test-lab-3-b 17 | test-lab-3-c 18 | rsm_tester 19 | -------------------------------------------------------------------------------- /rsm_state_transfer.h: -------------------------------------------------------------------------------- 1 | #ifndef rsm_state_transfer_h 2 | #define rsm_state_transfer_h 3 | 4 | class rsm_state_transfer { 5 | public: 6 | virtual std::string marshal_state() = 0; 7 | virtual void unmarshal_state(std::string) = 0; 8 | virtual ~rsm_state_transfer() {}; 9 | }; 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /tprintf.h: -------------------------------------------------------------------------------- 1 | #ifndef TPRINTF_H 2 | #define TPRINTF_H 3 | 4 | #define tprintf(args...) do { \ 5 | struct timeval tv; \ 6 | gettimeofday(&tv, 0); \ 7 | printf("%lu:\t", tv.tv_sec * 1000 + tv.tv_usec / 1000);\ 8 | printf(args); \ 9 | } while (0); 10 | #endif 11 | -------------------------------------------------------------------------------- /gettime.h: -------------------------------------------------------------------------------- 1 | #ifndef gettime_h 2 | #define gettime_h 3 | 4 | #ifdef __APPLE__ 5 | typedef enum { 6 | CLOCK_REALTIME, 7 | CLOCK_MONOTONIC, 8 | CLOCK_PROCESS_CPUTIME_ID, 9 | CLOCK_THREAD_CPUTIME_ID 10 | } clockid_t; 11 | 12 | int clock_gettime(clockid_t clk_id, struct timespec *tp); 13 | #endif 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /lang/verify.h: -------------------------------------------------------------------------------- 1 | // safe assertions. 2 | 3 | #ifndef verify_client_h 4 | #define verify_client_h 5 | 6 | #include 7 | #include 8 | 9 | #ifdef NDEBUG 10 | #define VERIFY(expr) do { if (!(expr)) abort(); } while (0) 11 | #else 12 | #define VERIFY(expr) assert(expr) 13 | #endif 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /lang/algorithm.h: -------------------------------------------------------------------------------- 1 | // compile time version of min and max 2 | 3 | #ifndef algorithm_h 4 | #define algorithm_h 5 | 6 | template 7 | struct static_max 8 | { 9 | static const int value = A > B ? A : B; 10 | }; 11 | 12 | template 13 | struct static_min 14 | { 15 | static const int value = A < B ? A : B; 16 | }; 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /stop.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | YFSDIR1=$PWD/yfs1 4 | YFSDIR2=$PWD/yfs2 5 | 6 | export PATH=$PATH:/usr/local/bin 7 | UMOUNT="umount" 8 | if [ -f "/usr/local/bin/fusermount" -o -f "/usr/bin/fusermount" -o -f "/bin/fusermount" ]; then 9 | UMOUNT="fusermount -u"; 10 | fi 11 | $UMOUNT $YFSDIR1 12 | $UMOUNT $YFSDIR2 13 | killall extent_server 14 | killall yfs_client 15 | killall lock_server 16 | -------------------------------------------------------------------------------- /rpc/slock.h: -------------------------------------------------------------------------------- 1 | #ifndef __SCOPED_LOCK__ 2 | #define __SCOPED_LOCK__ 3 | 4 | #include 5 | #include "lang/verify.h" 6 | struct ScopedLock { 7 | private: 8 | pthread_mutex_t *m_; 9 | public: 10 | ScopedLock(pthread_mutex_t *m): m_(m) { 11 | VERIFY(pthread_mutex_lock(m_)==0); 12 | } 13 | ~ScopedLock() { 14 | VERIFY(pthread_mutex_unlock(m_)==0); 15 | } 16 | }; 17 | #endif /*__SCOPED_LOCK__*/ 18 | -------------------------------------------------------------------------------- /rsmtest_client.h: -------------------------------------------------------------------------------- 1 | // rsmtest client interface. 2 | 3 | #ifndef rsmtest_client_h 4 | #define rsmtest_client_h 5 | 6 | #include 7 | #include "rsm_protocol.h" 8 | #include "rpc.h" 9 | 10 | // Client interface to the rsmtest server 11 | class rsmtest_client { 12 | protected: 13 | rpcc *cl; 14 | public: 15 | rsmtest_client(std::string d); 16 | virtual ~rsmtest_client() {}; 17 | virtual rsm_test_protocol::status net_repair(int heal); 18 | virtual rsm_test_protocol::status breakpoint(int b); 19 | }; 20 | #endif 21 | -------------------------------------------------------------------------------- /lock_demo.cc: -------------------------------------------------------------------------------- 1 | // 2 | // Lock demo 3 | // 4 | 5 | #include "lock_protocol.h" 6 | #include "lock_client.h" 7 | #include "rpc.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | std::string dst; 14 | lock_client *lc; 15 | 16 | int 17 | main(int argc, char *argv[]) 18 | { 19 | int r; 20 | 21 | if(argc != 2){ 22 | fprintf(stderr, "Usage: %s [host:]port\n", argv[0]); 23 | exit(1); 24 | } 25 | 26 | dst = argv[1]; 27 | lc = new lock_client(dst); 28 | r = lc->stat(1); 29 | printf ("stat returned %d\n", r); 30 | } 31 | -------------------------------------------------------------------------------- /lock_client.h: -------------------------------------------------------------------------------- 1 | // lock client interface. 2 | 3 | #ifndef lock_client_h 4 | #define lock_client_h 5 | 6 | #include 7 | #include "lock_protocol.h" 8 | #include "rpc.h" 9 | #include 10 | 11 | // Client interface to the lock server 12 | class lock_client { 13 | protected: 14 | rpcc *cl; 15 | public: 16 | lock_client(std::string d); 17 | virtual ~lock_client() {}; 18 | virtual lock_protocol::status acquire(lock_protocol::lockid_t); 19 | virtual lock_protocol::status release(lock_protocol::lockid_t); 20 | virtual lock_protocol::status stat(lock_protocol::lockid_t); 21 | }; 22 | 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /lock_protocol.h: -------------------------------------------------------------------------------- 1 | // lock protocol 2 | 3 | #ifndef lock_protocol_h 4 | #define lock_protocol_h 5 | 6 | #include "rpc.h" 7 | 8 | class lock_protocol { 9 | public: 10 | enum xxstatus { OK, RETRY, RPCERR, NOENT, IOERR }; 11 | typedef int status; 12 | typedef unsigned long long lockid_t; 13 | typedef unsigned long long xid_t; 14 | enum rpc_numbers { 15 | acquire = 0x7001, 16 | release, 17 | stat 18 | }; 19 | }; 20 | 21 | class rlock_protocol { 22 | public: 23 | enum xxstatus { OK, RPCERR }; 24 | typedef int status; 25 | enum rpc_numbers { 26 | revoke = 0x8001, 27 | retry = 0x8002 28 | }; 29 | }; 30 | #endif 31 | -------------------------------------------------------------------------------- /log.h: -------------------------------------------------------------------------------- 1 | #ifndef log_h 2 | #define log_h 3 | 4 | #include 5 | #include 6 | 7 | 8 | class acceptor; 9 | 10 | class log { 11 | private: 12 | std::string name; 13 | acceptor *pxs; 14 | public: 15 | log (acceptor*, std::string _me); 16 | std::string dump(); 17 | void restore(std::string s); 18 | void logread(void); 19 | /* Log a committed paxos instance*/ 20 | void loginstance(unsigned instance, std::string v); 21 | /* Log the highest proposal number that the local paxos acceptor has ever seen */ 22 | void logprop(prop_t n_h); 23 | /* Log the proposal (proposal number and value) that the local paxos acceptor 24 | accept has ever accepted */ 25 | void logaccept(prop_t n_a, std::string v); 26 | }; 27 | 28 | #endif /* log_h */ 29 | -------------------------------------------------------------------------------- /extent_server.h: -------------------------------------------------------------------------------- 1 | // this is the extent server 2 | 3 | #ifndef extent_server_h 4 | #define extent_server_h 5 | 6 | #include 7 | #include 8 | #include "extent_protocol.h" 9 | 10 | class extent_server { 11 | 12 | public: 13 | struct dir_info { 14 | extent_protocol::attr attr; 15 | std::string buf; 16 | }; 17 | extent_server(); 18 | 19 | int put(extent_protocol::extentid_t id, std::string, int &); 20 | int get(extent_protocol::extentid_t id, std::string &); 21 | int getattr(extent_protocol::extentid_t id, extent_protocol::attr &); 22 | int remove(extent_protocol::extentid_t id, int &); 23 | private: 24 | std::unordered_map data; 25 | std::mutex mtx; 26 | }; 27 | 28 | #endif 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /rpc/jsl_log.h: -------------------------------------------------------------------------------- 1 | #ifndef __JSL_LOG_H__ 2 | #define __JSL_LOG_H__ 1 3 | 4 | enum dbcode { 5 | JSL_DBG_OFF = 0, 6 | JSL_DBG_1 = 1, // Critical 7 | JSL_DBG_2 = 2, // Error 8 | JSL_DBG_3 = 3, // Info 9 | JSL_DBG_4 = 4, // Debugging 10 | }; 11 | 12 | extern int JSL_DEBUG_LEVEL; 13 | 14 | #define jsl_log(level,...) \ 15 | do { \ 16 | if(JSL_DEBUG_LEVEL < abs(level)) \ 17 | {;} \ 18 | else { \ 19 | { printf(__VA_ARGS__);} \ 20 | } \ 21 | } while(0) 22 | 23 | void jsl_set_debug(int level); 24 | 25 | #endif // __JSL_LOG_H__ 26 | -------------------------------------------------------------------------------- /rsmtest_client.cc: -------------------------------------------------------------------------------- 1 | // RPC stubs for clients to talk to rsmtest_server 2 | 3 | #include "rsmtest_client.h" 4 | #include "rpc.h" 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | rsmtest_client::rsmtest_client(std::string dst) 12 | { 13 | sockaddr_in dstsock; 14 | make_sockaddr(dst.c_str(), &dstsock); 15 | cl = new rpcc(dstsock); 16 | if (cl->bind() < 0) { 17 | printf("rsmtest_client: call bind\n"); 18 | } 19 | } 20 | 21 | int 22 | rsmtest_client::net_repair(int heal) 23 | { 24 | int r; 25 | int ret = cl->call(rsm_test_protocol::net_repair, heal, r); 26 | VERIFY (ret == rsm_test_protocol::OK); 27 | return r; 28 | } 29 | 30 | int 31 | rsmtest_client::breakpoint(int b) 32 | { 33 | int r; 34 | int ret = cl->call(rsm_test_protocol::breakpoint, b, r); 35 | VERIFY (ret == rsm_test_protocol::OK); 36 | return r; 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /extent_smain.cc: -------------------------------------------------------------------------------- 1 | #include "rpc.h" 2 | #include 3 | #include 4 | #include 5 | #include "extent_server.h" 6 | 7 | // Main loop of extent server 8 | 9 | int 10 | main(int argc, char *argv[]) 11 | { 12 | int count = 0; 13 | 14 | if(argc != 2){ 15 | fprintf(stderr, "Usage: %s port\n", argv[0]); 16 | exit(1); 17 | } 18 | 19 | setvbuf(stdout, NULL, _IONBF, 0); 20 | 21 | char *count_env = getenv("RPC_COUNT"); 22 | if(count_env != NULL){ 23 | count = atoi(count_env); 24 | } 25 | 26 | rpcs server(atoi(argv[1]), count); 27 | extent_server ls; 28 | 29 | server.reg(extent_protocol::get, &ls, &extent_server::get); 30 | server.reg(extent_protocol::getattr, &ls, &extent_server::getattr); 31 | server.reg(extent_protocol::put, &ls, &extent_server::put); 32 | server.reg(extent_protocol::remove, &ls, &extent_server::remove); 33 | 34 | while(1) 35 | sleep(1000); 36 | } 37 | -------------------------------------------------------------------------------- /extent_protocol.h: -------------------------------------------------------------------------------- 1 | // extent wire protocol 2 | 3 | #ifndef extent_protocol_h 4 | #define extent_protocol_h 5 | 6 | #include "rpc.h" 7 | 8 | class extent_protocol { 9 | public: 10 | typedef int status; 11 | typedef unsigned long long extentid_t; 12 | enum xxstatus { OK, RPCERR, NOENT, IOERR }; 13 | enum rpc_numbers { 14 | put = 0x6001, 15 | get, 16 | getattr, 17 | remove 18 | }; 19 | 20 | struct attr { 21 | unsigned int atime; 22 | unsigned int mtime; 23 | unsigned int ctime; 24 | unsigned int size; 25 | }; 26 | }; 27 | 28 | inline unmarshall & 29 | operator>>(unmarshall &u, extent_protocol::attr &a) 30 | { 31 | u >> a.atime; 32 | u >> a.mtime; 33 | u >> a.ctime; 34 | u >> a.size; 35 | return u; 36 | } 37 | 38 | inline marshall & 39 | operator<<(marshall &m, extent_protocol::attr a) 40 | { 41 | m << a.atime; 42 | m << a.mtime; 43 | m << a.ctime; 44 | m << a.size; 45 | return m; 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /rsm_tester.cc: -------------------------------------------------------------------------------- 1 | // 2 | // RSM test client 3 | // 4 | 5 | #include "rsm_protocol.h" 6 | #include "rsmtest_client.h" 7 | #include "rpc.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | using namespace std; 14 | 15 | rsmtest_client *lc; 16 | 17 | int 18 | main(int argc, char *argv[]) 19 | { 20 | int r; 21 | 22 | if(argc != 4){ 23 | fprintf(stderr, "Usage: %s [host:]port [partition] arg\n", argv[0]); 24 | exit(1); 25 | } 26 | 27 | lc = new rsmtest_client(argv[1]); 28 | string command(argv[2]); 29 | if (command == "partition") { 30 | r = lc->net_repair(atoi(argv[3])); 31 | printf ("net_repair returned %d\n", r); 32 | } else if (command == "breakpoint") { 33 | int b = atoi(argv[3]); 34 | r = lc->breakpoint(b); 35 | printf ("breakpoint %d returned %d\n", b, r); 36 | } else { 37 | fprintf(stderr, "Unknown command %s\n", argv[2]); 38 | } 39 | exit(0); 40 | } 41 | -------------------------------------------------------------------------------- /lock_server.h: -------------------------------------------------------------------------------- 1 | // this is the lock server 2 | // the lock client has a similar interface 3 | 4 | #ifndef lock_server_h 5 | #define lock_server_h 6 | 7 | #include 8 | #include 9 | #include "lock_protocol.h" 10 | #include "lock_client.h" 11 | #include "rpc.h" 12 | struct lock_state { 13 | enum STATE { 14 | FREE = 0, 15 | BUSY, 16 | } state; 17 | std::condition_variable cv; 18 | }; 19 | 20 | class lock_server { 21 | protected: 22 | int nacquire; 23 | std::unordered_map used_locks; 24 | std::mutex server_mtx; 25 | std::condition_variable cv; 26 | 27 | public: 28 | lock_server(); 29 | ~lock_server() {}; 30 | lock_protocol::status stat(int clt, lock_protocol::lockid_t lid, int &); 31 | lock_protocol::status acquire(int clt, lock_protocol::lockid_t lid, int &); 32 | lock_protocol::status release(int clt, lock_protocol::lockid_t lid, int &); 33 | }; 34 | 35 | #endif 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /extent_client.h: -------------------------------------------------------------------------------- 1 | // extent client interface. 2 | 3 | #ifndef extent_client_h 4 | #define extent_client_h 5 | 6 | #include 7 | #include "extent_protocol.h" 8 | #include "rpc.h" 9 | 10 | class extent_client { 11 | private: 12 | rpcc *cl; 13 | struct extent_cache { 14 | std::string data; 15 | extent_protocol::attr attr; 16 | bool dirty; 17 | bool attr_only; 18 | bool removed; 19 | }; 20 | 21 | std::mutex cache_mtx; 22 | std::unordered_map cache_map; 23 | 24 | public: 25 | extent_client(std::string dst); 26 | 27 | extent_protocol::status get(extent_protocol::extentid_t eid, 28 | std::string &buf); 29 | extent_protocol::status getattr(extent_protocol::extentid_t eid, 30 | extent_protocol::attr &a); 31 | extent_protocol::status put(extent_protocol::extentid_t eid, std::string buf); 32 | extent_protocol::status remove(extent_protocol::extentid_t eid); 33 | extent_protocol::status flush(extent_protocol::extentid_t eid); 34 | 35 | }; 36 | 37 | #endif 38 | 39 | -------------------------------------------------------------------------------- /lock_client.cc: -------------------------------------------------------------------------------- 1 | // RPC stubs for clients to talk to lock_server 2 | 3 | #include "lock_client.h" 4 | #include "rpc.h" 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | lock_client::lock_client(std::string dst) 12 | { 13 | sockaddr_in dstsock; 14 | make_sockaddr(dst.c_str(), &dstsock); 15 | cl = new rpcc(dstsock); 16 | if (cl->bind() < 0) { 17 | printf("lock_client: call bind\n"); 18 | } 19 | } 20 | 21 | int 22 | lock_client::stat(lock_protocol::lockid_t lid) 23 | { 24 | int r; 25 | lock_protocol::status ret = cl->call(lock_protocol::stat, cl->id(), lid, r); 26 | VERIFY (ret == lock_protocol::OK); 27 | return r; 28 | } 29 | 30 | lock_protocol::status 31 | lock_client::acquire(lock_protocol::lockid_t lid) 32 | { 33 | int r; 34 | lock_protocol::status ret = cl->call(lock_protocol::acquire, cl->id(), lid, r); 35 | VERIFY (ret == lock_protocol::OK); 36 | return r; 37 | } 38 | 39 | lock_protocol::status 40 | lock_client::release(lock_protocol::lockid_t lid) 41 | { 42 | int r; 43 | lock_protocol::status ret = cl->call(lock_protocol::release, cl->id(), lid, r); 44 | VERIFY (ret == lock_protocol::OK); 45 | return r; 46 | } 47 | 48 | -------------------------------------------------------------------------------- /rpc/thr_pool.h: -------------------------------------------------------------------------------- 1 | #ifndef __THR_POOL__ 2 | #define __THR_POOL__ 3 | 4 | #include 5 | #include 6 | 7 | #include "fifo.h" 8 | 9 | class ThrPool { 10 | 11 | 12 | public: 13 | struct job_t { 14 | void *(*f)(void *); //function point 15 | void *a; //function arguments 16 | }; 17 | 18 | ThrPool(int sz, bool blocking=true); 19 | ~ThrPool(); 20 | template bool addObjJob(C *o, void (C::*m)(A), A a); 21 | void waitDone(); 22 | 23 | bool takeJob(job_t *j); 24 | 25 | private: 26 | pthread_attr_t attr_; 27 | int nthreads_; 28 | bool blockadd_; 29 | 30 | 31 | fifo jobq_; 32 | std::vector th_; 33 | 34 | bool addJob(void *(*f)(void *), void *a); 35 | }; 36 | 37 | template bool 38 | ThrPool::addObjJob(C *o, void (C::*m)(A), A a) 39 | { 40 | 41 | class objfunc_wrapper { 42 | public: 43 | C *o; 44 | void (C::*m)(A a); 45 | A a; 46 | static void *func(void *vvv) { 47 | objfunc_wrapper *x = (objfunc_wrapper*)vvv; 48 | C *o = x->o; 49 | void (C::*m)(A ) = x->m; 50 | A a = x->a; 51 | (o->*m)(a); 52 | delete x; 53 | return 0; 54 | } 55 | }; 56 | 57 | objfunc_wrapper *x = new objfunc_wrapper; 58 | x->o = o; 59 | x->m = m; 60 | x->a = a; 61 | return addJob(&objfunc_wrapper::func, (void *)x); 62 | } 63 | 64 | 65 | #endif 66 | 67 | -------------------------------------------------------------------------------- /lock_server.cc: -------------------------------------------------------------------------------- 1 | // the lock server implementation 2 | 3 | #include "lock_server.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | lock_server::lock_server(): 10 | nacquire (0) 11 | { 12 | } 13 | 14 | lock_protocol::status 15 | lock_server::stat(int clt, lock_protocol::lockid_t lid, int &r) 16 | { 17 | lock_protocol::status ret = lock_protocol::OK; 18 | printf("stat request from clt %d\n", clt); 19 | r = nacquire; 20 | return ret; 21 | } 22 | 23 | 24 | lock_protocol::status 25 | lock_server::acquire(int clt, lock_protocol::lockid_t lid, int &r) 26 | { 27 | std::unique_lock ulock(server_mtx); 28 | if (used_locks.find(lid) == used_locks.end()) 29 | { 30 | used_locks[lid].state = lock_state::BUSY; 31 | } 32 | else 33 | { 34 | while (used_locks.find(lid) != used_locks.end() \ 35 | && used_locks[lid].state != lock_state::FREE) 36 | { 37 | cv.wait(ulock); 38 | //used_locks[lid].cv.wait(ulock); 39 | } 40 | used_locks[lid].state = lock_state::BUSY; 41 | } 42 | return lock_protocol::OK; 43 | } 44 | 45 | lock_protocol::status 46 | lock_server::release(int clt, lock_protocol::lockid_t lid, int &r) 47 | { 48 | std::unique_lock ulock(server_mtx); 49 | if (used_locks.find(lid) != used_locks.end() \ 50 | && used_locks[lid].state != lock_state::FREE) 51 | { 52 | used_locks[lid].state = lock_state::FREE; 53 | //used_locks[lid].cv.notify_all(); 54 | cv.notify_all(); 55 | } 56 | return lock_protocol::OK; 57 | } 58 | -------------------------------------------------------------------------------- /rpc/thr_pool.cc: -------------------------------------------------------------------------------- 1 | #include "slock.h" 2 | #include "thr_pool.h" 3 | #include 4 | #include 5 | #include "lang/verify.h" 6 | 7 | static void * 8 | do_worker(void *arg) 9 | { 10 | ThrPool *tp = (ThrPool *)arg; 11 | while (1) { 12 | ThrPool::job_t j; 13 | if (!tp->takeJob(&j)) 14 | break; //die 15 | 16 | (void)(j.f)(j.a); 17 | } 18 | pthread_exit(NULL); 19 | } 20 | 21 | //if blocking, then addJob() blocks when queue is full 22 | //otherwise, addJob() simply returns false when queue is full 23 | ThrPool::ThrPool(int sz, bool blocking) 24 | : nthreads_(sz),blockadd_(blocking),jobq_(100*sz) 25 | { 26 | pthread_attr_init(&attr_); 27 | pthread_attr_setstacksize(&attr_, 128<<10); 28 | 29 | for (int i = 0; i < sz; i++) { 30 | pthread_t t; 31 | VERIFY(pthread_create(&t, &attr_, do_worker, (void *)this) ==0); 32 | th_.push_back(t); 33 | } 34 | } 35 | 36 | //IMPORTANT: this function can be called only when no external thread 37 | //will ever use this thread pool again or is currently blocking on it 38 | ThrPool::~ThrPool() 39 | { 40 | for (int i = 0; i < nthreads_; i++) { 41 | job_t j; 42 | j.f = (void *(*)(void *))NULL; //poison pill to tell worker threads to exit 43 | jobq_.enq(j); 44 | } 45 | 46 | for (int i = 0; i < nthreads_; i++) { 47 | VERIFY(pthread_join(th_[i], NULL)==0); 48 | } 49 | 50 | VERIFY(pthread_attr_destroy(&attr_)==0); 51 | } 52 | 53 | bool 54 | ThrPool::addJob(void *(*f)(void *), void *a) 55 | { 56 | job_t j; 57 | j.f = f; 58 | j.a = a; 59 | 60 | return jobq_.enq(j,blockadd_); 61 | } 62 | 63 | bool 64 | ThrPool::takeJob(job_t *j) 65 | { 66 | jobq_.deq(j); 67 | return (j->f!=NULL); 68 | } 69 | 70 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | #ifndef config_h 2 | #define config_h 3 | 4 | #include 5 | #include 6 | #include "paxos.h" 7 | 8 | class config_view_change { 9 | public: 10 | virtual void commit_change(unsigned vid) = 0; 11 | virtual ~config_view_change() {}; 12 | }; 13 | 14 | class config : public paxos_change { 15 | private: 16 | acceptor *acc; 17 | proposer *pro; 18 | rpcs *pxsrpc; 19 | unsigned myvid; 20 | std::string first; 21 | std::string me; 22 | config_view_change *vc; 23 | std::vector mems; 24 | pthread_mutex_t cfg_mutex; 25 | pthread_cond_t heartbeat_cond; 26 | pthread_cond_t config_cond; 27 | paxos_protocol::status heartbeat(std::string m, unsigned instance, int &r); 28 | std::string value(std::vector mems); 29 | std::vector members(std::string v); 30 | std::vector get_view_wo(unsigned instance); 31 | bool remove_wo(std::string); 32 | void reconstruct(); 33 | typedef enum { 34 | OK, // response and same view # 35 | VIEWERR, // response but different view # 36 | FAILURE, // no response 37 | } heartbeat_t; 38 | heartbeat_t doheartbeat(std::string m); 39 | public: 40 | config(std::string _first, std::string _me, config_view_change *_vc); 41 | unsigned vid() { return myvid; } 42 | std::string myaddr() { return me; }; 43 | std::string dump() { return acc->dump(); }; 44 | std::vector get_view(unsigned instance); 45 | void restore(std::string s); 46 | bool add(std::string, unsigned vid); 47 | bool ismember(std::string m, unsigned vid); 48 | void heartbeater(void); 49 | void paxos_commit(unsigned instance, std::string v); 50 | rpcs *get_rpcs() { return acc->get_rpcs(); } 51 | void breakpoint(int b) { pro->breakpoint(b); } 52 | }; 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /lock_smain.cc: -------------------------------------------------------------------------------- 1 | #include "rpc.h" 2 | #include 3 | #include 4 | #include 5 | #include "lock_server_cache.h" 6 | #include "paxos.h" 7 | #include "rsm.h" 8 | #include "lock_server.h" 9 | 10 | #include "jsl_log.h" 11 | 12 | // Main loop of lock_server 13 | 14 | int 15 | main(int argc, char *argv[]) 16 | { 17 | int count = 0; 18 | 19 | setvbuf(stdout, NULL, _IONBF, 0); 20 | setvbuf(stderr, NULL, _IONBF, 0); 21 | 22 | srandom(getpid()); 23 | 24 | if(argc != 3){ 25 | fprintf(stderr, "Usage: %s [master:]port [me:]port\n", argv[0]); 26 | exit(1); 27 | } 28 | 29 | char *count_env = getenv("RPC_COUNT"); 30 | if(count_env != NULL){ 31 | count = atoi(count_env); 32 | } 33 | 34 | //jsl_set_debug(2); 35 | // Comment out the next line to switch between the ordinary lock 36 | // server and the RSM. In Lab 6, we disable the lock server and 37 | // implement Paxos. In Lab 7, we will make the lock server use your 38 | // RSM layer. 39 | #define RSM 40 | #ifdef RSM 41 | rsm rsm(argv[1], argv[2]); 42 | #endif // RSM 43 | 44 | #ifndef RSM 45 | //lock_server ls; 46 | //rpcs server(atoi(argv[1]), count); 47 | //server.reg(lock_protocol::stat, &ls, &lock_server::stat); 48 | //server.reg(lock_protocol::acquire, &ls, &lock_server::acquire); 49 | //server.reg(lock_protocol::release, &ls, &lock_server::release); 50 | lock_server_cache ls; 51 | rpcs server(atoi(argv[1]), count); 52 | server.reg(lock_protocol::stat, &ls, &lock_server_cache::stat); 53 | server.reg(lock_protocol::acquire, &ls, &lock_server_cache::acquire); 54 | server.reg(lock_protocol::release, &ls, &lock_server_cache::release); 55 | #endif 56 | 57 | 58 | while(1) 59 | sleep(1000); 60 | } 61 | -------------------------------------------------------------------------------- /lock_server_cache.h: -------------------------------------------------------------------------------- 1 | #ifndef lock_server_cache_h 2 | #define lock_server_cache_h 3 | 4 | #include 5 | 6 | #include 7 | #include "lock_protocol.h" 8 | #include "rpc.h" 9 | #include "lock_server.h" 10 | 11 | 12 | class lock_server_cache { 13 | public: 14 | class set_queue 15 | { 16 | private: 17 | std::set s; 18 | std::queue q; 19 | public: 20 | void push(std::string cid) 21 | { 22 | if (s.find(cid) == s.end()) 23 | { 24 | s.insert(cid); 25 | q.push(cid); 26 | } 27 | } 28 | 29 | void pop() 30 | { 31 | auto cid = q.front(); 32 | s.erase(cid); 33 | q.pop(); 34 | } 35 | 36 | const std::string& front() const 37 | { 38 | return q.front(); 39 | } 40 | 41 | bool empty() 42 | { 43 | return s.empty(); 44 | } 45 | }; 46 | struct server_lock 47 | { 48 | enum XXSTATUS 49 | { 50 | FREE, 51 | LOCKED, 52 | REVOKE_SENT, 53 | }; 54 | int status; 55 | std::string client_id; 56 | std::mutex mtx; 57 | set_queue waiting_cids; 58 | server_lock(std::string cid) : status(FREE), client_id(cid) {} 59 | }; 60 | lock_server_cache(); 61 | lock_protocol::status stat(lock_protocol::lockid_t, int &); 62 | int acquire(lock_protocol::lockid_t, std::string id, int &); 63 | int release(lock_protocol::lockid_t, std::string id, int &); 64 | private: 65 | int nacquire; 66 | std::mutex server_mtx; 67 | std::unordered_map> lock_cache; 68 | }; 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 6.824-2012 代码思路、实现及相关话题讨论 2 | ## 概述 3 | 选择6.824-2012来进行分布式系统的初步学习,主要就是看中其高质量的配套lab可以让人在实践中更好理解分布式系统的相关概念。之所以选择2012年而非2017年的课程lab,主要是考虑到 4 | - 2012年的lab可以看做是一个较完整的mini分布式系统,在给出的代码框架上从底到顶实现了rpc语义、客户端服务器逻辑、缓存及一致性、基于Paxos协议的复制状态机。而2017年的lab则是4个较为独立的topic。 5 | - 2012年的lab使用C++而2017年的lab使用Go。对于C++ 11中引入的concurrency相关的特性一直想找机会学习一下,这一点2012年的lab刚好可以满足。 6 | 7 | 对于2017课程中,在2012年未涉及的一些重要topic例如Raft协议、工业界的大量新系统研究等,则单独进行学习。 8 | 9 | ![YFS分布式文件系统结构](https://pdos.csail.mit.edu/archive/6.824-2012/labs/yfs.jpg "YFS分布式文件系统结构") 10 | 11 | 上图是本lab实现的名为YFS的分布式文件系统的结构图。YFS利用Linux FUSE接口在不修改内核代码的前提下创建了一个文件系统,接管了基础文件操作例如create, mkdir, lookup, read/write等。YFS客户端将这些操作转发到存储服务器extent_server进行实际的读写查询,期间使用锁服务器来保证内容的一致性。客户端和服务器均使用了缓存机制来提高运行效率。锁服务器使用了基于Paxos的复制状态机技术进行了复制来应对宕机。 12 | 13 | 14 | ## Lab1: 锁服务、RPC语义及相关C++知识 15 | ### 目标 16 | Lab1给出了一个rpc基础库和locking service的C/S端的基础框架,需要实现的是rpc的at most once语义(testcase不要求考虑重启宕机)以及locking service的服务器端代码。完成后,locking service的C/S端之间使用rpc进行通信,完成锁的获取acquire和释放release。 17 | ### RPC基础库 18 | rpc可以简单理解为通信双方按照约定的协议和数据格式进行信息传递的过程。 19 | 著名的rpc库,例如grpc,因为其设计的出发点是一个general rpc framework,需要支持跨平台/多语言等,所以往往是大而全的,阅读起来并不简单。而lab中提供的rpc基础库可谓麻雀虽小五脏俱全,实现上也颇有技巧,值得一读(并不太清楚lab中提供的rpc是完全为此lab编写还是来自开源项目)。其中各部分的主要内容如下: 20 | 21 | - fifo 实现了一个blocking queue供PollMgr使用。实现方式是典型的双向链表配合1个互斥锁+2个条件变量的方式:入队和出队分别wait一个条件变量,在完成入队或出队操作后singal对方的条件变量。 22 | - thr_pool 使用fifo实现了一个线程池 23 | - jsl_log 日志模块。定义了一个简单的带级别设置的log宏。如果从一个服务器端通用的logging模块来说显然是远远不够的,而为一个不大的项目引入如log4cpp这样庞大的库也不够简洁。常见的简洁方案有:chenshuo在[muduo](https://github.com/chenshuo/muduo)中使用的Double Buffering(双缓冲),yedf在[C++ 多线程安全无锁日志系统](https://zhuanlan.zhihu.com/p/21477468)中使用O_APPEND方式打开日志文件,dup2系统调用实现轮替。两位都总结了服务器端日志系统应该做到的几点: 24 | - 高效:写日志本身不应该占用太多系统资源,并且不能阻塞服务器逻辑 25 | - 线程安全:多个线程能够同时写日志,日志之间不会出现交织 26 | - 滚动/轮替:在长时间运行的系统中,日志应该可以按照一定规则进行归档,以避免单个日志文件过大 27 | - 异常处理:程序崩溃退出时,最后的日志不能丢 28 | - marshall 通过重载移位操作提供了对协议数据的简单序列化反序列化 29 | - connection 封装了socket通信的相关操作 30 | - pollmgr 在connection的基础上封装了io复用(select和epoll) 31 | - slock 作用域锁,等同于C++ 17中的scoped_lock 32 | - rpc 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /handle.h: -------------------------------------------------------------------------------- 1 | // manage a cache of RPC connections. 2 | // assuming cid is a std::string holding the 3 | // host:port of the RPC server you want 4 | // to talk to: 5 | // 6 | // handle h(cid); 7 | // rpcc *cl = h.safebind(); 8 | // if(cl){ 9 | // ret = cl->call(...); 10 | // } else { 11 | // bind() failed 12 | // } 13 | // 14 | // if the calling program has not contacted 15 | // cid before, safebind() will create a new 16 | // connection, call bind(), and return 17 | // an rpcc*, or 0 if bind() failed. if the 18 | // program has previously contacted cid, 19 | // safebind() just returns the previously 20 | // created rpcc*. best not to hold any 21 | // mutexes while calling safebind(). 22 | 23 | #ifndef handle_h 24 | #define handle_h 25 | 26 | #include 27 | #include 28 | #include "rpc.h" 29 | 30 | struct hinfo { 31 | rpcc *cl; 32 | int refcnt; 33 | bool del; 34 | std::string m; 35 | pthread_mutex_t cl_mutex; 36 | }; 37 | 38 | class handle { 39 | private: 40 | struct hinfo *h; 41 | public: 42 | handle(std::string m); 43 | ~handle(); 44 | /* safebind will try to bind with the rpc server on the first call. 45 | * Since bind may block, the caller probably should not hold a mutex 46 | * when calling safebind. 47 | * 48 | * return: 49 | * if the first safebind succeeded, all later calls would return 50 | * a rpcc object; otherwise, all later calls would return NULL. 51 | * 52 | * Example: 53 | * handle h(dst); 54 | * XXX_protocol::status ret; 55 | * if (h.safebind()) { 56 | * ret = h.safebind()->call(...); 57 | * } 58 | * if (!h.safebind() || ret != XXX_protocol::OK) { 59 | * // handle failure 60 | * } 61 | */ 62 | rpcc *safebind(); 63 | }; 64 | 65 | class handle_mgr { 66 | private: 67 | pthread_mutex_t handle_mutex; 68 | std::map hmap; 69 | public: 70 | handle_mgr(); 71 | struct hinfo *get_handle(std::string m); 72 | void done_handle(struct hinfo *h); 73 | void delete_handle(std::string m); 74 | void delete_handle_wo(std::string m); 75 | }; 76 | 77 | extern class handle_mgr mgr; 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /lock_client_cache.h: -------------------------------------------------------------------------------- 1 | // lock client interface. 2 | 3 | #ifndef lock_client_cache_h 4 | 5 | #define lock_client_cache_h 6 | 7 | #include 8 | #include "lock_protocol.h" 9 | #include "rpc.h" 10 | #include "lock_client.h" 11 | #include "lang/verify.h" 12 | 13 | // Classes that inherit lock_release_user can override dorelease so that 14 | // that they will be called when lock_client releases a lock. 15 | // You will not need to do anything with this class until Lab 5. 16 | class lock_release_user { 17 | public: 18 | virtual void dorelease(lock_protocol::lockid_t) = 0; 19 | virtual ~lock_release_user() {}; 20 | }; 21 | 22 | 23 | 24 | class lock_client_cache : public lock_client { 25 | private: 26 | class lock_release_user *lu; 27 | int rlock_port; 28 | std::string hostname; 29 | std::string id; 30 | struct client_lock { 31 | enum lockstatus { 32 | NONE, 33 | FREE, 34 | LOCKED, 35 | ACQUIRING, 36 | RELEASING, 37 | }; 38 | lock_protocol::lockid_t lock_id; 39 | int status = NONE; 40 | std::mutex mtx; 41 | std::condition_variable available_cv; 42 | std::condition_variable release_cv; 43 | std::condition_variable retry_cv; 44 | int num_revoke = 0; 45 | int num_retry = 0; 46 | 47 | client_lock(lock_protocol::lockid_t lid) : lock_id(lid) {} 48 | }; 49 | std::unordered_map> lock_cache; 50 | std::mutex mtx_map; 51 | public: 52 | lock_client_cache(std::string xdst, class lock_release_user *l = 0); 53 | virtual ~lock_client_cache() {}; 54 | lock_protocol::status acquire(lock_protocol::lockid_t); 55 | lock_protocol::status release(lock_protocol::lockid_t); 56 | rlock_protocol::status revoke_handler(lock_protocol::lockid_t, 57 | int &); 58 | rlock_protocol::status retry_handler(lock_protocol::lockid_t, 59 | int &); 60 | }; 61 | 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /extent_server.cc: -------------------------------------------------------------------------------- 1 | // the extent server implementation 2 | 3 | #include "extent_server.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | extent_server::extent_server() { 12 | int ret; 13 | put(1, "", ret); 14 | } 15 | 16 | 17 | int extent_server::put(extent_protocol::extentid_t id, std::string buf, int &) 18 | { 19 | // You fill this in for Lab 2. 20 | std::lock_guard slock(mtx); 21 | auto now = std::time(nullptr); 22 | if (data.find(id) == data.end()) 23 | { 24 | dir_info di; 25 | di.attr.atime = now; 26 | data[id] = di; 27 | } 28 | data[id].buf = std::move(buf); 29 | data[id].attr.mtime = data[id].attr.ctime = now; 30 | return extent_protocol::OK; 31 | } 32 | 33 | int extent_server::get(extent_protocol::extentid_t id, std::string &buf) 34 | { 35 | // You fill this in for Lab 2. 36 | std::lock_guard slock(mtx); 37 | if (data.find(id) == data.end()) 38 | { 39 | return extent_protocol::NOENT; 40 | } 41 | buf = data[id].buf; 42 | data[id].attr.atime = std::time(nullptr); 43 | return extent_protocol::OK; 44 | 45 | } 46 | 47 | int extent_server::getattr(extent_protocol::extentid_t id, extent_protocol::attr &a) 48 | { 49 | // You fill this in for Lab 2. 50 | // You replace this with a real implementation. We send a phony response 51 | // for now because it's difficult to get FUSE to do anything (including 52 | // unmount) if getattr fails. 53 | if (data.find(id) == data.end()) 54 | { 55 | return extent_protocol::NOENT; 56 | } 57 | a.size = data[id].buf.size(); 58 | a.atime = data[id].attr.atime; 59 | a.mtime = data[id].attr.mtime; 60 | a.ctime = data[id].attr.ctime; 61 | return extent_protocol::OK; 62 | } 63 | 64 | int extent_server::remove(extent_protocol::extentid_t id, int &) 65 | { 66 | // You fill this in for Lab 2. 67 | std::lock_guard slock(mtx); 68 | if (data.find(id) == data.end()) 69 | { 70 | return extent_protocol::NOENT; 71 | } 72 | data.erase(id); 73 | return extent_protocol::OK; 74 | } 75 | 76 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ulimit -c unlimited 4 | 5 | LOSSY=$1 6 | NUM_LS=$2 7 | 8 | if [ -z $NUM_LS ]; then 9 | NUM_LS=0 10 | fi 11 | 12 | BASE_PORT=$RANDOM 13 | BASE_PORT=$[BASE_PORT+2000] 14 | EXTENT_PORT=$BASE_PORT 15 | YFS1_PORT=$[BASE_PORT+2] 16 | YFS2_PORT=$[BASE_PORT+4] 17 | LOCK_PORT=$[BASE_PORT+6] 18 | 19 | YFSDIR1=$PWD/yfs1 20 | YFSDIR2=$PWD/yfs2 21 | 22 | if [ "$LOSSY" ]; then 23 | export RPC_LOSSY=$LOSSY 24 | fi 25 | 26 | if [ $NUM_LS -gt 1 ]; then 27 | x=0 28 | rm config 29 | while [ $x -lt $NUM_LS ]; do 30 | port=$[LOCK_PORT+2*x] 31 | x=$[x+1] 32 | echo $port >> config 33 | done 34 | x=0 35 | while [ $x -lt $NUM_LS ]; do 36 | port=$[LOCK_PORT+2*x] 37 | x=$[x+1] 38 | echo "starting ./lock_server $LOCK_PORT $port > lock_server$x.log 2>&1 &" 39 | ./lock_server $LOCK_PORT $port > lock_server$x.log 2>&1 & 40 | sleep 1 41 | done 42 | else 43 | echo "starting ./lock_server $LOCK_PORT > lock_server.log 2>&1 &" 44 | ./lock_server $LOCK_PORT > lock_server.log 2>&1 & 45 | sleep 1 46 | fi 47 | 48 | unset RPC_LOSSY 49 | 50 | echo "starting ./extent_server $EXTENT_PORT > extent_server.log 2>&1 &" 51 | ./extent_server $EXTENT_PORT > extent_server.log 2>&1 & 52 | sleep 1 53 | 54 | rm -rf $YFSDIR1 55 | mkdir $YFSDIR1 || exit 1 56 | sleep 1 57 | echo "starting ./yfs_client $YFSDIR1 $EXTENT_PORT $LOCK_PORT > yfs_client1.log 2>&1 &" 58 | ./yfs_client $YFSDIR1 $EXTENT_PORT $LOCK_PORT > yfs_client1.log 2>&1 & 59 | sleep 1 60 | 61 | rm -rf $YFSDIR2 62 | mkdir $YFSDIR2 || exit 1 63 | sleep 1 64 | echo "starting ./yfs_client $YFSDIR2 $EXTENT_PORT $LOCK_PORT > yfs_client2.log 2>&1 &" 65 | ./yfs_client $YFSDIR2 $EXTENT_PORT $LOCK_PORT > yfs_client2.log 2>&1 & 66 | 67 | sleep 2 68 | 69 | # make sure FUSE is mounted where we expect 70 | pwd=`pwd -P` 71 | if [ `mount | grep "$pwd/yfs1" | grep -v grep | wc -l` -ne 1 ]; then 72 | sh stop.sh 73 | echo "Failed to mount YFS properly at ./yfs1" 74 | exit -1 75 | fi 76 | 77 | # make sure FUSE is mounted where we expect 78 | if [ `mount | grep "$pwd/yfs2" | grep -v grep | wc -l` -ne 1 ]; then 79 | sh stop.sh 80 | echo "Failed to mount YFS properly at ./yfs2" 81 | exit -1 82 | fi 83 | -------------------------------------------------------------------------------- /rpc/fifo.h: -------------------------------------------------------------------------------- 1 | #ifndef fifo_h 2 | #define fifo_h 3 | 4 | // fifo template 5 | // blocks enq() and deq() when queue is FULL or EMPTY 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "slock.h" 13 | #include "lang/verify.h" 14 | 15 | template 16 | class fifo { 17 | public: 18 | fifo(int m=0); 19 | ~fifo(); 20 | bool enq(T, bool blocking=true); 21 | void deq(T *); 22 | bool size(); 23 | 24 | private: 25 | std::list q_; 26 | pthread_mutex_t m_; 27 | pthread_cond_t non_empty_c_; // q went non-empty 28 | pthread_cond_t has_space_c_; // q is not longer overfull 29 | unsigned int max_; //maximum capacity of the queue, block enq threads if exceeds this limit 30 | }; 31 | 32 | template 33 | fifo::fifo(int limit) : max_(limit) 34 | { 35 | VERIFY(pthread_mutex_init(&m_, 0) == 0); 36 | VERIFY(pthread_cond_init(&non_empty_c_, 0) == 0); 37 | VERIFY(pthread_cond_init(&has_space_c_, 0) == 0); 38 | } 39 | 40 | template 41 | fifo::~fifo() 42 | { 43 | //fifo is to be deleted only when no threads are using it! 44 | VERIFY(pthread_mutex_destroy(&m_)==0); 45 | VERIFY(pthread_cond_destroy(&non_empty_c_) == 0); 46 | VERIFY(pthread_cond_destroy(&has_space_c_) == 0); 47 | } 48 | 49 | template bool 50 | fifo::size() 51 | { 52 | ScopedLock ml(&m_); 53 | return q_.size(); 54 | } 55 | 56 | template bool 57 | fifo::enq(T e, bool blocking) 58 | { 59 | ScopedLock ml(&m_); 60 | while (1) { 61 | if (!max_ || q_.size() < max_) { 62 | q_.push_back(e); 63 | break; 64 | } 65 | if (blocking) 66 | VERIFY(pthread_cond_wait(&has_space_c_, &m_) == 0); 67 | else 68 | return false; 69 | } 70 | VERIFY(pthread_cond_signal(&non_empty_c_) == 0); 71 | return true; 72 | } 73 | 74 | template void 75 | fifo::deq(T *e) 76 | { 77 | ScopedLock ml(&m_); 78 | 79 | while(1) { 80 | if(q_.empty()){ 81 | VERIFY (pthread_cond_wait(&non_empty_c_, &m_) == 0); 82 | } else { 83 | *e = q_.front(); 84 | q_.pop_front(); 85 | if (max_ && q_.size() < max_) { 86 | VERIFY(pthread_cond_signal(&has_space_c_)==0); 87 | } 88 | break; 89 | } 90 | } 91 | return; 92 | } 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /rsm_protocol.h: -------------------------------------------------------------------------------- 1 | #ifndef rsm_protocol_h 2 | #define rsm_protocol_h 3 | 4 | #include "rpc.h" 5 | 6 | 7 | class rsm_client_protocol { 8 | public: 9 | enum xxstatus { OK, ERR, NOTPRIMARY, BUSY}; 10 | typedef int status; 11 | enum rpc_numbers { 12 | invoke = 0x9001, 13 | members, 14 | }; 15 | }; 16 | 17 | 18 | struct viewstamp { 19 | viewstamp (unsigned int _vid = 0, unsigned int _seqno = 0) { 20 | vid = _vid; 21 | seqno = _seqno; 22 | }; 23 | unsigned int vid; 24 | unsigned int seqno; 25 | }; 26 | 27 | class rsm_protocol { 28 | public: 29 | enum xxstatus { OK, ERR, BUSY}; 30 | typedef int status; 31 | enum rpc_numbers { 32 | invoke = 0x10001, 33 | transferreq, 34 | transferdonereq, 35 | joinreq, 36 | }; 37 | 38 | struct transferres { 39 | std::string state; 40 | viewstamp last; 41 | }; 42 | 43 | struct joinres { 44 | std::string log; 45 | }; 46 | }; 47 | 48 | inline bool operator==(viewstamp a, viewstamp b) { 49 | return a.vid == b.vid && a.seqno == b.seqno; 50 | } 51 | 52 | inline bool operator>(viewstamp a, viewstamp b) { 53 | return (a.vid > b.vid) || ((a.vid == b.vid) && a.seqno > b.seqno); 54 | } 55 | 56 | inline bool operator!=(viewstamp a, viewstamp b) { 57 | return a.vid != b.vid || a.seqno != b.seqno; 58 | } 59 | 60 | inline marshall& operator<<(marshall &m, viewstamp v) 61 | { 62 | m << v.vid; 63 | m << v.seqno; 64 | return m; 65 | } 66 | 67 | inline unmarshall& operator>>(unmarshall &u, viewstamp &v) { 68 | u >> v.vid; 69 | u >> v.seqno; 70 | return u; 71 | } 72 | 73 | inline marshall & 74 | operator<<(marshall &m, rsm_protocol::transferres r) 75 | { 76 | m << r.state; 77 | m << r.last; 78 | return m; 79 | } 80 | 81 | inline unmarshall & 82 | operator>>(unmarshall &u, rsm_protocol::transferres &r) 83 | { 84 | u >> r.state; 85 | u >> r.last; 86 | return u; 87 | } 88 | 89 | inline marshall & 90 | operator<<(marshall &m, rsm_protocol::joinres r) 91 | { 92 | m << r.log; 93 | return m; 94 | } 95 | 96 | inline unmarshall & 97 | operator>>(unmarshall &u, rsm_protocol::joinres &r) 98 | { 99 | u >> r.log; 100 | return u; 101 | } 102 | 103 | class rsm_test_protocol { 104 | public: 105 | enum xxstatus { OK, ERR}; 106 | typedef int status; 107 | enum rpc_numbers { 108 | net_repair = 0x12001, 109 | breakpoint = 0x12002, 110 | }; 111 | }; 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /rpc/connection.h: -------------------------------------------------------------------------------- 1 | #ifndef connection_h 2 | #define connection_h 1 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include "pollmgr.h" 13 | 14 | class connection; 15 | 16 | class chanmgr { 17 | public: 18 | virtual bool got_pdu(connection *c, char *b, int sz) = 0; 19 | virtual ~chanmgr() {} 20 | }; 21 | 22 | class connection : public aio_callback { 23 | public: 24 | struct charbuf { 25 | charbuf(): buf(NULL), sz(0), solong(0) {} 26 | charbuf (char *b, int s) : buf(b), sz(s), solong(0){} 27 | char *buf; 28 | int sz; 29 | int solong; //amount of bytes written or read so far 30 | }; 31 | 32 | connection(chanmgr *m1, int f1, int lossytest=0); 33 | ~connection(); 34 | 35 | int channo() { return fd_; } 36 | bool isdead(); 37 | void closeconn(); 38 | 39 | bool send(char *b, int sz); 40 | void write_cb(int s); 41 | void read_cb(int s); 42 | 43 | void incref(); 44 | void decref(); 45 | int ref(); 46 | 47 | int compare(connection *another); 48 | private: 49 | 50 | bool readpdu(); 51 | bool writepdu(); 52 | 53 | chanmgr *mgr_; 54 | const int fd_; 55 | bool dead_; 56 | 57 | charbuf wpdu_; 58 | charbuf rpdu_; 59 | 60 | struct timeval create_time_; 61 | 62 | int waiters_; 63 | int refno_; 64 | const int lossy_; 65 | 66 | pthread_mutex_t m_; 67 | pthread_mutex_t ref_m_; 68 | pthread_cond_t send_complete_; 69 | pthread_cond_t send_wait_; 70 | }; 71 | 72 | class tcpsconn { 73 | public: 74 | tcpsconn(chanmgr *m1, int port, int lossytest=0); 75 | ~tcpsconn(); 76 | inline int port() { return port_; } 77 | void accept_conn(); 78 | private: 79 | int port_; 80 | pthread_mutex_t m_; 81 | pthread_t th_; 82 | int pipe_[2]; 83 | 84 | int tcp_; //file desciptor for accepting connection 85 | chanmgr *mgr_; 86 | int lossy_; 87 | std::map conns_; 88 | 89 | void process_accept(); 90 | }; 91 | 92 | struct bundle { 93 | bundle(chanmgr *m, int s, int l):mgr(m),tcp(s),lossy(l) {} 94 | chanmgr *mgr; 95 | int tcp; 96 | int lossy; 97 | }; 98 | 99 | void start_accept_thread(chanmgr *mgr, int port, pthread_t *th, int *fd = NULL, int lossy=0); 100 | connection *connect_to_dst(const sockaddr_in &dst, chanmgr *mgr, int lossy=0); 101 | #endif 102 | -------------------------------------------------------------------------------- /paxos_protocol.h: -------------------------------------------------------------------------------- 1 | #ifndef paxos_protocol_h 2 | #define paxos_protocol_h 3 | 4 | #include "rpc.h" 5 | 6 | struct prop_t { 7 | unsigned n; 8 | std::string m; 9 | }; 10 | 11 | class paxos_protocol { 12 | public: 13 | enum xxstatus { OK, ERR }; 14 | typedef int status; 15 | enum rpc_numbers { 16 | preparereq = 0x11001, 17 | acceptreq, 18 | decidereq, 19 | heartbeat, 20 | }; 21 | 22 | struct preparearg { 23 | unsigned instance; 24 | prop_t n; 25 | }; 26 | 27 | struct prepareres { 28 | bool oldinstance; 29 | bool accept; 30 | prop_t n_a; 31 | std::string v_a; 32 | }; 33 | 34 | struct acceptarg { 35 | unsigned instance; 36 | prop_t n; 37 | std::string v; 38 | }; 39 | 40 | struct decidearg { 41 | unsigned instance; 42 | std::string v; 43 | }; 44 | 45 | }; 46 | 47 | inline unmarshall & 48 | operator>>(unmarshall &u, prop_t &a) 49 | { 50 | u >> a.n; 51 | u >> a.m; 52 | return u; 53 | } 54 | 55 | inline marshall & 56 | operator<<(marshall &m, prop_t a) 57 | { 58 | m << a.n; 59 | m << a.m; 60 | return m; 61 | } 62 | 63 | inline unmarshall & 64 | operator>>(unmarshall &u, paxos_protocol::preparearg &a) 65 | { 66 | u >> a.instance; 67 | u >> a.n; 68 | return u; 69 | } 70 | 71 | inline marshall & 72 | operator<<(marshall &m, paxos_protocol::preparearg a) 73 | { 74 | m << a.instance; 75 | m << a.n; 76 | return m; 77 | } 78 | 79 | inline unmarshall & 80 | operator>>(unmarshall &u, paxos_protocol::prepareres &r) 81 | { 82 | u >> r.oldinstance; 83 | u >> r.accept; 84 | u >> r.n_a; 85 | u >> r.v_a; 86 | return u; 87 | } 88 | 89 | inline marshall & 90 | operator<<(marshall &m, paxos_protocol::prepareres r) 91 | { 92 | m << r.oldinstance; 93 | m << r.accept; 94 | m << r.n_a; 95 | m << r.v_a; 96 | return m; 97 | } 98 | 99 | inline unmarshall & 100 | operator>>(unmarshall &u, paxos_protocol::acceptarg &a) 101 | { 102 | u >> a.instance; 103 | u >> a.n; 104 | u >> a.v; 105 | return u; 106 | } 107 | 108 | inline marshall & 109 | operator<<(marshall &m, paxos_protocol::acceptarg a) 110 | { 111 | m << a.instance; 112 | m << a.n; 113 | m << a.v; 114 | return m; 115 | } 116 | 117 | inline unmarshall & 118 | operator>>(unmarshall &u, paxos_protocol::decidearg &a) 119 | { 120 | u >> a.instance; 121 | u >> a.v; 122 | return u; 123 | } 124 | 125 | inline marshall & 126 | operator<<(marshall &m, paxos_protocol::decidearg a) 127 | { 128 | m << a.instance; 129 | m << a.v; 130 | return m; 131 | } 132 | 133 | #endif 134 | -------------------------------------------------------------------------------- /rpc/pollmgr.h: -------------------------------------------------------------------------------- 1 | #ifndef pollmgr_h 2 | #define pollmgr_h 3 | 4 | #include 5 | #include 6 | 7 | #ifdef __linux__ 8 | #include 9 | #endif 10 | 11 | #define MAX_POLL_FDS 128 12 | 13 | typedef enum { 14 | CB_NONE = 0x0, 15 | CB_RDONLY = 0x1, 16 | CB_WRONLY = 0x10, 17 | CB_RDWR = 0x11, 18 | CB_MASK = ~0x11, 19 | } poll_flag; 20 | 21 | class aio_mgr { 22 | public: 23 | virtual void watch_fd(int fd, poll_flag flag) = 0; 24 | virtual bool unwatch_fd(int fd, poll_flag flag) = 0; 25 | virtual bool is_watched(int fd, poll_flag flag) = 0; 26 | virtual void wait_ready(std::vector *readable, std::vector *writable) = 0; 27 | virtual ~aio_mgr() {} 28 | }; 29 | 30 | class aio_callback { 31 | public: 32 | virtual void read_cb(int fd) = 0; 33 | virtual void write_cb(int fd) = 0; 34 | virtual ~aio_callback() {} 35 | }; 36 | 37 | class PollMgr { 38 | public: 39 | PollMgr(); 40 | ~PollMgr(); 41 | 42 | static PollMgr *Instance(); 43 | static PollMgr *CreateInst(); 44 | 45 | void add_callback(int fd, poll_flag flag, aio_callback *ch); 46 | void del_callback(int fd, poll_flag flag); 47 | bool has_callback(int fd, poll_flag flag, aio_callback *ch); 48 | void block_remove_fd(int fd); 49 | void wait_loop(); 50 | 51 | 52 | static PollMgr *instance; 53 | static int useful; 54 | static int useless; 55 | 56 | private: 57 | pthread_mutex_t m_; 58 | pthread_cond_t changedone_c_; 59 | pthread_t th_; 60 | 61 | aio_callback *callbacks_[MAX_POLL_FDS]; 62 | aio_mgr *aio_; 63 | bool pending_change_; 64 | 65 | }; 66 | 67 | class SelectAIO : public aio_mgr { 68 | public : 69 | 70 | SelectAIO(); 71 | ~SelectAIO(); 72 | void watch_fd(int fd, poll_flag flag); 73 | bool unwatch_fd(int fd, poll_flag flag); 74 | bool is_watched(int fd, poll_flag flag); 75 | void wait_ready(std::vector *readable, std::vector *writable); 76 | 77 | private: 78 | 79 | fd_set rfds_; 80 | fd_set wfds_; 81 | int highfds_; 82 | int pipefd_[2]; 83 | 84 | pthread_mutex_t m_; 85 | 86 | }; 87 | 88 | #ifdef __linux__ 89 | class EPollAIO : public aio_mgr { 90 | public: 91 | EPollAIO(); 92 | ~EPollAIO(); 93 | void watch_fd(int fd, poll_flag flag); 94 | bool unwatch_fd(int fd, poll_flag flag); 95 | bool is_watched(int fd, poll_flag flag); 96 | void wait_ready(std::vector *readable, std::vector *writable); 97 | 98 | private: 99 | int pollfd_; 100 | struct epoll_event ready_[MAX_POLL_FDS]; 101 | int fdstatus_[MAX_POLL_FDS]; 102 | 103 | }; 104 | #endif /* __linux */ 105 | 106 | #endif /* pollmgr_h */ 107 | 108 | -------------------------------------------------------------------------------- /rsm.h: -------------------------------------------------------------------------------- 1 | // replicated state machine interface. 2 | 3 | #ifndef rsm_h 4 | #define rsm_h 5 | 6 | #include 7 | #include 8 | #include "rsm_protocol.h" 9 | #include "rsm_state_transfer.h" 10 | #include "rpc.h" 11 | #include 12 | #include "config.h" 13 | 14 | 15 | class rsm : public config_view_change { 16 | protected: 17 | std::map procs; 18 | config *cfg; 19 | class rsm_state_transfer *stf; 20 | rpcs *rsmrpc; 21 | // On slave: expected viewstamp of next invoke request 22 | // On primary: viewstamp for the next request from rsm_client 23 | viewstamp myvs; 24 | viewstamp last_myvs; // Viewstamp of the last executed request 25 | std::string primary; 26 | bool insync; 27 | bool inviewchange; 28 | unsigned vid_commit; // Latest view id that is known to rsm layer 29 | unsigned vid_insync; // The view id that this node is synchronizing for 30 | std::vector backups; // A list of unsynchronized backups 31 | 32 | // For testing purposes 33 | rpcs *testsvr; 34 | bool partitioned; 35 | bool dopartition; 36 | bool break1; 37 | bool break2; 38 | 39 | 40 | rsm_client_protocol::status client_members(int i, 41 | std::vector &r); 42 | rsm_protocol::status invoke(int proc, viewstamp vs, std::string mreq, 43 | int &dummy); 44 | rsm_protocol::status transferreq(std::string src, viewstamp last, unsigned vid, 45 | rsm_protocol::transferres &r); 46 | rsm_protocol::status transferdonereq(std::string m, unsigned vid, int &); 47 | rsm_protocol::status joinreq(std::string src, viewstamp last, 48 | rsm_protocol::joinres &r); 49 | rsm_test_protocol::status test_net_repairreq(int heal, int &r); 50 | rsm_test_protocol::status breakpointreq(int b, int &r); 51 | 52 | pthread_mutex_t rsm_mutex; 53 | pthread_mutex_t invoke_mutex; 54 | pthread_cond_t recovery_cond; 55 | pthread_cond_t sync_cond; 56 | 57 | rsm_client_protocol::status client_invoke(int procno, std::string req, 58 | std::string &r); 59 | bool statetransfer(std::string m); 60 | bool statetransferdone(std::string m); 61 | bool join(std::string m); 62 | void set_primary(unsigned vid); 63 | std::string find_highest(viewstamp &vs, std::string &m, unsigned &vid); 64 | bool sync_with_backups(); 65 | bool sync_with_primary(); 66 | void net_repair_wo(bool heal); 67 | void breakpoint1(); 68 | void breakpoint2(); 69 | void partition1(); 70 | void commit_change_wo(unsigned vid); 71 | public: 72 | rsm (std::string _first, std::string _me); 73 | ~rsm() {}; 74 | 75 | bool amiprimary(); 76 | void set_state_transfer(rsm_state_transfer *_stf) { stf = _stf; }; 77 | void recovery(); 78 | void commit_change(unsigned vid); 79 | 80 | }; 81 | 82 | #endif /* rsm_h */ 83 | -------------------------------------------------------------------------------- /handle.cc: -------------------------------------------------------------------------------- 1 | #include "handle.h" 2 | #include 3 | #include "tprintf.h" 4 | 5 | handle_mgr mgr; 6 | 7 | handle::handle(std::string m) 8 | { 9 | h = mgr.get_handle(m); 10 | } 11 | 12 | rpcc * 13 | handle::safebind() 14 | { 15 | if (!h) 16 | return NULL; 17 | ScopedLock ml(&h->cl_mutex); 18 | if (h->del) 19 | return NULL; 20 | if (h->cl) 21 | return h->cl; 22 | sockaddr_in dstsock; 23 | make_sockaddr(h->m.c_str(), &dstsock); 24 | rpcc *cl = new rpcc(dstsock); 25 | tprintf("handler_mgr::get_handle trying to bind...%s\n", h->m.c_str()); 26 | int ret; 27 | // Starting with lab 6, our test script assumes that the failure 28 | // can be detected by paxos and rsm layer within few seconds. We have 29 | // to set the timeout with a small value to support the assumption. 30 | // 31 | // Note: with RPC_LOSSY=5, your lab would failed to pass the tests of 32 | // lab 6 and lab 7 because the rpc layer may delay your RPC request, 33 | // and cause a time out failure. Please make sure RPC_LOSSY is set to 0. 34 | ret = cl->bind(rpcc::to(1000)); 35 | if (ret < 0) { 36 | tprintf("handle_mgr::get_handle bind failure! %s %d\n", h->m.c_str(), ret); 37 | delete cl; 38 | h->del = true; 39 | } else { 40 | tprintf("handle_mgr::get_handle bind succeeded %s\n", h->m.c_str()); 41 | h->cl = cl; 42 | } 43 | return h->cl; 44 | } 45 | 46 | handle::~handle() 47 | { 48 | if (h) mgr.done_handle(h); 49 | } 50 | 51 | handle_mgr::handle_mgr() 52 | { 53 | VERIFY (pthread_mutex_init(&handle_mutex, NULL) == 0); 54 | } 55 | 56 | struct hinfo * 57 | handle_mgr::get_handle(std::string m) 58 | { 59 | ScopedLock ml(&handle_mutex); 60 | struct hinfo *h = 0; 61 | if (hmap.find(m) == hmap.end()) { 62 | h = new hinfo; 63 | h->cl = NULL; 64 | h->del = false; 65 | h->refcnt = 1; 66 | h->m = m; 67 | pthread_mutex_init(&h->cl_mutex, NULL); 68 | hmap[m] = h; 69 | } else if (!hmap[m]->del) { 70 | h = hmap[m]; 71 | h->refcnt ++; 72 | } 73 | return h; 74 | } 75 | 76 | void 77 | handle_mgr::done_handle(struct hinfo *h) 78 | { 79 | ScopedLock ml(&handle_mutex); 80 | h->refcnt--; 81 | if (h->refcnt == 0 && h->del) 82 | delete_handle_wo(h->m); 83 | } 84 | 85 | void 86 | handle_mgr::delete_handle(std::string m) 87 | { 88 | ScopedLock ml(&handle_mutex); 89 | delete_handle_wo(m); 90 | } 91 | 92 | // Must be called with handle_mutex locked. 93 | void 94 | handle_mgr::delete_handle_wo(std::string m) 95 | { 96 | if (hmap.find(m) == hmap.end()) { 97 | tprintf("handle_mgr::delete_handle_wo: cl %s isn't in cl list\n", m.c_str()); 98 | } else { 99 | tprintf("handle_mgr::delete_handle_wo: cl %s refcnt %d\n", m.c_str(), 100 | hmap[m]->refcnt); 101 | struct hinfo *h = hmap[m]; 102 | if (h->refcnt == 0) { 103 | if (h->cl) { 104 | h->cl->cancel(); 105 | delete h->cl; 106 | } 107 | pthread_mutex_destroy(&h->cl_mutex); 108 | hmap.erase(m); 109 | delete h; 110 | } else { 111 | h->del = true; 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /paxos.h: -------------------------------------------------------------------------------- 1 | #ifndef paxos_h 2 | #define paxos_h 3 | 4 | #include 5 | #include 6 | #include "rpc.h" 7 | #include "paxos_protocol.h" 8 | #include "log.h" 9 | 10 | 11 | class paxos_change { 12 | public: 13 | virtual void paxos_commit(unsigned instance, std::string v) = 0; 14 | virtual ~paxos_change() {}; 15 | }; 16 | 17 | class acceptor { 18 | private: 19 | log *l; 20 | rpcs *pxs; 21 | paxos_change *cfg; 22 | std::string me; 23 | pthread_mutex_t pxs_mutex; 24 | 25 | // Acceptor state 26 | prop_t n_h; // number of the highest proposal seen in a prepare 27 | prop_t n_a; // number of highest proposal accepted 28 | std::string v_a; // value of highest proposal accepted 29 | unsigned instance_h; // number of the highest instance we have decided 30 | std::map values; // vals of each instance 31 | 32 | void commit_wo(unsigned instance, std::string v); 33 | paxos_protocol::status preparereq(std::string src, 34 | paxos_protocol::preparearg a, 35 | paxos_protocol::prepareres &r); 36 | paxos_protocol::status acceptreq(std::string src, 37 | paxos_protocol::acceptarg a, bool &r); 38 | paxos_protocol::status decidereq(std::string src, 39 | paxos_protocol::decidearg a, int &r); 40 | 41 | friend class log; 42 | 43 | public: 44 | acceptor(class paxos_change *cfg, bool _first, std::string _me, 45 | std::string _value); 46 | ~acceptor() {}; 47 | void commit(unsigned instance, std::string v); 48 | unsigned instance() { return instance_h; } 49 | std::string value(unsigned instance) { return values[instance]; } 50 | std::string dump(); 51 | void restore(std::string); 52 | rpcs *get_rpcs() { return pxs; }; 53 | prop_t get_n_h() { return n_h; }; 54 | unsigned get_instance_h() { return instance_h; }; 55 | }; 56 | 57 | extern bool isamember(std::string m, const std::vector &nodes); 58 | extern std::string print_members(const std::vector &nodes); 59 | 60 | class proposer { 61 | private: 62 | log *l; 63 | paxos_change *cfg; 64 | acceptor *acc; 65 | std::string me; 66 | bool break1; 67 | bool break2; 68 | 69 | pthread_mutex_t pxs_mutex; 70 | 71 | // Proposer state 72 | bool stable; 73 | prop_t my_n; // number of the last proposal used in this instance 74 | 75 | void setn(); 76 | bool prepare(unsigned instance, std::vector &accepts, 77 | std::vector nodes, 78 | std::string &v); 79 | void accept(unsigned instance, std::vector &accepts, 80 | std::vector nodes, std::string v); 81 | void decide(unsigned instance, std::vector accepts, 82 | std::string v); 83 | 84 | void breakpoint1(); 85 | void breakpoint2(); 86 | bool majority(const std::vector &l1, const std::vector &l2); 87 | 88 | friend class log; 89 | public: 90 | proposer(class paxos_change *cfg, class acceptor *_acceptor, std::string _me); 91 | ~proposer() {}; 92 | bool run(int instance, std::vector cnodes, std::string v); 93 | bool isrunning(); 94 | void breakpoint(int b); 95 | }; 96 | 97 | 98 | 99 | #endif /* paxos_h */ 100 | -------------------------------------------------------------------------------- /log.cc: -------------------------------------------------------------------------------- 1 | #include "paxos.h" 2 | #include 3 | #include 4 | 5 | // Paxos must maintain some durable state (i.e., that survives power 6 | // failures) to run Paxos correct. This module implements a log with 7 | // all durable state to run Paxos. Since the values chosen correspond 8 | // to views, the log contains all views since the beginning of time. 9 | 10 | log::log(acceptor *_acc, std::string _me) 11 | : pxs (_acc) 12 | { 13 | name = "paxos-" + _me + ".log"; 14 | logread(); 15 | } 16 | 17 | void 18 | log::logread(void) 19 | { 20 | std::ifstream from; 21 | std::string type; 22 | unsigned instance; 23 | 24 | from.open(name.c_str()); 25 | printf ("logread\n"); 26 | while (from >> type) { 27 | if (type == "done") { 28 | std::string v; 29 | from >> instance; 30 | from.get(); 31 | getline(from, v); 32 | pxs->values[instance] = v; 33 | pxs->instance_h = instance; 34 | printf ("logread: instance: %d w. v = %s\n", instance, 35 | pxs->values[instance].c_str()); 36 | pxs->v_a.clear(); 37 | pxs->n_h.n = 0; 38 | pxs->n_a.n = 0; 39 | } else if (type == "propseen") { 40 | from >> pxs->n_h.n; 41 | from >> pxs->n_h.m; 42 | printf("logread: high update: %d(%s)\n", pxs->n_h.n, pxs->n_h.m.c_str()); 43 | } else if (type == "accepted") { 44 | std::string v; 45 | from >> pxs->n_a.n; 46 | from >> pxs->n_a.m; 47 | from.get(); 48 | getline(from, v); 49 | pxs->v_a = v; 50 | printf("logread: prop update %d(%s) with v = %s\n", pxs->n_a.n, 51 | pxs->n_a.m.c_str(), pxs->v_a.c_str()); 52 | } else { 53 | printf("logread: unknown log record\n"); 54 | VERIFY(0); 55 | } 56 | } 57 | from.close(); 58 | } 59 | 60 | std::string 61 | log::dump() 62 | { 63 | std::ifstream from; 64 | std::string res; 65 | std::string v; 66 | from.open(name.c_str()); 67 | while (getline(from, v)) { 68 | res = res + v + "\n"; 69 | } 70 | from.close(); 71 | return res; 72 | } 73 | 74 | void 75 | log::restore(std::string s) 76 | { 77 | std::ofstream f; 78 | printf("restore: %s\n", s.c_str()); 79 | f.open(name.c_str(), std::ios::trunc); 80 | f << s; 81 | f.close(); 82 | } 83 | 84 | // XXX should be an atomic operation 85 | void 86 | log::loginstance(unsigned instance, std::string v) 87 | { 88 | std::ofstream f; 89 | f.open(name.c_str(), std::ios::app); 90 | f << "done"; 91 | f << " "; 92 | f << instance; 93 | f << " "; 94 | f << v; 95 | f << "\n"; 96 | f.close(); 97 | } 98 | 99 | // an acceptor should call logprop(n_h) when it 100 | // receives a prepare to which it responds prepare_ok(). 101 | void 102 | log::logprop(prop_t n_h) 103 | { 104 | std::ofstream f; 105 | f.open(name.c_str(), std::ios::app); 106 | f << "propseen"; 107 | f << " "; 108 | f << n_h.n; 109 | f << " "; 110 | f << n_h.m; 111 | f << "\n"; 112 | f.close(); 113 | } 114 | 115 | // an acceptor should call logaccept(n_a, v_a) when it 116 | // receives an accept RPC to which it replies accept_ok(). 117 | void 118 | log::logaccept(prop_t n, std::string v) 119 | { 120 | std::ofstream f; 121 | f.open(name.c_str(), std::ios::app); 122 | f << "accepted"; 123 | f << " "; 124 | f << n.n; 125 | f << " "; 126 | f << n.m; 127 | f << " "; 128 | f << v; 129 | f << "\n"; 130 | f.close(); 131 | } 132 | 133 | -------------------------------------------------------------------------------- /test-lab-2-a.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # test CREATE/MKNOD, LOOKUP, READDIR. 4 | 5 | use strict; 6 | 7 | if($#ARGV != 0){ 8 | print STDERR "Usage: test-lab-2-a.pl directory\n"; 9 | exit(1); 10 | } 11 | my $dir = $ARGV[0]; 12 | 13 | my $seq = 0; 14 | 15 | my $files = { }; 16 | my @dead; 17 | 18 | for(my $iters = 0; $iters < 200; $iters++){ 19 | createone(); 20 | } 21 | 22 | for(my $iters = 0; $iters < 100; $iters++){ 23 | if(rand() < 0.1){ 24 | livecheck(); 25 | } 26 | if(rand() < 0.1){ 27 | deadcheck(); 28 | } 29 | if(rand() < 0.02){ 30 | dircheck(); 31 | } 32 | if(rand() < 0.5){ 33 | createone(); 34 | } 35 | } 36 | 37 | dircheck(); 38 | printf "Passed all tests!\n"; 39 | exit(0); 40 | 41 | sub createone { 42 | my $name = "file-"; 43 | for(my $i = 0; $i < 40; $i++){ 44 | $name .= sprintf("%c", ord('a') + int(rand(26))); 45 | } 46 | $name .= "-$$-" . $seq; 47 | $seq = $seq + 1; 48 | my $contents = rand(); 49 | print "create $name\n"; 50 | if(!open(F, ">$dir/$name")){ 51 | print STDERR "test-lab-2-a: cannot create $dir/$name : $!\n"; 52 | exit(1); 53 | } 54 | close(F); 55 | $files->{$name} = $contents; 56 | } 57 | 58 | sub createagain { 59 | my @a = keys(%$files); 60 | return if $#a < 0; 61 | my $i = int(rand($#a + 1)); 62 | my $k = $a[$i]; 63 | print "re-create $k\n"; 64 | if(!open(F, ">$dir/$k")){ 65 | print STDERR "test-lab-2-a: cannot re-create $dir/$k : $!\n"; 66 | exit(1); 67 | } 68 | close(F); 69 | } 70 | 71 | # make sure all the live files are there, 72 | # and that all the dead files are not there. 73 | sub dircheck { 74 | print "dircheck\n"; 75 | opendir(D, $dir); 76 | my %h; 77 | my $f; 78 | while(defined($f = readdir(D))){ 79 | if(!defined($h{$f})){ 80 | $h{$f} = 0; 81 | } 82 | $h{$f} = $h{$f} + 1; 83 | } 84 | closedir(D); 85 | 86 | foreach $f (keys(%$files)){ 87 | if(!defined($h{$f})){ 88 | print STDERR "test-lab-2-a.pl: $f is not in the directory listing\n"; 89 | exit(1); 90 | } 91 | if($h{$f} > 1){ 92 | print STDERR "test-lab-2-a.pl: $f appears more than once in the directory\n"; 93 | exit(1); 94 | } 95 | } 96 | 97 | foreach $f (@dead){ 98 | if(defined($h{$f})){ 99 | print STDERR "test-lab-2-a.pl: $f is dead but in directory listing\n"; 100 | exit(1); 101 | } 102 | } 103 | } 104 | 105 | sub livecheck { 106 | my @a = keys(%$files); 107 | return if $#a < 0; 108 | my $i = int(rand($#a + 1)); 109 | my $k = $a[$i]; 110 | print "livecheck $k\n"; 111 | if(!open(F, "$dir/$k")){ 112 | print STDERR "test-lab-2-a: cannot open $dir/$k : $!\n"; 113 | exit(1); 114 | } 115 | close(F); 116 | if( ! -f "$dir/$k" ){ 117 | print STDERR "test-lab-2-a: $dir/$k is not of type file\n"; 118 | exit(1); 119 | } 120 | if(open(F, ">$dir/$k/xx")){ 121 | print STDERR "test-lab-2-a: $dir/$k acts like a directory, not a file\n"; 122 | exit(1); 123 | } 124 | } 125 | 126 | sub deadcheck { 127 | my $name = "file-$$-" . $seq; 128 | $seq = $seq + 1; 129 | print "check-not-there $name\n"; 130 | if(open(F, "$dir/$name")){ 131 | print STDERR "test-lab-2-a: $dir/$name exists but should not\n"; 132 | exit(1); 133 | } 134 | } 135 | 136 | sub deleteone { 137 | my @a = keys(%$files); 138 | return 0 if $#a < 0; 139 | my $i = int(rand($#a + 1)); 140 | my $k = $a[$i]; 141 | print "delete $k\n"; 142 | if(unlink($dir . "/" . $k) == 0){ 143 | print STDERR "test-lab-2-a: unlink $k failed: $!\n"; 144 | exit(1); 145 | } 146 | delete $files->{$k}; 147 | push(@dead, $k); 148 | return 1; 149 | } 150 | -------------------------------------------------------------------------------- /extent_client.cc: -------------------------------------------------------------------------------- 1 | // RPC stubs for clients to talk to extent_server 2 | 3 | #include "extent_client.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // The calls assume that the caller holds a lock on the extent 11 | 12 | extent_client::extent_client(std::string dst) 13 | { 14 | sockaddr_in dstsock; 15 | make_sockaddr(dst.c_str(), &dstsock); 16 | cl = new rpcc(dstsock); 17 | if (cl->bind() != 0) { 18 | printf("extent_client: bind failed\n"); 19 | } 20 | } 21 | 22 | extent_protocol::status 23 | extent_client::get(extent_protocol::extentid_t eid, std::string &buf) 24 | { 25 | extent_protocol::status ret = extent_protocol::OK; 26 | std::unique_lock ulock(cache_mtx); 27 | if (cache_map.find(eid) == cache_map.end() || (!cache_map[eid].removed && cache_map[eid].attr_only)) 28 | { 29 | ulock.unlock(); 30 | ret = cl->call(extent_protocol::get, eid, buf); 31 | extent_protocol::attr attr; 32 | getattr(eid, attr); 33 | ulock.lock(); 34 | cache_map[eid] = { buf, attr, false, false, false }; 35 | } 36 | else 37 | { 38 | buf = cache_map[eid].data; 39 | } 40 | cache_map[eid].attr.atime = std::time(nullptr); 41 | return ret; 42 | } 43 | 44 | extent_protocol::status 45 | extent_client::getattr(extent_protocol::extentid_t eid, 46 | extent_protocol::attr &attr) 47 | { 48 | extent_protocol::status ret = extent_protocol::OK; 49 | std::unique_lock ulock(cache_mtx); 50 | if (cache_map.find(eid) == cache_map.end()) 51 | { 52 | ulock.unlock(); 53 | ret = cl->call(extent_protocol::getattr, eid, attr); 54 | ulock.lock(); 55 | cache_map[eid] = { "", attr, false, true, false }; 56 | } 57 | else 58 | { 59 | attr = cache_map[eid].attr; 60 | } 61 | return ret; 62 | } 63 | 64 | extent_protocol::status 65 | extent_client::put(extent_protocol::extentid_t eid, std::string buf) 66 | { 67 | extent_protocol::status ret = extent_protocol::OK; 68 | std::unique_lock ulock(cache_mtx); 69 | if (cache_map.find(eid) == cache_map.end() || cache_map[eid].removed || cache_map[eid].attr_only) 70 | { 71 | extent_protocol::attr attr; 72 | attr.size = buf.size(); 73 | attr.ctime = attr.mtime = attr.atime = std::time(nullptr); 74 | cache_map[eid] = { buf, attr, true, false, false }; 75 | } 76 | else 77 | { 78 | cache_map[eid].data = buf; 79 | cache_map[eid].attr.size = buf.size(); 80 | cache_map[eid].attr.mtime = cache_map[eid].attr.ctime = std::time(nullptr); 81 | cache_map[eid].dirty = true; 82 | cache_map[eid].attr_only = false; 83 | cache_map[eid].removed = false; 84 | } 85 | return ret; 86 | } 87 | 88 | extent_protocol::status 89 | extent_client::remove(extent_protocol::extentid_t eid) 90 | { 91 | extent_protocol::status ret = extent_protocol::OK; 92 | std::unique_lock ulock(cache_mtx); 93 | cache_map[eid].data = ""; 94 | cache_map[eid].removed = true; 95 | ulock.unlock(); 96 | 97 | //int r; 98 | //ret = cl->call(extent_protocol::remove, eid, r); 99 | return ret; 100 | } 101 | 102 | extent_protocol::status 103 | extent_client::flush(extent_protocol::extentid_t eid) 104 | { 105 | extent_protocol::status ret = extent_protocol::OK; 106 | std::unique_lock ulock(cache_mtx); 107 | if (cache_map.find(eid) != cache_map.end()) 108 | { 109 | if (!cache_map[eid].attr_only) 110 | { 111 | if (cache_map[eid].dirty) 112 | { 113 | ulock.unlock(); 114 | int r; 115 | ret = cl->call(extent_protocol::put, eid, cache_map[eid].data, r); 116 | ulock.lock(); 117 | } 118 | 119 | if (cache_map[eid].removed) 120 | { 121 | ulock.unlock(); 122 | int r; 123 | ret = cl->call(extent_protocol::remove, eid, r); 124 | ulock.lock(); 125 | } 126 | 127 | } 128 | cache_map.erase(eid); 129 | } 130 | return ret; 131 | } 132 | -------------------------------------------------------------------------------- /rpc/method_thread.h: -------------------------------------------------------------------------------- 1 | #ifndef method_thread_h 2 | #define method_thread_h 3 | 4 | // method_thread(): start a thread that runs an object method. 5 | // returns a pthread_t on success, and zero on error. 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "lang/verify.h" 12 | 13 | static pthread_t 14 | method_thread_parent(void *(*fn)(void *), void *arg, bool detach) 15 | { 16 | pthread_t th; 17 | pthread_attr_t attr; 18 | pthread_attr_init(&attr); 19 | // set stack size to 100K, so we don't run out of memory 20 | pthread_attr_setstacksize(&attr, 100*1024); 21 | int err = pthread_create(&th, &attr, fn, arg); 22 | pthread_attr_destroy(&attr); 23 | if (err != 0) { 24 | fprintf(stderr, "pthread_create ret %d %s\n", err, strerror(err)); 25 | exit(1); 26 | } 27 | 28 | if (detach) { 29 | // don't keep thread state around after exit, to avoid 30 | // running out of threads. set detach==false if you plan 31 | // to pthread_join. 32 | VERIFY(pthread_detach(th) == 0); 33 | } 34 | 35 | return th; 36 | } 37 | 38 | static void 39 | method_thread_child() 40 | { 41 | // defer pthread_cancel() by default. check explicitly by 42 | // enabling then pthread_testcancel(). 43 | int oldstate, oldtype; 44 | VERIFY(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) == 0); 45 | VERIFY(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, &oldtype) == 0); 46 | } 47 | 48 | template pthread_t 49 | method_thread(C *o, bool detach, void (C::*m)()) 50 | { 51 | class XXX { 52 | public: 53 | C *o; 54 | void (C::*m)(); 55 | static void *yyy(void *vvv) { 56 | XXX *x = (XXX*)vvv; 57 | C *o = x->o; 58 | void (C::*m)() = x->m; 59 | delete x; 60 | method_thread_child(); 61 | (o->*m)(); 62 | return 0; 63 | } 64 | }; 65 | XXX *x = new XXX; 66 | x->o = o; 67 | x->m = m; 68 | return method_thread_parent(&XXX::yyy, (void *) x, detach); 69 | } 70 | 71 | template pthread_t 72 | method_thread(C *o, bool detach, void (C::*m)(A), A a) 73 | { 74 | class XXX { 75 | public: 76 | C *o; 77 | void (C::*m)(A a); 78 | A a; 79 | static void *yyy(void *vvv) { 80 | XXX *x = (XXX*)vvv; 81 | C *o = x->o; 82 | void (C::*m)(A ) = x->m; 83 | A a = x->a; 84 | delete x; 85 | method_thread_child(); 86 | (o->*m)(a); 87 | return 0; 88 | } 89 | }; 90 | XXX *x = new XXX; 91 | x->o = o; 92 | x->m = m; 93 | x->a = a; 94 | return method_thread_parent(&XXX::yyy, (void *) x, detach); 95 | } 96 | 97 | namespace { 98 | // ~xavid: this causes a bizzare compile error on OS X.5 when 99 | // it's declared in the function, so I moved it out here. 100 | template 101 | class XXX { 102 | public: 103 | C *o; 104 | void (C::*m)(A1 a1, A2 a2); 105 | A1 a1; 106 | A2 a2; 107 | static void *yyy(void *vvv) { 108 | XXX *x = (XXX*)vvv; 109 | C *o = x->o; 110 | void (C::*m)(A1 , A2 ) = x->m; 111 | A1 a1 = x->a1; 112 | A2 a2 = x->a2; 113 | delete x; 114 | method_thread_child(); 115 | (o->*m)(a1, a2); 116 | return 0; 117 | } 118 | }; 119 | } 120 | 121 | template pthread_t 122 | method_thread(C *o, bool detach, void (C::*m)(A1 , A2 ), A1 a1, A2 a2) 123 | { 124 | XXX *x = new XXX; 125 | x->o = o; 126 | x->m = m; 127 | x->a1 = a1; 128 | x->a2 = a2; 129 | return method_thread_parent(&XXX::yyy, (void *) x, detach); 130 | } 131 | 132 | template pthread_t 133 | method_thread(C *o, bool detach, void (C::*m)(A1 , A2, A3 ), A1 a1, A2 a2, A3 a3) 134 | { 135 | class XXX { 136 | public: 137 | C *o; 138 | void (C::*m)(A1 a1, A2 a2, A3 a3); 139 | A1 a1; 140 | A2 a2; 141 | A3 a3; 142 | static void *yyy(void *vvv) { 143 | XXX *x = (XXX*)vvv; 144 | C *o = x->o; 145 | void (C::*m)(A1 , A2 , A3 ) = x->m; 146 | A1 a1 = x->a1; 147 | A2 a2 = x->a2; 148 | A3 a3 = x->a3; 149 | delete x; 150 | method_thread_child(); 151 | (o->*m)(a1, a2, a3); 152 | return 0; 153 | } 154 | }; 155 | XXX *x = new XXX; 156 | x->o = o; 157 | x->m = m; 158 | x->a1 = a1; 159 | x->a2 = a2; 160 | x->a3 = a3; 161 | return method_thread_parent(&XXX::yyy, (void *) x, detach); 162 | } 163 | 164 | #endif 165 | -------------------------------------------------------------------------------- /yfs_client.h: -------------------------------------------------------------------------------- 1 | #ifndef yfs_client_h 2 | #define yfs_client_h 3 | 4 | #include 5 | //#include "yfs_protocol.h" 6 | #include "extent_client.h" 7 | #include 8 | 9 | #include "lock_protocol.h" 10 | #include "lock_client.h" 11 | #include 12 | #include 13 | #include "lock_client_cache.h" 14 | 15 | class lock_release_extent : public lock_release_user { 16 | private: 17 | extent_client *ec; 18 | public: 19 | lock_release_extent(extent_client *_ec) : ec(_ec) {} 20 | void dorelease(lock_protocol::lockid_t lid) 21 | { 22 | ec->flush(lid); 23 | } 24 | 25 | }; 26 | 27 | class yfs_client { 28 | extent_client *ec; 29 | lock_client *lc; 30 | lock_release_user *lu; 31 | public: 32 | 33 | typedef unsigned long long inum; 34 | enum xxstatus { OK, RPCERR, NOENT, IOERR, EXIST }; 35 | typedef int status; 36 | 37 | struct fileinfo { 38 | unsigned long long size; 39 | unsigned long atime; 40 | unsigned long mtime; 41 | unsigned long ctime; 42 | }; 43 | struct dirinfo { 44 | unsigned long atime; 45 | unsigned long mtime; 46 | unsigned long ctime; 47 | }; 48 | struct dirent { 49 | std::string name; 50 | yfs_client::inum inum; 51 | }; 52 | 53 | class dirent_list { 54 | public: 55 | dirent_list(std::string buf) 56 | { 57 | unsigned int cur = 0; 58 | while (cur < buf.size()) 59 | { 60 | int start = ++cur; 61 | while (cur < buf.size() && isdigit(buf[cur])) 62 | { 63 | ++cur; 64 | } 65 | unsigned int len_name = n2i(buf.substr(start, cur - start)); 66 | start = ++cur; 67 | while (cur < buf.size() && isdigit(buf[cur])) 68 | { 69 | ++cur; 70 | } 71 | unsigned int len_inum = n2i(buf.substr(start, cur - start)); 72 | start = ++cur; 73 | 74 | data[buf.substr(start, len_name)] = n2i(buf.substr(start + len_name, len_inum)); 75 | cur = start + len_name + len_inum; 76 | } 77 | } 78 | 79 | bool match(std::string name) 80 | { 81 | return data.find(name) != data.end(); 82 | } 83 | 84 | inum get(const char *name) 85 | { 86 | if (!match(name)) 87 | { 88 | return -1; 89 | } 90 | return data[name]; 91 | } 92 | 93 | void add(inum id, const char *name) 94 | { 95 | data[name] = id; 96 | } 97 | 98 | void remove(const char *name) 99 | { 100 | data.erase(name); 101 | } 102 | 103 | std::unordered_map get_map() 104 | { 105 | return data; 106 | } 107 | 108 | std::string to_string() 109 | { 110 | std::string ret = ""; 111 | for (const auto& kv : data) 112 | { 113 | std::string id = std::to_string(kv.second); 114 | // format: @name.size@id.size@nameid 115 | // eg. name="hehe" id=123 ret="@4@3@hehe123" 116 | ret += separator + std::to_string(kv.first.size()) 117 | + separator + std::to_string(id.size()) 118 | + separator + kv.first + id; 119 | } 120 | return ret; 121 | } 122 | private: 123 | std::unordered_map data; 124 | const std::string separator = "@"; 125 | }; 126 | 127 | private: 128 | static std::string filename(inum); 129 | static inum n2i(std::string); 130 | inum rand_inum(bool); 131 | std::mt19937 generator; 132 | std::uniform_int_distribution uid; 133 | public: 134 | 135 | yfs_client(std::string, std::string); 136 | 137 | bool isfile(inum); 138 | bool isdir(inum); 139 | 140 | int getfile(inum, fileinfo &); 141 | int getdir(inum, dirinfo &); 142 | 143 | yfs_client::status create(inum parent, const char *name, bool isdir, inum &ret_id); 144 | yfs_client::status lookup(inum parent, const char *name, inum &ret_id); 145 | yfs_client::status readdir(inum parent, std::unordered_map& ret_map); 146 | yfs_client::status setattr(inum ino, unsigned int len); 147 | yfs_client::status read(inum ino, std::size_t off, std::size_t len, std::string& data); 148 | yfs_client::status write(inum ino, std::size_t off, std::size_t len, const char *data); 149 | yfs_client::status unlink(inum parent, const char *name); 150 | }; 151 | 152 | class raii_wrapper 153 | { 154 | private: 155 | lock_client *lc; 156 | yfs_client::inum ino; 157 | public: 158 | raii_wrapper(lock_client *lc_, yfs_client::inum ino_) : lc(lc_), ino(ino_) 159 | { 160 | lc->acquire(ino); 161 | } 162 | ~raii_wrapper() 163 | { 164 | lc->release(ino); 165 | } 166 | }; 167 | #endif 168 | -------------------------------------------------------------------------------- /lock_server_cache.cc: -------------------------------------------------------------------------------- 1 | // the caching lock server implementation 2 | 3 | #include "lock_server_cache.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "lang/verify.h" 9 | #include "handle.h" 10 | #include "tprintf.h" 11 | 12 | 13 | lock_server_cache::lock_server_cache() 14 | { 15 | } 16 | 17 | 18 | int lock_server_cache::acquire(lock_protocol::lockid_t lid, std::string id, 19 | int &) 20 | { 21 | std::unique_lock map_lock(server_mtx); 22 | auto itr = lock_cache.find(lid); 23 | std::shared_ptr cur_lock; 24 | if (itr == lock_cache.end()) 25 | { 26 | cur_lock = std::make_shared(id); 27 | cur_lock->status = server_lock::LOCKED; 28 | cur_lock->client_id = id; 29 | lock_cache[lid] = cur_lock; 30 | return lock_protocol::OK; 31 | } 32 | else 33 | { 34 | cur_lock = itr->second; 35 | } 36 | std::unique_lock single_lock(cur_lock->mtx); 37 | map_lock.unlock(); 38 | if (cur_lock->status == server_lock::FREE) 39 | { 40 | cur_lock->status = server_lock::LOCKED; 41 | cur_lock->client_id = id; 42 | if (!cur_lock->waiting_cids.empty()) 43 | { 44 | handle cl_handle(id); 45 | rpcc *cl = cl_handle.safebind(); 46 | if (cl) 47 | { 48 | int r; 49 | cl->call(rlock_protocol::revoke, lid, r); 50 | } 51 | } 52 | return lock_protocol::OK; 53 | } 54 | else if (cur_lock->status == server_lock::LOCKED) 55 | { 56 | // If lock is server_lock::LOCKED, it means another client holds it AND 57 | // we are the first one to ask for it 58 | // so use revoke rpc to tell the lock owner to give back the lock 59 | handle cl_handle(cur_lock->client_id); 60 | rpcc *cl = cl_handle.safebind(); 61 | if (cl) 62 | { 63 | int r; 64 | cur_lock->status = server_lock::REVOKE_SENT; 65 | cur_lock->waiting_cids.push(id); 66 | single_lock.unlock(); 67 | cl->call(rlock_protocol::revoke, lid, r); 68 | single_lock.lock(); 69 | /* it seems this is overthinking 70 | while (ret != lock_protocol::OK) 71 | { 72 | // If due to any unknown reason, the revoke call fails 73 | // we need to try again until it's OK 74 | // otherwise, with no more revoke the lock would not be returned 75 | ret = cl->call(lock_protocol::revoke, lid, r); 76 | } 77 | */ 78 | return lock_protocol::RETRY; 79 | } 80 | else 81 | { 82 | // rpc conn fails 83 | return lock_protocol::RPCERR; 84 | } 85 | } 86 | else if (cur_lock->status == server_lock::REVOKE_SENT) 87 | { 88 | // server_lock::REVOKE_SENT means that another client holds the lock AND 89 | // there have been other clients asking and waiting for it AND 90 | // a revoke has been sent 91 | cur_lock->waiting_cids.push(id); 92 | return lock_protocol::RETRY; 93 | } 94 | return lock_protocol::OK; 95 | } 96 | 97 | int 98 | lock_server_cache::release(lock_protocol::lockid_t lid, std::string id, 99 | int &r) 100 | { 101 | lock_protocol::status ret = lock_protocol::OK; 102 | std::unique_lock map_lock(server_mtx); 103 | auto itr = lock_cache.find(lid); 104 | if (itr == lock_cache.end()) 105 | { 106 | return lock_protocol::IOERR; 107 | } 108 | 109 | auto cur_lock = itr->second; 110 | std::unique_lock single_lock(cur_lock->mtx); 111 | map_lock.unlock(); 112 | if (cur_lock->client_id != id || cur_lock->status == server_lock::FREE) 113 | { 114 | return ret; 115 | } 116 | cur_lock->status = server_lock::FREE; 117 | if (!cur_lock->waiting_cids.empty()) 118 | { 119 | auto waiting_cid = cur_lock->waiting_cids.front(); 120 | cur_lock->waiting_cids.pop(); 121 | handle cl_handler(waiting_cid); 122 | rpcc *cl = cl_handler.safebind(); 123 | if (cl) 124 | { 125 | int r; 126 | single_lock.unlock(); 127 | auto ret = cl->call(rlock_protocol::retry, lid, r); 128 | single_lock.lock(); 129 | if (ret != lock_protocol::OK) 130 | { 131 | return ret; 132 | } 133 | } 134 | else 135 | { 136 | return lock_protocol::IOERR; 137 | } 138 | } 139 | return ret; 140 | } 141 | 142 | lock_protocol::status 143 | lock_server_cache::stat(lock_protocol::lockid_t lid, int &r) 144 | { 145 | tprintf("stat request\n"); 146 | r = nacquire; 147 | return lock_protocol::OK; 148 | } 149 | 150 | -------------------------------------------------------------------------------- /gettime.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c), MM Weiss 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without modification, 6 | * are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * 3. Neither the name of the MM Weiss nor the names of its contributors 16 | * may be used to endorse or promote products derived from this software without 17 | * specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 20 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 22 | * SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 24 | * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 26 | * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 27 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | /* 31 | * clock_gettime_stub.c 32 | * gcc -Wall -c clock_gettime_stub.c 33 | * posix realtime functions; MacOS user space glue 34 | */ 35 | 36 | /* @comment 37 | * other possible implementation using intel builtin rdtsc 38 | * rdtsc-workaround: http://www.mcs.anl.gov/~kazutomo/rdtsc.html 39 | * 40 | * we could get the ticks by doing this 41 | * 42 | * __asm __volatile("mov %%ebx, %%esi\n\t" 43 | * "cpuid\n\t" 44 | * "xchg %%esi, %%ebx\n\t" 45 | * "rdtsc" 46 | * : "=a" (a), 47 | * "=d" (d) 48 | * ); 49 | 50 | * we could even replace our tricky sched_yield call by assembly code to get a better accurency, 51 | * anyway the following C stub will satisfy 99% of apps using posix clock_gettime call, 52 | * moreover, the setter version (clock_settime) could be easly written using mach primitives: 53 | * http://www.opensource.apple.com/source/xnu/xnu-${VERSION}/osfmk/man/ (clock_[set|get]_time) 54 | * 55 | * hackers don't be crackers, don't you use a flush toilet? 56 | * 57 | * 58 | * @see draft: ./posix-realtime-stub/posix-realtime-stub.c 59 | * 60 | */ 61 | 62 | 63 | #ifdef __APPLE__ 64 | 65 | #pragma weak clock_gettime 66 | 67 | #include 68 | #include 69 | #include 70 | #include 71 | #include 72 | #include 73 | #include 74 | #include 75 | 76 | typedef enum { 77 | CLOCK_REALTIME, 78 | CLOCK_MONOTONIC, 79 | CLOCK_PROCESS_CPUTIME_ID, 80 | CLOCK_THREAD_CPUTIME_ID 81 | } clockid_t; 82 | 83 | static mach_timebase_info_data_t __clock_gettime_inf; 84 | 85 | int clock_gettime(clockid_t clk_id, struct timespec *tp) { 86 | kern_return_t ret; 87 | clock_serv_t clk; 88 | clock_id_t clk_serv_id; 89 | mach_timespec_t tm; 90 | 91 | uint64_t start, end, delta, nano; 92 | 93 | task_basic_info_data_t tinfo; 94 | task_thread_times_info_data_t ttinfo; 95 | mach_msg_type_number_t tflag; 96 | 97 | int retval = -1; 98 | switch (clk_id) { 99 | case CLOCK_REALTIME: 100 | case CLOCK_MONOTONIC: 101 | clk_serv_id = clk_id == CLOCK_REALTIME ? CALENDAR_CLOCK : SYSTEM_CLOCK; 102 | if (KERN_SUCCESS == (ret = host_get_clock_service(mach_host_self(), clk_serv_id, &clk))) { 103 | if (KERN_SUCCESS == (ret = clock_get_time(clk, &tm))) { 104 | tp->tv_sec = tm.tv_sec; 105 | tp->tv_nsec = tm.tv_nsec; 106 | retval = 0; 107 | } 108 | } 109 | if (KERN_SUCCESS != ret) { 110 | errno = EINVAL; 111 | retval = -1; 112 | } 113 | break; 114 | case CLOCK_PROCESS_CPUTIME_ID: 115 | case CLOCK_THREAD_CPUTIME_ID: 116 | start = mach_absolute_time(); 117 | if (clk_id == CLOCK_PROCESS_CPUTIME_ID) { 118 | getpid(); 119 | } else { 120 | sched_yield(); 121 | } 122 | end = mach_absolute_time(); 123 | delta = end - start; 124 | if (0 == __clock_gettime_inf.denom) { 125 | mach_timebase_info(&__clock_gettime_inf); 126 | } 127 | nano = delta * __clock_gettime_inf.numer / __clock_gettime_inf.denom; 128 | tp->tv_sec = nano * 1e-9; 129 | tp->tv_nsec = nano - (tp->tv_sec * 1e9); 130 | retval = 0; 131 | break; 132 | default: 133 | errno = EINVAL; 134 | retval = -1; 135 | } 136 | return retval; 137 | } 138 | 139 | #endif // __APPLE__ 140 | -------------------------------------------------------------------------------- /GNUmakefile: -------------------------------------------------------------------------------- 1 | LAB=6 2 | SOL=0 3 | RPC=./rpc 4 | LAB2GE=$(shell expr $(LAB) \>\= 2) 5 | LAB3GE=$(shell expr $(LAB) \>\= 3) 6 | LAB4GE=$(shell expr $(LAB) \>\= 4) 7 | LAB5GE=$(shell expr $(LAB) \>\= 5) 8 | LAB6GE=$(shell expr $(LAB) \>\= 6) 9 | LAB7GE=$(shell expr $(LAB) \>\= 7) 10 | CXXFLAGS = -g -MMD -Wall -I. -I$(RPC) -DLAB=$(LAB) -DSOL=$(SOL) -D_FILE_OFFSET_BITS=64 -std=c++11 11 | FUSEFLAGS= -D_FILE_OFFSET_BITS=64 -DFUSE_USE_VERSION=25 -I/usr/local/include/fuse -I/usr/include/fuse 12 | ifeq ($(shell uname -s),Darwin) 13 | MACFLAGS= -D__FreeBSD__=10 14 | else 15 | MACFLAGS= 16 | endif 17 | LDFLAGS = -L. -L/usr/local/lib 18 | LDLIBS = -lpthread 19 | ifeq ($(LAB2GE),1) 20 | ifeq ($(shell uname -s),Darwin) 21 | ifeq ($(shell sw_vers -productVersion | sed -e "s/.*\(10\.[0-9]\).*/\1/"),10.6) 22 | LDLIBS += -lfuse_ino64 23 | else 24 | LDLIBS += -lfuse 25 | endif 26 | else 27 | LDLIBS += -lfuse 28 | endif 29 | endif 30 | LDLIBS += $(shell test -f `gcc -print-file-name=librt.so` && echo -lrt) 31 | LDLIBS += $(shell test -f `gcc -print-file-name=libdl.so` && echo -ldl) 32 | CC = g++ 33 | CXX = g++ 34 | 35 | lab: lab$(LAB) 36 | lab1: rpc/rpctest lock_server lock_tester lock_demo 37 | lab2: rpc/rpctest lock_server lock_tester lock_demo yfs_client extent_server 38 | lab3: yfs_client extent_server lock_server test-lab-3-b test-lab-3-c 39 | lab4: yfs_client extent_server lock_server lock_tester test-lab-3-b\ 40 | test-lab-3-c 41 | lab5: yfs_client extent_server lock_server test-lab-3-b test-lab-3-c 42 | lab6: lock_server rsm_tester 43 | lab7: lock_tester lock_server rsm_tester 44 | 45 | hfiles1=rpc/fifo.h rpc/connection.h rpc/rpc.h rpc/marshall.h rpc/method_thread.h\ 46 | rpc/thr_pool.h rpc/pollmgr.h rpc/jsl_log.h rpc/slock.h rpc/rpctest.cc\ 47 | lock_protocol.h lock_server.h lock_client.h gettime.h gettime.cc lang/verify.h \ 48 | lang/algorithm.h 49 | hfiles2=yfs_client.h extent_client.h extent_protocol.h extent_server.h 50 | hfiles3=lock_client_cache.h lock_server_cache.h handle.h tprintf.h 51 | hfiles4=log.h rsm.h rsm_protocol.h config.h paxos.h paxos_protocol.h rsm_state_transfer.h rsmtest_client.h tprintf.h 52 | hfiles5=rsm_state_transfer.h rsm_client.h 53 | rsm_files = rsm.cc paxos.cc config.cc log.cc handle.cc 54 | 55 | rpclib=rpc/rpc.cc rpc/connection.cc rpc/pollmgr.cc rpc/thr_pool.cc rpc/jsl_log.cc gettime.cc 56 | rpc/librpc.a: $(patsubst %.cc,%.o,$(rpclib)) 57 | rm -f $@ 58 | ar cq $@ $^ 59 | ranlib rpc/librpc.a 60 | 61 | rpc/rpctest=rpc/rpctest.cc 62 | rpc/rpctest: $(patsubst %.cc,%.o,$(rpctest)) rpc/librpc.a 63 | 64 | lock_demo=lock_demo.cc lock_client.cc 65 | lock_demo : $(patsubst %.cc,%.o,$(lock_demo)) rpc/librpc.a 66 | 67 | lock_tester=lock_tester.cc lock_client.cc 68 | ifeq ($(LAB4GE),1) 69 | lock_tester += lock_client_cache.cc 70 | endif 71 | ifeq ($(LAB7GE),1) 72 | lock_tester+=rsm_client.cc handle.cc lock_client_cache_rsm.cc 73 | endif 74 | lock_tester : $(patsubst %.cc,%.o,$(lock_tester)) rpc/librpc.a 75 | 76 | lock_server=lock_server.cc lock_smain.cc 77 | ifeq ($(LAB4GE),1) 78 | lock_server+=lock_server_cache.cc handle.cc 79 | endif 80 | ifeq ($(LAB6GE),1) 81 | lock_server+= $(rsm_files) 82 | endif 83 | ifeq ($(LAB7GE),1) 84 | lock_server+= lock_server_cache_rsm.cc 85 | endif 86 | 87 | lock_server : $(patsubst %.cc,%.o,$(lock_server)) rpc/librpc.a 88 | 89 | yfs_client=yfs_client.cc extent_client.cc fuse.cc 90 | ifeq ($(LAB3GE),1) 91 | yfs_client += lock_client.cc 92 | endif 93 | ifeq ($(LAB7GE),1) 94 | yfs_client += rsm_client.cc lock_client_cache_rsm.cc 95 | endif 96 | ifeq ($(LAB4GE),1) 97 | yfs_client += lock_client_cache.cc 98 | endif 99 | yfs_client : $(patsubst %.cc,%.o,$(yfs_client)) rpc/librpc.a 100 | 101 | extent_server=extent_server.cc extent_smain.cc 102 | extent_server : $(patsubst %.cc,%.o,$(extent_server)) rpc/librpc.a 103 | 104 | test-lab-3-b=test-lab-3-b.c 105 | test-lab-3-b: $(patsubst %.c,%.o,$(test_lab_4-b)) rpc/librpc.a 106 | 107 | test-lab-3-c=test-lab-3-c.c 108 | test-lab-4-c: $(patsubst %.c,%.o,$(test_lab_4-c)) rpc/librpc.a 109 | 110 | rsm_tester=rsm_tester.cc rsmtest_client.cc 111 | rsm_tester: $(patsubst %.cc,%.o,$(rsm_tester)) rpc/librpc.a 112 | 113 | %.o: %.cc 114 | $(CXX) $(CXXFLAGS) -c $< -o $@ 115 | 116 | fuse.o: fuse.cc 117 | $(CXX) -c $(CXXFLAGS) $(FUSEFLAGS) $(MACFLAGS) $< 118 | 119 | # mklab.inc is needed by 6.824 staff only. Just ignore it. 120 | -include mklab.inc 121 | 122 | -include *.d 123 | -include rpc/*.d 124 | 125 | clean_files=rpc/rpctest rpc/*.o rpc/*.d rpc/librpc.a *.o *.d yfs_client extent_server lock_server lock_tester lock_demo rpctest test-lab-3-b test-lab-3-c rsm_tester 126 | .PHONY: clean handin 127 | clean: 128 | rm $(clean_files) -rf 129 | 130 | handin_ignore=$(clean_files) core* *log 131 | handin_file=$(shell whoami)-lab$(LAB).tgz 132 | labdir=$(shell basename $(PWD)) 133 | handin: 134 | @if test -f stop.sh; then ./stop.sh > /dev/null 2>&1 | echo ""; fi 135 | @bash -c "cd ../; tar -X <(tr ' ' '\n' < <(echo '$(handin_ignore)')) -czvf $(handin_file) $(labdir); mv $(handin_file) $(labdir); cd $(labdir)" 136 | @echo Please email $(handin_file) to 6.824-submit@pdos.csail.mit.edu 137 | @echo Thanks! 138 | -------------------------------------------------------------------------------- /test-lab-2-b.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Case 1: Single file system 4 | # 1. create new files 5 | # 2. open an existing file 6 | # 3. write to a file 7 | # - write to the middle 8 | # - append 9 | # - write from middle and beyond file length 10 | # - seek beyond file length and write 11 | # 4. read file content 12 | # from each case above 13 | # 14 | # Make sure file handles/file type for new files are correct. 15 | 16 | # Case 2: Two file systems mounted under same root dir. 17 | # 0. Start two fs with same rootdir 18 | # 1. create files in dir1 19 | # 2. read the files from dir2 20 | # 21 | # 22 | 23 | use strict; 24 | $| = 1; 25 | 26 | if($#ARGV != 1){ 27 | print STDERR "Usage: test-lab-2-b.pl directory1 directory2\n"; 28 | exit(1); 29 | } 30 | my $dir1 = $ARGV[0]; 31 | my $dir2 = $ARGV[1]; 32 | 33 | my $f1 = "a$$"; 34 | my $f2 = "b$$"; 35 | 36 | my $files = { }; 37 | 38 | print "Write and read one file: "; 39 | writeone($dir1, $f1, 600); 40 | checkcontent($dir1, $f1); 41 | print "OK\n"; 42 | 43 | print "Write and read a second file: "; 44 | writeone($dir1, $f2, 4111); 45 | checkcontent($dir1, $f2); 46 | checkcontent($dir1, $f1); 47 | print "OK\n"; 48 | 49 | print "Overwrite an existing file: "; 50 | writeone($dir1, $f1, 275); # shorter than before... 51 | checkcontent($dir1, $f1); 52 | checkcontent($dir1, $f2); 53 | print "OK\n"; 54 | 55 | print "Append to an existing file: "; 56 | writeone($dir1, $f1, 8192); 57 | append($dir1, $f1, 7007); 58 | checkcontent($dir1, $f1); 59 | print "OK\n"; 60 | 61 | print "Write into the middle of an existing file: "; 62 | writeat($dir1, $f1, 190); 63 | checkcontent($dir1, $f1); 64 | print "OK\n"; 65 | 66 | print "Write beyond the end of an existing file: "; 67 | writeat($dir1, $f1, 65536); 68 | checkcontent($dir1, $f1); 69 | print "OK\n"; 70 | 71 | print "Check that one cannot open non-existant file: "; 72 | checknot($dir1, "z-$$-z"); 73 | print "OK\n"; 74 | 75 | print "Check directory listing: "; 76 | dircheck($dir1); 77 | print "OK\n"; 78 | 79 | print "Read files via second server: "; 80 | checkcontent($dir2, $f1); 81 | checkcontent($dir2, $f2); 82 | print "OK\n"; 83 | 84 | print "Check directory listing on second server: "; 85 | dircheck($dir2); 86 | print "OK\n"; 87 | 88 | print "Passed all tests\n"; 89 | 90 | sub writeone { 91 | my($d, $name, $len) = @_; 92 | my $contents = ""; 93 | 94 | my $f = $d . "/" . $name; 95 | 96 | use FileHandle; 97 | sysopen F, $f, O_TRUNC|O_RDWR|O_CREAT 98 | or die "cannot create $f\n"; 99 | 100 | while(length($contents) < $len){ 101 | $contents .= rand(); 102 | } 103 | $contents = substr($contents, 0, $len); 104 | $files->{$name} = $contents; 105 | 106 | syswrite F, $files->{$name}, length($files->{$name}) 107 | or die "cannot write to $f"; 108 | close(F); 109 | } 110 | 111 | sub checkcontent { 112 | my($d, $name) = @_; 113 | 114 | my $f = $d . "/" . $name; 115 | 116 | open F, "$f" or die "could not open $f for reading"; 117 | my $c2 = ""; 118 | while() { 119 | $c2 .= $_; 120 | } 121 | close(F); 122 | $files->{$name} eq $c2 or die "content of $f is incorrect\n"; 123 | } 124 | 125 | sub checknot { 126 | my($d, $name) = @_; 127 | 128 | my $f = $d . "/" . $name; 129 | 130 | my $x = open(F, $f); 131 | if(defined($x)){ 132 | print STDERR "$x exists but should not\n"; 133 | exit(1); 134 | } 135 | } 136 | 137 | sub append { 138 | my($d, $name, $n) = @_; 139 | 140 | my $f = $d . "/" . $name; 141 | 142 | use FileHandle; 143 | sysopen F, "$f", O_RDWR 144 | or die "cannot open $f for append\n"; 145 | 146 | my $contents = ""; 147 | while(length($contents) < $n){ 148 | $contents .= rand(); 149 | } 150 | $contents = substr($contents, 0, $n); 151 | $files->{$name} .= $contents; ## Append the file content 152 | 153 | seek(F, 0, 2); ## goto end of file 154 | syswrite(F, $contents, length($contents), 0) or die "cannot append to $f"; 155 | close(F); 156 | } 157 | 158 | sub writeat { 159 | my($d, $name, $off) = @_; 160 | 161 | my $f = $d . "/" . $name; 162 | 163 | use FileHandle; 164 | sysopen F, "$f", O_RDWR 165 | or die "cannot open $f for read/write\n"; 166 | 167 | my $contents = rand(); 168 | 169 | my $x = $files->{$name}; 170 | if (length($x) < $off + length($contents)) { 171 | my $nappend = $off + length($contents) - length($x); 172 | for (my $i=0; $i < $nappend; $i++) { 173 | $x .= "\0"; 174 | } 175 | } 176 | substr($x, $off, length($contents)) = $contents; 177 | $files->{$name} = $x; 178 | 179 | seek(F, $off, 0); 180 | syswrite(F, $contents, length($contents), 0) 181 | or die "cannot write $f at offset $off"; 182 | close(F); 183 | } 184 | 185 | sub dircheck { 186 | my($dir) = @_; 187 | 188 | opendir(D, $dir); 189 | my %h; 190 | my $f; 191 | while(defined($f = readdir(D))){ 192 | if(defined($h{$f})){ 193 | print STDERR "$f appears more than once in directory $dir\n"; 194 | exit(1); 195 | } 196 | $h{$f} = 1; 197 | } 198 | closedir(D); 199 | 200 | foreach $f (keys(%$files)){ 201 | if(!defined($h{$f})){ 202 | print STDERR "$f is missing from directory $dir\n"; 203 | exit(1); 204 | } 205 | } 206 | } 207 | 208 | exit(0); 209 | 210 | -------------------------------------------------------------------------------- /test-lab-3-a.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | sub oops { 4 | my($msg) = @_; 5 | print STDERR "test-lab-3-a.pl error: $msg : $!\n"; 6 | exit(1); 7 | } 8 | 9 | sub oops1 { 10 | my($msg) = @_; 11 | print STDERR "test-lab-3-a.pl error: $msg\n"; 12 | exit(1); 13 | } 14 | 15 | if($#ARGV != 0){ 16 | print STDERR "Usage: test-lab-3-a.pl directory\n"; 17 | exit(1); 18 | } 19 | 20 | my $seq = 0; 21 | my $root = $ARGV[0]; 22 | my $dir = $root . "/d" . $$; 23 | print "mkdir $dir\n"; 24 | if(mkdir($dir, 0777) == 0){ 25 | oops("mkdir $dir"); 26 | } 27 | 28 | my $files = { }; 29 | my @dead; 30 | 31 | createone(); 32 | deleteone(); 33 | createone(); 34 | checkmtime(); 35 | checkdirmtime(); 36 | 37 | for($iters = 0; $iters < 10; $iters++){ 38 | createone(); 39 | } 40 | 41 | for($iters = 0; $iters < 50; $iters++){ 42 | if(rand() < 0.2){ 43 | deadcheck(); 44 | } 45 | if(rand() < 0.2){ 46 | livecheck(); 47 | } 48 | if(rand() < 0.02){ 49 | dircheck(); 50 | } 51 | if(rand() < 0.1){ 52 | checkdirmtime(); 53 | } 54 | if(rand() < 0.1){ 55 | checkmtime(); 56 | } 57 | if(rand() < 0.5){ 58 | createone(); 59 | } 60 | if(rand() < 0.5){ 61 | deleteone(); 62 | } 63 | } 64 | 65 | dircheck(); 66 | cleanup(); 67 | dircheck(); 68 | printf "Passed all tests!\n"; 69 | exit(0); 70 | 71 | sub createone { 72 | my $name = "x-" . $seq; 73 | $seq = $seq + 1; 74 | my $contents = rand(); 75 | print "create $name\n"; 76 | if(!open(F, ">$dir/$name")){ 77 | oops("cannot create $name"); 78 | } 79 | print F "$contents"; 80 | close(F); 81 | $files->{$name} = $contents; 82 | } 83 | 84 | # make sure all the live files are there, 85 | # and that all the dead files are not there. 86 | sub dircheck { 87 | print "dircheck\n"; 88 | opendir(D, $dir); 89 | my %h; 90 | my $f; 91 | while(defined($f = readdir(D))){ 92 | if(defined($h{$f})){ 93 | oops1("$f occurs twice in directory"); 94 | } 95 | $h{$f} = 1; 96 | } 97 | closedir(D); 98 | 99 | foreach $f (keys(%$files)){ 100 | if(!defined($h{$f})){ 101 | oops1("$f is not in directory listing"); 102 | } 103 | } 104 | 105 | foreach $f (@dead){ 106 | if(defined($h{$f})){ 107 | oops1("$f was removed but is in directory listing"); 108 | } 109 | } 110 | 111 | foreach $f (keys(%h)){ 112 | next if ($f eq "." or $f eq ".."); 113 | if(!defined($files->{$f})){ 114 | oops1("unexpected file $f in directory listing"); 115 | } 116 | } 117 | } 118 | 119 | sub livecheck { 120 | my @a = keys(%$files); 121 | return if $#a < 0; 122 | my $i = int(rand($#a + 1)); 123 | my $k = $a[$i]; 124 | print "livecheck $k\n"; 125 | oops("cannot open $k") if !open(F, "$dir/$k"); 126 | my $z = ; 127 | if($z ne $files->{$k}){ 128 | oops1("file $k wrong contents"); 129 | } 130 | close(F); 131 | } 132 | 133 | sub deadcheck { 134 | return if $#dead < 0; 135 | my $i = int(rand($#dead + 1)); 136 | my $k = $dead[$i]; 137 | return if defined($files->{$k}); # ??? 138 | print "deadcheck $k\n"; 139 | if(rand(1.0) < 0.5){ 140 | if(open(F, $dir . "/" . $k)){ 141 | oops1("dead file $k is readable"); 142 | } 143 | } else { 144 | if(unlink($dir . "/" . $k)){ 145 | oops1("dead file $k was removable"); 146 | } 147 | } 148 | } 149 | 150 | sub deleteone { 151 | my @a = keys(%$files); 152 | return 0 if $#a < 0; 153 | my $i = int(rand($#a + 1)); 154 | my $k = $a[$i]; 155 | print "delete $k\n"; 156 | if(unlink($dir . "/" . $k) == 0){ 157 | oops("unlink $k failed"); 158 | } 159 | delete $files->{$k}; 160 | push(@dead, $k); 161 | return 1; 162 | } 163 | 164 | sub checkdirmtime { 165 | print "checkdirmtime\n"; 166 | opendir(D, $dir); 167 | closedir(D); 168 | my @st1 = stat($dir . "/."); 169 | sleep(2); 170 | my $op; 171 | if(rand() < 0.75){ 172 | return if deleteone() == 0; 173 | $op = "delete"; 174 | } else { 175 | createone(); 176 | $op = "create"; 177 | } 178 | opendir(D, $dir); 179 | closedir(D); 180 | my @st2 = stat($dir . "/."); 181 | if($st1[9] == $st2[9]){ 182 | print $st2[9], " ", $st2[9], "\n"; 183 | oops1("$op did not change directory mtime"); 184 | } 185 | if($st1[10] == $st2[10]){ 186 | oops1("$op did not change directory ctime"); 187 | } 188 | } 189 | 190 | sub checkmtime { 191 | my @a = keys(%$files); 192 | return if $#a < 0; 193 | my $i = int(rand($#a + 1)); 194 | my $k = $a[$i]; 195 | print "checkmtime $k\n"; 196 | 197 | my @st1 = stat("$dir/$k"); 198 | sleep(2); 199 | if(!open(F, ">$dir/$k")){ 200 | oops("cannot re-create $dir/$k"); 201 | } 202 | my @st2 = stat("$dir/$k"); 203 | sleep(2); 204 | print F $files->{$k}; 205 | close(F); 206 | if(!open(F, "$dir/$k")){ 207 | oops("cannot open $dir/$k"); 208 | } 209 | close(F); 210 | my @st3 = stat("$dir/$k"); 211 | 212 | if($st1[9] == $st2[9]){ 213 | oops1("CREATE did not change mtime"); 214 | } 215 | if($st2[9] == $st3[9]){ 216 | oops1("WRITE did not change mtime"); 217 | } 218 | } 219 | 220 | sub cleanup { 221 | while(deleteone()){ 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /lock_tester.cc: -------------------------------------------------------------------------------- 1 | // 2 | // Lock server tester 3 | // 4 | 5 | #include "lock_protocol.h" 6 | #include "lock_client.h" 7 | #include "rpc.h" 8 | #include "jsl_log.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "lang/verify.h" 14 | #include "lock_client_cache.h" 15 | 16 | // must be >= 2 17 | int nt = 6; //XXX: lab1's rpc handlers are blocking. Since rpcs uses a thread pool of 10 threads, we cannot test more than 10 blocking rpc. 18 | std::string dst; 19 | lock_client_cache **lc = new lock_client_cache * [nt]; 20 | lock_protocol::lockid_t a = 1; 21 | lock_protocol::lockid_t b = 2; 22 | lock_protocol::lockid_t c = 3; 23 | 24 | // check_grant() and check_release() check that the lock server 25 | // doesn't grant the same lock to both clients. 26 | // it assumes that lock names are distinct in the first byte. 27 | int ct[256]; 28 | pthread_mutex_t count_mutex; 29 | 30 | void 31 | check_grant(lock_protocol::lockid_t lid) 32 | { 33 | ScopedLock ml(&count_mutex); 34 | int x = lid & 0xff; 35 | if(ct[x] != 0){ 36 | fprintf(stderr, "error: server granted %016llx twice\n", lid); 37 | fprintf(stdout, "error: server granted %016llx twice\n", lid); 38 | exit(1); 39 | } 40 | ct[x] += 1; 41 | } 42 | 43 | void 44 | check_release(lock_protocol::lockid_t lid) 45 | { 46 | ScopedLock ml(&count_mutex); 47 | int x = lid & 0xff; 48 | if(ct[x] != 1){ 49 | fprintf(stderr, "error: client released un-held lock %016llx\n", lid); 50 | exit(1); 51 | } 52 | ct[x] -= 1; 53 | } 54 | 55 | void 56 | test1(void) 57 | { 58 | printf ("acquire a release a acquire a release a\n"); 59 | lc[0]->acquire(a); 60 | check_grant(a); 61 | lc[0]->release(a); 62 | check_release(a); 63 | lc[0]->acquire(a); 64 | check_grant(a); 65 | lc[0]->release(a); 66 | check_release(a); 67 | 68 | printf ("acquire a acquire b release b release a\n"); 69 | lc[0]->acquire(a); 70 | check_grant(a); 71 | lc[0]->acquire(b); 72 | check_grant(b); 73 | lc[0]->release(b); 74 | check_release(b); 75 | lc[0]->release(a); 76 | check_release(a); 77 | } 78 | 79 | void * 80 | test2(void *x) 81 | { 82 | int i = * (int *) x; 83 | 84 | printf ("test2: client %d acquire a release a\n", i); 85 | lc[i]->acquire(a); 86 | printf ("test2: client %d acquire done\n", i); 87 | check_grant(a); 88 | sleep(1); 89 | printf ("test2: client %d release\n", i); 90 | check_release(a); 91 | lc[i]->release(a); 92 | printf ("test2: client %d release done\n", i); 93 | return 0; 94 | } 95 | 96 | void * 97 | test3(void *x) 98 | { 99 | int i = * (int *) x; 100 | 101 | printf ("test3: client %d acquire a release a concurrent\n", i); 102 | for (int j = 0; j < 10; j++) { 103 | lc[i]->acquire(a); 104 | check_grant(a); 105 | printf ("test3: client %d got lock\n", i); 106 | check_release(a); 107 | lc[i]->release(a); 108 | } 109 | return 0; 110 | } 111 | 112 | void * 113 | test4(void *x) 114 | { 115 | int i = * (int *) x; 116 | 117 | printf ("test4: thread %d acquire a release a concurrent; same clnt\n", i); 118 | for (int j = 0; j < 10; j++) { 119 | lc[0]->acquire(a); 120 | check_grant(a); 121 | printf ("test4: thread %d on client 0 got lock\n", i); 122 | check_release(a); 123 | lc[0]->release(a); 124 | } 125 | return 0; 126 | } 127 | 128 | void * 129 | test5(void *x) 130 | { 131 | int i = * (int *) x; 132 | 133 | printf ("test5: client %d acquire a release a concurrent; same and diff clnt\n", i); 134 | for (int j = 0; j < 10; j++) { 135 | if (i < 5) lc[0]->acquire(a); 136 | else lc[1]->acquire(a); 137 | check_grant(a); 138 | printf ("test5: client %d got lock\n", i); 139 | check_release(a); 140 | if (i < 5) lc[0]->release(a); 141 | else lc[1]->release(a); 142 | } 143 | return 0; 144 | } 145 | 146 | int 147 | main(int argc, char *argv[]) 148 | { 149 | int r; 150 | pthread_t th[nt]; 151 | int test = 0; 152 | 153 | setvbuf(stdout, NULL, _IONBF, 0); 154 | setvbuf(stderr, NULL, _IONBF, 0); 155 | srandom(getpid()); 156 | 157 | //jsl_set_debug(2); 158 | 159 | if(argc < 2) { 160 | fprintf(stderr, "Usage: %s [host:]port [test]\n", argv[0]); 161 | exit(1); 162 | } 163 | 164 | dst = argv[1]; 165 | 166 | if (argc > 2) { 167 | test = atoi(argv[2]); 168 | if(test < 1 || test > 5){ 169 | printf("Test number must be between 1 and 5\n"); 170 | exit(1); 171 | } 172 | } 173 | 174 | VERIFY(pthread_mutex_init(&count_mutex, NULL) == 0); 175 | printf("cache lock client\n"); 176 | for (int i = 0; i < nt; i++) lc[i] = new lock_client_cache(dst); 177 | 178 | if(!test || test == 1){ 179 | test1(); 180 | } 181 | 182 | if(!test || test == 2){ 183 | // test2 184 | for (int i = 0; i < nt; i++) { 185 | int *a = new int (i); 186 | r = pthread_create(&th[i], NULL, test2, (void *) a); 187 | VERIFY (r == 0); 188 | } 189 | for (int i = 0; i < nt; i++) { 190 | pthread_join(th[i], NULL); 191 | } 192 | } 193 | 194 | if(!test || test == 3){ 195 | printf("test 3\n"); 196 | 197 | // test3 198 | for (int i = 0; i < nt; i++) { 199 | int *a = new int (i); 200 | r = pthread_create(&th[i], NULL, test3, (void *) a); 201 | VERIFY (r == 0); 202 | } 203 | for (int i = 0; i < nt; i++) { 204 | pthread_join(th[i], NULL); 205 | } 206 | } 207 | 208 | if(!test || test == 4){ 209 | printf("test 4\n"); 210 | 211 | // test 4 212 | for (int i = 0; i < 2; i++) { 213 | int *a = new int (i); 214 | r = pthread_create(&th[i], NULL, test4, (void *) a); 215 | VERIFY (r == 0); 216 | } 217 | for (int i = 0; i < 2; i++) { 218 | pthread_join(th[i], NULL); 219 | } 220 | } 221 | 222 | if(!test || test == 5){ 223 | printf("test 5\n"); 224 | 225 | // test 5 226 | 227 | for (int i = 0; i < nt; i++) { 228 | int *a = new int (i); 229 | r = pthread_create(&th[i], NULL, test5, (void *) a); 230 | VERIFY (r == 0); 231 | } 232 | for (int i = 0; i < nt; i++) { 233 | pthread_join(th[i], NULL); 234 | } 235 | } 236 | 237 | printf ("%s: passed all tests successfully\n", argv[0]); 238 | 239 | } 240 | -------------------------------------------------------------------------------- /yfs_client.cc: -------------------------------------------------------------------------------- 1 | // yfs client. implements FS operations using extent and lock server 2 | #include "yfs_client.h" 3 | #include "extent_client.h" 4 | #include "lock_client.h" 5 | #include "lock_client_cache.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | yfs_client::yfs_client(std::string extent_dst, std::string lock_dst) 17 | { 18 | ec = new extent_client(extent_dst); 19 | //lc = new lock_client(lock_dst); 20 | lu = new lock_release_extent(ec); 21 | lc = new lock_client_cache(lock_dst, lu); 22 | generator = std::mt19937(std::random_device()()); 23 | uid = std::uniform_int_distribution(0, int((long long)(1 << 31) - 1)); 24 | } 25 | 26 | yfs_client::inum 27 | yfs_client::n2i(std::string n) 28 | { 29 | std::istringstream ist(n); 30 | unsigned long long finum; 31 | ist >> finum; 32 | return finum; 33 | } 34 | 35 | std::string 36 | yfs_client::filename(inum inum) 37 | { 38 | std::ostringstream ost; 39 | ost << inum; 40 | return ost.str(); 41 | } 42 | 43 | yfs_client::inum 44 | yfs_client::rand_inum(bool is_dir) 45 | { 46 | if (is_dir) 47 | return uid(generator) & 0x7FFFFFFF; 48 | else 49 | return uid(generator) | 0x80000000; 50 | } 51 | 52 | bool 53 | yfs_client::isfile(inum inum) 54 | { 55 | if(inum & 0x80000000) 56 | return true; 57 | return false; 58 | } 59 | 60 | bool 61 | yfs_client::isdir(inum inum) 62 | { 63 | return ! isfile(inum); 64 | } 65 | 66 | int 67 | yfs_client::getfile(inum inum, fileinfo &fin) 68 | { 69 | int r = OK; 70 | // You modify this function for Lab 3 71 | // - hold and release the file lock 72 | 73 | raii_wrapper rw(lc, inum); 74 | printf("getfile %016llx\n", inum); 75 | extent_protocol::attr a; 76 | if (ec->getattr(inum, a) != extent_protocol::OK) { 77 | r = IOERR; 78 | goto release; 79 | } 80 | 81 | fin.atime = a.atime; 82 | fin.mtime = a.mtime; 83 | fin.ctime = a.ctime; 84 | fin.size = a.size; 85 | printf("getfile %016llx -> sz %llu\n", inum, fin.size); 86 | 87 | release: 88 | 89 | return r; 90 | } 91 | 92 | int 93 | yfs_client::getdir(inum inum, dirinfo &din) 94 | { 95 | int r = OK; 96 | // You modify this function for Lab 3 97 | // - hold and release the directory lock 98 | 99 | raii_wrapper rw(lc, inum); 100 | printf("getdir %016llx\n", inum); 101 | extent_protocol::attr a; 102 | if (ec->getattr(inum, a) != extent_protocol::OK) { 103 | r = IOERR; 104 | goto release; 105 | } 106 | din.atime = a.atime; 107 | din.mtime = a.mtime; 108 | din.ctime = a.ctime; 109 | 110 | release: 111 | return r; 112 | } 113 | 114 | yfs_client::status 115 | yfs_client::create(inum dir, const char *name, bool is_dir, inum &ret_id) 116 | { 117 | if (isdir(dir)) 118 | { 119 | raii_wrapper rw(lc, dir); 120 | std::string buf; 121 | auto ret = ec->get(dir, buf); 122 | if (ret != OK) 123 | { 124 | return NOENT; 125 | } 126 | dirent_list dl(buf); 127 | if (dl.match(name)) 128 | { 129 | return EXIST; 130 | } 131 | ret_id = rand_inum(is_dir); 132 | dl.add(ret_id, name); 133 | raii_wrapper rw2(lc, ret_id); 134 | ret = ec->put(ret_id, ""); 135 | if (ret != OK) return ret; 136 | //ret = ec->flush(ret_id); 137 | //if (ret != OK) return ret; 138 | return ec->put(dir, dl.to_string()); 139 | //if (ret != OK) return ret; 140 | //return ec->flush(dir); 141 | } 142 | 143 | return NOENT; 144 | } 145 | 146 | yfs_client::status 147 | yfs_client::lookup(inum dir, const char *name, inum &ret_id) 148 | { 149 | if (isdir(dir)) 150 | { 151 | raii_wrapper rw(lc, dir); 152 | std::string buf; 153 | auto ret = ec->get(dir, buf); 154 | if (ret != OK) 155 | { 156 | return IOERR; 157 | } 158 | dirent_list dl(buf); 159 | if (!dl.match(name)) 160 | { 161 | return NOENT; 162 | } 163 | ret_id = dl.get(name); 164 | return OK; 165 | } 166 | 167 | return IOERR; 168 | } 169 | 170 | yfs_client::status 171 | yfs_client::readdir(inum dir, std::unordered_map& ret_map) 172 | { 173 | if (isdir(dir)) 174 | { 175 | raii_wrapper rw(lc, dir); 176 | std::string buf; 177 | auto ret = ec->get(dir, buf); 178 | if (ret != OK) 179 | { 180 | return IOERR; 181 | } 182 | dirent_list dl(buf); 183 | ret_map = dl.get_map(); 184 | return OK; 185 | } 186 | 187 | return IOERR; 188 | } 189 | 190 | yfs_client::status 191 | yfs_client::setattr(inum ino, unsigned int len) 192 | { 193 | if (!isfile(ino)) return NOENT; 194 | raii_wrapper rw(lc, ino); 195 | std::string buf; 196 | auto ret = ec->get(ino, buf); 197 | if (ret != OK) return ret; 198 | if (buf.size() > len) 199 | { 200 | buf = std::move(buf.substr(0, len)); 201 | } 202 | else 203 | { 204 | buf.append(std::string(len - buf.size(), '\0')); 205 | } 206 | return ec->put(ino, buf); 207 | } 208 | 209 | yfs_client::status 210 | yfs_client::read(inum ino, std::size_t off, std::size_t len, std::string& data) 211 | { 212 | if (!isfile(ino)) return NOENT; 213 | raii_wrapper rw(lc, ino); 214 | std::string buf; 215 | auto ret = ec->get(ino, buf); 216 | if (ret != OK) return ret; 217 | if (off < buf.size()) 218 | { 219 | if (off + len <= buf.size()) 220 | { 221 | data = std::move(buf.substr(off, len)); 222 | } 223 | else 224 | { 225 | data = std::move(buf.substr(off, buf.size() - off)); 226 | } 227 | } 228 | return OK; 229 | } 230 | 231 | yfs_client::status 232 | yfs_client::write(inum ino, std::size_t off, std::size_t len, const char *data) 233 | { 234 | if (!isfile(ino)) return NOENT; 235 | raii_wrapper rw(lc, ino); 236 | std::string buf; 237 | auto ret = ec->get(ino, buf); 238 | if (ret != OK) return ret; 239 | buf.resize(std::max(buf.size(), off + len), '\0'); 240 | std::size_t total_len = strlen(data); 241 | for (unsigned int i = off, j = 0; j < std::min(total_len, len); i++, j++) 242 | { 243 | buf[i] = data[j]; 244 | } 245 | return ec->put(ino, buf); 246 | } 247 | 248 | yfs_client::status 249 | yfs_client::unlink(inum parent, const char *name) 250 | { 251 | if (!isdir(parent)) return NOENT; 252 | raii_wrapper rw(lc, parent); 253 | std::string buf; 254 | auto ret = ec->get(parent, buf); 255 | if (ret != OK) return ret; 256 | dirent_list dl(buf); 257 | if (!dl.match(name)) return NOENT; 258 | auto file_inum = dl.get(name); 259 | raii_wrapper rw2(lc, file_inum); 260 | ret = ec->remove(file_inum); 261 | if (ret != OK) return ret; 262 | dl.remove(name); 263 | return ec->put(parent, dl.to_string()); 264 | } 265 | -------------------------------------------------------------------------------- /rpc/marshall.h: -------------------------------------------------------------------------------- 1 | #ifndef marshall_h 2 | #define marshall_h 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "lang/verify.h" 14 | #include "lang/algorithm.h" 15 | 16 | struct req_header { 17 | req_header(int x=0, int p=0, int c = 0, int s = 0, int xi = 0): 18 | xid(x), proc(p), clt_nonce(c), srv_nonce(s), xid_rep(xi) {} 19 | int xid; 20 | int proc; 21 | unsigned int clt_nonce; 22 | unsigned int srv_nonce; 23 | int xid_rep; 24 | }; 25 | 26 | struct reply_header { 27 | reply_header(int x=0, int r=0): xid(x), ret(r) {} 28 | int xid; 29 | int ret; 30 | }; 31 | 32 | typedef uint64_t rpc_checksum_t; 33 | typedef int rpc_sz_t; 34 | 35 | enum { 36 | //size of initial buffer allocation 37 | DEFAULT_RPC_SZ = 1024, 38 | #if RPC_CHECKSUMMING 39 | //size of rpc_header includes a 4-byte int to be filled by tcpchan and uint64_t checksum 40 | RPC_HEADER_SZ = static_max::value + sizeof(rpc_sz_t) + sizeof(rpc_checksum_t) 41 | #else 42 | RPC_HEADER_SZ = static_max::value + sizeof(rpc_sz_t) 43 | #endif 44 | }; 45 | 46 | class marshall { 47 | private: 48 | char *_buf; // Base of the raw bytes buffer (dynamically readjusted) 49 | int _capa; // Capacity of the buffer 50 | int _ind; // Read/write head position 51 | 52 | public: 53 | marshall() { 54 | _buf = (char *) malloc(sizeof(char)*DEFAULT_RPC_SZ); 55 | VERIFY(_buf); 56 | _capa = DEFAULT_RPC_SZ; 57 | _ind = RPC_HEADER_SZ; 58 | } 59 | 60 | ~marshall() { 61 | if (_buf) 62 | free(_buf); 63 | } 64 | 65 | int size() { return _ind;} 66 | char *cstr() { return _buf;} 67 | 68 | void rawbyte(unsigned char); 69 | void rawbytes(const char *, int); 70 | 71 | // Return the current content (excluding header) as a string 72 | std::string get_content() { 73 | return std::string(_buf+RPC_HEADER_SZ,_ind-RPC_HEADER_SZ); 74 | } 75 | 76 | // Return the current content (excluding header) as a string 77 | std::string str() { 78 | return get_content(); 79 | } 80 | 81 | void pack(int i); 82 | 83 | void pack_req_header(const req_header &h) { 84 | int saved_sz = _ind; 85 | //leave the first 4-byte empty for channel to fill size of pdu 86 | _ind = sizeof(rpc_sz_t); 87 | #if RPC_CHECKSUMMING 88 | _ind += sizeof(rpc_checksum_t); 89 | #endif 90 | pack(h.xid); 91 | pack(h.proc); 92 | pack((int)h.clt_nonce); 93 | pack((int)h.srv_nonce); 94 | pack(h.xid_rep); 95 | _ind = saved_sz; 96 | } 97 | 98 | void pack_reply_header(const reply_header &h) { 99 | int saved_sz = _ind; 100 | //leave the first 4-byte empty for channel to fill size of pdu 101 | _ind = sizeof(rpc_sz_t); 102 | #if RPC_CHECKSUMMING 103 | _ind += sizeof(rpc_checksum_t); 104 | #endif 105 | pack(h.xid); 106 | pack(h.ret); 107 | _ind = saved_sz; 108 | } 109 | 110 | void take_buf(char **b, int *s) { 111 | *b = _buf; 112 | *s = _ind; 113 | _buf = NULL; 114 | _ind = 0; 115 | return; 116 | } 117 | }; 118 | marshall& operator<<(marshall &, bool); 119 | marshall& operator<<(marshall &, unsigned int); 120 | marshall& operator<<(marshall &, int); 121 | marshall& operator<<(marshall &, unsigned char); 122 | marshall& operator<<(marshall &, char); 123 | marshall& operator<<(marshall &, unsigned short); 124 | marshall& operator<<(marshall &, short); 125 | marshall& operator<<(marshall &, unsigned long long); 126 | marshall& operator<<(marshall &, const std::string &); 127 | 128 | class unmarshall { 129 | private: 130 | char *_buf; 131 | int _sz; 132 | int _ind; 133 | bool _ok; 134 | public: 135 | unmarshall(): _buf(NULL),_sz(0),_ind(0),_ok(false) {} 136 | unmarshall(char *b, int sz): _buf(b),_sz(sz),_ind(),_ok(true) {} 137 | unmarshall(const std::string &s) : _buf(NULL),_sz(0),_ind(0),_ok(false) 138 | { 139 | //take the content which does not exclude a RPC header from a string 140 | take_content(s); 141 | } 142 | ~unmarshall() { 143 | if (_buf) free(_buf); 144 | } 145 | 146 | //take contents from another unmarshall object 147 | void take_in(unmarshall &another); 148 | 149 | //take the content which does not exclude a RPC header from a string 150 | void take_content(const std::string &s) { 151 | _sz = s.size()+RPC_HEADER_SZ; 152 | _buf = (char *)realloc(_buf,_sz); 153 | VERIFY(_buf); 154 | _ind = RPC_HEADER_SZ; 155 | memcpy(_buf+_ind, s.data(), s.size()); 156 | _ok = true; 157 | } 158 | 159 | bool ok() { return _ok; } 160 | char *cstr() { return _buf;} 161 | bool okdone(); 162 | unsigned int rawbyte(); 163 | void rawbytes(std::string &s, unsigned int n); 164 | 165 | int ind() { return _ind;} 166 | int size() { return _sz;} 167 | void unpack(int *); //non-const ref 168 | void take_buf(char **b, int *sz) { 169 | *b = _buf; 170 | *sz = _sz; 171 | _sz = _ind = 0; 172 | _buf = NULL; 173 | } 174 | 175 | void unpack_req_header(req_header *h) { 176 | //the first 4-byte is for channel to fill size of pdu 177 | _ind = sizeof(rpc_sz_t); 178 | #if RPC_CHECKSUMMING 179 | _ind += sizeof(rpc_checksum_t); 180 | #endif 181 | unpack(&h->xid); 182 | unpack(&h->proc); 183 | unpack((int *)&h->clt_nonce); 184 | unpack((int *)&h->srv_nonce); 185 | unpack(&h->xid_rep); 186 | _ind = RPC_HEADER_SZ; 187 | } 188 | 189 | void unpack_reply_header(reply_header *h) { 190 | //the first 4-byte is for channel to fill size of pdu 191 | _ind = sizeof(rpc_sz_t); 192 | #if RPC_CHECKSUMMING 193 | _ind += sizeof(rpc_checksum_t); 194 | #endif 195 | unpack(&h->xid); 196 | unpack(&h->ret); 197 | _ind = RPC_HEADER_SZ; 198 | } 199 | }; 200 | 201 | unmarshall& operator>>(unmarshall &, bool &); 202 | unmarshall& operator>>(unmarshall &, unsigned char &); 203 | unmarshall& operator>>(unmarshall &, char &); 204 | unmarshall& operator>>(unmarshall &, unsigned short &); 205 | unmarshall& operator>>(unmarshall &, short &); 206 | unmarshall& operator>>(unmarshall &, unsigned int &); 207 | unmarshall& operator>>(unmarshall &, int &); 208 | unmarshall& operator>>(unmarshall &, unsigned long long &); 209 | unmarshall& operator>>(unmarshall &, std::string &); 210 | 211 | template marshall & 212 | operator<<(marshall &m, std::vector v) 213 | { 214 | m << (unsigned int) v.size(); 215 | for(unsigned i = 0; i < v.size(); i++) 216 | m << v[i]; 217 | return m; 218 | } 219 | 220 | template unmarshall & 221 | operator>>(unmarshall &u, std::vector &v) 222 | { 223 | unsigned n; 224 | u >> n; 225 | for(unsigned i = 0; i < n; i++){ 226 | C z; 227 | u >> z; 228 | v.push_back(z); 229 | } 230 | return u; 231 | } 232 | 233 | template marshall & 234 | operator<<(marshall &m, const std::map &d) { 235 | typename std::map::const_iterator i; 236 | 237 | m << (unsigned int) d.size(); 238 | 239 | for (i = d.begin(); i != d.end(); i++) { 240 | m << i->first << i->second; 241 | } 242 | return m; 243 | } 244 | 245 | template unmarshall & 246 | operator>>(unmarshall &u, std::map &d) { 247 | unsigned int n; 248 | u >> n; 249 | 250 | d.clear(); 251 | 252 | for (unsigned int lcv = 0; lcv < n; lcv++) { 253 | A a; 254 | B b; 255 | u >> a >> b; 256 | d[a] = b; 257 | } 258 | return u; 259 | } 260 | 261 | #endif 262 | -------------------------------------------------------------------------------- /lock_client_cache.cc: -------------------------------------------------------------------------------- 1 | // RPC stubs for clients to talk to lock_server, and cache the locks 2 | // see lock_client.cache.h for protocol details. 3 | 4 | #include "lock_client_cache.h" 5 | #include "rpc.h" 6 | #include 7 | #include 8 | #include 9 | #include "tprintf.h" 10 | 11 | lock_client_cache::lock_client_cache(std::string xdst, 12 | class lock_release_user *_lu) : lock_client(xdst), lu(_lu) 13 | { 14 | rpcs *rlsrpc = new rpcs(0); 15 | rlsrpc->reg(rlock_protocol::revoke, this, &lock_client_cache::revoke_handler); 16 | rlsrpc->reg(rlock_protocol::retry, this, &lock_client_cache::retry_handler); 17 | 18 | const char *hname; 19 | hname = "127.0.0.1"; 20 | std::ostringstream host; 21 | host << hname << ":" << rlsrpc->port(); 22 | id = host.str(); 23 | } 24 | 25 | lock_protocol::status 26 | lock_client_cache::acquire(lock_protocol::lockid_t lid) 27 | { 28 | // ensure exclusive access to lock_cache 29 | // until we get and lock lock_cache[lid] 30 | std::unique_lock map_lock(mtx_map); 31 | auto itr = lock_cache.find(lid); 32 | std::shared_ptr cur_lock; 33 | if (itr == lock_cache.end()) 34 | { 35 | cur_lock = std::make_shared(lid); 36 | lock_cache[lid] = cur_lock; 37 | } 38 | else 39 | { 40 | cur_lock = itr->second; 41 | } 42 | 43 | std::unique_lock single_lock(cur_lock->mtx); 44 | // we will never erase a kv in lock_cache in this program 45 | // so cur_lock will always be valid (insert or erase other kv won't invalidate cur_lock) 46 | // so we no longer need exclusive access to lock_cache 47 | map_lock.unlock(); 48 | //while (cur_lock->status != client_lock::NONE 49 | // && cur_lock->status != client_lock::FREE) 50 | while (cur_lock->status == client_lock::LOCKED 51 | || cur_lock->status == client_lock::ACQUIRING) 52 | { 53 | // wait until cur_lock->status is (possibly) available, i.e. NONE or FREE 54 | cur_lock->available_cv.wait(single_lock); 55 | } 56 | while (cur_lock->status == client_lock::RELEASING) 57 | { 58 | // wait while release 59 | cur_lock->release_cv.wait(single_lock); 60 | } 61 | if (cur_lock->status == client_lock::FREE) 62 | { 63 | // the lock is free, so grant it 64 | cur_lock->status = client_lock::LOCKED; 65 | return lock_protocol::OK; 66 | } 67 | else if (cur_lock->status == client_lock::NONE) 68 | { 69 | // should not keep mutex locked while waiting for rpc's ret 70 | // since rpc could take a long time. 71 | // moreover it may lead to distributed deadlock 72 | cur_lock->status = client_lock::ACQUIRING; 73 | while (true) 74 | { 75 | single_lock.unlock(); 76 | int r; 77 | auto ret = cl->call(lock_protocol::acquire, lid, id, r); 78 | single_lock.lock(); // lock again 79 | if (ret == lock_protocol::OK) 80 | { 81 | cur_lock->status = client_lock::LOCKED; 82 | return lock_protocol::OK; 83 | } 84 | else if (ret == lock_protocol::RETRY) 85 | { 86 | // RETRY doesn't mean we should try again and again immediately 87 | // instead we wait for a explicit notification 88 | // i.e. a retry rpc 89 | // pay attention that retry rpc may arrive before we got this RETRY ret 90 | if (cur_lock->num_retry) 91 | { 92 | cur_lock->num_retry = 0; 93 | continue; 94 | } 95 | while (cur_lock->num_retry == 0) 96 | { 97 | cur_lock->retry_cv.wait(single_lock); 98 | } 99 | cur_lock->num_retry = 0; 100 | } 101 | else 102 | { 103 | return ret; 104 | } 105 | } 106 | } 107 | return lock_protocol::OK; 108 | } 109 | 110 | lock_protocol::status 111 | lock_client_cache::release(lock_protocol::lockid_t lid) 112 | { 113 | std::unique_lock map_lock(mtx_map); 114 | auto itr = lock_cache.find(lid); 115 | if (itr == lock_cache.end()) 116 | { 117 | return lock_protocol::OK; 118 | } 119 | 120 | auto cur_lock = itr->second; 121 | map_lock.unlock(); 122 | std::unique_lock single_lock(cur_lock->mtx); 123 | if (cur_lock->status != client_lock::LOCKED) 124 | { 125 | return lock_protocol::IOERR; 126 | } 127 | if (cur_lock->num_revoke) 128 | { 129 | lu->dorelease(lid); 130 | cur_lock->num_revoke = 0; 131 | cur_lock->status = client_lock::RELEASING; 132 | int r; 133 | single_lock.unlock(); 134 | auto ret = cl->call(lock_protocol::release, lid, id, r); 135 | single_lock.lock(); 136 | if (ret == lock_protocol::OK) 137 | { 138 | cur_lock->status = client_lock::NONE; 139 | cur_lock->available_cv.notify_all(); 140 | cur_lock->release_cv.notify_all(); 141 | } 142 | else 143 | { 144 | return ret; 145 | } 146 | } 147 | else 148 | { 149 | cur_lock->status = client_lock::FREE; 150 | cur_lock->available_cv.notify_one(); 151 | } 152 | 153 | return lock_protocol::OK; 154 | } 155 | 156 | rlock_protocol::status 157 | lock_client_cache::revoke_handler(lock_protocol::lockid_t lid, 158 | int &) 159 | { 160 | int ret = rlock_protocol::OK; 161 | std::unique_lock map_lock(mtx_map); 162 | auto itr = lock_cache.find(lid); 163 | if (itr == lock_cache.end()) 164 | { 165 | return ret; 166 | } 167 | 168 | auto cur_lock = itr->second; 169 | std::unique_lock single_lock(cur_lock->mtx); 170 | map_lock.unlock(); 171 | if (cur_lock->status == client_lock::FREE) 172 | { 173 | // the lock is free, so give it back 174 | lu->dorelease(lid); 175 | cur_lock->status = client_lock::RELEASING; 176 | int r; 177 | single_lock.unlock(); 178 | auto ret = cl->call(lock_protocol::release, lid, id, r); 179 | single_lock.lock(); 180 | if (ret == lock_protocol::OK) 181 | { 182 | cur_lock->status = client_lock::NONE; 183 | cur_lock->available_cv.notify_all(); 184 | cur_lock->release_cv.notify_all(); 185 | } 186 | else 187 | { 188 | cur_lock->status = client_lock::FREE; 189 | return ret; 190 | } 191 | } 192 | else 193 | { 194 | // mark it 195 | cur_lock->num_revoke++; 196 | } 197 | 198 | return ret; 199 | } 200 | 201 | rlock_protocol::status 202 | lock_client_cache::retry_handler(lock_protocol::lockid_t lid, 203 | int &) 204 | { 205 | int ret = rlock_protocol::OK; 206 | std::unique_lock map_lock(mtx_map); 207 | auto itr = lock_cache.find(lid); 208 | if (itr == lock_cache.end()) 209 | { 210 | return ret; 211 | } 212 | 213 | auto cur_lock = itr->second; 214 | std::unique_lock single_lock(cur_lock->mtx); 215 | map_lock.unlock(); 216 | 217 | cur_lock->num_retry++; 218 | cur_lock->retry_cv.notify_one(); 219 | return ret; 220 | } 221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /rpc/pollmgr.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "slock.h" 7 | #include "jsl_log.h" 8 | #include "method_thread.h" 9 | #include "lang/verify.h" 10 | #include "pollmgr.h" 11 | 12 | PollMgr *PollMgr::instance = NULL; 13 | static pthread_once_t pollmgr_is_initialized = PTHREAD_ONCE_INIT; 14 | 15 | void 16 | PollMgrInit() 17 | { 18 | PollMgr::instance = new PollMgr(); 19 | } 20 | 21 | PollMgr * 22 | PollMgr::Instance() 23 | { 24 | pthread_once(&pollmgr_is_initialized, PollMgrInit); 25 | return instance; 26 | } 27 | 28 | PollMgr::PollMgr() : pending_change_(false) 29 | { 30 | bzero(callbacks_, MAX_POLL_FDS*sizeof(void *)); 31 | aio_ = new SelectAIO(); 32 | //aio_ = new EPollAIO(); 33 | 34 | VERIFY(pthread_mutex_init(&m_, NULL) == 0); 35 | VERIFY(pthread_cond_init(&changedone_c_, NULL) == 0); 36 | VERIFY((th_ = method_thread(this, false, &PollMgr::wait_loop)) != 0); 37 | } 38 | 39 | PollMgr::~PollMgr() 40 | { 41 | //never kill me!!! 42 | VERIFY(0); 43 | } 44 | 45 | void 46 | PollMgr::add_callback(int fd, poll_flag flag, aio_callback *ch) 47 | { 48 | VERIFY(fd < MAX_POLL_FDS); 49 | 50 | ScopedLock ml(&m_); 51 | aio_->watch_fd(fd, flag); 52 | 53 | VERIFY(!callbacks_[fd] || callbacks_[fd]==ch); 54 | callbacks_[fd] = ch; 55 | } 56 | 57 | //remove all callbacks related to fd 58 | //the return guarantees that callbacks related to fd 59 | //will never be called again 60 | void 61 | PollMgr::block_remove_fd(int fd) 62 | { 63 | ScopedLock ml(&m_); 64 | aio_->unwatch_fd(fd, CB_RDWR); 65 | pending_change_ = true; 66 | VERIFY(pthread_cond_wait(&changedone_c_, &m_)==0); 67 | callbacks_[fd] = NULL; 68 | } 69 | 70 | void 71 | PollMgr::del_callback(int fd, poll_flag flag) 72 | { 73 | ScopedLock ml(&m_); 74 | if (aio_->unwatch_fd(fd, flag)) { 75 | callbacks_[fd] = NULL; 76 | } 77 | } 78 | 79 | bool 80 | PollMgr::has_callback(int fd, poll_flag flag, aio_callback *c) 81 | { 82 | ScopedLock ml(&m_); 83 | if (!callbacks_[fd] || callbacks_[fd]!=c) 84 | return false; 85 | 86 | return aio_->is_watched(fd, flag); 87 | } 88 | 89 | void 90 | PollMgr::wait_loop() 91 | { 92 | 93 | std::vector readable; 94 | std::vector writable; 95 | 96 | while (1) { 97 | { 98 | ScopedLock ml(&m_); 99 | if (pending_change_) { 100 | pending_change_ = false; 101 | VERIFY(pthread_cond_broadcast(&changedone_c_)==0); 102 | } 103 | } 104 | readable.clear(); 105 | writable.clear(); 106 | aio_->wait_ready(&readable,&writable); 107 | 108 | if (!readable.size() && !writable.size()) { 109 | continue; 110 | } 111 | //no locking of m_ 112 | //because no add_callback() and del_callback should 113 | //modify callbacks_[fd] while the fd is not dead 114 | for (unsigned int i = 0; i < readable.size(); i++) { 115 | int fd = readable[i]; 116 | if (callbacks_[fd]) 117 | callbacks_[fd]->read_cb(fd); 118 | } 119 | 120 | for (unsigned int i = 0; i < writable.size(); i++) { 121 | int fd = writable[i]; 122 | if (callbacks_[fd]) 123 | callbacks_[fd]->write_cb(fd); 124 | } 125 | } 126 | } 127 | 128 | SelectAIO::SelectAIO() : highfds_(0) 129 | { 130 | FD_ZERO(&rfds_); 131 | FD_ZERO(&wfds_); 132 | 133 | VERIFY(pipe(pipefd_) == 0); 134 | FD_SET(pipefd_[0], &rfds_); 135 | highfds_ = pipefd_[0]; 136 | 137 | int flags = fcntl(pipefd_[0], F_GETFL, NULL); 138 | flags |= O_NONBLOCK; 139 | fcntl(pipefd_[0], F_SETFL, flags); 140 | 141 | VERIFY(pthread_mutex_init(&m_, NULL) == 0); 142 | } 143 | 144 | SelectAIO::~SelectAIO() 145 | { 146 | VERIFY(pthread_mutex_destroy(&m_) == 0); 147 | } 148 | 149 | void 150 | SelectAIO::watch_fd(int fd, poll_flag flag) 151 | { 152 | ScopedLock ml(&m_); 153 | if (highfds_ <= fd) 154 | highfds_ = fd; 155 | 156 | if (flag == CB_RDONLY) { 157 | FD_SET(fd,&rfds_); 158 | }else if (flag == CB_WRONLY) { 159 | FD_SET(fd,&wfds_); 160 | }else { 161 | FD_SET(fd,&rfds_); 162 | FD_SET(fd,&wfds_); 163 | } 164 | 165 | char tmp = 1; 166 | VERIFY(write(pipefd_[1], &tmp, sizeof(tmp))==1); 167 | } 168 | 169 | bool 170 | SelectAIO::is_watched(int fd, poll_flag flag) 171 | { 172 | ScopedLock ml(&m_); 173 | if (flag == CB_RDONLY) { 174 | return FD_ISSET(fd,&rfds_); 175 | }else if (flag == CB_WRONLY) { 176 | return FD_ISSET(fd,&wfds_); 177 | }else{ 178 | return (FD_ISSET(fd,&rfds_) && FD_ISSET(fd,&wfds_)); 179 | } 180 | } 181 | 182 | bool 183 | SelectAIO::unwatch_fd(int fd, poll_flag flag) 184 | { 185 | ScopedLock ml(&m_); 186 | if (flag == CB_RDONLY) { 187 | FD_CLR(fd, &rfds_); 188 | }else if (flag == CB_WRONLY) { 189 | FD_CLR(fd, &wfds_); 190 | }else if (flag == CB_RDWR) { 191 | FD_CLR(fd, &wfds_); 192 | FD_CLR(fd, &rfds_); 193 | }else{ 194 | VERIFY(0); 195 | } 196 | 197 | if (!FD_ISSET(fd,&rfds_) && !FD_ISSET(fd,&wfds_)) { 198 | if (fd == highfds_) { 199 | int newh = pipefd_[0]; 200 | for (int i = 0; i <= highfds_; i++) { 201 | if (FD_ISSET(i, &rfds_)) { 202 | newh = i; 203 | }else if (FD_ISSET(i, &wfds_)) { 204 | newh = i; 205 | } 206 | } 207 | highfds_ = newh; 208 | } 209 | } 210 | if (flag == CB_RDWR) { 211 | char tmp = 1; 212 | VERIFY(write(pipefd_[1], &tmp, sizeof(tmp))==1); 213 | } 214 | return (!FD_ISSET(fd, &rfds_) && !FD_ISSET(fd, &wfds_)); 215 | } 216 | 217 | void 218 | SelectAIO::wait_ready(std::vector *readable, std::vector *writable) 219 | { 220 | fd_set trfds, twfds; 221 | int high; 222 | 223 | { 224 | ScopedLock ml(&m_); 225 | trfds = rfds_; 226 | twfds = wfds_; 227 | high = highfds_; 228 | 229 | } 230 | 231 | int ret = select(high+1, &trfds, &twfds, NULL, NULL); 232 | 233 | if (ret < 0) { 234 | if (errno == EINTR) { 235 | return; 236 | } else { 237 | perror("select:"); 238 | jsl_log(JSL_DBG_OFF, "PollMgr::select_loop failure errno %d\n",errno); 239 | VERIFY(0); 240 | } 241 | } 242 | 243 | for (int fd = 0; fd <= high; fd++) { 244 | if (fd == pipefd_[0] && FD_ISSET(fd, &trfds)) { 245 | char tmp; 246 | VERIFY (read(pipefd_[0],&tmp,sizeof(tmp))==1); 247 | VERIFY(tmp==1); 248 | }else { 249 | if (FD_ISSET(fd, &twfds)) { 250 | writable->push_back(fd); 251 | } 252 | if (FD_ISSET(fd, &trfds)) { 253 | readable->push_back(fd); 254 | } 255 | } 256 | } 257 | } 258 | 259 | #ifdef __linux__ 260 | 261 | EPollAIO::EPollAIO() 262 | { 263 | pollfd_ = epoll_create(MAX_POLL_FDS); 264 | VERIFY(pollfd_ >= 0); 265 | bzero(fdstatus_, sizeof(int)*MAX_POLL_FDS); 266 | } 267 | 268 | EPollAIO::~EPollAIO() 269 | { 270 | close(pollfd_); 271 | } 272 | 273 | static inline 274 | int poll_flag_to_event(poll_flag flag) 275 | { 276 | int f; 277 | if (flag == CB_RDONLY) { 278 | f = EPOLLIN; 279 | }else if (flag == CB_WRONLY) { 280 | f = EPOLLOUT; 281 | }else { //flag == CB_RDWR 282 | f = EPOLLIN | EPOLLOUT; 283 | } 284 | return f; 285 | } 286 | 287 | void 288 | EPollAIO::watch_fd(int fd, poll_flag flag) 289 | { 290 | VERIFY(fd < MAX_POLL_FDS); 291 | 292 | struct epoll_event ev; 293 | int op = fdstatus_[fd]? EPOLL_CTL_MOD : EPOLL_CTL_ADD; 294 | fdstatus_[fd] |= (int)flag; 295 | 296 | ev.events = EPOLLET; 297 | ev.data.fd = fd; 298 | 299 | if (fdstatus_[fd] & CB_RDONLY) { 300 | ev.events |= EPOLLIN; 301 | } 302 | if (fdstatus_[fd] & CB_WRONLY) { 303 | ev.events |= EPOLLOUT; 304 | } 305 | 306 | if (flag == CB_RDWR) { 307 | VERIFY(ev.events == (uint32_t)(EPOLLET | EPOLLIN | EPOLLOUT)); 308 | } 309 | 310 | VERIFY(epoll_ctl(pollfd_, op, fd, &ev) == 0); 311 | } 312 | 313 | bool 314 | EPollAIO::unwatch_fd(int fd, poll_flag flag) 315 | { 316 | VERIFY(fd < MAX_POLL_FDS); 317 | fdstatus_[fd] &= ~(int)flag; 318 | 319 | struct epoll_event ev; 320 | int op = fdstatus_[fd]? EPOLL_CTL_MOD : EPOLL_CTL_DEL; 321 | 322 | ev.events = EPOLLET; 323 | ev.data.fd = fd; 324 | 325 | if (fdstatus_[fd] & CB_RDONLY) { 326 | ev.events |= EPOLLIN; 327 | } 328 | if (fdstatus_[fd] & CB_WRONLY) { 329 | ev.events |= EPOLLOUT; 330 | } 331 | 332 | if (flag == CB_RDWR) { 333 | VERIFY(op == EPOLL_CTL_DEL); 334 | } 335 | VERIFY(epoll_ctl(pollfd_, op, fd, &ev) == 0); 336 | return (op == EPOLL_CTL_DEL); 337 | } 338 | 339 | bool 340 | EPollAIO::is_watched(int fd, poll_flag flag) 341 | { 342 | VERIFY(fd < MAX_POLL_FDS); 343 | return ((fdstatus_[fd] & CB_MASK) == flag); 344 | } 345 | 346 | void 347 | EPollAIO::wait_ready(std::vector *readable, std::vector *writable) 348 | { 349 | int nfds = epoll_wait(pollfd_, ready_, MAX_POLL_FDS, -1); 350 | for (int i = 0; i < nfds; i++) { 351 | if (ready_[i].events & EPOLLIN) { 352 | readable->push_back(ready_[i].data.fd); 353 | } 354 | if (ready_[i].events & EPOLLOUT) { 355 | writable->push_back(ready_[i].data.fd); 356 | } 357 | } 358 | } 359 | 360 | #endif 361 | -------------------------------------------------------------------------------- /test-lab-3-c.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test-lab-5 dir1 dir2 3 | * 4 | * Creates and deletes files in different directories 5 | * on the same underlying file system. These operations 6 | * should not require much put/get or lock traffic in 7 | * a yfs with a write-back cache and lazy lock release. 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | char d1[512], d2[512]; 23 | extern int errno; 24 | 25 | void 26 | create1(const char *d, const char *f, const char *in) 27 | { 28 | int fd; 29 | char n[512]; 30 | 31 | /* 32 | * The FreeBSD NFS client only invalidates its caches 33 | * cache if the mtime changes by a whole second. 34 | */ 35 | sleep(1); 36 | 37 | sprintf(n, "%s/%s", d, f); 38 | fd = creat(n, 0666); 39 | if(fd < 0){ 40 | fprintf(stderr, "test-lab-3-c: create(%s): %s\n", 41 | n, strerror(errno)); 42 | exit(1); 43 | } 44 | if(write(fd, in, strlen(in)) != strlen(in)){ 45 | fprintf(stderr, "test-lab-3-c: write(%s): %s\n", 46 | n, strerror(errno)); 47 | exit(1); 48 | } 49 | if(close(fd) != 0){ 50 | fprintf(stderr, "test-lab-3-c: close(%s): %s\n", 51 | n, strerror(errno)); 52 | exit(1); 53 | } 54 | } 55 | 56 | void 57 | check1(const char *d, const char *f, const char *in) 58 | { 59 | int fd, cc; 60 | char n[512], buf[512]; 61 | 62 | sprintf(n, "%s/%s", d, f); 63 | fd = open(n, 0); 64 | if(fd < 0){ 65 | fprintf(stderr, "test-lab-3-c: open(%s): %s\n", 66 | n, strerror(errno)); 67 | exit(1); 68 | } 69 | errno = 0; 70 | cc = read(fd, buf, sizeof(buf) - 1); 71 | if(cc != strlen(in)){ 72 | fprintf(stderr, "test-lab-3-c: read(%s) returned too little %d%s%s\n", 73 | n, 74 | cc, 75 | errno ? ": " : "", 76 | errno ? strerror(errno) : ""); 77 | exit(1); 78 | } 79 | close(fd); 80 | buf[cc] = '\0'; 81 | if(strncmp(buf, in, strlen(n)) != 0){ 82 | fprintf(stderr, "test-lab-3-c: read(%s) got \"%s\", not \"%s\"\n", 83 | n, buf, in); 84 | exit(1); 85 | } 86 | } 87 | 88 | void 89 | unlink1(const char *d, const char *f) 90 | { 91 | char n[512]; 92 | 93 | sleep(1); 94 | 95 | sprintf(n, "%s/%s", d, f); 96 | if(unlink(n) != 0){ 97 | fprintf(stderr, "test-lab-3-c: unlink(%s): %s\n", 98 | n, strerror(errno)); 99 | exit(1); 100 | } 101 | } 102 | 103 | void 104 | checknot(const char *d, const char *f) 105 | { 106 | int fd; 107 | char n[512]; 108 | 109 | sprintf(n, "%s/%s", d, f); 110 | fd = open(n, 0); 111 | if(fd >= 0){ 112 | fprintf(stderr, "test-lab-3-c: open(%s) succeeded for deleted file\n", n); 113 | exit(1); 114 | } 115 | } 116 | 117 | void 118 | append1(const char *d, const char *f, const char *in) 119 | { 120 | int fd; 121 | char n[512]; 122 | 123 | sleep(1); 124 | 125 | sprintf(n, "%s/%s", d, f); 126 | fd = open(n, O_WRONLY|O_APPEND); 127 | if(fd < 0){ 128 | fprintf(stderr, "test-lab-3-c: append open(%s): %s\n", 129 | n, strerror(errno)); 130 | exit(1); 131 | } 132 | if(write(fd, in, strlen(in)) != strlen(in)){ 133 | fprintf(stderr, "test-lab-3-c: append write(%s): %s\n", 134 | n, strerror(errno)); 135 | exit(1); 136 | } 137 | if(close(fd) != 0){ 138 | fprintf(stderr, "test-lab-3-c: append close(%s): %s\n", 139 | n, strerror(errno)); 140 | exit(1); 141 | } 142 | } 143 | 144 | void 145 | createn(const char *d, const char *prefix, int nf) 146 | { 147 | int fd, i; 148 | char n[512]; 149 | 150 | /* 151 | * The FreeBSD NFS client only invalidates its caches 152 | * cache if the mtime changes by a whole second. 153 | */ 154 | sleep(1); 155 | 156 | for(i = 0; i < nf; i++){ 157 | sprintf(n, "%s/%s-%d", d, prefix, i); 158 | fd = creat(n, 0666); 159 | if(fd < 0){ 160 | fprintf(stderr, "test-lab-3-c: create(%s): %s\n", 161 | n, strerror(errno)); 162 | exit(1); 163 | } 164 | if(write(fd, &i, sizeof(i)) != sizeof(i)){ 165 | fprintf(stderr, "test-lab-3-c: write(%s): %s\n", 166 | n, strerror(errno)); 167 | exit(1); 168 | } 169 | if(close(fd) != 0){ 170 | fprintf(stderr, "test-lab-3-c: close(%s): %s\n", 171 | n, strerror(errno)); 172 | exit(1); 173 | } 174 | } 175 | } 176 | 177 | void 178 | checkn(const char *d, const char *prefix, int nf) 179 | { 180 | int fd, i, cc, j; 181 | char n[512]; 182 | 183 | for(i = 0; i < nf; i++){ 184 | sprintf(n, "%s/%s-%d", d, prefix, i); 185 | fd = open(n, 0); 186 | if(fd < 0){ 187 | fprintf(stderr, "test-lab-3-c: open(%s): %s\n", 188 | n, strerror(errno)); 189 | exit(1); 190 | } 191 | j = -1; 192 | cc = read(fd, &j, sizeof(j)); 193 | if(cc != sizeof(j)){ 194 | fprintf(stderr, "test-lab-3-c: read(%s) returned too little %d%s%s\n", 195 | n, 196 | cc, 197 | errno ? ": " : "", 198 | errno ? strerror(errno) : ""); 199 | exit(1); 200 | } 201 | if(j != i){ 202 | fprintf(stderr, "test-lab-3-c: checkn %s contained %d not %d\n", 203 | n, j, i); 204 | exit(1); 205 | } 206 | close(fd); 207 | } 208 | } 209 | 210 | void 211 | unlinkn(const char *d, const char *prefix, int nf) 212 | { 213 | char n[512]; 214 | int i; 215 | 216 | sleep(1); 217 | 218 | for(i = 0; i < nf; i++){ 219 | sprintf(n, "%s/%s-%d", d, prefix, i); 220 | if(unlink(n) != 0){ 221 | fprintf(stderr, "test-lab-3-c: unlink(%s): %s\n", 222 | n, strerror(errno)); 223 | exit(1); 224 | } 225 | } 226 | } 227 | 228 | int 229 | compar(const void *xa, const void *xb) 230 | { 231 | char *a = *(char**)xa; 232 | char *b = *(char**)xb; 233 | return strcmp(a, b); 234 | } 235 | 236 | void 237 | dircheck(const char *d, int nf) 238 | { 239 | DIR *dp; 240 | struct dirent *e; 241 | char *names[1000]; 242 | int nnames = 0, i; 243 | 244 | dp = opendir(d); 245 | if(dp == 0){ 246 | fprintf(stderr, "test-lab-3-c: opendir(%s): %s\n", d, strerror(errno)); 247 | exit(1); 248 | } 249 | while((e = readdir(dp))){ 250 | if(e->d_name[0] != '.'){ 251 | if(nnames >= sizeof(names)/sizeof(names[0])){ 252 | fprintf(stderr, "warning: too many files in %s\n", d); 253 | } 254 | names[nnames] = (char *) malloc(strlen(e->d_name) + 1); 255 | strcpy(names[nnames], e->d_name); 256 | nnames++; 257 | } 258 | } 259 | closedir(dp); 260 | 261 | if(nf != nnames){ 262 | fprintf(stderr, "test-lab-3-c: wanted %d dir entries, got %d\n", nf, nnames); 263 | exit(1); 264 | } 265 | 266 | /* check for duplicate entries */ 267 | qsort(names, nnames, sizeof(names[0]), compar); 268 | for(i = 0; i < nnames-1; i++){ 269 | if(strcmp(names[i], names[i+1]) == 0){ 270 | fprintf(stderr, "test-lab-3-c: duplicate directory entry for %s\n", names[i]); 271 | exit(1); 272 | } 273 | } 274 | 275 | for(i = 0; i < nnames; i++) 276 | free(names[i]); 277 | } 278 | 279 | void 280 | reap (int pid) 281 | { 282 | int wpid, status; 283 | wpid = waitpid (pid, &status, 0); 284 | if (wpid < 0) { 285 | perror("waitpid"); 286 | exit(1); 287 | } 288 | if (wpid != pid) { 289 | fprintf(stderr, "unexpected pid reaped: %d\n", wpid); 290 | exit(1); 291 | } 292 | if(!WIFEXITED(status) || WEXITSTATUS(status) != 0) { 293 | fprintf(stderr, "child exited unhappily\n"); 294 | exit(1); 295 | } 296 | } 297 | 298 | int 299 | main(int argc, char *argv[]) 300 | { 301 | int pid; 302 | 303 | if(argc != 3){ 304 | fprintf(stderr, "Usage: test-lab-3-c dir1 dir2\n"); 305 | exit(1); 306 | } 307 | 308 | sprintf(d1, "%s/da%d", argv[1], getpid()); 309 | if(mkdir(d1, 0777) != 0){ 310 | fprintf(stderr, "test-lab-3-c: failed: mkdir(%s): %s\n", 311 | d1, strerror(errno)); 312 | exit(1); 313 | } 314 | sprintf(d2, "%s/db%d", argv[2], getpid()); 315 | if(mkdir(d2, 0777) != 0){ 316 | fprintf(stderr, "test-lab-3-c: failed: mkdir(%s): %s\n", 317 | d2, strerror(errno)); 318 | exit(1); 319 | } 320 | 321 | { 322 | char dd[512]; 323 | sprintf(dd, "%s/da%d", argv[2], getpid()); 324 | if(access(dd, 0) != 0){ 325 | fprintf(stderr, "test-lab-3-c: failed: access(%s) after mkdir %s: %s\n", 326 | dd, d1, strerror(errno)); 327 | exit(1); 328 | } 329 | } 330 | 331 | setbuf(stdout, 0); 332 | 333 | printf("Create/delete in separate directories: "); 334 | 335 | pid = fork(); 336 | if(pid < 0){ 337 | perror("test-lab-3-c: fork"); 338 | exit(1); 339 | } 340 | if(pid == 0){ 341 | createn(d2, "xx", 100); 342 | unlinkn(d2, "xx", 99); 343 | exit(0); 344 | } 345 | createn(d1, "yy", 100); 346 | unlinkn(d1, "yy", 99); 347 | sleep(4); 348 | reap(pid); 349 | dircheck(d1, 1); 350 | dircheck(d2, 1); 351 | 352 | printf("tests completed OK\n"); 353 | 354 | exit(0); 355 | return(0); 356 | } 357 | 358 | -------------------------------------------------------------------------------- /config.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "config.h" 5 | #include "paxos.h" 6 | #include "handle.h" 7 | #include "tprintf.h" 8 | #include "lang/verify.h" 9 | 10 | // The config module maintains views. As a node joins or leaves a 11 | // view, the next view will be the same as previous view, except with 12 | // the new node added or removed. The first view contains only node 13 | // 1. If node 2 joins after the first node (it will download the views 14 | // from node 1), it will learn about view 1 with the first node as the 15 | // only member. It will then invoke Paxos to create the next view. 16 | // It will tell Paxos to ask the nodes in view 1 to agree on the value 17 | // {1, 2}. If Paxos returns success, then it moves to view 2 with 18 | // {1,2} as the members. When node 3 joins, the config module runs 19 | // Paxos with the nodes in view 2 and the proposed value to be 20 | // {1,2,3}. And so on. When a node discovers that some node of the 21 | // current view is not responding, it kicks off Paxos to propose a new 22 | // value (the current view minus the node that isn't responding). The 23 | // config module uses Paxos to create a total order of views, and it 24 | // is ensured that the majority of the previous view agrees to the 25 | // next view. The Paxos log contains all the values (i.e., views) 26 | // agreed on. 27 | // 28 | // The RSM module informs config to add nodes. The config module 29 | // runs a heartbeater thread that checks in with nodes. If a node 30 | // doesn't respond, the config module will invoke Paxos's proposer to 31 | // remove the node. Higher layers will learn about this change when a 32 | // Paxos acceptor accepts the new proposed value through 33 | // paxos_commit(). 34 | // 35 | // To be able to bring other nodes up to date to the latest formed 36 | // view, each node will have a complete history of all view numbers 37 | // and their values that it knows about. At any time a node can reboot 38 | // and when it re-joins, it may be many views behind; by remembering 39 | // all views, the other nodes can bring this re-joined node up to 40 | // date. 41 | 42 | static void * 43 | heartbeatthread(void *x) 44 | { 45 | config *r = (config *) x; 46 | r->heartbeater(); 47 | return 0; 48 | } 49 | 50 | config::config(std::string _first, std::string _me, config_view_change *_vc) 51 | : myvid (0), first (_first), me (_me), vc (_vc) 52 | { 53 | VERIFY (pthread_mutex_init(&cfg_mutex, NULL) == 0); 54 | VERIFY(pthread_cond_init(&config_cond, NULL) == 0); 55 | 56 | std::ostringstream ost; 57 | ost << me; 58 | 59 | acc = new acceptor(this, me == _first, me, ost.str()); 60 | pro = new proposer(this, acc, me); 61 | 62 | // XXX hack; maybe should have its own port number 63 | pxsrpc = acc->get_rpcs(); 64 | pxsrpc->reg(paxos_protocol::heartbeat, this, &config::heartbeat); 65 | 66 | { 67 | ScopedLock ml(&cfg_mutex); 68 | 69 | reconstruct(); 70 | 71 | pthread_t th; 72 | VERIFY (pthread_create(&th, NULL, &heartbeatthread, (void *) this) == 0); 73 | } 74 | } 75 | 76 | void 77 | config::restore(std::string s) 78 | { 79 | ScopedLock ml(&cfg_mutex); 80 | acc->restore(s); 81 | reconstruct(); 82 | } 83 | 84 | std::vector 85 | config::get_view(unsigned instance) 86 | { 87 | ScopedLock ml(&cfg_mutex); 88 | return get_view_wo(instance); 89 | } 90 | 91 | // caller should hold cfg_mutex 92 | std::vector 93 | config::get_view_wo(unsigned instance) 94 | { 95 | std::string value = acc->value(instance); 96 | tprintf("get_view(%d): returns %s\n", instance, value.c_str()); 97 | return members(value); 98 | } 99 | 100 | std::vector 101 | config::members(std::string value) 102 | { 103 | std::istringstream ist(value); 104 | std::string m; 105 | std::vector view; 106 | while (ist >> m) { 107 | view.push_back(m); 108 | } 109 | return view; 110 | } 111 | 112 | std::string 113 | config::value(std::vector m) 114 | { 115 | std::ostringstream ost; 116 | for (unsigned i = 0; i < m.size(); i++) { 117 | ost << m[i]; 118 | ost << " "; 119 | } 120 | return ost.str(); 121 | } 122 | 123 | // caller should hold cfg_mutex 124 | void 125 | config::reconstruct() 126 | { 127 | if (acc->instance() > 0) { 128 | std::string m; 129 | myvid = acc->instance(); 130 | mems = get_view_wo(myvid); 131 | tprintf("config::reconstruct: %d %s\n", myvid, print_members(mems).c_str()); 132 | } 133 | } 134 | 135 | // Called by Paxos's acceptor. 136 | void 137 | config::paxos_commit(unsigned instance, std::string value) 138 | { 139 | std::string m; 140 | std::vector newmem; 141 | ScopedLock ml(&cfg_mutex); 142 | 143 | newmem = members(value); 144 | tprintf("config::paxos_commit: %d: %s\n", instance, 145 | print_members(newmem).c_str()); 146 | 147 | for (unsigned i = 0; i < mems.size(); i++) { 148 | tprintf("config::paxos_commit: is %s still a member?\n", mems[i].c_str()); 149 | if (!isamember(mems[i], newmem) && me != mems[i]) { 150 | tprintf("config::paxos_commit: delete %s\n", mems[i].c_str()); 151 | mgr.delete_handle(mems[i]); 152 | } 153 | } 154 | 155 | mems = newmem; 156 | myvid = instance; 157 | if (vc) { 158 | unsigned vid = myvid; 159 | VERIFY(pthread_mutex_unlock(&cfg_mutex)==0); 160 | vc->commit_change(vid); 161 | VERIFY(pthread_mutex_lock(&cfg_mutex)==0); 162 | } 163 | } 164 | 165 | bool 166 | config::ismember(std::string m, unsigned vid) 167 | { 168 | bool r; 169 | ScopedLock ml(&cfg_mutex); 170 | std::vector v = get_view_wo(vid); 171 | r = isamember(m, v); 172 | return r; 173 | } 174 | 175 | bool 176 | config::add(std::string new_m, unsigned vid) 177 | { 178 | std::vector m; 179 | std::vector curm; 180 | ScopedLock ml(&cfg_mutex); 181 | if (vid != myvid) 182 | return false; 183 | tprintf("config::add %s\n", new_m.c_str()); 184 | m = mems; 185 | m.push_back(new_m); 186 | curm = mems; 187 | std::string v = value(m); 188 | int nextvid = myvid + 1; 189 | VERIFY(pthread_mutex_unlock(&cfg_mutex)==0); 190 | bool r = pro->run(nextvid, curm, v); 191 | VERIFY(pthread_mutex_lock(&cfg_mutex)==0); 192 | if (r) { 193 | tprintf("config::add: proposer returned success\n"); 194 | } else { 195 | tprintf("config::add: proposer returned failure\n"); 196 | } 197 | return r; 198 | } 199 | 200 | // caller should hold cfg_mutex 201 | bool 202 | config::remove_wo(std::string m) 203 | { 204 | tprintf("config::remove: myvid %d remove? %s\n", myvid, m.c_str()); 205 | std::vector n; 206 | for (unsigned i = 0; i < mems.size(); i++) { 207 | if (mems[i] != m) n.push_back(mems[i]); 208 | } 209 | std::string v = value(n); 210 | std::vector cmems = mems; 211 | int nextvid = myvid + 1; 212 | VERIFY(pthread_mutex_unlock(&cfg_mutex)==0); 213 | bool r = pro->run(nextvid, cmems, v); 214 | VERIFY(pthread_mutex_lock(&cfg_mutex)==0); 215 | if (r) { 216 | tprintf("config::remove: proposer returned success\n"); 217 | } else { 218 | tprintf("config::remove: proposer returned failure\n"); 219 | } 220 | return r; 221 | } 222 | 223 | void 224 | config::heartbeater() 225 | { 226 | struct timeval now; 227 | struct timespec next_timeout; 228 | std::string m; 229 | heartbeat_t h; 230 | bool stable; 231 | unsigned vid; 232 | std::vector cmems; 233 | ScopedLock ml(&cfg_mutex); 234 | 235 | while (1) { 236 | 237 | gettimeofday(&now, NULL); 238 | next_timeout.tv_sec = now.tv_sec + 3; 239 | next_timeout.tv_nsec = 0; 240 | tprintf("heartbeater: go to sleep\n"); 241 | pthread_cond_timedwait(&config_cond, &cfg_mutex, &next_timeout); 242 | 243 | stable = true; 244 | vid = myvid; 245 | cmems = get_view_wo(vid); 246 | tprintf("heartbeater: current membership %s\n", print_members(cmems).c_str()); 247 | 248 | if (!isamember(me, cmems)) { 249 | tprintf("heartbeater: not member yet; skip hearbeat\n"); 250 | continue; 251 | } 252 | 253 | //find the node with the smallest id 254 | m = me; 255 | for (unsigned i = 0; i < cmems.size(); i++) { 256 | if (m > cmems[i]) 257 | m = cmems[i]; 258 | } 259 | 260 | if (m == me) { 261 | //if i am the one with smallest id, ping the rest of the nodes 262 | for (unsigned i = 0; i < cmems.size(); i++) { 263 | if (cmems[i] != me) { 264 | if ((h = doheartbeat(cmems[i])) != OK) { 265 | stable = false; 266 | m = cmems[i]; 267 | break; 268 | } 269 | } 270 | } 271 | } else { 272 | //the rest of the nodes ping the one with smallest id 273 | if ((h = doheartbeat(m)) != OK) 274 | stable = false; 275 | } 276 | 277 | if (!stable && vid == myvid) { 278 | remove_wo(m); 279 | } 280 | } 281 | } 282 | 283 | paxos_protocol::status 284 | config::heartbeat(std::string m, unsigned vid, int &r) 285 | { 286 | ScopedLock ml(&cfg_mutex); 287 | int ret = paxos_protocol::ERR; 288 | r = (int) myvid; 289 | tprintf("heartbeat from %s(%d) myvid %d\n", m.c_str(), vid, myvid); 290 | if (vid == myvid) { 291 | ret = paxos_protocol::OK; 292 | } else if (pro->isrunning()) { 293 | VERIFY (vid == myvid + 1 || vid + 1 == myvid); 294 | ret = paxos_protocol::OK; 295 | } else { 296 | ret = paxos_protocol::ERR; 297 | } 298 | return ret; 299 | } 300 | 301 | config::heartbeat_t 302 | config::doheartbeat(std::string m) 303 | { 304 | int ret = rpc_const::timeout_failure; 305 | int r; 306 | unsigned vid = myvid; 307 | heartbeat_t res = OK; 308 | 309 | tprintf("doheartbeater to %s (%d)\n", m.c_str(), vid); 310 | handle h(m); 311 | VERIFY(pthread_mutex_unlock(&cfg_mutex)==0); 312 | rpcc *cl = h.safebind(); 313 | if (cl) { 314 | ret = cl->call(paxos_protocol::heartbeat, me, vid, r, 315 | rpcc::to(1000)); 316 | } 317 | VERIFY(pthread_mutex_lock(&cfg_mutex)==0); 318 | if (ret != paxos_protocol::OK) { 319 | if (ret == rpc_const::atmostonce_failure || 320 | ret == rpc_const::oldsrv_failure) { 321 | mgr.delete_handle(m); 322 | } else { 323 | tprintf("doheartbeat: problem with %s (%d) my vid %d his vid %d\n", 324 | m.c_str(), ret, vid, r); 325 | if (ret < 0) res = FAILURE; 326 | else res = VIEWERR; 327 | } 328 | } 329 | tprintf("doheartbeat done %d\n", res); 330 | return res; 331 | } 332 | 333 | -------------------------------------------------------------------------------- /paxos.cc: -------------------------------------------------------------------------------- 1 | #include "paxos.h" 2 | #include "handle.h" 3 | // #include 4 | #include 5 | #include "tprintf.h" 6 | #include "lang/verify.h" 7 | 8 | // This module implements the proposer and acceptor of the Paxos 9 | // distributed algorithm as described by Lamport's "Paxos Made 10 | // Simple". To kick off an instance of Paxos, the caller supplies a 11 | // list of nodes, a proposed value, and invokes the proposer. If the 12 | // majority of the nodes agree on the proposed value after running 13 | // this instance of Paxos, the acceptor invokes the upcall 14 | // paxos_commit to inform higher layers of the agreed value for this 15 | // instance. 16 | 17 | 18 | bool 19 | operator> (const prop_t &a, const prop_t &b) 20 | { 21 | return (a.n > b.n || (a.n == b.n && a.m > b.m)); 22 | } 23 | 24 | bool 25 | operator>= (const prop_t &a, const prop_t &b) 26 | { 27 | return (a.n > b.n || (a.n == b.n && a.m >= b.m)); 28 | } 29 | 30 | std::string 31 | print_members(const std::vector &nodes) 32 | { 33 | std::string s; 34 | s.clear(); 35 | for (unsigned i = 0; i < nodes.size(); i++) { 36 | s += nodes[i]; 37 | if (i < (nodes.size()-1)) 38 | s += ","; 39 | } 40 | return s; 41 | } 42 | 43 | bool isamember(std::string m, const std::vector &nodes) 44 | { 45 | for (unsigned i = 0; i < nodes.size(); i++) { 46 | if (nodes[i] == m) return 1; 47 | } 48 | return 0; 49 | } 50 | 51 | bool 52 | proposer::isrunning() 53 | { 54 | bool r; 55 | ScopedLock ml(&pxs_mutex); 56 | r = !stable; 57 | return r; 58 | } 59 | 60 | // check if the servers in l2 contains a majority of servers in l1 61 | bool 62 | proposer::majority(const std::vector &l1, 63 | const std::vector &l2) 64 | { 65 | unsigned n = 0; 66 | 67 | for (unsigned i = 0; i < l1.size(); i++) { 68 | if (isamember(l1[i], l2)) 69 | n++; 70 | } 71 | return n >= (l1.size() >> 1) + 1; 72 | } 73 | 74 | proposer::proposer(class paxos_change *_cfg, class acceptor *_acceptor, 75 | std::string _me) 76 | : cfg(_cfg), acc (_acceptor), me (_me), break1 (false), break2 (false), 77 | stable (true) 78 | { 79 | VERIFY (pthread_mutex_init(&pxs_mutex, NULL) == 0); 80 | my_n.n = 0; 81 | my_n.m = me; 82 | } 83 | 84 | void 85 | proposer::setn() 86 | { 87 | my_n.n = acc->get_n_h().n + 1 > my_n.n + 1 ? acc->get_n_h().n + 1 : my_n.n + 1; 88 | } 89 | 90 | bool 91 | proposer::run(int instance, std::vector cur_nodes, std::string newv) 92 | { 93 | std::vector accepts; 94 | std::vector nodes; 95 | std::string v; 96 | bool r = false; 97 | 98 | ScopedLock ml(&pxs_mutex); 99 | tprintf("start: initiate paxos for %s w. i=%d v=%s stable=%d\n", 100 | print_members(cur_nodes).c_str(), instance, newv.c_str(), stable); 101 | if (!stable) { // already running proposer? 102 | tprintf("proposer::run: already running\n"); 103 | return false; 104 | } 105 | stable = false; 106 | setn(); 107 | accepts.clear(); 108 | v.clear(); 109 | if (prepare(instance, accepts, cur_nodes, v)) { 110 | 111 | if (majority(cur_nodes, accepts)) { 112 | tprintf("paxos::manager: received a majority of prepare responses\n"); 113 | 114 | if (v.size() == 0) 115 | v = newv; 116 | 117 | breakpoint1(); 118 | 119 | nodes = accepts; 120 | accepts.clear(); 121 | accept(instance, accepts, nodes, v); 122 | 123 | if (majority(cur_nodes, accepts)) { 124 | tprintf("paxos::manager: received a majority of accept responses\n"); 125 | 126 | breakpoint2(); 127 | 128 | decide(instance, accepts, v); 129 | r = true; 130 | } else { 131 | tprintf("paxos::manager: no majority of accept responses\n"); 132 | } 133 | } else { 134 | tprintf("paxos::manager: no majority of prepare responses\n"); 135 | } 136 | } else { 137 | tprintf("paxos::manager: prepare is rejected %d\n", stable); 138 | } 139 | stable = true; 140 | return r; 141 | } 142 | 143 | // proposer::run() calls prepare to send prepare RPCs to nodes 144 | // and collect responses. if one of those nodes 145 | // replies with an oldinstance, return false. 146 | // otherwise fill in accepts with set of nodes that accepted, 147 | // set v to the v_a with the highest n_a, and return true. 148 | bool 149 | proposer::prepare(unsigned instance, std::vector &accepts, 150 | std::vector nodes, 151 | std::string &v) 152 | { 153 | // You fill this in for Lab 6 154 | // Note: if got an "oldinstance" reply, commit the instance using 155 | // acc->commit(...), and return false. 156 | paxos_protocol::preparearg pa = { instance, my_n }; 157 | paxos_protocol::prepareres pr; 158 | prop_t na_max = { 0, "0" }; 159 | std::string v_max; 160 | 161 | for (auto const &node : nodes) 162 | { 163 | handle h(node); 164 | rpcc *conn = h.safebind(); 165 | if (!conn) 166 | { 167 | continue; // rpc establish failed, do nothing but skip, not return 168 | } 169 | auto ret = conn->call(paxos_protocol::preparereq, me, pa, pr, rpcc::to(1000)); 170 | if (ret == paxos_protocol::OK) 171 | { 172 | if (pr.oldinstance) 173 | { 174 | acc->commit(instance, pr.v_a); 175 | return false; 176 | } 177 | accepts.push_back(node); 178 | if (pr.n_a > na_max) 179 | { 180 | na_max = pr.n_a; 181 | v_max = pr.v_a; 182 | } 183 | } 184 | } 185 | 186 | if (accepts.size()) 187 | { 188 | v = v_max; 189 | } 190 | 191 | return true; 192 | } 193 | 194 | // run() calls this to send out accept RPCs to accepts. 195 | // fill in accepts with list of nodes that accepted. 196 | void 197 | proposer::accept(unsigned instance, std::vector &accepts, 198 | std::vector nodes, std::string v) 199 | { 200 | // You fill this in for Lab 6 201 | paxos_protocol::acceptarg aa = { instance, my_n, v }; 202 | bool r; 203 | for (auto const & node : nodes) 204 | { 205 | handle h(node); 206 | rpcc *conn = h.safebind(); 207 | if (!conn) 208 | { 209 | continue; 210 | } 211 | 212 | auto ret = conn->call(paxos_protocol::acceptreq, me, aa, r, rpcc::to(1000)); 213 | if (ret == paxos_protocol::OK) 214 | { 215 | accepts.push_back(node); 216 | } 217 | } 218 | } 219 | 220 | void 221 | proposer::decide(unsigned instance, std::vector accepts, 222 | std::string v) 223 | { 224 | // You fill this in for Lab 6 225 | paxos_protocol::decidearg da = { instance, v }; 226 | int r; 227 | for (auto const &node : accepts) 228 | { 229 | handle h(node); 230 | rpcc *conn = h.safebind(); 231 | if (conn) 232 | { 233 | conn->call(paxos_protocol::decidereq, me, da, r, rpcc::to(1000)); 234 | } 235 | } 236 | } 237 | 238 | acceptor::acceptor(class paxos_change *_cfg, bool _first, std::string _me, 239 | std::string _value) : cfg(_cfg), me (_me), instance_h(0) 240 | { 241 | VERIFY (pthread_mutex_init(&pxs_mutex, NULL) == 0); 242 | 243 | n_h.n = 0; 244 | n_h.m = me; 245 | n_a.n = 0; 246 | n_a.m = me; 247 | v_a.clear(); 248 | 249 | l = new log (this, me); 250 | 251 | if (instance_h == 0 && _first) { 252 | values[1] = _value; 253 | l->loginstance(1, _value); 254 | instance_h = 1; 255 | } 256 | 257 | pxs = new rpcs(atoi(_me.c_str())); 258 | pxs->reg(paxos_protocol::preparereq, this, &acceptor::preparereq); 259 | pxs->reg(paxos_protocol::acceptreq, this, &acceptor::acceptreq); 260 | pxs->reg(paxos_protocol::decidereq, this, &acceptor::decidereq); 261 | } 262 | 263 | paxos_protocol::status 264 | acceptor::preparereq(std::string src, paxos_protocol::preparearg a, 265 | paxos_protocol::prepareres &r) 266 | { 267 | // You fill this in for Lab 6 268 | // Remember to initialize *BOTH* r.accept and r.oldinstance appropriately. 269 | // Remember to *log* the proposal if the proposal is accepted. 270 | r.accept = r.oldinstance = false; 271 | if (a.instance <= instance_h) 272 | { 273 | r.oldinstance = true; 274 | r.v_a = value(instance_h); 275 | return paxos_protocol::OK; 276 | } 277 | else if (a.n > n_h) 278 | { 279 | n_h = a.n; 280 | r.n_a = n_a; 281 | r.v_a = v_a; 282 | l->logprop(n_h); 283 | return paxos_protocol::OK; 284 | } 285 | 286 | return paxos_protocol::ERR; 287 | } 288 | 289 | // the src argument is only for debug purpose 290 | paxos_protocol::status 291 | acceptor::acceptreq(std::string src, paxos_protocol::acceptarg a, bool &r) 292 | { 293 | // You fill this in for Lab 6 294 | // Remember to *log* the accept if the proposal is accepted. 295 | if (a.n >= n_h) 296 | { 297 | n_a = a.n; 298 | v_a = a.v; 299 | l->logaccept(n_a, v_a); 300 | return paxos_protocol::OK; 301 | } 302 | 303 | return paxos_protocol::ERR; 304 | } 305 | 306 | // the src argument is only for debug purpose 307 | paxos_protocol::status 308 | acceptor::decidereq(std::string src, paxos_protocol::decidearg a, int &r) 309 | { 310 | ScopedLock ml(&pxs_mutex); 311 | tprintf("decidereq for accepted instance %d (my instance %d) v=%s\n", 312 | a.instance, instance_h, v_a.c_str()); 313 | if (a.instance == instance_h + 1) { 314 | VERIFY(v_a == a.v); 315 | commit_wo(a.instance, v_a); 316 | } else if (a.instance <= instance_h) { 317 | // we are ahead ignore. 318 | } else { 319 | // we are behind 320 | VERIFY(0); 321 | } 322 | return paxos_protocol::OK; 323 | } 324 | 325 | void 326 | acceptor::commit_wo(unsigned instance, std::string value) 327 | { 328 | //assume pxs_mutex is held 329 | tprintf("acceptor::commit: instance=%d has v= %s\n", instance, value.c_str()); 330 | if (instance > instance_h) { 331 | tprintf("commit: highestaccepteinstance = %d\n", instance); 332 | values[instance] = value; 333 | l->loginstance(instance, value); 334 | instance_h = instance; 335 | n_h.n = 0; 336 | n_h.m = me; 337 | n_a.n = 0; 338 | n_a.m = me; 339 | v_a.clear(); 340 | if (cfg) { 341 | pthread_mutex_unlock(&pxs_mutex); 342 | cfg->paxos_commit(instance, value); 343 | pthread_mutex_lock(&pxs_mutex); 344 | } 345 | } 346 | } 347 | 348 | void 349 | acceptor::commit(unsigned instance, std::string value) 350 | { 351 | ScopedLock ml(&pxs_mutex); 352 | commit_wo(instance, value); 353 | } 354 | 355 | std::string 356 | acceptor::dump() 357 | { 358 | return l->dump(); 359 | } 360 | 361 | void 362 | acceptor::restore(std::string s) 363 | { 364 | l->restore(s); 365 | l->logread(); 366 | } 367 | 368 | 369 | 370 | // For testing purposes 371 | 372 | // Call this from your code between phases prepare and accept of proposer 373 | void 374 | proposer::breakpoint1() 375 | { 376 | if (break1) { 377 | tprintf("Dying at breakpoint 1!\n"); 378 | exit(1); 379 | } 380 | } 381 | 382 | // Call this from your code between phases accept and decide of proposer 383 | void 384 | proposer::breakpoint2() 385 | { 386 | if (break2) { 387 | tprintf("Dying at breakpoint 2!\n"); 388 | exit(1); 389 | } 390 | } 391 | 392 | void 393 | proposer::breakpoint(int b) 394 | { 395 | if (b == 3) { 396 | tprintf("Proposer: breakpoint 1\n"); 397 | break1 = true; 398 | } else if (b == 4) { 399 | tprintf("Proposer: breakpoint 2\n"); 400 | break2 = true; 401 | } 402 | } 403 | -------------------------------------------------------------------------------- /rpc/connection.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "method_thread.h" 10 | #include "connection.h" 11 | #include "slock.h" 12 | #include "pollmgr.h" 13 | #include "jsl_log.h" 14 | #include "gettime.h" 15 | #include "lang/verify.h" 16 | 17 | #define MAX_PDU (10<<20) //maximum PDF is 10M 18 | 19 | 20 | connection::connection(chanmgr *m1, int f1, int l1) 21 | : mgr_(m1), fd_(f1), dead_(false),waiters_(0), refno_(1),lossy_(l1) 22 | { 23 | 24 | int flags = fcntl(fd_, F_GETFL, NULL); 25 | flags |= O_NONBLOCK; 26 | fcntl(fd_, F_SETFL, flags); 27 | 28 | signal(SIGPIPE, SIG_IGN); 29 | VERIFY(pthread_mutex_init(&m_,0)==0); 30 | VERIFY(pthread_mutex_init(&ref_m_,0)==0); 31 | VERIFY(pthread_cond_init(&send_wait_,0)==0); 32 | VERIFY(pthread_cond_init(&send_complete_,0)==0); 33 | 34 | VERIFY(gettimeofday(&create_time_, NULL) == 0); 35 | 36 | PollMgr::Instance()->add_callback(fd_, CB_RDONLY, this); 37 | } 38 | 39 | connection::~connection() 40 | { 41 | VERIFY(dead_); 42 | VERIFY(pthread_mutex_destroy(&m_)== 0); 43 | VERIFY(pthread_mutex_destroy(&ref_m_)== 0); 44 | VERIFY(pthread_cond_destroy(&send_wait_) == 0); 45 | VERIFY(pthread_cond_destroy(&send_complete_) == 0); 46 | if (rpdu_.buf) 47 | free(rpdu_.buf); 48 | VERIFY(!wpdu_.buf); 49 | close(fd_); 50 | } 51 | 52 | void 53 | connection::incref() 54 | { 55 | ScopedLock ml(&ref_m_); 56 | refno_++; 57 | } 58 | 59 | bool 60 | connection::isdead() 61 | { 62 | ScopedLock ml(&m_); 63 | return dead_; 64 | } 65 | 66 | void 67 | connection::closeconn() 68 | { 69 | { 70 | ScopedLock ml(&m_); 71 | if (!dead_) { 72 | dead_ = true; 73 | shutdown(fd_,SHUT_RDWR); 74 | }else{ 75 | return; 76 | } 77 | } 78 | //after block_remove_fd, select will never wait on fd_ 79 | //and no callbacks will be active 80 | PollMgr::Instance()->block_remove_fd(fd_); 81 | } 82 | 83 | void 84 | connection::decref() 85 | { 86 | VERIFY(pthread_mutex_lock(&ref_m_)==0); 87 | refno_ --; 88 | VERIFY(refno_>=0); 89 | if (refno_==0) { 90 | VERIFY(pthread_mutex_lock(&m_)==0); 91 | if (dead_) { 92 | VERIFY(pthread_mutex_unlock(&ref_m_)==0); 93 | VERIFY(pthread_mutex_unlock(&m_)==0); 94 | delete this; 95 | return; 96 | } 97 | VERIFY(pthread_mutex_unlock(&m_)==0); 98 | } 99 | pthread_mutex_unlock(&ref_m_); 100 | } 101 | 102 | int 103 | connection::ref() 104 | { 105 | ScopedLock rl(&ref_m_); 106 | return refno_; 107 | } 108 | 109 | int 110 | connection::compare(connection *another) 111 | { 112 | if (create_time_.tv_sec > another->create_time_.tv_sec) 113 | return 1; 114 | if (create_time_.tv_sec < another->create_time_.tv_sec) 115 | return -1; 116 | if (create_time_.tv_usec > another->create_time_.tv_usec) 117 | return 1; 118 | if (create_time_.tv_usec < another->create_time_.tv_usec) 119 | return -1; 120 | return 0; 121 | } 122 | 123 | bool 124 | connection::send(char *b, int sz) 125 | { 126 | ScopedLock ml(&m_); 127 | waiters_++; 128 | while (!dead_ && wpdu_.buf) { 129 | VERIFY(pthread_cond_wait(&send_wait_, &m_)==0); 130 | } 131 | waiters_--; 132 | if (dead_) { 133 | return false; 134 | } 135 | wpdu_.buf = b; 136 | wpdu_.sz = sz; 137 | wpdu_.solong = 0; 138 | 139 | if (lossy_) { 140 | if ((random()%100) < lossy_) { 141 | jsl_log(JSL_DBG_1, "connection::send LOSSY TEST shutdown fd_ %d\n", fd_); 142 | shutdown(fd_,SHUT_RDWR); 143 | } 144 | } 145 | 146 | if (!writepdu()) { 147 | dead_ = true; 148 | VERIFY(pthread_mutex_unlock(&m_) == 0); 149 | PollMgr::Instance()->block_remove_fd(fd_); 150 | VERIFY(pthread_mutex_lock(&m_) == 0); 151 | }else{ 152 | if (wpdu_.solong == wpdu_.sz) { 153 | }else{ 154 | //should be rare to need to explicitly add write callback 155 | PollMgr::Instance()->add_callback(fd_, CB_WRONLY, this); 156 | while (!dead_ && wpdu_.solong >= 0 && wpdu_.solong < wpdu_.sz) { 157 | VERIFY(pthread_cond_wait(&send_complete_,&m_) == 0); 158 | } 159 | } 160 | } 161 | bool ret = (!dead_ && wpdu_.solong == wpdu_.sz); 162 | wpdu_.solong = wpdu_.sz = 0; 163 | wpdu_.buf = NULL; 164 | if (waiters_ > 0) 165 | pthread_cond_broadcast(&send_wait_); 166 | return ret; 167 | } 168 | 169 | //fd_ is ready to be written 170 | void 171 | connection::write_cb(int s) 172 | { 173 | ScopedLock ml(&m_); 174 | VERIFY(!dead_); 175 | VERIFY(fd_ == s); 176 | if (wpdu_.sz == 0) { 177 | PollMgr::Instance()->del_callback(fd_,CB_WRONLY); 178 | return; 179 | } 180 | if (!writepdu()) { 181 | PollMgr::Instance()->del_callback(fd_, CB_RDWR); 182 | dead_ = true; 183 | }else{ 184 | VERIFY(wpdu_.solong >= 0); 185 | if (wpdu_.solong < wpdu_.sz) { 186 | return; 187 | } 188 | } 189 | pthread_cond_signal(&send_complete_); 190 | } 191 | 192 | //fd_ is ready to be read 193 | void 194 | connection::read_cb(int s) 195 | { 196 | ScopedLock ml(&m_); 197 | VERIFY(fd_ == s); 198 | if (dead_) { 199 | return; 200 | } 201 | 202 | bool succ = true; 203 | if (!rpdu_.buf || rpdu_.solong < rpdu_.sz) { 204 | succ = readpdu(); 205 | } 206 | 207 | if (!succ) { 208 | PollMgr::Instance()->del_callback(fd_,CB_RDWR); 209 | dead_ = true; 210 | pthread_cond_signal(&send_complete_); 211 | } 212 | 213 | if (rpdu_.buf && rpdu_.sz == rpdu_.solong) { 214 | if (mgr_->got_pdu(this, rpdu_.buf, rpdu_.sz)) { 215 | //chanmgr has successfully consumed the pdu 216 | rpdu_.buf = NULL; 217 | rpdu_.sz = rpdu_.solong = 0; 218 | } 219 | } 220 | } 221 | 222 | bool 223 | connection::writepdu() 224 | { 225 | VERIFY(wpdu_.solong >= 0); 226 | if (wpdu_.solong == wpdu_.sz) 227 | return true; 228 | 229 | if (wpdu_.solong == 0) { 230 | int sz = htonl(wpdu_.sz); 231 | bcopy(&sz,wpdu_.buf,sizeof(sz)); 232 | } 233 | int n = write(fd_, wpdu_.buf + wpdu_.solong, (wpdu_.sz-wpdu_.solong)); 234 | if (n < 0) { 235 | if (errno != EAGAIN) { 236 | jsl_log(JSL_DBG_1, "connection::writepdu fd_ %d failure errno=%d\n", fd_, errno); 237 | wpdu_.solong = -1; 238 | wpdu_.sz = 0; 239 | } 240 | return (errno == EAGAIN); 241 | } 242 | wpdu_.solong += n; 243 | return true; 244 | } 245 | 246 | bool 247 | connection::readpdu() 248 | { 249 | if (!rpdu_.sz) { 250 | int sz, sz1; 251 | int n = read(fd_, &sz1, sizeof(sz1)); 252 | 253 | if (n == 0) { 254 | return false; 255 | } 256 | 257 | if (n < 0) { 258 | VERIFY(errno!=EAGAIN); 259 | return false; 260 | } 261 | 262 | if (n >0 && n!= sizeof(sz)) { 263 | jsl_log(JSL_DBG_OFF, "connection::readpdu short read of sz\n"); 264 | return false; 265 | } 266 | 267 | sz = ntohl(sz1); 268 | 269 | if (sz > MAX_PDU) { 270 | char *tmpb = (char *)&sz1; 271 | jsl_log(JSL_DBG_2, "connection::readpdu read pdu TOO BIG %d network order=%x %x %x %x %x\n", sz, 272 | sz1, tmpb[0],tmpb[1],tmpb[2],tmpb[3]); 273 | return false; 274 | } 275 | 276 | rpdu_.sz = sz; 277 | VERIFY(rpdu_.buf == NULL); 278 | rpdu_.buf = (char *)malloc(sz+sizeof(sz)); 279 | VERIFY(rpdu_.buf); 280 | bcopy(&sz1,rpdu_.buf,sizeof(sz)); 281 | rpdu_.solong = sizeof(sz); 282 | } 283 | 284 | int n = read(fd_, rpdu_.buf + rpdu_.solong, rpdu_.sz - rpdu_.solong); 285 | if (n <= 0) { 286 | if (errno == EAGAIN) 287 | return true; 288 | if (rpdu_.buf) 289 | free(rpdu_.buf); 290 | rpdu_.buf = NULL; 291 | rpdu_.sz = rpdu_.solong = 0; 292 | return (errno == EAGAIN); 293 | } 294 | rpdu_.solong += n; 295 | return true; 296 | } 297 | 298 | tcpsconn::tcpsconn(chanmgr *m1, int port, int lossytest) 299 | : mgr_(m1), lossy_(lossytest) 300 | { 301 | 302 | VERIFY(pthread_mutex_init(&m_,NULL) == 0); 303 | 304 | struct sockaddr_in sin; 305 | memset(&sin, 0, sizeof(sin)); 306 | sin.sin_family = AF_INET; 307 | sin.sin_port = htons(port); 308 | 309 | tcp_ = socket(AF_INET, SOCK_STREAM, 0); 310 | if(tcp_ < 0){ 311 | perror("tcpsconn::tcpsconn accept_loop socket:"); 312 | VERIFY(0); 313 | } 314 | 315 | int yes = 1; 316 | setsockopt(tcp_, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)); 317 | setsockopt(tcp_, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)); 318 | 319 | if(bind(tcp_, (sockaddr *)&sin, sizeof(sin)) < 0){ 320 | perror("accept_loop tcp bind:"); 321 | VERIFY(0); 322 | } 323 | 324 | if(listen(tcp_, 1000) < 0) { 325 | perror("tcpsconn::tcpsconn listen:"); 326 | VERIFY(0); 327 | } 328 | 329 | socklen_t addrlen = sizeof(sin); 330 | VERIFY(getsockname(tcp_, (sockaddr *)&sin, &addrlen) == 0); 331 | port_ = ntohs(sin.sin_port); 332 | 333 | jsl_log(JSL_DBG_2, "tcpsconn::tcpsconn listen on %d %d\n", port_, 334 | sin.sin_port); 335 | 336 | if (pipe(pipe_) < 0) { 337 | perror("accept_loop pipe:"); 338 | VERIFY(0); 339 | } 340 | 341 | int flags = fcntl(pipe_[0], F_GETFL, NULL); 342 | flags |= O_NONBLOCK; 343 | fcntl(pipe_[0], F_SETFL, flags); 344 | 345 | VERIFY((th_ = method_thread(this, false, &tcpsconn::accept_conn)) != 0); 346 | } 347 | 348 | tcpsconn::~tcpsconn() 349 | { 350 | VERIFY(close(pipe_[1]) == 0); 351 | VERIFY(pthread_join(th_, NULL) == 0); 352 | 353 | //close all the active connections 354 | std::map::iterator i; 355 | for (i = conns_.begin(); i != conns_.end(); i++) { 356 | i->second->closeconn(); 357 | i->second->decref(); 358 | } 359 | } 360 | 361 | void 362 | tcpsconn::process_accept() 363 | { 364 | sockaddr_in sin; 365 | socklen_t slen = sizeof(sin); 366 | int s1 = accept(tcp_, (sockaddr *)&sin, &slen); 367 | if (s1 < 0) { 368 | perror("tcpsconn::accept_conn error"); 369 | pthread_exit(NULL); 370 | } 371 | 372 | jsl_log(JSL_DBG_2, "accept_loop got connection fd=%d %s:%d\n", 373 | s1, inet_ntoa(sin.sin_addr), ntohs(sin.sin_port)); 374 | connection *ch = new connection(mgr_, s1, lossy_); 375 | 376 | // garbage collect all dead connections with refcount of 1 377 | std::map::iterator i; 378 | for (i = conns_.begin(); i != conns_.end();) { 379 | if (i->second->isdead() && i->second->ref() == 1) { 380 | jsl_log(JSL_DBG_2, "accept_loop garbage collected fd=%d\n", 381 | i->second->channo()); 382 | i->second->decref(); 383 | // Careful not to reuse i right after erase. (i++) will 384 | // be evaluated before the erase call because in C++, 385 | // there is a sequence point before a function call. 386 | // See http://en.wikipedia.org/wiki/Sequence_point. 387 | conns_.erase(i++); 388 | } else 389 | ++i; 390 | } 391 | 392 | conns_[ch->channo()] = ch; 393 | } 394 | 395 | void 396 | tcpsconn::accept_conn() 397 | { 398 | fd_set rfds; 399 | int max_fd = pipe_[0] > tcp_ ? pipe_[0] : tcp_; 400 | 401 | while (1) { 402 | FD_ZERO(&rfds); 403 | FD_SET(pipe_[0], &rfds); 404 | FD_SET(tcp_, &rfds); 405 | 406 | int ret = select(max_fd+1, &rfds, NULL, NULL, NULL); 407 | 408 | if (ret < 0) { 409 | if (errno == EINTR) { 410 | continue; 411 | } else { 412 | perror("accept_conn select:"); 413 | jsl_log(JSL_DBG_OFF, "tcpsconn::accept_conn failure errno %d\n",errno); 414 | VERIFY(0); 415 | } 416 | } 417 | 418 | if (FD_ISSET(pipe_[0], &rfds)) { 419 | close(pipe_[0]); 420 | close(tcp_); 421 | return; 422 | } 423 | else if (FD_ISSET(tcp_, &rfds)) { 424 | process_accept(); 425 | } else { 426 | VERIFY(0); 427 | } 428 | } 429 | } 430 | 431 | connection * 432 | connect_to_dst(const sockaddr_in &dst, chanmgr *mgr, int lossy) 433 | { 434 | int s= socket(AF_INET, SOCK_STREAM, 0); 435 | int yes = 1; 436 | setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)); 437 | if(connect(s, (sockaddr*)&dst, sizeof(dst)) < 0) { 438 | jsl_log(JSL_DBG_1, "rpcc::connect_to_dst failed to %s:%d\n", 439 | inet_ntoa(dst.sin_addr), (int)ntohs(dst.sin_port)); 440 | close(s); 441 | return NULL; 442 | } 443 | jsl_log(JSL_DBG_2, "connect_to_dst fd=%d to dst %s:%d\n", 444 | s, inet_ntoa(dst.sin_addr), (int)ntohs(dst.sin_port)); 445 | return new connection(mgr, s, lossy); 446 | } 447 | 448 | 449 | -------------------------------------------------------------------------------- /rpc/rpctest.cc: -------------------------------------------------------------------------------- 1 | // RPC test and pseudo-documentation. 2 | // generates print statements on failures, but eventually says "rpctest OK" 3 | 4 | #include "rpc.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "jsl_log.h" 11 | #include "gettime.h" 12 | #include "lang/verify.h" 13 | 14 | #define NUM_CL 2 15 | 16 | rpcs *server; // server rpc object 17 | rpcc *clients[NUM_CL]; // client rpc object 18 | struct sockaddr_in dst; //server's ip address 19 | int port; 20 | pthread_attr_t attr; 21 | 22 | // server-side handlers. they must be methods of some class 23 | // to simplify rpcs::reg(). a server process can have handlers 24 | // from multiple classes. 25 | class srv { 26 | public: 27 | int handle_22(const std::string a, const std::string b, std::string & r); 28 | int handle_fast(const int a, int &r); 29 | int handle_slow(const int a, int &r); 30 | int handle_bigrep(const int a, std::string &r); 31 | }; 32 | 33 | // a handler. a and b are arguments, r is the result. 34 | // there can be multiple arguments but only one result. 35 | // the caller also gets to see the int return value 36 | // as the return value from rpcc::call(). 37 | // rpcs::reg() decides how to unmarshall by looking 38 | // at these argument types, so this function definition 39 | // does what a .x file does in SunRPC. 40 | int 41 | srv::handle_22(const std::string a, std::string b, std::string &r) 42 | { 43 | r = a + b; 44 | return 0; 45 | } 46 | 47 | int 48 | srv::handle_fast(const int a, int &r) 49 | { 50 | r = a + 1; 51 | return 0; 52 | } 53 | 54 | int 55 | srv::handle_slow(const int a, int &r) 56 | { 57 | usleep(random() % 5000); 58 | r = a + 2; 59 | return 0; 60 | } 61 | 62 | int 63 | srv::handle_bigrep(const int len, std::string &r) 64 | { 65 | r = std::string(len, 'x'); 66 | return 0; 67 | } 68 | 69 | srv service; 70 | 71 | void startserver() 72 | { 73 | server = new rpcs(port); 74 | server->reg(22, &service, &srv::handle_22); 75 | server->reg(23, &service, &srv::handle_fast); 76 | server->reg(24, &service, &srv::handle_slow); 77 | server->reg(25, &service, &srv::handle_bigrep); 78 | } 79 | 80 | void 81 | testmarshall() 82 | { 83 | marshall m; 84 | req_header rh(1,2,3,4,5); 85 | m.pack_req_header(rh); 86 | VERIFY(m.size()==RPC_HEADER_SZ); 87 | int i = 12345; 88 | unsigned long long l = 1223344455L; 89 | std::string s = std::string("hallo...."); 90 | m << i; 91 | m << l; 92 | m << s; 93 | 94 | char *b; 95 | int sz; 96 | m.take_buf(&b,&sz); 97 | VERIFY(sz == (int)(RPC_HEADER_SZ+sizeof(i)+sizeof(l)+s.size()+sizeof(int))); 98 | 99 | unmarshall un(b,sz); 100 | req_header rh1; 101 | un.unpack_req_header(&rh1); 102 | VERIFY(memcmp(&rh,&rh1,sizeof(rh))==0); 103 | int i1; 104 | unsigned long long l1; 105 | std::string s1; 106 | un >> i1; 107 | un >> l1; 108 | un >> s1; 109 | VERIFY(un.okdone()); 110 | VERIFY(i1==i && l1==l && s1==s); 111 | } 112 | 113 | void * 114 | client1(void *xx) 115 | { 116 | 117 | // test concurrency. 118 | int which_cl = ((unsigned long) xx ) % NUM_CL; 119 | 120 | for(int i = 0; i < 100; i++){ 121 | int arg = (random() % 2000); 122 | std::string rep; 123 | int ret = clients[which_cl]->call(25, arg, rep); 124 | VERIFY(ret == 0); 125 | if ((int)rep.size()!=arg) { 126 | printf("repsize wrong %d!=%d\n", (int)rep.size(), arg); 127 | } 128 | VERIFY((int)rep.size() == arg); 129 | } 130 | 131 | // test rpc replies coming back not in the order of 132 | // the original calls -- i.e. does xid reply dispatch work. 133 | for(int i = 0; i < 100; i++){ 134 | int which = (random() % 2); 135 | int arg = (random() % 1000); 136 | int rep; 137 | 138 | struct timespec start,end; 139 | clock_gettime(CLOCK_REALTIME, &start); 140 | 141 | int ret = clients[which_cl]->call(which ? 23 : 24, arg, rep); 142 | clock_gettime(CLOCK_REALTIME, &end); 143 | int diff = diff_timespec(end, start); 144 | if (ret != 0) 145 | printf("%d ms have elapsed!!!\n", diff); 146 | VERIFY(ret == 0); 147 | VERIFY(rep == (which ? arg+1 : arg+2)); 148 | } 149 | 150 | return 0; 151 | } 152 | 153 | void * 154 | client2(void *xx) 155 | { 156 | int which_cl = ((unsigned long) xx ) % NUM_CL; 157 | 158 | time_t t1; 159 | time(&t1); 160 | 161 | while(time(0) - t1 < 10){ 162 | int arg = (random() % 2000); 163 | std::string rep; 164 | int ret = clients[which_cl]->call(25, arg, rep); 165 | if ((int)rep.size()!=arg) { 166 | printf("ask for %d reply got %d ret %d\n", 167 | arg, (int)rep.size(), ret); 168 | } 169 | VERIFY((int)rep.size() == arg); 170 | } 171 | return 0; 172 | } 173 | 174 | void * 175 | client3(void *xx) 176 | { 177 | rpcc *c = (rpcc *) xx; 178 | 179 | for(int i = 0; i < 4; i++){ 180 | int rep; 181 | int ret = c->call(24, i, rep, rpcc::to(3000)); 182 | VERIFY(ret == rpc_const::timeout_failure || rep == i+2); 183 | } 184 | return 0; 185 | } 186 | 187 | 188 | void 189 | simple_tests(rpcc *c) 190 | { 191 | printf("simple_tests\n"); 192 | // an RPC call to procedure #22. 193 | // rpcc::call() looks at the argument types to decide how 194 | // to marshall the RPC call packet, and how to unmarshall 195 | // the reply packet. 196 | std::string rep; 197 | int intret = c->call(22, (std::string)"hello", (std::string)" goodbye", rep); 198 | VERIFY(intret == 0); // this is what handle_22 returns 199 | VERIFY(rep == "hello goodbye"); 200 | printf(" -- string concat RPC .. ok\n"); 201 | 202 | // small request, big reply (perhaps req via UDP, reply via TCP) 203 | intret = c->call(25, 70000, rep, rpcc::to(200000)); 204 | VERIFY(intret == 0); 205 | VERIFY(rep.size() == 70000); 206 | printf(" -- small request, big reply .. ok\n"); 207 | 208 | #if 0 209 | // too few arguments 210 | intret = c->call(22, (std::string)"just one", rep); 211 | VERIFY(intret < 0); 212 | printf(" -- too few arguments .. failed ok\n"); 213 | 214 | // too many arguments; proc #23 expects just one. 215 | intret = c->call(23, 1001, 1002, rep); 216 | VERIFY(intret < 0); 217 | printf(" -- too many arguments .. failed ok\n"); 218 | 219 | // wrong return value size 220 | int wrongrep; 221 | intret = c->call(23, (std::string)"hello", (std::string)" goodbye", wrongrep); 222 | VERIFY(intret < 0); 223 | printf(" -- wrong ret value size .. failed ok\n"); 224 | #endif 225 | 226 | // specify a timeout value to an RPC that should succeed (udp) 227 | int xx = 0; 228 | intret = c->call(23, 77, xx, rpcc::to(3000)); 229 | VERIFY(intret == 0 && xx == 78); 230 | printf(" -- no suprious timeout .. ok\n"); 231 | 232 | // specify a timeout value to an RPC that should succeed (tcp) 233 | { 234 | std::string arg(1000, 'x'); 235 | std::string rep; 236 | c->call(22, arg, (std::string)"x", rep, rpcc::to(3000)); 237 | VERIFY(rep.size() == 1001); 238 | printf(" -- no suprious timeout .. ok\n"); 239 | } 240 | 241 | // huge RPC 242 | std::string big(1000000, 'x'); 243 | intret = c->call(22, big, (std::string)"z", rep); 244 | VERIFY(rep.size() == 1000001); 245 | printf(" -- huge 1M rpc request .. ok\n"); 246 | 247 | // specify a timeout value to an RPC that should timeout (udp) 248 | struct sockaddr_in non_existent; 249 | memset(&non_existent, 0, sizeof(non_existent)); 250 | non_existent.sin_family = AF_INET; 251 | non_existent.sin_addr.s_addr = inet_addr("127.0.0.1"); 252 | non_existent.sin_port = htons(7661); 253 | rpcc *c1 = new rpcc(non_existent); 254 | time_t t0 = time(0); 255 | intret = c1->bind(rpcc::to(3000)); 256 | time_t t1 = time(0); 257 | VERIFY(intret < 0 && (t1 - t0) <= 4); 258 | printf(" -- rpc timeout .. ok\n"); 259 | printf("simple_tests OK\n"); 260 | } 261 | 262 | void 263 | concurrent_test(int nt) 264 | { 265 | // create threads that make lots of calls in parallel, 266 | // to test thread synchronization for concurrent calls 267 | // and dispatches. 268 | int ret; 269 | 270 | printf("start concurrent_test (%d threads) ...", nt); 271 | 272 | pthread_t th[nt]; 273 | for(int i = 0; i < nt; i++){ 274 | ret = pthread_create(&th[i], &attr, client1, (void *) (uintptr_t)i); 275 | VERIFY(ret == 0); 276 | } 277 | 278 | for(int i = 0; i < nt; i++){ 279 | VERIFY(pthread_join(th[i], NULL) == 0); 280 | } 281 | printf(" OK\n"); 282 | } 283 | 284 | void 285 | lossy_test() 286 | { 287 | int ret; 288 | 289 | printf("start lossy_test ..."); 290 | VERIFY(setenv("RPC_LOSSY", "5", 1) == 0); 291 | 292 | if (server) { 293 | delete server; 294 | startserver(); 295 | } 296 | 297 | for (int i = 0; i < NUM_CL; i++) { 298 | delete clients[i]; 299 | clients[i] = new rpcc(dst); 300 | VERIFY(clients[i]->bind()==0); 301 | } 302 | 303 | int nt = 1; 304 | pthread_t th[nt]; 305 | for(int i = 0; i < nt; i++){ 306 | ret = pthread_create(&th[i], &attr, client2, (void *) (uintptr_t)i); 307 | VERIFY(ret == 0); 308 | } 309 | for(int i = 0; i < nt; i++){ 310 | VERIFY(pthread_join(th[i], NULL) == 0); 311 | } 312 | printf(".. OK\n"); 313 | VERIFY(setenv("RPC_LOSSY", "0", 1) == 0); 314 | } 315 | 316 | void 317 | failure_test() 318 | { 319 | rpcc *client1; 320 | rpcc *client = clients[0]; 321 | 322 | printf("failure_test\n"); 323 | 324 | delete server; 325 | 326 | client1 = new rpcc(dst); 327 | VERIFY (client1->bind(rpcc::to(3000)) < 0); 328 | printf(" -- create new client and try to bind to failed server .. failed ok\n"); 329 | 330 | delete client1; 331 | 332 | startserver(); 333 | 334 | std::string rep; 335 | int intret = client->call(22, (std::string)"hello", (std::string)" goodbye", rep); 336 | VERIFY(intret == rpc_const::oldsrv_failure); 337 | printf(" -- call recovered server with old client .. failed ok\n"); 338 | 339 | delete client; 340 | 341 | clients[0] = client = new rpcc(dst); 342 | VERIFY (client->bind() >= 0); 343 | VERIFY (client->bind() < 0); 344 | 345 | intret = client->call(22, (std::string)"hello", (std::string)" goodbye", rep); 346 | VERIFY(intret == 0); 347 | VERIFY(rep == "hello goodbye"); 348 | 349 | printf(" -- delete existing rpc client, create replacement rpc client .. ok\n"); 350 | 351 | 352 | int nt = 10; 353 | int ret; 354 | printf(" -- concurrent test on new rpc client w/ %d threads ..", nt); 355 | 356 | pthread_t th[nt]; 357 | for(int i = 0; i < nt; i++){ 358 | ret = pthread_create(&th[i], &attr, client3, (void *) client); 359 | VERIFY(ret == 0); 360 | } 361 | 362 | for(int i = 0; i < nt; i++){ 363 | VERIFY(pthread_join(th[i], NULL) == 0); 364 | } 365 | printf("ok\n"); 366 | 367 | delete server; 368 | delete client; 369 | 370 | startserver(); 371 | clients[0] = client = new rpcc(dst); 372 | VERIFY (client->bind() >= 0); 373 | printf(" -- delete existing rpc client and server, create replacements.. ok\n"); 374 | 375 | printf(" -- concurrent test on new client and server w/ %d threads ..", nt); 376 | for(int i = 0; i < nt; i++){ 377 | ret = pthread_create(&th[i], &attr, client3, (void *)client); 378 | VERIFY(ret == 0); 379 | } 380 | 381 | for(int i = 0; i < nt; i++){ 382 | VERIFY(pthread_join(th[i], NULL) == 0); 383 | } 384 | printf("ok\n"); 385 | 386 | printf("failure_test OK\n"); 387 | } 388 | 389 | int 390 | main(int argc, char *argv[]) 391 | { 392 | 393 | setvbuf(stdout, NULL, _IONBF, 0); 394 | setvbuf(stderr, NULL, _IONBF, 0); 395 | int debug_level = 0; 396 | 397 | bool isclient = false; 398 | bool isserver = false; 399 | 400 | srandom(getpid()); 401 | port = 20000 + (getpid() % 10000); 402 | 403 | char ch = 0; 404 | while ((ch = getopt(argc, argv, "csd:p:l"))!=-1) { 405 | switch (ch) { 406 | case 'c': 407 | isclient = true; 408 | break; 409 | case 's': 410 | isserver = true; 411 | break; 412 | case 'd': 413 | debug_level = atoi(optarg); 414 | break; 415 | case 'p': 416 | port = atoi(optarg); 417 | break; 418 | case 'l': 419 | VERIFY(setenv("RPC_LOSSY", "5", 1) == 0); 420 | default: 421 | break; 422 | } 423 | } 424 | 425 | if (!isserver && !isclient) { 426 | isserver = isclient = true; 427 | } 428 | 429 | if (debug_level > 0) { 430 | //__loginit.initNow(); 431 | jsl_set_debug(debug_level); 432 | jsl_log(JSL_DBG_1, "DEBUG LEVEL: %d\n", debug_level); 433 | } 434 | 435 | testmarshall(); 436 | 437 | pthread_attr_init(&attr); 438 | // set stack size to 32K, so we don't run out of memory 439 | pthread_attr_setstacksize(&attr, 32*1024); 440 | 441 | if (isserver) { 442 | printf("starting server on port %d RPC_HEADER_SZ %d\n", port, RPC_HEADER_SZ); 443 | startserver(); 444 | } 445 | 446 | if (isclient) { 447 | // server's address. 448 | memset(&dst, 0, sizeof(dst)); 449 | dst.sin_family = AF_INET; 450 | dst.sin_addr.s_addr = inet_addr("127.0.0.1"); 451 | dst.sin_port = htons(port); 452 | 453 | 454 | // start the client. bind it to the server. 455 | // starts a thread to listen for replies and hand them to 456 | // the correct waiting caller thread. there should probably 457 | // be only one rpcc per process. you probably need one 458 | // rpcc per server. 459 | for (int i = 0; i < NUM_CL; i++) { 460 | clients[i] = new rpcc(dst); 461 | VERIFY (clients[i]->bind() == 0); 462 | } 463 | 464 | simple_tests(clients[0]); 465 | concurrent_test(10); 466 | lossy_test(); 467 | if (isserver) { 468 | failure_test(); 469 | } 470 | 471 | printf("rpctest OK\n"); 472 | 473 | exit(0); 474 | } 475 | 476 | while (1) { 477 | sleep(1); 478 | } 479 | } 480 | -------------------------------------------------------------------------------- /test-lab-3-b.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test-lab-3-b /classfs/dir1 /classfs/dir2 3 | * 4 | * Test correctness of locking and cache coherence by creating 5 | * and deleting files in the same underlying directory 6 | * via two different ccfs servers. 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | char d1[512], d2[512]; 22 | extern int errno; 23 | 24 | char big[20001]; 25 | char huge[65536*2+1]; 26 | 27 | void 28 | create1(const char *d, const char *f, const char *in) 29 | { 30 | int fd; 31 | char n[512]; 32 | 33 | /* 34 | * The FreeBSD NFS client only invalidates its caches 35 | * cache if the mtime changes by a whole second. 36 | */ 37 | sleep(1); 38 | 39 | sprintf(n, "%s/%s", d, f); 40 | fd = creat(n, 0666); 41 | if(fd < 0){ 42 | fprintf(stderr, "test-lab-3-b: create(%s): %s\n", 43 | n, strerror(errno)); 44 | exit(1); 45 | } 46 | if(write(fd, in, strlen(in)) != strlen(in)){ 47 | fprintf(stderr, "test-lab-3-b: write(%s): %s\n", 48 | n, strerror(errno)); 49 | exit(1); 50 | } 51 | if(close(fd) != 0){ 52 | fprintf(stderr, "test-lab-3-b: close(%s): %s\n", 53 | n, strerror(errno)); 54 | exit(1); 55 | } 56 | } 57 | 58 | void 59 | check1(const char *d, const char *f, const char *in) 60 | { 61 | int fd, cc; 62 | char n[512], buf[21000]; 63 | 64 | sprintf(n, "%s/%s", d, f); 65 | fd = open(n, 0); 66 | if(fd < 0){ 67 | fprintf(stderr, "test-lab-3-b: open(%s): %s\n", 68 | n, strerror(errno)); 69 | exit(1); 70 | } 71 | errno = 0; 72 | cc = read(fd, buf, sizeof(buf) - 1); 73 | if(cc != strlen(in)){ 74 | fprintf(stderr, "test-lab-3-b: read(%s) returned too little %d%s%s\n", 75 | n, 76 | cc, 77 | errno ? ": " : "", 78 | errno ? strerror(errno) : ""); 79 | exit(1); 80 | } 81 | close(fd); 82 | buf[cc] = '\0'; 83 | if(strncmp(buf, in, strlen(n)) != 0){ 84 | fprintf(stderr, "test-lab-3-b: read(%s) got \"%s\", not \"%s\"\n", 85 | n, buf, in); 86 | exit(1); 87 | } 88 | } 89 | 90 | void 91 | unlink1(const char *d, const char *f) 92 | { 93 | char n[512]; 94 | 95 | sleep(1); 96 | 97 | sprintf(n, "%s/%s", d, f); 98 | if(unlink(n) != 0){ 99 | fprintf(stderr, "test-lab-3-b: unlink(%s): %s\n", 100 | n, strerror(errno)); 101 | exit(1); 102 | } 103 | } 104 | 105 | void 106 | checknot(const char *d, const char *f) 107 | { 108 | int fd; 109 | char n[512]; 110 | 111 | sprintf(n, "%s/%s", d, f); 112 | fd = open(n, 0); 113 | if(fd >= 0){ 114 | fprintf(stderr, "test-lab-3-b: open(%s) succeeded for deleted file\n", n); 115 | exit(1); 116 | } 117 | } 118 | 119 | void 120 | append1(const char *d, const char *f, const char *in) 121 | { 122 | int fd; 123 | char n[512]; 124 | 125 | sleep(1); 126 | 127 | sprintf(n, "%s/%s", d, f); 128 | fd = open(n, O_WRONLY|O_APPEND); 129 | if(fd < 0){ 130 | fprintf(stderr, "test-lab-3-b: append open(%s): %s\n", 131 | n, strerror(errno)); 132 | exit(1); 133 | } 134 | if(write(fd, in, strlen(in)) != strlen(in)){ 135 | fprintf(stderr, "test-lab-3-b: append write(%s): %s\n", 136 | n, strerror(errno)); 137 | exit(1); 138 | } 139 | if(close(fd) != 0){ 140 | fprintf(stderr, "test-lab-3-b: append close(%s): %s\n", 141 | n, strerror(errno)); 142 | exit(1); 143 | } 144 | } 145 | 146 | // write n characters starting at offset start, 147 | // one at a time. 148 | void 149 | write1(const char *d, const char *f, int start, int n, char c) 150 | { 151 | int fd; 152 | char name[512]; 153 | 154 | sleep(1); 155 | 156 | sprintf(name, "%s/%s", d, f); 157 | fd = open(name, O_WRONLY|O_CREAT, 0666); 158 | if (fd < 0 && errno == EEXIST) 159 | fd = open(name, O_WRONLY, 0666); 160 | if(fd < 0){ 161 | fprintf(stderr, "test-lab-3-b: open(%s): %s\n", 162 | name, strerror(errno)); 163 | exit(1); 164 | } 165 | if(lseek(fd, start, 0) != (off_t) start){ 166 | fprintf(stderr, "test-lab-3-b: lseek(%s, %d): %s\n", 167 | name, start, strerror(errno)); 168 | exit(1); 169 | } 170 | for(int i = 0; i < n; i++){ 171 | if(write(fd, &c, 1) != 1){ 172 | fprintf(stderr, "test-lab-3-b: write(%s): %s\n", 173 | name, strerror(errno)); 174 | exit(1); 175 | } 176 | if(fsync(fd) != 0){ 177 | fprintf(stderr, "test-lab-3-b: fsync(%s): %s\n", 178 | name, strerror(errno)); 179 | exit(1); 180 | } 181 | } 182 | if(close(fd) != 0){ 183 | fprintf(stderr, "test-lab-3-b: close(%s): %s\n", 184 | name, strerror(errno)); 185 | exit(1); 186 | } 187 | } 188 | 189 | // check that the n bytes at offset start are all c. 190 | void 191 | checkread(const char *d, const char *f, int start, int n, char c) 192 | { 193 | int fd; 194 | char name[512]; 195 | 196 | sleep(1); 197 | 198 | sprintf(name, "%s/%s", d, f); 199 | fd = open(name, 0); 200 | if(fd < 0){ 201 | fprintf(stderr, "test-lab-3-b: open(%s): %s\n", 202 | name, strerror(errno)); 203 | exit(1); 204 | } 205 | if(lseek(fd, start, 0) != (off_t) start){ 206 | fprintf(stderr, "test-lab-3-b: lseek(%s, %d): %s\n", 207 | name, start, strerror(errno)); 208 | exit(1); 209 | } 210 | for(int i = 0; i < n; i++){ 211 | char xc; 212 | if(read(fd, &xc, 1) != 1){ 213 | fprintf(stderr, "test-lab-3-b: read(%s): %s\n", 214 | name, strerror(errno)); 215 | exit(1); 216 | } 217 | if(xc != c){ 218 | fprintf(stderr, "test-lab-3-b: checkread off %d %02x != %02x\n", 219 | start + i, xc, c); 220 | exit(1); 221 | } 222 | } 223 | close(fd); 224 | } 225 | 226 | 227 | void 228 | createn(const char *d, const char *prefix, int nf, bool possible_dup) 229 | { 230 | int fd, i; 231 | char n[512]; 232 | 233 | /* 234 | * The FreeBSD NFS client only invalidates its caches 235 | * cache if the mtime changes by a whole second. 236 | */ 237 | sleep(1); 238 | 239 | for(i = 0; i < nf; i++){ 240 | sprintf(n, "%s/%s-%d", d, prefix, i); 241 | fd = creat(n, 0666); 242 | if (fd < 0 && possible_dup && errno == EEXIST) 243 | continue; 244 | if(fd < 0){ 245 | fprintf(stderr, "test-lab-3-b: create(%s): %s\n", 246 | n, strerror(errno)); 247 | exit(1); 248 | } 249 | if(write(fd, &i, sizeof(i)) != sizeof(i)){ 250 | fprintf(stderr, "test-lab-3-b: write(%s): %s\n", 251 | n, strerror(errno)); 252 | exit(1); 253 | } 254 | if(close(fd) != 0){ 255 | fprintf(stderr, "test-lab-3-b: close(%s): %s\n", 256 | n, strerror(errno)); 257 | exit(1); 258 | } 259 | } 260 | } 261 | 262 | void 263 | checkn(const char *d, const char *prefix, int nf) 264 | { 265 | int fd, i, cc, j; 266 | char n[512]; 267 | 268 | for(i = 0; i < nf; i++){ 269 | sprintf(n, "%s/%s-%d", d, prefix, i); 270 | fd = open(n, 0); 271 | if(fd < 0){ 272 | fprintf(stderr, "test-lab-3-b: open(%s): %s\n", 273 | n, strerror(errno)); 274 | exit(1); 275 | } 276 | j = -1; 277 | cc = read(fd, &j, sizeof(j)); 278 | if(cc != sizeof(j)){ 279 | fprintf(stderr, "test-lab-3-b: read(%s) returned too little %d%s%s\n", 280 | n, 281 | cc, 282 | errno ? ": " : "", 283 | errno ? strerror(errno) : ""); 284 | exit(1); 285 | } 286 | if(j != i){ 287 | fprintf(stderr, "test-lab-3-b: checkn %s contained %d not %d\n", 288 | n, j, i); 289 | exit(1); 290 | } 291 | close(fd); 292 | } 293 | } 294 | 295 | void 296 | unlinkn(const char *d, const char *prefix, int nf) 297 | { 298 | char n[512]; 299 | int i; 300 | 301 | sleep(1); 302 | 303 | for(i = 0; i < nf; i++){ 304 | sprintf(n, "%s/%s-%d", d, prefix, i); 305 | if(unlink(n) != 0){ 306 | fprintf(stderr, "test-lab-3-b: unlink(%s): %s\n", 307 | n, strerror(errno)); 308 | exit(1); 309 | } 310 | } 311 | } 312 | 313 | int 314 | compar(const void *xa, const void *xb) 315 | { 316 | char *a = *(char**)xa; 317 | char *b = *(char**)xb; 318 | return strcmp(a, b); 319 | } 320 | 321 | void 322 | dircheck(const char *d, int nf) 323 | { 324 | DIR *dp; 325 | struct dirent *e; 326 | char *names[1000]; 327 | int nnames = 0, i; 328 | 329 | dp = opendir(d); 330 | if(dp == 0){ 331 | fprintf(stderr, "test-lab-3-b: opendir(%s): %s\n", d, strerror(errno)); 332 | exit(1); 333 | } 334 | while((e = readdir(dp))){ 335 | if(e->d_name[0] != '.'){ 336 | if(nnames >= sizeof(names)/sizeof(names[0])){ 337 | fprintf(stderr, "warning: too many files in %s\n", d); 338 | } 339 | names[nnames] = (char *) malloc(strlen(e->d_name) + 1); 340 | strcpy(names[nnames], e->d_name); 341 | nnames++; 342 | } 343 | } 344 | closedir(dp); 345 | 346 | if(nf != nnames){ 347 | fprintf(stderr, "test-lab-3-b: wanted %d dir entries, got %d\n", nf, nnames); 348 | exit(1); 349 | } 350 | 351 | /* check for duplicate entries */ 352 | qsort(names, nnames, sizeof(names[0]), compar); 353 | for(i = 0; i < nnames-1; i++){ 354 | if(strcmp(names[i], names[i+1]) == 0){ 355 | fprintf(stderr, "test-lab-3-b: duplicate directory entry for %s\n", names[i]); 356 | exit(1); 357 | } 358 | } 359 | 360 | for(i = 0; i < nnames; i++) 361 | free(names[i]); 362 | } 363 | 364 | void 365 | reap (int pid) 366 | { 367 | int wpid, status; 368 | wpid = waitpid (pid, &status, 0); 369 | if (wpid < 0) { 370 | perror("waitpid"); 371 | exit(1); 372 | } 373 | if (wpid != pid) { 374 | fprintf(stderr, "unexpected pid reaped: %d\n", wpid); 375 | exit(1); 376 | } 377 | if(!WIFEXITED(status) || WEXITSTATUS(status) != 0) { 378 | fprintf(stderr, "child exited unhappily\n"); 379 | exit(1); 380 | } 381 | } 382 | 383 | int 384 | main(int argc, char *argv[]) 385 | { 386 | int pid, i; 387 | 388 | if(argc != 3){ 389 | fprintf(stderr, "Usage: test-lab-3-b dir1 dir2\n"); 390 | exit(1); 391 | } 392 | 393 | sprintf(d1, "%s/d%d", argv[1], getpid()); 394 | if(mkdir(d1, 0777) != 0){ 395 | fprintf(stderr, "test-lab-3-b: failed: mkdir(%s): %s\n", 396 | d1, strerror(errno)); 397 | exit(1); 398 | } 399 | sprintf(d2, "%s/d%d", argv[2], getpid()); 400 | if(access(d2, 0) != 0){ 401 | fprintf(stderr, "test-lab-3-b: failed: access(%s) after mkdir %s: %s\n", 402 | d2, d1, strerror(errno)); 403 | exit(1); 404 | } 405 | 406 | setbuf(stdout, 0); 407 | 408 | for(i = 0; i < sizeof(big)-1; i++) 409 | big[i] = 'x'; 410 | for(i = 0; i < sizeof(huge)-1; i++) 411 | huge[i] = '0'; 412 | 413 | printf("Create then read: "); 414 | create1(d1, "f1", "aaa"); 415 | check1(d2, "f1", "aaa"); 416 | check1(d1, "f1", "aaa"); 417 | printf("OK\n"); 418 | 419 | printf("Unlink: "); 420 | unlink1(d2, "f1"); 421 | create1(d1, "fx1", "fxx"); /* checknot f1 fails w/o these */ 422 | unlink1(d1, "fx1"); 423 | checknot(d1, "f1"); 424 | checknot(d2, "f1"); 425 | create1(d1, "f2", "222"); 426 | unlink1(d2, "f2"); 427 | checknot(d1, "f2"); 428 | checknot(d2, "f2"); 429 | create1(d1, "f3", "333"); 430 | check1(d2, "f3", "333"); 431 | check1(d1, "f3", "333"); 432 | unlink1(d1, "f3"); 433 | create1(d2, "fx2", "22"); /* checknot f3 fails w/o these */ 434 | unlink1(d2, "fx2"); 435 | checknot(d2, "f3"); 436 | checknot(d1, "f3"); 437 | printf("OK\n"); 438 | 439 | printf("Append: "); 440 | create1(d2, "f1", "aaa"); 441 | append1(d1, "f1", "bbb"); 442 | append1(d2, "f1", "ccc"); 443 | check1(d1, "f1", "aaabbbccc"); 444 | check1(d2, "f1", "aaabbbccc"); 445 | printf("OK\n"); 446 | 447 | printf("Readdir: "); 448 | dircheck(d1, 1); 449 | dircheck(d2, 1); 450 | unlink1(d1, "f1"); 451 | dircheck(d1, 0); 452 | dircheck(d2, 0); 453 | create1(d2, "f2", "aaa"); 454 | create1(d1, "f3", "aaa"); 455 | dircheck(d1, 2); 456 | dircheck(d2, 2); 457 | unlink1(d2, "f2"); 458 | dircheck(d2, 1); 459 | dircheck(d1, 1); 460 | unlink1(d2, "f3"); 461 | dircheck(d1, 0); 462 | dircheck(d2, 0); 463 | printf("OK\n"); 464 | 465 | printf("Many sequential creates: "); 466 | createn(d1, "aa", 10, false); 467 | createn(d2, "bb", 10, false); 468 | dircheck(d2, 20); 469 | checkn(d2, "bb", 10); 470 | checkn(d2, "aa", 10); 471 | checkn(d1, "aa", 10); 472 | checkn(d1, "bb", 10); 473 | unlinkn(d1, "aa", 10); 474 | unlinkn(d2, "bb", 10); 475 | printf("OK\n"); 476 | 477 | printf("Write 20000 bytes: "); 478 | create1(d1, "bf", big); 479 | check1(d1, "bf", big); 480 | check1(d2, "bf", big); 481 | unlink1(d1, "bf"); 482 | printf("OK\n"); 483 | 484 | printf("Concurrent creates: "); 485 | pid = fork(); 486 | if(pid < 0){ 487 | perror("test-lab-3-b: fork"); 488 | exit(1); 489 | } 490 | if(pid == 0){ 491 | createn(d2, "xx", 20, false); 492 | exit(0); 493 | } 494 | createn(d1, "yy", 20, false); 495 | sleep(10); 496 | reap(pid); 497 | dircheck(d1, 40); 498 | checkn(d1, "xx", 20); 499 | checkn(d2, "yy", 20); 500 | unlinkn(d1, "xx", 20); 501 | unlinkn(d1, "yy", 20); 502 | printf("OK\n"); 503 | 504 | printf("Concurrent creates of the same file: "); 505 | pid = fork(); 506 | if(pid < 0){ 507 | perror("test-lab-3-b: fork"); 508 | exit(1); 509 | } 510 | if(pid == 0){ 511 | createn(d2, "zz", 20, true); 512 | exit(0); 513 | } 514 | createn(d1, "zz", 20, true); 515 | sleep(4); 516 | dircheck(d1, 20); 517 | reap(pid); 518 | checkn(d1, "zz", 20); 519 | checkn(d2, "zz", 20); 520 | unlinkn(d1, "zz", 20); 521 | printf("OK\n"); 522 | 523 | printf("Concurrent create/delete: "); 524 | createn(d1, "x1", 20, false); 525 | createn(d2, "x2", 20, false); 526 | pid = fork(); 527 | if(pid < 0){ 528 | perror("test-lab-3-b: fork"); 529 | exit(1); 530 | } 531 | if(pid == 0){ 532 | unlinkn(d2, "x1", 20); 533 | createn(d1, "x3", 20, false); 534 | exit(0); 535 | } 536 | createn(d1, "x4", 20, false); 537 | reap(pid); 538 | unlinkn(d2, "x2", 20); 539 | unlinkn(d2, "x4", 20); 540 | unlinkn(d2, "x3", 20); 541 | dircheck(d1, 0); 542 | printf("OK\n"); 543 | 544 | printf("Concurrent creates, same file, same server: "); 545 | pid = fork(); 546 | if(pid < 0){ 547 | perror("test-lab-3-b: fork"); 548 | exit(1); 549 | } 550 | if(pid == 0){ 551 | createn(d1, "zz", 20, true); 552 | exit(0); 553 | } 554 | createn(d1, "zz", 20, true); 555 | sleep(2); 556 | dircheck(d1, 20); 557 | reap(pid); 558 | checkn(d1, "zz", 20); 559 | unlinkn(d1, "zz", 20); 560 | printf("OK\n"); 561 | 562 | printf("Concurrent writes to different parts of same file: "); 563 | create1(d1, "www", huge); 564 | pid = fork(); 565 | if(pid < 0){ 566 | perror("test-lab-3-b: fork"); 567 | exit(1); 568 | } 569 | if(pid == 0){ 570 | write1(d2, "www", 65536, 64, '2'); 571 | exit(0); 572 | } 573 | write1(d1, "www", 0, 64, '1'); 574 | reap(pid); 575 | checkread(d1, "www", 0, 64, '1'); 576 | checkread(d2, "www", 0, 64, '1'); 577 | checkread(d1, "www", 65536, 64, '2'); 578 | checkread(d2, "www", 65536, 64, '2'); 579 | printf("OK\n"); 580 | 581 | printf("test-lab-3-b: Passed all tests.\n"); 582 | 583 | exit(0); 584 | return(0); 585 | } 586 | --------------------------------------------------------------------------------