├── LICENSE ├── README.md ├── bug_report ├── README ├── config-4.6.2 ├── linux-4.6.2 │ ├── 20160617.log │ ├── 20160621.log │ └── 20160623.log └── trinity-log-20170511 │ ├── trinity-child0.log │ ├── trinity-child1.log │ ├── trinity-child2.log │ ├── trinity-child3.log │ ├── trinity-child4.log │ ├── trinity-child5.log │ ├── trinity-child6.log │ ├── trinity-child7.log │ └── trinity.log ├── profiles └── local.cfg ├── static_analysis_tools ├── ConditionChecker │ ├── ConditionChecker.cpp │ ├── ConditionParse.cpp │ ├── Parse.h │ └── README.md ├── IRParser │ ├── extend_func.cpp │ ├── info.h │ ├── kcov_map.cpp │ ├── kstate_map.cpp │ └── log.h └── kern_instrument │ ├── AssignTrackerPass │ ├── AssignTracker.cpp │ ├── AssignTracker.exports │ └── CMakeLists.txt │ └── kern_patch │ └── 0001-KCOV_SRT_TRACK-ok.patch ├── survey.md ├── syz_patch ├── 0001-syz-manager-manager.go-executor-executor.cc-support-.patch ├── 0002-Calculate-prog-prior-base-on-weighted-pcs.patch └── 0003-Support-weighed-kstate-resource.patch └── syzkaller ├── Bitmap.png ├── Corpus.png ├── CoverageOfFiles.png ├── CoverageOfTargetFunctions.png ├── KernFunc.png ├── ProgState.png ├── TotalCoverage.png ├── cover_filter.md ├── design_implementation_intro.md ├── kstat_demo ├── README.md ├── ebpf │ ├── ebpf.go │ └── ebpftext.go ├── ebpf_sample │ ├── ebpftext_gen.go │ ├── ebpftext_recvmsg.go │ └── ebpftext_sendmsg.go ├── parse │ └── parse.go ├── pipe_monitor.go ├── state │ └── state.go ├── syz_patch │ ├── cover_filter │ │ └── 0001-fuzzer-calculate-prog-prios-base-on-weighted-blocks.patch │ └── kstate │ │ ├── 0001-Support-ebpf-feedbac-and-display-in-webui.patch │ │ ├── 0004-Support-retState-ebpfsig-resource.patch │ │ └── 0005-Add-monitor-binary-and-function-pcs-options-to-manag.patch └── tcp-ipv6 │ ├── config.json │ ├── data.tar.bz2 │ └── test.md ├── kstate_resource.md ├── multi_policy ├── 0001-Add-ebpf-feedback-and-display-in-webui.patch ├── 0002-Add-coverage-filter.patch ├── 
0003-Add-manager-configure-for-coverage-filter-and-ebpf-f.patch ├── 0004-Make-the-download-sync-time-configurable.patch ├── 0005-Add-ret-ebpfsig-as-resource.patch ├── 0006-Add-monitot-binary-option-to-manager-configure.patch └── README.md └── syzkaller.png /README.md: -------------------------------------------------------------------------------- 1 | # Harbian-QA 2 | 3 | Testing matters to the software quality and security. The comprehensive testing process is likely to hunt more bugs which improve the stability of Hardened Debian GNU/Linux. 4 | 5 | Bug hunting through fuzzer/*-sanitizer/etc... 6 | 7 | * [(A/T/KT) - Sanitized GNU/Linux: a new way of bug hunter in FLOSS Community](http://hardenedlinux.org/system-security/2016/04/01/x_Sanitized-GNU-Linux-a-new-way-of-bug-hunter-in-FLOSS-Community.html) 8 | * [Debugging a kernel crash found by syzkaller](http://vegardno.blogspot.in/2016/08/sync-debug.html) 9 | * [A targeted kernel fuzzer bases on syzkaller](syzkaller/design_implementation_intro.md) 10 | * [Usage and implementation of coverage filter](syzkaller/cover_filter.md) 11 | * [Usage and implementation of kernel state resource](syzkaller/kstate_resource.md) 12 | -------------------------------------------------------------------------------- /bug_report/README: -------------------------------------------------------------------------------- 1 | You'll need GCC 6.x to build KCOV support: 2 | https://gcc.gnu.org/wiki/InstallingGCC 3 | 4 | Linux Kernel panic issue: How to fix hung_task_timeout_secs and blocked for more than 120 seconds problem 5 | https://www.blackmoreops.com/2014/09/22/linux-kernel-panic-issue-fix-hung_task_timeout_secs-blocked-120-seconds-problem/ 6 | -------------------------------------------------------------------------------- /profiles/local.cfg: -------------------------------------------------------------------------------- 1 | { 2 | "http": "127.0.0.1:56741", 3 | "workdir": "/citypw/src/github.com/google/syzkaller/workdir", 4 | 
"vmlinux": "-", 5 | "syzkaller": "/citypw/src/github.com/google/syzkaller", 6 | "type": "local", 7 | "count": 1, 8 | "procs": 4, 9 | "cpu": 2, 10 | "mem": 2048 11 | } 12 | -------------------------------------------------------------------------------- /static_analysis_tools/ConditionChecker/ConditionChecker.cpp: -------------------------------------------------------------------------------- 1 | #include "clang/StaticAnalyzer/Core/Checker.h" 2 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 3 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" 4 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 5 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 6 | #include "llvm/ADT/SmallString.h" 7 | #include "llvm/ADT/StringExtras.h" 8 | #include "llvm/Support/raw_ostream.h" 9 | #include "clang/AST/ParentMap.h" 10 | #include "clang/Basic/TargetInfo.h" 11 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 12 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" 13 | #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 14 | 15 | #include 16 | #include 17 | 18 | #include "Parse.h" 19 | 20 | using namespace clang; 21 | using namespace ento; 22 | 23 | /* FuncMap[FUNC_NAME] = SUBSTMT_INFO_STRUCT*/ 24 | std::map> FuncMap; 25 | /* MemCount[BASE->MEMBER] = COUNT_OF_APPEARING_IN_CONDITION */ 26 | std::map ASTMemCount; 27 | std::map CFGMemCount; 28 | 29 | namespace { 30 | class ConditionChecker : public Checker< check::ASTDecl, 31 | check::BranchCondition, 32 | check::EndAnalysis> { 33 | public: 34 | void checkASTDecl(const FunctionDecl *FD, AnalysisManager &Mgr, BugReporter &BR) const; 35 | void checkEndAnalysis(ExplodedGraph &G, BugReporter &BR, ExprEngine &Eng) const; 36 | void checkBranchCondition(const Stmt *s, CheckerContext &Ctx) const; 37 | }; 38 | } // end anonymous namespace 39 | 40 | void ConditionChecker::checkASTDecl(const FunctionDecl *FD, AnalysisManager &Mgr, BugReporter &BR) 
const { 41 | const SourceManager &SM = Mgr.getSourceManager(); 42 | const ASTContext &ASTCtx = FD->getASTContext(); 43 | std::string funcName = FD->getNameInfo().getAsString(); 44 | if (FuncMap.find(funcName) != FuncMap.end()) { 45 | return; 46 | } 47 | 48 | if (!SM.isInMainFile(FD->getBeginLoc())) { 49 | return; 50 | } 51 | 52 | std::vector funcInfoVec, parmInfoList; 53 | funcInfoVec.push_back(FuncInfo(FD)); 54 | parmInfoList = ListAllParmInfo(SM, FD); 55 | funcInfoVec.insert(funcInfoVec.end(), parmInfoList.begin(), parmInfoList.end()); 56 | 57 | if (FD->hasBody()) { 58 | /* Local variable may initialized by functions parameters */ 59 | for (Stmt *c : FD->getBody()->children()) { 60 | std::vector localVarInfo; 61 | handleChildrenStmt(SM, c, searchLocalVar, &localVarInfo); 62 | for (stmtInfo tmpInfo : localVarInfo) { 63 | if (tmpInfo.typeName == "ParmVar") { 64 | funcInfoVec.insert(funcInfoVec.end(), localVarInfo.begin(), localVarInfo.end()); 65 | break; 66 | } 67 | } 68 | } 69 | /* Search if there are member operation or parameters in condition substatement */ 70 | for (Stmt *c : FD->getBody()->children()) { 71 | std::vector condInfo; 72 | handleChildrenStmt(SM, c, searchCondition, &condInfo); 73 | for (stmtInfo tmpInfo : condInfo) { 74 | if(tmpInfo.typeName == "MemExpr" || tmpInfo.typeName == "ParmVar") { 75 | funcInfoVec.insert(funcInfoVec.end(), condInfo.begin(), condInfo.end()); 76 | break; 77 | } 78 | } 79 | for (stmtInfo i : condInfo) { 80 | /* calculate the using of member operation */ 81 | if (i.typeName == "MemExpr") { 82 | std::string key = i.base + "->" + i.target; 83 | if (ASTMemCount.find(key) != ASTMemCount.end()) { 84 | ASTMemCount[key]++; 85 | } else { 86 | ASTMemCount[key] = 1; 87 | } 88 | } 89 | } 90 | } 91 | } 92 | 93 | FuncMap[funcName] = funcInfoVec; 94 | for (stmtInfo i : funcInfoVec) { 95 | llvm::outs() << i.toString() << "\n"; 96 | } 97 | llvm::outs() << "\n"; 98 | } 99 | 100 | void ConditionChecker::checkEndAnalysis(ExplodedGraph &G, 
BugReporter &BR, ExprEngine &Eng) const { 101 | llvm::outs() << "Count MemberExpr in condition statement(AST Parse): " << "\n"; 102 | for (auto const & m : ASTMemCount) { 103 | llvm::outs() << m.first << ":" << m.second << "\n"; 104 | } 105 | llvm::outs() << "Count MemberExpr in condition statement(CFG Parse): " << "\n"; 106 | for (auto const & m : CFGMemCount) { 107 | llvm::outs() << m.first << ":" << m.second << "\n"; 108 | } 109 | } 110 | 111 | void ConditionChecker::checkBranchCondition(const Stmt *s, CheckerContext &Ctx) const { 112 | ProgramStateRef State = Ctx.getState(); 113 | const LocationContext *LC = Ctx.getLocationContext(); 114 | SVal val = State->getSVal(s, LC); 115 | 116 | const SymExpr *SE = val.getAsSymbolicExpression(); 117 | std::string thisMemRegStr = ""; 118 | std::string funcName = ""; 119 | if (SE != nullptr) { 120 | std::vector tmp; 121 | if (SE->getOriginRegion() != nullptr) { 122 | thisMemRegStr = SE->getOriginRegion()->getString(); 123 | } 124 | const Decl *D = LC->getDecl(); 125 | 126 | if (D != nullptr) { 127 | const FunctionDecl *FD = D->getAsFunction(); 128 | if (FD != nullptr) { 129 | funcName = FD->getName(); 130 | } 131 | } 132 | parseSymExpr(SE, &tmp); 133 | if (tmp.size() > 0) { 134 | llvm::outs() << "Condition parse:\n"; 135 | for (symInfo s : tmp) { 136 | s.addFuncName(funcName); 137 | llvm::outs() << s.toString() << "\n"; 138 | if (s.typeName == "MemSymbol") { 139 | std::string key = s.targetStr; 140 | if (CFGMemCount.find(key) != CFGMemCount.end()) { 141 | CFGMemCount[key]++; 142 | } else { 143 | CFGMemCount[key] = 1; 144 | } 145 | } 146 | } 147 | } 148 | } else { 149 | return; 150 | } 151 | 152 | std::vector SymbolInfo; 153 | Optional dval = val.getAs(); 154 | if (dval) { 155 | ProgramStateRef cState = State->assume(*dval, true); 156 | if (cState != nullptr) { 157 | ConstraintRangeTy Constraints = cState->get(); 158 | if (!Constraints.isEmpty()) { 159 | for (ConstraintRangeTy::iterator i = Constraints.begin(); 160 | i != 
Constraints.end(); i++) { 161 | if (i.getKey()->getOriginRegion() != nullptr) { 162 | if (i.getKey()->getOriginRegion()->getString() == thisMemRegStr) { 163 | parseSymExpr(i.getKey(), &SymbolInfo); 164 | symInfo *tmp = nullptr; 165 | for (unsigned int j = 0; j < SymbolInfo.size(); j++) { 166 | if (SymbolInfo[j].typeName == "MemSymbol") { 167 | tmp = &SymbolInfo[j]; 168 | } 169 | } 170 | for (llvm::APSInt e : splitRangeSet(i.getData())) { 171 | if (tmp != nullptr) { 172 | tmp->addConcreteValue(e); 173 | } 174 | } 175 | for (symInfo s : SymbolInfo) { 176 | llvm::outs() << s.toString() << "\n"; 177 | } 178 | llvm::outs() << "\n\n"; 179 | } 180 | } 181 | } 182 | } 183 | } 184 | } 185 | return; 186 | } 187 | 188 | void ento::registerConditionChecker(CheckerManager &mgr) { 189 | mgr.registerChecker(); 190 | } 191 | 192 | bool ento::shouldRegisterConditionChecker(const LangOptions &LO) { 193 | return true; 194 | } 195 | -------------------------------------------------------------------------------- /static_analysis_tools/ConditionChecker/Parse.h: -------------------------------------------------------------------------------- 1 | #include "clang/StaticAnalyzer/Core/Checker.h" 2 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 3 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" 4 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 5 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 6 | #include "llvm/ADT/SmallString.h" 7 | #include "llvm/ADT/StringExtras.h" 8 | #include "llvm/Support/raw_ostream.h" 9 | #include "clang/AST/ParentMap.h" 10 | #include "clang/Basic/TargetInfo.h" 11 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 12 | 13 | using namespace clang; 14 | using namespace ento; 15 | 16 | 17 | class stmtInfo { 18 | public: 19 | std::string typeName; 20 | unsigned int ID; 21 | std::string target; 22 | /* For MemberExpr base->target */ 23 | std::string base; 24 | std::string srcLine; 25 | 26 | 
void init(std::string tpnm, int64_t id, std::string targetinfo, std::string bsinfo, std::string srcline) { 27 | typeName = tpnm; 28 | ID = id; 29 | target = targetinfo; 30 | base = bsinfo; 31 | srcline.erase(std::remove(srcline.begin(), srcline.end(), '\n'), srcline.end()); 32 | srcline.erase(std::remove(srcline.begin(), srcline.end(), '\t'), srcline.end()); 33 | srcLine = srcline; 34 | } 35 | 36 | std::string toString() { 37 | std::string retStr, IDStr; 38 | char IDChars[0x10]; 39 | if (ID > 0) { 40 | sprintf(IDChars, "0x%x", ID); 41 | IDStr = "ID-" + std::string(IDChars); 42 | } 43 | retStr = "[" + typeName + "] "; 44 | if (ID != 0) { 45 | retStr = retStr.append("ID-" + IDStr + " "); 46 | } 47 | if (base != "") { 48 | retStr = retStr.append(base) + "->"; 49 | } 50 | retStr = retStr.append(target); 51 | if (srcLine != "") { 52 | if (target != "") { 53 | retStr = retStr.append("\n"); 54 | } 55 | retStr = retStr.append("RawSrcLine: " + srcLine); 56 | } 57 | return retStr; 58 | } 59 | }; 60 | 61 | class symInfo { 62 | public: 63 | std::string typeName; 64 | unsigned int ID; 65 | std::string targetStr; 66 | std::string funcName; 67 | std::vector concreteVal; 68 | 69 | void init(std::string nm, unsigned int id, std::string ts){ 70 | typeName = nm; 71 | ID = id; 72 | targetStr = ts; 73 | } 74 | void addFuncName(std::string fn) { 75 | funcName = fn; 76 | } 77 | void addConcreteValue(llvm::APSInt e) { 78 | concreteVal.push_back(e); 79 | } 80 | std::string toString() { 81 | std::string retStr = ""; 82 | retStr = retStr.append(funcName + " "); 83 | retStr = retStr.append("[" + typeName + "] "); 84 | retStr = retStr.append(targetStr); 85 | retStr = retStr.append(" {"); 86 | for (llvm::APSInt e : concreteVal) { 87 | retStr = retStr.append("0x" + e.toString(0x10) + ", "); 88 | } 89 | retStr = retStr.append("}"); 90 | return retStr; 91 | } 92 | }; 93 | 94 | typedef bool(*stmtHandle)(const SourceManager &SM, const Stmt *s, std::vector *info); 95 | 96 | /* Recursicely parse the 
children statement, use the stmtHandle function */ 97 | void handleChildrenStmt(const SourceManager &SM, const Stmt *s, stmtHandle handle, std::vector *info); 98 | void parseSymExpr(const SymExpr *s, std::vector *SymbolInfo); 99 | 100 | /* Implement of handle specified statement */ 101 | bool searchCondVar(const SourceManager &SM, const Stmt *s, std::vector *info); 102 | bool searchLocalVar(const SourceManager &SM, const Stmt *s, std::vector *info); 103 | bool searchParm(const SourceManager &SM, const Stmt *s, std::vector *info); 104 | bool searchCondition(const SourceManager &SM, const Stmt *s, std::vector *info); 105 | 106 | std::string srcLineToString(const SourceManager &SM, SourceLocation SRs, SourceLocation SRe); 107 | stmtInfo FuncInfo(const FunctionDecl *FD); 108 | std::vector ListAllParmInfo(const SourceManager &SM, const FunctionDecl *FD); 109 | std::vector splitRangeSet(RangeSet RS); 110 | -------------------------------------------------------------------------------- /static_analysis_tools/ConditionChecker/README.md: -------------------------------------------------------------------------------- 1 | # Clang checker for symbolic execution 2 | 3 | 4 | ## Introduction of Clang checker 5 | 6 | Clang checker can be a great static analysis tool, you can do lots of amazing work by write your checker. For example, you can write a checker to do taint analysis, symbolic execution ... 7 | We write a checker to static analyse which data structure is relevant to satisfy conditions constraint( c language). Unlike IR parser, clang checker still remain the programing syntax information in compile time, it's readable if you reconstruct the source code from these information. Since we just want to extract these information of symbolic inputs, but not run a symbolic execution base on them, these information should be more readable. So, Clang checker is the best choice in our case. 
8 | 9 | 10 | ### Clang checker guide 11 | 12 | Clang checker have lots of great tutorial and document. You can easily build your clang with customized checker. 13 | * [Checker Developer Manual](https://clang-analyzer.llvm.org/checker_dev_manual.html) 14 | * [How to Write a Checker in 24 Hours](https://llvm.org/devmtg/2012-11/Zaks-Rose-Checker24Hours.pdf) 15 | * [Checker analyzer-guide](https://github.com/haoNoQ/clang-analyzer-guide/releases/download/v0.1/clang-analyzer-guide-v0.1.pdf) 16 | 17 | Also, you can alse refer to "/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp" and the checker implement under "/clang/lib/StaticAnalyzer/Checkers/*". 18 | 19 | 20 | ### Clang symbolic execution 21 | 22 | At first, we only want to calculate which members of data structure are used in condition statement frequently and which members used by a function. Actually, in this case, AST parse checker is enough, so we implement these in ConditionChecker::ASTDecl() interface. AST-base parse is much faster than path-sensitive parse. Statistic result will be displayed in ConditionChecker::EndAnalysis(). 23 | But, AST-base parse is hard to find out constraint of a condition. So, we also write a path-sensitive checker. Interface with parameter "CheckerContext &Ctx" is a path-sensitive checker interface. In path-sensitive parse, checker will walk thought all node( ExplodedNode) of ExplodedGraph. ExplodedGraph is a graph of paths of CFG and their ProgramState( clang option "-analyzer-checker=debug.ViewExplodedGraph" can dump the ExplodedNode). So, in our checker, while ConditionChecker::BranchCondition() is called, that means a branch condition is found in that path. We can extract the constriant( range or concrete value) from the ProgramState that attached to that node. 
24 | 25 | 26 | ## Compare to other symbolic execution 27 | 28 | | Tool | static/dynamic | symbolize | parse source | 29 | |------|--------------- | --------- | ------------ | 30 | | Clang | static | original source | input of every func | 31 | | KLEE | static+dynamic | LLVM IR | input of entry func | 32 | | CBMC | static | original source | input of entry func | 33 | 34 | Compare to KLEE, clang checker is totally a static analyzer. Clang won't execute any program. Clang ProgramState will maintain the state( constraint) of reaching a position of one path. While KLEE, CBMC only symbolize the input of entry. So, we can see, if a local variable initialized by the input of entry and pass it to other functions. These functions may use it in condition, and KLEE will not trace the variable in such case. But this condition is also indirectly from input of entry. Clang treat inputs of any functions as symbolic variable, so we can trace those mishandled condition. 35 | * We have two tutorials for [KLEE](https://github.com/hardenedlinux/Debian-GNU-Linux-Profiles/blob/master/docs/harbian_qa/symexec/klee.md) and [CBMC](https://github.com/hardenedlinux/Debian-GNU-Linux-Profiles/blob/master/docs/harbian_qa/symexec/cbmc_kern.md). 36 | 37 | ## For kernel fuzzing 38 | In our case, we use syzkaller for kernel fuzzing. While syzkaller only collect coverage as feedback. We try to [trace more state](../syzkaller/kstat_demo/README.md) if it is widely use in condition statement. Clang path-sensitive checker is what we actually need. After static analysis, we calculate which states( data) are widely used in conditions. These states will be collected as state-base block( syzkaller resource) at runtime. And we also collect inputs of some important functions to help to fuzz important paths more efficiently. 39 | An example of part output: 40 | ``` 41 | clang -Xclang -analyze -Xclang -analyzer-checker=debug.ConditionChecker ... -c /root/linux/net/ipv4/tcp.c 42 | ... 
43 | # AST-base parse 44 | [Function] ID-ID-0x271f29 tcp_ioctl 45 | [ParmVar] ID-ID-0x271ef4 struct sock *sk 46 | [ParmVar] ID-ID-0x271f04 intcmd 47 | [ParmVar] ID-ID-0x271f14 unsigned longarg 48 | [LocalVar] ID-ID-0x271f45 struct tcp_sock * 49 | [Condition] RawSrcLine: if (sk->sk_state == TCP_LISTEN) 50 | [BinaryOperator] unknown == 0x1ffb6f 51 | [MemExpr] ID-ID-0x2024cc struct sock_common->volatile unsigned char skc_state 52 | [MemExpr] ID-ID-0x202842 struct sock *->struct sock_common __sk_common 53 | [DeclRefExpr] ID-ID-0x1ffb6f int TCP_LISTEN 54 | ... 55 | # Path-sensitive parse 56 | ... 57 | tcp_poll [ElementCast] (struct tcp_sock)struct sock * {} 58 | tcp_poll [MemSymbol] struct socket *->struct sock * sk {} 59 | tcp_poll [MemSymbol] struct tcp_sock *->u16 urg_data {} 60 | tcp_poll [SymIntExpr] 0x100 {} 61 | ... 62 | tcp_poll [MemSymbol] struct sock *->int sk_err {} 63 | [MemSymbol] struct sock *->int sk_err {0x-80000000, 0x-1, 0x1, 0x7FFFFFFF, } 64 | ... 65 | 66 | # AST-base parse count 67 | ... 68 | struct sock *->struct sock_common __sk_common:39 69 | ... 70 | struct sock_common->volatile unsigned char skc_state:39 71 | ... 72 | struct tcp_sock *->u8 repair:14 73 | ... 74 | 75 | # Path-senstive parse count 76 | ... 77 | struct sock *->struct sock_common __sk_common:119 78 | ... 79 | struct sock_common *->volatile unsigned char skc_state:61 80 | ... 
81 | struct tcp_sock *->u8 repair:78 82 | ``` -------------------------------------------------------------------------------- /static_analysis_tools/IRParser/extend_func.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "llvm/IR/DebugInfo.h" 11 | #include "llvm/IR/DIBuilder.h" 12 | #include "llvm/IR/Function.h" 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "log.h" 26 | #include "info.h" 27 | 28 | 29 | using namespace std; 30 | using namespace llvm; 31 | 32 | 33 | std::map funcsInfo; 34 | 35 | 36 | std::vector readFuncList(std::string funcListPath); 37 | void getCalledFunc(Module *mod, Function *func, int blockNum, int level); 38 | void writeToNewFuncList(std::vector funcList, std::string oldPath); 39 | void writeToLogDir(std::string fn, std::string funcCallTree, std::string dirPath); 40 | 41 | 42 | int main(int argc, const char *argv[]) { 43 | if (argc < 5 || argv[1] == nullptr) { 44 | outs() << "./extern_func functions_list ir_path call_depth block_num log_dir\n"; 45 | return 1; 46 | } 47 | std::string FuncListPath = argv[1]; 48 | std::string IRPath = argv[2]; 49 | unsigned depth = std::stoi(argv[3]); 50 | unsigned blockNum = std::stoi(argv[4]); 51 | std::string logDir = argv[5]; 52 | 53 | std::vector funcList = readFuncList(FuncListPath); 54 | std::vector extFuncList; 55 | 56 | LLVMContext ctx; 57 | SMDiagnostic err; 58 | std::unique_ptr mod_unique = parseIRFile(IRPath, err, ctx); 59 | if (mod_unique == nullptr) { 60 | outs() << FAIL << "Failed to open ir file: " << IRPath << "\n" << RESET; 61 | return 1; 62 | } 63 | Module *mod = mod_unique.get(); 64 | 65 | for (std::string fn : funcList) { 66 | Function *func = mod->getFunction(fn); 67 | getCalledFunc(std::move(mod), func, 
blockNum, depth); 68 | } 69 | 70 | for (std::string fn : funcList) { 71 | std::string funcCallTree; 72 | if (funcsInfo.find(fn) != funcsInfo.end()) 73 | funcCallTree = funcsInfo[fn].callTree(funcsInfo, 0, depth); 74 | else { 75 | funcCallTree = fn; 76 | outs() << FAIL << fn << " was not found!\n"; 77 | } 78 | writeToLogDir(fn, funcCallTree, logDir); 79 | } 80 | std::vector newFuncList; 81 | for (auto &fn : funcsInfo) { 82 | if (fn.second.getBlockNum() > blockNum) 83 | newFuncList.push_back(fn.first); 84 | } 85 | writeToNewFuncList(newFuncList, FuncListPath); 86 | } 87 | 88 | std::vector readFuncList(std::string funcListPath) { 89 | fstream funcListFile(funcListPath); 90 | std::vector funcList; 91 | std::string fn = ""; 92 | if (!funcListFile.is_open()) { 93 | outs() << FAIL << "Failed to open init function list\n" << RESET; 94 | return funcList; 95 | } 96 | while (getline(funcListFile, fn)) { 97 | if(fn != "") 98 | funcList.push_back(fn); 99 | } 100 | return funcList; 101 | } 102 | 103 | /* Recursively get the called functions, use blockNum and level limit functions */ 104 | void getCalledFunc(Module *mod, Function *func, int blockNum, int level) { 105 | if (level < 1) 106 | return; 107 | if (func == nullptr) { 108 | outs() << FAIL << "unvariable function\n"<< RESET; 109 | return; 110 | } 111 | if (func->size() < 1) { 112 | func = mod->getFunction(func->getName()); 113 | } 114 | 115 | if (func != nullptr) { 116 | funcInfoInCFG *thisFuncInfo = new funcInfoInCFG(func->getName(), func->size()); 117 | if (funcsInfo.find(func->getName()) == funcsInfo.end()) 118 | funcsInfo[func->getName()] = *thisFuncInfo; 119 | delete thisFuncInfo; 120 | } 121 | 122 | if (func != nullptr && func->size() > 0) { 123 | for (BasicBlock &bb : *func) { 124 | for (Instruction &i : bb) { 125 | CallInst *callInst = dyn_cast(&i); 126 | if (callInst != nullptr) { 127 | Function *calledFunc = callInst->getCalledFunction(); 128 | if (calledFunc == nullptr) { 129 | calledFunc = 
dyn_cast(callInst->getCalledValue()->stripPointerCasts()); 130 | } 131 | if (calledFunc != nullptr) { 132 | /* Skip the instument function */ 133 | if (calledFunc->getName().find("saniti") != std::string::npos) 134 | continue; 135 | if (calledFunc->getName().find("asan") != std::string::npos) 136 | continue; 137 | if (calledFunc->getName().find("llvm.") != std::string::npos) 138 | continue; 139 | /* Recursive call with a depth level */ 140 | funcsInfo[func->getName()].addCalledFunc(calledFunc->getName()); 141 | getCalledFunc(mod, calledFunc, blockNum, level - 1); 142 | } 143 | } 144 | } 145 | } 146 | } 147 | } 148 | 149 | void writeToNewFuncList(std::vector funcList, std::string oldPath) { 150 | ofstream newFuncList; 151 | newFuncList.open(oldPath + ".new"); 152 | for (std::string f : funcList) { 153 | newFuncList << f << "\n"; 154 | } 155 | newFuncList.close(); 156 | } 157 | 158 | void writeToLogDir(std::string fn, std::string funcCallTree, std::string dirPath) { 159 | ofstream funcLogFile; 160 | funcLogFile.open(dirPath + "/" + fn); 161 | funcLogFile << funcCallTree << "\n"; 162 | funcLogFile.close(); 163 | } 164 | -------------------------------------------------------------------------------- /static_analysis_tools/IRParser/info.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "log.h" 13 | 14 | using namespace std; 15 | 16 | class sanCallInfo { 17 | private: 18 | std::string rawInst; 19 | unsigned blockID; 20 | unsigned address; 21 | public: 22 | sanCallInfo(std::string rawI, unsigned bID) { 23 | rawInst = rawI; 24 | blockID = bID; 25 | } 26 | sanCallInfo(){}; 27 | void AttachAddress(unsigned addr) { 28 | address = addr; 29 | } 30 | unsigned getBlockID() { 31 | return blockID; 32 | } 33 | std::string getAsLine() { 34 | return std::to_string(address) + ": " + rawInst; 35 | } 36 | unsigned 
getAddress() { 37 | return address; 38 | } 39 | }; 40 | 41 | class blockInfo { 42 | private: 43 | unsigned blockID; 44 | std::string blockName; 45 | std::string funcName; 46 | unsigned long count; 47 | 48 | std::vector sanCalls; 49 | std::vector succBlock; 50 | std::vector predBlock; 51 | 52 | public: 53 | blockInfo(unsigned bID, std::string bName, std::string fName, unsigned cnt) { 54 | blockID = bID; 55 | blockName = bName; 56 | funcName = fName; 57 | count = cnt; 58 | } 59 | 60 | blockInfo(){}; 61 | 62 | unsigned getBlockID() { 63 | return blockID; 64 | } 65 | 66 | void addSuccBlock(unsigned bID) { 67 | for (unsigned i : succBlock) { 68 | if (i == bID) 69 | return; 70 | } 71 | succBlock.push_back(bID); 72 | } 73 | 74 | void addPredBlock(unsigned bID) { 75 | for (unsigned i : predBlock) { 76 | if (i == bID) 77 | return; 78 | } 79 | predBlock.push_back(bID); 80 | } 81 | 82 | void addSanCall(sanCallInfo sc) { 83 | sanCalls.push_back(sc); 84 | } 85 | 86 | std::vector getSanCalls() { 87 | return sanCalls; 88 | } 89 | 90 | unsigned getForwardEdgeNum() { 91 | /* We use this method to get the weight of this block */ 92 | //return count; 93 | return succBlock.size(); 94 | } 95 | 96 | std::string getAsJson() { 97 | std::string ret = ""; 98 | ret += "{\n"; 99 | ret += "Function: \"" + funcName + "\",\n"; 100 | ret += "Block: \"" + blockName + "\",\n"; 101 | ret += "BlockID: " + std::to_string(blockID) + ",\n"; 102 | ret += "Count: " + std::to_string(count) + ",\n"; 103 | ret += "Predblocks: ["; 104 | for (unsigned b : predBlock) 105 | ret += std::to_string(b) + ", "; 106 | ret += "],\n"; 107 | ret += "Succblocks: ["; 108 | for (unsigned b : succBlock) 109 | ret += std::to_string(b) + ", "; 110 | ret += "],\n"; 111 | ret += "SanitizerCall: [\n"; 112 | for (sanCallInfo sc : sanCalls) { 113 | ret += "\t" + sc.getAsLine() + ",\n"; 114 | } 115 | ret += "\t],\n"; 116 | ret += "}\n"; 117 | return ret; 118 | } 119 | }; 120 | 121 | class gepInfo { 122 | private: 123 | std::string 
structName; 124 | std::string fieldName; 125 | unsigned bitWidth; 126 | unsigned count; 127 | unsigned ID; 128 | 129 | unsigned hash(string s) { 130 | std::hash hashFunc; 131 | return hashFunc(s); 132 | } 133 | 134 | std::string stripNum(std::string name) { 135 | size_t len = name.size(); 136 | char tmp[len]; 137 | strncpy(tmp, name.c_str(), len); 138 | if (len < 1) 139 | return name; 140 | /* llvm will add suffix to variable name, we have to strip away*/ 141 | while ((tmp[len-1] <= '9' && tmp[len-1] >= '0' && len > 1) 142 | || (tmp[len-1] == 'i' && tmp[len-2] == '.' && len > 2) 143 | || (tmp[len-1] == '.' && len > 1)) { 144 | if (tmp[len-1] == 'i' && tmp[len-2] == '.') { 145 | tmp[len-1] = 0; 146 | tmp[len-2] = 0; 147 | len -= 2; 148 | continue; 149 | } 150 | tmp[len-1] = 0; 151 | len--; 152 | } 153 | name = name.substr(0, len); 154 | return name; 155 | } 156 | 157 | public: 158 | gepInfo(std::string srtName, std::string fName, unsigned bitWid) { 159 | structName = srtName; 160 | fieldName = fName; 161 | bitWidth = bitWid; 162 | ID = hash(getStructName()); 163 | count = 0; 164 | } 165 | 166 | gepInfo(){}; 167 | 168 | std::string getStructName() { 169 | return stripNum(structName) + "->" + stripNum(fieldName); 170 | } 171 | 172 | void incCount() {count++;} 173 | 174 | /* We use this method to get the weight of a kernel state */ 175 | unsigned getCount() {return count;} 176 | 177 | unsigned getGEPointerID() {return ID;} 178 | 179 | std::string getAsJson() { 180 | std::string ret; 181 | ret += "{\n"; 182 | ret += "\tName: " + structName + "->" + fieldName + ",\n"; 183 | ret += "\tBitWidth: " + std::to_string(bitWidth) + ",\n"; 184 | ret += "\tID: " + std::to_string(ID) + "\n"; 185 | ret += "}\n"; 186 | return ret; 187 | } 188 | }; 189 | 190 | class funcInfoInCFG { 191 | private: 192 | std::string funcName; 193 | unsigned blockNum; 194 | 195 | std::vector calledFuncs; 196 | public: 197 | funcInfoInCFG(std::string fName, unsigned bNum) { 198 | funcName = fName; 199 | 
blockNum = bNum; 200 | } 201 | 202 | funcInfoInCFG(){}; 203 | 204 | void addCalledFunc(std::string cFunc) { 205 | for (std::string f : calledFuncs) { 206 | if (f == cFunc) 207 | return; 208 | } 209 | calledFuncs.push_back(cFunc); 210 | } 211 | 212 | unsigned getBlockNum() {return blockNum;} 213 | 214 | std::string callTree(std::map funcInfoList, int startlevel, int depth) { 215 | std::string ret = thisFuncInfo(startlevel); 216 | if (depth < 1) 217 | return ret; 218 | for (std::string calledFunc : calledFuncs) { 219 | if (funcInfoList.find(calledFunc) != funcInfoList.end()) { 220 | ret += funcInfoList[calledFunc].callTree(funcInfoList, startlevel + 1, depth - 1); 221 | } 222 | } 223 | return ret; 224 | } 225 | std::string thisFuncInfo(int level) { 226 | std::string ret = "|"; 227 | for (unsigned i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "llvm/IR/DebugInfo.h" 11 | #include "llvm/IR/DIBuilder.h" 12 | #include "llvm/IR/Function.h" 13 | #include 14 | #include 15 | #include 16 | #include "llvm/Support/BlockFrequency.h" 17 | #include "llvm/Analysis/BlockFrequencyInfo.h" 18 | #include "llvm/Analysis/BranchProbabilityInfo.h" 19 | #include "llvm/IR/Dominators.h" 20 | #include "llvm/Analysis/PostDominators.h" 21 | #include "llvm/Analysis/LoopInfo.h" 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "log.h" 32 | #include "info.h" 33 | 34 | 35 | using namespace llvm; 36 | using namespace std; 37 | 38 | 39 | std::vector getFuncListFromFile(std::string funcListFile); 40 | std::vector listIRFile(std::string IRFileDir); 41 | Function &getFuncFromMods(std::vector IRFiles, std::string funcName); 42 | std::map getBlockIDMap(Function *func); 43 | std::map getBlockInfo(Function *func, std::map blockIDMap); 44 | std::string getASMCodeFileName(std::string ASMCodeDir, std::string sourceFile); 45 | std::vector 
getSanCallsFromAsmLine(std::string asmFileName, std::string funcName, std::map blockInfosMap); 46 | std::vector getAddrFromObjdumpAsm(std::string vmLinux, std::string funcName); 47 | void writeDebugInfo(std::map blockInfos, std::string path); 48 | void writeFuncAddrMap(std::map blockInfo, std::string path); 49 | uint64_t encode(unsigned addr, unsigned num); 50 | 51 | 52 | int main(int argc, const char *argv[]) { 53 | if (argc < 5) { 54 | outs() << FAIL << "./kcov_map ir_dir asm_dir vmlinux func_list log_dir\n" << RESET; 55 | return 1; 56 | } 57 | std::string IRFileDir = argv[1]; 58 | std::string ASMCodeDir = argv[2]; 59 | std::string VMLinux = argv[3]; 60 | std::string FuncListFile = argv[4]; 61 | std::string LogDir = argv[5]; 62 | 63 | std::vector FuncList = getFuncListFromFile(FuncListFile); 64 | std::vector IRFiles = listIRFile(IRFileDir); 65 | 66 | for (std::string funcName : FuncList) { 67 | bool found = false; 68 | /* Search which riFile the function is loacted in */ 69 | for (std::string f : IRFiles) { 70 | LLVMContext context; 71 | SMDiagnostic error; 72 | std::unique_ptr mod = parseIRFile(f, error, context); 73 | Module const *mod_const = mod.get(); 74 | if (mod_const == nullptr) { 75 | outs() << FAIL_LINE("Failed to open " + f + "."); 76 | return 1; 77 | } 78 | 79 | Function *func = mod_const->getFunction(funcName); 80 | if (func == nullptr) continue; 81 | 82 | if (func != nullptr) { 83 | if (func->size() == 0) { 84 | //outs() << WARN_LINE("Function " + funcName + " declaration, pass"); 85 | continue; 86 | } 87 | found = true; 88 | outs() << SUCC_LINE("Function " + funcName + " was found"); 89 | 90 | /* Get the IR of function, extract block infomation */ 91 | std::map blockIDMap = getBlockIDMap(func); 92 | std::map blockInfosMap = getBlockInfo(func, blockIDMap); 93 | /* Get the sanitizer_* call of assamble code */ 94 | std::string asmFile = getASMCodeFileName(ASMCodeDir, mod_const->getSourceFileName()); 95 | std::vector sanCallInfos = 
getSanCallsFromAsmLine(asmFile, funcName, blockIDMap); 96 | /* objdump assebly code should be matched to assemble one by one */ 97 | std::vector objdumpAddrs = getAddrFromObjdumpAsm(VMLinux, funcName); 98 | if (sanCallInfos.size() != objdumpAddrs.size()) { 99 | outs() << std::to_string(sanCallInfos.size()) << ":" << std::to_string(objdumpAddrs.size()) << "\n"; 100 | outs() << FAIL_LINE("Function " + funcName + " assemble and objdump is mismatch\n"); 101 | continue; 102 | } 103 | 104 | unsigned idx = 0; 105 | for (sanCallInfo sc : sanCallInfos) { 106 | sc.AttachAddress(objdumpAddrs[idx]); 107 | blockInfosMap[sc.getBlockID()].addSanCall(sc); 108 | idx++; 109 | } 110 | writeDebugInfo(blockInfosMap, LogDir + "/" + funcName + ".json"); 111 | writeFuncAddrMap(blockInfosMap, LogDir + "/" + funcName + ".addr.map"); 112 | break; 113 | } 114 | } 115 | if (!found) 116 | outs() << FAIL_LINE("Function " + funcName + " was not found"); 117 | } 118 | } 119 | 120 | std::vector getFuncListFromFile(std::string funcListPath) { 121 | fstream funcListFile(funcListPath); 122 | std::vector funcList; 123 | std::string fn = ""; 124 | if (!funcListFile.is_open()) { 125 | outs() << FAIL_LINE( "Failed to open init function list"); 126 | return funcList; 127 | } 128 | while (getline(funcListFile, fn)) { 129 | if(fn != "") 130 | funcList.push_back(fn); 131 | } 132 | return funcList; 133 | } 134 | 135 | std::vector listIRFile(std::string IRDirPath) { 136 | std::vector irList; 137 | struct dirent *entry; 138 | DIR *dir = opendir(IRDirPath.c_str()); 139 | if (dir == NULL) { 140 | outs() << FAIL_LINE("Dir wrong"); 141 | return irList; 142 | } 143 | while ((entry = readdir(dir)) != NULL) { 144 | std::string fn(entry->d_name); 145 | if (fn.find(".ll") != std::string::npos) 146 | irList.push_back(IRDirPath + "/" + entry->d_name); 147 | } 148 | closedir(dir); 149 | return irList; 150 | } 151 | 152 | std::map getBlockIDMap(Function *func) { 153 | std::map blockIDMap; 154 | unsigned bID = 0, count = 0; 
155 | std::string funcName = func->getName(); 156 | for (BasicBlock &bb : *func) { 157 | count++; 158 | std::string blockName = bb.getName(); 159 | if (blockName != "") 160 | blockIDMap[blockName] = bID++; 161 | else { 162 | bb.setValueName(ValueName::Create(funcName + "." + std::to_string(count))); 163 | blockIDMap[funcName + "." + std::to_string(count)] = bID++; 164 | } 165 | } 166 | 167 | return blockIDMap; 168 | } 169 | 170 | std::map getBlockInfo(Function *func, std::map blockIDMap) { 171 | std::map blockInfosMap; 172 | DominatorTree *DT = new DominatorTree(const_cast(func->getFunction())); 173 | LoopInfo *LI = new LoopInfo(*DT); 174 | BranchProbabilityInfo *BPI = new BranchProbabilityInfo(func->getFunction(), *LI); 175 | BlockFrequencyInfo *BFI = new BlockFrequencyInfo(func->getFunction(), *BPI, *LI); 176 | unsigned bID = 0; 177 | BasicBlock &entry = func->getEntryBlock(); 178 | 179 | /* The frequency of entry block is maxium, 180 | * all the maxium will formalize to 100 181 | */ 182 | unsigned long zoom = 1, maxFreq = BFI->getBlockFreq(&entry).getFrequency(); 183 | if (maxFreq > 100) 184 | zoom = maxFreq / 100; 185 | else if (maxFreq > 0 && maxFreq <= 100) 186 | zoom = 100 / maxFreq; 187 | else 188 | zoom = 1; 189 | if (zoom < 1) 190 | zoom = 1; 191 | 192 | for (BasicBlock &bb : *func) { 193 | bID++; 194 | std::string blockName = bb.getName(); 195 | std::string funcName = func->getName(); 196 | if (blockName == "") 197 | blockName = funcName + "." 
+ std::to_string(bID); 198 | unsigned long weight = BFI->getBlockFreq(&bb).getFrequency(); 199 | if (maxFreq > 100) 200 | weight = weight / zoom; 201 | else 202 | weight = weight * zoom; 203 | /* weight/zoom maybe zero */ 204 | if (weight > 100 || weight < 1) 205 | weight = 1; 206 | 207 | blockInfo binfo(blockIDMap[blockName], blockName, funcName, weight); 208 | for (BasicBlock *predbb : predecessors(&bb)) { 209 | std::string predbbName = predbb->getName(); 210 | if (predbbName != "") 211 | binfo.addPredBlock(blockIDMap[predbbName]); 212 | else 213 | binfo.addPredBlock(0xffff); 214 | } 215 | for (BasicBlock *succbb : successors(&bb)) { 216 | std::string succbbName = succbb->getName(); 217 | if (succbbName != "") 218 | binfo.addSuccBlock(blockIDMap[succbbName]); 219 | else 220 | binfo.addSuccBlock(0xffff); 221 | } 222 | blockInfosMap[binfo.getBlockID()] = binfo; 223 | } 224 | delete DT; 225 | delete LI; 226 | delete BPI; 227 | delete BFI; 228 | 229 | return blockInfosMap; 230 | } 231 | 232 | std::string getASMCodeFileName(std::string ASMCodeDir, std::string sourceFile) { 233 | while (std::size_t pos = sourceFile.find("/") != std::string::npos) { 234 | sourceFile = sourceFile.substr(pos); 235 | } 236 | std::size_t pos = sourceFile.find("."); 237 | sourceFile = sourceFile.substr(0, pos + 1) + "s"; 238 | return ASMCodeDir + "/" + sourceFile; 239 | } 240 | 241 | std::vector getSanCallsFromAsmLine(std::string asmFileName, std::string funcName, std::map blockIDMap) { 242 | fstream asmFile(asmFileName); 243 | std::vector sanCallInfos; 244 | if (!asmFile.is_open()) { 245 | outs() << FAIL_LINE(asmFileName + " can't be found\n"); 246 | return sanCallInfos; 247 | } 248 | 249 | std::string ln; 250 | bool infunc = false; 251 | std::string blockName = ""; 252 | while (getline(asmFile, ln)) { 253 | if (ln.size() < 1) 254 | continue; 255 | if (ln.find(funcName + ":") != std::string::npos) { 256 | if (ln.find("@" + funcName) != std::string::npos) { 257 | infunc = true; 258 | 
continue; 259 | } 260 | } 261 | if (ln.find("Lfunc_end") != std::string::npos && infunc) { 262 | infunc = false; 263 | break; 264 | } 265 | if (infunc) { 266 | std::size_t foundPos = ln.find("# %"); 267 | if (foundPos != std::string::npos) { 268 | ln = ln.substr(foundPos + 3, ln.size()-1); 269 | foundPos = ln.find("# %"); 270 | if (foundPos != std::string::npos) 271 | ln = ln.substr(foundPos + 3, ln.size()-1); 272 | if (ln.find("SP_return") != std::string::npos) continue; 273 | if (blockIDMap.find(ln) == blockIDMap.end()) { 274 | continue; 275 | } 276 | blockName = ln; 277 | } 278 | if (ln.find("__sanitizer_cov_trace") != std::string::npos) { 279 | sanCallInfo scall(ln, blockIDMap[blockName]); 280 | sanCallInfos.push_back(scall); 281 | } 282 | } 283 | } 284 | return sanCallInfos; 285 | } 286 | 287 | std::vector getAddrFromObjdumpAsm(std::string vmLinux, std::string funcName) { 288 | std::vector address; 289 | std::string objdump = "objdump"; 290 | std::string disAsmFunc = "--disassemble="; 291 | std::string noRaw = "--no-show-raw-insn"; 292 | std::string cmd = objdump + " " + disAsmFunc+funcName + " " + noRaw + " " + vmLinux; 293 | FILE *pipe = popen(cmd.c_str(), "r"); 294 | if (!pipe) { 295 | outs() << FAIL_LINE("Failed to read objdump\n"); 296 | outs() << WARN_LINE("Try this command line: \"" + cmd + "\" to get the output\n"); 297 | return address; 298 | } 299 | 300 | char buffer[0x100]; 301 | bool infunc = false; 302 | bool foundSanCall = false; 303 | while (fgets(buffer, 0x100, pipe) != NULL) { 304 | std::string ln(buffer); 305 | if (ln.find("<" + funcName + ">:") != std::string::npos) { 306 | infunc = true; 307 | continue; 308 | } 309 | if (infunc && ln == "\n") 310 | break; 311 | if (infunc && foundSanCall) { 312 | foundSanCall = false; 313 | std::size_t colon = ln.find(":"); 314 | if (colon == std::string::npos) { 315 | outs() << FAIL_LINE("Failed to get address of " + ln); 316 | break; 317 | } 318 | uint64_t addr_full = std::stoull(ln.substr(0, colon), 
nullptr, 16); 319 | unsigned addr = unsigned(addr_full); 320 | address.push_back(addr); 321 | foundSanCall = false; 322 | } 323 | if (infunc) { 324 | if (ln.find("<__sanitizer_cov_trace") != std::string::npos) { 325 | foundSanCall = true; 326 | continue; 327 | } 328 | } 329 | } 330 | return address; 331 | } 332 | 333 | void writeDebugInfo(std::map blockInfos, std::string path) { 334 | ofstream json; 335 | json.open(path); 336 | for (auto i : blockInfos) { 337 | json << i.second.getAsJson(); 338 | } 339 | json.close(); 340 | } 341 | 342 | void writeFuncAddrMap(std::map blockInfos, std::string path) { 343 | ofstream map; 344 | map.open(path); 345 | for (auto i : blockInfos) { 346 | auto bi = i.second; 347 | for (auto sc : bi.getSanCalls()) { 348 | if (sc.getAsLine().find("trace_pc") != std::string::npos) 349 | map << "0x" << std::hex << encode(sc.getAddress(), bi.getForwardEdgeNum()) << "\n"; 350 | if (sc.getAsLine().find("trace_srt") != std::string::npos) 351 | map << "0x" << std::hex << encode(sc.getAddress(), 1) << "\n"; 352 | } 353 | } 354 | map.close(); 355 | } 356 | 357 | uint64_t encode(unsigned addr, unsigned num) { 358 | uint64_t ret = (uint64_t)num; 359 | return ((ret&0xffff)<<32) | (uint64_t)(addr&0xffffffff); 360 | } 361 | -------------------------------------------------------------------------------- /static_analysis_tools/IRParser/kstate_map.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "llvm/IR/DebugInfo.h" 11 | #include "llvm/IR/DIBuilder.h" 12 | #include "llvm/IR/Function.h" 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "log.h" 26 | #include "info.h" 27 | 28 | 29 | using namespace llvm; 30 | using namespace std; 31 | 32 | 33 | std::vector 
getFuncListFromFile(std::string funcListFile); 34 | std::vector getGEPInfoFromFunc(Function *func); 35 | void writeDebugInfo(std::vector info, std::string path); 36 | void writeFuncAddrMap(std::vector info, std::string path); 37 | 38 | int main(int argc, const char *argv[]) { 39 | if (argc < 3) { 40 | outs() << FAIL << "./kcov_map ir_path func_list log_dir\n" << RESET; 41 | return 1; 42 | } 43 | std::string IRPath = argv[1]; 44 | std::string FuncListFile = argv[2]; 45 | std::string LogDir = argv[3]; 46 | 47 | std::vector FuncList = getFuncListFromFile(FuncListFile); 48 | std::map gepInfoMap; 49 | LLVMContext context; 50 | SMDiagnostic error; 51 | std::unique_ptr mod = parseIRFile(IRPath, error, context); 52 | Module const *mod_const = mod.get(); 53 | if (mod_const == nullptr) { 54 | outs() << FAIL_LINE("Failed to open " + IRPath + "."); 55 | return 1; 56 | } 57 | 58 | for (std::string funcName : FuncList) { 59 | Function *func = mod_const->getFunction(funcName); 60 | if (func != nullptr && func->size() > 0) { 61 | outs() << SUCC_LINE("Function " + funcName + " was found"); 62 | std::vector gepInfos = getGEPInfoFromFunc(func); 63 | for (gepInfo i : gepInfos) { 64 | if (gepInfoMap.find(i.getGEPointerID()) == gepInfoMap.end()) { 65 | gepInfoMap[i.getGEPointerID()] = i; 66 | } 67 | gepInfoMap[i.getGEPointerID()].incCount(); 68 | } 69 | writeDebugInfo(gepInfos, LogDir + "/" + funcName + "state.json"); 70 | writeFuncAddrMap(gepInfos, LogDir + "/" + funcName + ".state.map"); 71 | continue; 72 | } 73 | if (func == nullptr) 74 | outs() << FAIL_LINE("Function " + funcName + " was not found"); 75 | } 76 | 77 | for (auto i : gepInfoMap) { 78 | outs() << i.second.getStructName() << " " << i.second.getCount() << "\n"; 79 | } 80 | } 81 | 82 | std::vector getFuncListFromFile(std::string funcListPath) { 83 | fstream funcListFile(funcListPath); 84 | std::vector funcList; 85 | std::string fn = ""; 86 | if (!funcListFile.is_open()) { 87 | outs() << FAIL_LINE( "Failed to open init 
function list"); 88 | return funcList; 89 | } 90 | while (getline(funcListFile, fn)) { 91 | if(fn != "") 92 | funcList.push_back(fn); 93 | } 94 | return funcList; 95 | } 96 | 97 | std::vector getGEPInfoFromFunc(Function *func) { 98 | std::map gepInfoMap; 99 | std::vector ret; 100 | for (BasicBlock &bb : *func) { 101 | for (Instruction &i : bb) { 102 | GetElementPtrInst *gepInst = dyn_cast(&i); 103 | if (gepInst != nullptr) { 104 | unsigned width = 0; 105 | if (gepInst->getType()) { 106 | if (gepInst->getType()->isIntegerTy()) 107 | width = gepInst->getType()->getIntegerBitWidth(); 108 | } 109 | 110 | if (gepInst->getSourceElementType()->isStructTy()) { 111 | std::string structName = gepInst->getSourceElementType()->getStructName(); 112 | std::string fieldName = gepInst->getName(); 113 | if (fieldName == "") continue; 114 | gepInfo thisGEP(structName, fieldName, width); 115 | if (!gepInfoMap[thisGEP.getStructName()]) { 116 | if (gepInst->getResultElementType()->isPointerTy()) { 117 | continue; 118 | } 119 | gepInfoMap[thisGEP.getStructName()] = true; 120 | ret.push_back(thisGEP); 121 | } else { 122 | thisGEP.incCount(); 123 | } 124 | } 125 | } 126 | } 127 | } 128 | return ret; 129 | } 130 | 131 | void writeDebugInfo(std::vector info, std::string path) { 132 | ofstream json; 133 | json.open(path); 134 | for (auto i : info) { 135 | json << i.getAsJson(); 136 | } 137 | json.close(); 138 | } 139 | 140 | void writeFuncAddrMap(std::vector info, std::string path) { 141 | ofstream map; 142 | map.open(path); 143 | for (auto i : info) { 144 | map << i.getStructName() << ": 0x" << std::hex << i.getGEPointerID() << "\n"; 145 | } 146 | map.close(); 147 | } 148 | -------------------------------------------------------------------------------- /static_analysis_tools/IRParser/log.h: -------------------------------------------------------------------------------- 1 | #define RESET "\033[0m" 2 | #define RED "\033[31m" 3 | #define GREEN "\033[32m" 4 | #define BLUE "\033[34m" 5 | 6 | 
#define WARN BLUE 7 | #define SUCC GREEN 8 | #define FAIL RED 9 | 10 | #define WARN_LINE(s) WARN << s << "\n" << RESET 11 | #define SUCC_LINE(s) SUCC << s << "\n" << RESET 12 | #define FAIL_LINE(s) FAIL << s << "\n" << RESET 13 | -------------------------------------------------------------------------------- /static_analysis_tools/kern_instrument/AssignTrackerPass/AssignTracker.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "llvm/IR/LegacyPassManager.h" 12 | #include "llvm/IR/CallSite.h" 13 | #include "llvm/IR/IRBuilder.h" 14 | #include "llvm/IR/InlineAsm.h" 15 | #include "llvm/ADT/Statistic.h" 16 | #include "llvm/IR/Function.h" 17 | #include "llvm/Pass.h" 18 | #include "llvm/Support/raw_ostream.h" 19 | 20 | typedef unsigned long long uint64; 21 | typedef unsigned int uint32; 22 | typedef unsigned short uint16; 23 | 24 | using namespace llvm; 25 | using namespace legacy; 26 | 27 | static const char *const SanCovTraceSrt1Name = "__sanitizer_cov_trace_srt1"; 28 | static const char *const SanCovTraceSrt2Name = "__sanitizer_cov_trace_srt2"; 29 | static const char *const SanCovTraceSrt4Name = "__sanitizer_cov_trace_srt4"; 30 | static const char *const SanCovTraceSrt8Name = "__sanitizer_cov_trace_srt8"; 31 | 32 | namespace { 33 | struct AssignTracker : public ModulePass { 34 | static char ID; // Pass identification, replacement for typeid 35 | FunctionCallee SanCovTraceSrt1; 36 | FunctionCallee SanCovTraceSrt2; 37 | FunctionCallee SanCovTraceSrt4; 38 | FunctionCallee SanCovTraceSrt8; 39 | Type *VoidTy; 40 | Type *Int8Ty; 41 | Type *Int16Ty; 42 | Type *Int32Ty; 43 | Type *Int64Ty; 44 | std::map StructIDMap; 45 | StringRef SourceFileName; 46 | 47 | LLVMContext *C; 48 | AssignTracker() : ModulePass(ID) {} 49 | 50 | bool runOnModule(Module &M) override { 51 | C = &M.getContext(); 52 | IRBuilder<> 
IRB(*C); 53 | 54 | VoidTy = IRB.getVoidTy(); 55 | Int8Ty = IRB.getInt8Ty(); 56 | Int16Ty = IRB.getInt16Ty(); 57 | Int32Ty = IRB.getInt32Ty(); 58 | Int64Ty = IRB.getInt64Ty(); 59 | 60 | SanCovTraceSrt1 = M.getOrInsertFunction(SanCovTraceSrt1Name, VoidTy, Int64Ty, Int8Ty); 61 | SanCovTraceSrt2 = M.getOrInsertFunction(SanCovTraceSrt2Name, VoidTy, Int64Ty, Int16Ty); 62 | SanCovTraceSrt4 = M.getOrInsertFunction(SanCovTraceSrt4Name, VoidTy, Int64Ty, Int32Ty); 63 | SanCovTraceSrt8 = M.getOrInsertFunction(SanCovTraceSrt8Name, VoidTy, Int64Ty, Int64Ty); 64 | SourceFileName = M.getName(); 65 | 66 | for (Function &F : M) 67 | instrumentFieldAssign(F); 68 | for (auto i : StructIDMap) { 69 | errs() << i.first << ": " << std::to_string(i.second) << "\n"; 70 | } 71 | return true; 72 | } 73 | void injectFieldAssignTracker(Instruction *I, uint64 id); 74 | void instrumentFieldAssign(Function &func); 75 | }; 76 | } 77 | 78 | std::string stripNum(std::string name) { 79 | size_t len = name.size(); 80 | char tmp[len]; 81 | strncpy(tmp, name.c_str(), len); 82 | if (len < 1) 83 | return name; 84 | while ((tmp[len-1] <= '9' && tmp[len-1] >= '0' && len > 1) 85 | || (tmp[len-1] == 'i' && tmp[len-2] == '.' && len > 2) 86 | || (tmp[len-1] == '.' 
&& len > 1)) { 87 | if (tmp[len-1] == 'i' && tmp[len-2] == '.') { 88 | tmp[len-1] = 0; 89 | tmp[len-2] = 0; 90 | len -= 2; 91 | continue; 92 | } 93 | tmp[len-1] = 0; 94 | len--; 95 | } 96 | name = name.substr(0, len); 97 | return name; 98 | } 99 | 100 | uint16 crc16(std::string name) { 101 | unsigned len = name.length(); 102 | if (len == 0) 103 | return 0; 104 | char *tmp = (char*)malloc(len+1); 105 | strcpy(tmp, name.c_str()); 106 | uint16 data, hash = 0x3e7a, crc = 0xffff; 107 | for (unsigned i = 0; i < len; i++) { 108 | data = *(uint16*)(tmp + i); 109 | if ((crc&0x0001) ^ (data&0x0001)) 110 | crc = (crc >> 1) ^ (hash |0x8005); 111 | else 112 | crc >>= 1; 113 | hash = data ^ hash; 114 | } 115 | crc = ~crc; 116 | data = crc; 117 | crc = (crc << 8) | (data >> 8 &0xff); 118 | return crc; 119 | } 120 | 121 | bool isStruct(const Value *val, const Value *var) { 122 | if (!val->getType()->isIntegerTy()) 123 | return false; 124 | const GetElementPtrInst *gepInst = dyn_cast(var); 125 | if (gepInst == nullptr) 126 | return false; 127 | Type *greTy = gepInst->getResultElementType(); 128 | if (greTy && greTy->isPointerTy()) 129 | return false; 130 | if (gepInst->getSourceElementType()->isStructTy()) { 131 | const StructType *srtTy = dyn_cast(gepInst->getSourceElementType()); 132 | return srtTy->hasName(); 133 | } 134 | return false; 135 | } 136 | 137 | std::string getStructName(const Value *var) { 138 | const GetElementPtrInst *gepInst = dyn_cast(var); 139 | std::string srtName = gepInst->getSourceElementType()->getStructName(); 140 | std::string fieldName = gepInst->getName(); 141 | return stripNum(srtName) + "->" + stripNum(fieldName); 142 | } 143 | 144 | uint64 getSrtIDFromName(const Value *var) { 145 | const GetElementPtrInst *gepInst = dyn_cast(var); 146 | std::string srtName = gepInst->getSourceElementType()->getStructName(); 147 | std::string fieldName = gepInst->getName(); 148 | uint16 srtID = crc16(stripNum(srtName)); 149 | uint16 fieldID = 
crc16(stripNum(fieldName)); 150 | return (((uint64)srtID << 16) | (uint64)fieldID) & 0xffffffff; 151 | } 152 | 153 | uint64 getSourceFileID(std::string sourceFileName) { 154 | uint64 srcID = (uint64)crc16(sourceFileName); 155 | return srcID & 0xffff; 156 | } 157 | 158 | void AssignTracker::instrumentFieldAssign(Function &func) { 159 | if (!func.size()) 160 | return; 161 | for (BasicBlock &bb : func) { 162 | for (Instruction &i : bb) { 163 | if (StoreInst *si = dyn_cast(&i)) { 164 | const Value *val_op = si->getOperand(0); 165 | const Value *var_op = si->getPointerOperand(); 166 | if (isStruct(val_op, var_op)) { 167 | std::string srtName = getStructName(var_op); 168 | uint64 srtID = getSrtIDFromName(var_op); 169 | srtID |= (getSourceFileID(SourceFileName) & 0xffff) << 32; 170 | if (StructIDMap.find(srtName) == StructIDMap.end()) 171 | StructIDMap[srtName] = srtID; 172 | injectFieldAssignTracker(si, StructIDMap[srtName]); 173 | } 174 | } 175 | } 176 | } 177 | } 178 | 179 | void AssignTracker::injectFieldAssignTracker(Instruction *I, uint64 id) { 180 | IRBuilder<> IRB(I); 181 | Value *val = I->getOperand(0); 182 | unsigned bitWidth = val->getType()->getIntegerBitWidth(); 183 | switch (bitWidth) { 184 | case 8: { 185 | IRB.CreateCall(SanCovTraceSrt1, {IRB.getInt64(id), IRB.CreateIntCast(val, Int8Ty, true)}); 186 | break; 187 | } 188 | case 16: { 189 | IRB.CreateCall(SanCovTraceSrt2, {IRB.getInt64(id), IRB.CreateIntCast(val, Int16Ty, true)}); 190 | break; 191 | } 192 | case 32: { 193 | IRB.CreateCall(SanCovTraceSrt4, {IRB.getInt64(id), IRB.CreateIntCast(val, Int32Ty, true)}); 194 | break; 195 | } 196 | case 64: { 197 | IRB.CreateCall(SanCovTraceSrt8, {IRB.getInt64(id), IRB.CreateIntCast(val, Int64Ty, true)}); 198 | break; 199 | } 200 | } 201 | } 202 | 203 | char AssignTracker::ID = 0; 204 | static RegisterPass X("AssignTracker", "AssignTracker Pass", false, false); 205 | 206 | static void registerAssignTrackerPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) 
207 | { 208 | PM.add(new AssignTracker()); 209 | } 210 | 211 | static RegisterStandardPasses RegisterAPass(PassManagerBuilder::EP_OptimizerLast, registerAssignTrackerPass); 212 | -------------------------------------------------------------------------------- /static_analysis_tools/kern_instrument/AssignTrackerPass/AssignTracker.exports: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/static_analysis_tools/kern_instrument/AssignTrackerPass/AssignTracker.exports -------------------------------------------------------------------------------- /static_analysis_tools/kern_instrument/AssignTrackerPass/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # If we don't need RTTI or EH, there's no reason to export anything 2 | # from the hello plugin. 3 | if( NOT LLVM_REQUIRES_RTTI ) 4 | if( NOT LLVM_REQUIRES_EH ) 5 | set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/AssignTracker.exports) 6 | endif() 7 | endif() 8 | 9 | if(WIN32 OR CYGWIN) 10 | set(LLVM_LINK_COMPONENTS Core Support) 11 | endif() 12 | 13 | add_llvm_library( LLVMAssignTracker MODULE BUILDTREE_ONLY 14 | AssignTracker.cpp 15 | 16 | DEPENDS 17 | intrinsics_gen 18 | PLUGIN_TOOL 19 | opt 20 | ) 21 | -------------------------------------------------------------------------------- /static_analysis_tools/kern_instrument/kern_patch/0001-KCOV_SRT_TRACK-ok.patch: -------------------------------------------------------------------------------- 1 | From 8ba8bf36997ba5726098cedad261663129f4d806 Mon Sep 17 00:00:00 2001 2 | From: bins 3 | Date: Wed, 7 Oct 2020 04:28:44 -0400 4 | Subject: [PATCH] kernel struct assignment instrument 5 | 6 | --- 7 | kernel/kcov.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | 1 file changed, 62 insertions(+) 9 | 10 | diff --git a/kernel/kcov.c b/kernel/kcov.c 11 | index 
6afae0bcbac4..b6a40fc40a94 100644 12 | --- a/kernel/kcov.c 13 | +++ b/kernel/kcov.c 14 | @@ -322,6 +322,68 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) 15 | EXPORT_SYMBOL(__sanitizer_cov_trace_switch); 16 | #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ 17 | 18 | +static void notrace write_srt_data(u64 id, u64 val, u64 ip) 19 | +{ 20 | + struct task_struct *t; 21 | + u64 *area; 22 | + u64 count, start_index, end_pos, max_pos; 23 | + 24 | + t = current; 25 | + 26 | + if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t)) 27 | + return; 28 | + 29 | + ip = canonicalize_ip(ip); 30 | + 31 | + area = (u64 *)t->kcov_area; 32 | + max_pos = t->kcov_size * sizeof(unsigned long); 33 | + 34 | + count = READ_ONCE(area[0]); 35 | + 36 | + /* Every record is KCOV_WORDS_PER_STRU 64-bit words. */ 37 | + start_index = 1 + count; 38 | + end_pos = (start_index + 3) * sizeof(u64); 39 | + if (likely(end_pos <= max_pos)) { 40 | + area[start_index] = id; 41 | + area[start_index + 1] = val; 42 | + area[start_index + 2] = ip; 43 | + WRITE_ONCE(area[0], count + 3); 44 | + } 45 | +} 46 | + 47 | +void notrace __sanitizer_cov_trace_srt1(u64 id, u8 val) 48 | +{ 49 | + u64 id_64 = ((u64)0xefe1 << 48) | (u64)id; 50 | + u64 val_64 = (u64)val; 51 | + write_srt_data(id_64, val_64, _RET_IP_); 52 | +} 53 | +EXPORT_SYMBOL(__sanitizer_cov_trace_srt1); 54 | + 55 | +void notrace __sanitizer_cov_trace_srt2(u64 id, u16 val) 56 | +{ 57 | + u64 id_64 = ((u64)0xefe2 << 48) | (u64)id; 58 | + u64 val_64 = (u64)val; 59 | + write_srt_data(id_64, val_64, _RET_IP_); 60 | +} 61 | +EXPORT_SYMBOL(__sanitizer_cov_trace_srt2); 62 | + 63 | +void notrace __sanitizer_cov_trace_srt4(u64 id, u32 val) 64 | +{ 65 | + u64 id_64 = ((u64)0xefe4 << 48) | (u64)id; 66 | + u64 val_64 = (u64)val; 67 | + write_srt_data(id_64, val_64, _RET_IP_); 68 | +} 69 | +EXPORT_SYMBOL(__sanitizer_cov_trace_srt4); 70 | + 71 | +void notrace __sanitizer_cov_trace_srt8(u64 id, u64 val) 72 | +{ 73 | + u64 id_64 = ((u64)0xefe8 << 48) | 
(u64)id; 74 | + u64 val_64 = (u64)val; 75 | + write_srt_data(id_64, val_64, _RET_IP_); 76 | +} 77 | +EXPORT_SYMBOL(__sanitizer_cov_trace_srt8); 78 | + 79 | + 80 | static void kcov_start(struct task_struct *t, struct kcov *kcov, 81 | unsigned int size, void *area, enum kcov_mode mode, 82 | int sequence) 83 | -- 84 | 2.20.1 85 | 86 | -------------------------------------------------------------------------------- /survey.md: -------------------------------------------------------------------------------- 1 | ## Brief 2 | 3 | This document will introduce some features or design of customizing fuzzer. Firstly, most of fuzzer implemented its own Genetic Algorithm( GA). Some features can be classified to one of GA component. For example, the optimizing of generate, mutate and crossover. Other features, such as special feedback or satifying deep nested condition, is strongly depend on what project you fuzz, although these problem is very common in real-world project. 4 | 5 | Because this document is a by-product of customizing Linux kernel fuzzer(base on Syzkaller), Some problem appeared kernel fuzzing only. At the end this document, i will attach the paper the document involved, with a short introduction. 6 | 7 | 8 | ## GA of fuzzer 9 | 10 | In most fuzzers, GA is the engine of evolving testcase. For different purpose, the design of GA's components can be quite different. 11 | 12 | 13 | ### Generate & Mutate in evaluating programming 14 | 15 | In evolutionary programming, if mutation and generating only base on random inputs, that fuzzer will perform badly. Useful information help reducing the search space of evolving the testcase you want. Generally, these following informations can benefit mutating or generating: 16 | 1. symbolic execution: static analyse target, deriver which inputs is useful.( KLEE) 17 | 2. Dynamic taint analysis( DTA): dynamically trace inputs used by conditions.( VUzzer) 18 | 3. 
Dynamic taint analysis: dynamically trace which inputs can satisfy which conditions efficiently.( GREYONE) 19 | 4. Manually written rules: hard-code some special inputs or enum inputs.( Syzkaller) 20 | 5. Extract inputs from real-world programs.( Moonshine) 21 | 22 | 23 | ### Crossover 24 | 25 | In the real world, if you want to fuzz an entire project, generated testcases should be of indeterminate length. The classical single-point random crossover doesn't work well. Block-stacking evolutionary programming would be more efficient. In particular, some testcases are state-based( for example: socket programming); generating and crossing over based on state-based blocks helps evolve testcases with complex context. In our practice, for state-based programming, state-based block-stacking evolution performs better than random crossover. Here are some ideas for block-stacking crossover: 26 | 1. Statically analyse the state dependence of real-world testcases.( Moonshine) 27 | 2. Resource centric: treat generated testcases which use( create & operate on) the same resource as a complex resource. Use them in the subsequent syscalls.( Syzkaller) 28 | 3. State-based resource centric: classify testcases by the states they trigger.( based on syzkaller resource centric) 29 | 4. Build an N-Gram model for syscalls: select those testcases that trigger a type of crash, then build an N-Gram model to analyse the pattern of crash testcases.( FastSyzkaller) 30 | 31 | 32 | ### Fitness 33 | 34 | Fitness is the motivation of evolution in a GA. An appropriate fitness reward helps efficiently select potential inputs or testcases. Moreover, a gradient fitness also helps evolution. Fitness is always based on what feedback the fuzzer collects. 35 | 36 | 37 | #### coverage 38 | 39 | 1. CFG position weight fitness.( VUzzer) 40 | 2. Sum of basic-block weight fitness.( Syzkaller) 41 | 3. Class code: lower fitness for error-handling code.(VUzzer) 42 | 4.
Statistical calculation of testcase.( Syzkaller) 43 | 44 | Similar to VUzzer, we implement a customized syzkaller which support specifying the fuzz target and can be feasibly configure basic-block weight. Read this [document](syzkaller/cover_filter.md) for more information. 45 | 46 | * refer to the following survey 47 | 48 | 49 | #### state 50 | 51 | 1. Symbolic execution: static analyse call-stack input, weight them base on its CFG. 52 | 2. Targeted symbolic execution: matching testcases' stack-trace to BUG's stack-trace report.( Wildfire) 53 | 3. Distance of taint variable to condition expected value.( GREYONE) 54 | 55 | #### Exploit vs Explore 56 | 57 | A fuzzer for the entire project is usually a Multi-armed bandit problem. You may need to trade off explore and exploit. 58 | Trade off them in a fuzzer is difficult, so we try to combinate several fuzzer with different policy( base on syz-hub). Refer to our [multi-policy fuzzer](syzkaller/multi_policy/README.md). 59 | 60 | 61 | ## Other design 62 | 63 | Moreover, there are lots of design of fuzzer is base on what project you fuzz, it can't be classified into any step of GA, although it strongly associates with things mentioned above. 64 | 65 | 66 | ### Shortage of only coverage-guide fuzzer 67 | 68 | Coverage-guide is the most widely used feedback of fuzzer. But, some reserachers found it's not enough for some case. In userspace fuzzing: 69 | 1. Collecting coverage and memory accessing information as fuzzer feedback.( MemFuzz) 70 | 2. 
Collecting targeted functions' argument as feedback.( WildFire) 71 | In kernel fuzzing, state-base fuzz could be more useful, for example: 72 | ``` 73 | Coverage: 74 | Cov(socket+setsockopt$1)+Cov(socket+setsockopt$2)+Cov(socket+sendmsg(flag_not_expect)) = Cov(socket+setsockopt1+setsockopt2) = Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(ANY)) != Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(EXPECT_FLAG)) 75 | ``` 76 | Without any gradient, syzkaller won't collect any testcases to corpus until all inputs are randomly put into the right position. 77 | ``` 78 | State: 79 | State(socket+setsockopt$1)+State(socket+setsockopt$2) != State(socket+setsockopt1+setsockopt$2) != State(socket+setsockopt$1+setsocketopt$2+sendmsg(ANY)) != State(socket+setsockopt$1+setsocketopt$2+sendmsg(EXPECT_FLAG)) 80 | ``` 81 | If we try to collect state of testcases, it will lead fuzzer to generate more complex context testcase. In our practice, we static analyse which state is widely used in condition. Collect those testcases if they can trigger such state. Refer to syzkaller resource centric( block-stacking generate) mentioned before, these testcases will be resource( state-base block) which can be used to generate testcase. Refer to this [document](syzkaller/kstat_demo/README.md). But, that will maintain a lot of testcases in corpus, testcases should be weigted. Also, to avoid writing bpf manuly, we introduce another way to [track kernel state](syzkaller/kstate_resource.md). 82 | 83 | 84 | ### Shortage of Full Kernel Fuzzer 85 | 86 | FKF is multi-solution search space, need a good trade off between explore and exploit. 87 | 1. Syzkaller has no explicit fitness, but it maintain syscall-to-syscall markov chain for prios choise and mutation. The prios include static and dynamic prios. The dynamic prios come from calculating count of syscall pair in each testcase of corpus. Note that testcases may be conflict with each other. 88 | 2. 
Subsystem syscall set: syzkaller support enable/disable a subset of syscalls to fuzz. 89 | 3. Partly kernel fuzz: KCOV support only instement a part of source file in kernel. 90 | 4. Multi-policy fuzzer: base on syz-hub, customized targeted syz-manager with different feedback share testcases with each other if the testcases are interested by other syz-manager. Refer to this [document](syzkaller/multi_policy/README.md). 91 | 92 | 93 | ### Satisfy the condition constraint 94 | 95 | Of course, most ideas of offering information to mutating and generating mentioned above is for staifying condition constraint. There are also some useful way for helping fuzzer satisfy the condition constraint. 96 | 97 | 98 | #### Condition constraint satisfied by single input 99 | If we treat arguments of a function as a byte-base input. Some conditions constraint can be satisfied by mutating input of the function. In this situation, the following ways can be used to improve the performence of fuzzer. 100 | 1. Symbolic execution: static analysis of constraint, can't solve constraint indrectly from input, overhead.( KLEE) 101 | 2. Dynamic taint analysis( DTA): dynamically trace inputs used by conditions.( VUzzer) 102 | 3. Dynamic taint analysis: dynamically trace which inputs can satisfy which conditions efficiently.( GREYONE) 103 | 4. Weakening Strong Constraints: use QEMU Ting Code Generator to weaken strong constraints.( Qemu TCG) 104 | 5. comparison operand tracker: syzkaller use comparison tracker, __sanitizer_cov_trace_cmp for kernel.( KCOV_COMPARISON) 105 | 6. Syzkaller: manually write syscall description. 106 | 7. Matryoshka shows how they try to help AFL evolving input statify nested condition constraint.( Matryoshka) 107 | Also, i attach a comparison of these differences of these ways. 
108 | 109 | | method | dependence | granularity | indirectly use | case | 110 | |--------|------------|-------------|---------------------------|------| 111 | | cmp instrument to track data-flow( DTA) | path-dependent | instruction-level | insensitive | VUzzer | 112 | | cmp instrument to check satisfying | path-dependent | instruction-level | sensitive | GREYONE | 113 | | memory monitor | memory monitor | function-level | sensitive | Matryoshka | 114 | | symbolic execution | path-independent | function-level | insensitive | KLEE/CBMC/ClangChecker | 115 | | KCOV_COMPARISON | path-dependent | instruction-level | sensitive | Syzkaller | 116 | | Qemu TCG | path-dependent | instruction-level | sensitive | QemuTCG + AFL | 117 | 118 | We can see comparison instrument can be used in DTA to solve nested condition. But instrumentation depends on if the branch is reachable. And a taint data monitor like VUzzer is hard to trace complex indirect taint( eg. memory copy). 119 | 120 | 121 | #### Note that in Linux kernel fuzzer: 122 | 123 | Syzkaller has powerful syscall descriptions, search space of a single syscall input was greatly reduced. The real difficulty is to reach branches that depend on syscalls combination and appropriate arguments. 124 | 1. Syzkaller resource: recently syzkaller introduced a feature: resource centric. Syzkaller treats testcases as resource if they create or operate the same kind of data structure( resource also). And uses these resources to generate or mutate new testcases. 125 | 2. MoonShine: static analysis of real world testcases to get the dependence of syscalls. 126 | 3. State-base resource: in our customized syzkaller, only testcases that trigger a special state feedback can be resource. Furthermore, maintaining a relationship between syscalls sequence and kernel state may help more. 127 | Also refer to the chapter crossover mentioned above. 128 | 129 | Symbolic execution: if static analysis chose syscalls as entry, it will be laborious and inefficient. 
Otherwise, if the entry is some kernel function in callstack may help more. Both [this paper](https://arxiv.org/abs/1903.02981) and [our fuzzer](syzkaller/kstat_demo/README.md) chose the second way. Get function-level input by using kernel function hook. We also have a [document](static_analysis_tools/README.md) of comparing some symbolic execution tools. Also, most of time, since kernel state is attach to kernel data structure, track the data structure is other way to track kernel state. Refer to this [document](syzkaller/kstate_resource.md). 130 | 131 | 132 | ## Paper 133 | 134 | [Weakening Strong Constraints for AFL](https://lifeasageek.github.io/class/cs52700-fall16/pages/prog-assignment-1.html): 135 | Strong constriant: a condition constraint need a bunch of memory to satisfy it. In this case, randomly mutating input will take a lot of time to satisfy it. 136 | Weakening strong constraint: try to slice the strong constraint to several weak constraints, replace that branch condition with several branch conditions. Each branch with weak branch can be easily satisfied. So the satisfying input can be gradually evoluted. 137 | The author use Qemu Tiny Code Generator( TCG), a instruction-by-instruction level instrumnetation, to weaken such strong constraints. 138 | 139 | [Compositional Fuzzing Aided by Targeted Symbolic Execution](https://arxiv.org/pdf/1903.02981.pdf): 140 | Targeted symbolic execution: symbolic execution only analyse inputs for reaching targets of interest. 141 | Isolated function: functions that are parameterized( targeted functions). 142 | 1. Repeatly generating testcases and populate testcases base on isolate functions' argements. 143 | 2. Run this testcases in another instrumented version of project, check if crash will happen. 144 | 3. If crash happened, run exploit testcases, collect the stack-trace information. Then try to generate testcases to macth it. 
Check if the target is reachable, if reachable, mutate inputs except those inputs satisfying constraints of the path. 145 | 146 | [VUzzer: Application-aware Evolutionary Fuzzing](): 147 | Data-flow: dynamic taint analysis( DTA), implemented by instrumenting cmp instructions to trace which bits of input have an impact on the condition. The structure of input will be evolved. 148 | Control-flow: assign weight to basic block base on its depth; Assign negative weight to error-handling code. 149 | Static analysis: get immediate value of comparison. 150 | 151 | [MoonShine: Optimizing OS Fuzzer Seed Selection with Trace Distillation](http://www.cs.columbia.edu/~suman/docs/moonshine.pdf): 152 | Use an extended Strace to trace real-world testcases. Extract inputs and dependences of syscalls from Strace output( seed distillation). The dependences are similar to syzkaller resource( after resource centric was introduced). 153 | 154 | [GREYONE: Data Flow Sensitive Fuzzing](https://www.usenix.org/system/files/sec20spring_gan_prepub.pdf): 155 | Fuzzing-driven Taint Inference: it's also DTA. But, unlike VUzzer, GREYONE tracks which input can satisfy condition constraints. So condition variables indirectly initialized from inputs can be found also. 156 | Taint-Guided Mutation: prioritize input bytes that affect more untouched branches to mutate. 157 | Conformance-Guided Evolution: the distance of tainted variables to the value expected by condition. 158 | 159 | [Matryoshka: Fuzzing Deeply Nested Branches](https://arxiv.org/pdf/1905.12228.pdf): 160 | 1. Determine all condition constraints that the target depends on. Use taint analysis to determine which conditions use the same input. 161 | 2. Randomly mutate inputs to satisfy these condition constraints. If all conditions that use the same input( at least one input is the same one) are satisfied, these inputs are called dependent inputs. 162 | 3. If the target is reached, that means all constraints can be satisfied by dependent inputs. 
if not, that means other inputs should be mutated to satisfy those conditions constraints that use indenpendent inputs. 163 | 164 | [FastSyzkaller: Improving Fuzz Efficiency for Linux Kernel Fuzzing](https://iopscience.iop.org/article/10.1088/1742-6596/1176/2/022013/pdf): 165 | FastSyzkaller classify crash type of syzkaller testcases, then use N-Gram model to extract N-Gram sequential syscall patterns from these testcases that may be potentially vulnerable. Generating new testcases from syscall patterns and pack them into the corpus. 166 | 167 | [MEMFUZZ: Using Memory Accesses to Guide Fuzzing](): 168 | 1. Enhance AFL LLVM instrumentation pass: instrument load and store instruction to collect memory accessing information. 169 | 2. Instrumentation site filtering: drop some information of memory accessing, for example, global variables or stack variables accessing. 170 | 3. Extend AFL runtime library for tracking memory accessing. Bloom-filter for deduplicating. 171 | -------------------------------------------------------------------------------- /syz_patch/0002-Calculate-prog-prior-base-on-weighted-pcs.patch: -------------------------------------------------------------------------------- 1 | From bc3063ef6207e1fafa82723a57b58bc938cb77c5 Mon Sep 17 00:00:00 2001 2 | From: Kaipeng Zeng 3 | Date: Tue, 20 Oct 2020 23:47:23 -0400 4 | Subject: [PATCH 2/3] Calculate prog prior base on weighted pcs 5 | 6 | --- 7 | prog/prio.go | 2 +- 8 | prog/prog.go | 1 + 9 | syz-fuzzer/fuzzer.go | 19 +++++++++++++++++-- 10 | syz-fuzzer/proc.go | 1 + 11 | syz-manager/filter.go | 14 ++++++++++++-- 12 | syz-manager/html.go | 4 ++++ 13 | syz-manager/rpc.go | 5 +++-- 14 | 7 files changed, 39 insertions(+), 7 deletions(-) 15 | 16 | diff --git a/prog/prio.go b/prog/prio.go 17 | index eee44cd4..3346c594 100644 18 | --- a/prog/prio.go 19 | +++ b/prog/prio.go 20 | @@ -164,7 +164,7 @@ func (target *Target) calcDynamicPrio(corpus []*Prog) [][]float32 { 21 | for _, c1 := range p.Calls[idx0+1:] { 22 
| id0 := c0.Meta.ID 23 | id1 := c1.Meta.ID 24 | - prios[id0][id1] += 1.0 25 | + prios[id0][id1] += 1.0 * p.Weight 26 | } 27 | } 28 | } 29 | diff --git a/prog/prog.go b/prog/prog.go 30 | index bcc86fb0..7fd8006e 100644 31 | --- a/prog/prog.go 32 | +++ b/prog/prog.go 33 | @@ -11,6 +11,7 @@ type Prog struct { 34 | Target *Target 35 | Calls []*Call 36 | Comments []string 37 | + Weight float32 38 | } 39 | 40 | type Call struct { 41 | diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go 42 | index 2463d9e9..8fc3cc44 100644 43 | --- a/syz-fuzzer/fuzzer.go 44 | +++ b/syz-fuzzer/fuzzer.go 45 | @@ -53,6 +53,7 @@ type Fuzzer struct { 46 | corpusHashes map[hash.Sig]struct{} 47 | corpusPrios []int64 48 | sumPrios int64 49 | + weightedPCs map[uint32]float32 50 | 51 | signalMu sync.RWMutex 52 | corpusSignal signal.Signal // signal of inputs in corpus 53 | @@ -460,6 +461,9 @@ func (fuzzer *Fuzzer) addInputToCorpus(p *prog.Prog, sign signal.Signal, sig has 54 | fuzzer.corpus = append(fuzzer.corpus, p) 55 | fuzzer.corpusHashes[sig] = struct{}{} 56 | prio := int64(len(sign)) 57 | + if len(fuzzer.weightedPCs) > 0 { 58 | + prio = int64(p.Weight) 59 | + } 60 | if sign.Empty() { 61 | prio = 1 62 | } 63 | @@ -563,8 +567,6 @@ func parseOutputType(str string) OutputType { 64 | } 65 | } 66 | 67 | -// Currently, only use GetWeightedPCsRes to check if filter enabled. 68 | -// Weighted PC table will not be used. 
69 | func (fuzzer *Fuzzer) getWeightedPCs() { 70 | a := &rpctype.GetWeightedPCsArgs{} 71 | r := &rpctype.GetWeightedPCsRes{EnableFilter: false} 72 | @@ -574,4 +576,17 @@ func (fuzzer *Fuzzer) getWeightedPCs() { 73 | if r.EnableFilter { 74 | fuzzer.execOpts.Flags |= ipc.FlagEnableCoverageFilter 75 | } 76 | + if len(r.WeightedPCs) > 0 { 77 | + fuzzer.weightedPCs = r.WeightedPCs 78 | + } 79 | +} 80 | + 81 | +func (fuzzer *Fuzzer) calCoverWeight(pcs []uint32) float32 { 82 | + weight := float32(0.0) 83 | + for _, pc := range pcs { 84 | + if _, ok := fuzzer.weightedPCs[pc]; ok { 85 | + weight += fuzzer.weightedPCs[pc] 86 | + } 87 | + } 88 | + return weight 89 | } 90 | diff --git a/syz-fuzzer/proc.go b/syz-fuzzer/proc.go 91 | index fe19a17b..a4b0fb10 100644 92 | --- a/syz-fuzzer/proc.go 93 | +++ b/syz-fuzzer/proc.go 94 | @@ -170,6 +170,7 @@ func (proc *Proc) triageInput(item *WorkTriage) { 95 | Cover: inputCover.Serialize(), 96 | }) 97 | 98 | + item.p.Weight = proc.fuzzer.calCoverWeight(inputCover.Serialize()) 99 | proc.fuzzer.addInputToCorpus(item.p, inputSignal, sig) 100 | 101 | if item.flags&ProgSmashed == 0 { 102 | diff --git a/syz-manager/filter.go b/syz-manager/filter.go 103 | index 9cf2415d..ea3d173d 100644 104 | --- a/syz-manager/filter.go 105 | +++ b/syz-manager/filter.go 106 | @@ -53,8 +53,8 @@ func (mgr *Manager) initKcovFilter() { 107 | mgr.kcovFilter.initWeightedPCs(files, funcs, rawPCs) 108 | } 109 | 110 | -func (mgr *Manager) getWeightedPCs() bool { 111 | - return mgr.kcovFilter.enableFilter 112 | +func (mgr *Manager) getWeightedPCs() (bool, map[uint32]float32){ 113 | + return mgr.kcovFilter.enableFilter, mgr.kcovFilter.weightedPCs 114 | } 115 | 116 | func (filter *CoverFilter) initWeightedPCs(files, functions, rawPCsFiles []string) { 117 | @@ -245,3 +245,13 @@ func (filter *CoverFilter) putUint32(bytes []byte, value uint32) { 118 | binary.BigEndian.PutUint32(bytes, value) 119 | } 120 | } 121 | + 122 | +func (filter *CoverFilter) CalProgWeight(pcs []uint32) 
int { 123 | + prio := int(0) 124 | + for _, pc := range pcs { 125 | + if _, ok := filter.weightedPCs[pc]; ok { 126 | + prio += int(filter.weightedPCs[pc]) 127 | + } 128 | + } 129 | + return prio 130 | +} 131 | diff --git a/syz-manager/html.go b/syz-manager/html.go 132 | index 789c416d..3e1b7788 100644 133 | --- a/syz-manager/html.go 134 | +++ b/syz-manager/html.go 135 | @@ -185,6 +185,7 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) { 136 | Sig: sig, 137 | Short: p.String(), 138 | Cover: len(inp.Cover), 139 | + Prio: mgr.kcovFilter.CalProgWeight(inp.Cover), 140 | }) 141 | } 142 | sort.Slice(data.Inputs, func(i, j int) bool { 143 | @@ -601,6 +602,7 @@ type UIInput struct { 144 | Sig string 145 | Short string 146 | Cover int 147 | + Prio int 148 | } 149 | 150 | var summaryTemplate = html.CreatePage(` 151 | @@ -746,11 +748,13 @@ var corpusTemplate = html.CreatePage(` 152 | 153 | Coverage 154 | Program 155 | + Prio 156 | 157 | {{range $inp := $.Inputs}} 158 | 159 | {{$inp.Cover}} 160 | {{$inp.Short}} 161 | + {{printf "%d" $inp.Prio}} 162 | 163 | {{end}} 164 | 165 | diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go 166 | index 980a447a..864156c6 100644 167 | --- a/syz-manager/rpc.go 168 | +++ b/syz-manager/rpc.go 169 | @@ -57,7 +57,7 @@ type RPCManagerView interface { 170 | newInput(inp rpctype.RPCInput, sign signal.Signal) bool 171 | candidateBatch(size int) []rpctype.RPCCandidate 172 | rotateCorpus() bool 173 | - getWeightedPCs() bool 174 | + getWeightedPCs() (bool, map[uint32]float32) 175 | } 176 | 177 | func startRPCServer(mgr *Manager) (*RPCServer, error) { 178 | @@ -334,7 +334,8 @@ func (serv *RPCServer) shutdownInstance(name string) []byte { 179 | func (serv *RPCServer) GetWeightedPCs(a *rpctype.GetWeightedPCsArgs, r *rpctype.GetWeightedPCsRes) error { 180 | serv.mu.Lock() 181 | defer serv.mu.Unlock() 182 | - enableFilter := serv.mgr.getWeightedPCs() 183 | + enableFilter, weightedPCs := serv.mgr.getWeightedPCs() 184 | r.EnableFilter 
= enableFilter 185 | + r.WeightedPCs = weightedPCs 186 | return nil 187 | } 188 | -- 189 | 2.20.1 190 | 191 | -------------------------------------------------------------------------------- /syzkaller/Bitmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/Bitmap.png -------------------------------------------------------------------------------- /syzkaller/Corpus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/Corpus.png -------------------------------------------------------------------------------- /syzkaller/CoverageOfFiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/CoverageOfFiles.png -------------------------------------------------------------------------------- /syzkaller/CoverageOfTargetFunctions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/CoverageOfTargetFunctions.png -------------------------------------------------------------------------------- /syzkaller/KernFunc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/KernFunc.png -------------------------------------------------------------------------------- /syzkaller/ProgState.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/ProgState.png -------------------------------------------------------------------------------- /syzkaller/TotalCoverage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/TotalCoverage.png -------------------------------------------------------------------------------- /syzkaller/cover_filter.md: -------------------------------------------------------------------------------- 1 | # Syzkaller coverage filter and weighted PCs 2 | 3 | ## Content 4 | 1. Usage. 5 | 2. Implement detail. 6 | 3. Practice. 7 | 8 | To implement coverage filter in syzkaller. we have to follow the next steps: 9 | 10 | 1. Get the LLVM ir code and assembly code of target. 11 | 2. Get the addresses map of target functions by analyzing ir code, assembly code and kernel ELF. 12 | 3. Support cover filter and weighted PCs in syzkaller. 13 | 14 | After step 1 and 2, you will get a addresses map contains addresses of any kernel functions you need. Also, you can attach weight to every PC base on LLVM ir analysis, eg. weighted PCs base on CFG information. 15 | 16 | [Advice from Dmitry](https://groups.google.com/g/syzkaller/c/IgwfGSdca3Q/m/dCsAiB03BgAJ), we implemented a more general and easy to use coverage filter. If you don't need prog prior base on weighted pcs, you can specify which functions or files to test in patched syzkaller manager configure. 17 | 18 | ## Usage 19 | 20 | ### Get LLVM ir code and assembly code 21 | 22 | Lots of static analysis tools can be used to parse ir code. But ir code know nothing about addresses of the final executable file while the assembly code holds both address offset and basic block information. By analyzing them, we can associate ir information with addresses. 
23 | To get ir code and assembly code, you need to pick out the source file where your target functions located at. For example, if your target function is in /net/ipv4/tcp.c, you should run this command in your kernel build tree: 24 | 25 | ``` 26 | make CC=clang net/ipv4/tcp.o -n | grep tcp.c 27 | ``` 28 | 29 | to get the command of compiling tcp.c, command may look like: 30 | 31 | ``` 32 | clang ...... -c -o net/ipv4/tcp.o net/ipv4/tcp.c 33 | ``` 34 | 35 | To get the LLVM ir code of tcp.c, run: 36 | 37 | ``` 38 | clang ...... -S -o net/ipv4/tcp.ll net/ipv4/tcp.c -emit-llvm 39 | ``` 40 | 41 | To get the assembly code of tcp.c, run: 42 | 43 | ``` 44 | clang ...... -S -o net/ipv4/tcp.s net/ipv4/tcp.c 45 | ``` 46 | 47 | Also, linux kernel support: 48 | 49 | ``` 50 | make CC=clang net/ipv4/tcp.ll 51 | make CC=clang net/ipv4/tcp.s 52 | ``` 53 | 54 | Repeat the mentioned steps to get all ir codes and assembly codes of your target functions. Move them to a IR_DIR and ASM_DIR. Then build your kernel and get a VMLINUX file. 55 | 56 | ### Get PCs table 57 | 58 | We use a [kcov_map](../static_analysis_tools/IRParser/kcov_map.cpp) tool to get addresses of the kernel functions we are interested in. 59 | Run the following command to build kcov_map: 60 | 61 | ``` 62 | clang++-10 kcov_map.cpp -o kcov_map -O0 -g `llvm-config-10 --cxxflags --libs --ldflags --system-libs` 63 | ``` 64 | 65 | ``` 66 | ./kcov_map IR_DIR ASM_DIR VMLINUX_FILE FUNCTION_LIST LOG_DIR 67 | ``` 68 | 69 | FUNCTION_LIST has functions name that we need to get their addresses. 70 | IR_DIR: directory all the LLVM ir code we need. 71 | ASM_DIR: directory all the assembly code we need. 72 | VMLINUX_FILE: kernel ELF 73 | LOG_DIR: after run the command, kcov_map will creat a "*.json" and a "*.addr.map" for every function. 74 | Then run: 75 | 76 | ``` 77 | cat LOG_DIR/*.addr.map > funcaddr.map 78 | ``` 79 | 80 | Copy funcaddr.map to syzkaller work directory. 
81 | This is only one of ways when we try to build functions addresses map with weight. You can explore how to build your functions addresses map for you need. 82 | 83 | #### Extend functions list 84 | 85 | In our practice, when we choose some member functions as entry, some functions may be a wrapper function but not the truly implement function. We use [extend_func](../static_analysis_tools/IRParser/extend_func.cpp) extend the function list. 86 | 87 | ``` 88 | clang++-10 extend_func.cpp -o extend_func -O0 -g `llvm-config-10 --cxxflags --libs --ldflags --system-libs` 89 | ``` 90 | 91 | ``` 92 | ./extend_func FUNCTION_LIST IR_DIR 93 | ``` 94 | 95 | You will get a FUNCTION_LIST.new which you can pass to kcov_map. 96 | 97 | ### Support cover filter in syzkaller 98 | 99 | #### Patch syzkaller 100 | 101 | Clone syzkaller, and run: 102 | 103 | ``` 104 | cd PATH_TO_SYZ_SRC 105 | git checkout ff4a334 106 | git apply PATH_TO_harbian-qa/syz_patch/*.patch 107 | ``` 108 | 109 | Build syzkaller as usual. 110 | 111 | #### Modify configure file 112 | 113 | Currently, syzkaller support passing regular expression to coverage filter. Add the following options in syz-manager configure file: 114 | 115 | ``` 116 | "cover": true, 117 | "cover_filter": { 118 | "files": [ 119 | "^net/core/sock.c$", 120 | "^net/sctp/", // file name start with the string 121 | "net/dccp/" // file name include the string 122 | ], 123 | "functions": [ 124 | "^foo$", 125 | "^bar", // all functions start with bar 126 | "baz" // all functions containing baz 127 | ], 128 | "pcs": "external/file/with/weighted/raw/pc/list" 129 | } 130 | ``` 131 | 132 | Also refer to [syzkaller document](https://www.github.com/google/syzkaller/blob/master/pkg/mgrconfig/config.go#L109-L117). 133 | Now you can run a syzkaller with cover filter. 
134 | 135 | ## Implement detail of cover filter 136 | 137 | ### manager 138 | 139 | ### Implement files and functions filter 140 | 141 | Syzkaller manager.reportGenerator holds the file and function information of per pc. At the beginning of syz-manager, we use covfilter.go:createCoverageFilter() to initialize reportGenerator. Then we walk through all symbols and files, use regular expression to pick up those pcs belonging to coverage filter functions and files. 142 | 143 | #### Read weighted pcs from funcaddr.map 144 | 145 | The configure specifies which funcaddr.map should be loaded and sent to VM. Function initWeightedPCs in syz-manager/covfilter.go will read the funcaddr.map and maintain a coverfilter map in structure manager.Manager. This map can be used while calculating the weight of prog in web UI. 146 | 147 | #### RPC interface for sending addresses map to fuzzer 148 | 149 | Extend a getWeightedPCs interface in RPCManagerView in syz-manager/rpc.go for waiting client call( fuzzer) for getting a pcsWeight map. 150 | 151 | #### Display the pc and its weight in source code 152 | 153 | Using the syzkaller web UI "cover", we extend an interface called bitmap. It will convert PCs table to source lines. The color of lines being black means the block of this line won't be dropped while fuzzing. The number at the left is the weight of that line. Note that there may be multiple blocks mapped to a source line. Their weight will be added to this line. 154 | 155 | ### fuzzer 156 | 157 | #### getPCsWeight from syz-manager 158 | 159 | Add a getWeightedPCs for fuzzer, so fuzzer can dynamically fetch PCs table from syz-manager. In other words, it's possible to dynamically distribute PCs table to different fuzzers. For example, reduce PCs weight while some block has been fully explored( [eg.](https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp)). 
160 | 161 | #### Calculate the prog prio from its cover 162 | 163 | We implement a function calCoverWeight in syz-fuzzer/fuzzer.go to calculate the weight and attach to structure prog. You can implement your algorithm of calculating weight base on weighted pc in this function. 164 | 165 | #### Choose prog to mutate base on prog prio 166 | 167 | Syzkaller already has its prior choice base on signals length of the prog. We have to modify the addInputToCorpus function to use our prog weight. 168 | 169 | ### executor 170 | 171 | #### Read pcs map 172 | 173 | The executor/cov_filter.h implements functions for getting PCs table from the map. 174 | 175 | ##### Fast cover filtering. 176 | 177 | Unlike manager and fuzzer, executor coverage filter runs more frequently. Without a fast searching, if the PCs table grows up, the effect on performance can be a disaster. So we use a fast but rough way, bitmap, to address this problem. 178 | We use createCoverageBitmap in syz-manager to create a bitmap for executor. 179 | Because of address alignment, the lowest 4-bit is dropped off. So, for quickly setting and accessing the bit which records if a pc should be filtered, we can search by: 180 | ``` 181 | pc32 -= cov_filter->pcstart; 182 | pc32 = pc32 >> 4; 183 | uint32 idx = pc32 / 8; 184 | uint32 shift = pc32 % 8; 185 | return (cov_filter->bitmap[idx] & (1 << shift)) > 0; 186 | ``` 187 | The effect on performance will not grow up no matter how many PCs should be filtered. 188 | 189 | ## Some PCs-weight-guide fuzzing practice 190 | 191 | Cover filtering is quite certain that you can only set if the edge of that pc will be sent to fuzzer as a signal or not. But, weighted PCs can guide fuzzer to evolve prog flexibly. You can assign weight to PCs base on the result from LLVM ir static analysis. 
192 | 193 | ### Cyclomatic complexity base on llvm CFG 194 | 195 | In the theory of cyclomatic [complexity](https://en.wikipedia.org/wiki/Cyclomatic_complexity), a function can be treated as a one-entry and one-exit model, the complexity can be easily calculated. In realistic application, complexity indicates that program testing should pay more attention to those functions that are more complex. 196 | 197 | ### Basic block count base on llvm BlockFrequenceInfo 198 | 199 | The LLVM class [BlockFrequencyInfo](https://llvm.org/doxygen/classllvm_1_1BlockFrequencyInfo.html) is a convenient way to get the frequency of a block will appear in all potential control-flow paths. It's reasonable that if a basic block appeared more frequently, mutate the prog that triggers this block has a higher probability to cover more other PCs edge. 200 | 201 | ### Basic block to basic block count base on llvm BranchProbabiltyInfo 202 | 203 | The LLVM class [BranchProbabiltyInfo](https://llvm.org/doxygen/classllvm_1_1BranchProbabilityInfo.html) is another tool that can be used in fuzzing. The class has information about the probability of from a block to another block. If you want the fuzzer to evolve a testcase can cover a specific basic block, it's a good choice that uses BranchProbabilityInfo weighted the PCs. 204 | 205 | ### Weighted function call stack 206 | 207 | The mentioned tools focus on if the functions should be fuzzed is already picked out, how to assign priorities to PCs base on CFG information. Sometimes, you may want to fuzz an approximate range, for example, a serial of functions from a call stack. LLVM class [CallGraph](https://llvm.org/doxygen/classllvm_1_1CallGraph.html) can help build the associate of functions call. You can assign low weight to those functions if they are deep and not so complex. 
208 | -------------------------------------------------------------------------------- /syzkaller/design_implementation_intro.md: -------------------------------------------------------------------------------- 1 | # harbian-qa: State-based target directed fuzzer based on syzkaller 2 | 3 | * Author/maintainer: Kaipeng Zeng 4 | * Co-architect: Shawn C[ a.k.a "citypw"] 5 | 6 | ## 1. Summary & Background 7 | 8 | Syzkaller is the state-of-the-art kernel fuzzer. It's effective because of its powerful syscalls descript and resource rules. Particularly, after resource centric was introducted to syzkaller, it can efficiently generate testcases with a complex context. It is the best choice if you want to fuzz your kernel. 9 | 10 | While syzkaller can do targeted fuzz only by constraining syscalls, we can observe that it's no so efficient if you don't want to fuzz the entir kernel, for example, fuzzing a subsystem or several kernel functions. So, We improve syzkaller by introducing coverage filter and kernel state resource, to do targeted fuzz. Coverage filter avoids syzkaller pay too much attention to exploring uninteresting code. Kernel state resource evaluates if a testcase potentially helps to explore target. Both of them help syzkaller to fuzz the targeted code deeply and efficiently. Moreover, some syscalls which are not written for the target, can help to explore targeted code also. But if you don't do coverage filter, target can hardly benefit from them. 11 | 12 | ## 2. Feature of customized syzkaller 13 | 14 | 1. Kernel coverage filter and weighed PCs 15 | 2. Kernel state as a syzkaller resource 16 | 3. Extend syzkaller Web UI 17 | 18 | These [patches](../syz_patch) base on syzkaller-a2cdad9. 19 | 20 | ## 2.1 Support cover filter and weighted PCs 21 | 22 | The original syzkaller can only by constraining enable and disable syscalls to do a targeted fuzzing. Lots of code will be covered although we are not interested. 
And collecting the testcases that trigger such edges will occupy a considerable proportion in the corpus while syzkaller generates and mutates new testcases base on corpus statistics. So, it will slow down the exploring and exploiting of the target. Also, in some cases, you may want to specify a code position gradient to tell fuzzer how to evolve testcases to touch the target position gradually. Or, you just want to fuzz some functions more frequently, maybe because of their complexity or importance. To make syzkaller a more targeted fuzzer, we implement a coverage filter and integrate it into syzkaller. It is not so rare in [userspace fuzzer](http://sharcs-project.eu/m/filer_public/48/8c/488c5fb7-9aad-4c87-ab9c-5ff251ebc73d/vuzzer_ndss17.pdf). And we try to implement it without patching kernel and it can be flexibly configured in syzkaller. Even PCs weight can be changed dynamically in fuzzing time. 23 | 24 | More design detail and usage can be found [here](cover_filter.md). Except how to implement coverage filter efficiently, we also show you some examples of how to use LLVM analysis information to create weighted PCs table to tell customized syzkaller how to evolve testcases. 25 | 26 | * Coverage filter has been merged by syzkaller, refer to [final section](#6-features-merged-by-syzkaller). 27 | 28 | ## 2.2 Syzkaller resource base on kernel state 29 | 30 | Syzkaller already has a resource centric mechanism which helps to generate more complex state resource for subsequent syscalls. Actually, lots of kernel condition constraints are not determined by the input of a single syscall. Most of the time, solving such condition constraints requires a sequence of syscalls and their appropriate inputs. Particularly, after we introduce coverage filter, the state of resource is essential. We couldn't assume that any code outside of our target functions contributes nothing in exploring target functions. 
If a testcase can create a special resource which help solve the following condition constraint met by other syscalls, we call them kernel state. So, we should collect the testcases which can trigger special states to corpus so that they can be used in extracting resource for generating testcases. 31 | 32 | General, in kernel, to pass over kernel functions efficiently, most of states are designed as a field of structure. For example, structure sock contains skc_state, skc_flags and so on. Base on this, we can instrument kernel where it stores or modifies some field of a structure( GetElementPointer operator in LLVM ir). We collect the information about which field of which structure is changed, the value of that field and the address of that operation. Before fuzz, we static analyze which field is used by your target functions more frequently, especially it's used in solving conditions. And tell fuzzer choose those testcases to extract resource more frequently if states of them may more frequently use by your target functions. Moreover, we also static analyze which value of fields are used in conditions. 33 | 34 | More design detail and usage can be found [here](kstate_resource.md). We will show you how to collect kernel state triggered by testcase and how to prior choose testcase to extract resource. Furthermore, base on some static analysis tool, you can specify which and state which value is more meaningful to fuzzer. We show you a semi-manual way for this situation. 35 | 36 | ## 2.3 Extend syzkaller Web UI 37 | 38 | #### Explicitly display the prior of prog and resource of a testcase 39 | 40 | We attach "signal len", "cover weight", "state len" and "Resource weight" to "/corpus" interface. The original use signal length as prog prior. We recalculate the prog prior base on what it covered. And we priorly choose prog to extract resource base on which state it triggers while original syzkaller is totally random. 
41 | 42 | ![Corpus](Corpus.png) 43 | 44 | #### Coverage count of target functions 45 | 46 | No matter if you use coverage filter or not, you can pass a PCs table to patch syzkaller, and access the "/kernfunc" interface to get the information about how much of a function was covered in this fuzzer. 47 | 48 | * Currently, Syzkaller already has a interface `/funccover` cover this feature. We would not maintain such a redundant interface. Refer to [final section](#6-features-merged-by-syzkaller). 49 | 50 | ![KernFunc](KernFunc.png) 51 | 52 | #### Check the coverage filter configuration 53 | 54 | Access "/bitmap" interface to get the colored source code to check if your PCs table is right. 55 | 56 | * Currently, the filtered coverage report was merge by syzkaller, cover this feature. Refer to [final section](#6-features-merged-by-syzkaller). 57 | 58 | ![Bitmap](Bitmap.png) 59 | 60 | #### Display which syscall trigger which kernel state 61 | 62 | We attach kernel states triggered by prog to "/input" interface. You can conveniently trace syscalls behavior. 63 | 64 | ![ProgState](ProgState.png) 65 | 66 | ## 3. Practice and result 67 | 68 | ### A tcp-ipv6 fuzzing example 69 | 70 | We choose member functions of tcpv6_prot and inet6_stream_ops as target functions to fuzz tcp-ipv6. This is inspired by [DDVerify](http://www.cprover.org/ddverify/) which is a symbolic execution tool for kernel driver, it chooses member functions of driver operation as the entry. However, because some of these member functions are only a wrap but not the implement. It contains less basic block information. We use a tool called [extend_func]() to extend the functions list if there is any function with less basic block. We run syzkaller with five modes: 71 | 72 | 1. Syzkaller at a2cdad9. 73 | 2. Patched syzkaller, base on a2cdad9, disable all features we introduce. 74 | 3. Patched syzkaller, base on a2cdad9, enable coverage filter. 75 | 4. 
Patched syzkaller, base on a2cdad9, enable coverage filter and weighted resource base on kernel state. 76 | 5. Patched syzkaller, enable coverage filter and weighted resource, enable all syscalls in syzkaller. 77 | 78 | We use the same kernel and syscalls( except 5) for every mode. Every fuzzer run 6 times, and 8 hours( fuzzing time) per time. 79 | 80 | And here is some information of the result. 81 | 82 | ![TotalCoverage](TotalCoverage.png) 83 | ![CoverageOfFile](CoverageOfFiles.png) 84 | ![CoverageOfTargetFunctions](CoverageOfTargetFunctions.png) 85 | 86 | | Crash name | a2cdad9 | Patched | KCOV filter | KCOV filter + Weighted resource | all syscalls | 87 | | ---- | ---- | ---- | ---- | ---- | ---- | 88 | | BUG: soft lockip in io_uring_release | 0 | 0 | 0 | 0 | 5 | 89 | | WARNING in tcp_recvmsg | 0 | 0 | 4 | 1 | 0 | 90 | | WARNING in tcp_cleanup_rbuf | 0 | 0| 3 | 2 | 0 | 91 | | INFO: task hung in floppy_revailidate | 0 | 0 | 0 | 0 | 3 | 92 | | WARNING in __alloc_pages_nodemask | 0 | 0 | 0 | 0 | 1 | 93 | | divide error in __tcp_select_window | 0 | 0 | 0 | 0 | 1 | 94 | | WARNING in floppy_interrupt | 0 | 0 | 0 | 0 | 1| 95 | 96 | #### The performance influence of introducing these features 97 | 98 | The second mode fuzzer is for analyzing the affect of our patch. Although we didn't use those features, prog prior is signal length, and all resources prior is 1, but we can actually observe the executor was slown down. It is because the patched syzkaller have to recalculate prog prior also. The total executed has a 15% reduction. From average 332/sec to 281/sec. But, it seems the coverage has another result. The coverage of patched syzkaller is slightly more than the original syzkaller. We still do not know the reason for it. 99 | 100 | #### Coverage filter and weighted resource 101 | 102 | In the target function coverage aspect, we can see there is a great improvement after coverage filtering introduced. 
Coverage of files also show a better result, noted that there are functions in af_inet.c and af_inet6.c was not used by tcp-ipv6. That means if you only want to fuzz subsystem or driver, it's a good choice to do coverage filter and weighted resource fuzz. You can get a better result in a shorter time. 103 | 104 | #### Coverage filter, weighted resource and enable all syscalls. 105 | This is the most interesting work in this document, while it makes no assumption that fuzzing a kernel subsystem should only use syscalls for this subsystem. But we take another assumption that lots of code except targeted function and state change contribute little in fuzzing target functions. The result shows us some tcp-ipv6 kernel functions can be covered by not-socket-relative syscalls. But, without coverage filtering, fuzzer may pay more attention to explore the potential coverage of these syscalls. And without kernel state collecting, fuzzer may miss it, because it contributes nothing. That means, in kernel subsystem or driver fuzzing, determinate what you want to fuzz, instead of which syscalls can be used to fuzz, could be effective also. 106 | 107 | ## 4. Conclusions 108 | 109 | #### 4.1 Coverage filter and weighted 110 | 111 | The syzkaller does targeted fuzz by constraining enable syscalls. It can efficiently explore the potential coverage of a single syscall, because mutate input to trigger new coverage in the whole call stack is easier than exploring paths base on complex states( Actually, one of the reasons that syzkaller is efficient is its resource mechanism). We can see fully exploring the deep corner trigger by a single syscall input to early, will slow down syzkaller exploring other branches. Because, there are a bunch of short testcases occupy a great proportion in the corpus while syzkaller will generate new testcase base on them. Instead of collecting every code edge, we limit the target into a serial of important functions. 
We preferentially explore those functions, keep a balance in exploring every potential corner. Our example reveals that explore such a kernel without any emphasis, it's not always efficient. 112 | 113 | #### 4.2 Kernel state resource 114 | 115 | Syzkaller resource mechanism is very important. Unlike most userspace fuzzer, the coverage always triggered by the sequence of syscalls and their input. The return of a syscalls is the input of other syscalls, syscalls may change resource state also. Our work indicated that collect and mutate frequently those resources that have more states changed could help to generate a testcase with a more complex context. Particularly, when a special state is used in target functions for solving condition constraints, that will bring us new coverage. While syzkaller know nothing about kernel state, it can only extract resource from testcases totaly randomly. 116 | 117 | But, build a kernel with instrumentation is a little cumbrous if you change you fuzz target. The whole kernel instrumentation is OK, because we use kstate map to filter those states we don't need. But, it will greatly influence the performance of executor, observably it's impossible to do a bitmap filter like coverage filter. But, at the beginning, we used a [ebpf](kstat_demo) to collect kernel state to do a POC. You can manually write ebpf program to get states you need. It's configurable and more flexible but laborious and unstable and hardly scale up. 118 | 119 | #### 4.3 Enable all syscalls 120 | 121 | Our example shows us another way in fuzzing kernel: choose what to fuzz but not how to fuzz the target. While syzkaller need to pay a lot of effort to write syscalls and choose syscalls to fuzz. That is the only thing that can be controlled by user. If you want to fuzz a part of kernel, you should read the syscalls descript and look for which syscalls will cover this part, then pick them to the enable syscalls. 
122 | 123 | After introducing coverage filter and kernel state resource, base on syzkaller powerful syscalls descript, we can enable all syscalls to fuzz a target. Those syscalls can hardly cover target code will rarely or never appear in corpus. The only thing you should do is find out what you want to fuzz. 124 | 125 | But, we still couldn't extricate from writing syscalls script. We try to run syzkaller without any syscalls with "$", the result is terrible. That means syscalls descript determine the potential coverage presently. We are exploring how to evolve syscalls automatically but not write syscalls script. We think it's possible to classify syscalls if any syscall can trigger a specific kernel state. It's one of the things we are interested in fuzzer. 126 | 127 | 128 | ## 5. Acknowledgments 129 | 130 | * [Special thanks to Dmitry Vyukov and all contributors of syzkaller!](https://www.github.com/google/syzkaller) 131 | * [Thanks to LLVM-project!](https://www.github.com/llvm/llvm-project) 132 | 133 | ## 6. Features merged by syzkaller 134 | 135 | 1. Some [discussion](https://groups.google.com/g/syzkaller/c/IgwfGSdca3Q/) in syzkaller mailing list. 136 | 2. [Support coverage filter](https://www.github.com/google/syzkaller/pull/2017). 137 | 3. Some [cleanup and improvement](https://www.github.com/google/syzkaller/pull/2318) for coverage filter from Dmitry. 138 | 4. [Support filter coverage filter report](https://www.github.com/google/syzkaller/pull/2343). 139 | 5. When this article firstly posted, `/funccover` was not supported by syzkaller. Look at this [commit](https://www.github.com/google/syzkaller/commit/06cecac3179071158ad28688dbec0e09095d1a6d), `/funccover` display the overview of the entire kernel functions, and more accurate than our `/kernfunc` interface. 
140 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/README.md: -------------------------------------------------------------------------------- 1 | # Make syzkaller a state-based guided fuzzer 2 | 3 | ## Goal 4 | Make the syzkaller as a kernel-state-awareness fuzzer or state-based guided fuzzer. The fuzzer should collect the progs which cover the same code but with different kernel data state. Currently syzkaller only collect coverage information. I wonder if it's effective that make syzkaller more kernel-state-awareness. I'd finish collecting some socket state as syzkaller feedback currently. Using the coverage signal interface in syzkaller. And I will show you how to combine these features in a specified purpose fuzzing. 5 | 6 | ## Foundation of theory 7 | 8 | ### Why should we collect the state 9 | For example, assume the prog "socket--setsockopt$1--setsockopt$2--sendmsg(EXPECT_FLAG)" is a desired prog, if only coverage is collected, a pseudocode can be write down as: 10 | ``` 11 | // Cov(prog) is the coverage of a prog 12 | // We assume that only both setsockopt$1 and setsockopt$2 was used 13 | // before sendmsg, new coverage will appear in sendmsg(EXPECT_FLAG). 14 | // EXPECT_FLAG: The flag restrist sendmsg to a new branch 15 | Cov(socket+setsockopt$1)+Cov(socket+setsockopt$2)+Cov(socket+sendmsg(NOEXPECT_FLAG)) 16 | = Cov(socket+setsockopt1+setsockopt2) 17 | = Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(NOEXPECT_FLAG)) 18 | != Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(EXPECT_FLAG)) 19 | ``` 20 | The prog can't be put into corpus until a new coverage signal was detected. Without any gradient between subprog and desired prog. 
After adding state-based feedback, 21 | ``` 22 | // some State(prog) may be miss by syzkaller 23 | // Both of these combinations of syscall may help the coverage discovering 24 | State_or_Cov(socket+setsockopt$1)+State(socket+setsockopt$2) 25 | != State_or_Cov(socket+setsockopt1+setsockopt$2) 26 | != State_or_Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(ANY)) 27 | != State_or_Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(EXPECT_FLAG)) 28 | ``` 29 | Some prog with new state can be collected to corpus and used to generate and mutate. All of them is the gradient that help syzkaller to generate the desired prog. In recent syzkaller, a "resource centric" was introduce and it's quite similar to "state" what we need, although syzkaller use the whole corpus as "resource". The difference is, we mark a prog as resource when it can only build a special kernel state without a new coverage. 30 | 31 | ### Types of branch 32 | From another perspective, coverage is the same as how many branchs the fuzzer has solved. In practice, degree of diffculty in covering different type of branch are different. Kernel state can be restraint of branch. In a kernel function, there are some type of branch: 33 | 1. A condition directly determined by kernel function parameters. Without any impact from other syscalls. In other words, it can be easily covered by mutating a single syscall. 34 | In this [example](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1188), msg_flags is a branch-relative parameters which specified by the input of syscall 'sendmsg'. 35 | 36 | 2. A condition determined by kernel function parameters' historical state. 37 | In first [example](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1189), sk_state is a historical state which can be changed after calling listen/connect... In second [one](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1231), repair_queue is changed after calling setsockopt. 38 | 39 | 3. 
A condition determined by a local variable that can be changed in the kernel function. 40 | In this [example](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1346), local variable merge is changed by this [line](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1330). 41 | 42 | #### Which is not easy to be covered 43 | 44 | First one can be easily covered by syzkaller if powerful syscalls scriptions have been written. Collect function's input as feedbacl helps little coverage. Even though there are several paramters. 45 | 46 | The second one, need time to explore, especial nested condition. For example, in tcp-ipv6 testing, we should not assume that setsockopt/getsockopt/close/shutdown... have no impact on calling sendmsg. Enable too much syscalls will waste much time on exploring their coverage( Original syzkaller do this). Actually, it has no impact on sendmsg unless it trigger a special state for sendmsg( A new State(prog) was discovered). Collecting useful state before calling sendmsg, without collecting any coverage signal of other kernel functions could be more effective. It's actually what i done in state-base fuzzer. And it get a great improvement in some special purpos fuzzer. 47 | 48 | The third one need time to explore too. But it can't be solved by using ebpf feedback. ebpf know nothing about the internal of kernel function. I think fault-injection is a way that can help it. Kernel have a general framework to do function-ret-fault-injection. But it can't attach to inline function. ebpf use this framework also. It has much work to do with supporting a specified fault-injection in syzkaller. 49 | 50 | ### Result 51 | It got a great improvement in the second type of branch. [Here](tcp-ipv6/test.md) is a example for tcp-ipv6. It can easily cover some branch with restraint like "tp->repair", "tp->repair_queue == TCP_*_QUEUE", "sk->sk_state == TCP_CLOSE". All of these branch need more time to explore in original syzkaller. 
52 | 53 | ## Usage 54 | ### Patch syzkaller 55 | First, you need to patch original syzkaller. 56 | ``` 57 | git checkout a34e2c33 58 | git apply *.patch 59 | ``` 60 | ### Gobpf as syzkaller feedback 61 | To build a ebpf as syzkaller feedback, run: 62 | ``` 63 | go build pipe_monitor.go 64 | ``` 65 | 66 | ### Run state-base syzkaller 67 | Just run syz-manager as original syzkaller. 68 | 69 | ### What can you customize 70 | 71 | #### Code and features 72 | 1. Add ebpf feedback and display in webui: run a ebpf monitor before execute_one, read pipe memory to get kernel socket state as syzkaller feedback. 73 | 2. Add coverage filter: filter coverage by address. I use syz-func2addr to get a function address from ELF. 74 | 3. pipe_monitor.go: load a ebpf text, monitor the socket state, feedback to syzkaller by using pipe memory. But it can't trace the historical state of a specific socket. 75 | 4. Add ret ebpfsig as resource: only prog with a special kernel state can be resource. 76 | 77 | * These patch base on upstream syzkaller: a34e2c33 78 | More detail refer to the code comments. 79 | 80 | #### ebpf, kernel data type 81 | 82 | ebpf text in ebpf/ebpftext.go is the only one file can be modified as your will. You can get any data you want by writing ebpf by yourself. Notice: 83 | 1. A hook function before kernel function should be named as kprobe_KFUNC_NAME and append to the list ProbePoint. 84 | 2. Similarly, a kernel function return hook should be named as kretprobe_KFUNC_NAME and append to the RetProbePoint. 85 | 3. The state send to syzkaller by using ebpf function "bpf_trace_printk". Currently, I use a uint64_t state. If you need state with other type, there are a lot work in syzkaller should be done to coordinary with ebpf's output. 86 | 87 | * kernel socket state: parse/parse.go is only for making the socket state readable. Modify it refer to you ebpf text as your will. Only for execprog. Now it's discarded. 
88 | 89 | ## Some example 90 | pipe_monitor can run well with patched syzkaller. Without any different compare to original syzkaller's using. But you need write your ebpf to collect state you want. 91 | 92 | We had already used these featrue to do some fuzz: 93 | ### tcp-ipv6 subsystem fuzzer 94 | According to [this](#Which is not easy to be covered), to fuzz the tcp-ipv6 subsystem, I use the follow feature: 95 | 1. Use ebpf to collect the expected input of kernel function. 96 | 2. Kernel function coverage filtering. Only collect the coverage of _ops function 97 | 3. Filtering the all kernel function coverage except subsystem you need to fuzz. 98 | 99 | ### Arbitrary kernel function fuzzer 100 | 1. Use ebpf to collect socket state before return from syscalls. Mark this type of prog as resource. 101 | 2. Use ebpf to collect the expected input of a kernel function. 102 | 3. Filtering the all kernel function coverage except the one you need to fuzz. 103 | [Here](tcp-ipv6/test.md) are some comparisons of performance of different feedback fuzzer. 104 | 105 | ### Multi-policy fuzzer 106 | We also try to combine this different policy fuzzer by using syz-hub. [Here](../multi_policy/README.md) is a documentation. -------------------------------------------------------------------------------- /syzkaller/kstat_demo/ebpf/ebpf.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/iovisor/gobpf/bcc" 8 | "github.com/iovisor/gobpf/pkg/tracepipe" 9 | ) 10 | 11 | /* 12 | * As an example, we monitor the state, type, flags in socket structure. 13 | * Use ebpf map is a better way to monitor kernel data state. 
14 | * So, we print the state in every hook and handle them after as syzkaller 15 | * read coverage signal 16 | */ 17 | 18 | func EbpfInit() string { 19 | ebpf := EbpfSingle 20 | return ebpf 21 | } 22 | 23 | func Attachs(m *bcc.Module) { 24 | for _, funcname := range ProbePoint { 25 | attachProbe(funcname, m) 26 | } 27 | for _, funcname := range RetProbePoint { 28 | attachRetProbe(funcname, m) 29 | } 30 | } 31 | 32 | func ReadLine(tp *tracepipe.TracePipe, pid uint64) string { 33 | return readline(tp, pid) 34 | } 35 | 36 | /* Add kprobe__ at the beginning, your hookfunc should be kprobe__KERN_FUNCNAME */ 37 | func attachProbe(kprobepoint string, m *bcc.Module) { 38 | funcName := "kprobe__" + kprobepoint 39 | tmpKprobe, err := m.LoadKprobe(funcName) 40 | if err != nil { 41 | fmt.Fprintf(os.Stderr, "Failed to load %s: %s\n", kprobepoint, err); 42 | os.Exit(1) 43 | } 44 | 45 | err = m.AttachKprobe(kprobepoint, tmpKprobe) 46 | if err != nil { 47 | fmt.Fprintf(os.Stderr, "Failed to attach %s: %s\n", kprobepoint, err); 48 | os.Exit(1) 49 | } 50 | } 51 | 52 | func attachRetProbe(kretprobepoint string, m *bcc.Module) { 53 | funcName := "kretprobe__" + kretprobepoint 54 | tmpKretprobe, err := m.LoadKprobe(funcName) 55 | if err != nil { 56 | fmt.Fprintf(os.Stderr, "Failed to load %s: %s\n", kretprobepoint, err); 57 | os.Exit(1) 58 | } 59 | 60 | err = m.AttachKretprobe(kretprobepoint, tmpKretprobe) 61 | if err != nil { 62 | fmt.Fprintf(os.Stderr, "Failed to attach %s: %s\n", kretprobepoint, err); 63 | os.Exit(1) 64 | } 65 | } 66 | 67 | /* read a single line from ebpf, strip useless information */ 68 | func readline(tp *tracepipe.TracePipe, pid uint64) string { 69 | ret := "" 70 | te, err := tp.ReadLine() 71 | if err != nil { 72 | fmt.Fprintf(os.Stderr, "Failed to ReadLine\n", err); 73 | return ret 74 | } 75 | if (te.Message) != "" { 76 | ret = te.Message 77 | } 78 | return ret 79 | } 80 | -------------------------------------------------------------------------------- 
/syzkaller/kstat_demo/ebpf/ebpftext.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | /* High-32-bit: |-----|-sk_state-|-flags-|-sk_shutdown--|--state--| 4 | * |-----|---4bit---|--4bit-|-----2bit-----|--4bit---| 5 | * Low-32-bit: |-func-id-|---branch-related-argument---|--weight-| 6 | * |--4-bit--|-------n-bit-----------------|--4bit---| 7 | * The highest n-bit was empty. You can fill it as your will. 8 | * Collect data for a specified function will generate too much useless 9 | * signals. Hight-32-bit is only for general purpos. 10 | * In a monitored function, do not care too much about arguments 11 | * passed to called function. Just write another probe for it. 12 | */ 13 | 14 | const EbpfSingle string =` 15 | #include 16 | #include 17 | #define KBUILD_MODNAME "foo" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #define SOCK_STATE_OPT 0x1 27 | #define SK_SHUTDOWN_OPT 0x2 28 | #define SOCK_FLAGS_OPT 0x4 29 | #define SK_STATE_OPT 0x8 30 | #define SK_FLAGS_OPT 0x10 31 | #define SK_ERR_OPT 0x20 32 | 33 | #define STATE_MASK 0xe000000000000000 34 | #define RETSTATE_MASK 0xf000000000000000 35 | 36 | static uint64_t set_func_id(uint32_t id) 37 | { 38 | uint64_t state = 0; 39 | state |= ((id&0xf) << 28); 40 | return state &= 0xf0000000; 41 | } 42 | 43 | static uint64_t set_state(struct sock *sk, int opt) 44 | { 45 | uint64_t state = 0, tmp; 46 | u8 bitfield; 47 | 48 | if (opt&SOCK_STATE_OPT) { 49 | tmp = sk->sk_socket->state&0xf; 50 | state |= (tmp << 32); 51 | } 52 | // SHUTDOWN_MASK 53 | if (opt&SK_SHUTDOWN_OPT) { 54 | tmp = sk->sk_shutdown&0x3; 55 | state |= (tmp << 36); 56 | } 57 | if (opt&SOCK_FLAGS_OPT) { 58 | tmp = sk->sk_socket->flags&0xf; 59 | state |= (tmp << 40); 60 | } 61 | //TCP_STATE_MASK 62 | if (opt&SK_STATE_OPT) { 63 | tmp = sk->sk_state&0xf; 64 | state |= (tmp << 44); 65 | } 66 | // SOL_SOCKET 67 | if (opt&SK_FLAGS_OPT) { 68 | tmp = 
sk->sk_flags&0xff; 69 | state |= (tmp << 48); 70 | } 71 | if (opt&SK_ERR_OPT) { 72 | if (sk->sk_err > 0) { 73 | tmp = 1; 74 | state |= (tmp << 49); 75 | } 76 | } 77 | return state; 78 | } 79 | 80 | static uint64_t set_mask(uint64_t state) 81 | { 82 | uint64_t tmp = STATE_MASK; 83 | return state|tmp; 84 | } 85 | 86 | // Don't case about which function give the state 87 | static uint64_t getretstate(struct sock *sk, int id) 88 | { 89 | uint64_t state = 0, tmp = 0; 90 | u8 bitfield; 91 | 92 | state |= set_state(sk, SK_SHUTDOWN_OPT|SK_STATE_OPT|SOCK_FLAGS_OPT|SK_STATE_OPT|SK_FLAGS_OPT|SK_ERR_OPT); 93 | 94 | // nonagle, repair 95 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 96 | if (bitfield&0xf0) { 97 | tmp = bitfield&0xf0; 98 | state |= ((tmp>>4) << 4); 99 | } 100 | if (bitfield&0x2) 101 | state |= 0x1 << 8; 102 | 103 | // defer_connect 104 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&inet_sk(sk)->rcv_tos)-1); 105 | if (bitfield&0xf0) { 106 | tmp = bitfield&0xf0; 107 | state = state | ((tmp>>4) << 9); 108 | } 109 | 110 | // ipv6only 111 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&sk->__sk_common.skc_bound_dev_if)-1); 112 | if (bitfield&0x4) { 113 | state = state | (1 << 13); 114 | } 115 | 116 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 117 | tmp = tcp_sk(sk)->repair_queue & 0x3; 118 | state |= (tmp << 14); 119 | 120 | if(sk->sk_bound_dev_if) 121 | state |= (0x1 << 18); 122 | if(sk->sk_route_caps&NETIF_F_SG) 123 | state |= (0x1 << 20); 124 | if(tcp_sk(sk)->fastopen_rsk != NULL) 125 | state |= (0x1 << 21); 126 | if(tcp_sk(sk)->urg_data) 127 | state |= (0x1 << 22); 128 | if(tcp_sk(sk)->urg_seq) 129 | state |= (0x1 << 23); 130 | if (tcp_sk(sk)->saved_syn) 131 | state |= (0x1 << 24); 132 | if(tcp_sk(sk)->urg_data) 133 | state |= (0x1 << 25); 134 | if(tcp_sk(sk)->urg_seq) 135 | state |= (0x1 << 26); 136 | if(tcp_sk(sk)->linger2) 137 | state |= (0x1 << 27); 138 | 
if(tcp_sk(sk)->urg_seq == tcp_sk(sk)->copied_seq) 139 | state |= (0x1 << 28); 140 | if(sk->sk_lingertime) 141 | state |= (0x1 << 29); 142 | if(sk->sk_frag.page) 143 | state |= (0x1 << 30); 144 | 145 | tmp = RETSTATE_MASK; 146 | return state|tmp; 147 | } 148 | 149 | int kprobe__tcp_v6_init_sock(struct pt_regs *ctx, struct sock *sk) 150 | { 151 | uint64_t state = set_func_id(0); 152 | 153 | state = set_mask(state); 154 | bpf_trace_printk("%llx\n", state); 155 | return 0; 156 | } 157 | 158 | int kretprobe__tcp_v6_init_sock(struct pt_regs *ctx, struct sock *sk) 159 | { 160 | bpf_trace_printk("%llx\n", getretstate(sk,0)); 161 | return 0; 162 | } 163 | 164 | int kprobe__tcp_v6_connect(struct pt_regs *ctx, struct sock *sk) 165 | { 166 | uint64_t state = set_func_id(0x1); 167 | 168 | state = set_mask(state); 169 | bpf_trace_printk("%llx\n", state); 170 | return 0; 171 | } 172 | 173 | int kretprobe__tcp_v6_connect(struct pt_regs *ctx, struct sock *sk) 174 | { 175 | bpf_trace_printk("%llx\n", getretstate(sk, 1)); 176 | return 0; 177 | } 178 | 179 | int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk, struct msghdr *msg, size_t size) 180 | { 181 | uint64_t state = set_func_id(0x2), tmp = 0; 182 | u8 bitfield; 183 | 184 | tmp = sk->sk_state&0xf; 185 | if(tmp == TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_SYN_SENT) 186 | state |= ((tmp&0xf) << 32); 187 | 188 | tmp = sk->sk_shutdown&0x3; 189 | if(tmp == SEND_SHUTDOWN) 190 | state |= ((tmp&0x3) << 36); 191 | 192 | tmp = sk->sk_flags&0xff; 193 | if(tmp == SOCK_ZEROCOPY) 194 | state |= ((tmp&0xff) << 40); 195 | 196 | // nonagle, repair 197 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 198 | if (bitfield&0xf0) { 199 | tmp = bitfield&0xf0; 200 | state |= ((tmp>>4) << 48); 201 | } 202 | tmp = 0x1; 203 | if (bitfield&0x2) 204 | state |= tmp << 52; 205 | 206 | // defer_connect 207 | bpf_probe_read(&bitfield, sizeof(bitfield), 
(void*)((long)&inet_sk(sk)->rcv_tos)-1); 208 | if (bitfield&0xf0) { 209 | tmp = bitfield&0xf0; 210 | state = state | ((tmp>>4) << 53); 211 | } 212 | 213 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 214 | tmp = tcp_sk(sk)->repair_queue & 0x3; 215 | state |= (tmp << 57); 216 | 217 | 218 | // tp->fastopen_req 219 | if (tcp_sk(sk)->fastopen_req) 220 | state |= (0x1 << 16); 221 | if (tcp_sk(sk)->fastopen_rsk != NULL) 222 | state |= (0x1 << 17); 223 | 224 | // From syscalls argument 225 | // msg->msg_controllen 226 | if (msg->msg_controllen) 227 | state |= (0x1 << 20); 228 | // msg_data_left 229 | if (msg->msg_iter.count) 230 | state |= (0x1 << 27); 231 | 232 | state = set_mask(state); 233 | bpf_trace_printk("%llx\n", state); 234 | return 0; 235 | } 236 | 237 | int kretprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk) 238 | { 239 | bpf_trace_printk("%llx\n", getretstate(sk, 2)); 240 | return 0; 241 | } 242 | 243 | int kprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk, struct msghdr *msg, int flags) 244 | { 245 | uint64_t state = set_func_id(0x3), tmp = 0; 246 | u8 bitfield; 247 | 248 | tmp = sk->sk_state&0xf; 249 | //TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_SYN_SENT) 250 | if(tmp) 251 | state |= ((tmp&0xf) << 32); 252 | 253 | tmp = sk->sk_shutdown&0x3; 254 | if(tmp == RCV_SHUTDOWN) 255 | state |= ((tmp&0x3) << 36); 256 | 257 | // SOCK_URGINLINE SOCK_DONE 258 | tmp = sk->sk_flags&0xff; 259 | if(tmp == SOCK_URGINLINE || tmp == SOCK_DONE) 260 | state |= ((tmp&0xff) << 42); 261 | 262 | // nonagle, repair 263 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 264 | if (bitfield&0xf0) { 265 | tmp = bitfield&0xf0; 266 | state |= ((tmp>>4) << 48); 267 | } 268 | tmp = 0x1; 269 | if (bitfield&0x2) 270 | state |= tmp << 52; 271 | 272 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 273 | tmp = tcp_sk(sk)->repair_queue & 0x3; 274 | state |= (tmp << 57); 275 | 276 | 
// urg_data urg_seq 277 | if(tcp_sk(sk)->urg_data) 278 | state |= (0x1 << 1); 279 | if(tcp_sk(sk)->urg_seq == tcp_sk(sk)->copied_seq) 280 | state |= (0x1 << 2); 281 | if(sk->sk_err) 282 | state |= (0x1 << 3); 283 | // msg->msg_flags 284 | // MSG_PEEK MSG_OOB MSG_WAITALL MSG_TRUNC 285 | if (msg->msg_flags&MSG_PEEK) 286 | state |= (0x1 << 4); 287 | if (msg->msg_flags&MSG_OOB) 288 | state |= (0x1 << 5); 289 | if (msg->msg_flags&MSG_WAITALL) 290 | state |= (0x1 << 6); 291 | // msg->msg_flags 292 | if (msg->msg_flags&MSG_TRUNC) 293 | state |= (0x1 << 7); 294 | if (msg->msg_flags&MSG_ERRQUEUE) 295 | state |= (0x1 << 8); 296 | if(sk->sk_receive_queue.next) 297 | state |= (0x1 << 9); 298 | 299 | state = set_mask(state); 300 | bpf_trace_printk("%llx\n", state); 301 | return 0; 302 | } 303 | 304 | int kretprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk) 305 | { 306 | bpf_trace_printk("%llx\n", getretstate(sk, 3)); 307 | return 0; 308 | } 309 | 310 | int kprobe__tcp_close(struct pt_regs *ctx, struct sock *sk) 311 | { 312 | uint64_t state = set_func_id(0x4), tmp = 0; 313 | u8 bitfield; 314 | 315 | tmp = sk->sk_state&0xf; 316 | if(tmp == TCP_LISTEN || tmp == TCP_FIN_WAIT2 || tmp == TCP_CLOSE) 317 | state |= ((tmp&0xf) << 32); 318 | 319 | tmp = 1; 320 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 321 | if (bitfield&0x2) 322 | state |= (tmp << 8); 323 | 324 | tmp = 1; 325 | if (tcp_sk(sk)->linger2) 326 | state |= (tmp << 12); 327 | 328 | tmp = sk->sk_flags&0xff; 329 | if(tmp == SOCK_LINGER) 330 | state |= ((tmp&0xff) << 18); 331 | 332 | tmp = 1; 333 | if(sk->sk_lingertime) { 334 | state |= (tmp << 24); 335 | } 336 | 337 | state = set_mask(state); 338 | bpf_trace_printk("%llx\n", state); 339 | return 0; 340 | } 341 | 342 | int kretprobe__tcp_close(struct pt_regs *ctx, struct sock *sk) 343 | { 344 | bpf_trace_printk("%llx\n", getretstate(sk, 4)); 345 | return 0; 346 | } 347 | 348 | int kprobe__tcp_shutdown(struct pt_regs *ctx, 
struct sock *sk, int how) 349 | { 350 | uint64_t state = set_func_id(0x5), tmp = 0; 351 | 352 | tmp = how; 353 | state |= (tmp&0xff << 4); 354 | 355 | if ((1 << sk->sk_state)&(TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) 356 | state |= (0x1 << 12); 357 | 358 | state = set_mask(state); 359 | bpf_trace_printk("%llx\n", state); 360 | return 0; 361 | } 362 | 363 | int kretprobe__tcp_shutdown(struct pt_regs *ctx, struct sock *sk) 364 | { 365 | bpf_trace_printk("%llx\n", getretstate(sk, 5)); 366 | return 0; 367 | } 368 | 369 | int kprobe__tcp_setsockopt(struct pt_regs *ctx, struct sock *sk, int level, int optname) 370 | { 371 | uint64_t state = set_func_id(0x6), tmp = 0; 372 | u8 bitfield; 373 | struct tcp_sock *tp = tcp_sk(sk); 374 | 375 | tmp = sk->sk_state&0xf; 376 | if(tmp == TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_LISTEN) 377 | state |= ((tmp&0xf) << 32); 378 | 379 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 380 | tmp = tcp_sk(sk)->repair_queue & 0x3; 381 | state |= (tmp << 16); 382 | 383 | // tp->repair, tp->nonagle 384 | tmp = 1; 385 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 386 | if (bitfield&0x2) 387 | state = state | (tmp << 20); 388 | if (bitfield&0xf0) { 389 | tmp = bitfield; 390 | state |= ((tmp&0xf0 >> 4) << 24); 391 | } 392 | 393 | tmp = sk->sk_flags&0xff; 394 | if(tmp == SOCK_KEEPOPEN) 395 | state |= ((tmp&0xff) << 4); 396 | 397 | state = set_mask(state); 398 | bpf_trace_printk("%llx\n", state); 399 | return 0; 400 | } 401 | 402 | int kretprobe__tcp_setsockopt(struct pt_regs *ctx, struct sock *sk) 403 | { 404 | bpf_trace_printk("%llx\n", getretstate(sk, 6)); 405 | return 0; 406 | } 407 | 408 | int kprobe__tcp_getsockopt(struct pt_regs *ctx, struct sock *sk, int level, int optname) 409 | { 410 | uint64_t state = set_func_id(0x7), tmp = 0; 411 | u8 bitfield; 412 | struct tcp_sock *tp = tcp_sk(sk); 413 | 414 | tmp = sk->sk_state&0xf; 
415 | if(tmp == TCP_CLOSE || tmp == TCP_LISTEN) 416 | state |= ((tmp&0xf) << 32); 417 | 418 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 419 | tmp = tcp_sk(sk)->repair_queue & 0x3; 420 | state |= (tmp << 16); 421 | 422 | tmp = 1; 423 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 424 | if (bitfield&0x2) 425 | state |= (tmp << 20); 426 | 427 | tmp = 1; 428 | if (tp->saved_syn) { 429 | state |= (tmp << 24); 430 | } 431 | 432 | state = set_mask(state); 433 | bpf_trace_printk("%llx\n", state); 434 | return 0; 435 | } 436 | 437 | int kretprobe__tcp_getsockopt(struct pt_regs *ctx, struct sock *sk) 438 | { 439 | bpf_trace_printk("%llx\n", getretstate(sk, 7)); 440 | return 0; 441 | } 442 | 443 | int kprobe__inet_accept(struct pt_regs *ctx, struct socket *sock, struct socket* newsock, int flags, bool kern) 444 | { 445 | uint64_t state = set_func_id(0x8); 446 | 447 | if(kern) 448 | state = state | (0x1 << 4); 449 | state = set_mask(state); 450 | bpf_trace_printk("%llx\n", state); 451 | 452 | state = set_func_id(9); 453 | if(kern) 454 | state = state | (0x1 << 4); 455 | 456 | state = set_mask(state); 457 | bpf_trace_printk("%llx\n", state); 458 | return 0; 459 | } 460 | 461 | int kretprobe__inet_accept(struct pt_regs *ctx, struct socket *sock, struct socket* newsock) 462 | { 463 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 8)); 464 | bpf_trace_printk("%llx\n", getretstate(newsock->sk, 9)); 465 | return 0; 466 | } 467 | 468 | int kprobe__inet_listen(struct pt_regs *ctx, struct socket *sock) 469 | { 470 | uint64_t state = set_func_id(0xa), tmp; 471 | 472 | tmp = sock->sk->sk_state&0xf; 473 | if(tmp == TCP_LISTEN || tmp == TCP_CLOSE) 474 | state |= ((tmp&0xf) << 32); 475 | 476 | state = set_mask(state); 477 | bpf_trace_printk("%llx\n", state); 478 | return 0; 479 | } 480 | 481 | int kretprobe__inet_listen(struct pt_regs *ctx, struct socket *sock) 482 | { 483 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 
0xa)); 484 | return 0; 485 | } 486 | 487 | int kprobe__tcp_ioctl(struct pt_regs *ctx, struct sock *sk, int cmd) 488 | { 489 | uint64_t state = set_func_id(0xb), tmp, mask; 490 | 491 | tmp = cmd; 492 | mask = SIOCINQ|SIOCATMARK|SIOCOUTQ|SIOCOUTQNSD; 493 | if (tmp==SIOCINQ || tmp==SIOCATMARK || tmp==SIOCOUTQ || tmp==SIOCOUTQNSD) 494 | state |= ((cmd&mask) << 4); 495 | state = set_mask(state); 496 | bpf_trace_printk("%llx\n", state); 497 | return 0; 498 | } 499 | 500 | int kretprobe__tcp_ioctl(struct pt_regs *ctx, struct sock *sk) 501 | { 502 | bpf_trace_printk("%llx\n", getretstate(sk, 0xb)); 503 | return 0; 504 | } 505 | 506 | int kprobe__inet6_bind(struct pt_regs *ctx, struct sock *sk, struct sockaddr *uaddr, bool with_lock) 507 | { 508 | uint64_t state = set_func_id(0xc); 509 | 510 | state = set_mask(state); 511 | bpf_trace_printk("%llx\n", state); 512 | return 0; 513 | } 514 | 515 | int kretprobe__inet6_bind(struct pt_regs *ctx, struct sock *sk) 516 | { 517 | bpf_trace_printk("%llx\n", getretstate(sk, 0xc)); 518 | return 0; 519 | } 520 | 521 | int kprobe__inet6_ioctl(struct pt_regs *ctx, struct sock *sk, int cmd) 522 | { 523 | uint64_t state = set_func_id(0xd), tmp, mask; 524 | 525 | tmp = cmd; 526 | mask = SIOCINQ|SIOCATMARK|SIOCOUTQ|SIOCOUTQNSD; 527 | if (tmp==SIOCINQ || tmp==SIOCATMARK || tmp==SIOCOUTQ || tmp==SIOCOUTQNSD) 528 | state |= ((cmd&(0x541B|0x8905|0x894b|0x5411)) << 4); 529 | state = set_mask(state); 530 | bpf_trace_printk("%llx\n", state); 531 | return 0; 532 | } 533 | 534 | int kretprobe__inet6_ioctl(struct pt_regs *ctx, struct sock *sk) 535 | { 536 | bpf_trace_printk("%llx\n", getretstate(sk, 0xd)); 537 | return 0; 538 | } 539 | 540 | int kprobe__inet6_getname(struct pt_regs *ctx, struct sock *sk, int cmd, int peer) 541 | { 542 | uint64_t state = set_func_id(0xe), tmp; 543 | 544 | tmp = 0x1; 545 | if (peer == 1) 546 | state |= (tmp << 4); 547 | 548 | state = set_mask(state); 549 | bpf_trace_printk("%llx\n", state); 550 | return 0; 551 | } 552 | 
553 | int kretprobe__inet6_getname(struct pt_regs *ctx, struct sock *sk) 554 | { 555 | bpf_trace_printk("%llx\n", getretstate(sk, 0xe)); 556 | return 0; 557 | } 558 | 559 | ` 560 | /* Kernel probe/retprobe point */ 561 | var ProbePoint []string = []string{"tcp_v6_init_sock","tcp_v6_connect","tcp_sendmsg","tcp_recvmsg","tcp_close","tcp_shutdown","tcp_setsockopt","tcp_getsockopt","inet_accept","inet_listen", "tcp_ioctl", "inet6_bind", "inet6_getname","inet6_ioctl"} 562 | 563 | var RetProbePoint []string = []string{"tcp_v6_init_sock","tcp_v6_connect","tcp_sendmsg","tcp_recvmsg","tcp_close","tcp_shutdown","tcp_setsockopt","tcp_getsockopt","inet_accept","inet_listen", "tcp_ioctl", "inet6_bind", "inet6_getname","inet6_ioctl"} 564 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/ebpf_sample/ebpftext_recvmsg.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | /* High-32-bit: |-----|-sk_state-|-flags-|-sk_shutdown--|--state--| 4 | * |-----|---4bit---|--4bit-|-----2bit-----|--4bit---| 5 | * Low-32-bit: |-func-id-|---branch-related-argument---|--weight-| 6 | * |--4-bit--|-------n-bit-----------------|--4bit---| 7 | * The highest n-bit was empty. You can fill it as your will. 8 | * Collect data for a specified function will generate too much useless 9 | * signals. Hight-32-bit is only for general purpos. 10 | * In a monitored function, do not care too much about arguments 11 | * passed to called function. Just write another probe for it. 
12 | */ 13 | 14 | const EbpfSingle string =` 15 | #include 16 | #include 17 | #define KBUILD_MODNAME "foo" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #define SOCK_STATE_OPT 0x1 27 | #define SK_SHUTDOWN_OPT 0x2 28 | #define SOCK_FLAGS_OPT 0x4 29 | #define SK_STATE_OPT 0x8 30 | #define SK_FLAGS_OPT 0x10 31 | #define SK_ERR_OPT 0x20 32 | 33 | #define STATE_MASK 0xe000000000000000 34 | #define RETSTATE_MASK 0xf000000000000000 35 | 36 | static uint64_t set_func_id(uint32_t id) 37 | { 38 | uint64_t state = 0; 39 | state |= ((id&0xf) << 28); 40 | return state &= 0xf0000000; 41 | } 42 | 43 | static uint64_t set_state(struct sock *sk, int opt) 44 | { 45 | uint64_t state = 0, tmp; 46 | u8 bitfield; 47 | 48 | if (opt&SOCK_STATE_OPT) { 49 | tmp = sk->sk_socket->state&0xf; 50 | state |= (tmp << 32); 51 | } 52 | // SHUTDOWN_MASK 53 | if (opt&SK_SHUTDOWN_OPT) { 54 | tmp = sk->sk_shutdown&0x3; 55 | state |= (tmp << 36); 56 | } 57 | if (opt&SOCK_FLAGS_OPT) { 58 | tmp = sk->sk_socket->flags&0xf; 59 | state |= (tmp << 40); 60 | } 61 | //TCP_STATE_MASK 62 | if (opt&SK_STATE_OPT) { 63 | tmp = sk->sk_state&0xf; 64 | state |= (tmp << 44); 65 | } 66 | // SOL_SOCKET 67 | if (opt&SK_FLAGS_OPT) { 68 | tmp = sk->sk_flags&0xff; 69 | state |= (tmp << 48); 70 | } 71 | if (opt&SK_ERR_OPT) { 72 | if (sk->sk_err > 0) { 73 | tmp = 1; 74 | state |= (tmp << 49); 75 | } 76 | } 77 | return state; 78 | } 79 | 80 | static uint64_t set_mask(uint64_t state) 81 | { 82 | uint64_t tmp = STATE_MASK; 83 | return state|tmp; 84 | } 85 | 86 | // Don't case about which function give the state 87 | static uint64_t getretstate(struct sock *sk, int id) 88 | { 89 | uint64_t state = 0, tmp = 0; 90 | u8 bitfield; 91 | 92 | state |= set_state(sk, SOCK_STATE_OPT|SK_SHUTDOWN_OPT|SOCK_FLAGS_OPT|SK_STATE_OPT|SK_FLAGS_OPT|SK_ERR_OPT); 93 | // nonagle, repair 94 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 95 | if 
(bitfield&0xf0) { 96 | tmp = bitfield&0xf0; 97 | state |= ((tmp>>4) << 48); 98 | } 99 | tmp = 0x1; 100 | if (bitfield&0x2) 101 | state |= tmp << 52; 102 | 103 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 104 | tmp = tcp_sk(sk)->repair_queue & 0x3; 105 | state |= (tmp << 57); 106 | 107 | 108 | // urg_data urg_seq 109 | if(tcp_sk(sk)->urg_data) 110 | state |= (0x1 << 1); 111 | if(tcp_sk(sk)->urg_seq == tcp_sk(sk)->copied_seq) 112 | state |= (0x1 << 2); 113 | if(sk->sk_receive_queue.next) 114 | state |= (0x1 << 3); 115 | 116 | tmp = RETSTATE_MASK; 117 | return state|tmp; 118 | } 119 | 120 | int kretprobe__tcp_v6_init_sock(struct pt_regs *ctx, struct sock *sk) 121 | { 122 | bpf_trace_printk("%llx\n", getretstate(sk,0)); 123 | return 0; 124 | } 125 | 126 | int kretprobe__tcp_v6_connect(struct pt_regs *ctx, struct sock *sk) 127 | { 128 | bpf_trace_printk("%llx\n", getretstate(sk, 1)); 129 | return 0; 130 | } 131 | 132 | int kprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk, struct msghdr *msg) 133 | { 134 | uint64_t state = set_func_id(0x3), tmp = 0; 135 | u8 bitfield; 136 | 137 | tmp = sk->sk_state&0xf; 138 | //TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_SYN_SENT) 139 | if(tmp) 140 | state |= ((tmp&0xf) << 32); 141 | 142 | tmp = sk->sk_shutdown&0x3; 143 | if(tmp == RCV_SHUTDOWN) 144 | state |= ((tmp&0x3) << 36); 145 | 146 | // SOCK_URGINLINE SOCK_DONE 147 | tmp = sk->sk_flags&0xff; 148 | if(tmp == SOCK_URGINLINE || tmp == SOCK_DONE) 149 | state |= ((tmp&0xff) << 42); 150 | 151 | // nonagle, repair 152 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 153 | if (bitfield&0xf0) { 154 | tmp = bitfield&0xf0; 155 | state |= ((tmp>>4) << 48); 156 | } 157 | tmp = 0x1; 158 | if (bitfield&0x2) 159 | state |= tmp << 52; 160 | 161 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 162 | tmp = tcp_sk(sk)->repair_queue & 0x3; 163 | state |= (tmp << 57); 164 | 165 | // urg_data 
urg_seq 166 | if(tcp_sk(sk)->urg_data) 167 | state |= (0x1 << 1); 168 | if(tcp_sk(sk)->urg_seq == tcp_sk(sk)->copied_seq) 169 | state |= (0x1 << 2); 170 | if(sk->sk_err) 171 | state |= (0x1 << 3); 172 | // msg->msg_flags 173 | // MSG_PEEK MSG_OOB MSG_WAITALL MSG_TRUNC 174 | if (msg->msg_flags&MSG_PEEK) 175 | state |= (0x1 << 4); 176 | if (msg->msg_flags&MSG_OOB) 177 | state |= (0x1 << 5); 178 | if (msg->msg_flags&MSG_WAITALL) 179 | state |= (0x1 << 6); 180 | // msg->msg_flags 181 | if (msg->msg_flags&MSG_TRUNC) 182 | state |= (0x1 << 7); 183 | if (msg->msg_flags&MSG_ERRQUEUE) 184 | state |= (0x1 << 8); 185 | if(sk->sk_receive_queue.next) 186 | state |= (0x1 << 9); 187 | 188 | state = set_mask(state); 189 | bpf_trace_printk("%llx\n", state); 190 | return 0; 191 | } 192 | 193 | int kretprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk) 194 | { 195 | bpf_trace_printk("%llx\n", getretstate(sk, 2)); 196 | return 0; 197 | } 198 | 199 | int kretprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk) 200 | { 201 | bpf_trace_printk("%llx\n", getretstate(sk, 3)); 202 | return 0; 203 | } 204 | 205 | int kretprobe__tcp_close(struct pt_regs *ctx, struct sock *sk) 206 | { 207 | bpf_trace_printk("%llx\n", getretstate(sk, 4)); 208 | return 0; 209 | } 210 | 211 | int kretprobe__tcp_shutdown(struct pt_regs *ctx, struct sock *sk) 212 | { 213 | bpf_trace_printk("%llx\n", getretstate(sk, 5)); 214 | return 0; 215 | } 216 | 217 | int kretprobe__tcp_setsockopt(struct pt_regs *ctx, struct sock *sk) 218 | { 219 | bpf_trace_printk("%llx\n", getretstate(sk, 6)); 220 | return 0; 221 | } 222 | 223 | int kretprobe__tcp_getsockopt(struct pt_regs *ctx, struct sock *sk) 224 | { 225 | bpf_trace_printk("%llx\n", getretstate(sk, 7)); 226 | return 0; 227 | } 228 | 229 | int kretprobe__inet_accept(struct pt_regs *ctx, struct socket *sock, struct socket* newsock) 230 | { 231 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 8)); 232 | bpf_trace_printk("%llx\n", getretstate(newsock->sk, 9)); 233 | 
return 0; 234 | } 235 | 236 | int kretprobe__inet_listen(struct pt_regs *ctx, struct socket *sock) 237 | { 238 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 0xa)); 239 | return 0; 240 | } 241 | 242 | int kretprobe__tcp_ioctl(struct pt_regs *ctx, struct sock *sk) 243 | { 244 | bpf_trace_printk("%llx\n", getretstate(sk, 0xb)); 245 | return 0; 246 | } 247 | 248 | int kretprobe__inet6_bind(struct pt_regs *ctx, struct sock *sk) 249 | { 250 | bpf_trace_printk("%llx\n", getretstate(sk, 0xc)); 251 | return 0; 252 | } 253 | 254 | int kretprobe__inet6_ioctl(struct pt_regs *ctx, struct sock *sk) 255 | { 256 | bpf_trace_printk("%llx\n", getretstate(sk, 0xd)); 257 | return 0; 258 | } 259 | 260 | int kretprobe__inet6_getname(struct pt_regs *ctx, struct sock *sk) 261 | { 262 | bpf_trace_printk("%llx\n", getretstate(sk, 0xe)); 263 | return 0; 264 | } 265 | 266 | ` 267 | /* Kernel probe/retprobe point */ 268 | var ProbePoint []string = []string{"tcp_recvmsg"} 269 | 270 | var RetProbePoint []string = []string{"tcp_v6_init_sock","tcp_v6_connect","tcp_sendmsg","tcp_recvmsg","tcp_close","tcp_shutdown","tcp_setsockopt","tcp_getsockopt","inet_accept","inet_listen", "tcp_ioctl", "inet6_bind", "inet6_getname","inet6_ioctl"} 271 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/ebpf_sample/ebpftext_sendmsg.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | /* High-32-bit: |-----|-sk_state-|-flags-|-sk_shutdown--|--state--| 4 | * |-----|---4bit---|--4bit-|-----2bit-----|--4bit---| 5 | * Low-32-bit: |-func-id-|---branch-related-argument---|--weight-| 6 | * |--4-bit--|-------n-bit-----------------|--4bit---| 7 | * The highest n-bit was empty. You can fill it as your will. 8 | * Collect data for a specified function will generate too much useless 9 | * signals. Hight-32-bit is only for general purpos. 
10 | * In a monitored function, do not care too much about arguments 11 | * passed to called function. Just write another probe for it. 12 | */ 13 | 14 | const EbpfSingle string =` 15 | #include 16 | #include 17 | #define KBUILD_MODNAME "foo" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #define SOCK_STATE_OPT 0x1 27 | #define SK_SHUTDOWN_OPT 0x2 28 | #define SOCK_FLAGS_OPT 0x4 29 | #define SK_STATE_OPT 0x8 30 | #define SK_FLAGS_OPT 0x10 31 | #define SK_ERR_OPT 0x20 32 | 33 | #define STATE_MASK 0xe000000000000000 34 | #define RETSTATE_MASK 0xf000000000000000 35 | 36 | static uint64_t set_func_id(uint32_t id) 37 | { 38 | uint64_t state = 0; 39 | state |= ((id&0xf) << 28); 40 | return state &= 0xf0000000; 41 | } 42 | 43 | static uint64_t set_state(struct sock *sk, int opt) 44 | { 45 | uint64_t state = 0, tmp; 46 | u8 bitfield; 47 | 48 | if (opt&SOCK_STATE_OPT) { 49 | tmp = sk->sk_socket->state&0xf; 50 | state |= (tmp << 32); 51 | } 52 | // SHUTDOWN_MASK 53 | if (opt&SK_SHUTDOWN_OPT) { 54 | tmp = sk->sk_shutdown&0x3; 55 | state |= (tmp << 36); 56 | } 57 | if (opt&SOCK_FLAGS_OPT) { 58 | tmp = sk->sk_socket->flags&0xf; 59 | state |= (tmp << 40); 60 | } 61 | //TCP_STATE_MASK 62 | if (opt&SK_STATE_OPT) { 63 | tmp = sk->sk_state&0xf; 64 | state |= (tmp << 44); 65 | } 66 | // SOL_SOCKET 67 | if (opt&SK_FLAGS_OPT) { 68 | tmp = sk->sk_flags&0xff; 69 | state |= (tmp << 48); 70 | } 71 | if (opt&SK_ERR_OPT) { 72 | if (sk->sk_err > 0) { 73 | tmp = 1; 74 | state |= (tmp << 49); 75 | } 76 | } 77 | return state; 78 | } 79 | 80 | static uint64_t set_mask(uint64_t state) 81 | { 82 | uint64_t tmp = STATE_MASK; 83 | return state|tmp; 84 | } 85 | 86 | // Don't case about which function give the state 87 | static uint64_t getretstate(struct sock *sk, int id) 88 | { 89 | uint64_t state = 0, tmp = 0; 90 | u8 bitfield; 91 | 92 | state |= set_state(sk, 
SOCK_STATE_OPT|SK_SHUTDOWN_OPT|SOCK_FLAGS_OPT|SK_STATE_OPT|SK_FLAGS_OPT|SK_ERR_OPT); 93 | // nonagle, repair 94 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 95 | if (bitfield&0xf0) { 96 | tmp = bitfield&0xf0; 97 | state |= ((tmp>>4) << 48); 98 | } 99 | tmp = 0x1; 100 | if (bitfield&0x2) 101 | state |= tmp << 52; 102 | 103 | // defer_connect 104 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&inet_sk(sk)->rcv_tos)-1); 105 | if (bitfield&0xf0) { 106 | tmp = bitfield&0xf0; 107 | state = state | ((tmp>>4) << 53); 108 | } 109 | 110 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 111 | tmp = tcp_sk(sk)->repair_queue & 0x3; 112 | state |= (tmp << 57); 113 | 114 | 115 | // tp->fastopen_req 116 | if (tcp_sk(sk)->fastopen_req) 117 | state |= (0x1 << 16); 118 | if (tcp_sk(sk)->fastopen_rsk != NULL) 119 | state |= (0x1 << 17); 120 | 121 | tmp = RETSTATE_MASK; 122 | return state|tmp; 123 | } 124 | 125 | int kretprobe__tcp_v6_init_sock(struct pt_regs *ctx, struct sock *sk) 126 | { 127 | bpf_trace_printk("%llx\n", getretstate(sk,0)); 128 | return 0; 129 | } 130 | 131 | int kretprobe__tcp_v6_connect(struct pt_regs *ctx, struct sock *sk) 132 | { 133 | bpf_trace_printk("%llx\n", getretstate(sk, 1)); 134 | return 0; 135 | } 136 | 137 | int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk, struct msghdr *msg, size_t size) 138 | { 139 | uint64_t state = set_func_id(0x2), tmp = 0; 140 | u8 bitfield; 141 | 142 | tmp = sk->sk_state&0xf; 143 | if(tmp == TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_SYN_SENT) 144 | state |= ((tmp&0xf) << 32); 145 | 146 | tmp = sk->sk_shutdown&0x3; 147 | if(tmp == SEND_SHUTDOWN) 148 | state |= ((tmp&0x3) << 36); 149 | 150 | tmp = sk->sk_flags&0xff; 151 | if(tmp == SOCK_ZEROCOPY) 152 | state |= ((tmp&0xff) << 40); 153 | 154 | // nonagle, repair 155 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 156 | if 
(bitfield&0xf0) { 157 | tmp = bitfield&0xf0; 158 | state |= ((tmp>>4) << 48); 159 | } 160 | tmp = 0x1; 161 | if (bitfield&0x2) 162 | state |= tmp << 52; 163 | 164 | // defer_connect 165 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&inet_sk(sk)->rcv_tos)-1); 166 | if (bitfield&0xf0) { 167 | tmp = bitfield&0xf0; 168 | state = state | ((tmp>>4) << 53); 169 | } 170 | 171 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 172 | tmp = tcp_sk(sk)->repair_queue & 0x3; 173 | state |= (tmp << 57); 174 | 175 | 176 | // tp->fastopen_req 177 | if (tcp_sk(sk)->fastopen_req) 178 | state |= (0x1 << 16); 179 | if (tcp_sk(sk)->fastopen_rsk != NULL) 180 | state |= (0x1 << 17); 181 | 182 | // From syscalls argument 183 | // msg->msg_controllen 184 | /*if (msg->msg_controllen) 185 | state |= (0x1 << 20); 186 | // msg->msg_flags 187 | if (msg->msg_flags&MSG_OOB) 188 | state |= (0x1 << 21); 189 | // msg->msg_flags 190 | if (msg->msg_flags&MSG_MORE) 191 | state |= (0x1 << 22); 192 | // msg->msg_flags 193 | if (msg->msg_flags&MSG_EOR) 194 | state |= (0x1 << 23); 195 | // msg->msg_flags 196 | if (msg->msg_flags&MSG_ZEROCOPY) 197 | state |= (0x1 << 24); 198 | // msg_data_left 199 | if (msg->msg_iter.count) 200 | state |= (0x1 << 27);*/ 201 | 202 | state = set_mask(state); 203 | bpf_trace_printk("%llx\n", state); 204 | return 0; 205 | } 206 | 207 | int kretprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk) 208 | { 209 | bpf_trace_printk("%llx\n", getretstate(sk, 2)); 210 | return 0; 211 | } 212 | 213 | int kretprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk) 214 | { 215 | bpf_trace_printk("%llx\n", getretstate(sk, 3)); 216 | return 0; 217 | } 218 | 219 | int kretprobe__tcp_close(struct pt_regs *ctx, struct sock *sk) 220 | { 221 | bpf_trace_printk("%llx\n", getretstate(sk, 4)); 222 | return 0; 223 | } 224 | 225 | int kretprobe__tcp_shutdown(struct pt_regs *ctx, struct sock *sk) 226 | { 227 | bpf_trace_printk("%llx\n", getretstate(sk, 5)); 228 | return 0; 
229 | } 230 | 231 | int kretprobe__tcp_setsockopt(struct pt_regs *ctx, struct sock *sk) 232 | { 233 | bpf_trace_printk("%llx\n", getretstate(sk, 6)); 234 | return 0; 235 | } 236 | 237 | int kretprobe__tcp_getsockopt(struct pt_regs *ctx, struct sock *sk) 238 | { 239 | bpf_trace_printk("%llx\n", getretstate(sk, 7)); 240 | return 0; 241 | } 242 | 243 | int kretprobe__inet_accept(struct pt_regs *ctx, struct socket *sock, struct socket* newsock) 244 | { 245 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 8)); 246 | bpf_trace_printk("%llx\n", getretstate(newsock->sk, 9)); 247 | return 0; 248 | } 249 | 250 | int kretprobe__inet_listen(struct pt_regs *ctx, struct socket *sock) 251 | { 252 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 0xa)); 253 | return 0; 254 | } 255 | 256 | int kretprobe__tcp_ioctl(struct pt_regs *ctx, struct sock *sk) 257 | { 258 | bpf_trace_printk("%llx\n", getretstate(sk, 0xb)); 259 | return 0; 260 | } 261 | 262 | int kretprobe__inet6_bind(struct pt_regs *ctx, struct sock *sk) 263 | { 264 | bpf_trace_printk("%llx\n", getretstate(sk, 0xc)); 265 | return 0; 266 | } 267 | 268 | int kretprobe__inet6_ioctl(struct pt_regs *ctx, struct sock *sk) 269 | { 270 | bpf_trace_printk("%llx\n", getretstate(sk, 0xd)); 271 | return 0; 272 | } 273 | 274 | int kretprobe__inet6_getname(struct pt_regs *ctx, struct sock *sk) 275 | { 276 | bpf_trace_printk("%llx\n", getretstate(sk, 0xe)); 277 | return 0; 278 | } 279 | 280 | ` 281 | /* Kernel probe/retprobe point */ 282 | var ProbePoint []string = []string{"tcp_sendmsg"} 283 | 284 | var RetProbePoint []string = []string{"tcp_v6_init_sock","tcp_v6_connect","tcp_sendmsg","tcp_recvmsg","tcp_close","tcp_shutdown","tcp_setsockopt","tcp_getsockopt","inet_accept","inet_listen", "tcp_ioctl", "inet6_bind", "inet6_getname","inet6_ioctl"} 285 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/parse/parse.go: 
-------------------------------------------------------------------------------- 1 | package parse 2 | 3 | import ( 4 | "log" 5 | ) 6 | 7 | /* Map socket state to readable kernel macro */ 8 | var sock_type = map[uint32]string { 9 | 1:"SOCK_DGRAM", 10 | 2:"SOCK_STREAM", 11 | 3:"SOCK_RAW", 12 | 4:"SOCK_RDM", 13 | 5:"SOCK_SEQPACKET", 14 | 6:"SOCK_DCCP", 15 | 10:"SOCK_PACKET", 16 | } 17 | 18 | var sock_state = map[uint32]string { 19 | 0:"SS_FREE", 20 | 1:"SS_UNCONNECTED", 21 | 2:"SS_CONNECTING", 22 | 3:"SS_CONNECTED", 23 | 4:"SS_DISCONNECYING", 24 | } 25 | 26 | var sock_flags = map[uint32]string { 27 | 2:"SOCK_NOSPACE", 28 | 3:"SOCK_PASSCRED", 29 | 4:"SOCK_PASSEC", 30 | } 31 | 32 | type flag struct { 33 | mask uint32 34 | shift uint32 35 | flagType map[uint32]string 36 | } 37 | 38 | /* flag structure, refer to ebpf/ebpf.go ebpf text */ 39 | func ParseFlags(rawSignal uint32) { 40 | var Signal = []flag { 41 | flag {mask:0x7, flagType:sock_flags, shift:0}, 42 | flag {mask:0xf, flagType:sock_type, shift:4}, 43 | flag {mask:0x7, flagType:sock_state, shift:8}, 44 | } 45 | 46 | for _, s := range Signal { 47 | parseFlag(rawSignal, s.mask, s.flagType, s.shift) 48 | } 49 | } 50 | 51 | func parseFlag(rawsignal uint32, mask uint32, flagtype map[uint32]string, shift uint32) { 52 | log.Printf("%s:%x covered", flagtype[(rawsignal&(mask<>shift], (rawsignal&(mask<>shift) 53 | } 54 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/pipe_monitor.go: -------------------------------------------------------------------------------- 1 | /* This monitor only collect single socket state, without any track. 2 | * It only know if a new state was detected, but know nothing about 3 | * which socket does the state belong to. 
4 | */ 5 | 6 | package main 7 | 8 | import ( 9 | "os" 10 | "log" 11 | "fmt" 12 | "regexp" 13 | "flag" 14 | "strconv" 15 | 16 | "github.com/iovisor/gobpf/pkg/tracepipe" 17 | "github.com/iovisor/gobpf/bcc" 18 | 19 | "./ebpf" 20 | ) 21 | 22 | import "C" 23 | func main() { 24 | /* redirect stderr, there are some ebpf log or warning */ 25 | debug := flag.Bool("debug", false, "More debug information about ebpf") 26 | flag.Parse() 27 | _, w, _ := os.Pipe() 28 | old := os.Stderr 29 | if(!*debug) { 30 | old.Close() 31 | os.Stderr = w 32 | } 33 | 34 | /* ebpf text is in ebpf/ebpftext.go */ 35 | source := ebpf.EbpfInit() 36 | m := bcc.NewModule(source, []string{}) 37 | defer m.Close() 38 | /* Be sure your hook function named as "kprobe__KERN_FUNCNAME" */ 39 | ebpf.Attachs(m) 40 | 41 | tp, err := tracepipe.New() 42 | if err != nil { 43 | log.Fatal(err) 44 | } 45 | defer tp.Close() 46 | 47 | if (!*debug) { 48 | w.Close() 49 | os.Stderr = old 50 | } 51 | 52 | rawMessage, errMessage := tp.Channel() 53 | re := regexp.MustCompile("syz-executor") 54 | for (true) { 55 | select { 56 | case te := <- rawMessage: 57 | /* syz-exec has it own pid namespace 58 | * pick out those pid under the namespace can be more accurate 59 | */ 60 | if(re.FindString(te.Task) == "") { 61 | continue 62 | } 63 | rawSignal, err := strconv.ParseUint(te.Message, 16, 64) 64 | if (err != nil) { 65 | log.Println("Wrong rawSignal") 66 | continue 67 | } 68 | fmt.Printf("%016x\n", rawSignal) 69 | case err := <- errMessage: 70 | log.Fatal(err) 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/state/state.go: -------------------------------------------------------------------------------- 1 | package state 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "os" 7 | "regexp" 8 | "strings" 9 | "log" 10 | ) 11 | 12 | /* In a syscall, Several messages from ebpf contain: 13 | * several kernel probe points and socketstates. 
14 | * a syscall with a OpsId 15 | */ 16 | type Ops struct { 17 | OpsId int 18 | KprobePoint string 19 | SocketState map[uint64]uint64 20 | } 21 | 22 | /* record all the state collect by ebpf, a sock with a SockState */ 23 | type SockState struct { 24 | SockState []uint64 25 | SockOps []string 26 | } 27 | 28 | /* Only record the state coverage */ 29 | var StateList []uint64 30 | 31 | /* A syscall with a Ops */ 32 | var OpsList []Ops 33 | 34 | var id int = 0 35 | var tmp uint64 = 0 36 | 37 | /* Handle a message from ebpf */ 38 | func Handle(msg string) { 39 | key, value := extract(msg) 40 | /* Three type of message, refer the ebpf/ebpf.go */ 41 | switch key { 42 | case "[KPROBE_P]": 43 | OpsList = append(OpsList, newops(id, value)) 44 | tmp = 0 45 | id = len(OpsList) - 1 46 | OpsList[id].SocketState = make(map[uint64]uint64) 47 | case "[SOCKET_ID]": 48 | if(strings.Contains(value, "ptrval")) { 49 | fmt.Println("Socket id miss") 50 | misshandle() 51 | return 52 | } 53 | sockid := str2int(value) 54 | if(id > len(OpsList)-1) { 55 | fmt.Println("id out of range") 56 | misshandle() 57 | return 58 | } 59 | if _, ok := OpsList[id].SocketState[sockid]; !ok { 60 | OpsList[id].addsock(sockid) 61 | tmp = sockid 62 | } 63 | case "socket_state": 64 | /* Only record state, know nothing about which socket is it */ 65 | StateList= append(StateList, str2int(value)) 66 | if(id > len(OpsList)-1) { 67 | fmt.Println("id out of range") 68 | misshandle() 69 | return 70 | } 71 | 72 | if _, ok := OpsList[id].SocketState[tmp]; ok { 73 | OpsList[id].SocketState[tmp] = str2int(value) 74 | return 75 | } 76 | default: 77 | fmt.Fprint(os.Stderr, "Unknow message:\n", msg) 78 | } 79 | } 80 | 81 | /* From "a syscall with a Ops" to "a socket with several state" */ 82 | func Socklist() { 83 | SockList := make(map[uint64]SockState) 84 | for _, ops := range OpsList { 85 | for skid, skst := range ops.SocketState { 86 | var tmps SockState 87 | tmps = SockList[skid] 88 | if (len(tmps.SockState) == 0) { 89 
| tmps.SockState = []uint64{skst} 90 | tmps.SockOps = []string{ops.KprobePoint} 91 | } else { 92 | tmps.SockState = append(tmps.SockState, skst) 93 | tmps.SockOps = append(tmps.SockOps, ops.KprobePoint) 94 | } 95 | SockList[skid] = tmps 96 | } 97 | } 98 | for skid, sock := range SockList { 99 | fmt.Println("Socket id is", skid) 100 | fmt.Printf("The state:%v\n", sock.SockState) 101 | fmt.Printf("The operations:%v\n", sock.SockOps) 102 | } 103 | } 104 | 105 | /* state change hash, as coverage signal in syzkall */ 106 | func hash(a uint64, b uint64) uint32{ 107 | a = a ^ b 108 | a = (a ^ 61) ^ (a >> 16) 109 | a = a + (a << 3) 110 | a = a ^ (a >> 4) 111 | a = a * 0x27d4eb2d 112 | a = a ^ (a >> 15) 113 | return uint32(a) 114 | } 115 | 116 | /* Only read state change, know nothing about state */ 117 | func SockStateHandle() []uint32 { 118 | var rawSignals []uint32 119 | if (len(OpsList) < 2) { 120 | rawSignals = append(rawSignals, 0xffffffff) 121 | return rawSignals 122 | } 123 | SockList := make(map[uint64]SockState) 124 | for _, ops := range OpsList { 125 | for skid, skst := range ops.SocketState { 126 | var tmps SockState 127 | tmps = SockList[skid] 128 | if (len(tmps.SockState) == 0) { 129 | tmps.SockState = []uint64{skst} 130 | tmps.SockOps = []string{ops.KprobePoint} 131 | } else { 132 | tmps.SockState = append(tmps.SockState, skst) 133 | tmps.SockOps = append(tmps.SockOps, ops.KprobePoint) 134 | } 135 | SockList[skid] = tmps 136 | } 137 | } 138 | for _, sock := range SockList { 139 | for i := 0; i < len(sock.SockState)-1; i++ { 140 | rawSignals = append(rawSignals, hash(sock.SockState[i], sock.SockState[i+1])) 141 | } 142 | } 143 | rawSignals = append(rawSignals, 0xffffffff) 144 | return rawSignals 145 | } 146 | 147 | /* Read all state coverage */ 148 | func Statelist() []uint32{ 149 | var rawSignals []uint32 150 | log.Printf("%d signals in statelist\n", len(StateList)) 151 | for _, s := range StateList { 152 | rawSignals = append(rawSignals, uint32(s)) 153 | } 
154 | rawSignals = append(rawSignals, 0xffffffff) 155 | return rawSignals 156 | } 157 | 158 | /* Read Opslist */ 159 | func Opslist() { 160 | fmt.Println("There are", id, "operations of socket") 161 | for _, ops := range OpsList { 162 | fmt.Println("Kprobe point is:", ops.KprobePoint) 163 | for id, ss := range ops.SocketState { 164 | fmt.Println("Socket id is:", id) 165 | fmt.Println("Socket state:", ss) 166 | } 167 | fmt.Println("") 168 | } 169 | } 170 | 171 | /* Clear historical data */ 172 | func Stateclear() { 173 | if (len(OpsList) == 0) { 174 | return 175 | } 176 | OpsList = OpsList[0:0] 177 | StateList = StateList[0:0] 178 | tmp = 1 179 | id = 1 180 | } 181 | 182 | func (ops Ops)addsock(sockid uint64) { 183 | ops.SocketState[sockid] = 0 184 | } 185 | 186 | func extract(msg string) (key string, value string) { 187 | rkey := regexp.MustCompile(".*:") 188 | rvalue := regexp.MustCompile(":.*") 189 | key = rkey.FindString(msg) 190 | value = rvalue.FindString(msg) 191 | key = key[:len(key)-1] 192 | value = value[1:] 193 | return key, value 194 | } 195 | 196 | func str2int(str string) uint64 { 197 | ret, err := strconv.ParseUint(str, 16, 64) 198 | if err != nil { 199 | fmt.Fprint(os.Stderr, "Invaliable socket ID", err, "\n") 200 | } 201 | return ret 202 | } 203 | 204 | func newops(id int, kprobepoint string) Ops { 205 | ops := new(Ops) 206 | ops.OpsId = id 207 | ops.KprobePoint = kprobepoint 208 | return *ops 209 | } 210 | 211 | /* Handle unexpect message */ 212 | func misshandle(){ 213 | fmt.Println("Miss handle, historical data may be clean") 214 | } 215 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/syz_patch/kstate/0004-Support-retState-ebpfsig-resource.patch: -------------------------------------------------------------------------------- 1 | From 5d5632a31d9185118c2d48c4c4a02babb0e2f2a1 Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Thu, 2 Apr 2020 23:03:44 -0400 4 | Subject: [PATCH 4/5] Support 
retState ebpfsig resource 5 | 6 | --- 7 | executor/executor.cc | 7 ++++--- 8 | pkg/rpctype/rpctype.go | 1 + 9 | prog/clone.go | 5 +++-- 10 | prog/prog.go | 1 + 11 | prog/rand.go | 2 +- 12 | syz-fuzzer/proc.go | 16 ++++++++++++++++ 13 | syz-manager/html.go | 4 ++++ 14 | 7 files changed, 30 insertions(+), 6 deletions(-) 15 | 16 | diff --git a/executor/executor.cc b/executor/executor.cc 17 | index 69fa44ca..964b3ec4 100644 18 | --- a/executor/executor.cc 19 | +++ b/executor/executor.cc 20 | @@ -36,6 +36,7 @@ 21 | #endif 22 | 23 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 24 | +/* refer to the ebpf text */ 25 | #define RETSTATE_SIG_MASK 0xf000000000000000 26 | #define STATE_SIG_MASK 0xe000000000000000 27 | #define COVERAGE_SIG_MASK 0xd0000000ffffffff 28 | @@ -924,9 +925,9 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 29 | uint64_t state = 0; 30 | if (ret > 0) { 31 | state = strtoul(buf, NULL, 16); 32 | - /* state signal start with 0xf 33 | - * Write out real state for fuzzer's further handle 34 | - */ 35 | + /* state/retstate signal start with 0xe/0xf 36 | + * Write out real state for fuzzer's further handle 37 | + */ 38 | write_output64(state); 39 | debug("A state signal 0x%016lx\n", state); 40 | n = 0; 41 | diff --git a/pkg/rpctype/rpctype.go b/pkg/rpctype/rpctype.go 42 | index fcc01a99..ac99e73c 100644 43 | --- a/pkg/rpctype/rpctype.go 44 | +++ b/pkg/rpctype/rpctype.go 45 | @@ -16,6 +16,7 @@ type RPCInput struct { 46 | Prog []byte 47 | Signal signal.Serial 48 | Cover []uint32 49 | + SpeRes bool 50 | } 51 | 52 | type RPCCandidate struct { 53 | diff --git a/prog/clone.go b/prog/clone.go 54 | index c95ae206..6f5cdd64 100644 55 | --- a/prog/clone.go 56 | +++ b/prog/clone.go 57 | @@ -9,8 +9,9 @@ import ( 58 | 59 | func (p *Prog) Clone() *Prog { 60 | p1 := &Prog{ 61 | - Target: p.Target, 62 | - Calls: make([]*Call, len(p.Calls)), 63 | + Target: p.Target, 64 | + Calls: make([]*Call, len(p.Calls)), 65 | + Resource: p.Resource, 
66 | } 67 | newargs := make(map[*ResultArg]*ResultArg) 68 | for ci, c := range p.Calls { 69 | diff --git a/prog/prog.go b/prog/prog.go 70 | index 1600c0a2..1ff27046 100644 71 | --- a/prog/prog.go 72 | +++ b/prog/prog.go 73 | @@ -11,6 +11,7 @@ type Prog struct { 74 | Target *Target 75 | Calls []*Call 76 | Comments []string 77 | + Resource bool 78 | } 79 | 80 | type Call struct { 81 | diff --git a/prog/rand.go b/prog/rand.go 82 | index 8583fbdc..2b8fcc2a 100644 83 | --- a/prog/rand.go 84 | +++ b/prog/rand.go 85 | @@ -856,7 +856,7 @@ func (r *randGen) resourceCentric(s *state, t *ResourceType) (arg Arg, calls []* 86 | for idx := range r.Perm(len(s.corpus)) { 87 | p = s.corpus[idx].Clone() 88 | resources := getCompatibleResources(p, t.TypeName, r) 89 | - if len(resources) > 0 { 90 | + if len(resources) > 0 && p.Resource { 91 | resource = resources[r.Intn(len(resources))] 92 | break 93 | } 94 | diff --git a/syz-fuzzer/proc.go b/syz-fuzzer/proc.go 95 | index 0cb186b4..dabe3ce2 100644 96 | --- a/syz-fuzzer/proc.go 97 | +++ b/syz-fuzzer/proc.go 98 | @@ -162,12 +162,19 @@ func (proc *Proc) triageInput(item *WorkTriage) { 99 | data := item.p.Serialize() 100 | sig := hash.Hash(data) 101 | 102 | + item.p.Resource = true 103 | + /* base ebpfsig retstate feedback */ 104 | + if proc.fuzzer.config.Flags&ipc.FlagEnableEbpfSignal != 0 { 105 | + item.p.Resource = getResourceFlagFromSignal(item.info.Signal) 106 | + } 107 | + 108 | log.Logf(2, "added new input for %v to corpus:\n%s", logCallName, data) 109 | proc.fuzzer.sendInputToManager(rpctype.RPCInput{ 110 | Call: callName, 111 | Prog: data, 112 | Signal: inputSignal.Serialize(), 113 | Cover: inputCover.Serialize(), 114 | + SpeRes: item.p.Resource, 115 | }) 116 | 117 | proc.fuzzer.addInputToCorpus(item.p, inputSignal, sig) 118 | @@ -200,6 +207,15 @@ func getSignalAndCover(p *prog.Prog, info *ipc.ProgInfo, call int) (signal.Signa 119 | return signal.FromRaw(inf.Signal, signalPrio(p, inf, call)), inf.Cover 120 | } 121 | 122 | +func 
getResourceFlagFromSignal(Signal []uint64) bool { 123 | + for _, sig := range Signal { 124 | + if sig&0xf000000000000000 == 0xf000000000000000 { 125 | + return true 126 | + } 127 | + } 128 | + return false 129 | +} 130 | + 131 | func (proc *Proc) smashInput(item *WorkSmash) { 132 | if proc.fuzzer.faultInjectionEnabled && item.call != -1 { 133 | proc.failCall(item.p, item.call) 134 | diff --git a/syz-manager/html.go b/syz-manager/html.go 135 | index 46d61eda..66e0bfa9 100644 136 | --- a/syz-manager/html.go 137 | +++ b/syz-manager/html.go 138 | @@ -195,6 +195,7 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) { 139 | Short: p.String(), 140 | Cover: len(inp.Cover), 141 | Signal: inp.Signal.Deserialize(), 142 | + SpeRes: inp.SpeRes, 143 | }) 144 | } 145 | sort.Slice(data.Inputs, func(i, j int) bool { 146 | @@ -607,6 +608,7 @@ type UIInput struct { 147 | Short string 148 | Cover int 149 | Signal signal.Signal 150 | + SpeRes bool 151 | } 152 | 153 | var summaryTemplate = html.CreatePage(` 154 | @@ -752,11 +754,13 @@ var corpusTemplate = html.CreatePage(` 155 | 156 | Coverage 157 | Program 158 | + Resource 159 | 160 | {{range $inp := $.Inputs}} 161 | 162 | {{$inp.Cover}} 163 | {{$inp.Short}} 164 | + {{$inp.SpeRes}} 165 | 166 | {{end}} 167 | 168 | -- 169 | 2.20.1 170 | 171 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/syz_patch/kstate/0005-Add-monitor-binary-and-function-pcs-options-to-manag.patch: -------------------------------------------------------------------------------- 1 | From 5fbf7df6dae52f1ef8375e7f8d2118828e07e03f Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Thu, 2 Apr 2020 23:06:27 -0400 4 | Subject: [PATCH 5/5] Add monitor binary and function pcs options to manager 5 | configure options 6 | 7 | --- 8 | executor/bitmap.h | 2 +- 9 | executor/common_linux.h | 2 +- 10 | pkg/mgrconfig/config.go | 2 ++ 11 | syz-manager/manager.go | 11 +++++++++++ 12 | 4 files changed, 15 
insertions(+), 2 deletions(-) 13 | 14 | diff --git a/executor/bitmap.h b/executor/bitmap.h 15 | index cb00e543..785513ba 100644 16 | --- a/executor/bitmap.h 17 | +++ b/executor/bitmap.h 18 | @@ -8,7 +8,7 @@ uint32* func_pcs; 19 | 20 | uint32 readPcs() 21 | { 22 | - FILE* f = fopen("/root/funcaddr.map", "r"); 23 | + FILE* f = fopen("/funcaddr.map", "r"); 24 | uint32 count = 0; 25 | if (f == NULL) 26 | return -1; 27 | diff --git a/executor/common_linux.h b/executor/common_linux.h 28 | index b64f413c..4046ae8b 100644 29 | --- a/executor/common_linux.h 30 | +++ b/executor/common_linux.h 31 | @@ -2707,7 +2707,7 @@ static int do_sandbox_none(void) 32 | close(monpipefd[0]); 33 | close(monpipefd[1]); 34 | debug("single ebpf start ...\n"); 35 | - execl("/root/pipe_monitor", "/root/pipe_monitor", "--debug", NULL); 36 | + execl("/pipe_monitor", "/pipe_monitor", "--debug", NULL); 37 | return 0; 38 | } 39 | /* ebpf loading is very slow, one time a vm restart */ 40 | diff --git a/pkg/mgrconfig/config.go b/pkg/mgrconfig/config.go 41 | index ac618096..e32a5cde 100644 42 | --- a/pkg/mgrconfig/config.go 43 | +++ b/pkg/mgrconfig/config.go 44 | @@ -119,4 +119,6 @@ type Config struct { 45 | SyzFuzzerBin string `json:"-"` 46 | SyzExecprogBin string `json:"-"` 47 | SyzExecutorBin string `json:"-"` 48 | + SyzMonitorBin string `json:"ebpfmonitor"` 49 | + SyzCovPcs string `json:"coverpcs"` 50 | } 51 | diff --git a/syz-manager/manager.go b/syz-manager/manager.go 52 | index 77379468..f6d88e3f 100644 53 | --- a/syz-manager/manager.go 54 | +++ b/syz-manager/manager.go 55 | @@ -556,6 +556,17 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) { 56 | } 57 | } 58 | 59 | + /* scp ebpf monitor binary to machine */ 60 | + _, err = inst.Copy(mgr.cfg.SyzMonitorBin) 61 | + if err != nil { 62 | + return nil, fmt.Errorf("failed to copy binary: %v", err) 63 | + } 64 | + /* scp coverage filter pcs to machine */ 65 | + _, err = inst.Copy(mgr.cfg.SyzCovPcs) 66 | + if err != nil { 67 | + return nil, 
fmt.Errorf("failed to copy binary: %v", err) 68 | + } 69 | + 70 | fuzzerV := 0 71 | procs := mgr.cfg.Procs 72 | if *flagDebug { 73 | -- 74 | 2.20.1 75 | 76 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/tcp-ipv6/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "target": "linux/amd64", 3 | "http": "", 4 | "workdir": "", 5 | "kernel_obj": "", 6 | "image": "syzkalls.img", 7 | "sshkey": "/root/.ssh/id_rsa", 8 | "syzkaller": "/root/syzkalls/", 9 | "ebpfmonitor": "$PATH_TO_YOUR_EBPF_MONITOR", 10 | "sandbox": "none", 11 | "ebpfsig": true, 12 | "covfilter": true, 13 | "name": "debian-4.17-ipv4_tcp", 14 | "hub_client":"debian-4.17-ipv4_tcp", 15 | "hub_addr":"", 16 | "hub_key":"", 17 | "hub_synctime": 5, 18 | "suppressions": ["some known bug"], 19 | "enable_syscalls": ["ioctl$sock_inet6_tcp_SIOCATMARK", "ioctl$sock_inet_tcp_SIOCOUTQ", "ioctl$sock_inet6_tcp_SIOCOUTQ", "ioctl$sock_inet_tcp_SIOCOUTQNSD", "ioctl$sock_inet6_tcp_SIOCOUTQNSD", "setsockopt$inet6_tcp_TLS_RX", "setsockopt$inet6_tcp_TLS_TX", "setsockopt$inet6_tcp_TCP_FASTOPEN_KEY", "getsockopt$inet6_tcp_TCP_REPAIR_WINDOW", "setsockopt$inet6_tcp_TCP_REPAIR_WINDOW", "setsockopt$inet6_tcp_TCP_REPAIR_OPTIONS", "setsockopt$inet6_tcp_TCP_QUEUE_SEQ", "setsockopt$inet6_tcp_TCP_REPAIR_QUEUE", "setsockopt$inet6_tcp_TCP_REPAIR", "setsockopt$inet6_tcp_TCP_MD5SIG", "setsockopt$inet6_tcp_TCP_ULP", "setsockopt$inet6_tcp_TCP_CONGESTION", "getsockopt$inet6_tcp_TCP_ZEROCOPY_RECEIVE", "getsockopt$inet6_tcp_buf", "setsockopt$inet6_tcp_buf", "getsockopt$inet6_tcp_int", "setsockopt$inet6_tcp_int", "socket$inet6_tcp", "accept$inet6", "accept4$inet6", "bind$inet6", "connect$inet6", "sendmsg$inet6_tcp","sendto$inet6_tcp","sendmmsg$inet6_tcp", "recvfrom$inet6", "listen", "shutdown", "close", "syz_emit_ethernet", "syz_extract_tcp_res$synack", "syz_extract_tcp_res", "setsockopt$sock_int", "setsockopt$sock_linger", 
"setsockopt$sock_void", "setsockopt$SO_TIMESTAMP", "setsockopt$SO_TIMESTAMPING", "setsockopt$SO_BINDTODEVICE", "setsockopt$SO_ATTACH_FILTER"], 20 | "procs": 1, 21 | "type": "qemu", 22 | "vm": { 23 | "count": 1, 24 | "cpu": 6, 25 | "mem": 18384, 26 | "kernel": "boot/bzImage", 27 | "initrd": "initrd.img", 28 | "cmdline": "kmemleak=on reboot=warm net.ifnames=tap0 console=ttyS0 vsyscall=native rodata=n oops=panic panic_on_warn=1 panic=0 ftrace_dump_on_oops=orig_cpu earlyprintk=serial slub_debug=UZ root=/dev/sda1", 29 | "qemu_args": "-enable-kvm" 30 | 31 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/tcp-ipv6/data.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/kstat_demo/tcp-ipv6/data.tar.bz2 -------------------------------------------------------------------------------- /syzkaller/kstat_demo/tcp-ipv6/test.md: -------------------------------------------------------------------------------- 1 | # A sample of tcp-ipv6 fuzz 2 | 3 | ## Feature of customize syzkaller 4 | 1. Socket state( historical state) feedback. 5 | 2. Coverage filter 6 | 7 | I implement runtime state( sk->sk_state, tp->repair ...) feedback by using ebpf. ebpf collect a 64-bit state send to executor. The feedback looks like original syzkaller coverage signal. Send these signals to fuzzer. Coverage filter implement in executor. You can get kernel function address region use the [fun2addr](https://github.com/hardenedlinux/harbian-qa/blob/master/syz_patch/fun2addr.go). 
8 | 9 | ## Usage 10 | Command for building the ebpf monitor: 11 | ``` 12 | cd harbian-qa/syzkaller/kstat_demo 13 | mv kstat_demo/ebpf_sample/$YOUR_EBPF.go kstat_demo/ebpf/ebpftext.go 14 | go build pipe_monitor 15 | ``` 16 | Command for patching syzkaller: 17 | ``` 18 | cd /path/to/your/syzkaller/source 19 | git checkout bc2c6e45 20 | git apply /path/to/harbian-qa/syzkaller/kstat_demo/$PATCH_YOU_NEED.patch 21 | ``` 22 | After patching syzkaller, to filter coverage, the addresses in executor/cov_filter.h should fit your kernel. Use fun2addr as: 23 | ``` 24 | bin/syz-func2addr -v PATH_to_YOUR_VMLINUX -f FUNC_NAME -s 25 | ``` 26 | Get the addresses of all functions you want to test, and write them to cov_filter.h. Then run make as in original syzkaller to build it. 27 | 28 | ## Testcase 29 | I ran both the original and the customized syzkaller six times, two hours per run. The enabled syscalls are extracted from socket_inet6.txt and socket_inet_tcp.txt using this [tool](https://github.com/hardenedlinux/harbian-qa/blob/master/syz_patch/extract_syscall_names_from_prog.py). There are also some syscalls for ipv4_tcp that have to be removed by hand. 30 | This is some coverage (customized vs. original in the table) of functions which are monitored by my ebpf: 31 | 32 | |kern_func | 1 | 2 | 3 | 4 | 5 | 6 | 33 | | -------- | - | - | - | - | - | - | 34 | | tcp_v6_connect | 44/45 | 44/44 | 45/45 | 45/46 | 46/44 | 45/45 | 35 | | tcp_sendmsg_locked | 73/71 | 19/18 | 77/48 | 73/20 | 73/17 | 72/20 | 36 | | tcp_recvmsg | 54/33 | 35/33 | 35/33 | 54/36 | 36/33 | 48/36 | 37 | | tcp_setsockopt | 83/80 | 80/81 | 84/79 | 84/82 | 82/81 | 84/83 | 38 | | tcp_getsockopt | 61/59 | 57/59 | 56/57 | 61/60 | 58/58 | 60/58 | 39 | | inet_accept | 2/2 | 2/2 | 2/2 | 2/2 | 2/2 | 2/2 | 40 | | tcp_ioctl | 9/9 | 9/9 | 9/9 | 9/9 | 9/9 | 9/9 | 41 | 42 | In another example, I ran both the original and the customized syzkaller six times, 2.5 hours per run. The numbers in brackets mean: how many times syzkaller covered this line in the 6 runs, customized vs. original.
It can be seen that these lines can be easily covered in the customized syzkaller: 43 | #### tp->repair/tp->repair_queue 44 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L1233 (5:0) 45 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L2687 (6:0) 46 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L2689 (5:0) 47 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L3106 (5:2) 48 | 49 | #### sk->sk_state 50 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L1259 (6:0) 51 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L2137 (6:2) 52 | Why can the customized syzkaller cover these lines more efficiently? Refer to [this](../README.md). 53 | 54 | ## Conclusion 55 | 1. Greater coverage than original syzkaller, especially in function tcp_sendmsg. It is because historical state and nested conditions can be covered easily. We can see this in the second example. 56 | 57 | 2. The tcp_setsockopt coverage of the customized syzkaller is only a little more than original syzkaller's because of the powerful syscalls script. Most of the uncovered code is similar in original syzkaller. Is it because the powerful syscalls script and mutation are not enough? 58 | 59 | ## RawData 60 | * [Data](data.tar.bz2) is taken from the syzkaller web interface. It does not match the table. 61 | * The tests keep the same enabled syscalls, run time, vm environment... as much as possible. 62 | * Collecting different data as feedback (ebpftext) gives a greatly different result. -------------------------------------------------------------------------------- /syzkaller/kstate_resource.md: -------------------------------------------------------------------------------- 1 | # Kernel state based fuzzer: an LLVM approach 2 | 3 | ## Content 4 | 5 | 1. Usage. 6 | 2. Implementation detail. 7 | 3. Practice 8 | 9 | To collect kernel states as a syzkaller resource, we have to follow these steps: 10 | 11 | 1. Build the kernel with the GEPOperator tracker instrumentation. 12 | 2. 
Support collecting kernel state in syzkaller. 13 | 3. Weighted kernel states for the fuzzer. 14 | 15 | ## Usage 16 | 17 | ### Kernel instrument 18 | 19 | First, we need to implement an [LLVM pass](../static_analysis_tools/kern_instrument/AssignTrackerPass) to do the instrumentation. As we already know, lots of kernel states are located in some field of a structure. Tracking the store operations on a variable of a GEPointer can detect states which may help the fuzzer. Then, refer to [this document](https://llvm.org/docs/WritingAnLLVMPass.html) to build your compiler with the field assignment tracker. While building the kernel, you have to add a line such as: 20 | ``` 21 | CFLAGS_*.o = -Xclang -load -Xclang PATH_TO_YOUR_PASS.so -fno-discard-value-names 22 | ``` 23 | to the Makefile for the object file you need to instrument. The kernel state id is the crc of file name, structure name and field name. 24 | If you want to track the whole kernel, try to add the mentioned CFLAGS to kcov-flag-$(CONFIG_CC_HAS_SANCOV_TRACE_PC). 25 | 26 | ### Implement the instrument function in kernel 27 | 28 | Refer to our [implementation](../static_analysis_tools/kern_instrument/kern_patch) of the instrumentation to collect kernel state. Then, build your kernel as usual. 29 | 30 | ### Patch syzkaller 31 | 32 | Clone syzkaller, run: 33 | ``` 34 | cd PATH_TO_SYZ_SRC 35 | git checkout ff4a334 36 | git apply PATH_TO_harbian-qa/syz_patch/*.patch 37 | ``` 38 | 39 | build syzkaller as usual. Add the following lines to the configure file: 40 | 41 | ``` 42 | "kstate_filter": { 43 | "files": ["which_source_file_to_track", "base_on_filename_crc"], 44 | "states": ["which_struct_field_to_track", "base_on_struct_field_crc"] 45 | }, 46 | ``` 47 | 48 | You can use our tool [kstate_map](../static_analysis_tools/IRParser/kstate_map.cpp) to get the kernel state map.
run: 49 | 50 | ``` 51 | clang++-10 kstate_map.cpp -o kstate_map -O0 -g -fsanitize=address `llvm-config-10 --cxxflags --libs --ldflags --system-libs` 52 | ./kstate_map LLVM_IR_DIR ASM_DIR VMLINUX FUNCTION_LIST LOG_DIR 53 | ``` 54 | 55 | FUNCTION_LIST has the names of the functions whose addresses we need. 56 | IR_DIR: the directory with all the LLVM IR code we need. 57 | LOG_DIR: after running the command, kstate_map will create a "*.json" and a "*.state.map" for every function. 58 | Write the output to PATH_TO_KERNEL_STATE.map, and run the patched syzkaller as usual. This map assigns weights based on the frequency of state use. 59 | 60 | ## Kernel state based fuzzer 61 | Now, you can run syzkaller as usual, and you can find there is a list of kernel states if you access the "/input" interface. You can also get the state weight of every prog in the "/corpus" interface. 62 | 63 | ## Implementation detail of kernel state resource 64 | 65 | ### Kernel instrument 66 | 67 | We reuse the KCOV interface instead of using a separate mode. So, we encode the state id with 0xefe at the highest 12 bits. While syzkaller gets a kcov pc starting with 0xefe, it realizes this pc is a kstate id, and the value and address of the state will occupy the following 2*64 bits. No matter how many bits the variable has, we normalize it to 64 bits. Note that if you want to collect other information, you have to implement a corresponding syzkaller for it. 68 | 69 | ### Syzkaller support 70 | 71 | #### executor 72 | 73 | syz-executor has to pick out kernel states and send them out after all signals were sent. This handling can be found in our patch for executor.cc, function write_coverage_signal. While the executor reads a pc starting with 0xfefe, that means it receives a kernel state. And we use a chunk of shared memory for this state after the coverage signal shared memory. syz-fuzzer will handle them later.
74 | 75 | #### syz-fuzzer 76 | 77 | Correspondingly, parseOutput in pkg/ipc.go is called by the fuzzer, and we add a readKernState for parsing the executor output. This kernel state information will be put into a structure called KernState in pkg/kstate/kstate.go. Every input from the executor has an array of kernstates, and every prog has a state weight calculated from the kernstates. Also, KernState supports searching the map by its ID or by ID^Value, which is called their hash. 78 | 79 | syz-fuzzer/fuzzer.go: calStateWeight will calculate the resource weight of a prog. The count is subtracted to eliminate the influence of the kstate length. prog/rand.go: the chooseReaProgramIdx function implements a prioritized choice of prog based on its state weight. 80 | 81 | ## Kernel state guided fuzzing practice 82 | 83 | We have explored two ways of assigning weight to resources. 84 | 85 | #### Get the frequency of kernel state use 86 | 87 | This tool is the kstate_map we mentioned above. We use the LLVM API to statically analyze the use of states in target functions. Without any awareness of the value of a state, it just encourages the fuzzer to preferentially choose and extract those progs that frequently rewrite important states — in other words, progs that have complex states. 88 | 89 | #### Specify kernel state value weight 90 | 91 | We use a [clang checker](../static_analysis_tools/ConditionChecker/) to get symbolic information of condition constraints: 92 | 93 | ``` 94 | clang -Xclang -analyze -Xclang -analyzer-checker=debug.ConditionChecker ...... -c -o *.o *.c 95 | ``` 96 | 97 | You can get some constraint values of variables. The patched syzkaller supports a hash mode: if an ID^value can be found in the kstate map, it is used as a unique state. So, you can specify a weight for a state with a special value. Now, it can only be specified manually in the kstate map.
98 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0002-Add-coverage-filter.patch: -------------------------------------------------------------------------------- 1 | From aea85a5230e05eb01657437228d1f9dab5e8061d Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 21:41:44 -0500 4 | Subject: [PATCH 2/6] Add coverage filter 5 | 6 | --- 7 | executor/cov_filter.h | 40 ++++++++++++++++++++++++++++++++++++++++ 8 | executor/executor.cc | 12 ++++++++++++ 9 | 2 files changed, 52 insertions(+) 10 | create mode 100644 executor/cov_filter.h 11 | 12 | diff --git a/executor/cov_filter.h b/executor/cov_filter.h 13 | new file mode 100644 14 | index 00000000..726db4b3 15 | --- /dev/null 16 | +++ b/executor/cov_filter.h 17 | @@ -0,0 +1,40 @@ 18 | +typedef unsigned int uint32; 19 | + 20 | +struct code_region { 21 | + uint32 start; 22 | + uint32 end; 23 | +}; 24 | + 25 | +/* Address of kernel function for filtering coverage signal */ 26 | +static struct code_region white_list[] = { 27 | + // do_mmap 28 | + {.start = 0x81757755, .end = 0x81758690}, 29 | + //tcp_v6_init_sock 30 | + {.start = 0x8294f438, .end = 0x8294f438}, 31 | + //tcp_v6_connect 32 | + {.start = 0x82954272, .end = 0x829608c3}, 33 | + //tcp_sendmsg_locked 34 | + {.start = 0x8269ced5, .end = 0x826a06b4}, 35 | + //tcp_recvmsg 36 | + {.start = 0x826912ae, .end = 0x826941c1}, 37 | + //tcp_close 38 | + {.start = 0x826a1053, .end = 0x826a1e4f}, 39 | + //tcp_shutdown 40 | + {.start = 0x8268d7d0, .end = 0x8268d8ad}, 41 | + //do_tcp_setsockopt 42 | + {.start = 0x82697629, .end = 0x82699833}, 43 | + //do_tcp_getsockopt 44 | + {.start = 0x8268da50, .end = 0x82690af4}, 45 | + //inet_accept 46 | + {.start = 0x8277b5f0, .end = 0x8277bb82}, 47 | + //inet_listen 48 | + {.start = 0x8277ccde, .end = 0x8277d293}, 49 | + //tcp_ioctl 50 | + {.start = 0x82699e2a, .end = 0x8269a444}, 51 | + //__inet6_bind 52 | + {.start = 0x82869017, .end = 0x8286a8be}, 53 | + 
//inet6_getname 54 | + {.start = 0x828658df, .end = 0x82865e9d}, 55 | + //inet6_ioctl 56 | + {.start = 0x82866292, .end = 0x82866461}, 57 | +}; 58 | diff --git a/executor/executor.cc b/executor/executor.cc 59 | index 21373a4c..c9cc34a4 100644 60 | --- a/executor/executor.cc 61 | +++ b/executor/executor.cc 62 | @@ -16,6 +16,7 @@ 63 | #include 64 | 65 | #include "defs.h" 66 | +#include "cov_filter.h" 67 | 68 | #if defined(__GNUC__) 69 | #define SYSCALLAPI 70 | @@ -870,6 +871,15 @@ thread_t* schedule_call(int call_index, int call_num, bool colliding, uint64 cop 71 | } 72 | 73 | #if SYZ_EXECUTOR_USES_SHMEM 74 | +bool cover_filter(uint32 pc) 75 | +{ 76 | + for (uint32 i = 0; i < (sizeof(white_list) / sizeof(uint64)); i++) { 77 | + if ((pc >= white_list[i].start) && (pc <= white_list[i].end)) 78 | + return true; 79 | + } 80 | + return false; 81 | +} 82 | + 83 | template 84 | void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover_count_pos, int monpipe) 85 | { 86 | @@ -911,6 +921,8 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 87 | prev = hash(pc); 88 | if (dedup(sig)) 89 | continue; 90 | + if (!cover_filter(pc)) 91 | + continue; 92 | sig |= 0xd000000000000000; 93 | write_output64(sig & COVERAGE_SIG_MASK); 94 | nsig++; 95 | -- 96 | 2.20.1 97 | 98 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0003-Add-manager-configure-for-coverage-filter-and-ebpf-f.patch: -------------------------------------------------------------------------------- 1 | From 6f2595c08f2bc164fb18ab33ab5d3ce5c2ca8d1c Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 21:57:42 -0500 4 | Subject: [PATCH 3/6] Add manager configure for coverage filter and ebpf 5 | feedback 6 | 7 | --- 8 | executor/executor.cc | 16 +++++++++++----- 9 | pkg/instance/instance.go | 12 ++++++------ 10 | pkg/ipc/ipc.go | 2 ++ 11 | pkg/mgrconfig/config.go | 6 ++++++ 12 | pkg/mgrconfig/load.go 
| 2 ++ 13 | syz-fuzzer/fuzzer.go | 8 ++++++++ 14 | syz-manager/manager.go | 2 +- 15 | tools/syz-runtest/runtest.go | 2 +- 16 | 8 files changed, 37 insertions(+), 13 deletions(-) 17 | 18 | diff --git a/executor/executor.cc b/executor/executor.cc 19 | index c9cc34a4..28f0bdb1 100644 20 | --- a/executor/executor.cc 21 | +++ b/executor/executor.cc 22 | @@ -125,6 +125,8 @@ static bool flag_enable_net_dev; 23 | static bool flag_enable_net_reset; 24 | static bool flag_enable_cgroups; 25 | static bool flag_enable_close_fds; 26 | +static bool flag_enable_cover_filter; 27 | +static bool flag_enable_ebpf_signal; 28 | static bool flag_enable_devlink_pci; 29 | 30 | static bool flag_collect_cover; 31 | @@ -491,6 +493,8 @@ void parse_env_flags(uint64 flags) 32 | flag_enable_cgroups = flags & (1 << 9); 33 | flag_enable_close_fds = flags & (1 << 10); 34 | flag_enable_devlink_pci = flags & (1 << 11); 35 | + flag_enable_cover_filter = flags & (1 << 15); 36 | + flag_enable_ebpf_signal = flags & (1 << 16); 37 | } 38 | 39 | #if SYZ_EXECUTOR_USES_FORK_SERVER 40 | @@ -904,10 +908,12 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 41 | /* state signal start with 0xf 42 | * Write out real state for fuzzer's further handle 43 | */ 44 | - write_output64(state); 45 | - debug("A state signal %016lx\n", state); 46 | - n = 0; 47 | - nsig++; 48 | + if (flag_enable_ebpf_signal) { 49 | + write_output64(state); 50 | + debug("A state signal %016lx\n", state); 51 | + n = 0; 52 | + nsig++; 53 | + } 54 | } 55 | n++; 56 | } 57 | @@ -921,7 +927,7 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 58 | prev = hash(pc); 59 | if (dedup(sig)) 60 | continue; 61 | - if (!cover_filter(pc)) 62 | + if (flag_enable_cover_filter && !cover_filter(pc)) 63 | continue; 64 | sig |= 0xd000000000000000; 65 | write_output64(sig & COVERAGE_SIG_MASK); 66 | diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go 67 | index b3e24aac..d73e7174 100644 68 
| --- a/pkg/instance/instance.go 69 | +++ b/pkg/instance/instance.go 70 | @@ -299,7 +299,7 @@ func (inst *inst) testInstance() error { 71 | } 72 | 73 | cmd := OldFuzzerCmd(fuzzerBin, executorBin, "test", inst.cfg.TargetOS, inst.cfg.TargetArch, fwdAddr, 74 | - inst.cfg.Sandbox, 0, inst.cfg.Cover, true) 75 | + inst.cfg.Sandbox, 0, inst.cfg.Cover, true, false, false) 76 | outc, errc, err := inst.vm.Run(10*time.Minute, nil, cmd) 77 | if err != nil { 78 | return fmt.Errorf("failed to run binary in VM: %v", err) 79 | @@ -398,7 +398,7 @@ func (inst *inst) testProgram(command string, testTime time.Duration) error { 80 | } 81 | 82 | func FuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox string, procs, verbosity int, 83 | - cover, debug, test, runtest bool) string { 84 | + cover, debug, test, runtest bool, covfilter bool, ebpfsig bool) string { 85 | osArg := "" 86 | switch OS { 87 | case "akaros", "fuchsia": 88 | @@ -416,13 +416,13 @@ func FuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox string, procs, 89 | verbosityArg = fmt.Sprintf(" -vv=%v", verbosity) 90 | } 91 | return fmt.Sprintf("%v -executor=%v -name=%v -arch=%v%v -manager=%v -sandbox=%v"+ 92 | - " -procs=%v -cover=%v -debug=%v -test=%v%v%v", 93 | + " -procs=%v -cover=%v -debug=%v -test=%v%v%v -covfilter=%v -ebpfsig=%v", 94 | fuzzer, executor, name, arch, osArg, fwdAddr, sandbox, 95 | - procs, cover, debug, test, runtestArg, verbosityArg) 96 | + procs, cover, debug, test, runtestArg, verbosityArg, covfilter, ebpfsig) 97 | } 98 | 99 | -func OldFuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox string, procs int, cover, test bool) string { 100 | - return FuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox, procs, 0, cover, false, test, false) 101 | +func OldFuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox string, procs int, cover, test bool, covfilter bool, ebpfsig bool) string { 102 | + return FuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox, procs, 0, 
cover, false, test, false, covfilter, ebpfsig) 103 | } 104 | 105 | func ExecprogCmd(execprog, executor, OS, arch, sandbox string, repeat, threaded, collide bool, 106 | diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go 107 | index c8dadaa7..2a05696c 100644 108 | --- a/pkg/ipc/ipc.go 109 | +++ b/pkg/ipc/ipc.go 110 | @@ -41,6 +41,8 @@ const ( 111 | // Executor does not know about these: 112 | FlagUseShmem // use shared memory instead of pipes for communication 113 | FlagUseForkServer // use extended protocol with handshake 114 | + FlagCoverFilter 115 | + FlagEbpfSignal 116 | ) 117 | 118 | // Per-exec flags for ExecOpts.Flags: 119 | diff --git a/pkg/mgrconfig/config.go b/pkg/mgrconfig/config.go 120 | index ea03c429..70604d1e 100644 121 | --- a/pkg/mgrconfig/config.go 122 | +++ b/pkg/mgrconfig/config.go 123 | @@ -68,7 +68,13 @@ type Config struct { 124 | 125 | // Use KCOV coverage (default: true). 126 | Cover bool `json:"cover"` 127 | + 128 | + /* Use coverage filter */ 129 | + Covfilter bool `json:"covfilter"` 130 | + /* Use ebpf feedback */ 131 | + Ebpfsig bool `json:"ebpfsig"` 132 | // Reproduce, localize and minimize crashers (default: true). 133 | + 134 | Reproduce bool `json:"reproduce"` 135 | 136 | // List of syscalls to test (optional). 
For example: 137 | diff --git a/pkg/mgrconfig/load.go b/pkg/mgrconfig/load.go 138 | index f02f3c59..c11ee7f7 100644 139 | --- a/pkg/mgrconfig/load.go 140 | +++ b/pkg/mgrconfig/load.go 141 | @@ -58,6 +58,8 @@ func defaultValues() *Config { 142 | return &Config{ 143 | SSHUser: "root", 144 | Cover: true, 145 | + Ebpfsig: false, 146 | + Covfilter: false, 147 | Reproduce: true, 148 | Sandbox: "none", 149 | RPC: ":0", 150 | diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go 151 | index 5cc7df89..57ce2107 100644 152 | --- a/syz-fuzzer/fuzzer.go 153 | +++ b/syz-fuzzer/fuzzer.go 154 | @@ -115,6 +115,8 @@ func main() { 155 | flagPprof = flag.String("pprof", "", "address to serve pprof profiles") 156 | flagTest = flag.Bool("test", false, "enable image testing mode") // used by syz-ci 157 | flagRunTest = flag.Bool("runtest", false, "enable program testing mode") // used by pkg/runtest 158 | + flagCovFilter = flag.Bool("covfilter", false, "enable coverage filter") 159 | + flagEbpfSig = flag.Bool("ebpfsig", false, "enable ebpf feedback") 160 | ) 161 | flag.Parse() 162 | outputType := parseOutputType(*flagOutput) 163 | @@ -130,6 +132,12 @@ func main() { 164 | log.Fatalf("failed to create default ipc config: %v", err) 165 | } 166 | sandbox := ipc.FlagsToSandbox(config.Flags) 167 | + if *flagCovFilter { 168 | + config.Flags |= (1 << 15) 169 | + } 170 | + if *flagEbpfSig { 171 | + config.Flags |= (1 << 16) 172 | + } 173 | shutdown := make(chan struct{}) 174 | osutil.HandleInterrupts(shutdown) 175 | go func() { 176 | diff --git a/syz-manager/manager.go b/syz-manager/manager.go 177 | index 0aaee782..2af16686 100644 178 | --- a/syz-manager/manager.go 179 | +++ b/syz-manager/manager.go 180 | @@ -548,7 +548,7 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) { 181 | defer atomic.AddUint32(&mgr.numFuzzing, ^uint32(0)) 182 | cmd := instance.FuzzerCmd(fuzzerBin, executorBin, fmt.Sprintf("vm-%v", index), 183 | mgr.cfg.TargetOS, mgr.cfg.TargetArch, fwdAddr, mgr.cfg.Sandbox, 
procs, fuzzerV, 184 | - mgr.cfg.Cover, *flagDebug, false, false) 185 | + mgr.cfg.Cover, *flagDebug, false, false, mgr.cfg.Covfilter, mgr.cfg.Ebpfsig) 186 | outc, errc, err := inst.Run(time.Hour, mgr.vmStop, cmd) 187 | if err != nil { 188 | return nil, fmt.Errorf("failed to run fuzzer: %v", err) 189 | diff --git a/tools/syz-runtest/runtest.go b/tools/syz-runtest/runtest.go 190 | index 538646a2..167ac7d8 100644 191 | --- a/tools/syz-runtest/runtest.go 192 | +++ b/tools/syz-runtest/runtest.go 193 | @@ -175,7 +175,7 @@ func (mgr *Manager) boot(name string, index int) (*report.Report, error) { 194 | } 195 | cmd := instance.FuzzerCmd(fuzzerBin, executorBin, name, 196 | mgr.cfg.TargetOS, mgr.cfg.TargetArch, fwdAddr, mgr.cfg.Sandbox, mgr.cfg.Procs, 0, 197 | - mgr.cfg.Cover, mgr.debug, false, true) 198 | + mgr.cfg.Cover, mgr.debug, false, true, false, false) 199 | outc, errc, err := inst.Run(time.Hour, mgr.vmStop, cmd) 200 | if err != nil { 201 | return nil, fmt.Errorf("failed to run fuzzer: %v", err) 202 | -- 203 | 2.20.1 204 | 205 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0004-Make-the-download-sync-time-configurable.patch: -------------------------------------------------------------------------------- 1 | From acd4230012fe6b950ae8eeeeb2f5e100ff0018dc Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 22:06:08 -0500 4 | Subject: [PATCH 4/6] Make the download sync time configurable 5 | 6 | --- 7 | pkg/mgrconfig/config.go | 8 +++++--- 8 | pkg/mgrconfig/load.go | 17 +++++++++-------- 9 | pkg/rpctype/rpctype.go | 1 + 10 | syz-hub/hub.go | 27 ++++++++++++++++++++------- 11 | syz-hub/state/state.go | 21 +++++++++++++++++++++ 12 | syz-manager/html.go | 1 + 13 | syz-manager/hub.go | 21 ++++++++++++++------- 14 | syz-manager/manager.go | 7 +++++++ 15 | 8 files changed, 78 insertions(+), 25 deletions(-) 16 | 17 | diff --git a/pkg/mgrconfig/config.go b/pkg/mgrconfig/config.go 18 | index 
70604d1e..0316d835 100644 19 | --- a/pkg/mgrconfig/config.go 20 | +++ b/pkg/mgrconfig/config.go 21 | @@ -36,9 +36,11 @@ type Config struct { 22 | // SSH user ("root" by default). 23 | SSHUser string `json:"ssh_user,omitempty"` 24 | 25 | - HubClient string `json:"hub_client,omitempty"` 26 | - HubAddr string `json:"hub_addr,omitempty"` 27 | - HubKey string `json:"hub_key,omitempty"` 28 | + HubClient string `json:"hub_client,omitempty"` 29 | + HubAddr string `json:"hub_addr,omitempty"` 30 | + HubKey string `json:"hub_key,omitempty"` 31 | + HubSyncTime int `json:"hub_synctime"` 32 | + 33 | 34 | // List of email addresses to receive notifications when bugs are encountered for the first time (optional). 35 | // Mailx is the only supported mailer. Please set it up prior to using this function. 36 | diff --git a/pkg/mgrconfig/load.go b/pkg/mgrconfig/load.go 37 | index c11ee7f7..f71e740d 100644 38 | --- a/pkg/mgrconfig/load.go 39 | +++ b/pkg/mgrconfig/load.go 40 | @@ -56,14 +56,15 @@ func LoadPartialFile(filename string) (*Config, error) { 41 | 42 | func defaultValues() *Config { 43 | return &Config{ 44 | - SSHUser: "root", 45 | - Cover: true, 46 | - Ebpfsig: false, 47 | - Covfilter: false, 48 | - Reproduce: true, 49 | - Sandbox: "none", 50 | - RPC: ":0", 51 | - Procs: 1, 52 | + SSHUser: "root", 53 | + Cover: true, 54 | + Ebpfsig: false, 55 | + Covfilter: false, 56 | + Reproduce: true, 57 | + Sandbox: "none", 58 | + RPC: ":0", 59 | + HubSyncTime: 1, 60 | + Procs: 1, 61 | } 62 | } 63 | 64 | diff --git a/pkg/rpctype/rpctype.go b/pkg/rpctype/rpctype.go 65 | index fcc01a99..dc8c6892 100644 66 | --- a/pkg/rpctype/rpctype.go 67 | +++ b/pkg/rpctype/rpctype.go 68 | @@ -96,6 +96,7 @@ type HubSyncArgs struct { 69 | Del []string 70 | // Repros found since last sync. 
71 | Repros [][]byte 72 | + NeedCorpus bool 73 | } 74 | 75 | type HubSyncRes struct { 76 | diff --git a/syz-hub/hub.go b/syz-hub/hub.go 77 | index db8dd506..ab7adc52 100644 78 | --- a/syz-hub/hub.go 79 | +++ b/syz-hub/hub.go 80 | @@ -90,13 +90,26 @@ func (hub *Hub) Sync(a *rpctype.HubSyncArgs, r *rpctype.HubSyncRes) error { 81 | hub.mu.Lock() 82 | defer hub.mu.Unlock() 83 | 84 | - progs, more, err := hub.st.Sync(name, a.Add, a.Del) 85 | - if err != nil { 86 | - log.Logf(0, "sync error: %v", err) 87 | - return err 88 | + if a.NeedCorpus { 89 | + log.Logf(0, "Need corpus sync") 90 | + progs, more, err := hub.st.Sync(name, a.Add, a.Del) 91 | + if err != nil { 92 | + log.Logf(0, "sync error: %v", err) 93 | + return err 94 | + } 95 | + r.Progs = progs 96 | + r.More = more 97 | + } else { 98 | + log.Logf(0, "Send progs sync") 99 | + err := hub.st.SyncUpOnly(name, a.Add, a.Del) 100 | + if err != nil { 101 | + log.Logf(0, "sync up error: %v", err) 102 | + return err 103 | + } 104 | + r.Progs = nil 105 | + r.More = 0 106 | } 107 | - r.Progs = progs 108 | - r.More = more 109 | + 110 | for _, repro := range a.Repros { 111 | if err := hub.st.AddRepro(name, repro); err != nil { 112 | log.Logf(0, "add repro error: %v", err) 113 | @@ -112,7 +125,7 @@ func (hub *Hub) Sync(a *rpctype.HubSyncArgs, r *rpctype.HubSyncRes) error { 114 | } 115 | } 116 | log.Logf(0, "sync from %v: recv: add=%v del=%v repros=%v; send: progs=%v repros=%v pending=%v", 117 | - name, len(a.Add), len(a.Del), len(a.Repros), len(r.Progs), len(r.Repros), more) 118 | + name, len(a.Add), len(a.Del), len(a.Repros), len(r.Progs), len(r.Repros), r.More) 119 | return nil 120 | } 121 | 122 | diff --git a/syz-hub/state/state.go b/syz-hub/state/state.go 123 | index 7ccdf182..c238019c 100644 124 | --- a/syz-hub/state/state.go 125 | +++ b/syz-hub/state/state.go 126 | @@ -197,6 +197,27 @@ func (st *State) Sync(name string, add [][]byte, del []string) ([][]byte, int, e 127 | return progs, more, err 128 | } 129 | 130 | +func 
(st *State) SyncUpOnly(name string, add [][]byte, del []string) error { 131 | + mgr := st.Managers[name] 132 | + if mgr == nil || mgr.Connected.IsZero() { 133 | + return fmt.Errorf("unconnected manager %v", name) 134 | + } 135 | + if len(del) != 0 { 136 | + for _, sig := range del { 137 | + mgr.Corpus.Delete(sig) 138 | + } 139 | + if err := mgr.Corpus.Flush(); err != nil { 140 | + log.Logf(0, "failed to flush corpus database: %v", err) 141 | + } 142 | + st.purgeCorpus() 143 | + } 144 | + st.addInputs(mgr, add) 145 | + mgr.Added += len(add) 146 | + mgr.Deleted += len(del) 147 | + return nil 148 | +} 149 | + 150 | + 151 | func (st *State) AddRepro(name string, repro []byte) error { 152 | mgr := st.Managers[name] 153 | if mgr == nil || mgr.Connected.IsZero() { 154 | diff --git a/syz-manager/html.go b/syz-manager/html.go 155 | index 4a0a1fbc..e616898c 100644 156 | --- a/syz-manager/html.go 157 | +++ b/syz-manager/html.go 158 | @@ -124,6 +124,7 @@ func (mgr *Manager) collectStats() []UIStat { 159 | {Name: "uptime", Value: fmt.Sprint(time.Since(mgr.startTime) / 1e9 * 1e9)}, 160 | {Name: "fuzzing", Value: fmt.Sprint(mgr.fuzzingTime / 60e9 * 60e9)}, 161 | {Name: "corpus", Value: fmt.Sprint(len(mgr.corpus)), Link: "/corpus"}, 162 | + {Name: "last input", Value: fmt.Sprint(time.Since(mgr.lastInputTime) / 1e9 * 1e9)}, 163 | {Name: "triage queue", Value: fmt.Sprint(len(mgr.candidates))}, 164 | {Name: "cover", Value: fmt.Sprint(rawStats["cover"]), Link: "/cover"}, 165 | {Name: "signal", Value: fmt.Sprint(rawStats["signal"])}, 166 | diff --git a/syz-manager/hub.go b/syz-manager/hub.go 167 | index 5f85c8fe..a3c3716f 100644 168 | --- a/syz-manager/hub.go 169 | +++ b/syz-manager/hub.go 170 | @@ -48,14 +48,20 @@ type HubConnector struct { 171 | 172 | // HubManagerView restricts interface between HubConnector and Manager. 
173 | type HubManagerView interface { 174 | + getNoInputTime() time.Duration 175 | getMinimizedCorpus() (corpus, repros [][]byte) 176 | addNewCandidates(progs [][]byte) 177 | } 178 | 179 | func (hc *HubConnector) loop() { 180 | + noInput := false 181 | var hub *rpctype.RPCClient 182 | for { 183 | time.Sleep(time.Minute) 184 | + noCovTime := hc.mgr.getNoInputTime() 185 | + if noCovTime > time.Duration(hc.cfg.HubSyncTime)*time.Minute { 186 | + noInput = true 187 | + } 188 | corpus, repros := hc.mgr.getMinimizedCorpus() 189 | hc.newRepros = append(hc.newRepros, repros...) 190 | if hub == nil { 191 | @@ -66,7 +72,7 @@ func (hc *HubConnector) loop() { 192 | } 193 | log.Logf(0, "connected to hub at %v, corpus %v", hc.cfg.HubAddr, len(corpus)) 194 | } 195 | - if err := hc.sync(hub, corpus); err != nil { 196 | + if err := hc.sync(hub, corpus, noInput); err != nil { 197 | log.Logf(0, "hub sync failed: %v", err) 198 | hub.Close() 199 | hub = nil 200 | @@ -103,11 +109,12 @@ func (hc *HubConnector) connect(corpus [][]byte) (*rpctype.RPCClient, error) { 201 | return hub, nil 202 | } 203 | 204 | -func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error { 205 | +func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte, noInput bool) error { 206 | a := &rpctype.HubSyncArgs{ 207 | - Client: hc.cfg.HubClient, 208 | - Key: hc.cfg.HubKey, 209 | - Manager: hc.cfg.Name, 210 | + Client: hc.cfg.HubClient, 211 | + Key: hc.cfg.HubKey, 212 | + Manager: hc.cfg.Name, 213 | + NeedCorpus: noInput, 214 | } 215 | sigs := make(map[hash.Sig]bool) 216 | for _, inp := range corpus { 217 | @@ -147,9 +154,9 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error { 218 | hc.stats.hubRecvRepro.add(len(r.Repros) - reproDropped) 219 | hc.stats.hubRecvReproDrop.add(reproDropped) 220 | log.Logf(0, "hub sync: send: add %v, del %v, repros %v;"+ 221 | - " recv: progs %v, repros %v; more %v", 222 | + " recv: progs %v, repros %v; more %v; need corpus: %v", 223 
| len(a.Add), len(a.Del), len(a.Repros), 224 | - len(r.Progs)-progDropped, len(r.Repros)-reproDropped, r.More) 225 | + len(r.Progs)-progDropped, len(r.Repros)-reproDropped, r.More, noInput) 226 | a.Add = nil 227 | a.Del = nil 228 | a.Repros = nil 229 | diff --git a/syz-manager/manager.go b/syz-manager/manager.go 230 | index 2af16686..46aa0228 100644 231 | --- a/syz-manager/manager.go 232 | +++ b/syz-manager/manager.go 233 | @@ -56,6 +56,7 @@ type Manager struct { 234 | startTime time.Time 235 | firstConnect time.Time 236 | fuzzingTime time.Duration 237 | + lastInputTime time.Time 238 | stats *Stats 239 | crashTypes map[string]bool 240 | vmStop chan bool 241 | @@ -164,6 +165,7 @@ func RunManager(cfg *mgrconfig.Config, target *prog.Target, sysTarget *targets.T 242 | reporter: reporter, 243 | crashdir: crashdir, 244 | startTime: time.Now(), 245 | + lastInputTime: time.Now(), 246 | stats: new(Stats), 247 | crashTypes: make(map[string]bool), 248 | enabledSyscalls: syscalls, 249 | @@ -838,6 +840,10 @@ func saveReproStats(filename string, stats *repro.Stats) { 250 | osutil.WriteFile(filename, []byte(text)) 251 | } 252 | 253 | +func (mgr *Manager) getNoInputTime() time.Duration { 254 | + return time.Now().Sub(mgr.lastInputTime) 255 | +} 256 | + 257 | func (mgr *Manager) getMinimizedCorpus() (corpus, repros [][]byte) { 258 | mgr.mu.Lock() 259 | defer mgr.mu.Unlock() 260 | @@ -954,6 +960,7 @@ func (mgr *Manager) machineChecked(a *rpctype.CheckArgs) { 261 | func (mgr *Manager) newInput(inp rpctype.RPCInput, sign signal.Signal) { 262 | mgr.mu.Lock() 263 | defer mgr.mu.Unlock() 264 | + mgr.lastInputTime = time.Now() 265 | sig := hash.String(inp.Prog) 266 | if old, ok := mgr.corpus[sig]; ok { 267 | // The input is already present, but possibly with diffent signal/coverage/call. 
268 | -- 269 | 2.20.1 270 | 271 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0005-Add-ret-ebpfsig-as-resource.patch: -------------------------------------------------------------------------------- 1 | From 9101f2871aa729cc9dd8812aec6c37f9b642fc1c Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 22:19:09 -0500 4 | Subject: [PATCH 5/6] Add ret ebpfsig as resource 5 | 6 | --- 7 | executor/executor.cc | 8 ++++---- 8 | pkg/rpctype/rpctype.go | 1 + 9 | prog/clone.go | 5 +++-- 10 | prog/rand.go | 2 +- 11 | syz-fuzzer/proc.go | 15 +++++++++++++++ 12 | syz-manager/html.go | 4 ++++ 13 | 6 files changed, 28 insertions(+), 7 deletions(-) 14 | 15 | diff --git a/executor/executor.cc b/executor/executor.cc 16 | index 28f0bdb1..fdff884a 100644 17 | --- a/executor/executor.cc 18 | +++ b/executor/executor.cc 19 | @@ -36,8 +36,8 @@ 20 | #endif 21 | 22 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 23 | -#define RETSTATE_SIG_MASK 0xf000000000000000 24 | -#define STATE_SIG_MASK 0xe000000000000000 25 | +/*#define RETSTATE_SIG_MASK 0xf000000000000000*/ 26 | +/*#define STATE_SIG_MASK 0xe000000000000000*/ 27 | #define COVERAGE_SIG_MASK 0xd0000000ffffffff 28 | 29 | 30 | @@ -905,12 +905,12 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 31 | uint64_t state = 0; 32 | if (ret > 0) { 33 | state = strtol(buf, NULL, 16); 34 | - /* state signal start with 0xf 35 | + /* state/retstate signal start with 0xe/0xf 36 | * Write out real state for fuzzer's further handle 37 | */ 38 | if (flag_enable_ebpf_signal) { 39 | write_output64(state); 40 | - debug("A state signal %016lx\n", state); 41 | + debug("A state signal %016lx\n", (uint64)state); 42 | n = 0; 43 | nsig++; 44 | } 45 | diff --git a/pkg/rpctype/rpctype.go b/pkg/rpctype/rpctype.go 46 | index dc8c6892..357f0e96 100644 47 | --- a/pkg/rpctype/rpctype.go 48 | +++ b/pkg/rpctype/rpctype.go 49 | @@ -16,6 +16,7 @@ type RPCInput 
struct { 50 | Prog []byte 51 | Signal signal.Serial 52 | Cover []uint32 53 | + SpeRes bool 54 | } 55 | 56 | type RPCCandidate struct { 57 | diff --git a/prog/clone.go b/prog/clone.go 58 | index 5ad82c35..855ab09b 100644 59 | --- a/prog/clone.go 60 | +++ b/prog/clone.go 61 | @@ -5,8 +5,9 @@ package prog 62 | 63 | func (p *Prog) Clone() *Prog { 64 | p1 := &Prog{ 65 | - Target: p.Target, 66 | - Calls: make([]*Call, len(p.Calls)), 67 | + Target: p.Target, 68 | + Calls: make([]*Call, len(p.Calls)), 69 | + Resource: p.Resource, 70 | } 71 | newargs := make(map[*ResultArg]*ResultArg) 72 | for ci, c := range p.Calls { 73 | diff --git a/prog/rand.go b/prog/rand.go 74 | index 70a93687..6ade7dc7 100644 75 | --- a/prog/rand.go 76 | +++ b/prog/rand.go 77 | @@ -816,7 +816,7 @@ func resourceCentric(t *ResourceType, s *state, r *randGen) (resource *ResultArg 78 | for idx := range r.Perm(len(s.corpus)) { 79 | p = s.corpus[idx].Clone() 80 | resources := getCompatibleResources(p, t.TypeName, r) 81 | - if len(resources) > 0 { 82 | + if len(resources) > 0 && p.Resource { 83 | resource = resources[r.Intn(len(resources))] 84 | break 85 | } 86 | diff --git a/syz-fuzzer/proc.go b/syz-fuzzer/proc.go 87 | index 98deebb9..a38df022 100644 88 | --- a/syz-fuzzer/proc.go 89 | +++ b/syz-fuzzer/proc.go 90 | @@ -166,6 +166,12 @@ func (proc *Proc) triageInput(item *WorkTriage) { 91 | data := item.p.Serialize() 92 | sig := hash.Hash(data) 93 | 94 | + item.p.Resource = true 95 | + /* ebpfsig feedback */ 96 | + if proc.fuzzer.config.Flags&(1<<14) > 0 { 97 | + item.p.Resource = getResourceFlagFromSignal(item.info.Signal) 98 | + } 99 | + 100 | log.Logf(2, "added new input for %v to corpus:\n%s", logCallName, data) 101 | proc.fuzzer.sendInputToManager(rpctype.RPCInput{ 102 | Call: callName, 103 | @@ -204,6 +210,15 @@ func getSignalAndCover(p *prog.Prog, info *ipc.ProgInfo, call int) (signal.Signa 104 | return signal.FromRaw(inf.Signal, signalPrio(p, inf, call)), inf.Cover 105 | } 106 | 107 | +func 
getResourceFlagFromSignal(Signal []uint64) bool { 108 | + for _, sig := range Signal { 109 | + if sig&0xf000000000000000 == 0xf000000000000000 { 110 | + return true 111 | + } 112 | + } 113 | + return false 114 | +} 115 | + 116 | func (proc *Proc) smashInput(item *WorkSmash) { 117 | if proc.fuzzer.faultInjectionEnabled && item.call != -1 { 118 | proc.failCall(item.p, item.call) 119 | diff --git a/syz-manager/html.go b/syz-manager/html.go 120 | index e616898c..8c563ef7 100644 121 | --- a/syz-manager/html.go 122 | +++ b/syz-manager/html.go 123 | @@ -218,6 +218,7 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) { 124 | Short: p.String(), 125 | Cover: len(inp.Cover), 126 | Signal: inp.Signal.Deserialize(), 127 | + SpeRes: inp.SpeRes, 128 | }) 129 | } 130 | sort.Slice(data.Inputs, func(i, j int) bool { 131 | @@ -641,6 +642,7 @@ type UIInput struct { 132 | Short string 133 | Cover int 134 | Signal signal.Signal 135 | + SpeRes bool 136 | } 137 | 138 | var summaryTemplate = html.CreatePage(` 139 | @@ -786,11 +788,13 @@ var corpusTemplate = html.CreatePage(` 140 | 141 | Coverage 142 | Program 143 | + Resource 144 | 145 | {{range $inp := $.Inputs}} 146 | 147 | {{$inp.Cover}} 148 | {{$inp.Short}} 149 | + {{$inp.SpeRes}} 150 | 151 | {{end}} 152 | 153 | -- 154 | 2.20.1 155 | 156 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0006-Add-monitot-binary-option-to-manager-configure.patch: -------------------------------------------------------------------------------- 1 | From 1d290b1cb2333d931942cad3bda9bda447942f5d Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 22:23:26 -0500 4 | Subject: [PATCH 6/6] Add monitot binary option to manager configure 5 | 6 | --- 7 | executor/common_linux.h | 2 +- 8 | pkg/mgrconfig/config.go | 1 + 9 | syz-manager/manager.go | 4 ++++ 10 | 3 files changed, 6 insertions(+), 1 deletion(-) 11 | 12 | diff --git a/executor/common_linux.h 
b/executor/common_linux.h 13 | index bc58d71b..0a60a54f 100644 14 | --- a/executor/common_linux.h 15 | +++ b/executor/common_linux.h 16 | @@ -2164,7 +2164,7 @@ static int do_sandbox_none(void) 17 | close(monpipefd[0]); 18 | close(monpipefd[1]); 19 | debug("single ebpf start ...\n"); 20 | - execl("/root/pipe_monitor", "/root/pipe_monitor", "--debug", NULL); 21 | + execl("/pipe_monitor", "/pipe_monitor", "--debug", NULL); 22 | return 0; 23 | } 24 | /* ebpf loading is very slow, one time a vm restart */ 25 | diff --git a/pkg/mgrconfig/config.go b/pkg/mgrconfig/config.go 26 | index 0316d835..637ea249 100644 27 | --- a/pkg/mgrconfig/config.go 28 | +++ b/pkg/mgrconfig/config.go 29 | @@ -107,4 +107,5 @@ type Config struct { 30 | SyzFuzzerBin string `json:"-"` 31 | SyzExecprogBin string `json:"-"` 32 | SyzExecutorBin string `json:"-"` 33 | + SyzMonitorBin string `json:"ebpfmonitor"` 34 | } 35 | diff --git a/syz-manager/manager.go b/syz-manager/manager.go 36 | index 46aa0228..aab55c7e 100644 37 | --- a/syz-manager/manager.go 38 | +++ b/syz-manager/manager.go 39 | @@ -536,6 +536,10 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) { 40 | if err != nil { 41 | return nil, fmt.Errorf("failed to copy binary: %v", err) 42 | } 43 | + _, err = inst.Copy(mgr.cfg.SyzMonitorBin) 44 | + if err != nil { 45 | + return nil, fmt.Errorf("failed to copy binary: %v", err) 46 | + } 47 | 48 | fuzzerV := 0 49 | procs := mgr.cfg.Procs 50 | -- 51 | 2.20.1 52 | 53 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/README.md: -------------------------------------------------------------------------------- 1 | # Multiple policy fuzzer( syz-hub) 2 | 3 | ## Original syz-hub 4 | Syz-hub is a great tool to connect all the syz-managers. After all syz-managers connect to syz-hub, Every syz-manager will exchange their whole corpus with each other. 
This is called "Sync" in syz-hub; the interval of "Sync" is one minute, as you can see from the time.Sleep() in syz-manager/hub.go:loop(). After "Sync", every manager will check if the received progs can hit more coverage. You can immediately see a great number of "triage queue" entries after "Sync". That means syz-managers with different configurations can also exchange progs with each other. 5 | 6 | ## Policy of fuzzer 7 | ### Original syzkaller fuzzer 8 | Actually, there are several mechanisms in syzkaller: 9 | 1. The feedback( coverage) of a prog determines if it can be sent to the corpus 10 | 2. The corpus will affect prog generation( by mutation and the syscall choice-table, it affects the probability) 11 | 3. Generated progs determine which feedback may be received. 12 | 13 | Syzkaller runs these iteratively, and the feedback largely determines where to fuzz. Original syzkaller uses coverage of the whole kernel as feedback. So, syzkaller is a coverage-guided fuzzer of the kernel. And "coverage-guided" is what we call the policy of syzkaller. 14 | 15 | ### Faster or deeper fuzzer 16 | We have done some surveys of different-policy syzkaller. They show that there are several points that can be optimized if you want a directed fuzzer, for example, if you only want to fuzz a sub-system of the kernel. The customizations can be listed as: 17 | 1. Limit the coverage to a smaller scope. This includes building the kernel with partial coverage( KCOV_INSTRUMENT_ALL=n) and filtering coverage( by address). 18 | 2. Add other feedback. For example, we use ebpf to collect the state of sockets as feedback. 19 | 3. Directed fault-injection helps cover the corners that otherwise wouldn't be covered. 20 | 21 | Both 1 and 2 change the feedback of syzkaller. 1 limits syzkaller to fuzzing a smaller scope of the kernel. 2 directly introduces other feedback into syzkaller. 22 | Our tests show that using these features properly can make syzkaller more directed, deeper and faster. 
23 | 24 | ## Customize syz-hub 25 | ### Connect syz-managers with different policies 26 | It could be useful to connect syz-managers with different policies. Different syz-managers focus on different sub-systems or different scopes. 27 | For example, one of the syz-managers fuzzes the whole kernel, while the others fuzz several sub-systems. It takes less time to fuzz the deeper corners( sub-systems). And the corpus can be synced to all managers( including the whole-kernel one). In other words, the deeper or faster fuzzers can be synced to the wide and shallow fuzzer. 28 | 29 | ### Customized features of syz-hub 30 | Original syz-hub does "Sync" once a minute. We know the corpus will affect prog generation, so frequent syncing will guide all syz-managers to fuzz the same scope of the kernel. Splitting the upload( sending out progs) and download( receiving progs from the hub) of corpus sync shows better performance. The upload is always done when "Sync" is called, and the download sync is only done if there is no new coverage after a long time. So what we need to do is: 31 | syz-hub: split the upload and download of corpus sync. 32 | syz-manager: add an option for configuring the sync time. Only download the corpus if there has not been any input for a long time. 33 | 34 | ## Patch and usage 35 | ### Patch 36 | These patches are based on syz-0d1034: 37 | 1. Add ebpf feedback 38 | 2. Filter coverage by address 39 | 3. Configurable ebpfsig and coverage filtering 40 | 4. Split the upload and download of sync 41 | 42 | ### Usage 43 | For patches 1 and 2, refer to [this](../kstat_demo/README.md). 44 | After applying patch 3, you need to specify some new options for syz-manager: 45 | * ebpfsig: true/false 46 | * covfilter: true/false 47 | After applying patch 4, you need to specify a new option for syz-manager: 48 | * hub_synctime: an integer 49 | This option specifies after how many minutes without any input a syz-manager can receive progs. 50 | Then you can run syzkaller as usual. 
51 | 52 | ## A test for tcp/ipv6 53 | ### Original syz-hub 54 | syz-manager1: Only enable syscalls for tcp/ipv6 55 | syz-manager2: Only enable syscalls for tcp/ipv6 56 | sync time: once a minute 57 | run time: 2h30min 58 | coverage( choosing the maximum): 59 | 60 | | coverage | 1 | 2 | 3 | 4 | 5 | 6 | average | 61 | |----------| - | - | - | - | - | - | ------- | 62 | | total |10514 |9869 |10583 |10347 |10611 |8916 |10140 | 63 | | tcp.c |462 |460 |346 |471 |491 |359 |432 | 64 | 65 | (Most of the handler functions of tcp/ipv6 are in tcp.c) 66 | 67 | ### Multi-policy syz-hub 68 | syz-manager1: Only enable syscalls for tcp/ipv6 69 | syz-manager2: Only enable syscalls for tcp/ipv6, add ebpf to collect socket state as feedback, limit coverage to tcp/ipv6 kernel functions. 70 | sync time: 3/4 minutes without any input 71 | run time: 2h30min 72 | coverage( choosing the maximum): 73 | 74 | | coverage | 1 | 2 | 3 | 4 | 5 | 6 | average | 75 | |----------| - | - | - | - | - | - | ------- | 76 | | total |9962 |10060 |9356 |10832 |8952 |10122 |9879 | 77 | | tcp.c |487 |525 |507 |506 |515 |493 |506 | 78 | 79 | ### Result 80 | * One of the syz-managers focuses on tcp/ipv6 fuzzing. It has a 2% decrease in total coverage. This is because we use one of the two syz-managers to fuzz a smaller scope. 81 | * The introduction of ebpf feedback gives a 17% increase in tcp.c coverage. That means our directed fuzzer does well in fuzzing the deeper corners. -------------------------------------------------------------------------------- /syzkaller/syzkaller.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/syzkaller.png --------------------------------------------------------------------------------