├── LICENSE ├── README.md ├── bug_report ├── README ├── config-4.6.2 ├── linux-4.6.2 │ ├── 20160617.log │ ├── 20160621.log │ └── 20160623.log └── trinity-log-20170511 │ ├── trinity-child0.log │ ├── trinity-child1.log │ ├── trinity-child2.log │ ├── trinity-child3.log │ ├── trinity-child4.log │ ├── trinity-child5.log │ ├── trinity-child6.log │ ├── trinity-child7.log │ └── trinity.log ├── profiles └── local.cfg ├── static_analysis_tools ├── ConditionChecker │ ├── ConditionChecker.cpp │ ├── ConditionParse.cpp │ ├── Parse.h │ └── README.md ├── IRParser │ ├── extend_func.cpp │ ├── info.h │ ├── kcov_map.cpp │ ├── kstate_map.cpp │ └── log.h └── kern_instrument │ ├── AssignTrackerPass │ ├── AssignTracker.cpp │ ├── AssignTracker.exports │ └── CMakeLists.txt │ └── kern_patch │ └── 0001-KCOV_SRT_TRACK-ok.patch ├── survey.md ├── syz_patch ├── 0001-syz-manager-manager.go-executor-executor.cc-support-.patch ├── 0002-Calculate-prog-prior-base-on-weighted-pcs.patch └── 0003-Support-weighed-kstate-resource.patch └── syzkaller ├── Bitmap.png ├── Corpus.png ├── CoverageOfFiles.png ├── CoverageOfTargetFunctions.png ├── KernFunc.png ├── ProgState.png ├── TotalCoverage.png ├── cover_filter.md ├── design_implementation_intro.md ├── kstat_demo ├── README.md ├── ebpf │ ├── ebpf.go │ └── ebpftext.go ├── ebpf_sample │ ├── ebpftext_gen.go │ ├── ebpftext_recvmsg.go │ └── ebpftext_sendmsg.go ├── parse │ └── parse.go ├── pipe_monitor.go ├── state │ └── state.go ├── syz_patch │ ├── cover_filter │ │ └── 0001-fuzzer-calculate-prog-prios-base-on-weighted-blocks.patch │ └── kstate │ │ ├── 0001-Support-ebpf-feedbac-and-display-in-webui.patch │ │ ├── 0004-Support-retState-ebpfsig-resource.patch │ │ └── 0005-Add-monitor-binary-and-function-pcs-options-to-manag.patch └── tcp-ipv6 │ ├── config.json │ ├── data.tar.bz2 │ └── test.md ├── kstate_resource.md ├── multi_policy ├── 0001-Add-ebpf-feedback-and-display-in-webui.patch ├── 0002-Add-coverage-filter.patch ├── 
0003-Add-manager-configure-for-coverage-filter-and-ebpf-f.patch ├── 0004-Make-the-download-sync-time-configurable.patch ├── 0005-Add-ret-ebpfsig-as-resource.patch ├── 0006-Add-monitot-binary-option-to-manager-configure.patch └── README.md └── syzkaller.png /README.md: -------------------------------------------------------------------------------- 1 | # Harbian-QA 2 | 3 | Testing matters to the software quality and security. The comprehensive testing process is likely to hunt more bugs which improve the stability of Hardened Debian GNU/Linux. 4 | 5 | Bug hunting through fuzzer/*-sanitizer/etc... 6 | 7 | * [(A/T/KT) - Sanitized GNU/Linux: a new way of bug hunter in FLOSS Community](http://hardenedlinux.org/system-security/2016/04/01/x_Sanitized-GNU-Linux-a-new-way-of-bug-hunter-in-FLOSS-Community.html) 8 | * [Debugging a kernel crash found by syzkaller](http://vegardno.blogspot.in/2016/08/sync-debug.html) 9 | * [A targeted kernel fuzzer bases on syzkaller](syzkaller/design_implementation_intro.md) 10 | * [Usage and implementation of coverage filter](syzkaller/cover_filter.md) 11 | * [Usage and implementation of kernel state resource](syzkaller/kstate_resource.md) 12 | -------------------------------------------------------------------------------- /bug_report/README: -------------------------------------------------------------------------------- 1 | You'll need GCC 6.x to build KCOV support: 2 | https://gcc.gnu.org/wiki/InstallingGCC 3 | 4 | Linux Kernel panic issue: How to fix hung_task_timeout_secs and blocked for more than 120 seconds problem 5 | https://www.blackmoreops.com/2014/09/22/linux-kernel-panic-issue-fix-hung_task_timeout_secs-blocked-120-seconds-problem/ 6 | -------------------------------------------------------------------------------- /profiles/local.cfg: -------------------------------------------------------------------------------- 1 | { 2 | "http": "127.0.0.1:56741", 3 | "workdir": "/citypw/src/github.com/google/syzkaller/workdir", 4 | 
"vmlinux": "-", 5 | "syzkaller": "/citypw/src/github.com/google/syzkaller", 6 | "type": "local", 7 | "count": 1, 8 | "procs": 4, 9 | "cpu": 2, 10 | "mem": 2048 11 | } 12 | -------------------------------------------------------------------------------- /static_analysis_tools/ConditionChecker/ConditionChecker.cpp: -------------------------------------------------------------------------------- 1 | #include "clang/StaticAnalyzer/Core/Checker.h" 2 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 3 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" 4 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 5 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 6 | #include "llvm/ADT/SmallString.h" 7 | #include "llvm/ADT/StringExtras.h" 8 | #include "llvm/Support/raw_ostream.h" 9 | #include "clang/AST/ParentMap.h" 10 | #include "clang/Basic/TargetInfo.h" 11 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 12 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" 13 | #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 14 | 15 | #include 16 | #include 17 | 18 | #include "Parse.h" 19 | 20 | using namespace clang; 21 | using namespace ento; 22 | 23 | /* FuncMap[FUNC_NAME] = SUBSTMT_INFO_STRUCT*/ 24 | std::map> FuncMap; 25 | /* MemCount[BASE->MEMBER] = COUNT_OF_APPEARING_IN_CONDITION */ 26 | std::map ASTMemCount; 27 | std::map CFGMemCount; 28 | 29 | namespace { 30 | class ConditionChecker : public Checker< check::ASTDecl, 31 | check::BranchCondition, 32 | check::EndAnalysis> { 33 | public: 34 | void checkASTDecl(const FunctionDecl *FD, AnalysisManager &Mgr, BugReporter &BR) const; 35 | void checkEndAnalysis(ExplodedGraph &G, BugReporter &BR, ExprEngine &Eng) const; 36 | void checkBranchCondition(const Stmt *s, CheckerContext &Ctx) const; 37 | }; 38 | } // end anonymous namespace 39 | 40 | void ConditionChecker::checkASTDecl(const FunctionDecl *FD, AnalysisManager &Mgr, BugReporter &BR) 
const { 41 | const SourceManager &SM = Mgr.getSourceManager(); 42 | const ASTContext &ASTCtx = FD->getASTContext(); 43 | std::string funcName = FD->getNameInfo().getAsString(); 44 | if (FuncMap.find(funcName) != FuncMap.end()) { 45 | return; 46 | } 47 | 48 | if (!SM.isInMainFile(FD->getBeginLoc())) { 49 | return; 50 | } 51 | 52 | std::vector funcInfoVec, parmInfoList; 53 | funcInfoVec.push_back(FuncInfo(FD)); 54 | parmInfoList = ListAllParmInfo(SM, FD); 55 | funcInfoVec.insert(funcInfoVec.end(), parmInfoList.begin(), parmInfoList.end()); 56 | 57 | if (FD->hasBody()) { 58 | /* Local variable may initialized by functions parameters */ 59 | for (Stmt *c : FD->getBody()->children()) { 60 | std::vector localVarInfo; 61 | handleChildrenStmt(SM, c, searchLocalVar, &localVarInfo); 62 | for (stmtInfo tmpInfo : localVarInfo) { 63 | if (tmpInfo.typeName == "ParmVar") { 64 | funcInfoVec.insert(funcInfoVec.end(), localVarInfo.begin(), localVarInfo.end()); 65 | break; 66 | } 67 | } 68 | } 69 | /* Search if there are member operation or parameters in condition substatement */ 70 | for (Stmt *c : FD->getBody()->children()) { 71 | std::vector condInfo; 72 | handleChildrenStmt(SM, c, searchCondition, &condInfo); 73 | for (stmtInfo tmpInfo : condInfo) { 74 | if(tmpInfo.typeName == "MemExpr" || tmpInfo.typeName == "ParmVar") { 75 | funcInfoVec.insert(funcInfoVec.end(), condInfo.begin(), condInfo.end()); 76 | break; 77 | } 78 | } 79 | for (stmtInfo i : condInfo) { 80 | /* calculate the using of member operation */ 81 | if (i.typeName == "MemExpr") { 82 | std::string key = i.base + "->" + i.target; 83 | if (ASTMemCount.find(key) != ASTMemCount.end()) { 84 | ASTMemCount[key]++; 85 | } else { 86 | ASTMemCount[key] = 1; 87 | } 88 | } 89 | } 90 | } 91 | } 92 | 93 | FuncMap[funcName] = funcInfoVec; 94 | for (stmtInfo i : funcInfoVec) { 95 | llvm::outs() << i.toString() << "\n"; 96 | } 97 | llvm::outs() << "\n"; 98 | } 99 | 100 | void ConditionChecker::checkEndAnalysis(ExplodedGraph &G, 
BugReporter &BR, ExprEngine &Eng) const { 101 | llvm::outs() << "Count MemberExpr in condition statement(AST Parse): " << "\n"; 102 | for (auto const & m : ASTMemCount) { 103 | llvm::outs() << m.first << ":" << m.second << "\n"; 104 | } 105 | llvm::outs() << "Count MemberExpr in condition statement(CFG Parse): " << "\n"; 106 | for (auto const & m : CFGMemCount) { 107 | llvm::outs() << m.first << ":" << m.second << "\n"; 108 | } 109 | } 110 | 111 | void ConditionChecker::checkBranchCondition(const Stmt *s, CheckerContext &Ctx) const { 112 | ProgramStateRef State = Ctx.getState(); 113 | const LocationContext *LC = Ctx.getLocationContext(); 114 | SVal val = State->getSVal(s, LC); 115 | 116 | const SymExpr *SE = val.getAsSymbolicExpression(); 117 | std::string thisMemRegStr = ""; 118 | std::string funcName = ""; 119 | if (SE != nullptr) { 120 | std::vector tmp; 121 | if (SE->getOriginRegion() != nullptr) { 122 | thisMemRegStr = SE->getOriginRegion()->getString(); 123 | } 124 | const Decl *D = LC->getDecl(); 125 | 126 | if (D != nullptr) { 127 | const FunctionDecl *FD = D->getAsFunction(); 128 | if (FD != nullptr) { 129 | funcName = FD->getName(); 130 | } 131 | } 132 | parseSymExpr(SE, &tmp); 133 | if (tmp.size() > 0) { 134 | llvm::outs() << "Condition parse:\n"; 135 | for (symInfo s : tmp) { 136 | s.addFuncName(funcName); 137 | llvm::outs() << s.toString() << "\n"; 138 | if (s.typeName == "MemSymbol") { 139 | std::string key = s.targetStr; 140 | if (CFGMemCount.find(key) != CFGMemCount.end()) { 141 | CFGMemCount[key]++; 142 | } else { 143 | CFGMemCount[key] = 1; 144 | } 145 | } 146 | } 147 | } 148 | } else { 149 | return; 150 | } 151 | 152 | std::vector SymbolInfo; 153 | Optional dval = val.getAs(); 154 | if (dval) { 155 | ProgramStateRef cState = State->assume(*dval, true); 156 | if (cState != nullptr) { 157 | ConstraintRangeTy Constraints = cState->get(); 158 | if (!Constraints.isEmpty()) { 159 | for (ConstraintRangeTy::iterator i = Constraints.begin(); 160 | i != 
Constraints.end(); i++) { 161 | if (i.getKey()->getOriginRegion() != nullptr) { 162 | if (i.getKey()->getOriginRegion()->getString() == thisMemRegStr) { 163 | parseSymExpr(i.getKey(), &SymbolInfo); 164 | symInfo *tmp = nullptr; 165 | for (unsigned int j = 0; j < SymbolInfo.size(); j++) { 166 | if (SymbolInfo[j].typeName == "MemSymbol") { 167 | tmp = &SymbolInfo[j]; 168 | } 169 | } 170 | for (llvm::APSInt e : splitRangeSet(i.getData())) { 171 | if (tmp != nullptr) { 172 | tmp->addConcreteValue(e); 173 | } 174 | } 175 | for (symInfo s : SymbolInfo) { 176 | llvm::outs() << s.toString() << "\n"; 177 | } 178 | llvm::outs() << "\n\n"; 179 | } 180 | } 181 | } 182 | } 183 | } 184 | } 185 | return; 186 | } 187 | 188 | void ento::registerConditionChecker(CheckerManager &mgr) { 189 | mgr.registerChecker(); 190 | } 191 | 192 | bool ento::shouldRegisterConditionChecker(const LangOptions &LO) { 193 | return true; 194 | } 195 | -------------------------------------------------------------------------------- /static_analysis_tools/ConditionChecker/Parse.h: -------------------------------------------------------------------------------- 1 | #include "clang/StaticAnalyzer/Core/Checker.h" 2 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 3 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" 4 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 5 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 6 | #include "llvm/ADT/SmallString.h" 7 | #include "llvm/ADT/StringExtras.h" 8 | #include "llvm/Support/raw_ostream.h" 9 | #include "clang/AST/ParentMap.h" 10 | #include "clang/Basic/TargetInfo.h" 11 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 12 | 13 | using namespace clang; 14 | using namespace ento; 15 | 16 | 17 | class stmtInfo { 18 | public: 19 | std::string typeName; 20 | unsigned int ID; 21 | std::string target; 22 | /* For MemberExpr base->target */ 23 | std::string base; 24 | std::string srcLine; 25 | 26 | 
void init(std::string tpnm, int64_t id, std::string targetinfo, std::string bsinfo, std::string srcline) { 27 | typeName = tpnm; 28 | ID = id; 29 | target = targetinfo; 30 | base = bsinfo; 31 | srcline.erase(std::remove(srcline.begin(), srcline.end(), '\n'), srcline.end()); 32 | srcline.erase(std::remove(srcline.begin(), srcline.end(), '\t'), srcline.end()); 33 | srcLine = srcline; 34 | } 35 | 36 | std::string toString() { 37 | std::string retStr, IDStr; 38 | char IDChars[0x10]; 39 | if (ID > 0) { 40 | sprintf(IDChars, "0x%x", ID); 41 | IDStr = "ID-" + std::string(IDChars); 42 | } 43 | retStr = "[" + typeName + "] "; 44 | if (ID != 0) { 45 | retStr = retStr.append("ID-" + IDStr + " "); 46 | } 47 | if (base != "") { 48 | retStr = retStr.append(base) + "->"; 49 | } 50 | retStr = retStr.append(target); 51 | if (srcLine != "") { 52 | if (target != "") { 53 | retStr = retStr.append("\n"); 54 | } 55 | retStr = retStr.append("RawSrcLine: " + srcLine); 56 | } 57 | return retStr; 58 | } 59 | }; 60 | 61 | class symInfo { 62 | public: 63 | std::string typeName; 64 | unsigned int ID; 65 | std::string targetStr; 66 | std::string funcName; 67 | std::vector concreteVal; 68 | 69 | void init(std::string nm, unsigned int id, std::string ts){ 70 | typeName = nm; 71 | ID = id; 72 | targetStr = ts; 73 | } 74 | void addFuncName(std::string fn) { 75 | funcName = fn; 76 | } 77 | void addConcreteValue(llvm::APSInt e) { 78 | concreteVal.push_back(e); 79 | } 80 | std::string toString() { 81 | std::string retStr = ""; 82 | retStr = retStr.append(funcName + " "); 83 | retStr = retStr.append("[" + typeName + "] "); 84 | retStr = retStr.append(targetStr); 85 | retStr = retStr.append(" {"); 86 | for (llvm::APSInt e : concreteVal) { 87 | retStr = retStr.append("0x" + e.toString(0x10) + ", "); 88 | } 89 | retStr = retStr.append("}"); 90 | return retStr; 91 | } 92 | }; 93 | 94 | typedef bool(*stmtHandle)(const SourceManager &SM, const Stmt *s, std::vector *info); 95 | 96 | /* Recursicely parse the 
children statement, use the stmtHandle function */ 97 | void handleChildrenStmt(const SourceManager &SM, const Stmt *s, stmtHandle handle, std::vector *info); 98 | void parseSymExpr(const SymExpr *s, std::vector *SymbolInfo); 99 | 100 | /* Implement of handle specified statement */ 101 | bool searchCondVar(const SourceManager &SM, const Stmt *s, std::vector *info); 102 | bool searchLocalVar(const SourceManager &SM, const Stmt *s, std::vector *info); 103 | bool searchParm(const SourceManager &SM, const Stmt *s, std::vector *info); 104 | bool searchCondition(const SourceManager &SM, const Stmt *s, std::vector *info); 105 | 106 | std::string srcLineToString(const SourceManager &SM, SourceLocation SRs, SourceLocation SRe); 107 | stmtInfo FuncInfo(const FunctionDecl *FD); 108 | std::vector ListAllParmInfo(const SourceManager &SM, const FunctionDecl *FD); 109 | std::vector splitRangeSet(RangeSet RS); 110 | -------------------------------------------------------------------------------- /static_analysis_tools/ConditionChecker/README.md: -------------------------------------------------------------------------------- 1 | # Clang checker for symbolic execution 2 | 3 | 4 | ## Introduction of Clang checker 5 | 6 | Clang checker can be a great static analysis tool, you can do lots of amazing work by write your checker. For example, you can write a checker to do taint analysis, symbolic execution ... 7 | We write a checker to static analyse which data structure is relevant to satisfy conditions constraint( c language). Unlike IR parser, clang checker still remain the programing syntax information in compile time, it's readable if you reconstruct the source code from these information. Since we just want to extract these information of symbolic inputs, but not run a symbolic execution base on them, these information should be more readable. So, Clang checker is the best choice in our case. 
8 | 9 | 10 | ### Clang checker guide 11 | 12 | Clang checker have lots of great tutorial and document. You can easily build your clang with customized checker. 13 | * [Checker Developer Manual](https://clang-analyzer.llvm.org/checker_dev_manual.html) 14 | * [How to Write a Checker in 24 Hours](https://llvm.org/devmtg/2012-11/Zaks-Rose-Checker24Hours.pdf) 15 | * [Checker analyzer-guide](https://github.com/haoNoQ/clang-analyzer-guide/releases/download/v0.1/clang-analyzer-guide-v0.1.pdf) 16 | 17 | Also, you can alse refer to "/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp" and the checker implement under "/clang/lib/StaticAnalyzer/Checkers/*". 18 | 19 | 20 | ### Clang symbolic execution 21 | 22 | At first, we only want to calculate which members of data structure are used in condition statement frequently and which members used by a function. Actually, in this case, AST parse checker is enough, so we implement these in ConditionChecker::ASTDecl() interface. AST-base parse is much faster than path-sensitive parse. Statistic result will be displayed in ConditionChecker::EndAnalysis(). 23 | But, AST-base parse is hard to find out constraint of a condition. So, we also write a path-sensitive checker. Interface with parameter "CheckerContext &Ctx" is a path-sensitive checker interface. In path-sensitive parse, checker will walk thought all node( ExplodedNode) of ExplodedGraph. ExplodedGraph is a graph of paths of CFG and their ProgramState( clang option "-analyzer-checker=debug.ViewExplodedGraph" can dump the ExplodedNode). So, in our checker, while ConditionChecker::BranchCondition() is called, that means a branch condition is found in that path. We can extract the constriant( range or concrete value) from the ProgramState that attached to that node. 
24 | 25 | 26 | ## Compare to other symbolic execution 27 | 28 | | Tool | static/dynamic | symbolize | parse source | 29 | |------|--------------- | --------- | ------------ | 30 | | Clang | static | original source | input of every func | 31 | | KLEE | static+dynamic | LLVM IR | input of entry func | 32 | | CBMC | static | original source | input of entry func | 33 | 34 | Compare to KLEE, clang checker is totally a static analyzer. Clang won't execute any program. Clang ProgramState will maintain the state( constraint) of reaching a position of one path. While KLEE, CBMC only symbolize the input of entry. So, we can see, if a local variable initialized by the input of entry and pass it to other functions. These functions may use it in condition, and KLEE will not trace the variable in such case. But this condition is also indirectly from input of entry. Clang treat inputs of any functions as symbolic variable, so we can trace those mishandled condition. 35 | * We have two tutorials for [KLEE](https://github.com/hardenedlinux/Debian-GNU-Linux-Profiles/blob/master/docs/harbian_qa/symexec/klee.md) and [CBMC](https://github.com/hardenedlinux/Debian-GNU-Linux-Profiles/blob/master/docs/harbian_qa/symexec/cbmc_kern.md). 36 | 37 | ## For kernel fuzzing 38 | In our case, we use syzkaller for kernel fuzzing. While syzkaller only collect coverage as feedback. We try to [trace more state](../syzkaller/kstat_demo/README.md) if it is widely use in condition statement. Clang path-sensitive checker is what we actually need. After static analysis, we calculate which states( data) are widely used in conditions. These states will be collected as state-base block( syzkaller resource) at runtime. And we also collect inputs of some important functions to help to fuzz important paths more efficiently. 39 | An example of part output: 40 | ``` 41 | clang -Xclang -analyze -Xclang -analyzer-checker=debug.ConditionChecker ... -c /root/linux/net/ipv4/tcp.c 42 | ... 
43 | # AST-base parse 44 | [Function] ID-ID-0x271f29 tcp_ioctl 45 | [ParmVar] ID-ID-0x271ef4 struct sock *sk 46 | [ParmVar] ID-ID-0x271f04 intcmd 47 | [ParmVar] ID-ID-0x271f14 unsigned longarg 48 | [LocalVar] ID-ID-0x271f45 struct tcp_sock * 49 | [Condition] RawSrcLine: if (sk->sk_state == TCP_LISTEN) 50 | [BinaryOperator] unknown == 0x1ffb6f 51 | [MemExpr] ID-ID-0x2024cc struct sock_common->volatile unsigned char skc_state 52 | [MemExpr] ID-ID-0x202842 struct sock *->struct sock_common __sk_common 53 | [DeclRefExpr] ID-ID-0x1ffb6f int TCP_LISTEN 54 | ... 55 | # Path-sensitive parse 56 | ... 57 | tcp_poll [ElementCast] (struct tcp_sock)struct sock * {} 58 | tcp_poll [MemSymbol] struct socket *->struct sock * sk {} 59 | tcp_poll [MemSymbol] struct tcp_sock *->u16 urg_data {} 60 | tcp_poll [SymIntExpr] 0x100 {} 61 | ... 62 | tcp_poll [MemSymbol] struct sock *->int sk_err {} 63 | [MemSymbol] struct sock *->int sk_err {0x-80000000, 0x-1, 0x1, 0x7FFFFFFF, } 64 | ... 65 | 66 | # AST-base parse count 67 | ... 68 | struct sock *->struct sock_common __sk_common:39 69 | ... 70 | struct sock_common->volatile unsigned char skc_state:39 71 | ... 72 | struct tcp_sock *->u8 repair:14 73 | ... 74 | 75 | # Path-senstive parse count 76 | ... 77 | struct sock *->struct sock_common __sk_common:119 78 | ... 79 | struct sock_common *->volatile unsigned char skc_state:61 80 | ... 
81 | struct tcp_sock *->u8 repair:78 82 | ``` -------------------------------------------------------------------------------- /static_analysis_tools/IRParser/extend_func.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "llvm/IR/DebugInfo.h" 11 | #include "llvm/IR/DIBuilder.h" 12 | #include "llvm/IR/Function.h" 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "log.h" 26 | #include "info.h" 27 | 28 | 29 | using namespace std; 30 | using namespace llvm; 31 | 32 | 33 | std::map funcsInfo; 34 | 35 | 36 | std::vector readFuncList(std::string funcListPath); 37 | void getCalledFunc(Module *mod, Function *func, int blockNum, int level); 38 | void writeToNewFuncList(std::vector funcList, std::string oldPath); 39 | void writeToLogDir(std::string fn, std::string funcCallTree, std::string dirPath); 40 | 41 | 42 | int main(int argc, const char *argv[]) { 43 | if (argc < 5 || argv[1] == nullptr) { 44 | outs() << "./extern_func functions_list ir_path call_depth block_num log_dir\n"; 45 | return 1; 46 | } 47 | std::string FuncListPath = argv[1]; 48 | std::string IRPath = argv[2]; 49 | unsigned depth = std::stoi(argv[3]); 50 | unsigned blockNum = std::stoi(argv[4]); 51 | std::string logDir = argv[5]; 52 | 53 | std::vector funcList = readFuncList(FuncListPath); 54 | std::vector extFuncList; 55 | 56 | LLVMContext ctx; 57 | SMDiagnostic err; 58 | std::unique_ptr mod_unique = parseIRFile(IRPath, err, ctx); 59 | if (mod_unique == nullptr) { 60 | outs() << FAIL << "Failed to open ir file: " << IRPath << "\n" << RESET; 61 | return 1; 62 | } 63 | Module *mod = mod_unique.get(); 64 | 65 | for (std::string fn : funcList) { 66 | Function *func = mod->getFunction(fn); 67 | getCalledFunc(std::move(mod), func, 
blockNum, depth); 68 | } 69 | 70 | for (std::string fn : funcList) { 71 | std::string funcCallTree; 72 | if (funcsInfo.find(fn) != funcsInfo.end()) 73 | funcCallTree = funcsInfo[fn].callTree(funcsInfo, 0, depth); 74 | else { 75 | funcCallTree = fn; 76 | outs() << FAIL << fn << " was not found!\n"; 77 | } 78 | writeToLogDir(fn, funcCallTree, logDir); 79 | } 80 | std::vector newFuncList; 81 | for (auto &fn : funcsInfo) { 82 | if (fn.second.getBlockNum() > blockNum) 83 | newFuncList.push_back(fn.first); 84 | } 85 | writeToNewFuncList(newFuncList, FuncListPath); 86 | } 87 | 88 | std::vector readFuncList(std::string funcListPath) { 89 | fstream funcListFile(funcListPath); 90 | std::vector funcList; 91 | std::string fn = ""; 92 | if (!funcListFile.is_open()) { 93 | outs() << FAIL << "Failed to open init function list\n" << RESET; 94 | return funcList; 95 | } 96 | while (getline(funcListFile, fn)) { 97 | if(fn != "") 98 | funcList.push_back(fn); 99 | } 100 | return funcList; 101 | } 102 | 103 | /* Recursively get the called functions, use blockNum and level limit functions */ 104 | void getCalledFunc(Module *mod, Function *func, int blockNum, int level) { 105 | if (level < 1) 106 | return; 107 | if (func == nullptr) { 108 | outs() << FAIL << "unvariable function\n"<< RESET; 109 | return; 110 | } 111 | if (func->size() < 1) { 112 | func = mod->getFunction(func->getName()); 113 | } 114 | 115 | if (func != nullptr) { 116 | funcInfoInCFG *thisFuncInfo = new funcInfoInCFG(func->getName(), func->size()); 117 | if (funcsInfo.find(func->getName()) == funcsInfo.end()) 118 | funcsInfo[func->getName()] = *thisFuncInfo; 119 | delete thisFuncInfo; 120 | } 121 | 122 | if (func != nullptr && func->size() > 0) { 123 | for (BasicBlock &bb : *func) { 124 | for (Instruction &i : bb) { 125 | CallInst *callInst = dyn_cast(&i); 126 | if (callInst != nullptr) { 127 | Function *calledFunc = callInst->getCalledFunction(); 128 | if (calledFunc == nullptr) { 129 | calledFunc = 
dyn_cast(callInst->getCalledValue()->stripPointerCasts()); 130 | } 131 | if (calledFunc != nullptr) { 132 | /* Skip the instument function */ 133 | if (calledFunc->getName().find("saniti") != std::string::npos) 134 | continue; 135 | if (calledFunc->getName().find("asan") != std::string::npos) 136 | continue; 137 | if (calledFunc->getName().find("llvm.") != std::string::npos) 138 | continue; 139 | /* Recursive call with a depth level */ 140 | funcsInfo[func->getName()].addCalledFunc(calledFunc->getName()); 141 | getCalledFunc(mod, calledFunc, blockNum, level - 1); 142 | } 143 | } 144 | } 145 | } 146 | } 147 | } 148 | 149 | void writeToNewFuncList(std::vector funcList, std::string oldPath) { 150 | ofstream newFuncList; 151 | newFuncList.open(oldPath + ".new"); 152 | for (std::string f : funcList) { 153 | newFuncList << f << "\n"; 154 | } 155 | newFuncList.close(); 156 | } 157 | 158 | void writeToLogDir(std::string fn, std::string funcCallTree, std::string dirPath) { 159 | ofstream funcLogFile; 160 | funcLogFile.open(dirPath + "/" + fn); 161 | funcLogFile << funcCallTree << "\n"; 162 | funcLogFile.close(); 163 | } 164 | -------------------------------------------------------------------------------- /static_analysis_tools/IRParser/info.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "log.h" 13 | 14 | using namespace std; 15 | 16 | class sanCallInfo { 17 | private: 18 | std::string rawInst; 19 | unsigned blockID; 20 | unsigned address; 21 | public: 22 | sanCallInfo(std::string rawI, unsigned bID) { 23 | rawInst = rawI; 24 | blockID = bID; 25 | } 26 | sanCallInfo(){}; 27 | void AttachAddress(unsigned addr) { 28 | address = addr; 29 | } 30 | unsigned getBlockID() { 31 | return blockID; 32 | } 33 | std::string getAsLine() { 34 | return std::to_string(address) + ": " + rawInst; 35 | } 36 | unsigned 
getAddress() { 37 | return address; 38 | } 39 | }; 40 | 41 | class blockInfo { 42 | private: 43 | unsigned blockID; 44 | std::string blockName; 45 | std::string funcName; 46 | unsigned long count; 47 | 48 | std::vector sanCalls; 49 | std::vector succBlock; 50 | std::vector predBlock; 51 | 52 | public: 53 | blockInfo(unsigned bID, std::string bName, std::string fName, unsigned cnt) { 54 | blockID = bID; 55 | blockName = bName; 56 | funcName = fName; 57 | count = cnt; 58 | } 59 | 60 | blockInfo(){}; 61 | 62 | unsigned getBlockID() { 63 | return blockID; 64 | } 65 | 66 | void addSuccBlock(unsigned bID) { 67 | for (unsigned i : succBlock) { 68 | if (i == bID) 69 | return; 70 | } 71 | succBlock.push_back(bID); 72 | } 73 | 74 | void addPredBlock(unsigned bID) { 75 | for (unsigned i : predBlock) { 76 | if (i == bID) 77 | return; 78 | } 79 | predBlock.push_back(bID); 80 | } 81 | 82 | void addSanCall(sanCallInfo sc) { 83 | sanCalls.push_back(sc); 84 | } 85 | 86 | std::vector getSanCalls() { 87 | return sanCalls; 88 | } 89 | 90 | unsigned getForwardEdgeNum() { 91 | /* We use this method to get the weight of this block */ 92 | //return count; 93 | return succBlock.size(); 94 | } 95 | 96 | std::string getAsJson() { 97 | std::string ret = ""; 98 | ret += "{\n"; 99 | ret += "Function: \"" + funcName + "\",\n"; 100 | ret += "Block: \"" + blockName + "\",\n"; 101 | ret += "BlockID: " + std::to_string(blockID) + ",\n"; 102 | ret += "Count: " + std::to_string(count) + ",\n"; 103 | ret += "Predblocks: ["; 104 | for (unsigned b : predBlock) 105 | ret += std::to_string(b) + ", "; 106 | ret += "],\n"; 107 | ret += "Succblocks: ["; 108 | for (unsigned b : succBlock) 109 | ret += std::to_string(b) + ", "; 110 | ret += "],\n"; 111 | ret += "SanitizerCall: [\n"; 112 | for (sanCallInfo sc : sanCalls) { 113 | ret += "\t" + sc.getAsLine() + ",\n"; 114 | } 115 | ret += "\t],\n"; 116 | ret += "}\n"; 117 | return ret; 118 | } 119 | }; 120 | 121 | class gepInfo { 122 | private: 123 | std::string 
structName; 124 | std::string fieldName; 125 | unsigned bitWidth; 126 | unsigned count; 127 | unsigned ID; 128 | 129 | unsigned hash(string s) { 130 | std::hash hashFunc; 131 | return hashFunc(s); 132 | } 133 | 134 | std::string stripNum(std::string name) { 135 | size_t len = name.size(); 136 | char tmp[len]; 137 | strncpy(tmp, name.c_str(), len); 138 | if (len < 1) 139 | return name; 140 | /* llvm will add suffix to variable name, we have to strip away*/ 141 | while ((tmp[len-1] <= '9' && tmp[len-1] >= '0' && len > 1) 142 | || (tmp[len-1] == 'i' && tmp[len-2] == '.' && len > 2) 143 | || (tmp[len-1] == '.' && len > 1)) { 144 | if (tmp[len-1] == 'i' && tmp[len-2] == '.') { 145 | tmp[len-1] = 0; 146 | tmp[len-2] = 0; 147 | len -= 2; 148 | continue; 149 | } 150 | tmp[len-1] = 0; 151 | len--; 152 | } 153 | name = name.substr(0, len); 154 | return name; 155 | } 156 | 157 | public: 158 | gepInfo(std::string srtName, std::string fName, unsigned bitWid) { 159 | structName = srtName; 160 | fieldName = fName; 161 | bitWidth = bitWid; 162 | ID = hash(getStructName()); 163 | count = 0; 164 | } 165 | 166 | gepInfo(){}; 167 | 168 | std::string getStructName() { 169 | return stripNum(structName) + "->" + stripNum(fieldName); 170 | } 171 | 172 | void incCount() {count++;} 173 | 174 | /* We use this method to get the weight of a kernel state */ 175 | unsigned getCount() {return count;} 176 | 177 | unsigned getGEPointerID() {return ID;} 178 | 179 | std::string getAsJson() { 180 | std::string ret; 181 | ret += "{\n"; 182 | ret += "\tName: " + structName + "->" + fieldName + ",\n"; 183 | ret += "\tBitWidth: " + std::to_string(bitWidth) + ",\n"; 184 | ret += "\tID: " + std::to_string(ID) + "\n"; 185 | ret += "}\n"; 186 | return ret; 187 | } 188 | }; 189 | 190 | class funcInfoInCFG { 191 | private: 192 | std::string funcName; 193 | unsigned blockNum; 194 | 195 | std::vector calledFuncs; 196 | public: 197 | funcInfoInCFG(std::string fName, unsigned bNum) { 198 | funcName = fName; 199 | 
blockNum = bNum; 200 | } 201 | 202 | funcInfoInCFG(){}; 203 | 204 | void addCalledFunc(std::string cFunc) { 205 | for (std::string f : calledFuncs) { 206 | if (f == cFunc) 207 | return; 208 | } 209 | calledFuncs.push_back(cFunc); 210 | } 211 | 212 | unsigned getBlockNum() {return blockNum;} 213 | 214 | std::string callTree(std::map funcInfoList, int startlevel, int depth) { 215 | std::string ret = thisFuncInfo(startlevel); 216 | if (depth < 1) 217 | return ret; 218 | for (std::string calledFunc : calledFuncs) { 219 | if (funcInfoList.find(calledFunc) != funcInfoList.end()) { 220 | ret += funcInfoList[calledFunc].callTree(funcInfoList, startlevel + 1, depth - 1); 221 | } 222 | } 223 | return ret; 224 | } 225 | std::string thisFuncInfo(int level) { 226 | std::string ret = "|"; 227 | for (unsigned i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "llvm/IR/DebugInfo.h" 11 | #include "llvm/IR/DIBuilder.h" 12 | #include "llvm/IR/Function.h" 13 | #include 14 | #include 15 | #include 16 | #include "llvm/Support/BlockFrequency.h" 17 | #include "llvm/Analysis/BlockFrequencyInfo.h" 18 | #include "llvm/Analysis/BranchProbabilityInfo.h" 19 | #include "llvm/IR/Dominators.h" 20 | #include "llvm/Analysis/PostDominators.h" 21 | #include "llvm/Analysis/LoopInfo.h" 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "log.h" 32 | #include "info.h" 33 | 34 | 35 | using namespace llvm; 36 | using namespace std; 37 | 38 | 39 | std::vector getFuncListFromFile(std::string funcListFile); 40 | std::vector listIRFile(std::string IRFileDir); 41 | Function &getFuncFromMods(std::vector IRFiles, std::string funcName); 42 | std::map getBlockIDMap(Function *func); 43 | std::map getBlockInfo(Function *func, std::map blockIDMap); 44 | std::string getASMCodeFileName(std::string ASMCodeDir, std::string sourceFile); 45 | std::vector 
getSanCallsFromAsmLine(std::string asmFileName, std::string funcName, std::map blockInfosMap); 46 | std::vector getAddrFromObjdumpAsm(std::string vmLinux, std::string funcName); 47 | void writeDebugInfo(std::map blockInfos, std::string path); 48 | void writeFuncAddrMap(std::map blockInfo, std::string path); 49 | uint64_t encode(unsigned addr, unsigned num); 50 | 51 | 52 | int main(int argc, const char *argv[]) { 53 | if (argc < 5) { 54 | outs() << FAIL << "./kcov_map ir_dir asm_dir vmlinux func_list log_dir\n" << RESET; 55 | return 1; 56 | } 57 | std::string IRFileDir = argv[1]; 58 | std::string ASMCodeDir = argv[2]; 59 | std::string VMLinux = argv[3]; 60 | std::string FuncListFile = argv[4]; 61 | std::string LogDir = argv[5]; 62 | 63 | std::vector FuncList = getFuncListFromFile(FuncListFile); 64 | std::vector IRFiles = listIRFile(IRFileDir); 65 | 66 | for (std::string funcName : FuncList) { 67 | bool found = false; 68 | /* Search which riFile the function is loacted in */ 69 | for (std::string f : IRFiles) { 70 | LLVMContext context; 71 | SMDiagnostic error; 72 | std::unique_ptr mod = parseIRFile(f, error, context); 73 | Module const *mod_const = mod.get(); 74 | if (mod_const == nullptr) { 75 | outs() << FAIL_LINE("Failed to open " + f + "."); 76 | return 1; 77 | } 78 | 79 | Function *func = mod_const->getFunction(funcName); 80 | if (func == nullptr) continue; 81 | 82 | if (func != nullptr) { 83 | if (func->size() == 0) { 84 | //outs() << WARN_LINE("Function " + funcName + " declaration, pass"); 85 | continue; 86 | } 87 | found = true; 88 | outs() << SUCC_LINE("Function " + funcName + " was found"); 89 | 90 | /* Get the IR of function, extract block infomation */ 91 | std::map blockIDMap = getBlockIDMap(func); 92 | std::map blockInfosMap = getBlockInfo(func, blockIDMap); 93 | /* Get the sanitizer_* call of assamble code */ 94 | std::string asmFile = getASMCodeFileName(ASMCodeDir, mod_const->getSourceFileName()); 95 | std::vector sanCallInfos = 
getSanCallsFromAsmLine(asmFile, funcName, blockIDMap); 96 | /* objdump assebly code should be matched to assemble one by one */ 97 | std::vector objdumpAddrs = getAddrFromObjdumpAsm(VMLinux, funcName); 98 | if (sanCallInfos.size() != objdumpAddrs.size()) { 99 | outs() << std::to_string(sanCallInfos.size()) << ":" << std::to_string(objdumpAddrs.size()) << "\n"; 100 | outs() << FAIL_LINE("Function " + funcName + " assemble and objdump is mismatch\n"); 101 | continue; 102 | } 103 | 104 | unsigned idx = 0; 105 | for (sanCallInfo sc : sanCallInfos) { 106 | sc.AttachAddress(objdumpAddrs[idx]); 107 | blockInfosMap[sc.getBlockID()].addSanCall(sc); 108 | idx++; 109 | } 110 | writeDebugInfo(blockInfosMap, LogDir + "/" + funcName + ".json"); 111 | writeFuncAddrMap(blockInfosMap, LogDir + "/" + funcName + ".addr.map"); 112 | break; 113 | } 114 | } 115 | if (!found) 116 | outs() << FAIL_LINE("Function " + funcName + " was not found"); 117 | } 118 | } 119 | 120 | std::vector getFuncListFromFile(std::string funcListPath) { 121 | fstream funcListFile(funcListPath); 122 | std::vector funcList; 123 | std::string fn = ""; 124 | if (!funcListFile.is_open()) { 125 | outs() << FAIL_LINE( "Failed to open init function list"); 126 | return funcList; 127 | } 128 | while (getline(funcListFile, fn)) { 129 | if(fn != "") 130 | funcList.push_back(fn); 131 | } 132 | return funcList; 133 | } 134 | 135 | std::vector listIRFile(std::string IRDirPath) { 136 | std::vector irList; 137 | struct dirent *entry; 138 | DIR *dir = opendir(IRDirPath.c_str()); 139 | if (dir == NULL) { 140 | outs() << FAIL_LINE("Dir wrong"); 141 | return irList; 142 | } 143 | while ((entry = readdir(dir)) != NULL) { 144 | std::string fn(entry->d_name); 145 | if (fn.find(".ll") != std::string::npos) 146 | irList.push_back(IRDirPath + "/" + entry->d_name); 147 | } 148 | closedir(dir); 149 | return irList; 150 | } 151 | 152 | std::map getBlockIDMap(Function *func) { 153 | std::map blockIDMap; 154 | unsigned bID = 0, count = 0; 
155 | std::string funcName = func->getName(); 156 | for (BasicBlock &bb : *func) { 157 | count++; 158 | std::string blockName = bb.getName(); 159 | if (blockName != "") 160 | blockIDMap[blockName] = bID++; 161 | else { 162 | bb.setValueName(ValueName::Create(funcName + "." + std::to_string(count))); 163 | blockIDMap[funcName + "." + std::to_string(count)] = bID++; 164 | } 165 | } 166 | 167 | return blockIDMap; 168 | } 169 | 170 | std::map getBlockInfo(Function *func, std::map blockIDMap) { 171 | std::map blockInfosMap; 172 | DominatorTree *DT = new DominatorTree(const_cast(func->getFunction())); 173 | LoopInfo *LI = new LoopInfo(*DT); 174 | BranchProbabilityInfo *BPI = new BranchProbabilityInfo(func->getFunction(), *LI); 175 | BlockFrequencyInfo *BFI = new BlockFrequencyInfo(func->getFunction(), *BPI, *LI); 176 | unsigned bID = 0; 177 | BasicBlock &entry = func->getEntryBlock(); 178 | 179 | /* The frequency of entry block is maxium, 180 | * all the maxium will formalize to 100 181 | */ 182 | unsigned long zoom = 1, maxFreq = BFI->getBlockFreq(&entry).getFrequency(); 183 | if (maxFreq > 100) 184 | zoom = maxFreq / 100; 185 | else if (maxFreq > 0 && maxFreq <= 100) 186 | zoom = 100 / maxFreq; 187 | else 188 | zoom = 1; 189 | if (zoom < 1) 190 | zoom = 1; 191 | 192 | for (BasicBlock &bb : *func) { 193 | bID++; 194 | std::string blockName = bb.getName(); 195 | std::string funcName = func->getName(); 196 | if (blockName == "") 197 | blockName = funcName + "." 
+ std::to_string(bID); 198 | unsigned long weight = BFI->getBlockFreq(&bb).getFrequency(); 199 | if (maxFreq > 100) 200 | weight = weight / zoom; 201 | else 202 | weight = weight * zoom; 203 | /* weight/zoom maybe zero */ 204 | if (weight > 100 || weight < 1) 205 | weight = 1; 206 | 207 | blockInfo binfo(blockIDMap[blockName], blockName, funcName, weight); 208 | for (BasicBlock *predbb : predecessors(&bb)) { 209 | std::string predbbName = predbb->getName(); 210 | if (predbbName != "") 211 | binfo.addPredBlock(blockIDMap[predbbName]); 212 | else 213 | binfo.addPredBlock(0xffff); 214 | } 215 | for (BasicBlock *succbb : successors(&bb)) { 216 | std::string succbbName = succbb->getName(); 217 | if (succbbName != "") 218 | binfo.addSuccBlock(blockIDMap[succbbName]); 219 | else 220 | binfo.addSuccBlock(0xffff); 221 | } 222 | blockInfosMap[binfo.getBlockID()] = binfo; 223 | } 224 | delete DT; 225 | delete LI; 226 | delete BPI; 227 | delete BFI; 228 | 229 | return blockInfosMap; 230 | } 231 | 232 | std::string getASMCodeFileName(std::string ASMCodeDir, std::string sourceFile) { 233 | while (std::size_t pos = sourceFile.find("/") != std::string::npos) { 234 | sourceFile = sourceFile.substr(pos); 235 | } 236 | std::size_t pos = sourceFile.find("."); 237 | sourceFile = sourceFile.substr(0, pos + 1) + "s"; 238 | return ASMCodeDir + "/" + sourceFile; 239 | } 240 | 241 | std::vector getSanCallsFromAsmLine(std::string asmFileName, std::string funcName, std::map blockIDMap) { 242 | fstream asmFile(asmFileName); 243 | std::vector sanCallInfos; 244 | if (!asmFile.is_open()) { 245 | outs() << FAIL_LINE(asmFileName + " can't be found\n"); 246 | return sanCallInfos; 247 | } 248 | 249 | std::string ln; 250 | bool infunc = false; 251 | std::string blockName = ""; 252 | while (getline(asmFile, ln)) { 253 | if (ln.size() < 1) 254 | continue; 255 | if (ln.find(funcName + ":") != std::string::npos) { 256 | if (ln.find("@" + funcName) != std::string::npos) { 257 | infunc = true; 258 | 
continue; 259 | } 260 | } 261 | if (ln.find("Lfunc_end") != std::string::npos && infunc) { 262 | infunc = false; 263 | break; 264 | } 265 | if (infunc) { 266 | std::size_t foundPos = ln.find("# %"); 267 | if (foundPos != std::string::npos) { 268 | ln = ln.substr(foundPos + 3, ln.size()-1); 269 | foundPos = ln.find("# %"); 270 | if (foundPos != std::string::npos) 271 | ln = ln.substr(foundPos + 3, ln.size()-1); 272 | if (ln.find("SP_return") != std::string::npos) continue; 273 | if (blockIDMap.find(ln) == blockIDMap.end()) { 274 | continue; 275 | } 276 | blockName = ln; 277 | } 278 | if (ln.find("__sanitizer_cov_trace") != std::string::npos) { 279 | sanCallInfo scall(ln, blockIDMap[blockName]); 280 | sanCallInfos.push_back(scall); 281 | } 282 | } 283 | } 284 | return sanCallInfos; 285 | } 286 | 287 | std::vector getAddrFromObjdumpAsm(std::string vmLinux, std::string funcName) { 288 | std::vector address; 289 | std::string objdump = "objdump"; 290 | std::string disAsmFunc = "--disassemble="; 291 | std::string noRaw = "--no-show-raw-insn"; 292 | std::string cmd = objdump + " " + disAsmFunc+funcName + " " + noRaw + " " + vmLinux; 293 | FILE *pipe = popen(cmd.c_str(), "r"); 294 | if (!pipe) { 295 | outs() << FAIL_LINE("Failed to read objdump\n"); 296 | outs() << WARN_LINE("Try this command line: \"" + cmd + "\" to get the output\n"); 297 | return address; 298 | } 299 | 300 | char buffer[0x100]; 301 | bool infunc = false; 302 | bool foundSanCall = false; 303 | while (fgets(buffer, 0x100, pipe) != NULL) { 304 | std::string ln(buffer); 305 | if (ln.find("<" + funcName + ">:") != std::string::npos) { 306 | infunc = true; 307 | continue; 308 | } 309 | if (infunc && ln == "\n") 310 | break; 311 | if (infunc && foundSanCall) { 312 | foundSanCall = false; 313 | std::size_t colon = ln.find(":"); 314 | if (colon == std::string::npos) { 315 | outs() << FAIL_LINE("Failed to get address of " + ln); 316 | break; 317 | } 318 | uint64_t addr_full = std::stoull(ln.substr(0, colon), 
nullptr, 16); 319 | unsigned addr = unsigned(addr_full); 320 | address.push_back(addr); 321 | foundSanCall = false; 322 | } 323 | if (infunc) { 324 | if (ln.find("<__sanitizer_cov_trace") != std::string::npos) { 325 | foundSanCall = true; 326 | continue; 327 | } 328 | } 329 | } 330 | return address; 331 | } 332 | 333 | void writeDebugInfo(std::map blockInfos, std::string path) { 334 | ofstream json; 335 | json.open(path); 336 | for (auto i : blockInfos) { 337 | json << i.second.getAsJson(); 338 | } 339 | json.close(); 340 | } 341 | 342 | void writeFuncAddrMap(std::map blockInfos, std::string path) { 343 | ofstream map; 344 | map.open(path); 345 | for (auto i : blockInfos) { 346 | auto bi = i.second; 347 | for (auto sc : bi.getSanCalls()) { 348 | if (sc.getAsLine().find("trace_pc") != std::string::npos) 349 | map << "0x" << std::hex << encode(sc.getAddress(), bi.getForwardEdgeNum()) << "\n"; 350 | if (sc.getAsLine().find("trace_srt") != std::string::npos) 351 | map << "0x" << std::hex << encode(sc.getAddress(), 1) << "\n"; 352 | } 353 | } 354 | map.close(); 355 | } 356 | 357 | uint64_t encode(unsigned addr, unsigned num) { 358 | uint64_t ret = (uint64_t)num; 359 | return ((ret&0xffff)<<32) | (uint64_t)(addr&0xffffffff); 360 | } 361 | -------------------------------------------------------------------------------- /static_analysis_tools/IRParser/kstate_map.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "llvm/IR/DebugInfo.h" 11 | #include "llvm/IR/DIBuilder.h" 12 | #include "llvm/IR/Function.h" 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "log.h" 26 | #include "info.h" 27 | 28 | 29 | using namespace llvm; 30 | using namespace std; 31 | 32 | 33 | std::vector 
getFuncListFromFile(std::string funcListFile); 34 | std::vector getGEPInfoFromFunc(Function *func); 35 | void writeDebugInfo(std::vector info, std::string path); 36 | void writeFuncAddrMap(std::vector info, std::string path); 37 | 38 | int main(int argc, const char *argv[]) { 39 | if (argc < 3) { 40 | outs() << FAIL << "./kcov_map ir_path func_list log_dir\n" << RESET; 41 | return 1; 42 | } 43 | std::string IRPath = argv[1]; 44 | std::string FuncListFile = argv[2]; 45 | std::string LogDir = argv[3]; 46 | 47 | std::vector FuncList = getFuncListFromFile(FuncListFile); 48 | std::map gepInfoMap; 49 | LLVMContext context; 50 | SMDiagnostic error; 51 | std::unique_ptr mod = parseIRFile(IRPath, error, context); 52 | Module const *mod_const = mod.get(); 53 | if (mod_const == nullptr) { 54 | outs() << FAIL_LINE("Failed to open " + IRPath + "."); 55 | return 1; 56 | } 57 | 58 | for (std::string funcName : FuncList) { 59 | Function *func = mod_const->getFunction(funcName); 60 | if (func != nullptr && func->size() > 0) { 61 | outs() << SUCC_LINE("Function " + funcName + " was found"); 62 | std::vector gepInfos = getGEPInfoFromFunc(func); 63 | for (gepInfo i : gepInfos) { 64 | if (gepInfoMap.find(i.getGEPointerID()) == gepInfoMap.end()) { 65 | gepInfoMap[i.getGEPointerID()] = i; 66 | } 67 | gepInfoMap[i.getGEPointerID()].incCount(); 68 | } 69 | writeDebugInfo(gepInfos, LogDir + "/" + funcName + "state.json"); 70 | writeFuncAddrMap(gepInfos, LogDir + "/" + funcName + ".state.map"); 71 | continue; 72 | } 73 | if (func == nullptr) 74 | outs() << FAIL_LINE("Function " + funcName + " was not found"); 75 | } 76 | 77 | for (auto i : gepInfoMap) { 78 | outs() << i.second.getStructName() << " " << i.second.getCount() << "\n"; 79 | } 80 | } 81 | 82 | std::vector getFuncListFromFile(std::string funcListPath) { 83 | fstream funcListFile(funcListPath); 84 | std::vector funcList; 85 | std::string fn = ""; 86 | if (!funcListFile.is_open()) { 87 | outs() << FAIL_LINE( "Failed to open init 
function list"); 88 | return funcList; 89 | } 90 | while (getline(funcListFile, fn)) { 91 | if(fn != "") 92 | funcList.push_back(fn); 93 | } 94 | return funcList; 95 | } 96 | 97 | std::vector getGEPInfoFromFunc(Function *func) { 98 | std::map gepInfoMap; 99 | std::vector ret; 100 | for (BasicBlock &bb : *func) { 101 | for (Instruction &i : bb) { 102 | GetElementPtrInst *gepInst = dyn_cast(&i); 103 | if (gepInst != nullptr) { 104 | unsigned width = 0; 105 | if (gepInst->getType()) { 106 | if (gepInst->getType()->isIntegerTy()) 107 | width = gepInst->getType()->getIntegerBitWidth(); 108 | } 109 | 110 | if (gepInst->getSourceElementType()->isStructTy()) { 111 | std::string structName = gepInst->getSourceElementType()->getStructName(); 112 | std::string fieldName = gepInst->getName(); 113 | if (fieldName == "") continue; 114 | gepInfo thisGEP(structName, fieldName, width); 115 | if (!gepInfoMap[thisGEP.getStructName()]) { 116 | if (gepInst->getResultElementType()->isPointerTy()) { 117 | continue; 118 | } 119 | gepInfoMap[thisGEP.getStructName()] = true; 120 | ret.push_back(thisGEP); 121 | } else { 122 | thisGEP.incCount(); 123 | } 124 | } 125 | } 126 | } 127 | } 128 | return ret; 129 | } 130 | 131 | void writeDebugInfo(std::vector info, std::string path) { 132 | ofstream json; 133 | json.open(path); 134 | for (auto i : info) { 135 | json << i.getAsJson(); 136 | } 137 | json.close(); 138 | } 139 | 140 | void writeFuncAddrMap(std::vector info, std::string path) { 141 | ofstream map; 142 | map.open(path); 143 | for (auto i : info) { 144 | map << i.getStructName() << ": 0x" << std::hex << i.getGEPointerID() << "\n"; 145 | } 146 | map.close(); 147 | } 148 | -------------------------------------------------------------------------------- /static_analysis_tools/IRParser/log.h: -------------------------------------------------------------------------------- 1 | #define RESET "\033[0m" 2 | #define RED "\033[31m" 3 | #define GREEN "\033[32m" 4 | #define BLUE "\033[34m" 5 | 6 | 
#define WARN BLUE 7 | #define SUCC GREEN 8 | #define FAIL RED 9 | 10 | #define WARN_LINE(s) WARN << s << "\n" << RESET 11 | #define SUCC_LINE(s) SUCC << s << "\n" << RESET 12 | #define FAIL_LINE(s) FAIL << s << "\n" << RESET 13 | -------------------------------------------------------------------------------- /static_analysis_tools/kern_instrument/AssignTrackerPass/AssignTracker.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "llvm/IR/LegacyPassManager.h" 12 | #include "llvm/IR/CallSite.h" 13 | #include "llvm/IR/IRBuilder.h" 14 | #include "llvm/IR/InlineAsm.h" 15 | #include "llvm/ADT/Statistic.h" 16 | #include "llvm/IR/Function.h" 17 | #include "llvm/Pass.h" 18 | #include "llvm/Support/raw_ostream.h" 19 | 20 | typedef unsigned long long uint64; 21 | typedef unsigned int uint32; 22 | typedef unsigned short uint16; 23 | 24 | using namespace llvm; 25 | using namespace legacy; 26 | 27 | static const char *const SanCovTraceSrt1Name = "__sanitizer_cov_trace_srt1"; 28 | static const char *const SanCovTraceSrt2Name = "__sanitizer_cov_trace_srt2"; 29 | static const char *const SanCovTraceSrt4Name = "__sanitizer_cov_trace_srt4"; 30 | static const char *const SanCovTraceSrt8Name = "__sanitizer_cov_trace_srt8"; 31 | 32 | namespace { 33 | struct AssignTracker : public ModulePass { 34 | static char ID; // Pass identification, replacement for typeid 35 | FunctionCallee SanCovTraceSrt1; 36 | FunctionCallee SanCovTraceSrt2; 37 | FunctionCallee SanCovTraceSrt4; 38 | FunctionCallee SanCovTraceSrt8; 39 | Type *VoidTy; 40 | Type *Int8Ty; 41 | Type *Int16Ty; 42 | Type *Int32Ty; 43 | Type *Int64Ty; 44 | std::map StructIDMap; 45 | StringRef SourceFileName; 46 | 47 | LLVMContext *C; 48 | AssignTracker() : ModulePass(ID) {} 49 | 50 | bool runOnModule(Module &M) override { 51 | C = &M.getContext(); 52 | IRBuilder<> 
IRB(*C); 53 | 54 | VoidTy = IRB.getVoidTy(); 55 | Int8Ty = IRB.getInt8Ty(); 56 | Int16Ty = IRB.getInt16Ty(); 57 | Int32Ty = IRB.getInt32Ty(); 58 | Int64Ty = IRB.getInt64Ty(); 59 | 60 | SanCovTraceSrt1 = M.getOrInsertFunction(SanCovTraceSrt1Name, VoidTy, Int64Ty, Int8Ty); 61 | SanCovTraceSrt2 = M.getOrInsertFunction(SanCovTraceSrt2Name, VoidTy, Int64Ty, Int16Ty); 62 | SanCovTraceSrt4 = M.getOrInsertFunction(SanCovTraceSrt4Name, VoidTy, Int64Ty, Int32Ty); 63 | SanCovTraceSrt8 = M.getOrInsertFunction(SanCovTraceSrt8Name, VoidTy, Int64Ty, Int64Ty); 64 | SourceFileName = M.getName(); 65 | 66 | for (Function &F : M) 67 | instrumentFieldAssign(F); 68 | for (auto i : StructIDMap) { 69 | errs() << i.first << ": " << std::to_string(i.second) << "\n"; 70 | } 71 | return true; 72 | } 73 | void injectFieldAssignTracker(Instruction *I, uint64 id); 74 | void instrumentFieldAssign(Function &func); 75 | }; 76 | } 77 | 78 | std::string stripNum(std::string name) { 79 | size_t len = name.size(); 80 | char tmp[len]; 81 | strncpy(tmp, name.c_str(), len); 82 | if (len < 1) 83 | return name; 84 | while ((tmp[len-1] <= '9' && tmp[len-1] >= '0' && len > 1) 85 | || (tmp[len-1] == 'i' && tmp[len-2] == '.' && len > 2) 86 | || (tmp[len-1] == '.' 
&& len > 1)) { 87 | if (tmp[len-1] == 'i' && tmp[len-2] == '.') { 88 | tmp[len-1] = 0; 89 | tmp[len-2] = 0; 90 | len -= 2; 91 | continue; 92 | } 93 | tmp[len-1] = 0; 94 | len--; 95 | } 96 | name = name.substr(0, len); 97 | return name; 98 | } 99 | 100 | uint16 crc16(std::string name) { 101 | unsigned len = name.length(); 102 | if (len == 0) 103 | return 0; 104 | char *tmp = (char*)malloc(len+1); 105 | strcpy(tmp, name.c_str()); 106 | uint16 data, hash = 0x3e7a, crc = 0xffff; 107 | for (unsigned i = 0; i < len; i++) { 108 | data = *(uint16*)(tmp + i); 109 | if ((crc&0x0001) ^ (data&0x0001)) 110 | crc = (crc >> 1) ^ (hash |0x8005); 111 | else 112 | crc >>= 1; 113 | hash = data ^ hash; 114 | } 115 | crc = ~crc; 116 | data = crc; 117 | crc = (crc << 8) | (data >> 8 &0xff); 118 | return crc; 119 | } 120 | 121 | bool isStruct(const Value *val, const Value *var) { 122 | if (!val->getType()->isIntegerTy()) 123 | return false; 124 | const GetElementPtrInst *gepInst = dyn_cast(var); 125 | if (gepInst == nullptr) 126 | return false; 127 | Type *greTy = gepInst->getResultElementType(); 128 | if (greTy && greTy->isPointerTy()) 129 | return false; 130 | if (gepInst->getSourceElementType()->isStructTy()) { 131 | const StructType *srtTy = dyn_cast(gepInst->getSourceElementType()); 132 | return srtTy->hasName(); 133 | } 134 | return false; 135 | } 136 | 137 | std::string getStructName(const Value *var) { 138 | const GetElementPtrInst *gepInst = dyn_cast(var); 139 | std::string srtName = gepInst->getSourceElementType()->getStructName(); 140 | std::string fieldName = gepInst->getName(); 141 | return stripNum(srtName) + "->" + stripNum(fieldName); 142 | } 143 | 144 | uint64 getSrtIDFromName(const Value *var) { 145 | const GetElementPtrInst *gepInst = dyn_cast(var); 146 | std::string srtName = gepInst->getSourceElementType()->getStructName(); 147 | std::string fieldName = gepInst->getName(); 148 | uint16 srtID = crc16(stripNum(srtName)); 149 | uint16 fieldID = 
crc16(stripNum(fieldName)); 150 | return (((uint64)srtID << 16) | (uint64)fieldID) & 0xffffffff; 151 | } 152 | 153 | uint64 getSourceFileID(std::string sourceFileName) { 154 | uint64 srcID = (uint64)crc16(sourceFileName); 155 | return srcID & 0xffff; 156 | } 157 | 158 | void AssignTracker::instrumentFieldAssign(Function &func) { 159 | if (!func.size()) 160 | return; 161 | for (BasicBlock &bb : func) { 162 | for (Instruction &i : bb) { 163 | if (StoreInst *si = dyn_cast(&i)) { 164 | const Value *val_op = si->getOperand(0); 165 | const Value *var_op = si->getPointerOperand(); 166 | if (isStruct(val_op, var_op)) { 167 | std::string srtName = getStructName(var_op); 168 | uint64 srtID = getSrtIDFromName(var_op); 169 | srtID |= (getSourceFileID(SourceFileName) & 0xffff) << 32; 170 | if (StructIDMap.find(srtName) == StructIDMap.end()) 171 | StructIDMap[srtName] = srtID; 172 | injectFieldAssignTracker(si, StructIDMap[srtName]); 173 | } 174 | } 175 | } 176 | } 177 | } 178 | 179 | void AssignTracker::injectFieldAssignTracker(Instruction *I, uint64 id) { 180 | IRBuilder<> IRB(I); 181 | Value *val = I->getOperand(0); 182 | unsigned bitWidth = val->getType()->getIntegerBitWidth(); 183 | switch (bitWidth) { 184 | case 8: { 185 | IRB.CreateCall(SanCovTraceSrt1, {IRB.getInt64(id), IRB.CreateIntCast(val, Int8Ty, true)}); 186 | break; 187 | } 188 | case 16: { 189 | IRB.CreateCall(SanCovTraceSrt2, {IRB.getInt64(id), IRB.CreateIntCast(val, Int16Ty, true)}); 190 | break; 191 | } 192 | case 32: { 193 | IRB.CreateCall(SanCovTraceSrt4, {IRB.getInt64(id), IRB.CreateIntCast(val, Int32Ty, true)}); 194 | break; 195 | } 196 | case 64: { 197 | IRB.CreateCall(SanCovTraceSrt8, {IRB.getInt64(id), IRB.CreateIntCast(val, Int64Ty, true)}); 198 | break; 199 | } 200 | } 201 | } 202 | 203 | char AssignTracker::ID = 0; 204 | static RegisterPass X("AssignTracker", "AssignTracker Pass", false, false); 205 | 206 | static void registerAssignTrackerPass(const PassManagerBuilder &, legacy::PassManagerBase &PM) 
207 | { 208 | PM.add(new AssignTracker()); 209 | } 210 | 211 | static RegisterStandardPasses RegisterAPass(PassManagerBuilder::EP_OptimizerLast, registerAssignTrackerPass); 212 | -------------------------------------------------------------------------------- /static_analysis_tools/kern_instrument/AssignTrackerPass/AssignTracker.exports: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/static_analysis_tools/kern_instrument/AssignTrackerPass/AssignTracker.exports -------------------------------------------------------------------------------- /static_analysis_tools/kern_instrument/AssignTrackerPass/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # If we don't need RTTI or EH, there's no reason to export anything 2 | # from the hello plugin. 3 | if( NOT LLVM_REQUIRES_RTTI ) 4 | if( NOT LLVM_REQUIRES_EH ) 5 | set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/AssignTracker.exports) 6 | endif() 7 | endif() 8 | 9 | if(WIN32 OR CYGWIN) 10 | set(LLVM_LINK_COMPONENTS Core Support) 11 | endif() 12 | 13 | add_llvm_library( LLVMAssignTracker MODULE BUILDTREE_ONLY 14 | AssignTracker.cpp 15 | 16 | DEPENDS 17 | intrinsics_gen 18 | PLUGIN_TOOL 19 | opt 20 | ) 21 | -------------------------------------------------------------------------------- /static_analysis_tools/kern_instrument/kern_patch/0001-KCOV_SRT_TRACK-ok.patch: -------------------------------------------------------------------------------- 1 | From 8ba8bf36997ba5726098cedad261663129f4d806 Mon Sep 17 00:00:00 2001 2 | From: bins 3 | Date: Wed, 7 Oct 2020 04:28:44 -0400 4 | Subject: [PATCH] kernel struct assignment instrument 5 | 6 | --- 7 | kernel/kcov.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | 1 file changed, 62 insertions(+) 9 | 10 | diff --git a/kernel/kcov.c b/kernel/kcov.c 11 | index 
6afae0bcbac4..b6a40fc40a94 100644 12 | --- a/kernel/kcov.c 13 | +++ b/kernel/kcov.c 14 | @@ -322,6 +322,68 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) 15 | EXPORT_SYMBOL(__sanitizer_cov_trace_switch); 16 | #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ 17 | 18 | +static void notrace write_srt_data(u64 id, u64 val, u64 ip) 19 | +{ 20 | + struct task_struct *t; 21 | + u64 *area; 22 | + u64 count, start_index, end_pos, max_pos; 23 | + 24 | + t = current; 25 | + 26 | + if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t)) 27 | + return; 28 | + 29 | + ip = canonicalize_ip(ip); 30 | + 31 | + area = (u64 *)t->kcov_area; 32 | + max_pos = t->kcov_size * sizeof(unsigned long); 33 | + 34 | + count = READ_ONCE(area[0]); 35 | + 36 | + /* Every record is KCOV_WORDS_PER_STRU 64-bit words. */ 37 | + start_index = 1 + count; 38 | + end_pos = (start_index + 3) * sizeof(u64); 39 | + if (likely(end_pos <= max_pos)) { 40 | + area[start_index] = id; 41 | + area[start_index + 1] = val; 42 | + area[start_index + 2] = ip; 43 | + WRITE_ONCE(area[0], count + 3); 44 | + } 45 | +} 46 | + 47 | +void notrace __sanitizer_cov_trace_srt1(u64 id, u8 val) 48 | +{ 49 | + u64 id_64 = ((u64)0xefe1 << 48) | (u64)id; 50 | + u64 val_64 = (u64)val; 51 | + write_srt_data(id_64, val_64, _RET_IP_); 52 | +} 53 | +EXPORT_SYMBOL(__sanitizer_cov_trace_srt1); 54 | + 55 | +void notrace __sanitizer_cov_trace_srt2(u64 id, u16 val) 56 | +{ 57 | + u64 id_64 = ((u64)0xefe2 << 48) | (u64)id; 58 | + u64 val_64 = (u64)val; 59 | + write_srt_data(id_64, val_64, _RET_IP_); 60 | +} 61 | +EXPORT_SYMBOL(__sanitizer_cov_trace_srt2); 62 | + 63 | +void notrace __sanitizer_cov_trace_srt4(u64 id, u32 val) 64 | +{ 65 | + u64 id_64 = ((u64)0xefe4 << 48) | (u64)id; 66 | + u64 val_64 = (u64)val; 67 | + write_srt_data(id_64, val_64, _RET_IP_); 68 | +} 69 | +EXPORT_SYMBOL(__sanitizer_cov_trace_srt4); 70 | + 71 | +void notrace __sanitizer_cov_trace_srt8(u64 id, u64 val) 72 | +{ 73 | + u64 id_64 = ((u64)0xefe8 << 48) | 
(u64)id; 74 | + u64 val_64 = (u64)val; 75 | + write_srt_data(id_64, val_64, _RET_IP_); 76 | +} 77 | +EXPORT_SYMBOL(__sanitizer_cov_trace_srt8); 78 | + 79 | + 80 | static void kcov_start(struct task_struct *t, struct kcov *kcov, 81 | unsigned int size, void *area, enum kcov_mode mode, 82 | int sequence) 83 | -- 84 | 2.20.1 85 | 86 | -------------------------------------------------------------------------------- /survey.md: -------------------------------------------------------------------------------- 1 | ## Brief 2 | 3 | This document will introduce some features or design of customizing fuzzer. Firstly, most of fuzzer implemented its own Genetic Algorithm( GA). Some features can be classified to one of GA component. For example, the optimizing of generate, mutate and crossover. Other features, such as special feedback or satifying deep nested condition, is strongly depend on what project you fuzz, although these problem is very common in real-world project. 4 | 5 | Because this document is a by-product of customizing Linux kernel fuzzer(base on Syzkaller), Some problem appeared kernel fuzzing only. At the end this document, i will attach the paper the document involved, with a short introduction. 6 | 7 | 8 | ## GA of fuzzer 9 | 10 | In most fuzzers, GA is the engine of evolving testcase. For different purpose, the design of GA's components can be quite different. 11 | 12 | 13 | ### Generate & Mutate in evaluating programming 14 | 15 | In evolutionary programming, if mutation and generating only base on random inputs, that fuzzer will perform badly. Useful information help reducing the search space of evolving the testcase you want. Generally, these following informations can benefit mutating or generating: 16 | 1. symbolic execution: static analyse target, deriver which inputs is useful.( KLEE) 17 | 2. Dynamic taint analysis( DTA): dynamically trace inputs used by conditions.( VUzzer) 18 | 3. 
Dynamic taint analysis: dynamically trace which inputs can satisfy which conditions efficiently.( GREYONE) 19 | 4. Manually written rules: hard-code some special inputs or enum inputs.( Syzkaller) 20 | 5. Extract inputs from real-world programs.( Moonshine) 21 | 22 | 23 | ### Crossover 24 | 25 | In the real world, if you want to fuzz an entire project, generated testcases should be of indeterminate length. The classical single-point random crossover doesn't work well. Block-stacking evolutionary programming would be more efficient. In particular, some testcases are state-based( for example: socket programming); generating and crossing over based on state-based blocks helps evolve testcases with complex context. In our practice, for state-based programming, state-based block-stacking evolution performs better than random crossover. Here are some ideas for block-stacking crossover: 26 | 1. Statically analyse the state dependence of real-world testcases.( Moonshine) 27 | 2. Resource centric: treat generated testcases which use( create & operate on) the same resource as a complex resource. Use them in the subsequent syscalls.( Syzkaller) 28 | 3. State-based resource centric: classify testcases by the states they trigger.( based on syzkaller resource centric) 29 | 4. Build an N-Gram model for syscalls: select those testcases that trigger a type of crash, then build an N-Gram model to analyse the pattern of crash testcases.( FastSyzkaller) 30 | 31 | 32 | ### Fitness 33 | 34 | Fitness is the motivation of evolution in a GA. An appropriate fitness reward helps efficiently select potential inputs or testcases. Moreover, a gradient fitness also helps evolution. Fitness is always based on what feedback the fuzzer collects. 35 | 36 | 37 | #### coverage 38 | 39 | 1. CFG position weight fitness.( VUzzer) 40 | 2. Sum of basic-block weight fitness.( Syzkaller) 41 | 3. Class code: lower fitness for error-handling code.(VUzzer) 42 | 4.
Statistical calculation of testcase.( Syzkaller) 43 | 44 | Similar to VUzzer, we implement a customized syzkaller which support specifying the fuzz target and can be feasibly configure basic-block weight. Read this [document](syzkaller/cover_filter.md) for more information. 45 | 46 | * refer to the following survey 47 | 48 | 49 | #### state 50 | 51 | 1. Symbolic execution: static analyse call-stack input, weight them base on its CFG. 52 | 2. Targeted symbolic execution: matching testcases' stack-trace to BUG's stack-trace report.( Wildfire) 53 | 3. Distance of taint variable to condition expected value.( GREYONE) 54 | 55 | #### Exploit vs Explore 56 | 57 | A fuzzer for the entire project is usually a Multi-armed bandit problem. You may need to trade off explore and exploit. 58 | Trade off them in a fuzzer is difficult, so we try to combinate several fuzzer with different policy( base on syz-hub). Refer to our [multi-policy fuzzer](syzkaller/multi_policy/README.md). 59 | 60 | 61 | ## Other design 62 | 63 | Moreover, there are lots of design of fuzzer is base on what project you fuzz, it can't be classified into any step of GA, although it strongly associates with things mentioned above. 64 | 65 | 66 | ### Shortage of only coverage-guide fuzzer 67 | 68 | Coverage-guide is the most widely used feedback of fuzzer. But, some reserachers found it's not enough for some case. In userspace fuzzing: 69 | 1. Collecting coverage and memory accessing information as fuzzer feedback.( MemFuzz) 70 | 2. 
Collecting targeted functions' argument as feedback.( WildFire) 71 | In kernel fuzzing, state-base fuzz could be more useful, for example: 72 | ``` 73 | Coverage: 74 | Cov(socket+setsockopt$1)+Cov(socket+setsockopt$2)+Cov(socket+sendmsg(flag_not_expect)) = Cov(socket+setsockopt1+setsockopt2) = Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(ANY)) != Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(EXPECT_FLAG)) 75 | ``` 76 | Without any gradient, syzkaller won't collect any testcases to corpus until all inputs are randomly put into the right position. 77 | ``` 78 | State: 79 | State(socket+setsockopt$1)+State(socket+setsockopt$2) != State(socket+setsockopt1+setsockopt$2) != State(socket+setsockopt$1+setsocketopt$2+sendmsg(ANY)) != State(socket+setsockopt$1+setsocketopt$2+sendmsg(EXPECT_FLAG)) 80 | ``` 81 | If we try to collect state of testcases, it will lead fuzzer to generate more complex context testcase. In our practice, we static analyse which state is widely used in condition. Collect those testcases if they can trigger such state. Refer to syzkaller resource centric( block-stacking generate) mentioned before, these testcases will be resource( state-base block) which can be used to generate testcase. Refer to this [document](syzkaller/kstat_demo/README.md). But, that will maintain a lot of testcases in corpus, testcases should be weigted. Also, to avoid writing bpf manuly, we introduce another way to [track kernel state](syzkaller/kstate_resource.md). 82 | 83 | 84 | ### Shortage of Full Kernel Fuzzer 85 | 86 | FKF is multi-solution search space, need a good trade off between explore and exploit. 87 | 1. Syzkaller has no explicit fitness, but it maintain syscall-to-syscall markov chain for prios choise and mutation. The prios include static and dynamic prios. The dynamic prios come from calculating count of syscall pair in each testcase of corpus. Note that testcases may be conflict with each other. 88 | 2. 
Subsystem syscall set: syzkaller support enable/disable a subset of syscalls to fuzz. 89 | 3. Partly kernel fuzz: KCOV support only instement a part of source file in kernel. 90 | 4. Multi-policy fuzzer: base on syz-hub, customized targeted syz-manager with different feedback share testcases with each other if the testcases are interested by other syz-manager. Refer to this [document](syzkaller/multi_policy/README.md). 91 | 92 | 93 | ### Satisfy the condition constraint 94 | 95 | Of course, most ideas of offering information to mutating and generating mentioned above is for staifying condition constraint. There are also some useful way for helping fuzzer satisfy the condition constraint. 96 | 97 | 98 | #### Condition constraint satisfied by single input 99 | If we treat arguments of a function as a byte-base input. Some conditions constraint can be satisfied by mutating input of the function. In this situation, the following ways can be used to improve the performence of fuzzer. 100 | 1. Symbolic execution: static analysis of constraint, can't solve constraint indrectly from input, overhead.( KLEE) 101 | 2. Dynamic taint analysis( DTA): dynamically trace inputs used by conditions.( VUzzer) 102 | 3. Dynamic taint analysis: dynamically trace which inputs can satisfy which conditions efficiently.( GREYONE) 103 | 4. Weakening Strong Constraints: use QEMU Ting Code Generator to weaken strong constraints.( Qemu TCG) 104 | 5. comparison operand tracker: syzkaller use comparison tracker, __sanitizer_cov_trace_cmp for kernel.( KCOV_COMPARISON) 105 | 6. Syzkaller: manually write syscall description. 106 | 7. Matryoshka shows how they try to help AFL evolving input statify nested condition constraint.( Matryoshka) 107 | Also, i attach a comparison of these differences of these ways. 
108 | 109 | | method | dependence | granularity | indirectly use | case | 110 | |--------|------------|-------------|---------------------------|------| 111 | | cmp instrument to track data-flow( DTA) | path-dependent | instruction-level | insensitive | VUzzer | 112 | | cmp instrument to check satisfying | path-dependent | instruction-level | sensitive | GREYONE | 113 | | memory monitor | memory monitor | function-level | sensitive | Matryoshka | 114 | | symbolic execution | path-independent | function-level | insensitive | KLEE/CBMC/ClangChecker | 115 | | KCOV_COMPARISON | path-dependent | instruction-level | sensitive | Syzkaller | 116 | | Qemu TCG | path-dependent | instruction-level | sensitive | QemuTCG + AFL | 117 | 118 | We can see comparison instrument can be used in DTA to solve nested condition. But instrumentation depends on if the branch is reachable. And a taint data monitor like VUzzer is hard to trace complex indirect taint( eg. memory copy). 119 | 120 | 121 | #### Note that in Linux kernel fuzzer: 122 | 123 | Syzkaller has powerful syscall descriptions, search space of a single syscall input was greatly reduced. The real difficulty is to reach branches that depend on syscalls combination and appropriate arguments. 124 | 1. Syzkaller resource: recently syzkaller introduced a feature: resource centric. Syzkaller treats testcases as resource if they create or operate the same kind of data structure( resource also). And uses these resources to generate or mutate new testcases. 125 | 2. MoonShine: static analysis of real world testcases to get the dependence of syscalls. 126 | 3. State-base resource: in our customized syzkaller, only testcases that trigger a special state feedback can be resource. Furthermore, maintaining a relationship between syscalls sequence and kernel state may help more. 127 | Also refer to the chapter crossover mentioned above. 128 | 129 | Symbolic execution: if static analysis chose syscalls as entry, it will be laborious and inefficient. 
Otherwise, if the entry is some kernel function in callstack may help more. Both [this paper](https://arxiv.org/abs/1903.02981) and [our fuzzer](syzkaller/kstat_demo/README.md) chose the second way. Get function-level input by using kernel function hook. We also have a [document](static_analysis_tools/README.md) of comparing some symbolic execution tools. Also, most of time, since kernel state is attach to kernel data structure, track the data structure is other way to track kernel state. Refer to this [document](syzkaller/kstate_resource.md). 130 | 131 | 132 | ## Paper 133 | 134 | [Weakening Strong Constraints for AFL](https://lifeasageek.github.io/class/cs52700-fall16/pages/prog-assignment-1.html): 135 | Strong constriant: a condition constraint need a bunch of memory to satisfy it. In this case, randomly mutating input will take a lot of time to satisfy it. 136 | Weakening strong constraint: try to slice the strong constraint to several weak constraints, replace that branch condition with several branch conditions. Each branch with weak branch can be easily satisfied. So the satisfying input can be gradually evoluted. 137 | The author use Qemu Tiny Code Generator( TCG), a instruction-by-instruction level instrumnetation, to weaken such strong constraints. 138 | 139 | [Compositional Fuzzing Aided by Targeted Symbolic Execution](https://arxiv.org/pdf/1903.02981.pdf): 140 | Targeted symbolic execution: symbolic execution only analyse inputs for reaching targets of interest. 141 | Isolated function: functions that are parameterized( targeted functions). 142 | 1. Repeatly generating testcases and populate testcases base on isolate functions' argements. 143 | 2. Run this testcases in another instrumented version of project, check if crash will happen. 144 | 3. If crash happened, run exploit testcases, collect the stack-trace information. Then try to generate testcases to macth it. 
Check if the target is reachable, if reachable, mutate inputs except those inputs satisfying constraints of the path. 145 | 146 | [VUzzer: Application-aware Evolutionary Fuzzing](): 147 | Data-flow: dynamic taint analysis( DTA), implemented by instrumenting cmp instructions to trace which bits of input have an impact on the condition. The structure of input will be evolved. 148 | Control-flow: assign weight to basic block base on its depth; Assign negative weight to error-handling code. 149 | Static analysis: get immediate value of comparison. 150 | 151 | [MoonShine: Optimizing OS Fuzzer Seed Selection with Trace Distillation](http://www.cs.columbia.edu/~suman/docs/moonshine.pdf): 152 | Use an extended Strace to trace real-world testcases. Extract inputs and dependences of syscalls from Strace output( seed distillation). The dependences are similar to syzkaller resource( after resource centric was introduced). 153 | 154 | [GREYONE: Data Flow Sensitive Fuzzing](https://www.usenix.org/system/files/sec20spring_gan_prepub.pdf): 155 | Fuzzing-driven Taint Inference: it's also DTA. But, unlike VUzzer, GREYONE tracks which input can satisfy condition constraints. So condition variables indirectly initialized from inputs can be found also. 156 | Taint-Guided Mutation: prioritize input bytes that affect more untouched branches to mutate. 157 | Conformance-Guided Evolution: the distance of tainted variables to the value expected by condition. 158 | 159 | [Matryoshka: Fuzzing Deeply Nested Branches](https://arxiv.org/pdf/1905.12228.pdf): 160 | 1. Determine all condition constraints that the target depends on. Use taint analysis to determine which conditions use the same input. 161 | 2. Randomly mutate inputs to satisfy these condition constraints. If all conditions that use the same input( at least one input is the same one) are satisfied, these inputs are called dependent inputs. 162 | 3. If the target is reached, that means all constraints can be satisfied by dependent inputs. 
if not, that means other inputs should be mutated to satisfy those conditions constraints that use indenpendent inputs. 163 | 164 | [FastSyzkaller: Improving Fuzz Efficiency for Linux Kernel Fuzzing](https://iopscience.iop.org/article/10.1088/1742-6596/1176/2/022013/pdf): 165 | FastSyzkaller classify crash type of syzkaller testcases, then use N-Gram model to extract N-Gram sequential syscall patterns from these testcases that may be potentially vulnerable. Generating new testcases from syscall patterns and pack them into the corpus. 166 | 167 | [MEMFUZZ: Using Memory Accesses to Guide Fuzzing](): 168 | 1. Enhance AFL LLVM instrumentation pass: instrument load and store instruction to collect memory accessing information. 169 | 2. Instrumentation site filtering: drop some information of memory accessing, for example, global variables or stack variables accessing. 170 | 3. Extend AFL runtime library for tracking memory accessing. Bloom-filter for deduplicating. 171 | -------------------------------------------------------------------------------- /syz_patch/0002-Calculate-prog-prior-base-on-weighted-pcs.patch: -------------------------------------------------------------------------------- 1 | From bc3063ef6207e1fafa82723a57b58bc938cb77c5 Mon Sep 17 00:00:00 2001 2 | From: Kaipeng Zeng 3 | Date: Tue, 20 Oct 2020 23:47:23 -0400 4 | Subject: [PATCH 2/3] Calculate prog prior base on weighted pcs 5 | 6 | --- 7 | prog/prio.go | 2 +- 8 | prog/prog.go | 1 + 9 | syz-fuzzer/fuzzer.go | 19 +++++++++++++++++-- 10 | syz-fuzzer/proc.go | 1 + 11 | syz-manager/filter.go | 14 ++++++++++++-- 12 | syz-manager/html.go | 4 ++++ 13 | syz-manager/rpc.go | 5 +++-- 14 | 7 files changed, 39 insertions(+), 7 deletions(-) 15 | 16 | diff --git a/prog/prio.go b/prog/prio.go 17 | index eee44cd4..3346c594 100644 18 | --- a/prog/prio.go 19 | +++ b/prog/prio.go 20 | @@ -164,7 +164,7 @@ func (target *Target) calcDynamicPrio(corpus []*Prog) [][]float32 { 21 | for _, c1 := range p.Calls[idx0+1:] { 22 
| id0 := c0.Meta.ID 23 | id1 := c1.Meta.ID 24 | - prios[id0][id1] += 1.0 25 | + prios[id0][id1] += 1.0 * p.Weight 26 | } 27 | } 28 | } 29 | diff --git a/prog/prog.go b/prog/prog.go 30 | index bcc86fb0..7fd8006e 100644 31 | --- a/prog/prog.go 32 | +++ b/prog/prog.go 33 | @@ -11,6 +11,7 @@ type Prog struct { 34 | Target *Target 35 | Calls []*Call 36 | Comments []string 37 | + Weight float32 38 | } 39 | 40 | type Call struct { 41 | diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go 42 | index 2463d9e9..8fc3cc44 100644 43 | --- a/syz-fuzzer/fuzzer.go 44 | +++ b/syz-fuzzer/fuzzer.go 45 | @@ -53,6 +53,7 @@ type Fuzzer struct { 46 | corpusHashes map[hash.Sig]struct{} 47 | corpusPrios []int64 48 | sumPrios int64 49 | + weightedPCs map[uint32]float32 50 | 51 | signalMu sync.RWMutex 52 | corpusSignal signal.Signal // signal of inputs in corpus 53 | @@ -460,6 +461,9 @@ func (fuzzer *Fuzzer) addInputToCorpus(p *prog.Prog, sign signal.Signal, sig has 54 | fuzzer.corpus = append(fuzzer.corpus, p) 55 | fuzzer.corpusHashes[sig] = struct{}{} 56 | prio := int64(len(sign)) 57 | + if len(fuzzer.weightedPCs) > 0 { 58 | + prio = int64(p.Weight) 59 | + } 60 | if sign.Empty() { 61 | prio = 1 62 | } 63 | @@ -563,8 +567,6 @@ func parseOutputType(str string) OutputType { 64 | } 65 | } 66 | 67 | -// Currently, only use GetWeightedPCsRes to check if filter enabled. 68 | -// Weighted PC table will not be used. 
69 | func (fuzzer *Fuzzer) getWeightedPCs() { 70 | a := &rpctype.GetWeightedPCsArgs{} 71 | r := &rpctype.GetWeightedPCsRes{EnableFilter: false} 72 | @@ -574,4 +576,17 @@ func (fuzzer *Fuzzer) getWeightedPCs() { 73 | if r.EnableFilter { 74 | fuzzer.execOpts.Flags |= ipc.FlagEnableCoverageFilter 75 | } 76 | + if len(r.WeightedPCs) > 0 { 77 | + fuzzer.weightedPCs = r.WeightedPCs 78 | + } 79 | +} 80 | + 81 | +func (fuzzer *Fuzzer) calCoverWeight(pcs []uint32) float32 { 82 | + weight := float32(0.0) 83 | + for _, pc := range pcs { 84 | + if _, ok := fuzzer.weightedPCs[pc]; ok { 85 | + weight += fuzzer.weightedPCs[pc] 86 | + } 87 | + } 88 | + return weight 89 | } 90 | diff --git a/syz-fuzzer/proc.go b/syz-fuzzer/proc.go 91 | index fe19a17b..a4b0fb10 100644 92 | --- a/syz-fuzzer/proc.go 93 | +++ b/syz-fuzzer/proc.go 94 | @@ -170,6 +170,7 @@ func (proc *Proc) triageInput(item *WorkTriage) { 95 | Cover: inputCover.Serialize(), 96 | }) 97 | 98 | + item.p.Weight = proc.fuzzer.calCoverWeight(inputCover.Serialize()) 99 | proc.fuzzer.addInputToCorpus(item.p, inputSignal, sig) 100 | 101 | if item.flags&ProgSmashed == 0 { 102 | diff --git a/syz-manager/filter.go b/syz-manager/filter.go 103 | index 9cf2415d..ea3d173d 100644 104 | --- a/syz-manager/filter.go 105 | +++ b/syz-manager/filter.go 106 | @@ -53,8 +53,8 @@ func (mgr *Manager) initKcovFilter() { 107 | mgr.kcovFilter.initWeightedPCs(files, funcs, rawPCs) 108 | } 109 | 110 | -func (mgr *Manager) getWeightedPCs() bool { 111 | - return mgr.kcovFilter.enableFilter 112 | +func (mgr *Manager) getWeightedPCs() (bool, map[uint32]float32){ 113 | + return mgr.kcovFilter.enableFilter, mgr.kcovFilter.weightedPCs 114 | } 115 | 116 | func (filter *CoverFilter) initWeightedPCs(files, functions, rawPCsFiles []string) { 117 | @@ -245,3 +245,13 @@ func (filter *CoverFilter) putUint32(bytes []byte, value uint32) { 118 | binary.BigEndian.PutUint32(bytes, value) 119 | } 120 | } 121 | + 122 | +func (filter *CoverFilter) CalProgWeight(pcs []uint32) 
int { 123 | + prio := int(0) 124 | + for _, pc := range pcs { 125 | + if _, ok := filter.weightedPCs[pc]; ok { 126 | + prio += int(filter.weightedPCs[pc]) 127 | + } 128 | + } 129 | + return prio 130 | +} 131 | diff --git a/syz-manager/html.go b/syz-manager/html.go 132 | index 789c416d..3e1b7788 100644 133 | --- a/syz-manager/html.go 134 | +++ b/syz-manager/html.go 135 | @@ -185,6 +185,7 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) { 136 | Sig: sig, 137 | Short: p.String(), 138 | Cover: len(inp.Cover), 139 | + Prio: mgr.kcovFilter.CalProgWeight(inp.Cover), 140 | }) 141 | } 142 | sort.Slice(data.Inputs, func(i, j int) bool { 143 | @@ -601,6 +602,7 @@ type UIInput struct { 144 | Sig string 145 | Short string 146 | Cover int 147 | + Prio int 148 | } 149 | 150 | var summaryTemplate = html.CreatePage(` 151 | @@ -746,11 +748,13 @@ var corpusTemplate = html.CreatePage(` 152 | 153 | Coverage 154 | Program 155 | + Prio 156 | 157 | {{range $inp := $.Inputs}} 158 | 159 | {{$inp.Cover}} 160 | {{$inp.Short}} 161 | + {{printf "%d" $inp.Prio}} 162 | 163 | {{end}} 164 | 165 | diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go 166 | index 980a447a..864156c6 100644 167 | --- a/syz-manager/rpc.go 168 | +++ b/syz-manager/rpc.go 169 | @@ -57,7 +57,7 @@ type RPCManagerView interface { 170 | newInput(inp rpctype.RPCInput, sign signal.Signal) bool 171 | candidateBatch(size int) []rpctype.RPCCandidate 172 | rotateCorpus() bool 173 | - getWeightedPCs() bool 174 | + getWeightedPCs() (bool, map[uint32]float32) 175 | } 176 | 177 | func startRPCServer(mgr *Manager) (*RPCServer, error) { 178 | @@ -334,7 +334,8 @@ func (serv *RPCServer) shutdownInstance(name string) []byte { 179 | func (serv *RPCServer) GetWeightedPCs(a *rpctype.GetWeightedPCsArgs, r *rpctype.GetWeightedPCsRes) error { 180 | serv.mu.Lock() 181 | defer serv.mu.Unlock() 182 | - enableFilter := serv.mgr.getWeightedPCs() 183 | + enableFilter, weightedPCs := serv.mgr.getWeightedPCs() 184 | r.EnableFilter 
= enableFilter 185 | + r.WeightedPCs = weightedPCs 186 | return nil 187 | } 188 | -- 189 | 2.20.1 190 | 191 | -------------------------------------------------------------------------------- /syzkaller/Bitmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/Bitmap.png -------------------------------------------------------------------------------- /syzkaller/Corpus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/Corpus.png -------------------------------------------------------------------------------- /syzkaller/CoverageOfFiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/CoverageOfFiles.png -------------------------------------------------------------------------------- /syzkaller/CoverageOfTargetFunctions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/CoverageOfTargetFunctions.png -------------------------------------------------------------------------------- /syzkaller/KernFunc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/KernFunc.png -------------------------------------------------------------------------------- /syzkaller/ProgState.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/ProgState.png -------------------------------------------------------------------------------- /syzkaller/TotalCoverage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/TotalCoverage.png -------------------------------------------------------------------------------- /syzkaller/cover_filter.md: -------------------------------------------------------------------------------- 1 | # Syzkaller coverage filter and weighted PCs 2 | 3 | ## Content 4 | 1. Usage. 5 | 2. Implement detail. 6 | 3. Practice. 7 | 8 | To implement coverage filter in syzkaller. we have to follow the next steps: 9 | 10 | 1. Get the LLVM ir code and assembly code of target. 11 | 2. Get the addresses map of target functions by analyzing ir code, assembly code and kernel ELF. 12 | 3. Support cover filter and weighted PCs in syzkaller. 13 | 14 | After step 1 and 2, you will get a addresses map contains addresses of any kernel functions you need. Also, you can attach weight to every PC base on LLVM ir analysis, eg. weighted PCs base on CFG information. 15 | 16 | [Advice from Dmitry](https://groups.google.com/g/syzkaller/c/IgwfGSdca3Q/m/dCsAiB03BgAJ), we implemented a more general and easy to use coverage filter. If you don't need prog prior base on weighted pcs, you can specify which functions or files to test in patched syzkaller manager configure. 17 | 18 | ## Usage 19 | 20 | ### Get LLVM ir code and assembly code 21 | 22 | Lots of static analysis tools can be used to parse ir code. But ir code know nothing about addresses of the final executable file while the assembly code holds both address offset and basic block information. By analyzing them, we can associate ir information with addresses. 
23 | To get ir code and assembly code, you need to pick out the source file where your target functions located at. For example, if your target function is in /net/ipv4/tcp.c, you should run this command in your kernel build tree: 24 | 25 | ``` 26 | make CC=clang net/ipv4/tcp.o -n | grep tcp.c 27 | ``` 28 | 29 | to get the command of compiling tcp.c, command may look like: 30 | 31 | ``` 32 | clang ...... -c -o net/ipv4/tcp.o net/ipv4/tcp.c 33 | ``` 34 | 35 | To get the LLVM ir code of tcp.c, run: 36 | 37 | ``` 38 | clang ...... -S -o net/ipv4/tcp.ll net/ipv4/tcp.c -emit-llvm 39 | ``` 40 | 41 | To get the assembly code of tcp.c, run: 42 | 43 | ``` 44 | clang ...... -S -o net/ipv4/tcp.s net/ipv4/tcp.c 45 | ``` 46 | 47 | Also, linux kernel support: 48 | 49 | ``` 50 | make CC=clang net/ipv4/tcp.ll 51 | make CC=clang net/ipv4/tcp.s 52 | ``` 53 | 54 | Repeat the mentioned steps to get all ir codes and assembly codes of your target functions. Move them to a IR_DIR and ASM_DIR. Then build your kernel and get a VMLINUX file. 55 | 56 | ### Get PCs table 57 | 58 | We use a [kcov_map](../static_analysis_tools/IRParser/kcov_map.cpp) tool to get addresses of the kernel functions we are interested in. 59 | Run the following command to build kcov_map: 60 | 61 | ``` 62 | clang++-10 kcov_map.cpp -o kcov_map -O0 -g `llvm-config-10 --cxxflags --libs --ldflags --system-libs` 63 | ``` 64 | 65 | ``` 66 | ./kcov_map IR_DIR ASM_DIR VMLINUX_FILE FUNCTION_LIST LOG_DIR 67 | ``` 68 | 69 | FUNCTION_LIST has functions name that we need to get their addresses. 70 | IR_DIR: directory all the LLVM ir code we need. 71 | ASM_DIR: directory all the assembly code we need. 72 | VMLINUX_FILE: kernel ELF 73 | LOG_DIR: after run the command, kcov_map will creat a "*.json" and a "*.addr.map" for every function. 74 | Then run: 75 | 76 | ``` 77 | cat LOG_DIR/*.addr.map > funcaddr.map 78 | ``` 79 | 80 | Copy funcaddr.map to syzkaller work directory. 
81 | This is only one of ways when we try to build functions addresses map with weight. You can explore how to build your functions addresses map for you need. 82 | 83 | #### Extend functions list 84 | 85 | In our practice, when we choose some member functions as entry, some functions may be a wrapper function but not the truly implement function. We use [extend_func](../static_analysis_tools/IRParser/extend_func.cpp) extend the function list. 86 | 87 | ``` 88 | clang++-10 extend_func.cpp -o extend_func -O0 -g `llvm-config-10 --cxxflags --libs --ldflags --system-libs` 89 | ``` 90 | 91 | ``` 92 | ./extend_func FUNCTION_LIST IR_DIR 93 | ``` 94 | 95 | You will get a FUNCTION_LIST.new which you can pass to kcov_map. 96 | 97 | ### Support cover filter in syzkaller 98 | 99 | #### Patch syzkaller 100 | 101 | Clone syzkaller, and run: 102 | 103 | ``` 104 | cd PATH_TO_SYZ_SRC 105 | git checkout ff4a334 106 | git apply PATH_TO_harbian-qa/syz_patch/*.patch 107 | ``` 108 | 109 | Build syzkaller as usual. 110 | 111 | #### Modify configure file 112 | 113 | Currently, syzkaller support passing regular expression to coverage filter. Add the following options in syz-manager configure file: 114 | 115 | ``` 116 | "cover": true, 117 | "cover_filter": { 118 | "files": [ 119 | "^net/core/sock.c$", 120 | "^net/sctp/", // file name start with the string 121 | "net/dccp/" // file name include the string 122 | ], 123 | "functions": [ 124 | "^foo$", 125 | "^bar", // all functions start with bar 126 | "baz" // all functions containing baz 127 | ], 128 | "pcs": "external/file/with/weighted/raw/pc/list" 129 | } 130 | ``` 131 | 132 | Also refer to [syzkaller document](https://www.github.com/google/syzkaller/blob/master/pkg/mgrconfig/config.go#L109-L117). 133 | Now you can run a syzkaller with cover filter. 
134 | 135 | ## Implement detail of cover filter 136 | 137 | ### manager 138 | 139 | ### Implement files and functions filter 140 | 141 | Syzkaller manager.reportGenerator holds the file and function information of per pc. At the beginning of syz-manager, we use covfilter.go:createCoverageFilter() to initialize reportGenerator. Then we walk through all symbols and files, use regular expression to pick up those pcs belonging to coverage filter functions and files. 142 | 143 | #### Read weighted pcs from funcaddr.map 144 | 145 | The configure specifies which funcaddr.map should be loaded and sent to VM. Function initWeightedPCs in syz-manager/covfilter.go will read the funcaddr.map and maintain a coverfilter map in structure manager.Manager. This map can be used while calculating the weight of prog in web UI. 146 | 147 | #### RPC interface for sending addresses map to fuzzer 148 | 149 | Extend a getWeightedPCs interface in RPCManagerView in syz-manager/rpc.go for waiting client call( fuzzer) for getting a pcsWeight map. 150 | 151 | #### Display the pc and its weight in source code 152 | 153 | Using the syzkaller web UI "cover", we extend an interface called bitmap. It will convert PCs table to source lines. The color of lines being black means the block of this line won't be dropped while fuzzing. The number at the left is the weight of that line. Note that there may be multiple blocks mapped to a source line. Their weight will be added to this line. 154 | 155 | ### fuzzer 156 | 157 | #### getPCsWeight from syz-manager 158 | 159 | Add a getWeightedPCs for fuzzer, so fuzzer can dynamically fetch PCs table from syz-manager. In other words, it's possible to dynamically distribute PCs table to different fuzzers. For example, reduce PCs weight while some block has been fully explored( [eg.](https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp)). 
160 | 161 | #### Calculate the prog prio from its cover 162 | 163 | We implement a function calCoverWeight in syz-fuzzer/fuzzer.go to calculate the weight and attach to structure prog. You can implement your algorithm of calculating weight base on weighted pc in this function. 164 | 165 | #### Choose prog to mutate base on prog prio 166 | 167 | Syzkaller already has its prior choice base on signals length of the prog. We have to modify the addInputToCorpus function to use our prog weight. 168 | 169 | ### executor 170 | 171 | #### Read pcs map 172 | 173 | The executor/cov_filter.h implements functions for getting PCs table from the map. 174 | 175 | ##### Fast cover filtering. 176 | 177 | Unlike manager and fuzzer, executor coverage filter runs more frequently. Without a fast searching, if the PCs table grows up, the effect on performance can be a disaster. So we use a fast but rough way, bitmap, to address this problem. 178 | We use createCoverageBitmap in syz-manager to create a bitmap for executor. 179 | Because of address alignment, the lowest 4-bit is dropped off. So, for quickly setting and accessing the bit which records if a pc should be filtered, we can search by: 180 | ``` 181 | pc32 -= cov_filter->pcstart; 182 | pc32 = pc32 >> 4; 183 | uint32 idx = pc32 / 8; 184 | uint32 shift = pc32 % 8; 185 | return (cov_filter->bitmap[idx] & (1 << shift)) > 0; 186 | ``` 187 | The effect on performance will not grow up no matter how many PCs should be filtered. 188 | 189 | ## Some PCs-weight-guide fuzzing practice 190 | 191 | Cover filtering is quite certain that you can only set if the edge of that pc will be sent to fuzzer as a signal or not. But, weighted PCs can guide fuzzer to evolve prog flexibly. You can assign weight to PCs base on the result from LLVM ir static analysis. 
192 | 193 | ### Cyclomatic complexity base on llvm CFG 194 | 195 | In the theory of cyclomatic [complexity](https://en.wikipedia.org/wiki/Cyclomatic_complexity), a function can be treated as a one-entry and one-exit model, the complexity can be easily calculated. In realistic application, complexity indicates that program testing should pay more attention to those functions that are more complex. 196 | 197 | ### Basic block count base on llvm BlockFrequenceInfo 198 | 199 | The LLVM class [BlockFrequencyInfo](https://llvm.org/doxygen/classllvm_1_1BlockFrequencyInfo.html) is a convenient way to get the frequency of a block will appear in all potential control-flow paths. It's reasonable that if a basic block appeared more frequently, mutate the prog that triggers this block has a higher probability to cover more other PCs edge. 200 | 201 | ### Basic block to basic block count base on llvm BranchProbabiltyInfo 202 | 203 | The LLVM class [BranchProbabiltyInfo](https://llvm.org/doxygen/classllvm_1_1BranchProbabilityInfo.html) is another tool that can be used in fuzzing. The class has information about the probability of from a block to another block. If you want the fuzzer to evolve a testcase can cover a specific basic block, it's a good choice that uses BranchProbabilityInfo weighted the PCs. 204 | 205 | ### Weighted function call stack 206 | 207 | The mentioned tools focus on if the functions should be fuzzed is already picked out, how to assign priorities to PCs base on CFG information. Sometimes, you may want to fuzz an approximate range, for example, a serial of functions from a call stack. LLVM class [CallGraph](https://llvm.org/doxygen/classllvm_1_1CallGraph.html) can help build the associate of functions call. You can assign low weight to those functions if they are deep and not so complex. 
208 | -------------------------------------------------------------------------------- /syzkaller/design_implementation_intro.md: -------------------------------------------------------------------------------- 1 | # harbian-qa: State-based target directed fuzzer based on syzkaller 2 | 3 | * Author/maintainer: Kaipeng Zeng 4 | * Co-architect: Shawn C[ a.k.a "citypw"] 5 | 6 | ## 1. Summary & Background 7 | 8 | Syzkaller is the state-of-the-art kernel fuzzer. It's effective because of its powerful syscalls descript and resource rules. Particularly, after resource centric was introducted to syzkaller, it can efficiently generate testcases with a complex context. It is the best choice if you want to fuzz your kernel. 9 | 10 | While syzkaller can do targeted fuzz only by constraining syscalls, we can observe that it's no so efficient if you don't want to fuzz the entir kernel, for example, fuzzing a subsystem or several kernel functions. So, We improve syzkaller by introducing coverage filter and kernel state resource, to do targeted fuzz. Coverage filter avoids syzkaller pay too much attention to exploring uninteresting code. Kernel state resource evaluates if a testcase potentially helps to explore target. Both of them help syzkaller to fuzz the targeted code deeply and efficiently. Moreover, some syscalls which are not written for the target, can help to explore targeted code also. But if you don't do coverage filter, target can hardly benefit from them. 11 | 12 | ## 2. Feature of customized syzkaller 13 | 14 | 1. Kernel coverage filter and weighed PCs 15 | 2. Kernel state as a syzkaller resource 16 | 3. Extend syzkaller Web UI 17 | 18 | These [patches](../syz_patch) base on syzkaller-a2cdad9. 19 | 20 | ## 2.1 Support cover filter and weighted PCs 21 | 22 | The original syzkaller can only by constraining enable and disable syscalls to do a targeted fuzzing. Lots of code will be covered although we are not interested. 
And collecting the testcases that trigger such edges will occupy a considerable proportion in the corpus while syzkaller generates and mutates new testcases base on corpus statistics. So, it will slow down the exploring and exploiting of the target. Also, in some cases, you may want to specify a code position gradient to tell fuzzer how to evolve testcases to touch the target position gradually. Or, you just want to fuzz some functions more frequently, maybe because of their complexity or importance. To make syzkaller a more targeted fuzzer, we implement a coverage filter and integrate it into syzkaller. It is not so rare in [userspace fuzzer](http://sharcs-project.eu/m/filer_public/48/8c/488c5fb7-9aad-4c87-ab9c-5ff251ebc73d/vuzzer_ndss17.pdf). And we try to implement it without patching kernel and it can be flexibly configured in syzkaller. Even PCs weight can be changed dynamically in fuzzing time. 23 | 24 | More design detail and usage can be found [here](cover_filter.md). Except how to implement coverage filter efficiently, we also show you some examples of how to use LLVM analysis information to create weighted PCs table to tell customized syzkaller how to evolve testcases. 25 | 26 | * Coverage filter has been merged by syzkaller, refer to [final section](#6-features-merged-by-syzkaller). 27 | 28 | ## 2.2 Syzkaller resource base on kernel state 29 | 30 | Syzkaller already has a resource centric mechanism which helps to generate more complex state resource for subsequent syscalls. Actually, lots of kernel condition constraints are not determined by the input of a single syscall. Most of the time, solving such condition constraints requires a sequence of syscalls and their appropriate inputs. Particularly, after we introduce coverage filter, the state of resource is essential. We couldn't assume that any code outside of our target functions contributes nothing in exploring target functions. 
If a testcase can create a special resource which help solve the following condition constraint met by other syscalls, we call them kernel state. So, we should collect the testcases which can trigger special states to corpus so that they can be used in extracting resource for generating testcases. 31 | 32 | General, in kernel, to pass over kernel functions efficiently, most of states are designed as a field of structure. For example, structure sock contains skc_state, skc_flags and so on. Base on this, we can instrument kernel where it stores or modifies some field of a structure( GetElementPointer operator in LLVM ir). We collect the information about which field of which structure is changed, the value of that field and the address of that operation. Before fuzz, we static analyze which field is used by your target functions more frequently, especially it's used in solving conditions. And tell fuzzer choose those testcases to extract resource more frequently if states of them may more frequently use by your target functions. Moreover, we also static analyze which value of fields are used in conditions. 33 | 34 | More design detail and usage can be found [here](kstate_resource.md). We will show you how to collect kernel state triggered by testcase and how to prior choose testcase to extract resource. Furthermore, base on some static analysis tool, you can specify which and state which value is more meaningful to fuzzer. We show you a semi-manual way for this situation. 35 | 36 | ## 2.3 Extend syzkaller Web UI 37 | 38 | #### Explicitly display the prior of prog and resource of a testcase 39 | 40 | We attach "signal len", "cover weight", "state len" and "Resource weight" to "/corpus" interface. The original use signal length as prog prior. We recalculate the prog prior base on what it covered. And we priorly choose prog to extract resource base on which state it triggers while original syzkaller is totally random. 
41 | 42 | ![Corpus](Corpus.png) 43 | 44 | #### Coverage count of target functions 45 | 46 | No matter if you use coverage filter or not, you can pass a PCs table to patch syzkaller, and access the "/kernfunc" interface to get the information about how much of a function was covered in this fuzzer. 47 | 48 | * Currently, Syzkaller already has a interface `/funccover` cover this feature. We would not maintain such a redundant interface. Refer to [final section](#6-features-merged-by-syzkaller). 49 | 50 | ![KernFunc](KernFunc.png) 51 | 52 | #### Check the coverage filter configuration 53 | 54 | Access "/bitmap" interface to get the colored source code to check if your PCs table is right. 55 | 56 | * Currently, the filtered coverage report was merge by syzkaller, cover this feature. Refer to [final section](#6-features-merged-by-syzkaller). 57 | 58 | ![Bitmap](Bitmap.png) 59 | 60 | #### Display which syscall trigger which kernel state 61 | 62 | We attach kernel states triggered by prog to "/input" interface. You can conveniently trace syscalls behavior. 63 | 64 | ![ProgState](ProgState.png) 65 | 66 | ## 3. Practice and result 67 | 68 | ### A tcp-ipv6 fuzzing example 69 | 70 | We choose member functions of tcpv6_prot and inet6_stream_ops as target functions to fuzz tcp-ipv6. This is inspired by [DDVerify](http://www.cprover.org/ddverify/) which is a symbolic execution tool for kernel driver, it chooses member functions of driver operation as the entry. However, because some of these member functions are only a wrap but not the implement. It contains less basic block information. We use a tool called [extend_func]() to extend the functions list if there is any function with less basic block. We run syzkaller with five modes: 71 | 72 | 1. Syzkaller at a2cdad9. 73 | 2. Patched syzkaller, base on a2cdad9, disable all features we introduce. 74 | 3. Patched syzkaller, base on a2cdad9, enable coverage filter. 75 | 4. 
Patched syzkaller, base on a2cdad9, enable coverage filter and weighted resource base on kernel state. 76 | 5. Patched syzkaller, enable coverage filter and weighted resource, enable all syscalls in syzkaller. 77 | 78 | We use the same kernel and syscalls( except 5) for every mode. Every fuzzer run 6 times, and 8 hours( fuzzing time) per time. 79 | 80 | And here is some information of the result. 81 | 82 | ![TotalCoverage](TotalCoverage.png) 83 | ![CoverageOfFile](CoverageOfFiles.png) 84 | ![CoverageOfTargetFunctions](CoverageOfTargetFunctions.png) 85 | 86 | | Crash name | a2cdad9 | Patched | KCOV filter | KCOV filter + Weighted resource | all syscalls | 87 | | ---- | ---- | ---- | ---- | ---- | ---- | 88 | | BUG: soft lockip in io_uring_release | 0 | 0 | 0 | 0 | 5 | 89 | | WARNING in tcp_recvmsg | 0 | 0 | 4 | 1 | 0 | 90 | | WARNING in tcp_cleanup_rbuf | 0 | 0| 3 | 2 | 0 | 91 | | INFO: task hung in floppy_revailidate | 0 | 0 | 0 | 0 | 3 | 92 | | WARNING in __alloc_pages_nodemask | 0 | 0 | 0 | 0 | 1 | 93 | | divide error in __tcp_select_window | 0 | 0 | 0 | 0 | 1 | 94 | | WARNING in floppy_interrupt | 0 | 0 | 0 | 0 | 1| 95 | 96 | #### The performance influence of introducing these features 97 | 98 | The second mode fuzzer is for analyzing the affect of our patch. Although we didn't use those features, prog prior is signal length, and all resources prior is 1, but we can actually observe the executor was slown down. It is because the patched syzkaller have to recalculate prog prior also. The total executed has a 15% reduction. From average 332/sec to 281/sec. But, it seems the coverage has another result. The coverage of patched syzkaller is slightly more than the original syzkaller. We still do not know the reason for it. 99 | 100 | #### Coverage filter and weighted resource 101 | 102 | In the target function coverage aspect, we can see there is a great improvement after coverage filtering introduced. 
Coverage of files also show a better result, noted that there are functions in af_inet.c and af_inet6.c was not used by tcp-ipv6. That means if you only want to fuzz subsystem or driver, it's a good choice to do coverage filter and weighted resource fuzz. You can get a better result in a shorter time. 103 | 104 | #### Coverage filter, weighted resource and enable all syscalls. 105 | This is the most interesting work in this document, while it makes no assumption that fuzzing a kernel subsystem should only use syscalls for this subsystem. But we take another assumption that lots of code except targeted function and state change contribute little in fuzzing target functions. The result shows us some tcp-ipv6 kernel functions can be covered by not-socket-relative syscalls. But, without coverage filtering, fuzzer may pay more attention to explore the potential coverage of these syscalls. And without kernel state collecting, fuzzer may miss it, because it contributes nothing. That means, in kernel subsystem or driver fuzzing, determinate what you want to fuzz, instead of which syscalls can be used to fuzz, could be effective also. 106 | 107 | ## 4. Conclusions 108 | 109 | #### 4.1 Coverage filter and weighted 110 | 111 | The syzkaller does targeted fuzz by constraining enable syscalls. It can efficiently explore the potential coverage of a single syscall, because mutate input to trigger new coverage in the whole call stack is easier than exploring paths base on complex states( Actually, one of the reasons that syzkaller is efficient is its resource mechanism). We can see fully exploring the deep corner trigger by a single syscall input to early, will slow down syzkaller exploring other branches. Because, there are a bunch of short testcases occupy a great proportion in the corpus while syzkaller will generate new testcase base on them. Instead of collecting every code edge, we limit the target into a serial of important functions. 
We preferentially explore those functions, keep a balance in exploring every potential corner. Our example reveals that explore such a kernel without any emphasis, it's not always efficient. 112 | 113 | #### 4.2 Kernel state resource 114 | 115 | Syzkaller resource mechanism is very important. Unlike most userspace fuzzer, the coverage always triggered by the sequence of syscalls and their input. The return of a syscalls is the input of other syscalls, syscalls may change resource state also. Our work indicated that collect and mutate frequently those resources that have more states changed could help to generate a testcase with a more complex context. Particularly, when a special state is used in target functions for solving condition constraints, that will bring us new coverage. While syzkaller know nothing about kernel state, it can only extract resource from testcases totaly randomly. 116 | 117 | But, build a kernel with instrumentation is a little cumbrous if you change you fuzz target. The whole kernel instrumentation is OK, because we use kstate map to filter those states we don't need. But, it will greatly influence the performance of executor, observably it's impossible to do a bitmap filter like coverage filter. But, at the beginning, we used a [ebpf](kstat_demo) to collect kernel state to do a POC. You can manually write ebpf program to get states you need. It's configurable and more flexible but laborious and unstable and hardly scale up. 118 | 119 | #### 4.3 Enable all syscalls 120 | 121 | Our example shows us another way in fuzzing kernel: choose what to fuzz but not how to fuzz the target. While syzkaller need to pay a lot of effort to write syscalls and choose syscalls to fuzz. That is the only thing that can be controlled by user. If you want to fuzz a part of kernel, you should read the syscalls descript and look for which syscalls will cover this part, then pick them to the enable syscalls. 
122 | 123 | After introducing coverage filter and kernel state resource, base on syzkaller powerful syscalls descript, we can enable all syscalls to fuzz a target. Those syscalls can hardly cover target code will rarely or never appear in corpus. The only thing you should do is find out what you want to fuzz. 124 | 125 | But, we still couldn't extricate from writing syscalls script. We try to run syzkaller without any syscalls with "$", the result is terrible. That means syscalls descript determine the potential coverage presently. We are exploring how to evolve syscalls automatically but not write syscalls script. We think it's possible to classify syscalls if any syscall can trigger a specific kernel state. It's one of the things we are interested in fuzzer. 126 | 127 | 128 | ## 5. Acknowledgments 129 | 130 | * [Special thanks to Dmitry Vyukov and all contributors of syzkaller!](https://www.github.com/google/syzkaller) 131 | * [Thanks to LLVM-project!](https://www.github.com/llvm/llvm-project) 132 | 133 | ## 6. Features merged by syzkaller 134 | 135 | 1. Some [discussion](https://groups.google.com/g/syzkaller/c/IgwfGSdca3Q/) in syzkaller mailing list. 136 | 2. [Support coverage filter](https://www.github.com/google/syzkaller/pull/2017). 137 | 3. Some [cleanup and improvement](https://www.github.com/google/syzkaller/pull/2318) for coverage filter from Dmitry. 138 | 4. [Support filter coverage filter report](https://www.github.com/google/syzkaller/pull/2343). 139 | 5. When this article firstly posted, `/funccover` was not supported by syzkaller. Look at this [commit](https://www.github.com/google/syzkaller/commit/06cecac3179071158ad28688dbec0e09095d1a6d), `/funccover` display the overview of the entire kernel functions, and more accurate than our `/kernfunc` interface. 
140 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/README.md: -------------------------------------------------------------------------------- 1 | # Make syzkaller a state-based guided fuzzer 2 | 3 | ## Goal 4 | Make the syzkaller as a kernel-state-awareness fuzzer or state-based guided fuzzer. The fuzzer should collect the progs which cover the same code but with different kernel data state. Currently syzkaller only collect coverage information. I wonder if it's effective that make syzkaller more kernel-state-awareness. I'd finish collecting some socket state as syzkaller feedback currently. Using the coverage signal interface in syzkaller. And I will show you how to combine these features in a specified purpose fuzzing. 5 | 6 | ## Foundation of theory 7 | 8 | ### Why should we collect the state 9 | For example, assume the prog "socket--setsockopt$1--setsockopt$2--sendmsg(EXPECT_FLAG)" is a desired prog, if only coverage is collected, a pseudocode can be write down as: 10 | ``` 11 | // Cov(prog) is the coverage of a prog 12 | // We assume that only both setsockopt$1 and setsockopt$2 was used 13 | // before sendmsg, new coverage will appear in sendmsg(EXPECT_FLAG). 14 | // EXPECT_FLAG: The flag restrist sendmsg to a new branch 15 | Cov(socket+setsockopt$1)+Cov(socket+setsockopt$2)+Cov(socket+sendmsg(NOEXPECT_FLAG)) 16 | = Cov(socket+setsockopt1+setsockopt2) 17 | = Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(NOEXPECT_FLAG)) 18 | != Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(EXPECT_FLAG)) 19 | ``` 20 | The prog can't be put into corpus until a new coverage signal was detected. Without any gradient between subprog and desired prog. 
After adding state-based feedback, 21 | ``` 22 | // some State(prog) may be miss by syzkaller 23 | // Both of these combinations of syscall may help the coverage discovering 24 | State_or_Cov(socket+setsockopt$1)+State(socket+setsockopt$2) 25 | != State_or_Cov(socket+setsockopt1+setsockopt$2) 26 | != State_or_Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(ANY)) 27 | != State_or_Cov(socket+setsockopt$1+setsocketopt$2+sendmsg(EXPECT_FLAG)) 28 | ``` 29 | Some prog with new state can be collected to corpus and used to generate and mutate. All of them is the gradient that help syzkaller to generate the desired prog. In recent syzkaller, a "resource centric" was introduce and it's quite similar to "state" what we need, although syzkaller use the whole corpus as "resource". The difference is, we mark a prog as resource when it can only build a special kernel state without a new coverage. 30 | 31 | ### Types of branch 32 | From another perspective, coverage is the same as how many branchs the fuzzer has solved. In practice, degree of diffculty in covering different type of branch are different. Kernel state can be restraint of branch. In a kernel function, there are some type of branch: 33 | 1. A condition directly determined by kernel function parameters. Without any impact from other syscalls. In other words, it can be easily covered by mutating a single syscall. 34 | In this [example](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1188), msg_flags is a branch-relative parameters which specified by the input of syscall 'sendmsg'. 35 | 36 | 2. A condition determined by kernel function parameters' historical state. 37 | In first [example](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1189), sk_state is a historical state which can be changed after calling listen/connect... In second [one](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1231), repair_queue is changed after calling setsockopt. 38 | 39 | 3. 
A condition determined by a local variable that can be changed in the kernel function. 40 | In this [example](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1346), local variable merge is changed by this [line](https://elixir.bootlin.com/linux/v4.20/source/net/ipv4/tcp.c#L1330). 41 | 42 | #### Which is not easy to be covered 43 | 44 | First one can be easily covered by syzkaller if powerful syscalls scriptions have been written. Collect function's input as feedbacl helps little coverage. Even though there are several paramters. 45 | 46 | The second one, need time to explore, especial nested condition. For example, in tcp-ipv6 testing, we should not assume that setsockopt/getsockopt/close/shutdown... have no impact on calling sendmsg. Enable too much syscalls will waste much time on exploring their coverage( Original syzkaller do this). Actually, it has no impact on sendmsg unless it trigger a special state for sendmsg( A new State(prog) was discovered). Collecting useful state before calling sendmsg, without collecting any coverage signal of other kernel functions could be more effective. It's actually what i done in state-base fuzzer. And it get a great improvement in some special purpos fuzzer. 47 | 48 | The third one need time to explore too. But it can't be solved by using ebpf feedback. ebpf know nothing about the internal of kernel function. I think fault-injection is a way that can help it. Kernel have a general framework to do function-ret-fault-injection. But it can't attach to inline function. ebpf use this framework also. It has much work to do with supporting a specified fault-injection in syzkaller. 49 | 50 | ### Result 51 | It got a great improvement in the second type of branch. [Here](tcp-ipv6/test.md) is a example for tcp-ipv6. It can easily cover some branch with restraint like "tp->repair", "tp->repair_queue == TCP_*_QUEUE", "sk->sk_state == TCP_CLOSE". All of these branch need more time to explore in original syzkaller. 
52 | 53 | ## Usage 54 | ### Patch syzkaller 55 | First, you need to patch original syzkaller. 56 | ``` 57 | git checkout a34e2c33 58 | git apply *.patch 59 | ``` 60 | ### Gobpf as syzkaller feedback 61 | To build a ebpf as syzkaller feedback, run: 62 | ``` 63 | go build pipe_monitor.go 64 | ``` 65 | 66 | ### Run state-base syzkaller 67 | Just run syz-manager as original syzkaller. 68 | 69 | ### What can you customize 70 | 71 | #### Code and features 72 | 1. Add ebpf feedback and display in webui: run a ebpf monitor before execute_one, read pipe memory to get kernel socket state as syzkaller feedback. 73 | 2. Add coverage filter: filter coverage by address. I use syz-func2addr to get a function address from ELF. 74 | 3. pipe_monitor.go: load a ebpf text, monitor the socket state, feedback to syzkaller by using pipe memory. But it can't trace the historical state of a specific socket. 75 | 4. Add ret ebpfsig as resource: only prog with a special kernel state can be resource. 76 | 77 | * These patch base on upstream syzkaller: a34e2c33 78 | More detail refer to the code comments. 79 | 80 | #### ebpf, kernel data type 81 | 82 | ebpf text in ebpf/ebpftext.go is the only one file can be modified as your will. You can get any data you want by writing ebpf by yourself. Notice: 83 | 1. A hook function before kernel function should be named as kprobe_KFUNC_NAME and append to the list ProbePoint. 84 | 2. Similarly, a kernel function return hook should be named as kretprobe_KFUNC_NAME and append to the RetProbePoint. 85 | 3. The state send to syzkaller by using ebpf function "bpf_trace_printk". Currently, I use a uint64_t state. If you need state with other type, there are a lot work in syzkaller should be done to coordinary with ebpf's output. 86 | 87 | * kernel socket state: parse/parse.go is only for making the socket state readable. Modify it refer to you ebpf text as your will. Only for execprog. Now it's discarded. 
88 | 89 | ## Some example 90 | pipe_monitor can run well with patched syzkaller. Without any different compare to original syzkaller's using. But you need write your ebpf to collect state you want. 91 | 92 | We had already used these featrue to do some fuzz: 93 | ### tcp-ipv6 subsystem fuzzer 94 | According to [this](#Which is not easy to be covered), to fuzz the tcp-ipv6 subsystem, I use the follow feature: 95 | 1. Use ebpf to collect the expected input of kernel function. 96 | 2. Kernel function coverage filtering. Only collect the coverage of _ops function 97 | 3. Filtering the all kernel function coverage except subsystem you need to fuzz. 98 | 99 | ### Arbitrary kernel function fuzzer 100 | 1. Use ebpf to collect socket state before return from syscalls. Mark this type of prog as resource. 101 | 2. Use ebpf to collect the expected input of a kernel function. 102 | 3. Filtering the all kernel function coverage except the one you need to fuzz. 103 | [Here](tcp-ipv6/test.md) are some comparisons of performance of different feedback fuzzer. 104 | 105 | ### Multi-policy fuzzer 106 | We also try to combine this different policy fuzzer by using syz-hub. [Here](../multi_policy/README.md) is a documentation. -------------------------------------------------------------------------------- /syzkaller/kstat_demo/ebpf/ebpf.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/iovisor/gobpf/bcc" 8 | "github.com/iovisor/gobpf/pkg/tracepipe" 9 | ) 10 | 11 | /* 12 | * As an example, we monitor the state, type, flags in socket structure. 13 | * Use ebpf map is a better way to monitor kernel data state. 
14 | * So, we print the state in every hook and handle them after as syzkaller 15 | * read coverage signal 16 | */ 17 | 18 | func EbpfInit() string { 19 | ebpf := EbpfSingle 20 | return ebpf 21 | } 22 | 23 | func Attachs(m *bcc.Module) { 24 | for _, funcname := range ProbePoint { 25 | attachProbe(funcname, m) 26 | } 27 | for _, funcname := range RetProbePoint { 28 | attachRetProbe(funcname, m) 29 | } 30 | } 31 | 32 | func ReadLine(tp *tracepipe.TracePipe, pid uint64) string { 33 | return readline(tp, pid) 34 | } 35 | 36 | /* Add kprobe__ at the beginning, your hookfunc should be kprobe__KERN_FUNCNAME */ 37 | func attachProbe(kprobepoint string, m *bcc.Module) { 38 | funcName := "kprobe__" + kprobepoint 39 | tmpKprobe, err := m.LoadKprobe(funcName) 40 | if err != nil { 41 | fmt.Fprintf(os.Stderr, "Failed to load %s: %s\n", kprobepoint, err); 42 | os.Exit(1) 43 | } 44 | 45 | err = m.AttachKprobe(kprobepoint, tmpKprobe) 46 | if err != nil { 47 | fmt.Fprintf(os.Stderr, "Failed to attach %s: %s\n", kprobepoint, err); 48 | os.Exit(1) 49 | } 50 | } 51 | 52 | func attachRetProbe(kretprobepoint string, m *bcc.Module) { 53 | funcName := "kretprobe__" + kretprobepoint 54 | tmpKretprobe, err := m.LoadKprobe(funcName) 55 | if err != nil { 56 | fmt.Fprintf(os.Stderr, "Failed to load %s: %s\n", kretprobepoint, err); 57 | os.Exit(1) 58 | } 59 | 60 | err = m.AttachKretprobe(kretprobepoint, tmpKretprobe) 61 | if err != nil { 62 | fmt.Fprintf(os.Stderr, "Failed to attach %s: %s\n", kretprobepoint, err); 63 | os.Exit(1) 64 | } 65 | } 66 | 67 | /* read a single line from ebpf, strip useless information */ 68 | func readline(tp *tracepipe.TracePipe, pid uint64) string { 69 | ret := "" 70 | te, err := tp.ReadLine() 71 | if err != nil { 72 | fmt.Fprintf(os.Stderr, "Failed to ReadLine\n", err); 73 | return ret 74 | } 75 | if (te.Message) != "" { 76 | ret = te.Message 77 | } 78 | return ret 79 | } 80 | -------------------------------------------------------------------------------- 
/syzkaller/kstat_demo/ebpf/ebpftext.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | /* High-32-bit: |-----|-sk_state-|-flags-|-sk_shutdown--|--state--| 4 | * |-----|---4bit---|--4bit-|-----2bit-----|--4bit---| 5 | * Low-32-bit: |-func-id-|---branch-related-argument---|--weight-| 6 | * |--4-bit--|-------n-bit-----------------|--4bit---| 7 | * The highest n-bit was empty. You can fill it as your will. 8 | * Collect data for a specified function will generate too much useless 9 | * signals. Hight-32-bit is only for general purpos. 10 | * In a monitored function, do not care too much about arguments 11 | * passed to called function. Just write another probe for it. 12 | */ 13 | 14 | const EbpfSingle string =` 15 | #include 16 | #include 17 | #define KBUILD_MODNAME "foo" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #define SOCK_STATE_OPT 0x1 27 | #define SK_SHUTDOWN_OPT 0x2 28 | #define SOCK_FLAGS_OPT 0x4 29 | #define SK_STATE_OPT 0x8 30 | #define SK_FLAGS_OPT 0x10 31 | #define SK_ERR_OPT 0x20 32 | 33 | #define STATE_MASK 0xe000000000000000 34 | #define RETSTATE_MASK 0xf000000000000000 35 | 36 | static uint64_t set_func_id(uint32_t id) 37 | { 38 | uint64_t state = 0; 39 | state |= ((id&0xf) << 28); 40 | return state &= 0xf0000000; 41 | } 42 | 43 | static uint64_t set_state(struct sock *sk, int opt) 44 | { 45 | uint64_t state = 0, tmp; 46 | u8 bitfield; 47 | 48 | if (opt&SOCK_STATE_OPT) { 49 | tmp = sk->sk_socket->state&0xf; 50 | state |= (tmp << 32); 51 | } 52 | // SHUTDOWN_MASK 53 | if (opt&SK_SHUTDOWN_OPT) { 54 | tmp = sk->sk_shutdown&0x3; 55 | state |= (tmp << 36); 56 | } 57 | if (opt&SOCK_FLAGS_OPT) { 58 | tmp = sk->sk_socket->flags&0xf; 59 | state |= (tmp << 40); 60 | } 61 | //TCP_STATE_MASK 62 | if (opt&SK_STATE_OPT) { 63 | tmp = sk->sk_state&0xf; 64 | state |= (tmp << 44); 65 | } 66 | // SOL_SOCKET 67 | if (opt&SK_FLAGS_OPT) { 68 | tmp = 
sk->sk_flags&0xff; 69 | state |= (tmp << 48); 70 | } 71 | if (opt&SK_ERR_OPT) { 72 | if (sk->sk_err > 0) { 73 | tmp = 1; 74 | state |= (tmp << 49); 75 | } 76 | } 77 | return state; 78 | } 79 | 80 | static uint64_t set_mask(uint64_t state) 81 | { 82 | uint64_t tmp = STATE_MASK; 83 | return state|tmp; 84 | } 85 | 86 | // Don't case about which function give the state 87 | static uint64_t getretstate(struct sock *sk, int id) 88 | { 89 | uint64_t state = 0, tmp = 0; 90 | u8 bitfield; 91 | 92 | state |= set_state(sk, SK_SHUTDOWN_OPT|SK_STATE_OPT|SOCK_FLAGS_OPT|SK_STATE_OPT|SK_FLAGS_OPT|SK_ERR_OPT); 93 | 94 | // nonagle, repair 95 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 96 | if (bitfield&0xf0) { 97 | tmp = bitfield&0xf0; 98 | state |= ((tmp>>4) << 4); 99 | } 100 | if (bitfield&0x2) 101 | state |= 0x1 << 8; 102 | 103 | // defer_connect 104 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&inet_sk(sk)->rcv_tos)-1); 105 | if (bitfield&0xf0) { 106 | tmp = bitfield&0xf0; 107 | state = state | ((tmp>>4) << 9); 108 | } 109 | 110 | // ipv6only 111 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&sk->__sk_common.skc_bound_dev_if)-1); 112 | if (bitfield&0x4) { 113 | state = state | (1 << 13); 114 | } 115 | 116 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 117 | tmp = tcp_sk(sk)->repair_queue & 0x3; 118 | state |= (tmp << 14); 119 | 120 | if(sk->sk_bound_dev_if) 121 | state |= (0x1 << 18); 122 | if(sk->sk_route_caps&NETIF_F_SG) 123 | state |= (0x1 << 20); 124 | if(tcp_sk(sk)->fastopen_rsk != NULL) 125 | state |= (0x1 << 21); 126 | if(tcp_sk(sk)->urg_data) 127 | state |= (0x1 << 22); 128 | if(tcp_sk(sk)->urg_seq) 129 | state |= (0x1 << 23); 130 | if (tcp_sk(sk)->saved_syn) 131 | state |= (0x1 << 24); 132 | if(tcp_sk(sk)->urg_data) 133 | state |= (0x1 << 25); 134 | if(tcp_sk(sk)->urg_seq) 135 | state |= (0x1 << 26); 136 | if(tcp_sk(sk)->linger2) 137 | state |= (0x1 << 27); 138 | 
if(tcp_sk(sk)->urg_seq == tcp_sk(sk)->copied_seq) 139 | state |= (0x1 << 28); 140 | if(sk->sk_lingertime) 141 | state |= (0x1 << 29); 142 | if(sk->sk_frag.page) 143 | state |= (0x1 << 30); 144 | 145 | tmp = RETSTATE_MASK; 146 | return state|tmp; 147 | } 148 | 149 | int kprobe__tcp_v6_init_sock(struct pt_regs *ctx, struct sock *sk) 150 | { 151 | uint64_t state = set_func_id(0); 152 | 153 | state = set_mask(state); 154 | bpf_trace_printk("%llx\n", state); 155 | return 0; 156 | } 157 | 158 | int kretprobe__tcp_v6_init_sock(struct pt_regs *ctx, struct sock *sk) 159 | { 160 | bpf_trace_printk("%llx\n", getretstate(sk,0)); 161 | return 0; 162 | } 163 | 164 | int kprobe__tcp_v6_connect(struct pt_regs *ctx, struct sock *sk) 165 | { 166 | uint64_t state = set_func_id(0x1); 167 | 168 | state = set_mask(state); 169 | bpf_trace_printk("%llx\n", state); 170 | return 0; 171 | } 172 | 173 | int kretprobe__tcp_v6_connect(struct pt_regs *ctx, struct sock *sk) 174 | { 175 | bpf_trace_printk("%llx\n", getretstate(sk, 1)); 176 | return 0; 177 | } 178 | 179 | int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk, struct msghdr *msg, size_t size) 180 | { 181 | uint64_t state = set_func_id(0x2), tmp = 0; 182 | u8 bitfield; 183 | 184 | tmp = sk->sk_state&0xf; 185 | if(tmp == TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_SYN_SENT) 186 | state |= ((tmp&0xf) << 32); 187 | 188 | tmp = sk->sk_shutdown&0x3; 189 | if(tmp == SEND_SHUTDOWN) 190 | state |= ((tmp&0x3) << 36); 191 | 192 | tmp = sk->sk_flags&0xff; 193 | if(tmp == SOCK_ZEROCOPY) 194 | state |= ((tmp&0xff) << 40); 195 | 196 | // nonagle, repair 197 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 198 | if (bitfield&0xf0) { 199 | tmp = bitfield&0xf0; 200 | state |= ((tmp>>4) << 48); 201 | } 202 | tmp = 0x1; 203 | if (bitfield&0x2) 204 | state |= tmp << 52; 205 | 206 | // defer_connect 207 | bpf_probe_read(&bitfield, sizeof(bitfield), 
(void*)((long)&inet_sk(sk)->rcv_tos)-1); 208 | if (bitfield&0xf0) { 209 | tmp = bitfield&0xf0; 210 | state = state | ((tmp>>4) << 53); 211 | } 212 | 213 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 214 | tmp = tcp_sk(sk)->repair_queue & 0x3; 215 | state |= (tmp << 57); 216 | 217 | 218 | // tp->fastopen_req 219 | if (tcp_sk(sk)->fastopen_req) 220 | state |= (0x1 << 16); 221 | if (tcp_sk(sk)->fastopen_rsk != NULL) 222 | state |= (0x1 << 17); 223 | 224 | // From syscalls argument 225 | // msg->msg_controllen 226 | if (msg->msg_controllen) 227 | state |= (0x1 << 20); 228 | // msg_data_left 229 | if (msg->msg_iter.count) 230 | state |= (0x1 << 27); 231 | 232 | state = set_mask(state); 233 | bpf_trace_printk("%llx\n", state); 234 | return 0; 235 | } 236 | 237 | int kretprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk) 238 | { 239 | bpf_trace_printk("%llx\n", getretstate(sk, 2)); 240 | return 0; 241 | } 242 | 243 | int kprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk, struct msghdr *msg, int flags) 244 | { 245 | uint64_t state = set_func_id(0x3), tmp = 0; 246 | u8 bitfield; 247 | 248 | tmp = sk->sk_state&0xf; 249 | //TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_SYN_SENT) 250 | if(tmp) 251 | state |= ((tmp&0xf) << 32); 252 | 253 | tmp = sk->sk_shutdown&0x3; 254 | if(tmp == RCV_SHUTDOWN) 255 | state |= ((tmp&0x3) << 36); 256 | 257 | // SOCK_URGINLINE SOCK_DONE 258 | tmp = sk->sk_flags&0xff; 259 | if(tmp == SOCK_URGINLINE || tmp == SOCK_DONE) 260 | state |= ((tmp&0xff) << 42); 261 | 262 | // nonagle, repair 263 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 264 | if (bitfield&0xf0) { 265 | tmp = bitfield&0xf0; 266 | state |= ((tmp>>4) << 48); 267 | } 268 | tmp = 0x1; 269 | if (bitfield&0x2) 270 | state |= tmp << 52; 271 | 272 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 273 | tmp = tcp_sk(sk)->repair_queue & 0x3; 274 | state |= (tmp << 57); 275 | 276 | 
// urg_data urg_seq 277 | if(tcp_sk(sk)->urg_data) 278 | state |= (0x1 << 1); 279 | if(tcp_sk(sk)->urg_seq == tcp_sk(sk)->copied_seq) 280 | state |= (0x1 << 2); 281 | if(sk->sk_err) 282 | state |= (0x1 << 3); 283 | // msg->msg_flags 284 | // MSG_PEEK MSG_OOB MSG_WAITALL MSG_TRUNC 285 | if (msg->msg_flags&MSG_PEEK) 286 | state |= (0x1 << 4); 287 | if (msg->msg_flags&MSG_OOB) 288 | state |= (0x1 << 5); 289 | if (msg->msg_flags&MSG_WAITALL) 290 | state |= (0x1 << 6); 291 | // msg->msg_flags 292 | if (msg->msg_flags&MSG_TRUNC) 293 | state |= (0x1 << 7); 294 | if (msg->msg_flags&MSG_ERRQUEUE) 295 | state |= (0x1 << 8); 296 | if(sk->sk_receive_queue.next) 297 | state |= (0x1 << 9); 298 | 299 | state = set_mask(state); 300 | bpf_trace_printk("%llx\n", state); 301 | return 0; 302 | } 303 | 304 | int kretprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk) 305 | { 306 | bpf_trace_printk("%llx\n", getretstate(sk, 3)); 307 | return 0; 308 | } 309 | 310 | int kprobe__tcp_close(struct pt_regs *ctx, struct sock *sk) 311 | { 312 | uint64_t state = set_func_id(0x4), tmp = 0; 313 | u8 bitfield; 314 | 315 | tmp = sk->sk_state&0xf; 316 | if(tmp == TCP_LISTEN || tmp == TCP_FIN_WAIT2 || tmp == TCP_CLOSE) 317 | state |= ((tmp&0xf) << 32); 318 | 319 | tmp = 1; 320 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 321 | if (bitfield&0x2) 322 | state |= (tmp << 8); 323 | 324 | tmp = 1; 325 | if (tcp_sk(sk)->linger2) 326 | state |= (tmp << 12); 327 | 328 | tmp = sk->sk_flags&0xff; 329 | if(tmp == SOCK_LINGER) 330 | state |= ((tmp&0xff) << 18); 331 | 332 | tmp = 1; 333 | if(sk->sk_lingertime) { 334 | state |= (tmp << 24); 335 | } 336 | 337 | state = set_mask(state); 338 | bpf_trace_printk("%llx\n", state); 339 | return 0; 340 | } 341 | 342 | int kretprobe__tcp_close(struct pt_regs *ctx, struct sock *sk) 343 | { 344 | bpf_trace_printk("%llx\n", getretstate(sk, 4)); 345 | return 0; 346 | } 347 | 348 | int kprobe__tcp_shutdown(struct pt_regs *ctx, 
struct sock *sk, int how) 349 | { 350 | uint64_t state = set_func_id(0x5), tmp = 0; 351 | 352 | tmp = how; 353 | state |= (tmp&0xff << 4); 354 | 355 | if ((1 << sk->sk_state)&(TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) 356 | state |= (0x1 << 12); 357 | 358 | state = set_mask(state); 359 | bpf_trace_printk("%llx\n", state); 360 | return 0; 361 | } 362 | 363 | int kretprobe__tcp_shutdown(struct pt_regs *ctx, struct sock *sk) 364 | { 365 | bpf_trace_printk("%llx\n", getretstate(sk, 5)); 366 | return 0; 367 | } 368 | 369 | int kprobe__tcp_setsockopt(struct pt_regs *ctx, struct sock *sk, int level, int optname) 370 | { 371 | uint64_t state = set_func_id(0x6), tmp = 0; 372 | u8 bitfield; 373 | struct tcp_sock *tp = tcp_sk(sk); 374 | 375 | tmp = sk->sk_state&0xf; 376 | if(tmp == TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_LISTEN) 377 | state |= ((tmp&0xf) << 32); 378 | 379 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 380 | tmp = tcp_sk(sk)->repair_queue & 0x3; 381 | state |= (tmp << 16); 382 | 383 | // tp->repair, tp->nonagle 384 | tmp = 1; 385 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 386 | if (bitfield&0x2) 387 | state = state | (tmp << 20); 388 | if (bitfield&0xf0) { 389 | tmp = bitfield; 390 | state |= ((tmp&0xf0 >> 4) << 24); 391 | } 392 | 393 | tmp = sk->sk_flags&0xff; 394 | if(tmp == SOCK_KEEPOPEN) 395 | state |= ((tmp&0xff) << 4); 396 | 397 | state = set_mask(state); 398 | bpf_trace_printk("%llx\n", state); 399 | return 0; 400 | } 401 | 402 | int kretprobe__tcp_setsockopt(struct pt_regs *ctx, struct sock *sk) 403 | { 404 | bpf_trace_printk("%llx\n", getretstate(sk, 6)); 405 | return 0; 406 | } 407 | 408 | int kprobe__tcp_getsockopt(struct pt_regs *ctx, struct sock *sk, int level, int optname) 409 | { 410 | uint64_t state = set_func_id(0x7), tmp = 0; 411 | u8 bitfield; 412 | struct tcp_sock *tp = tcp_sk(sk); 413 | 414 | tmp = sk->sk_state&0xf; 
415 | if(tmp == TCP_CLOSE || tmp == TCP_LISTEN) 416 | state |= ((tmp&0xf) << 32); 417 | 418 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 419 | tmp = tcp_sk(sk)->repair_queue & 0x3; 420 | state |= (tmp << 16); 421 | 422 | tmp = 1; 423 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 424 | if (bitfield&0x2) 425 | state |= (tmp << 20); 426 | 427 | tmp = 1; 428 | if (tp->saved_syn) { 429 | state |= (tmp << 24); 430 | } 431 | 432 | state = set_mask(state); 433 | bpf_trace_printk("%llx\n", state); 434 | return 0; 435 | } 436 | 437 | int kretprobe__tcp_getsockopt(struct pt_regs *ctx, struct sock *sk) 438 | { 439 | bpf_trace_printk("%llx\n", getretstate(sk, 7)); 440 | return 0; 441 | } 442 | 443 | int kprobe__inet_accept(struct pt_regs *ctx, struct socket *sock, struct socket* newsock, int flags, bool kern) 444 | { 445 | uint64_t state = set_func_id(0x8); 446 | 447 | if(kern) 448 | state = state | (0x1 << 4); 449 | state = set_mask(state); 450 | bpf_trace_printk("%llx\n", state); 451 | 452 | state = set_func_id(9); 453 | if(kern) 454 | state = state | (0x1 << 4); 455 | 456 | state = set_mask(state); 457 | bpf_trace_printk("%llx\n", state); 458 | return 0; 459 | } 460 | 461 | int kretprobe__inet_accept(struct pt_regs *ctx, struct socket *sock, struct socket* newsock) 462 | { 463 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 8)); 464 | bpf_trace_printk("%llx\n", getretstate(newsock->sk, 9)); 465 | return 0; 466 | } 467 | 468 | int kprobe__inet_listen(struct pt_regs *ctx, struct socket *sock) 469 | { 470 | uint64_t state = set_func_id(0xa), tmp; 471 | 472 | tmp = sock->sk->sk_state&0xf; 473 | if(tmp == TCP_LISTEN || tmp == TCP_CLOSE) 474 | state |= ((tmp&0xf) << 32); 475 | 476 | state = set_mask(state); 477 | bpf_trace_printk("%llx\n", state); 478 | return 0; 479 | } 480 | 481 | int kretprobe__inet_listen(struct pt_regs *ctx, struct socket *sock) 482 | { 483 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 
0xa)); 484 | return 0; 485 | } 486 | 487 | int kprobe__tcp_ioctl(struct pt_regs *ctx, struct sock *sk, int cmd) 488 | { 489 | uint64_t state = set_func_id(0xb), tmp, mask; 490 | 491 | tmp = cmd; 492 | mask = SIOCINQ|SIOCATMARK|SIOCOUTQ|SIOCOUTQNSD; 493 | if (tmp==SIOCINQ || tmp==SIOCATMARK || tmp==SIOCOUTQ || tmp==SIOCOUTQNSD) 494 | state |= ((cmd&mask) << 4); 495 | state = set_mask(state); 496 | bpf_trace_printk("%llx\n", state); 497 | return 0; 498 | } 499 | 500 | int kretprobe__tcp_ioctl(struct pt_regs *ctx, struct sock *sk) 501 | { 502 | bpf_trace_printk("%llx\n", getretstate(sk, 0xb)); 503 | return 0; 504 | } 505 | 506 | int kprobe__inet6_bind(struct pt_regs *ctx, struct sock *sk, struct sockaddr *uaddr, bool with_lock) 507 | { 508 | uint64_t state = set_func_id(0xc); 509 | 510 | state = set_mask(state); 511 | bpf_trace_printk("%llx\n", state); 512 | return 0; 513 | } 514 | 515 | int kretprobe__inet6_bind(struct pt_regs *ctx, struct sock *sk) 516 | { 517 | bpf_trace_printk("%llx\n", getretstate(sk, 0xc)); 518 | return 0; 519 | } 520 | 521 | int kprobe__inet6_ioctl(struct pt_regs *ctx, struct sock *sk, int cmd) 522 | { 523 | uint64_t state = set_func_id(0xd), tmp, mask; 524 | 525 | tmp = cmd; 526 | mask = SIOCINQ|SIOCATMARK|SIOCOUTQ|SIOCOUTQNSD; 527 | if (tmp==SIOCINQ || tmp==SIOCATMARK || tmp==SIOCOUTQ || tmp==SIOCOUTQNSD) 528 | state |= ((cmd&(0x541B|0x8905|0x894b|0x5411)) << 4); 529 | state = set_mask(state); 530 | bpf_trace_printk("%llx\n", state); 531 | return 0; 532 | } 533 | 534 | int kretprobe__inet6_ioctl(struct pt_regs *ctx, struct sock *sk) 535 | { 536 | bpf_trace_printk("%llx\n", getretstate(sk, 0xd)); 537 | return 0; 538 | } 539 | 540 | int kprobe__inet6_getname(struct pt_regs *ctx, struct sock *sk, int cmd, int peer) 541 | { 542 | uint64_t state = set_func_id(0xe), tmp; 543 | 544 | tmp = 0x1; 545 | if (peer == 1) 546 | state |= (tmp << 4); 547 | 548 | state = set_mask(state); 549 | bpf_trace_printk("%llx\n", state); 550 | return 0; 551 | } 552 | 
553 | int kretprobe__inet6_getname(struct pt_regs *ctx, struct sock *sk) 554 | { 555 | bpf_trace_printk("%llx\n", getretstate(sk, 0xe)); 556 | return 0; 557 | } 558 | 559 | ` 560 | /* Kernel probe/retprobe point */ 561 | var ProbePoint []string = []string{"tcp_v6_init_sock","tcp_v6_connect","tcp_sendmsg","tcp_recvmsg","tcp_close","tcp_shutdown","tcp_setsockopt","tcp_getsockopt","inet_accept","inet_listen", "tcp_ioctl", "inet6_bind", "inet6_getname","inet6_ioctl"} 562 | 563 | var RetProbePoint []string = []string{"tcp_v6_init_sock","tcp_v6_connect","tcp_sendmsg","tcp_recvmsg","tcp_close","tcp_shutdown","tcp_setsockopt","tcp_getsockopt","inet_accept","inet_listen", "tcp_ioctl", "inet6_bind", "inet6_getname","inet6_ioctl"} 564 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/ebpf_sample/ebpftext_recvmsg.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | /* High-32-bit: |-----|-sk_state-|-flags-|-sk_shutdown--|--state--| 4 | * |-----|---4bit---|--4bit-|-----2bit-----|--4bit---| 5 | * Low-32-bit: |-func-id-|---branch-related-argument---|--weight-| 6 | * |--4-bit--|-------n-bit-----------------|--4bit---| 7 | * The highest n-bit was empty. You can fill it as your will. 8 | * Collect data for a specified function will generate too much useless 9 | * signals. Hight-32-bit is only for general purpos. 10 | * In a monitored function, do not care too much about arguments 11 | * passed to called function. Just write another probe for it. 
12 | */ 13 | 14 | const EbpfSingle string =` 15 | #include 16 | #include 17 | #define KBUILD_MODNAME "foo" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #define SOCK_STATE_OPT 0x1 27 | #define SK_SHUTDOWN_OPT 0x2 28 | #define SOCK_FLAGS_OPT 0x4 29 | #define SK_STATE_OPT 0x8 30 | #define SK_FLAGS_OPT 0x10 31 | #define SK_ERR_OPT 0x20 32 | 33 | #define STATE_MASK 0xe000000000000000 34 | #define RETSTATE_MASK 0xf000000000000000 35 | 36 | static uint64_t set_func_id(uint32_t id) 37 | { 38 | uint64_t state = 0; 39 | state |= ((id&0xf) << 28); 40 | return state &= 0xf0000000; 41 | } 42 | 43 | static uint64_t set_state(struct sock *sk, int opt) 44 | { 45 | uint64_t state = 0, tmp; 46 | u8 bitfield; 47 | 48 | if (opt&SOCK_STATE_OPT) { 49 | tmp = sk->sk_socket->state&0xf; 50 | state |= (tmp << 32); 51 | } 52 | // SHUTDOWN_MASK 53 | if (opt&SK_SHUTDOWN_OPT) { 54 | tmp = sk->sk_shutdown&0x3; 55 | state |= (tmp << 36); 56 | } 57 | if (opt&SOCK_FLAGS_OPT) { 58 | tmp = sk->sk_socket->flags&0xf; 59 | state |= (tmp << 40); 60 | } 61 | //TCP_STATE_MASK 62 | if (opt&SK_STATE_OPT) { 63 | tmp = sk->sk_state&0xf; 64 | state |= (tmp << 44); 65 | } 66 | // SOL_SOCKET 67 | if (opt&SK_FLAGS_OPT) { 68 | tmp = sk->sk_flags&0xff; 69 | state |= (tmp << 48); 70 | } 71 | if (opt&SK_ERR_OPT) { 72 | if (sk->sk_err > 0) { 73 | tmp = 1; 74 | state |= (tmp << 49); 75 | } 76 | } 77 | return state; 78 | } 79 | 80 | static uint64_t set_mask(uint64_t state) 81 | { 82 | uint64_t tmp = STATE_MASK; 83 | return state|tmp; 84 | } 85 | 86 | // Don't case about which function give the state 87 | static uint64_t getretstate(struct sock *sk, int id) 88 | { 89 | uint64_t state = 0, tmp = 0; 90 | u8 bitfield; 91 | 92 | state |= set_state(sk, SOCK_STATE_OPT|SK_SHUTDOWN_OPT|SOCK_FLAGS_OPT|SK_STATE_OPT|SK_FLAGS_OPT|SK_ERR_OPT); 93 | // nonagle, repair 94 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 95 | if 
(bitfield&0xf0) { 96 | tmp = bitfield&0xf0; 97 | state |= ((tmp>>4) << 48); 98 | } 99 | tmp = 0x1; 100 | if (bitfield&0x2) 101 | state |= tmp << 52; 102 | 103 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 104 | tmp = tcp_sk(sk)->repair_queue & 0x3; 105 | state |= (tmp << 57); 106 | 107 | 108 | // urg_data urg_seq 109 | if(tcp_sk(sk)->urg_data) 110 | state |= (0x1 << 1); 111 | if(tcp_sk(sk)->urg_seq == tcp_sk(sk)->copied_seq) 112 | state |= (0x1 << 2); 113 | if(sk->sk_receive_queue.next) 114 | state |= (0x1 << 3); 115 | 116 | tmp = RETSTATE_MASK; 117 | return state|tmp; 118 | } 119 | 120 | int kretprobe__tcp_v6_init_sock(struct pt_regs *ctx, struct sock *sk) 121 | { 122 | bpf_trace_printk("%llx\n", getretstate(sk,0)); 123 | return 0; 124 | } 125 | 126 | int kretprobe__tcp_v6_connect(struct pt_regs *ctx, struct sock *sk) 127 | { 128 | bpf_trace_printk("%llx\n", getretstate(sk, 1)); 129 | return 0; 130 | } 131 | 132 | int kprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk, struct msghdr *msg) 133 | { 134 | uint64_t state = set_func_id(0x3), tmp = 0; 135 | u8 bitfield; 136 | 137 | tmp = sk->sk_state&0xf; 138 | //TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_SYN_SENT) 139 | if(tmp) 140 | state |= ((tmp&0xf) << 32); 141 | 142 | tmp = sk->sk_shutdown&0x3; 143 | if(tmp == RCV_SHUTDOWN) 144 | state |= ((tmp&0x3) << 36); 145 | 146 | // SOCK_URGINLINE SOCK_DONE 147 | tmp = sk->sk_flags&0xff; 148 | if(tmp == SOCK_URGINLINE || tmp == SOCK_DONE) 149 | state |= ((tmp&0xff) << 42); 150 | 151 | // nonagle, repair 152 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 153 | if (bitfield&0xf0) { 154 | tmp = bitfield&0xf0; 155 | state |= ((tmp>>4) << 48); 156 | } 157 | tmp = 0x1; 158 | if (bitfield&0x2) 159 | state |= tmp << 52; 160 | 161 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 162 | tmp = tcp_sk(sk)->repair_queue & 0x3; 163 | state |= (tmp << 57); 164 | 165 | // urg_data 
urg_seq 166 | if(tcp_sk(sk)->urg_data) 167 | state |= (0x1 << 1); 168 | if(tcp_sk(sk)->urg_seq == tcp_sk(sk)->copied_seq) 169 | state |= (0x1 << 2); 170 | if(sk->sk_err) 171 | state |= (0x1 << 3); 172 | // msg->msg_flags 173 | // MSG_PEEK MSG_OOB MSG_WAITALL MSG_TRUNC 174 | if (msg->msg_flags&MSG_PEEK) 175 | state |= (0x1 << 4); 176 | if (msg->msg_flags&MSG_OOB) 177 | state |= (0x1 << 5); 178 | if (msg->msg_flags&MSG_WAITALL) 179 | state |= (0x1 << 6); 180 | // msg->msg_flags 181 | if (msg->msg_flags&MSG_TRUNC) 182 | state |= (0x1 << 7); 183 | if (msg->msg_flags&MSG_ERRQUEUE) 184 | state |= (0x1 << 8); 185 | if(sk->sk_receive_queue.next) 186 | state |= (0x1 << 9); 187 | 188 | state = set_mask(state); 189 | bpf_trace_printk("%llx\n", state); 190 | return 0; 191 | } 192 | 193 | int kretprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk) 194 | { 195 | bpf_trace_printk("%llx\n", getretstate(sk, 2)); 196 | return 0; 197 | } 198 | 199 | int kretprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk) 200 | { 201 | bpf_trace_printk("%llx\n", getretstate(sk, 3)); 202 | return 0; 203 | } 204 | 205 | int kretprobe__tcp_close(struct pt_regs *ctx, struct sock *sk) 206 | { 207 | bpf_trace_printk("%llx\n", getretstate(sk, 4)); 208 | return 0; 209 | } 210 | 211 | int kretprobe__tcp_shutdown(struct pt_regs *ctx, struct sock *sk) 212 | { 213 | bpf_trace_printk("%llx\n", getretstate(sk, 5)); 214 | return 0; 215 | } 216 | 217 | int kretprobe__tcp_setsockopt(struct pt_regs *ctx, struct sock *sk) 218 | { 219 | bpf_trace_printk("%llx\n", getretstate(sk, 6)); 220 | return 0; 221 | } 222 | 223 | int kretprobe__tcp_getsockopt(struct pt_regs *ctx, struct sock *sk) 224 | { 225 | bpf_trace_printk("%llx\n", getretstate(sk, 7)); 226 | return 0; 227 | } 228 | 229 | int kretprobe__inet_accept(struct pt_regs *ctx, struct socket *sock, struct socket* newsock) 230 | { 231 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 8)); 232 | bpf_trace_printk("%llx\n", getretstate(newsock->sk, 9)); 233 | 
return 0; 234 | } 235 | 236 | int kretprobe__inet_listen(struct pt_regs *ctx, struct socket *sock) 237 | { 238 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 0xa)); 239 | return 0; 240 | } 241 | 242 | int kretprobe__tcp_ioctl(struct pt_regs *ctx, struct sock *sk) 243 | { 244 | bpf_trace_printk("%llx\n", getretstate(sk, 0xb)); 245 | return 0; 246 | } 247 | 248 | int kretprobe__inet6_bind(struct pt_regs *ctx, struct sock *sk) 249 | { 250 | bpf_trace_printk("%llx\n", getretstate(sk, 0xc)); 251 | return 0; 252 | } 253 | 254 | int kretprobe__inet6_ioctl(struct pt_regs *ctx, struct sock *sk) 255 | { 256 | bpf_trace_printk("%llx\n", getretstate(sk, 0xd)); 257 | return 0; 258 | } 259 | 260 | int kretprobe__inet6_getname(struct pt_regs *ctx, struct sock *sk) 261 | { 262 | bpf_trace_printk("%llx\n", getretstate(sk, 0xe)); 263 | return 0; 264 | } 265 | 266 | ` 267 | /* Kernel probe/retprobe point */ 268 | var ProbePoint []string = []string{"tcp_recvmsg"} 269 | 270 | var RetProbePoint []string = []string{"tcp_v6_init_sock","tcp_v6_connect","tcp_sendmsg","tcp_recvmsg","tcp_close","tcp_shutdown","tcp_setsockopt","tcp_getsockopt","inet_accept","inet_listen", "tcp_ioctl", "inet6_bind", "inet6_getname","inet6_ioctl"} 271 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/ebpf_sample/ebpftext_sendmsg.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | /* High-32-bit: |-----|-sk_state-|-flags-|-sk_shutdown--|--state--| 4 | * |-----|---4bit---|--4bit-|-----2bit-----|--4bit---| 5 | * Low-32-bit: |-func-id-|---branch-related-argument---|--weight-| 6 | * |--4-bit--|-------n-bit-----------------|--4bit---| 7 | * The highest n-bit was empty. You can fill it as your will. 8 | * Collect data for a specified function will generate too much useless 9 | * signals. Hight-32-bit is only for general purpos. 
10 | * In a monitored function, do not care too much about arguments 11 | * passed to called function. Just write another probe for it. 12 | */ 13 | 14 | const EbpfSingle string =` 15 | #include 16 | #include 17 | #define KBUILD_MODNAME "foo" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #define SOCK_STATE_OPT 0x1 27 | #define SK_SHUTDOWN_OPT 0x2 28 | #define SOCK_FLAGS_OPT 0x4 29 | #define SK_STATE_OPT 0x8 30 | #define SK_FLAGS_OPT 0x10 31 | #define SK_ERR_OPT 0x20 32 | 33 | #define STATE_MASK 0xe000000000000000 34 | #define RETSTATE_MASK 0xf000000000000000 35 | 36 | static uint64_t set_func_id(uint32_t id) 37 | { 38 | uint64_t state = 0; 39 | state |= ((id&0xf) << 28); 40 | return state &= 0xf0000000; 41 | } 42 | 43 | static uint64_t set_state(struct sock *sk, int opt) 44 | { 45 | uint64_t state = 0, tmp; 46 | u8 bitfield; 47 | 48 | if (opt&SOCK_STATE_OPT) { 49 | tmp = sk->sk_socket->state&0xf; 50 | state |= (tmp << 32); 51 | } 52 | // SHUTDOWN_MASK 53 | if (opt&SK_SHUTDOWN_OPT) { 54 | tmp = sk->sk_shutdown&0x3; 55 | state |= (tmp << 36); 56 | } 57 | if (opt&SOCK_FLAGS_OPT) { 58 | tmp = sk->sk_socket->flags&0xf; 59 | state |= (tmp << 40); 60 | } 61 | //TCP_STATE_MASK 62 | if (opt&SK_STATE_OPT) { 63 | tmp = sk->sk_state&0xf; 64 | state |= (tmp << 44); 65 | } 66 | // SOL_SOCKET 67 | if (opt&SK_FLAGS_OPT) { 68 | tmp = sk->sk_flags&0xff; 69 | state |= (tmp << 48); 70 | } 71 | if (opt&SK_ERR_OPT) { 72 | if (sk->sk_err > 0) { 73 | tmp = 1; 74 | state |= (tmp << 49); 75 | } 76 | } 77 | return state; 78 | } 79 | 80 | static uint64_t set_mask(uint64_t state) 81 | { 82 | uint64_t tmp = STATE_MASK; 83 | return state|tmp; 84 | } 85 | 86 | // Don't case about which function give the state 87 | static uint64_t getretstate(struct sock *sk, int id) 88 | { 89 | uint64_t state = 0, tmp = 0; 90 | u8 bitfield; 91 | 92 | state |= set_state(sk, 
SOCK_STATE_OPT|SK_SHUTDOWN_OPT|SOCK_FLAGS_OPT|SK_STATE_OPT|SK_FLAGS_OPT|SK_ERR_OPT); 93 | // nonagle, repair 94 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 95 | if (bitfield&0xf0) { 96 | tmp = bitfield&0xf0; 97 | state |= ((tmp>>4) << 48); 98 | } 99 | tmp = 0x1; 100 | if (bitfield&0x2) 101 | state |= tmp << 52; 102 | 103 | // defer_connect 104 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&inet_sk(sk)->rcv_tos)-1); 105 | if (bitfield&0xf0) { 106 | tmp = bitfield&0xf0; 107 | state = state | ((tmp>>4) << 53); 108 | } 109 | 110 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 111 | tmp = tcp_sk(sk)->repair_queue & 0x3; 112 | state |= (tmp << 57); 113 | 114 | 115 | // tp->fastopen_req 116 | if (tcp_sk(sk)->fastopen_req) 117 | state |= (0x1 << 16); 118 | if (tcp_sk(sk)->fastopen_rsk != NULL) 119 | state |= (0x1 << 17); 120 | 121 | tmp = RETSTATE_MASK; 122 | return state|tmp; 123 | } 124 | 125 | int kretprobe__tcp_v6_init_sock(struct pt_regs *ctx, struct sock *sk) 126 | { 127 | bpf_trace_printk("%llx\n", getretstate(sk,0)); 128 | return 0; 129 | } 130 | 131 | int kretprobe__tcp_v6_connect(struct pt_regs *ctx, struct sock *sk) 132 | { 133 | bpf_trace_printk("%llx\n", getretstate(sk, 1)); 134 | return 0; 135 | } 136 | 137 | int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk, struct msghdr *msg, size_t size) 138 | { 139 | uint64_t state = set_func_id(0x2), tmp = 0; 140 | u8 bitfield; 141 | 142 | tmp = sk->sk_state&0xf; 143 | if(tmp == TCP_ESTABLISHED || tmp == TCP_CLOSE || tmp == TCP_CLOSE_WAIT || tmp == TCP_SYN_SENT) 144 | state |= ((tmp&0xf) << 32); 145 | 146 | tmp = sk->sk_shutdown&0x3; 147 | if(tmp == SEND_SHUTDOWN) 148 | state |= ((tmp&0x3) << 36); 149 | 150 | tmp = sk->sk_flags&0xff; 151 | if(tmp == SOCK_ZEROCOPY) 152 | state |= ((tmp&0xff) << 40); 153 | 154 | // nonagle, repair 155 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&tcp_sk(sk)->repair_queue)-1); 156 | if 
(bitfield&0xf0) { 157 | tmp = bitfield&0xf0; 158 | state |= ((tmp>>4) << 48); 159 | } 160 | tmp = 0x1; 161 | if (bitfield&0x2) 162 | state |= tmp << 52; 163 | 164 | // defer_connect 165 | bpf_probe_read(&bitfield, sizeof(bitfield), (void*)((long)&inet_sk(sk)->rcv_tos)-1); 166 | if (bitfield&0xf0) { 167 | tmp = bitfield&0xf0; 168 | state = state | ((tmp>>4) << 53); 169 | } 170 | 171 | // TCP_NO_QUEUE,TCP_RECV_QUEUE,TCP_SEND_QUEUE,TCP_QUEUES_NR 172 | tmp = tcp_sk(sk)->repair_queue & 0x3; 173 | state |= (tmp << 57); 174 | 175 | 176 | // tp->fastopen_req 177 | if (tcp_sk(sk)->fastopen_req) 178 | state |= (0x1 << 16); 179 | if (tcp_sk(sk)->fastopen_rsk != NULL) 180 | state |= (0x1 << 17); 181 | 182 | // From syscalls argument 183 | // msg->msg_controllen 184 | /*if (msg->msg_controllen) 185 | state |= (0x1 << 20); 186 | // msg->msg_flags 187 | if (msg->msg_flags&MSG_OOB) 188 | state |= (0x1 << 21); 189 | // msg->msg_flags 190 | if (msg->msg_flags&MSG_MORE) 191 | state |= (0x1 << 22); 192 | // msg->msg_flags 193 | if (msg->msg_flags&MSG_EOR) 194 | state |= (0x1 << 23); 195 | // msg->msg_flags 196 | if (msg->msg_flags&MSG_ZEROCOPY) 197 | state |= (0x1 << 24); 198 | // msg_data_left 199 | if (msg->msg_iter.count) 200 | state |= (0x1 << 27);*/ 201 | 202 | state = set_mask(state); 203 | bpf_trace_printk("%llx\n", state); 204 | return 0; 205 | } 206 | 207 | int kretprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk) 208 | { 209 | bpf_trace_printk("%llx\n", getretstate(sk, 2)); 210 | return 0; 211 | } 212 | 213 | int kretprobe__tcp_recvmsg(struct pt_regs *ctx, struct sock *sk) 214 | { 215 | bpf_trace_printk("%llx\n", getretstate(sk, 3)); 216 | return 0; 217 | } 218 | 219 | int kretprobe__tcp_close(struct pt_regs *ctx, struct sock *sk) 220 | { 221 | bpf_trace_printk("%llx\n", getretstate(sk, 4)); 222 | return 0; 223 | } 224 | 225 | int kretprobe__tcp_shutdown(struct pt_regs *ctx, struct sock *sk) 226 | { 227 | bpf_trace_printk("%llx\n", getretstate(sk, 5)); 228 | return 0; 
229 | } 230 | 231 | int kretprobe__tcp_setsockopt(struct pt_regs *ctx, struct sock *sk) 232 | { 233 | bpf_trace_printk("%llx\n", getretstate(sk, 6)); 234 | return 0; 235 | } 236 | 237 | int kretprobe__tcp_getsockopt(struct pt_regs *ctx, struct sock *sk) 238 | { 239 | bpf_trace_printk("%llx\n", getretstate(sk, 7)); 240 | return 0; 241 | } 242 | 243 | int kretprobe__inet_accept(struct pt_regs *ctx, struct socket *sock, struct socket* newsock) 244 | { 245 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 8)); 246 | bpf_trace_printk("%llx\n", getretstate(newsock->sk, 9)); 247 | return 0; 248 | } 249 | 250 | int kretprobe__inet_listen(struct pt_regs *ctx, struct socket *sock) 251 | { 252 | bpf_trace_printk("%llx\n", getretstate(sock->sk, 0xa)); 253 | return 0; 254 | } 255 | 256 | int kretprobe__tcp_ioctl(struct pt_regs *ctx, struct sock *sk) 257 | { 258 | bpf_trace_printk("%llx\n", getretstate(sk, 0xb)); 259 | return 0; 260 | } 261 | 262 | int kretprobe__inet6_bind(struct pt_regs *ctx, struct sock *sk) 263 | { 264 | bpf_trace_printk("%llx\n", getretstate(sk, 0xc)); 265 | return 0; 266 | } 267 | 268 | int kretprobe__inet6_ioctl(struct pt_regs *ctx, struct sock *sk) 269 | { 270 | bpf_trace_printk("%llx\n", getretstate(sk, 0xd)); 271 | return 0; 272 | } 273 | 274 | int kretprobe__inet6_getname(struct pt_regs *ctx, struct sock *sk) 275 | { 276 | bpf_trace_printk("%llx\n", getretstate(sk, 0xe)); 277 | return 0; 278 | } 279 | 280 | ` 281 | /* Kernel probe/retprobe point */ 282 | var ProbePoint []string = []string{"tcp_sendmsg"} 283 | 284 | var RetProbePoint []string = []string{"tcp_v6_init_sock","tcp_v6_connect","tcp_sendmsg","tcp_recvmsg","tcp_close","tcp_shutdown","tcp_setsockopt","tcp_getsockopt","inet_accept","inet_listen", "tcp_ioctl", "inet6_bind", "inet6_getname","inet6_ioctl"} 285 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/parse/parse.go: 
-------------------------------------------------------------------------------- 1 | package parse 2 | 3 | import ( 4 | "log" 5 | ) 6 | 7 | /* Map socket state to readable kernel macro */ 8 | var sock_type = map[uint32]string { 9 | 1:"SOCK_DGRAM", 10 | 2:"SOCK_STREAM", 11 | 3:"SOCK_RAW", 12 | 4:"SOCK_RDM", 13 | 5:"SOCK_SEQPACKET", 14 | 6:"SOCK_DCCP", 15 | 10:"SOCK_PACKET", 16 | } 17 | 18 | var sock_state = map[uint32]string { 19 | 0:"SS_FREE", 20 | 1:"SS_UNCONNECTED", 21 | 2:"SS_CONNECTING", 22 | 3:"SS_CONNECTED", 23 | 4:"SS_DISCONNECYING", 24 | } 25 | 26 | var sock_flags = map[uint32]string { 27 | 2:"SOCK_NOSPACE", 28 | 3:"SOCK_PASSCRED", 29 | 4:"SOCK_PASSEC", 30 | } 31 | 32 | type flag struct { 33 | mask uint32 34 | shift uint32 35 | flagType map[uint32]string 36 | } 37 | 38 | /* flag structure, refer to ebpf/ebpf.go ebpf text */ 39 | func ParseFlags(rawSignal uint32) { 40 | var Signal = []flag { 41 | flag {mask:0x7, flagType:sock_flags, shift:0}, 42 | flag {mask:0xf, flagType:sock_type, shift:4}, 43 | flag {mask:0x7, flagType:sock_state, shift:8}, 44 | } 45 | 46 | for _, s := range Signal { 47 | parseFlag(rawSignal, s.mask, s.flagType, s.shift) 48 | } 49 | } 50 | 51 | func parseFlag(rawsignal uint32, mask uint32, flagtype map[uint32]string, shift uint32) { 52 | log.Printf("%s:%x covered", flagtype[(rawsignal&(mask<>shift], (rawsignal&(mask<>shift) 53 | } 54 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/pipe_monitor.go: -------------------------------------------------------------------------------- 1 | /* This monitor only collect single socket state, without any track. 2 | * It only know if a new state was detected, but know nothing about 3 | * which socket does the state belong to. 
4 | */ 5 | 6 | package main 7 | 8 | import ( 9 | "os" 10 | "log" 11 | "fmt" 12 | "regexp" 13 | "flag" 14 | "strconv" 15 | 16 | "github.com/iovisor/gobpf/pkg/tracepipe" 17 | "github.com/iovisor/gobpf/bcc" 18 | 19 | "./ebpf" 20 | ) 21 | 22 | import "C" 23 | func main() { 24 | /* redirect stderr, there are some ebpf log or warning */ 25 | debug := flag.Bool("debug", false, "More debug information about ebpf") 26 | flag.Parse() 27 | _, w, _ := os.Pipe() 28 | old := os.Stderr 29 | if(!*debug) { 30 | old.Close() 31 | os.Stderr = w 32 | } 33 | 34 | /* ebpf text is in ebpf/ebpftext.go */ 35 | source := ebpf.EbpfInit() 36 | m := bcc.NewModule(source, []string{}) 37 | defer m.Close() 38 | /* Be sure your hook function named as "kprobe__KERN_FUNCNAME" */ 39 | ebpf.Attachs(m) 40 | 41 | tp, err := tracepipe.New() 42 | if err != nil { 43 | log.Fatal(err) 44 | } 45 | defer tp.Close() 46 | 47 | if (!*debug) { 48 | w.Close() 49 | os.Stderr = old 50 | } 51 | 52 | rawMessage, errMessage := tp.Channel() 53 | re := regexp.MustCompile("syz-executor") 54 | for (true) { 55 | select { 56 | case te := <- rawMessage: 57 | /* syz-exec has it own pid namespace 58 | * pick out those pid under the namespace can be more accurate 59 | */ 60 | if(re.FindString(te.Task) == "") { 61 | continue 62 | } 63 | rawSignal, err := strconv.ParseUint(te.Message, 16, 64) 64 | if (err != nil) { 65 | log.Println("Wrong rawSignal") 66 | continue 67 | } 68 | fmt.Printf("%016x\n", rawSignal) 69 | case err := <- errMessage: 70 | log.Fatal(err) 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/state/state.go: -------------------------------------------------------------------------------- 1 | package state 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "os" 7 | "regexp" 8 | "strings" 9 | "log" 10 | ) 11 | 12 | /* In a syscall, Several messages from ebpf contain: 13 | * several kernel probe points and socketstates. 
14 | * a syscall with a OpsId 15 | */ 16 | type Ops struct { 17 | OpsId int 18 | KprobePoint string 19 | SocketState map[uint64]uint64 20 | } 21 | 22 | /* record all the state collect by ebpf, a sock with a SockState */ 23 | type SockState struct { 24 | SockState []uint64 25 | SockOps []string 26 | } 27 | 28 | /* Only record the state coverage */ 29 | var StateList []uint64 30 | 31 | /* A syscall with a Ops */ 32 | var OpsList []Ops 33 | 34 | var id int = 0 35 | var tmp uint64 = 0 36 | 37 | /* Handle a message from ebpf */ 38 | func Handle(msg string) { 39 | key, value := extract(msg) 40 | /* Three type of message, refer the ebpf/ebpf.go */ 41 | switch key { 42 | case "[KPROBE_P]": 43 | OpsList = append(OpsList, newops(id, value)) 44 | tmp = 0 45 | id = len(OpsList) - 1 46 | OpsList[id].SocketState = make(map[uint64]uint64) 47 | case "[SOCKET_ID]": 48 | if(strings.Contains(value, "ptrval")) { 49 | fmt.Println("Socket id miss") 50 | misshandle() 51 | return 52 | } 53 | sockid := str2int(value) 54 | if(id > len(OpsList)-1) { 55 | fmt.Println("id out of range") 56 | misshandle() 57 | return 58 | } 59 | if _, ok := OpsList[id].SocketState[sockid]; !ok { 60 | OpsList[id].addsock(sockid) 61 | tmp = sockid 62 | } 63 | case "socket_state": 64 | /* Only record state, know nothing about which socket is it */ 65 | StateList= append(StateList, str2int(value)) 66 | if(id > len(OpsList)-1) { 67 | fmt.Println("id out of range") 68 | misshandle() 69 | return 70 | } 71 | 72 | if _, ok := OpsList[id].SocketState[tmp]; ok { 73 | OpsList[id].SocketState[tmp] = str2int(value) 74 | return 75 | } 76 | default: 77 | fmt.Fprint(os.Stderr, "Unknow message:\n", msg) 78 | } 79 | } 80 | 81 | /* From "a syscall with a Ops" to "a socket with several state" */ 82 | func Socklist() { 83 | SockList := make(map[uint64]SockState) 84 | for _, ops := range OpsList { 85 | for skid, skst := range ops.SocketState { 86 | var tmps SockState 87 | tmps = SockList[skid] 88 | if (len(tmps.SockState) == 0) { 89 
| tmps.SockState = []uint64{skst} 90 | tmps.SockOps = []string{ops.KprobePoint} 91 | } else { 92 | tmps.SockState = append(tmps.SockState, skst) 93 | tmps.SockOps = append(tmps.SockOps, ops.KprobePoint) 94 | } 95 | SockList[skid] = tmps 96 | } 97 | } 98 | for skid, sock := range SockList { 99 | fmt.Println("Socket id is", skid) 100 | fmt.Printf("The state:%v\n", sock.SockState) 101 | fmt.Printf("The operations:%v\n", sock.SockOps) 102 | } 103 | } 104 | 105 | /* state change hash, as coverage signal in syzkall */ 106 | func hash(a uint64, b uint64) uint32{ 107 | a = a ^ b 108 | a = (a ^ 61) ^ (a >> 16) 109 | a = a + (a << 3) 110 | a = a ^ (a >> 4) 111 | a = a * 0x27d4eb2d 112 | a = a ^ (a >> 15) 113 | return uint32(a) 114 | } 115 | 116 | /* Only read state change, know nothing about state */ 117 | func SockStateHandle() []uint32 { 118 | var rawSignals []uint32 119 | if (len(OpsList) < 2) { 120 | rawSignals = append(rawSignals, 0xffffffff) 121 | return rawSignals 122 | } 123 | SockList := make(map[uint64]SockState) 124 | for _, ops := range OpsList { 125 | for skid, skst := range ops.SocketState { 126 | var tmps SockState 127 | tmps = SockList[skid] 128 | if (len(tmps.SockState) == 0) { 129 | tmps.SockState = []uint64{skst} 130 | tmps.SockOps = []string{ops.KprobePoint} 131 | } else { 132 | tmps.SockState = append(tmps.SockState, skst) 133 | tmps.SockOps = append(tmps.SockOps, ops.KprobePoint) 134 | } 135 | SockList[skid] = tmps 136 | } 137 | } 138 | for _, sock := range SockList { 139 | for i := 0; i < len(sock.SockState)-1; i++ { 140 | rawSignals = append(rawSignals, hash(sock.SockState[i], sock.SockState[i+1])) 141 | } 142 | } 143 | rawSignals = append(rawSignals, 0xffffffff) 144 | return rawSignals 145 | } 146 | 147 | /* Read all state coverage */ 148 | func Statelist() []uint32{ 149 | var rawSignals []uint32 150 | log.Printf("%d signals in statelist\n", len(StateList)) 151 | for _, s := range StateList { 152 | rawSignals = append(rawSignals, uint32(s)) 153 | } 
154 | rawSignals = append(rawSignals, 0xffffffff) 155 | return rawSignals 156 | } 157 | 158 | /* Read Opslist */ 159 | func Opslist() { 160 | fmt.Println("There are", id, "operations of socket") 161 | for _, ops := range OpsList { 162 | fmt.Println("Kprobe point is:", ops.KprobePoint) 163 | for id, ss := range ops.SocketState { 164 | fmt.Println("Socket id is:", id) 165 | fmt.Println("Socket state:", ss) 166 | } 167 | fmt.Println("") 168 | } 169 | } 170 | 171 | /* Clear historical data */ 172 | func Stateclear() { 173 | if (len(OpsList) == 0) { 174 | return 175 | } 176 | OpsList = OpsList[0:0] 177 | StateList = StateList[0:0] 178 | tmp = 1 179 | id = 1 180 | } 181 | 182 | func (ops Ops)addsock(sockid uint64) { 183 | ops.SocketState[sockid] = 0 184 | } 185 | 186 | func extract(msg string) (key string, value string) { 187 | rkey := regexp.MustCompile(".*:") 188 | rvalue := regexp.MustCompile(":.*") 189 | key = rkey.FindString(msg) 190 | value = rvalue.FindString(msg) 191 | key = key[:len(key)-1] 192 | value = value[1:] 193 | return key, value 194 | } 195 | 196 | func str2int(str string) uint64 { 197 | ret, err := strconv.ParseUint(str, 16, 64) 198 | if err != nil { 199 | fmt.Fprint(os.Stderr, "Invaliable socket ID", err, "\n") 200 | } 201 | return ret 202 | } 203 | 204 | func newops(id int, kprobepoint string) Ops { 205 | ops := new(Ops) 206 | ops.OpsId = id 207 | ops.KprobePoint = kprobepoint 208 | return *ops 209 | } 210 | 211 | /* Handle unexpect message */ 212 | func misshandle(){ 213 | fmt.Println("Miss handle, historical data may be clean") 214 | } 215 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/syz_patch/kstate/0004-Support-retState-ebpfsig-resource.patch: -------------------------------------------------------------------------------- 1 | From 5d5632a31d9185118c2d48c4c4a02babb0e2f2a1 Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Thu, 2 Apr 2020 23:03:44 -0400 4 | Subject: [PATCH 4/5] Support 
retState ebpfsig resource 5 | 6 | --- 7 | executor/executor.cc | 7 ++++--- 8 | pkg/rpctype/rpctype.go | 1 + 9 | prog/clone.go | 5 +++-- 10 | prog/prog.go | 1 + 11 | prog/rand.go | 2 +- 12 | syz-fuzzer/proc.go | 16 ++++++++++++++++ 13 | syz-manager/html.go | 4 ++++ 14 | 7 files changed, 30 insertions(+), 6 deletions(-) 15 | 16 | diff --git a/executor/executor.cc b/executor/executor.cc 17 | index 69fa44ca..964b3ec4 100644 18 | --- a/executor/executor.cc 19 | +++ b/executor/executor.cc 20 | @@ -36,6 +36,7 @@ 21 | #endif 22 | 23 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 24 | +/* refer to the ebpf text */ 25 | #define RETSTATE_SIG_MASK 0xf000000000000000 26 | #define STATE_SIG_MASK 0xe000000000000000 27 | #define COVERAGE_SIG_MASK 0xd0000000ffffffff 28 | @@ -924,9 +925,9 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 29 | uint64_t state = 0; 30 | if (ret > 0) { 31 | state = strtoul(buf, NULL, 16); 32 | - /* state signal start with 0xf 33 | - * Write out real state for fuzzer's further handle 34 | - */ 35 | + /* state/retstate signal start with 0xe/0xf 36 | + * Write out real state for fuzzer's further handle 37 | + */ 38 | write_output64(state); 39 | debug("A state signal 0x%016lx\n", state); 40 | n = 0; 41 | diff --git a/pkg/rpctype/rpctype.go b/pkg/rpctype/rpctype.go 42 | index fcc01a99..ac99e73c 100644 43 | --- a/pkg/rpctype/rpctype.go 44 | +++ b/pkg/rpctype/rpctype.go 45 | @@ -16,6 +16,7 @@ type RPCInput struct { 46 | Prog []byte 47 | Signal signal.Serial 48 | Cover []uint32 49 | + SpeRes bool 50 | } 51 | 52 | type RPCCandidate struct { 53 | diff --git a/prog/clone.go b/prog/clone.go 54 | index c95ae206..6f5cdd64 100644 55 | --- a/prog/clone.go 56 | +++ b/prog/clone.go 57 | @@ -9,8 +9,9 @@ import ( 58 | 59 | func (p *Prog) Clone() *Prog { 60 | p1 := &Prog{ 61 | - Target: p.Target, 62 | - Calls: make([]*Call, len(p.Calls)), 63 | + Target: p.Target, 64 | + Calls: make([]*Call, len(p.Calls)), 65 | + Resource: p.Resource, 
66 | } 67 | newargs := make(map[*ResultArg]*ResultArg) 68 | for ci, c := range p.Calls { 69 | diff --git a/prog/prog.go b/prog/prog.go 70 | index 1600c0a2..1ff27046 100644 71 | --- a/prog/prog.go 72 | +++ b/prog/prog.go 73 | @@ -11,6 +11,7 @@ type Prog struct { 74 | Target *Target 75 | Calls []*Call 76 | Comments []string 77 | + Resource bool 78 | } 79 | 80 | type Call struct { 81 | diff --git a/prog/rand.go b/prog/rand.go 82 | index 8583fbdc..2b8fcc2a 100644 83 | --- a/prog/rand.go 84 | +++ b/prog/rand.go 85 | @@ -856,7 +856,7 @@ func (r *randGen) resourceCentric(s *state, t *ResourceType) (arg Arg, calls []* 86 | for idx := range r.Perm(len(s.corpus)) { 87 | p = s.corpus[idx].Clone() 88 | resources := getCompatibleResources(p, t.TypeName, r) 89 | - if len(resources) > 0 { 90 | + if len(resources) > 0 && p.Resource { 91 | resource = resources[r.Intn(len(resources))] 92 | break 93 | } 94 | diff --git a/syz-fuzzer/proc.go b/syz-fuzzer/proc.go 95 | index 0cb186b4..dabe3ce2 100644 96 | --- a/syz-fuzzer/proc.go 97 | +++ b/syz-fuzzer/proc.go 98 | @@ -162,12 +162,19 @@ func (proc *Proc) triageInput(item *WorkTriage) { 99 | data := item.p.Serialize() 100 | sig := hash.Hash(data) 101 | 102 | + item.p.Resource = true 103 | + /* base ebpfsig retstate feedback */ 104 | + if proc.fuzzer.config.Flags&ipc.FlagEnableEbpfSignal != 0 { 105 | + item.p.Resource = getResourceFlagFromSignal(item.info.Signal) 106 | + } 107 | + 108 | log.Logf(2, "added new input for %v to corpus:\n%s", logCallName, data) 109 | proc.fuzzer.sendInputToManager(rpctype.RPCInput{ 110 | Call: callName, 111 | Prog: data, 112 | Signal: inputSignal.Serialize(), 113 | Cover: inputCover.Serialize(), 114 | + SpeRes: item.p.Resource, 115 | }) 116 | 117 | proc.fuzzer.addInputToCorpus(item.p, inputSignal, sig) 118 | @@ -200,6 +207,15 @@ func getSignalAndCover(p *prog.Prog, info *ipc.ProgInfo, call int) (signal.Signa 119 | return signal.FromRaw(inf.Signal, signalPrio(p, inf, call)), inf.Cover 120 | } 121 | 122 | +func 
getResourceFlagFromSignal(Signal []uint64) bool { 123 | + for _, sig := range Signal { 124 | + if sig&0xf000000000000000 == 0xf000000000000000 { 125 | + return true 126 | + } 127 | + } 128 | + return false 129 | +} 130 | + 131 | func (proc *Proc) smashInput(item *WorkSmash) { 132 | if proc.fuzzer.faultInjectionEnabled && item.call != -1 { 133 | proc.failCall(item.p, item.call) 134 | diff --git a/syz-manager/html.go b/syz-manager/html.go 135 | index 46d61eda..66e0bfa9 100644 136 | --- a/syz-manager/html.go 137 | +++ b/syz-manager/html.go 138 | @@ -195,6 +195,7 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) { 139 | Short: p.String(), 140 | Cover: len(inp.Cover), 141 | Signal: inp.Signal.Deserialize(), 142 | + SpeRes: inp.SpeRes, 143 | }) 144 | } 145 | sort.Slice(data.Inputs, func(i, j int) bool { 146 | @@ -607,6 +608,7 @@ type UIInput struct { 147 | Short string 148 | Cover int 149 | Signal signal.Signal 150 | + SpeRes bool 151 | } 152 | 153 | var summaryTemplate = html.CreatePage(` 154 | @@ -752,11 +754,13 @@ var corpusTemplate = html.CreatePage(` 155 | 156 | Coverage 157 | Program 158 | + Resource 159 | 160 | {{range $inp := $.Inputs}} 161 | 162 | {{$inp.Cover}} 163 | {{$inp.Short}} 164 | + {{$inp.SpeRes}} 165 | 166 | {{end}} 167 | 168 | -- 169 | 2.20.1 170 | 171 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/syz_patch/kstate/0005-Add-monitor-binary-and-function-pcs-options-to-manag.patch: -------------------------------------------------------------------------------- 1 | From 5fbf7df6dae52f1ef8375e7f8d2118828e07e03f Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Thu, 2 Apr 2020 23:06:27 -0400 4 | Subject: [PATCH 5/5] Add monitor binary and function pcs options to manager 5 | configure options 6 | 7 | --- 8 | executor/bitmap.h | 2 +- 9 | executor/common_linux.h | 2 +- 10 | pkg/mgrconfig/config.go | 2 ++ 11 | syz-manager/manager.go | 11 +++++++++++ 12 | 4 files changed, 15 
insertions(+), 2 deletions(-) 13 | 14 | diff --git a/executor/bitmap.h b/executor/bitmap.h 15 | index cb00e543..785513ba 100644 16 | --- a/executor/bitmap.h 17 | +++ b/executor/bitmap.h 18 | @@ -8,7 +8,7 @@ uint32* func_pcs; 19 | 20 | uint32 readPcs() 21 | { 22 | - FILE* f = fopen("/root/funcaddr.map", "r"); 23 | + FILE* f = fopen("/funcaddr.map", "r"); 24 | uint32 count = 0; 25 | if (f == NULL) 26 | return -1; 27 | diff --git a/executor/common_linux.h b/executor/common_linux.h 28 | index b64f413c..4046ae8b 100644 29 | --- a/executor/common_linux.h 30 | +++ b/executor/common_linux.h 31 | @@ -2707,7 +2707,7 @@ static int do_sandbox_none(void) 32 | close(monpipefd[0]); 33 | close(monpipefd[1]); 34 | debug("single ebpf start ...\n"); 35 | - execl("/root/pipe_monitor", "/root/pipe_monitor", "--debug", NULL); 36 | + execl("/pipe_monitor", "/pipe_monitor", "--debug", NULL); 37 | return 0; 38 | } 39 | /* ebpf loading is very slow, one time a vm restart */ 40 | diff --git a/pkg/mgrconfig/config.go b/pkg/mgrconfig/config.go 41 | index ac618096..e32a5cde 100644 42 | --- a/pkg/mgrconfig/config.go 43 | +++ b/pkg/mgrconfig/config.go 44 | @@ -119,4 +119,6 @@ type Config struct { 45 | SyzFuzzerBin string `json:"-"` 46 | SyzExecprogBin string `json:"-"` 47 | SyzExecutorBin string `json:"-"` 48 | + SyzMonitorBin string `json:"ebpfmonitor"` 49 | + SyzCovPcs string `json:"coverpcs"` 50 | } 51 | diff --git a/syz-manager/manager.go b/syz-manager/manager.go 52 | index 77379468..f6d88e3f 100644 53 | --- a/syz-manager/manager.go 54 | +++ b/syz-manager/manager.go 55 | @@ -556,6 +556,17 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) { 56 | } 57 | } 58 | 59 | + /* scp ebpf monitor binary to machine */ 60 | + _, err = inst.Copy(mgr.cfg.SyzMonitorBin) 61 | + if err != nil { 62 | + return nil, fmt.Errorf("failed to copy binary: %v", err) 63 | + } 64 | + /* scp coverage filter pcs to machine */ 65 | + _, err = inst.Copy(mgr.cfg.SyzCovPcs) 66 | + if err != nil { 67 | + return nil, 
fmt.Errorf("failed to copy binary: %v", err) 68 | + } 69 | + 70 | fuzzerV := 0 71 | procs := mgr.cfg.Procs 72 | if *flagDebug { 73 | -- 74 | 2.20.1 75 | 76 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/tcp-ipv6/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "target": "linux/amd64", 3 | "http": "", 4 | "workdir": "", 5 | "kernel_obj": "", 6 | "image": "syzkalls.img", 7 | "sshkey": "/root/.ssh/id_rsa", 8 | "syzkaller": "/root/syzkalls/", 9 | "ebpfmonitor": "$PATH_TO_YOUR_EBPF_MONITOR", 10 | "sandbox": "none", 11 | "ebpfsig": true, 12 | "covfilter": true, 13 | "name": "debian-4.17-ipv4_tcp", 14 | "hub_client":"debian-4.17-ipv4_tcp", 15 | "hub_addr":"", 16 | "hub_key":"", 17 | "hub_synctime": 5, 18 | "suppressions": ["some known bug"], 19 | "enable_syscalls": ["ioctl$sock_inet6_tcp_SIOCATMARK", "ioctl$sock_inet_tcp_SIOCOUTQ", "ioctl$sock_inet6_tcp_SIOCOUTQ", "ioctl$sock_inet_tcp_SIOCOUTQNSD", "ioctl$sock_inet6_tcp_SIOCOUTQNSD", "setsockopt$inet6_tcp_TLS_RX", "setsockopt$inet6_tcp_TLS_TX", "setsockopt$inet6_tcp_TCP_FASTOPEN_KEY", "getsockopt$inet6_tcp_TCP_REPAIR_WINDOW", "setsockopt$inet6_tcp_TCP_REPAIR_WINDOW", "setsockopt$inet6_tcp_TCP_REPAIR_OPTIONS", "setsockopt$inet6_tcp_TCP_QUEUE_SEQ", "setsockopt$inet6_tcp_TCP_REPAIR_QUEUE", "setsockopt$inet6_tcp_TCP_REPAIR", "setsockopt$inet6_tcp_TCP_MD5SIG", "setsockopt$inet6_tcp_TCP_ULP", "setsockopt$inet6_tcp_TCP_CONGESTION", "getsockopt$inet6_tcp_TCP_ZEROCOPY_RECEIVE", "getsockopt$inet6_tcp_buf", "setsockopt$inet6_tcp_buf", "getsockopt$inet6_tcp_int", "setsockopt$inet6_tcp_int", "socket$inet6_tcp", "accept$inet6", "accept4$inet6", "bind$inet6", "connect$inet6", "sendmsg$inet6_tcp","sendto$inet6_tcp","sendmmsg$inet6_tcp", "recvfrom$inet6", "listen", "shutdown", "close", "syz_emit_ethernet", "syz_extract_tcp_res$synack", "syz_extract_tcp_res", "setsockopt$sock_int", "setsockopt$sock_linger", 
"setsockopt$sock_void", "setsockopt$SO_TIMESTAMP", "setsockopt$SO_TIMESTAMPING", "setsockopt$SO_BINDTODEVICE", "setsockopt$SO_ATTACH_FILTER"], 20 | "procs": 1, 21 | "type": "qemu", 22 | "vm": { 23 | "count": 1, 24 | "cpu": 6, 25 | "mem": 18384, 26 | "kernel": "boot/bzImage", 27 | "initrd": "initrd.img", 28 | "cmdline": "kmemleak=on reboot=warm net.ifnames=tap0 console=ttyS0 vsyscall=native rodata=n oops=panic panic_on_warn=1 panic=0 ftrace_dump_on_oops=orig_cpu earlyprintk=serial slub_debug=UZ root=/dev/sda1", 29 | "qemu_args": "-enable-kvm" 30 | 31 | -------------------------------------------------------------------------------- /syzkaller/kstat_demo/tcp-ipv6/data.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/kstat_demo/tcp-ipv6/data.tar.bz2 -------------------------------------------------------------------------------- /syzkaller/kstat_demo/tcp-ipv6/test.md: -------------------------------------------------------------------------------- 1 | # A sample of tcp-ipv6 fuzz 2 | 3 | ## Feature of customize syzkaller 4 | 1. Socket state( historical state) feedback. 5 | 2. Coverage filter 6 | 7 | I implement runtime state( sk->sk_state, tp->repair ...) feedback by using ebpf. ebpf collect a 64-bit state send to executor. The feedback looks like original syzkaller coverage signal. Send these signals to fuzzer. Coverage filter implement in executor. You can get kernel function address region use the [fun2addr](https://github.com/hardenedlinux/harbian-qa/blob/master/syz_patch/fun2addr.go). 
8 | 9 | ## Usage 10 | Command for building the ebpf monitor: 11 | ``` 12 | cd harbian-qa/syzkaller/kstat_demo 13 | mv kstat_demo/ebpf_sample/$YOUR_EBPF.go kstat_demo/ebpf/ebpftext.go 14 | go build pipe_monitor 15 | ``` 16 | Command for patching syzkaller: 17 | ``` 18 | cd /path/to/your/syzkaller/source 19 | git checkout bc2c6e45 20 | git apply /path/to/harbian-qa/syzkaller/kstat_demo/$PATCH_YOU_NEED.patch 21 | ``` 22 | After patching syzkaller, to filter coverage, the addresses in executor/cov_filter.h should fit your kernel. Use fun2addr as: 23 | ``` 24 | bin/syz-func2addr -v PATH_to_YOUR_VMLINUX -f FUNC_NAME -s 25 | ``` 26 | Get the addresses of all functions you want to test, and write them to cov_filter.h. Then run make as in original syzkaller to build it. 27 | 28 | ## Testcase 29 | I ran both the original and the customized syzkaller six times, two hours per run. The enabled syscalls are extracted from socket_inet6.txt and socket_inet_tcp.txt using this [tool](https://github.com/hardenedlinux/harbian-qa/blob/master/syz_patch/extract_syscall_names_from_prog.py). There are also some syscalls for ipv4_tcp that have to be removed by hand. 30 | This is some coverage (customized vs. original in the table) of functions which are monitored by my ebpf: 31 | 32 | |kern_func | 1 | 2 | 3 | 4 | 5 | 6 | 33 | | -------- | - | - | - | - | - | - | 34 | | tcp_v6_connect | 44/45 | 44/44 | 45/45 | 45/46 | 46/44 | 45/45 | 35 | | tcp_sendmsg_locked | 73/71 | 19/18 | 77/48 | 73/20 | 73/17 | 72/20 | 36 | | tcp_recvmsg | 54/33 | 35/33 | 35/33 | 54/36 | 36/33 | 48/36 | 37 | | tcp_setsockopt | 83/80 | 80/81 | 84/79 | 84/82 | 82/81 | 84/83 | 38 | | tcp_getsockopt | 61/59 | 57/59 | 56/57 | 61/60 | 58/58 | 60/58 | 39 | | inet_accept | 2/2 | 2/2 | 2/2 | 2/2 | 2/2 | 2/2 | 40 | | tcp_ioctl | 9/9 | 9/9 | 9/9 | 9/9 | 9/9 | 9/9 | 41 | 42 | In another example, I ran both the original and the customized syzkaller six times, 2.5 hours per run. The numbers in brackets mean: how many times syzkaller covered this line in the 6 runs, customized vs. original.
It can be seen that these lines can be easily covered in the customized syzkaller: 43 | #### tp->repair/tp->repair_queue 44 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L1233 (5:0) 45 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L2687 (6:0) 46 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L2689 (5:0) 47 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L3106 (5:2) 48 | 49 | #### sk->sk_state 50 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L1259 (6:0) 51 | https://elixir.bootlin.com/linux/v4.17/source/net/ipv4/tcp.c#L2137 (6:2) 52 | Why can the customized syzkaller cover these lines more efficiently? Refer to [this](../README.md). 53 | 54 | ## Conclusion 55 | 1. Greater coverage than original syzkaller, especially in function tcp_sendmsg. It is because historical state and nested conditions can be covered easily. We can see this in the second example. 56 | 57 | 2. The tcp_setsockopt coverage of the customized syzkaller is only a little more than original syzkaller's because of the powerful syscalls script. Most of the uncovered code is similar in original syzkaller. Is it because the powerful syscalls script and mutation are not enough? 58 | 59 | ## RawData 60 | * [Data](data.tar.bz2) is taken from the syzkaller web interface. It does not match the table. 61 | * The tests keep the same enabled syscalls, run time, vm environment... as much as possible. 62 | * Collecting different data as feedback (ebpftext) gives a greatly different result. -------------------------------------------------------------------------------- /syzkaller/kstate_resource.md: -------------------------------------------------------------------------------- 1 | # Kernel state based fuzzer: an LLVM approach 2 | 3 | ## Content 4 | 5 | 1. Usage. 6 | 2. Implementation detail. 7 | 3. Practice 8 | 9 | To collect kernel states as a syzkaller resource, we have to follow these steps: 10 | 11 | 1. Build the kernel with the GEPOperator tracker instrumentation. 12 | 2. 
Support collecting kernel state in syzkaller. 13 | 3. Weighted kernel states for the fuzzer. 14 | 15 | ## Usage 16 | 17 | ### Kernel instrument 18 | 19 | First, we need to implement an [LLVM pass](../static_analysis_tools/kern_instrument/AssignTrackerPass) to do the instrumentation. As we already know, lots of kernel states are located in some field of a structure. Tracking the store operations on a variable of a GEPointer can detect states which may help the fuzzer. Then, refer to [this document](https://llvm.org/docs/WritingAnLLVMPass.html) to build your compiler with the field assignment tracker. While building the kernel, you have to add a line such as: 20 | ``` 21 | CFLAGS_*.o = -Xclang -load -Xclang PATH_TO_YOUR_PASS.so -fno-discard-value-names 22 | ``` 23 | to the Makefile for the object file you need to instrument. The kernel state id is the crc of file name, structure name and field name. 24 | If you want to track the whole kernel, try to add the mentioned CFLAGS to kcov-flag-$(CONFIG_CC_HAS_SANCOV_TRACE_PC). 25 | 26 | ### Implement the instrument function in kernel 27 | 28 | Refer to our [implementation](../static_analysis_tools/kern_instrument/kern_patch) of the instrumentation to collect kernel state. Then, build your kernel as usual. 29 | 30 | ### Patch syzkaller 31 | 32 | Clone syzkaller, run: 33 | ``` 34 | cd PATH_TO_SYZ_SRC 35 | git checkout ff4a334 36 | git apply PATH_TO_harbian-qa/syz_patch/*.patch 37 | ``` 38 | 39 | build syzkaller as usual. Add the following lines to the configure file: 40 | 41 | ``` 42 | "kstate_filter": { 43 | "files": ["which_source_file_to_track", "base_on_filename_crc"], 44 | "states": ["which_struct_field_to_track", "base_on_struct_field_crc"] 45 | }, 46 | ``` 47 | 48 | You can use our tool [kstate_map](../static_analysis_tools/IRParser/kstate_map.cpp) to get the kernel state map.
run: 49 | 50 | ``` 51 | clang++-10 kstate_map.cpp -o kstate_map -O0 -g -fsanitize=address `llvm-config-10 --cxxflags --libs --ldflags --system-libs` 52 | ./kstate_map LLVM_IR_DIR ASM_DIR VMLINUX FUNCTION_LIST LOG_DIR 53 | ``` 54 | 55 | FUNCTION_LIST has the names of the functions whose addresses we need. 56 | IR_DIR: the directory with all the LLVM IR code we need. 57 | LOG_DIR: after running the command, kstate_map will create a "*.json" and a "*.state.map" for every function. 58 | Write the output to PATH_TO_KERNEL_STATE.map, and run the patched syzkaller as usual. This map assigns weights based on the frequency of state use. 59 | 60 | ## Kernel state based fuzzer 61 | Now, you can run syzkaller as usual, and you can find there is a list of kernel states if you access the "/input" interface. You can also get the state weight of every prog in the "/corpus" interface. 62 | 63 | ## Implementation detail of kernel state resource 64 | 65 | ### Kernel instrument 66 | 67 | We reuse the KCOV interface instead of using a separate mode. So, we encode the state id with 0xefe at the highest 12 bits. While syzkaller gets a kcov pc starting with 0xefe, it realizes this pc is a kstate id, and the value and address of the state will occupy the following 2*64 bits. No matter how many bits the variable has, we normalize it to 64 bits. Note that if you want to collect other information, you have to implement a corresponding syzkaller for it. 68 | 69 | ### Syzkaller support 70 | 71 | #### executor 72 | 73 | syz-executor has to pick out kernel states and send them out after all signals were sent. This handling can be found in our patch for executor.cc, function write_coverage_signal. While the executor reads a pc starting with 0xfefe, that means it receives a kernel state. And we use a chunk of shared memory for this state after the coverage signal shared memory. syz-fuzzer will handle them later.
74 | 75 | #### syz-fuzzer 76 | 77 | Correspondingly, parseOutput in pkg/ipc.go is called by the fuzzer, and we add a readKernState for parsing the executor output. This kernel state information will be put into a structure called KernState in pkg/kstate/kstate.go. Every input from the executor has an array of kernstates, and every prog has a state weight calculated from the kernstates. Also, KernState supports searching the map by its ID or by ID^Value, which is called their hash. 78 | 79 | syz-fuzzer/fuzzer.go: calStateWeight will calculate the resource weight of a prog. The count is subtracted to eliminate the influence of the kstate length. prog/rand.go: the chooseReaProgramIdx function implements a prioritized choice of prog based on its state weight. 80 | 81 | ## Kernel state guided fuzzing practice 82 | 83 | We have explored two ways of assigning weight to resources. 84 | 85 | #### Get the frequency of kernel state use 86 | 87 | This tool is the kstate_map we mentioned above. We use the LLVM API to statically analyze the use of states in target functions. Without any awareness of the value of a state, it just encourages the fuzzer to preferentially choose and extract those progs that frequently rewrite important states — in other words, progs that have complex states. 88 | 89 | #### Specify kernel state value weight 90 | 91 | We use a [clang checker](../static_analysis_tools/ConditionChecker/) to get symbolic information of condition constraints: 92 | 93 | ``` 94 | clang -Xclang -analyze -Xclang -analyzer-checker=debug.ConditionChecker ...... -c -o *.o *.c 95 | ``` 96 | 97 | You can get some constraint values of variables. The patched syzkaller supports a hash mode: if an ID^value can be found in the kstate map, it is used as a unique state. So, you can specify a weight for a state with a special value. Now, it can only be specified manually in the kstate map.
98 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0002-Add-coverage-filter.patch: -------------------------------------------------------------------------------- 1 | From aea85a5230e05eb01657437228d1f9dab5e8061d Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 21:41:44 -0500 4 | Subject: [PATCH 2/6] Add coverage filter 5 | 6 | --- 7 | executor/cov_filter.h | 40 ++++++++++++++++++++++++++++++++++++++++ 8 | executor/executor.cc | 12 ++++++++++++ 9 | 2 files changed, 52 insertions(+) 10 | create mode 100644 executor/cov_filter.h 11 | 12 | diff --git a/executor/cov_filter.h b/executor/cov_filter.h 13 | new file mode 100644 14 | index 00000000..726db4b3 15 | --- /dev/null 16 | +++ b/executor/cov_filter.h 17 | @@ -0,0 +1,40 @@ 18 | +typedef unsigned int uint32; 19 | + 20 | +struct code_region { 21 | + uint32 start; 22 | + uint32 end; 23 | +}; 24 | + 25 | +/* Address of kernel function for filtering coverage signal */ 26 | +static struct code_region white_list[] = { 27 | + // do_mmap 28 | + {.start = 0x81757755, .end = 0x81758690}, 29 | + //tcp_v6_init_sock 30 | + {.start = 0x8294f438, .end = 0x8294f438}, 31 | + //tcp_v6_connect 32 | + {.start = 0x82954272, .end = 0x829608c3}, 33 | + //tcp_sendmsg_locked 34 | + {.start = 0x8269ced5, .end = 0x826a06b4}, 35 | + //tcp_recvmsg 36 | + {.start = 0x826912ae, .end = 0x826941c1}, 37 | + //tcp_close 38 | + {.start = 0x826a1053, .end = 0x826a1e4f}, 39 | + //tcp_shutdown 40 | + {.start = 0x8268d7d0, .end = 0x8268d8ad}, 41 | + //do_tcp_setsockopt 42 | + {.start = 0x82697629, .end = 0x82699833}, 43 | + //do_tcp_getsockopt 44 | + {.start = 0x8268da50, .end = 0x82690af4}, 45 | + //inet_accept 46 | + {.start = 0x8277b5f0, .end = 0x8277bb82}, 47 | + //inet_listen 48 | + {.start = 0x8277ccde, .end = 0x8277d293}, 49 | + //tcp_ioctl 50 | + {.start = 0x82699e2a, .end = 0x8269a444}, 51 | + //__inet6_bind 52 | + {.start = 0x82869017, .end = 0x8286a8be}, 53 | + 
//inet6_getname 54 | + {.start = 0x828658df, .end = 0x82865e9d}, 55 | + //inet6_ioctl 56 | + {.start = 0x82866292, .end = 0x82866461}, 57 | +}; 58 | diff --git a/executor/executor.cc b/executor/executor.cc 59 | index 21373a4c..c9cc34a4 100644 60 | --- a/executor/executor.cc 61 | +++ b/executor/executor.cc 62 | @@ -16,6 +16,7 @@ 63 | #include 64 | 65 | #include "defs.h" 66 | +#include "cov_filter.h" 67 | 68 | #if defined(__GNUC__) 69 | #define SYSCALLAPI 70 | @@ -870,6 +871,15 @@ thread_t* schedule_call(int call_index, int call_num, bool colliding, uint64 cop 71 | } 72 | 73 | #if SYZ_EXECUTOR_USES_SHMEM 74 | +bool cover_filter(uint32 pc) 75 | +{ 76 | + for (uint32 i = 0; i < (sizeof(white_list) / sizeof(uint64)); i++) { 77 | + if ((pc >= white_list[i].start) && (pc <= white_list[i].end)) 78 | + return true; 79 | + } 80 | + return false; 81 | +} 82 | + 83 | template 84 | void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover_count_pos, int monpipe) 85 | { 86 | @@ -911,6 +921,8 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 87 | prev = hash(pc); 88 | if (dedup(sig)) 89 | continue; 90 | + if (!cover_filter(pc)) 91 | + continue; 92 | sig |= 0xd000000000000000; 93 | write_output64(sig & COVERAGE_SIG_MASK); 94 | nsig++; 95 | -- 96 | 2.20.1 97 | 98 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0003-Add-manager-configure-for-coverage-filter-and-ebpf-f.patch: -------------------------------------------------------------------------------- 1 | From 6f2595c08f2bc164fb18ab33ab5d3ce5c2ca8d1c Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 21:57:42 -0500 4 | Subject: [PATCH 3/6] Add manager configure for coverage filter and ebpf 5 | feedback 6 | 7 | --- 8 | executor/executor.cc | 16 +++++++++++----- 9 | pkg/instance/instance.go | 12 ++++++------ 10 | pkg/ipc/ipc.go | 2 ++ 11 | pkg/mgrconfig/config.go | 6 ++++++ 12 | pkg/mgrconfig/load.go 
| 2 ++ 13 | syz-fuzzer/fuzzer.go | 8 ++++++++ 14 | syz-manager/manager.go | 2 +- 15 | tools/syz-runtest/runtest.go | 2 +- 16 | 8 files changed, 37 insertions(+), 13 deletions(-) 17 | 18 | diff --git a/executor/executor.cc b/executor/executor.cc 19 | index c9cc34a4..28f0bdb1 100644 20 | --- a/executor/executor.cc 21 | +++ b/executor/executor.cc 22 | @@ -125,6 +125,8 @@ static bool flag_enable_net_dev; 23 | static bool flag_enable_net_reset; 24 | static bool flag_enable_cgroups; 25 | static bool flag_enable_close_fds; 26 | +static bool flag_enable_cover_filter; 27 | +static bool flag_enable_ebpf_signal; 28 | static bool flag_enable_devlink_pci; 29 | 30 | static bool flag_collect_cover; 31 | @@ -491,6 +493,8 @@ void parse_env_flags(uint64 flags) 32 | flag_enable_cgroups = flags & (1 << 9); 33 | flag_enable_close_fds = flags & (1 << 10); 34 | flag_enable_devlink_pci = flags & (1 << 11); 35 | + flag_enable_cover_filter = flags & (1 << 15); 36 | + flag_enable_ebpf_signal = flags & (1 << 16); 37 | } 38 | 39 | #if SYZ_EXECUTOR_USES_FORK_SERVER 40 | @@ -904,10 +908,12 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 41 | /* state signal start with 0xf 42 | * Write out real state for fuzzer's further handle 43 | */ 44 | - write_output64(state); 45 | - debug("A state signal %016lx\n", state); 46 | - n = 0; 47 | - nsig++; 48 | + if (flag_enable_ebpf_signal) { 49 | + write_output64(state); 50 | + debug("A state signal %016lx\n", state); 51 | + n = 0; 52 | + nsig++; 53 | + } 54 | } 55 | n++; 56 | } 57 | @@ -921,7 +927,7 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 58 | prev = hash(pc); 59 | if (dedup(sig)) 60 | continue; 61 | - if (!cover_filter(pc)) 62 | + if (flag_enable_cover_filter && !cover_filter(pc)) 63 | continue; 64 | sig |= 0xd000000000000000; 65 | write_output64(sig & COVERAGE_SIG_MASK); 66 | diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go 67 | index b3e24aac..d73e7174 100644 68 
| --- a/pkg/instance/instance.go 69 | +++ b/pkg/instance/instance.go 70 | @@ -299,7 +299,7 @@ func (inst *inst) testInstance() error { 71 | } 72 | 73 | cmd := OldFuzzerCmd(fuzzerBin, executorBin, "test", inst.cfg.TargetOS, inst.cfg.TargetArch, fwdAddr, 74 | - inst.cfg.Sandbox, 0, inst.cfg.Cover, true) 75 | + inst.cfg.Sandbox, 0, inst.cfg.Cover, true, false, false) 76 | outc, errc, err := inst.vm.Run(10*time.Minute, nil, cmd) 77 | if err != nil { 78 | return fmt.Errorf("failed to run binary in VM: %v", err) 79 | @@ -398,7 +398,7 @@ func (inst *inst) testProgram(command string, testTime time.Duration) error { 80 | } 81 | 82 | func FuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox string, procs, verbosity int, 83 | - cover, debug, test, runtest bool) string { 84 | + cover, debug, test, runtest bool, covfilter bool, ebpfsig bool) string { 85 | osArg := "" 86 | switch OS { 87 | case "akaros", "fuchsia": 88 | @@ -416,13 +416,13 @@ func FuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox string, procs, 89 | verbosityArg = fmt.Sprintf(" -vv=%v", verbosity) 90 | } 91 | return fmt.Sprintf("%v -executor=%v -name=%v -arch=%v%v -manager=%v -sandbox=%v"+ 92 | - " -procs=%v -cover=%v -debug=%v -test=%v%v%v", 93 | + " -procs=%v -cover=%v -debug=%v -test=%v%v%v -covfilter=%v -ebpfsig=%v", 94 | fuzzer, executor, name, arch, osArg, fwdAddr, sandbox, 95 | - procs, cover, debug, test, runtestArg, verbosityArg) 96 | + procs, cover, debug, test, runtestArg, verbosityArg, covfilter, ebpfsig) 97 | } 98 | 99 | -func OldFuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox string, procs int, cover, test bool) string { 100 | - return FuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox, procs, 0, cover, false, test, false) 101 | +func OldFuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox string, procs int, cover, test bool, covfilter bool, ebpfsig bool) string { 102 | + return FuzzerCmd(fuzzer, executor, name, OS, arch, fwdAddr, sandbox, procs, 0, 
cover, false, test, false, covfilter, ebpfsig) 103 | } 104 | 105 | func ExecprogCmd(execprog, executor, OS, arch, sandbox string, repeat, threaded, collide bool, 106 | diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go 107 | index c8dadaa7..2a05696c 100644 108 | --- a/pkg/ipc/ipc.go 109 | +++ b/pkg/ipc/ipc.go 110 | @@ -41,6 +41,8 @@ const ( 111 | // Executor does not know about these: 112 | FlagUseShmem // use shared memory instead of pipes for communication 113 | FlagUseForkServer // use extended protocol with handshake 114 | + FlagCoverFilter 115 | + FlagEbpfSignal 116 | ) 117 | 118 | // Per-exec flags for ExecOpts.Flags: 119 | diff --git a/pkg/mgrconfig/config.go b/pkg/mgrconfig/config.go 120 | index ea03c429..70604d1e 100644 121 | --- a/pkg/mgrconfig/config.go 122 | +++ b/pkg/mgrconfig/config.go 123 | @@ -68,7 +68,13 @@ type Config struct { 124 | 125 | // Use KCOV coverage (default: true). 126 | Cover bool `json:"cover"` 127 | + 128 | + /* Use coverage filter */ 129 | + Covfilter bool `json:"covfilter"` 130 | + /* Use ebpf feedback */ 131 | + Ebpfsig bool `json:"ebpfsig"` 132 | // Reproduce, localize and minimize crashers (default: true). 133 | + 134 | Reproduce bool `json:"reproduce"` 135 | 136 | // List of syscalls to test (optional). 
For example: 137 | diff --git a/pkg/mgrconfig/load.go b/pkg/mgrconfig/load.go 138 | index f02f3c59..c11ee7f7 100644 139 | --- a/pkg/mgrconfig/load.go 140 | +++ b/pkg/mgrconfig/load.go 141 | @@ -58,6 +58,8 @@ func defaultValues() *Config { 142 | return &Config{ 143 | SSHUser: "root", 144 | Cover: true, 145 | + Ebpfsig: false, 146 | + Covfilter: false, 147 | Reproduce: true, 148 | Sandbox: "none", 149 | RPC: ":0", 150 | diff --git a/syz-fuzzer/fuzzer.go b/syz-fuzzer/fuzzer.go 151 | index 5cc7df89..57ce2107 100644 152 | --- a/syz-fuzzer/fuzzer.go 153 | +++ b/syz-fuzzer/fuzzer.go 154 | @@ -115,6 +115,8 @@ func main() { 155 | flagPprof = flag.String("pprof", "", "address to serve pprof profiles") 156 | flagTest = flag.Bool("test", false, "enable image testing mode") // used by syz-ci 157 | flagRunTest = flag.Bool("runtest", false, "enable program testing mode") // used by pkg/runtest 158 | + flagCovFilter = flag.Bool("covfilter", false, "enable coverage filter") 159 | + flagEbpfSig = flag.Bool("ebpfsig", false, "enable ebpf feedback") 160 | ) 161 | flag.Parse() 162 | outputType := parseOutputType(*flagOutput) 163 | @@ -130,6 +132,12 @@ func main() { 164 | log.Fatalf("failed to create default ipc config: %v", err) 165 | } 166 | sandbox := ipc.FlagsToSandbox(config.Flags) 167 | + if *flagCovFilter { 168 | + config.Flags |= (1 << 15) 169 | + } 170 | + if *flagEbpfSig { 171 | + config.Flags |= (1 << 16) 172 | + } 173 | shutdown := make(chan struct{}) 174 | osutil.HandleInterrupts(shutdown) 175 | go func() { 176 | diff --git a/syz-manager/manager.go b/syz-manager/manager.go 177 | index 0aaee782..2af16686 100644 178 | --- a/syz-manager/manager.go 179 | +++ b/syz-manager/manager.go 180 | @@ -548,7 +548,7 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) { 181 | defer atomic.AddUint32(&mgr.numFuzzing, ^uint32(0)) 182 | cmd := instance.FuzzerCmd(fuzzerBin, executorBin, fmt.Sprintf("vm-%v", index), 183 | mgr.cfg.TargetOS, mgr.cfg.TargetArch, fwdAddr, mgr.cfg.Sandbox, 
procs, fuzzerV, 184 | - mgr.cfg.Cover, *flagDebug, false, false) 185 | + mgr.cfg.Cover, *flagDebug, false, false, mgr.cfg.Covfilter, mgr.cfg.Ebpfsig) 186 | outc, errc, err := inst.Run(time.Hour, mgr.vmStop, cmd) 187 | if err != nil { 188 | return nil, fmt.Errorf("failed to run fuzzer: %v", err) 189 | diff --git a/tools/syz-runtest/runtest.go b/tools/syz-runtest/runtest.go 190 | index 538646a2..167ac7d8 100644 191 | --- a/tools/syz-runtest/runtest.go 192 | +++ b/tools/syz-runtest/runtest.go 193 | @@ -175,7 +175,7 @@ func (mgr *Manager) boot(name string, index int) (*report.Report, error) { 194 | } 195 | cmd := instance.FuzzerCmd(fuzzerBin, executorBin, name, 196 | mgr.cfg.TargetOS, mgr.cfg.TargetArch, fwdAddr, mgr.cfg.Sandbox, mgr.cfg.Procs, 0, 197 | - mgr.cfg.Cover, mgr.debug, false, true) 198 | + mgr.cfg.Cover, mgr.debug, false, true, false, false) 199 | outc, errc, err := inst.Run(time.Hour, mgr.vmStop, cmd) 200 | if err != nil { 201 | return nil, fmt.Errorf("failed to run fuzzer: %v", err) 202 | -- 203 | 2.20.1 204 | 205 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0004-Make-the-download-sync-time-configurable.patch: -------------------------------------------------------------------------------- 1 | From acd4230012fe6b950ae8eeeeb2f5e100ff0018dc Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 22:06:08 -0500 4 | Subject: [PATCH 4/6] Make the download sync time configurable 5 | 6 | --- 7 | pkg/mgrconfig/config.go | 8 +++++--- 8 | pkg/mgrconfig/load.go | 17 +++++++++-------- 9 | pkg/rpctype/rpctype.go | 1 + 10 | syz-hub/hub.go | 27 ++++++++++++++++++++------- 11 | syz-hub/state/state.go | 21 +++++++++++++++++++++ 12 | syz-manager/html.go | 1 + 13 | syz-manager/hub.go | 21 ++++++++++++++------- 14 | syz-manager/manager.go | 7 +++++++ 15 | 8 files changed, 78 insertions(+), 25 deletions(-) 16 | 17 | diff --git a/pkg/mgrconfig/config.go b/pkg/mgrconfig/config.go 18 | index 
70604d1e..0316d835 100644 19 | --- a/pkg/mgrconfig/config.go 20 | +++ b/pkg/mgrconfig/config.go 21 | @@ -36,9 +36,11 @@ type Config struct { 22 | // SSH user ("root" by default). 23 | SSHUser string `json:"ssh_user,omitempty"` 24 | 25 | - HubClient string `json:"hub_client,omitempty"` 26 | - HubAddr string `json:"hub_addr,omitempty"` 27 | - HubKey string `json:"hub_key,omitempty"` 28 | + HubClient string `json:"hub_client,omitempty"` 29 | + HubAddr string `json:"hub_addr,omitempty"` 30 | + HubKey string `json:"hub_key,omitempty"` 31 | + HubSyncTime int `json:"hub_synctime"` 32 | + 33 | 34 | // List of email addresses to receive notifications when bugs are encountered for the first time (optional). 35 | // Mailx is the only supported mailer. Please set it up prior to using this function. 36 | diff --git a/pkg/mgrconfig/load.go b/pkg/mgrconfig/load.go 37 | index c11ee7f7..f71e740d 100644 38 | --- a/pkg/mgrconfig/load.go 39 | +++ b/pkg/mgrconfig/load.go 40 | @@ -56,14 +56,15 @@ func LoadPartialFile(filename string) (*Config, error) { 41 | 42 | func defaultValues() *Config { 43 | return &Config{ 44 | - SSHUser: "root", 45 | - Cover: true, 46 | - Ebpfsig: false, 47 | - Covfilter: false, 48 | - Reproduce: true, 49 | - Sandbox: "none", 50 | - RPC: ":0", 51 | - Procs: 1, 52 | + SSHUser: "root", 53 | + Cover: true, 54 | + Ebpfsig: false, 55 | + Covfilter: false, 56 | + Reproduce: true, 57 | + Sandbox: "none", 58 | + RPC: ":0", 59 | + HubSyncTime: 1, 60 | + Procs: 1, 61 | } 62 | } 63 | 64 | diff --git a/pkg/rpctype/rpctype.go b/pkg/rpctype/rpctype.go 65 | index fcc01a99..dc8c6892 100644 66 | --- a/pkg/rpctype/rpctype.go 67 | +++ b/pkg/rpctype/rpctype.go 68 | @@ -96,6 +96,7 @@ type HubSyncArgs struct { 69 | Del []string 70 | // Repros found since last sync. 
71 | Repros [][]byte 72 | + NeedCorpus bool 73 | } 74 | 75 | type HubSyncRes struct { 76 | diff --git a/syz-hub/hub.go b/syz-hub/hub.go 77 | index db8dd506..ab7adc52 100644 78 | --- a/syz-hub/hub.go 79 | +++ b/syz-hub/hub.go 80 | @@ -90,13 +90,26 @@ func (hub *Hub) Sync(a *rpctype.HubSyncArgs, r *rpctype.HubSyncRes) error { 81 | hub.mu.Lock() 82 | defer hub.mu.Unlock() 83 | 84 | - progs, more, err := hub.st.Sync(name, a.Add, a.Del) 85 | - if err != nil { 86 | - log.Logf(0, "sync error: %v", err) 87 | - return err 88 | + if a.NeedCorpus { 89 | + log.Logf(0, "Need corpus sync") 90 | + progs, more, err := hub.st.Sync(name, a.Add, a.Del) 91 | + if err != nil { 92 | + log.Logf(0, "sync error: %v", err) 93 | + return err 94 | + } 95 | + r.Progs = progs 96 | + r.More = more 97 | + } else { 98 | + log.Logf(0, "Send progs sync") 99 | + err := hub.st.SyncUpOnly(name, a.Add, a.Del) 100 | + if err != nil { 101 | + log.Logf(0, "sync up error: %v", err) 102 | + return err 103 | + } 104 | + r.Progs = nil 105 | + r.More = 0 106 | } 107 | - r.Progs = progs 108 | - r.More = more 109 | + 110 | for _, repro := range a.Repros { 111 | if err := hub.st.AddRepro(name, repro); err != nil { 112 | log.Logf(0, "add repro error: %v", err) 113 | @@ -112,7 +125,7 @@ func (hub *Hub) Sync(a *rpctype.HubSyncArgs, r *rpctype.HubSyncRes) error { 114 | } 115 | } 116 | log.Logf(0, "sync from %v: recv: add=%v del=%v repros=%v; send: progs=%v repros=%v pending=%v", 117 | - name, len(a.Add), len(a.Del), len(a.Repros), len(r.Progs), len(r.Repros), more) 118 | + name, len(a.Add), len(a.Del), len(a.Repros), len(r.Progs), len(r.Repros), r.More) 119 | return nil 120 | } 121 | 122 | diff --git a/syz-hub/state/state.go b/syz-hub/state/state.go 123 | index 7ccdf182..c238019c 100644 124 | --- a/syz-hub/state/state.go 125 | +++ b/syz-hub/state/state.go 126 | @@ -197,6 +197,27 @@ func (st *State) Sync(name string, add [][]byte, del []string) ([][]byte, int, e 127 | return progs, more, err 128 | } 129 | 130 | +func 
(st *State) SyncUpOnly(name string, add [][]byte, del []string) error { 131 | + mgr := st.Managers[name] 132 | + if mgr == nil || mgr.Connected.IsZero() { 133 | + return fmt.Errorf("unconnected manager %v", name) 134 | + } 135 | + if len(del) != 0 { 136 | + for _, sig := range del { 137 | + mgr.Corpus.Delete(sig) 138 | + } 139 | + if err := mgr.Corpus.Flush(); err != nil { 140 | + log.Logf(0, "failed to flush corpus database: %v", err) 141 | + } 142 | + st.purgeCorpus() 143 | + } 144 | + st.addInputs(mgr, add) 145 | + mgr.Added += len(add) 146 | + mgr.Deleted += len(del) 147 | + return nil 148 | +} 149 | + 150 | + 151 | func (st *State) AddRepro(name string, repro []byte) error { 152 | mgr := st.Managers[name] 153 | if mgr == nil || mgr.Connected.IsZero() { 154 | diff --git a/syz-manager/html.go b/syz-manager/html.go 155 | index 4a0a1fbc..e616898c 100644 156 | --- a/syz-manager/html.go 157 | +++ b/syz-manager/html.go 158 | @@ -124,6 +124,7 @@ func (mgr *Manager) collectStats() []UIStat { 159 | {Name: "uptime", Value: fmt.Sprint(time.Since(mgr.startTime) / 1e9 * 1e9)}, 160 | {Name: "fuzzing", Value: fmt.Sprint(mgr.fuzzingTime / 60e9 * 60e9)}, 161 | {Name: "corpus", Value: fmt.Sprint(len(mgr.corpus)), Link: "/corpus"}, 162 | + {Name: "last input", Value: fmt.Sprint(time.Since(mgr.lastInputTime) / 1e9 * 1e9)}, 163 | {Name: "triage queue", Value: fmt.Sprint(len(mgr.candidates))}, 164 | {Name: "cover", Value: fmt.Sprint(rawStats["cover"]), Link: "/cover"}, 165 | {Name: "signal", Value: fmt.Sprint(rawStats["signal"])}, 166 | diff --git a/syz-manager/hub.go b/syz-manager/hub.go 167 | index 5f85c8fe..a3c3716f 100644 168 | --- a/syz-manager/hub.go 169 | +++ b/syz-manager/hub.go 170 | @@ -48,14 +48,20 @@ type HubConnector struct { 171 | 172 | // HubManagerView restricts interface between HubConnector and Manager. 
173 | type HubManagerView interface { 174 | + getNoInputTime() time.Duration 175 | getMinimizedCorpus() (corpus, repros [][]byte) 176 | addNewCandidates(progs [][]byte) 177 | } 178 | 179 | func (hc *HubConnector) loop() { 180 | + noInput := false 181 | var hub *rpctype.RPCClient 182 | for { 183 | time.Sleep(time.Minute) 184 | + noCovTime := hc.mgr.getNoInputTime() 185 | + if noCovTime > time.Duration(hc.cfg.HubSyncTime)*time.Minute { 186 | + noInput = true 187 | + } 188 | corpus, repros := hc.mgr.getMinimizedCorpus() 189 | hc.newRepros = append(hc.newRepros, repros...) 190 | if hub == nil { 191 | @@ -66,7 +72,7 @@ func (hc *HubConnector) loop() { 192 | } 193 | log.Logf(0, "connected to hub at %v, corpus %v", hc.cfg.HubAddr, len(corpus)) 194 | } 195 | - if err := hc.sync(hub, corpus); err != nil { 196 | + if err := hc.sync(hub, corpus, noInput); err != nil { 197 | log.Logf(0, "hub sync failed: %v", err) 198 | hub.Close() 199 | hub = nil 200 | @@ -103,11 +109,12 @@ func (hc *HubConnector) connect(corpus [][]byte) (*rpctype.RPCClient, error) { 201 | return hub, nil 202 | } 203 | 204 | -func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error { 205 | +func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte, noInput bool) error { 206 | a := &rpctype.HubSyncArgs{ 207 | - Client: hc.cfg.HubClient, 208 | - Key: hc.cfg.HubKey, 209 | - Manager: hc.cfg.Name, 210 | + Client: hc.cfg.HubClient, 211 | + Key: hc.cfg.HubKey, 212 | + Manager: hc.cfg.Name, 213 | + NeedCorpus: noInput, 214 | } 215 | sigs := make(map[hash.Sig]bool) 216 | for _, inp := range corpus { 217 | @@ -147,9 +154,9 @@ func (hc *HubConnector) sync(hub *rpctype.RPCClient, corpus [][]byte) error { 218 | hc.stats.hubRecvRepro.add(len(r.Repros) - reproDropped) 219 | hc.stats.hubRecvReproDrop.add(reproDropped) 220 | log.Logf(0, "hub sync: send: add %v, del %v, repros %v;"+ 221 | - " recv: progs %v, repros %v; more %v", 222 | + " recv: progs %v, repros %v; more %v; need corpus: %v", 223 
| len(a.Add), len(a.Del), len(a.Repros), 224 | - len(r.Progs)-progDropped, len(r.Repros)-reproDropped, r.More) 225 | + len(r.Progs)-progDropped, len(r.Repros)-reproDropped, r.More, noInput) 226 | a.Add = nil 227 | a.Del = nil 228 | a.Repros = nil 229 | diff --git a/syz-manager/manager.go b/syz-manager/manager.go 230 | index 2af16686..46aa0228 100644 231 | --- a/syz-manager/manager.go 232 | +++ b/syz-manager/manager.go 233 | @@ -56,6 +56,7 @@ type Manager struct { 234 | startTime time.Time 235 | firstConnect time.Time 236 | fuzzingTime time.Duration 237 | + lastInputTime time.Time 238 | stats *Stats 239 | crashTypes map[string]bool 240 | vmStop chan bool 241 | @@ -164,6 +165,7 @@ func RunManager(cfg *mgrconfig.Config, target *prog.Target, sysTarget *targets.T 242 | reporter: reporter, 243 | crashdir: crashdir, 244 | startTime: time.Now(), 245 | + lastInputTime: time.Now(), 246 | stats: new(Stats), 247 | crashTypes: make(map[string]bool), 248 | enabledSyscalls: syscalls, 249 | @@ -838,6 +840,10 @@ func saveReproStats(filename string, stats *repro.Stats) { 250 | osutil.WriteFile(filename, []byte(text)) 251 | } 252 | 253 | +func (mgr *Manager) getNoInputTime() time.Duration { 254 | + return time.Now().Sub(mgr.lastInputTime) 255 | +} 256 | + 257 | func (mgr *Manager) getMinimizedCorpus() (corpus, repros [][]byte) { 258 | mgr.mu.Lock() 259 | defer mgr.mu.Unlock() 260 | @@ -954,6 +960,7 @@ func (mgr *Manager) machineChecked(a *rpctype.CheckArgs) { 261 | func (mgr *Manager) newInput(inp rpctype.RPCInput, sign signal.Signal) { 262 | mgr.mu.Lock() 263 | defer mgr.mu.Unlock() 264 | + mgr.lastInputTime = time.Now() 265 | sig := hash.String(inp.Prog) 266 | if old, ok := mgr.corpus[sig]; ok { 267 | // The input is already present, but possibly with diffent signal/coverage/call. 
268 | -- 269 | 2.20.1 270 | 271 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0005-Add-ret-ebpfsig-as-resource.patch: -------------------------------------------------------------------------------- 1 | From 9101f2871aa729cc9dd8812aec6c37f9b642fc1c Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 22:19:09 -0500 4 | Subject: [PATCH 5/6] Add ret ebpfsig as resource 5 | 6 | --- 7 | executor/executor.cc | 8 ++++---- 8 | pkg/rpctype/rpctype.go | 1 + 9 | prog/clone.go | 5 +++-- 10 | prog/rand.go | 2 +- 11 | syz-fuzzer/proc.go | 15 +++++++++++++++ 12 | syz-manager/html.go | 4 ++++ 13 | 6 files changed, 28 insertions(+), 7 deletions(-) 14 | 15 | diff --git a/executor/executor.cc b/executor/executor.cc 16 | index 28f0bdb1..fdff884a 100644 17 | --- a/executor/executor.cc 18 | +++ b/executor/executor.cc 19 | @@ -36,8 +36,8 @@ 20 | #endif 21 | 22 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 23 | -#define RETSTATE_SIG_MASK 0xf000000000000000 24 | -#define STATE_SIG_MASK 0xe000000000000000 25 | +/*#define RETSTATE_SIG_MASK 0xf000000000000000*/ 26 | +/*#define STATE_SIG_MASK 0xe000000000000000*/ 27 | #define COVERAGE_SIG_MASK 0xd0000000ffffffff 28 | 29 | 30 | @@ -905,12 +905,12 @@ void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover 31 | uint64_t state = 0; 32 | if (ret > 0) { 33 | state = strtol(buf, NULL, 16); 34 | - /* state signal start with 0xf 35 | + /* state/retstate signal start with 0xe/0xf 36 | * Write out real state for fuzzer's further handle 37 | */ 38 | if (flag_enable_ebpf_signal) { 39 | write_output64(state); 40 | - debug("A state signal %016lx\n", state); 41 | + debug("A state signal %016lx\n", (uint64)state); 42 | n = 0; 43 | nsig++; 44 | } 45 | diff --git a/pkg/rpctype/rpctype.go b/pkg/rpctype/rpctype.go 46 | index dc8c6892..357f0e96 100644 47 | --- a/pkg/rpctype/rpctype.go 48 | +++ b/pkg/rpctype/rpctype.go 49 | @@ -16,6 +16,7 @@ type RPCInput 
struct { 50 | Prog []byte 51 | Signal signal.Serial 52 | Cover []uint32 53 | + SpeRes bool 54 | } 55 | 56 | type RPCCandidate struct { 57 | diff --git a/prog/clone.go b/prog/clone.go 58 | index 5ad82c35..855ab09b 100644 59 | --- a/prog/clone.go 60 | +++ b/prog/clone.go 61 | @@ -5,8 +5,9 @@ package prog 62 | 63 | func (p *Prog) Clone() *Prog { 64 | p1 := &Prog{ 65 | - Target: p.Target, 66 | - Calls: make([]*Call, len(p.Calls)), 67 | + Target: p.Target, 68 | + Calls: make([]*Call, len(p.Calls)), 69 | + Resource: p.Resource, 70 | } 71 | newargs := make(map[*ResultArg]*ResultArg) 72 | for ci, c := range p.Calls { 73 | diff --git a/prog/rand.go b/prog/rand.go 74 | index 70a93687..6ade7dc7 100644 75 | --- a/prog/rand.go 76 | +++ b/prog/rand.go 77 | @@ -816,7 +816,7 @@ func resourceCentric(t *ResourceType, s *state, r *randGen) (resource *ResultArg 78 | for idx := range r.Perm(len(s.corpus)) { 79 | p = s.corpus[idx].Clone() 80 | resources := getCompatibleResources(p, t.TypeName, r) 81 | - if len(resources) > 0 { 82 | + if len(resources) > 0 && p.Resource { 83 | resource = resources[r.Intn(len(resources))] 84 | break 85 | } 86 | diff --git a/syz-fuzzer/proc.go b/syz-fuzzer/proc.go 87 | index 98deebb9..a38df022 100644 88 | --- a/syz-fuzzer/proc.go 89 | +++ b/syz-fuzzer/proc.go 90 | @@ -166,6 +166,12 @@ func (proc *Proc) triageInput(item *WorkTriage) { 91 | data := item.p.Serialize() 92 | sig := hash.Hash(data) 93 | 94 | + item.p.Resource = true 95 | + /* ebpfsig feedback */ 96 | + if proc.fuzzer.config.Flags&(1<<14) > 0 { 97 | + item.p.Resource = getResourceFlagFromSignal(item.info.Signal) 98 | + } 99 | + 100 | log.Logf(2, "added new input for %v to corpus:\n%s", logCallName, data) 101 | proc.fuzzer.sendInputToManager(rpctype.RPCInput{ 102 | Call: callName, 103 | @@ -204,6 +210,15 @@ func getSignalAndCover(p *prog.Prog, info *ipc.ProgInfo, call int) (signal.Signa 104 | return signal.FromRaw(inf.Signal, signalPrio(p, inf, call)), inf.Cover 105 | } 106 | 107 | +func 
getResourceFlagFromSignal(Signal []uint64) bool { 108 | + for _, sig := range Signal { 109 | + if sig&0xf000000000000000 == 0xf000000000000000 { 110 | + return true 111 | + } 112 | + } 113 | + return false 114 | +} 115 | + 116 | func (proc *Proc) smashInput(item *WorkSmash) { 117 | if proc.fuzzer.faultInjectionEnabled && item.call != -1 { 118 | proc.failCall(item.p, item.call) 119 | diff --git a/syz-manager/html.go b/syz-manager/html.go 120 | index e616898c..8c563ef7 100644 121 | --- a/syz-manager/html.go 122 | +++ b/syz-manager/html.go 123 | @@ -218,6 +218,7 @@ func (mgr *Manager) httpCorpus(w http.ResponseWriter, r *http.Request) { 124 | Short: p.String(), 125 | Cover: len(inp.Cover), 126 | Signal: inp.Signal.Deserialize(), 127 | + SpeRes: inp.SpeRes, 128 | }) 129 | } 130 | sort.Slice(data.Inputs, func(i, j int) bool { 131 | @@ -641,6 +642,7 @@ type UIInput struct { 132 | Short string 133 | Cover int 134 | Signal signal.Signal 135 | + SpeRes bool 136 | } 137 | 138 | var summaryTemplate = html.CreatePage(` 139 | @@ -786,11 +788,13 @@ var corpusTemplate = html.CreatePage(` 140 | 141 | Coverage 142 | Program 143 | + Resource 144 | 145 | {{range $inp := $.Inputs}} 146 | 147 | {{$inp.Cover}} 148 | {{$inp.Short}} 149 | + {{$inp.SpeRes}} 150 | 151 | {{end}} 152 | 153 | -- 154 | 2.20.1 155 | 156 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/0006-Add-monitot-binary-option-to-manager-configure.patch: -------------------------------------------------------------------------------- 1 | From 1d290b1cb2333d931942cad3bda9bda447942f5d Mon Sep 17 00:00:00 2001 2 | From: Bins94 3 | Date: Sun, 24 Nov 2019 22:23:26 -0500 4 | Subject: [PATCH 6/6] Add monitot binary option to manager configure 5 | 6 | --- 7 | executor/common_linux.h | 2 +- 8 | pkg/mgrconfig/config.go | 1 + 9 | syz-manager/manager.go | 4 ++++ 10 | 3 files changed, 6 insertions(+), 1 deletion(-) 11 | 12 | diff --git a/executor/common_linux.h 
b/executor/common_linux.h 13 | index bc58d71b..0a60a54f 100644 14 | --- a/executor/common_linux.h 15 | +++ b/executor/common_linux.h 16 | @@ -2164,7 +2164,7 @@ static int do_sandbox_none(void) 17 | close(monpipefd[0]); 18 | close(monpipefd[1]); 19 | debug("single ebpf start ...\n"); 20 | - execl("/root/pipe_monitor", "/root/pipe_monitor", "--debug", NULL); 21 | + execl("/pipe_monitor", "/pipe_monitor", "--debug", NULL); 22 | return 0; 23 | } 24 | /* ebpf loading is very slow, one time a vm restart */ 25 | diff --git a/pkg/mgrconfig/config.go b/pkg/mgrconfig/config.go 26 | index 0316d835..637ea249 100644 27 | --- a/pkg/mgrconfig/config.go 28 | +++ b/pkg/mgrconfig/config.go 29 | @@ -107,4 +107,5 @@ type Config struct { 30 | SyzFuzzerBin string `json:"-"` 31 | SyzExecprogBin string `json:"-"` 32 | SyzExecutorBin string `json:"-"` 33 | + SyzMonitorBin string `json:"ebpfmonitor"` 34 | } 35 | diff --git a/syz-manager/manager.go b/syz-manager/manager.go 36 | index 46aa0228..aab55c7e 100644 37 | --- a/syz-manager/manager.go 38 | +++ b/syz-manager/manager.go 39 | @@ -536,6 +536,10 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) { 40 | if err != nil { 41 | return nil, fmt.Errorf("failed to copy binary: %v", err) 42 | } 43 | + _, err = inst.Copy(mgr.cfg.SyzMonitorBin) 44 | + if err != nil { 45 | + return nil, fmt.Errorf("failed to copy binary: %v", err) 46 | + } 47 | 48 | fuzzerV := 0 49 | procs := mgr.cfg.Procs 50 | -- 51 | 2.20.1 52 | 53 | -------------------------------------------------------------------------------- /syzkaller/multi_policy/README.md: -------------------------------------------------------------------------------- 1 | # Multiple policy fuzzer( syz-hub) 2 | 3 | ## Original syz-hub 4 | Syz-hub is a great tool to connect all the syz-managers. After all syz-managers connect to syz-hub, Every syz-manager will exchange their whole corpus with each other. 
This is called "Sync" in syz-hub; the interval of "Sync" is one minute, as you can see from the time.Sleep() in syz-manager/hub.go:loop(). After "Sync", every manager will check if the received progs can hit more coverage. You can immediately see a great number of "triage queue" entries after "Sync". That means syz-managers with different configurations can also exchange progs with each other. 5 | 6 | ## Policy of fuzzer 7 | ### Original syzkaller fuzzer 8 | Actually, there are several mechanisms in syzkaller: 9 | 1. The feedback( coverage) of a prog determines if it can be sent to the corpus 10 | 2. The corpus will affect prog generation( by mutation and the syscall choice-table, it affects the probability) 11 | 3. Generated progs determine which feedback may be received. 12 | 13 | Syzkaller runs these iteratively, and the feedback largely determines where to fuzz. Original syzkaller uses coverage of the whole kernel as feedback. So, syzkaller is a coverage-guided fuzzer of the kernel. And "coverage-guided" is what we call the policy of syzkaller. 14 | 15 | ### Faster or deeper fuzzer 16 | We have done some surveys of different-policy syzkaller. They show that there are several points that can be optimized if you want a directed fuzzer, for example, if you only want to fuzz a sub-system of the kernel. The customizations can be listed as: 17 | 1. Limit the coverage to a smaller scope. This includes building the kernel with partial coverage( KCOV_INSTRUMENT_ALL=n) and filtering coverage( by address). 18 | 2. Add other feedback. For example, we use ebpf to collect the state of sockets as feedback. 19 | 3. Directed fault-injection helps cover the corners that otherwise wouldn't be covered. 20 | 21 | Both 1 and 2 change the feedback of syzkaller. 1 limits syzkaller to fuzzing a smaller scope of the kernel. 2 directly introduces other feedback into syzkaller. 22 | Our tests show that using these features properly can make syzkaller more directed, deeper and faster. 
23 | 24 | ## Customize syz-hub 25 | ### Connect syz-managers with different policies 26 | It could be useful to connect syz-managers with different policies. Different syz-managers focus on different sub-systems or different scopes. 27 | For example, one of the syz-managers fuzzes the whole kernel, while the others fuzz several sub-systems. It takes less time to fuzz the deeper corners( sub-systems). And the corpus can be synced to all managers( including the whole-kernel one). In other words, the deeper or faster fuzzers can be synced to the wide and shallow fuzzer. 28 | 29 | ### Customized features of syz-hub 30 | Original syz-hub does "Sync" once a minute. We know the corpus will affect prog generation, so frequent syncing will guide all syz-managers to fuzz the same scope of the kernel. Splitting the upload( sending out progs) and download( receiving progs from the hub) of corpus sync shows better performance. The upload is always done when "Sync" is called, and the download sync is only done if there is no new coverage after a long time. So what we need to do is: 31 | syz-hub: split the upload and download of corpus sync. 32 | syz-manager: add an option for configuring the sync time. Only download the corpus if there has not been any input for a long time. 33 | 34 | ## Patch and usage 35 | ### Patch 36 | These patches are based on syz-0d1034: 37 | 1. Add ebpf feedback 38 | 2. Filter coverage by address 39 | 3. Configurable ebpfsig and coverage filtering 40 | 4. Split the upload and download of sync 41 | 42 | ### Usage 43 | For patches 1 and 2, refer to [this](../kstat_demo/README.md). 44 | After applying patch 3, you need to specify some new options for syz-manager: 45 | * ebpfsig: true/false 46 | * covfilter: true/false 47 | After applying patch 4, you need to specify a new option for syz-manager: 48 | * hub_synctime: an integer 49 | This option specifies after how many minutes without any input a syz-manager can receive progs. 50 | Then you can run syzkaller as usual. 
51 | 52 | ## A test for tcp/ipv6 53 | ### Original syz-hub 54 | syz-manager1: Only enable syscalls for tcp/ipv6 55 | syz-manager2: Only enable syscalls for tcp/ipv6 56 | sync time: once a minute 57 | run time: 2h30min 58 | coverage( choosing the maximum): 59 | 60 | | coverage | 1 | 2 | 3 | 4 | 5 | 6 | average | 61 | |----------| - | - | - | - | - | - | ------- | 62 | | total |10514 |9869 |10583 |10347 |10611 |8916 |10140 | 63 | | tcp.c |462 |460 |346 |471 |491 |359 |432 | 64 | 65 | (Most of the handler functions of tcp/ipv6 are in tcp.c) 66 | 67 | ### Multi-policy syz-hub 68 | syz-manager1: Only enable syscalls for tcp/ipv6 69 | syz-manager2: Only enable syscalls for tcp/ipv6, add ebpf to collect socket state as feedback, limit coverage to tcp/ipv6 kernel functions. 70 | sync time: 3/4 minutes without any input 71 | run time: 2h30min 72 | coverage( choosing the maximum): 73 | 74 | | coverage | 1 | 2 | 3 | 4 | 5 | 6 | average | 75 | |----------| - | - | - | - | - | - | ------- | 76 | | total |9962 |10060 |9356 |10832 |8952 |10122 |9879 | 77 | | tcp.c |487 |525 |507 |506 |515 |493 |506 | 78 | 79 | ### Result 80 | * One of the syz-managers focuses on tcp/ipv6 fuzzing. It has a 2% decrease in total coverage. This is because we use one of the two syz-managers to fuzz a smaller scope. 81 | * The introduction of ebpf feedback gives a 17% increase in tcp.c coverage. That means our directed fuzzer does well in fuzzing the deeper corners. -------------------------------------------------------------------------------- /syzkaller/syzkaller.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hardenedlinux/harbian-qa/47e0e3dc3a2187d8c34befa2cdb60aea4b9a1451/syzkaller/syzkaller.png --------------------------------------------------------------------------------