├── .gitattributes ├── .gitignore ├── .travis.yml ├── Documentation ├── BUILDING.rst ├── CONTRIBUTIONS.rst ├── SPEC.rst └── TODO.rst ├── ISSUE_TEMPLATE.md ├── LICENSE ├── Makefile ├── Makefile.windows ├── README.md ├── _config.yml ├── appveyor.yml ├── bitmap.h ├── compiler.h ├── epage.c ├── exit.c ├── hotplug.c ├── introspect.c ├── inttypes.h ├── ksm.c ├── ksm.h ├── ksm ├── ksm.sln ├── ksm │ ├── ksm.inf │ ├── ksm.vcxproj │ └── ksm.vcxproj.filters ├── ksm_tests │ ├── ksm_tests.inf │ ├── ksm_tests.vcxproj │ └── ksm_tests.vcxproj.filters └── ksm_um │ ├── ksm_um.vcxproj │ └── ksm_um.vcxproj.filters ├── list.h ├── main_linux.c ├── main_nt.c ├── make_nonpagable.py ├── mm.c ├── mm.h ├── percpu.h ├── print.c ├── resubv.c ├── sandbox.c ├── tests ├── README.rst ├── allgood.c ├── ept.c ├── export.h ├── fail_entry.c ├── go_vmx.c ├── main.c ├── vmx_exit.c ├── vmx_setup.c └── x64.asm ├── um ├── um.c └── um.h ├── vcpu.c ├── vmx.S ├── vmx.asm ├── vmx.h └── x86.h /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 
11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | [Xx]64/ 19 | [Xx]86/ 20 | [Bb]uild/ 21 | bld/ 22 | [Bb]in/ 23 | [Oo]bj/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | # Uncomment if you have tasks that create the project's static files in wwwroot 28 | #wwwroot/ 29 | 30 | # MSTest test Results 31 | [Tt]est[Rr]esult*/ 32 | [Bb]uild[Ll]og.* 33 | 34 | # NUNIT 35 | *.VisualState.xml 36 | TestResult.xml 37 | 38 | # Build Results of an ATL Project 39 | [Dd]ebugPS/ 40 | [Rr]eleasePS/ 41 | dlldata.c 42 | 43 | # DNX 44 | project.lock.json 45 | artifacts/ 46 | 47 | *.o 48 | *_i.c 49 | *_p.c 50 | *_i.h 51 | *.ilk 52 | *.meta 53 | *.obj 54 | *.pch 55 | *.pdb 56 | *.pgc 57 | *.pgd 58 | *.rsp 59 | *.sbr 60 | *.tlb 61 | *.tli 62 | *.tlh 63 | *.tmp 64 | *.tmp_proj 65 | *.log 66 | *.vspscc 67 | *.vssscc 68 | .builds 69 | *.pidb 70 | *.svclog 71 | *.scc 72 | 73 | # Chutzpah Test files 74 | _Chutzpah* 75 | 76 | # Visual C++ cache files 77 | ipch/ 78 | *.aps 79 | *.ncb 80 | *.opendb 81 | *.opensdf 82 | *.sdf 83 | *.cachefile 84 | *.VC.db 85 | 86 | # Visual Studio profiler 87 | 
*.psess 88 | *.vsp 89 | *.vspx 90 | *.sap 91 | 92 | # TFS 2012 Local Workspace 93 | $tf/ 94 | 95 | # Guidance Automation Toolkit 96 | *.gpState 97 | 98 | # ReSharper is a .NET coding add-in 99 | _ReSharper*/ 100 | *.[Rr]e[Ss]harper 101 | *.DotSettings.user 102 | 103 | # JustCode is a .NET coding add-in 104 | .JustCode 105 | 106 | # TeamCity is a build add-in 107 | _TeamCity* 108 | 109 | # DotCover is a Code Coverage Tool 110 | *.dotCover 111 | 112 | # NCrunch 113 | _NCrunch_* 114 | .*crunch*.local.xml 115 | nCrunchTemp_* 116 | 117 | # MightyMoose 118 | *.mm.* 119 | AutoTest.Net/ 120 | 121 | # Web workbench (sass) 122 | .sass-cache/ 123 | 124 | # Installshield output folder 125 | [Ee]xpress/ 126 | 127 | # DocProject is a documentation generator add-in 128 | DocProject/buildhelp/ 129 | DocProject/Help/*.HxT 130 | DocProject/Help/*.HxC 131 | DocProject/Help/*.hhc 132 | DocProject/Help/*.hhk 133 | DocProject/Help/*.hhp 134 | DocProject/Help/Html2 135 | DocProject/Help/html 136 | 137 | # Click-Once directory 138 | publish/ 139 | 140 | # Publish Web Output 141 | *.[Pp]ublish.xml 142 | *.azurePubxml 143 | 144 | # TODO: Un-comment the next line if you do not want to checkin 145 | # your web deploy settings because they may include unencrypted 146 | # passwords 147 | #*.pubxml 148 | *.publishproj 149 | 150 | # NuGet Packages 151 | *.nupkg 152 | # The packages folder can be ignored because of Package Restore 153 | **/packages/* 154 | # except build/, which is used as an MSBuild target. 
155 | !**/packages/build/ 156 | # Uncomment if necessary however generally it will be regenerated when needed 157 | #!**/packages/repositories.config 158 | # NuGet v3's project.json files produces more ignoreable files 159 | *.nuget.props 160 | *.nuget.targets 161 | 162 | # Microsoft Azure Build Output 163 | csx/ 164 | *.build.csdef 165 | 166 | # Microsoft Azure Emulator 167 | ecf/ 168 | rcf/ 169 | 170 | # Microsoft Azure ApplicationInsights config file 171 | ApplicationInsights.config 172 | 173 | # Windows Store app package directory 174 | AppPackages/ 175 | BundleArtifacts/ 176 | 177 | # Visual Studio cache files 178 | # files ending in .cache can be ignored 179 | *.[Cc]ache 180 | # but keep track of directories ending in .cache 181 | !*.[Cc]ache/ 182 | 183 | # Others 184 | ClientBin/ 185 | [Ss]tyle[Cc]op.* 186 | ~$* 187 | *~* 188 | *.dbmdl 189 | *.dbproj.schemaview 190 | *.pfx 191 | *.publishsettings 192 | node_modules/ 193 | orleans.codegen.cs 194 | 195 | # RIA/Silverlight projects 196 | Generated_Code/ 197 | 198 | # Backup & report files from converting an old project file 199 | # to a newer Visual Studio version. 
Backup files are not needed, 200 | # because we have git ;-) 201 | _UpgradeReport_Files/ 202 | Backup*/ 203 | UpgradeLog*.XML 204 | UpgradeLog*.htm 205 | 206 | # SQL Server files 207 | *.mdf 208 | *.ldf 209 | 210 | # Business Intelligence projects 211 | *.rdl.data 212 | *.bim.layout 213 | *.bim_*.settings 214 | 215 | # Microsoft Fakes 216 | FakesAssemblies/ 217 | 218 | # GhostDoc plugin setting file 219 | *.GhostDoc.xml 220 | 221 | # Node.js Tools for Visual Studio 222 | .ntvs_analysis.dat 223 | 224 | # Visual Studio 6 build log 225 | *.plg 226 | 227 | # Visual Studio 6 workspace options file 228 | *.opt 229 | 230 | # Visual Studio LightSwitch build output 231 | **/*.HTMLClient/GeneratedArtifacts 232 | **/*.DesktopClient/GeneratedArtifacts 233 | **/*.DesktopClient/ModelManifest.xml 234 | **/*.Server/GeneratedArtifacts 235 | **/*.Server/ModelManifest.xml 236 | _Pvt_Extensions 237 | 238 | # LightSwitch generated files 239 | GeneratedArtifacts/ 240 | ModelManifest.xml 241 | 242 | # Paket dependency manager 243 | .paket/paket.exe 244 | 245 | # FAKE - F# Make 246 | .fake/ 247 | 248 | # Linux 249 | *.symvers 250 | *.ko 251 | *.cmd 252 | .tmp_versions/ 253 | *.mod.c 254 | *.order 255 | 256 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: linux 2 | language: c 3 | compiler: gcc 4 | dist: trusty 5 | sudo: required 6 | 7 | cache: 8 | apt: true 9 | 10 | notifications: 11 | email: false 12 | 13 | env: 14 | global: 15 | - secure: 
"bO7lVBHOi821tVLdtk0M38C5eVPeoEdt3EGTZSU8O4R34geUmjYlrBTZycP+6W3LhzeuZ319vcEfGmNYbp/ZOIqZd5ix01a+ALixgrxtfS2ar37HDTPxnr60Y3cBJzfTmIZD0zVSo443V8Yiyqh7hBGjLdy8/JvKLXxQtyhGC6B/hmmVomu9JCNOCYPgXVQzLQMOhKWj4KovvPn6UoEAKBEyKtryxl+mDLKKmfg8CWn+mi/75CbcGtpXpsCVMMIF8Z2At7Yo0dn6UZLX2nVHYVpyY/lp8WR3BgJxNAVApHQkfOJAm0ElSDyo7kFAnv4HAhlpLba2iH4HMvf2o/b+QPKKljahJuzg4pRtUVrV8e7qzTFOYwcmrS2ZUQC5M7xaNyPreBVhz+dOsvdVrt0fGsZjEgPNUcnWRY2UQSel/rulECAECgysaJ/zWs6KrntSMv6GTXuDK7Svv+RlfnqKishKYRdaKpC83DO09hW0+AO6tXDNgqhPQYb6mopXGWG0Hzgo8c6Pq4Qki7ez2QPQoVnq0Fff6cgqVnRbIo8xHb4M8WBFtRTZunL/K8EuPZ0FBIiQIvMujBz1ic4dnUtfLhmXouR+73/NzPE+qgGafW9BOJ9W/qnHfF7SIkSATMquP9wIAdZOXsYGpiyv59h2GZLaW0zQUmZlRaahnCI2UlU=" 16 | 17 | matrix: 18 | fast_finish: true 19 | exclude: 20 | - compiler: gcc 21 | include: 22 | - os: linux 23 | env: 24 | - B=MinGW 25 | install: 26 | - sudo apt-get install -y gcc-mingw-w64-x86-64 binutils-mingw-w64-x86-64 27 | script: 28 | - make -f Makefile.windows C=1 Q=1 29 | - os: linux 30 | env: 31 | - B=Linux 32 | before_install: 33 | - echo -n | openssl s_client -connect scan.coverity.com:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | sudo tee -a /etc/ssl/certs/ca-certificates.crt 34 | install: 35 | - sudo apt-get install -y linux-headers-$(uname -r) 36 | script: 37 | - make 38 | addons: 39 | coverity_scan: 40 | project: 41 | name: "asamy/ksm" 42 | description: "Build submitted via Travis CI" 43 | notification_email: f.fallen45@gmail.com 44 | build_command_prepend: "make clean" 45 | build_command: "make" 46 | branch_pattern: master 47 | -------------------------------------------------------------------------------- /Documentation/BUILDING.rst: -------------------------------------------------------------------------------- 1 | Building 2 | ======== 3 | 4 | Enabling certain features / tests 5 | --------------------------------- 6 | 7 | You can define one or more of the following: 8 | 9 | - `INTROSPECT_ENGINE` - Enables a tiny physical memory introspection engine 10 | - 
`PMEM_SANDBOX` - Enables userspace physical memory virtualizer 11 | - `EPAGE_HOOK` - Enables executable page shadow hook 12 | - `ENABLE_PML` - Enables Page Modification Log if supported. 13 | - `ENABLE_RESUBV` - Enable S1-3-S4 power state monitoring for re-virtualization 14 | - `NESTED_VMX` - Enable experimental VT-x nesting 15 | - `ENABLE_FILEPRINT` - Available on Windows only. Enables logging to disk 16 | - `ENABLE_DBGPRINT` - Available on Windows only. Enables `DbgPrint` log. 17 | - `VCPU_TRACER_LOG` - Outputs a useless message on some VM-Exit handlers, this can be replaced with something more useful such as performance measurements, etc. See `ksm.h` for more information. 18 | 19 | Building for Linux 20 | ------------------ 21 | 22 | Install kernel headers: 23 | 24 | - Debian/Ubuntu: `[sudo] apt-get install linux-headers-$(uname -r)` 25 | - ArchLinux: `[sudo] pacman -S linux-headers` 26 | - Fedora: `[sudo] yum install kernel-devel kernel-headers` 27 | 28 | Targets: 29 | 30 | - `all` - Build the kernel module and the userspace app 31 | - `umk` - Build the userspace app only 32 | - `dri` - Build the kernel module only 33 | - `clean` - Clean everything 34 | - `install` - Installs to kernel module dir (root required) 35 | - `load` - Load the kernel module (root required) 36 | - `unload` - Unload the kernel module (root required) 37 | 38 | Then `make TARGET`, e.g.: `make umk` (all is default). 39 | 40 | Building for Windows 41 | -------------------- 42 | 43 | Under MinGW 44 | ---------------------- 45 | 46 | **Warning**: The MinGW build is known to be unstable under Windows 10, so it's 47 | not recommended, rather use the VS project to compile for Windows. 48 | 49 | Makefile variables 50 | ------------------- 51 | 52 | You can pass one or more of the following variables to your `make` command: 53 | 54 | - `WINVER=0x0602` - Explicitly specify Windows version to build for. 55 | - `C=1` - Prepare for cross-compiling. 56 | - `Q=1` - Be quiet. 
57 | - `BIN_DIR=arg` - Generate binary and symbols to this directory 58 | - `OBJ_DIR=arg` - Generate object files to this directory 59 | - `DEP_DIR=arg` - Generate dependency files to this directory 60 | - `CROSS_INC=arg` - Path to include directory if they reside in a special place 61 | - `CROSS_LIB=arg` - Path to library directory if they reside in a special place 62 | - `CROSS_BUILD=arg` - Prefix to toolchain binaries (e.g. `x86_64-w64-mingw32-`) 63 | 64 | Targets: 65 | 66 | - `all` - Builds the driver and the usermode app 67 | - `umk` - Build just the usermode app 68 | - `dri` - Build just the driver 69 | - `clean` - Clean everything 70 | 71 | You may need to adjust the windows version you're compiling for, in that case 72 | adjust `WINVER` inside the Makefile manually or pass it through commandline: 73 | 74 | make -f Makefile.windows C=1 WINVER=0x0602 all 75 | 76 | Cross under Linux 77 | ----------------- 78 | 79 | Install the following packages: 80 | 81 | - Debian/Ubuntu: `[sudo] apt-get install gcc-mingw-w64-x86-64 binutils-mingw-w64-x86-64` 82 | - ArchLinux: `[sudo] pacman -S mingw-w64-gcc` 83 | - Fedora: `[sudo] yum install mingw64-gcc` 84 | 85 | Then `make -f Makefile.windows C=1 all` 86 | 87 | Under Native 88 | ------------ 89 | 90 | Natively, you'll want to adjust (or pass in command line) DDK paths, e.g.: 91 | 92 | `mingw32-make -f Makefile.windows CROSS_INC=/path/to/include/ddk all` 93 | 94 | Or, simply just edit Makefile.windows manually. Also make sure to adjust your 95 | environment variables (PATH) to point to the right `bin/` directory where the 96 | compiler, etc lie. 97 | 98 | Compiling under MSVC 99 | -------------------- 100 | 101 | The solution under `ksm/` directory is a VS 2015 solution. 102 | 103 | To build it under MSVC, you'll need the following: 104 | 105 | 1. VS2015_ 106 | 2. SDK_ 107 | 3. WDK_ 108 | 109 | .. _VS2015: https://www.visualstudio.com/downloads/ 110 | .. 
_SDK: https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk 111 | .. _WDK: https://developer.microsoft.com/en-us/windows/hardware/windows-driver-kit 112 | 113 | **NOTE**: You need to adjust the Windows version you are targeting via the 114 | project properties when inside Visual Studio, right click the Project (`ksm`) 115 | then go to Driver Settings -> General -> Target OS Version. 116 | 117 | Then you can build it via either the VS interface (right click the project then build), 118 | or the hard way if you prefer, from VS command line, simply cd to where `ksm` is and: 119 | 120 | msbuild ksm\ksm.sln 121 | 122 | Or: 123 | 124 | msbuild ksm\ksm\ksm.vcxproj 125 | 126 | Loading the driver 127 | ------------------ 128 | 129 | On Linux: 130 | 131 | - Load: `sudo make load` 132 | - Unload: `sudo make unload` 133 | - Output: `sudo dmesg -wH` 134 | 135 | On Windows: 136 | 137 | In commandline as administrator: 138 | 139 | 1. `sc create ksm type= kernel binPath= C:\path\to\your\ksm.sys` 140 | 2. `sc start ksm` 141 | 142 | Unloading: 143 | 144 | `sc stop ksm` 145 | 146 | Output can be seen via DebugView or WinDBG if live debugging. 147 | Note: You might want to execute `ed Kd_DEFAULT_Mask 8` to see any output. 148 | 149 | **Note for Windows 10**: DebugView seems to be having problems starting a 2nd 150 | time there, to work around this, rename its driver 151 | `C:\windows\system32\drivers\Dbgv.sys` to something else, then start it again. 152 | 153 | Using the driver 154 | ---------------- 155 | 156 | Since you started it, it does nothing, it's waiting for the usermode app to 157 | instruct it, to do so, run the usermode app as root/admin which will run an 158 | IOCTL to the driver to tell it to virtualize the system, then you can give it 159 | Process Identifiers (PIDs) to sandbox. 
160 | 161 | -------------------------------------------------------------------------------- /Documentation/CONTRIBUTIONS.rst: -------------------------------------------------------------------------------- 1 | Contributions 2 | ============= 3 | 4 | Contributions are really appreciated and can be submitted by one of the following: 5 | 6 | - Patches (e-mail) 7 | - Github pull requests 8 | - git request-pull 9 | 10 | See also TODO.rst for more information on what you can do. 11 | 12 | The following is _not_ required, but **preferred**: 13 | 14 | - Put your copyright on top of the file(s) you edit along with a tiny description 15 | with your changes. 16 | - Format your git commit messages properly (A signed-off-by is good but 17 | **not** required, note: you can use `git commit --signoff` instead of writing 18 | manually. See also Linux kernel contribution guidelines for more perks): 19 | 20 | Code Style 21 | ---------- 22 | 23 | KSM uses the Linux kernel coding style, if you're unfamiliar with that, there 24 | are multiple editor configurations that adhere to this available on the 25 | internet, for vim you can use my configuration, here: 26 | 27 | https://github.com/asamy/vimfiles 28 | 29 | To make this short, these are the rules: 30 | 31 | - Use 8 tabs (Not spaces!) 
32 | - Preprocessor macros: leftmost column 33 | - Labels: leftmost column 34 | - Lines should be aligned relative to opening parenthesis 35 | - Case contents are not indented, but relative to the switch statement 36 | - Opening braces for structures is on the same line 37 | - Opening braces for functions/control-blocks is on the _next_ line 38 | - Opening braces for scopes is on the _next_ line 39 | - Closing braces for empty or inlined structures should be on the same line as the structure definition 40 | - If-else if-else braces should be on the same line as the parenthesis, 41 | - Do not use braces for one-lined if/elseif/else 42 | 43 | Setting up your git tree (If you're unfamiliar with Git) 44 | -------------------------------------------------------- 45 | 46 | For the sake of simplicity, we're going to use some placeholder names: 47 | 48 | 1. `LOCAL_BRANCH` - is your local branch you're going to be committing to (e.g. `my-changes`). 49 | 2. `REMOTE_BRANCH` - is the branch name you have in your remote repository (e.g. `pull-me`, can be the same as `LOCAL_BRANCH`). 50 | 3. `REMOTE_URL` - Your remote repository URL (e.g. https://github.com/XXX/ksm). 51 | Note: you do not have to have a remote repository, you can commit to 52 | your local copy, then just use patches, see below. 53 | 4. `USER_NAME` - Your github username 54 | 55 | Clone the repository locally: 56 | 57 | git clone https://github.com/USER_NAME/ksm 58 | 59 | **Note**: replace USER_NAME with mine (asamy) if you're not going to use 60 | pull-requests. 61 | 62 | Switch to a new branch: 63 | 64 | git checkout -b LOCAL_BRANCH 65 | 66 | Setup remote (**Optional**: skip if you want to use the full URL each time): 67 | 68 | git remote add upstream https://github.com/asamy/ksm 69 | 70 | If there are changes in my tree that you want to get, then: 71 | 72 | git pull --rebase upstream master 73 | 74 | This will rebase your local changes on top of my tree. 
75 | 76 | **Note**: If you skipped remote setup, then replace `upstream` with the 77 | URL. 78 | 79 | **Note**: You might want to switch to the master branch first to pull 80 | my changes there, then switch back to your branch, then merge them 81 | together later using `git merge --ff master` 82 | 83 | If you have local changes, `--rebase` will stop and ask you to commit, you can 84 | do this without committing: 85 | 86 | git stash && git pull --rebase upstream master && git stash pop 87 | 88 | What this does is 1) stashes your changes, 2) pulls my changes and prepares to 89 | rebase your stashed changes on top of mine, 3) pops the stashed changes on 90 | top, if there are any conflicts, then it will let you know and you should fix them. 91 | 92 | Then commit your changes: 93 | 94 | git commit -a --signoff -m "commit message" 95 | 96 | Submitting your changes 97 | ----------------------- 98 | 99 | If you're going to use patches, then simply: 100 | 101 | git format-patch HEAD~X 102 | 103 | Where X is the number of commits to create patches from, can be omitted to 104 | take HEAD (i.e. most recent) commit only, e.g.: 105 | 106 | git format-patch HEAD~ 107 | 108 | (You can use commit hashes, too.) 109 | 110 | You can then use the patch file(s) as an attachment and e-mail them manually, or 111 | you can use `git send-email` to do it for you. 112 | 113 | Using pull requests 114 | ------------------- 115 | 116 | You have 2 options (if using 1st, then skip the rest): 117 | 118 | 1. If you're using github fork, you can just use the github pull request 119 | interface. 120 | 2. If you're going to use git request-pull, follow the next section. 121 | 122 | Note: You should always push your changes before making a pull request 123 | (regardless of the option used), like this: 124 | 125 | git push origin REMOTE_BRANCH 126 | 127 | Using git-request-pull 128 | ---------------------- 129 | 130 | (Skip this if you're using Github pull requests.) 
131 | 132 | Usage: 133 | 134 | git request-pull START_COMMIT REPOSITORY_URL END_COMMIT 135 | 136 | To summarize a branch's changes: 137 | 138 | git request-pull abcd https://github.com/USER_NAME/ksm HEAD 139 | 140 | This summarizes the changes from commit `abcd` to `HEAD`; you can then 141 | e-mail me that summary. 142 | 143 | You can also use: 144 | 145 | git request-pull master https://github.com/USER_NAME/ksm LOCAL_BRANCH:REMOTE_BRANCH 146 | 147 | This summarizes the changes since the local master branch, 148 | which is a "diff" between my tree and yours. 149 | 150 | `REMOTE_BRANCH` can be omitted if same as `LOCAL_BRANCH`. 151 | You can also specify a tag of your choice, in that case, use tag names instead 152 | of commit hashes/branch names. 153 | 154 | -------------------------------------------------------------------------------- /Documentation/TODO.rst: -------------------------------------------------------------------------------- 1 | TODO / In development 2 | --------------------- 3 | 4 | - APIC virtualization (Partially implemented, needs testing & fixes) 5 | - UEFI support 6 | - Intel TXT support 7 | - AMD-V with NPT support 8 | - Nesting support (Some fixes needed and support for minor features) 9 | - Introspection 10 | - More documentation 11 | - Finish writing tests 12 | - Failsafe state (e.g. when an unexpected thing happens, turn off and restore state to a valid one.) 13 | 14 | See also Github issues. Some of these features are unfortunately not 15 | (fully) implemented due to lack of hardware (support) or similar. 16 | 17 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 7 | 8 | ### Type of this issue (please specify) 9 | 10 | - [ ] This is a bug in the upstream tree as-is unmodified. 11 | - [ ] This is a support matter (i.e. 
your own modified tree) 12 | - [ ] This is a technical question 13 | 14 | ### System information 15 | 16 | 1. CPU: WRITE_HERE (Codename: AND_HERE) 17 | 2. Kernel: WRITE_HERE 18 | 3. Kernel version: WRITE_HERE Build number: AND_HERE 19 | 20 | ### Build Configuration 21 | 22 | - EPAGE_HOOK 23 | - PMEM_SANDBOX 24 | - ... 25 | 26 | **REMOVE IF UNRELATED** 27 | 28 | ### Issue description 29 | 30 | Write as much as you can, reference the files that you uploaded in the uploaded 31 | section with their respective numbers instead of names, e.g. [1] then it refers 32 | to #1 file in "Files" section. 33 | 34 | ### Files 35 | 36 | Crash related files, etc, remove this text and fill the lines with something if 37 | any, otherwise completely remove this section. 38 | 39 | 1. FILE 1 (DESC IF POSSIBLE): https://downloadlinkhere/ 40 | 2. FILE 2 (DESC IF POSSIBLE): https://heregoesthedownloadlink/ 41 | 3. and so on. 42 | 43 | Please refer to each file uploaded in this section in the Issue description 44 | part, by using [x] where x is the number of the file here. 45 | 46 | If it's a crash you'll need to upload: 47 | 48 | #### For Windows 49 | 50 | - A minidump (C:\windows\minidump) or a memory dump (C:\windows\memory.dmp). The former is preferred. 51 | - The compiled .sys and the .pdb/.dbg file 52 | - The Kernel executable if possible, e.g. ntoskrnl.exe from C:\Windows\System32 53 | 54 | #### For Linux 55 | 56 | - `ksmlinux.ko` and `ksmlinux.o` 57 | - Stack dump from dmesg or kernel panic 58 | 59 | **REMOVE IF NOTHING PROVIDED.** 60 | 61 | ### Stack Trace 62 | 63 | Paste stack trace text here if any. **REMOVE IF NOTHING PROVIDED.** 64 | 65 | ### Current Behavior 66 | 67 | Replace this body with your own, provide what the current behavior is. **REMOVE 68 | IF UNRELATED.** 69 | 70 | ### Expected Behavior 71 | 72 | Replace this body with your own, provide what you're expecting. 
**REMOVE IF 73 | UNRELATED.** 74 | 75 | ### Inline code / patches to be used when reproducing 76 | 77 | Please only use this if the code really is inline, i.e. up to 40 lines of code 78 | here, otherwise upload a patch via the Uploaded files 79 | 80 | **REMOVE IF NOTHING PROVIDED.** 81 | 82 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # ksm - a really simple and fast x64 hypervisor 3 | # Copyright (C) 2016, 2017 Ahmed Samy 4 | # 5 | # Makefile for the Linux kernel module only. 6 | # 7 | # This program is free software; you can redistribute it and/or modify it 8 | # under the terms and conditions of the GNU General Public License, 9 | # version 2, as published by the Free Software Foundation. 10 | # 11 | # This program is distributed in the hope it will be useful, but WITHOUT 12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | # more details. 15 | # 16 | # You should have received a copy of the GNU General Public License along with 17 | # this program; If not, see . 
18 | obj-m += ksmlinux.o 19 | ksmlinux-objs := exit.o hotplug.o introspect.o ksm.o sandbox.o \ 20 | epage.o resubv.o vcpu.o mm.o main_linux.o vmx.o 21 | ccflags-y := -Wno-unused-variable -Wno-declaration-after-statement -Wno-unused-function \ 22 | -DDBG -DENABLE_PRINT -std=gnu99 23 | 24 | UM_SRC := um/um.c 25 | UM_BIN := a.out 26 | 27 | BIN := ksmlinux.ko 28 | KVERSION := $(shell uname -r) 29 | KDIR := /lib/modules/$(KVERSION) 30 | KBUILD := $(KDIR)/build 31 | PWD := $(shell pwd) 32 | MAKEFLAGS += --no-print-directory 33 | 34 | all: 35 | @make -C $(KBUILD) M=$(PWD) modules 36 | @$(CC) $(UM_SRC) -o $(UM_BIN) 37 | @echo " CC $(UM_SRC)" 38 | 39 | umk: 40 | @$(CC) $(UM_SRC) -o $(UM_BIN) 41 | @echo " CC $(UM_SRC)" 42 | 43 | dri: 44 | @make -C $(KBUILD) M=$(PWD) modules 45 | 46 | clean: 47 | @make -C $(KBUILD) M=$(PWD) clean 48 | @$(RM) $(UM_BIN) 49 | @echo " CLEAN $(UM_BIN)" 50 | 51 | install: $(BIN) 52 | @cp $(BIN) $(KDIR) 53 | 54 | load: 55 | @echo Loading $(BIN) 56 | @insmod $(BIN) 57 | 58 | unload: 59 | @echo Unloading $(BIN) 60 | @rmmod $(BIN) 61 | -------------------------------------------------------------------------------- /Makefile.windows: -------------------------------------------------------------------------------- 1 | # 2 | # ksm - a really simple and fast x64 hypervisor 3 | # Copyright (C) 2016, 2017 Ahmed Samy 4 | # 5 | # Makefile for MinGW and others. 6 | # To cross-compile, pass C=1: 7 | # make C=1 8 | # to compile under native MinGW: 9 | # mingw32-make 10 | # 11 | # This program is free software; you can redistribute it and/or modify it 12 | # under the terms and conditions of the GNU General Public License, 13 | # version 2, as published by the Free Software Foundation. 14 | # 15 | # This program is distributed in the hope it will be useful, but WITHOUT 16 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 17 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 18 | # more details. 
19 | # 20 | # You should have received a copy of the GNU General Public License along with 21 | # this program; If not, see . 22 | # See if we are cross compiling. 23 | ifeq ("$(origin C)", "command line") 24 | CROSS_BUILD ?= x86_64-w64-mingw32- 25 | CROSS_INC ?= /usr/x86_64-w64-mingw32/include/ddk 26 | CROSS_LIB ?= /usr/x86_64-w64-mingw32/lib 27 | endif 28 | 29 | PREPEND.1 := @ 30 | PREPEND := $(PREPEND.$(Q)) 31 | 32 | CC = $(CROSS_BUILD)gcc 33 | STRIP = $(CROSS_BUILD)strip 34 | OBJCOPY = $(CROSS_BUILD)objcopy --only-keep-debug 35 | 36 | # Windows versions: 37 | # 0x0601 = Windows 7 38 | # 0x0602 = Windows 8 39 | # 0x0603 = Windows 8.1 40 | # 0x0A00 = Windows 10 41 | WINVER ?= 0x0A00 42 | 43 | DEPFLAGS = -MT $@ -MMD -MP -MF $(DEP_DIR)/$*.d 44 | DBGFLAGS = -DDBG -O2 45 | CFLAGS = -I$(CROSS_INC) $(DBGFLAGS) -D_WIN32_WINNT=$(WINVER) -DENABLE_DBGPRINT \ 46 | -std=c99 -Wno-multichar -municode -fno-stack-protector -fms-extensions -fno-stack-check \ 47 | -mno-stack-arg-probe -fno-asynchronous-unwind-tables -pipe 48 | AFLAGS = $(CFLAGS) -D__ASSEMBLY__ 49 | LDFLAGS = -shared -Wl,--subsystem,native -Wl,--dynamicbase -Wl,--stack=0x6000 \ 50 | -Wl,--file-alignment,0x1000 -Wl,--section-alignment,0x1000 \ 51 | -Wl,--entry,DriverEntry -Wl,--nxcompat -Wl,--exclude-all-symbols \ 52 | -Wl,--enable-stdcall-fixup -nostartfiles -nostdlib 53 | LIBS = -L$(CROSS_LIB) -lntoskrnl -lhal -lmingwex 54 | 55 | UM_SRC = um/um.c 56 | UM_BIN = a.out 57 | UM_LIB = -lntdll 58 | 59 | SRC = exit.c hotplug.c introspect.c ksm.c sandbox.c mm.c \ 60 | main_nt.c epage.c print.c resubv.c vcpu.c 61 | ASM = vmx.S 62 | 63 | BIN_DIR ?= bin 64 | OBJ_DIR ?= obj 65 | DEP_DIR ?= dep 66 | DEP = $(SRC:%.c=$(DEP_DIR)/%.d) 67 | OBJ = $(SRC:%.c=$(OBJ_DIR)/%.o) 68 | OBJ += $(ASM:%.S=$(OBJ_DIR)/%.o) 69 | 70 | TARGET = $(BIN_DIR)/ksm.sys 71 | SYMBOL = $(BIN_DIR)/ksm.sym 72 | 73 | .PHONY: all clean umk dri 74 | .PRECIOUS: $(DEP_DIR)/%.d 75 | 76 | all: $(TARGET) $(UM_BIN) 77 | clean: 78 | $(RM) $(UM_BIN) $(TARGET) $(SYMBOL) 
$(OBJ) $(DEP) 79 | umk: $(UM_BIN) 80 | dri: $(TARGET) 81 | 82 | $(TARGET): $(BIN_DIR) $(DEP_DIR) $(OBJ_DIR) $(OBJ) $(DEP) 83 | $(PREPEND)$(CC) $(LDFLAGS) -o $@ $(OBJ) $(LIBS) 84 | $(PREPEND)$(OBJCOPY) $@ $(SYMBOL) 85 | $(PREPEND)$(STRIP) $@ 86 | @echo " LD $@" 87 | @echo " All done, install pefile then" 88 | @echo " Run make_nonpageable.py on $@" 89 | 90 | $(UM_BIN): 91 | $(PREPEND)$(CC) $(UM_SRC) -o $(UM_BIN) $(UM_LIB) 92 | @echo " LD $@" 93 | 94 | $(OBJ_DIR)/%.o: %.c $(DEP_DIR)/%.d 95 | $(PREPEND)$(CC) -c $(CFLAGS) $(DEPFLAGS) -o $@ $< 96 | @echo " CC $@" 97 | 98 | $(OBJ_DIR)/%.o: %.S $(DEP_DIR)/%.d 99 | $(PREPEND)$(CC) -c $(AFLAGS) $(DEPFLAGS) -o $@ $< 100 | @echo " AS $@" 101 | 102 | -include $(DEP) 103 | $(DEP_DIR)/%.d: ; 104 | 105 | $(DEP_DIR): 106 | @mkdir -p $(DEP_DIR) 107 | 108 | $(OBJ_DIR): 109 | @mkdir -p $(OBJ_DIR) 110 | 111 | $(BIN_DIR): 112 | @mkdir -p $(BIN_DIR) 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ksm v1.6-dev [![Build Status](https://img.shields.io/travis/asamy/ksm/master.svg?style=flat-square&label=Linux)](https://travis-ci.org/asamy/ksm) [![Build Status](https://img.shields.io/appveyor/ci/asamy/ksm/master.svg?style=flat-square&label=Windows)](https://ci.appveyor.com/project/asamy/ksm) Coverity Scan Build Status [![BountySource](https://www.bountysource.com/badge/team?team_id=189129&style=raised)](https://www.bountysource.com/teams/ksm?utm_source=ksm&utm_medium=shield&utm_campaign=raised) 2 | 3 | A really simple and lightweight x64 hypervisor written in C for Intel processors. 4 | KSM has a self-contained physical memory introspection engine and userspace physical 5 | memory virtualization which can be enabled at compiletime. 
6 | 7 | Currently, KSM runs on Windows and Linux kernels natively, and aims to support 8 | macOS by 2017. If you want to port KSM, see `Documentation/SPEC.rst` for more information. 9 | 10 | **Note**: You can find Windows 10 precompiled binaries [here](https://ci.appveyor.com/project/asamy/ksm). 11 | 12 | ## Purpose 13 | 14 | Unlike other hypervisors (e.g. KVM, XEN, etc.), KSM's purpose is not to run 15 | other operating systems; instead, KSM can be used as an extra layer of 16 | protection for the existing running OS. This type of virtualization is usually 17 | seen in anti-viruses, sandboxers or even viruses. KSM also supports 18 | nesting, which means it can emulate other hardware-assisted virtualization tools 19 | (VT-x). 20 | 21 | ## Usage under Linux (+sandbox) 22 | 23 | [![asciicast](https://asciinema.org/a/10cu6v7c6l0j4532cww8tq1a1.png)](https://asciinema.org/a/10cu6v7c6l0j4532cww8tq1a1) 24 | 25 | ## Features 26 | 27 | - IDT Shadowing 28 | - EPT violation #VE (Disabled when unavailable - At least Broadwell required) 29 | - EPTP switching VMFUNC (Emulated when unavailable - At least Haswell required) 30 | - APIC virtualization (Experimental, do not use) 31 | - VMX Nesting (Experimental, do not use) 32 | - Builtin Userspace physical memory sandboxer (Optional) 33 | - Builtin Introspection engine (Optional) 34 | 35 | ## Requirements 36 | 37 | - An Intel processor (with VT-x and EPT support) 38 | - A working C compiler (GCC or Microsoft compiler aka CL are supported) 39 | 40 | ## Supported Kernels 41 | 42 | - Windows NT kernel (7/8/8.1/10) 43 | - Linux kernel (tested under 3.16, 4.8.13 and mainline) 44 | 45 | ## Documentation 46 | 47 | - [Building](https://github.com/asamy/ksm/blob/master/Documentation/BUILDING.rst) 48 | - [Contributions](https://github.com/asamy/ksm/blob/master/Documentation/CONTRIBUTIONS.rst) 49 | - [Specification](https://github.com/asamy/ksm/blob/master/Documentation/SPEC.rst) 50 | -
[TODO](https://github.com/asamy/ksm/blob/master/Documentation/TODO.rst) 51 | 52 | ## Module integration 53 | 54 | A few modular examples are included to illustrate usage: 55 | 56 | - `epage.c` - A shadow executable page hooking mechanism using multiple EPTP. 57 | - `introspect.c` - A small and stupid physical memory introspection engine using EPT. 58 | - `sandbox.c` - A small, incomplete and simple userspace physical memory sandbox. 59 | 60 | See Documentation/BUILDING.rst on how to enable those modules while building. 61 | 62 | ## Issues (bugs, features, etc.) 63 | 64 | Feel free to use GitHub Issues; there is an Issue Template to help you file 65 | things as required. 66 | 67 | ## References 68 | 69 | - Linux kernel (KVM) 70 | - HyperPlatform 71 | - XEN 72 | 73 | ## License 74 | 75 | GPL v2, see LICENSE file. Note that some code is third-party; the respective 76 | licenses and/or copyright notices should be included. If you think otherwise, feel free to mail me. 77 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-hacker -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | os: Visual Studio 2015 2 | 3 | branches: 4 | only: 5 | - master 6 | 7 | platform: 8 | - x64 9 | 10 | configuration: 11 | - Debug 12 | - Release 13 | 14 | matrix: 15 | fast_finish: true 16 | 17 | artifacts: 18 | - path: ksm\x64\**\ksm.sys 19 | - path: ksm\x64\**\ksm.pdb 20 | - path: ksm\x64\**\ksm_um.exe 21 | -------------------------------------------------------------------------------- /bitmap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * A simple bitmap to easily manage large
bitmaps. Bitmaps can be 6 | * 32-bit or 64-bit, 64-bit if on GCC or so, because unsigned long is 7 | * 64-bit there, however, MSVC treats unsigned long as 32-bit, so each 8 | * entry can handle up to that and is determined via BITMAP_NBITS. 9 | * 10 | * For usage examples, see ksm.c: init_msr_bitmaps() / init_io_bitmaps(). 11 | * Those initialize the MSR/IO bitmaps required for the VMM to run e.g. 12 | * nested VMMs, etc. 13 | * 14 | * Some functions from the Linux kernel bitmap implementation: 15 | * lib/find_bit.c 16 | * 17 | * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 18 | * Written by David Howells (dhowells@redhat.com) 19 | * 20 | * Rewritten by Yury Norov to decrease 21 | * size and improve performance, 2015. 22 | * 23 | * This program is free software; you can redistribute it and/or 24 | * modify it under the terms of the GNU General Public License 25 | * as published by the Free Software Foundation; either version 26 | * 2 of the License, or (at your option) any later version. 
27 | */ 28 | #ifndef __BITMAP_H 29 | #define __BITMAP_H 30 | 31 | #ifndef __linux__ 32 | #ifndef CHAR_BIT 33 | #define CHAR_BIT 8 34 | #endif 35 | 36 | #define BITMAP_BITS (sizeof(unsigned long) * CHAR_BIT) 37 | #define BIT_MASK(nr) (1UL << ((nr) % BITMAP_BITS)) 38 | #define BIT_WORD(nr) ((nr) / BITMAP_BITS) 39 | #define DECLARE_BITMAP(name, bits) \ 40 | unsigned long name[DIV_ROUND_UP(bits, BITMAP_BITS)] 41 | 42 | static inline void set_bit(unsigned long nr, unsigned long *bmp) 43 | { 44 | bmp[BIT_WORD(nr)] |= BIT_MASK(nr); 45 | } 46 | 47 | static inline void clear_bit(unsigned long nr, unsigned long *bmp) 48 | { 49 | bmp[BIT_WORD(nr)] &= ~BIT_MASK(nr); 50 | } 51 | 52 | static inline bool test_bit(unsigned long nr, volatile const unsigned long *bmp) 53 | { 54 | return !!(bmp[BIT_WORD(nr)] & BIT_MASK(nr)); 55 | } 56 | 57 | static inline unsigned long count_bits(unsigned long count) 58 | { 59 | return DIV_ROUND_UP(count, BITMAP_BITS) * sizeof(unsigned long); 60 | } 61 | 62 | static inline void fill_bits(unsigned long *bmp, unsigned char bits, unsigned long count) 63 | { 64 | memset(bmp, bits, count_bits(count)); 65 | } 66 | 67 | static inline void clear_bits(unsigned long *bmp, unsigned long count) 68 | { 69 | return fill_bits(bmp, 0, count); 70 | } 71 | 72 | static inline unsigned long __ffs(unsigned long x) 73 | { 74 | #ifdef _MSC_VER 75 | unsigned long i; 76 | _BitScanForward(&i, x); 77 | return i; 78 | #else 79 | __asm __volatile("rep; bsf %1, %0" 80 | : "=r" (x) 81 | : "rm" (x)); 82 | return x; 83 | #endif 84 | } 85 | 86 | static inline unsigned long __ffz(unsigned long x) 87 | { 88 | return __ffs(~x); 89 | } 90 | 91 | static inline unsigned long __ffs64(unsigned long long x) 92 | { 93 | #ifdef _MSC_VER 94 | unsigned long i; 95 | _BitScanForward64(&i, x); 96 | return i; 97 | #else 98 | return __ffs(x); 99 | #endif 100 | } 101 | 102 | static inline unsigned long find_first_bit(unsigned long *bmp, unsigned long size) 103 | { 104 | unsigned long i; 105 | 106 | for 
(i = 0; i * BITMAP_BITS < size; ++i) 107 | if (bmp[i]) 108 | return min(i * BITMAP_BITS + __ffs(bmp[i]), size); 109 | 110 | return size; 111 | } 112 | 113 | static inline unsigned long find_first_zero_bit(unsigned long *bmp, unsigned long size) 114 | { 115 | unsigned long i; 116 | 117 | for (i = 0; i * BITMAP_BITS < size; ++i) 118 | if (bmp[i] != ~0UL) 119 | return min(i * BITMAP_BITS + __ffz(bmp[i]), size); 120 | 121 | return size; 122 | } 123 | 124 | #endif 125 | #endif 126 | -------------------------------------------------------------------------------- /compiler.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * Define UM before including this file to eliminate unneeded 6 | * definitions. 7 | * 8 | * Public domain. 9 | */ 10 | #ifndef __COMPILER_H 11 | #define __COMPILER_H 12 | 13 | #include "inttypes.h" 14 | 15 | #ifdef DBG 16 | #ifdef __linux__ 17 | #define BREAK() (void)0 //__asm __volatile("int $3") 18 | #else 19 | #define BREAK() do { \ 20 | if (KD_DEBUGGER_ENABLED && !KD_DEBUGGER_NOT_PRESENT) \ 21 | __debugbreak(); \ 22 | } while (0) 23 | #endif 24 | #else 25 | #define BREAK() (void)0 26 | #endif 27 | 28 | #define BREAK_ON(cond) do { \ 29 | if (!!(cond)) \ 30 | BREAK(); \ 31 | } while (0) 32 | 33 | #ifndef __linux__ 34 | /* Windows definitions */ 35 | #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) 36 | 37 | /* Sync with Linux */ 38 | #define BUG_ON(cond) do { \ 39 | if (!!(cond)) \ 40 | __ud2(); \ 41 | } while (0) 42 | 43 | #define WARN_ON(cond) do { \ 44 | if (!!(cond)) \ 45 | KSM_DEBUG("Badness in %s at %s:%d\n", __func__, __FILE__, __LINE__); \ 46 | } while (0) 47 | 48 | /* Don't expose this to the assembler, it won't understand it. 
*/ 49 | #ifndef __ASSEMBLY__ 50 | /* Long names */ 51 | typedef unsigned char uint8_t; 52 | typedef unsigned short uint16_t; 53 | typedef unsigned long int uint32_t; 54 | typedef unsigned long long uint64_t; 55 | 56 | typedef signed char sint8_t; 57 | typedef signed short sint16_t; 58 | typedef signed long int sint32_t; 59 | typedef signed long long sint64_t; 60 | 61 | /* Short names */ 62 | typedef uint8_t u8; 63 | typedef uint16_t u16; 64 | typedef uint32_t u32; 65 | typedef uint64_t u64; 66 | 67 | typedef sint8_t s8; 68 | typedef sint16_t s16; 69 | typedef sint32_t s32; 70 | typedef sint64_t s64; 71 | 72 | #ifndef _UINTPTR_T_DEFINED 73 | typedef unsigned long long uintptr_t; 74 | #define _UINTPTR_T_DEFINED 75 | #endif 76 | 77 | #ifndef _INTPTR_T_DEFINED 78 | typedef signed long long intptr_t; 79 | #define _INTPTR_T_DEFINED 80 | #endif 81 | 82 | #ifndef _bool_true_false_are_defined 83 | typedef _Bool bool; 84 | #define true 1 85 | #define false 0 86 | #define _bool_true_false_are_defined 87 | #endif 88 | 89 | /* Avoid NT retardism */ 90 | #define container_of(address, type, field) CONTAINING_RECORD(address, type, field) 91 | /* OTOH - MSVC does not have typeof. Hack it. 
*/ 92 | #define container_off_var(var, member) \ 93 | ((char *)&(var)->member - (char *)(var)) 94 | #define container_of_var(ptr, var, member) \ 95 | ((char *)ptr - container_off_var(var, member)) 96 | 97 | #ifndef UM 98 | #include "list.h" 99 | 100 | typedef KSPIN_LOCK spinlock_t; 101 | #define spin_lock_init KeInitializeSpinLock 102 | #define spin_lock(s) \ 103 | KLOCK_QUEUE_HANDLE q; \ 104 | KeAcquireInStackQueuedSpinLock((s), &q) 105 | #define spin_unlock(s) \ 106 | KeReleaseInStackQueuedSpinLock(&(q)) 107 | #define spin_lock_irqsave(s,f) spin_lock((s)); (void)f 108 | #define spin_unlock_irqrestore(s,f) spin_unlock((s)); (void)f 109 | 110 | NTKERNELAPI UCHAR *NTAPI PsGetProcessImageFileName(PEPROCESS process); 111 | #endif 112 | #endif 113 | 114 | #ifdef _MSC_VER 115 | /* MSVC specific definitions */ 116 | /* Disable annoying warnings */ 117 | #pragma warning(disable:4115) /* 'type' : named type definition in parentheses */ 118 | #pragma warning(disable:4201) /* nonstandard extension used : nameless struct/union */ 119 | #pragma warning(disable:4214) /* nonstandard extension used : bit field types other than int */ 120 | #pragma warning(disable:4702) /* unreachable code */ 121 | #pragma warning(disable:4100) /* 'identifier' : unreferenced formal parameter */ 122 | #pragma warning(disable:4200) /* nonstandard extension used : zero-sized array in struct/union */ 123 | #pragma warning(disable:4098) /* 'function' : void function returning a value */ 124 | #pragma warning(disable:4204) /* nonstandard extension used : non-constant aggregate initializer */ 125 | #pragma warning(disable:4189) /* 'identifier' : local variable is initialized but not referenced */ 126 | #pragma warning(disable:4706) /* assignment within conditional expression */ 127 | #pragma warning(disable:4221) /* nonstandard extension used : 'identifier' : cannot be initialized using address of automatic variable */ 128 | #pragma warning(disable:4054) /* 'conversion' : from function pointer 'type1' to 
data pointer 'type2' */ 129 | 130 | #define __align(alignment) __declspec(align(alignment)) 131 | #define __packed 132 | #else 133 | /* GCC (Windows) specific definitions */ 134 | #define _In_ 135 | #define _In_opt_ 136 | #define __align(alignment) __attribute__((__aligned__(alignment))) 137 | #ifndef __forceinline 138 | #define __forceinline __attribute__((always_inline)) inline 139 | #endif 140 | #define __packed __attribute__((__packed__)) 141 | #include 142 | 143 | #define STATUS_HV_CPUID_FEATURE_VALIDATION_ERROR 0xC035003C 144 | #define STATUS_HV_NOT_ALLOWED_WITH_NESTED_VIRT_ACTIVE 0xC0350071 145 | #define STATUS_HV_FEATURE_UNAVAILABLE 0xC035001E 146 | #define STATUS_HV_ACCESS_DENIED 0xC0350006 147 | #define STATUS_HV_NOT_PRESENT 0xC0351000 148 | #endif 149 | 150 | #if defined(ENABLE_DBGPRINT) || defined(ENABLE_FILEPRINT) 151 | #define ENABLE_PRINT 152 | #endif 153 | #define ERR_NOTH STATUS_HV_NOT_PRESENT 154 | #define ERR_CPUID STATUS_HV_CPUID_FEATURE_VALIDATION_ERROR 155 | #define ERR_BUSY STATUS_DEVICE_BUSY 156 | #define ERR_FEAT STATUS_HV_FEATURE_UNAVAILABLE 157 | #define ERR_UNSUP STATUS_NOT_SUPPORTED 158 | #define ERR_DENIED STATUS_HV_ACCESS_DENIED 159 | #define ERR_NOMEM STATUS_NO_MEMORY 160 | #define ERR_EXCEPT GetExceptionCode() 161 | #define ERR_RANGE STATUS_BUFFER_OVERFLOW 162 | #define ERR_INVAL STATUS_INVALID_PARAMETER_1 163 | #define ERR_EXIST STATUS_ADDRESS_ALREADY_EXISTS 164 | #else 165 | /* Linux definitions */ 166 | #ifndef __ASSEMBLY__ 167 | #include 168 | #endif 169 | #include 170 | 171 | #define __align(alignment) __attribute__((__aligned__(alignment))) 172 | 173 | #define ERR_NOTH -ENOENT 174 | #define ERR_CPUID -EOPNOTSUPP 175 | #define ERR_BUSY -EBUSY 176 | #define ERR_FEAT -ENOENT 177 | #define ERR_UNSUP -EOPNOTSUPP 178 | #define ERR_DENIED -EACCES 179 | #define ERR_NOMEM -ENOMEM 180 | #define ERR_EXCEPT -EACCES 181 | #define ERR_RANGE -ERANGE 182 | #define ERR_INVAL -EINVAL 183 | #define ERR_EXIST -EEXIST 184 | #endif 185 | 186 | 
#ifndef __func__ 187 | #define __func__ __FUNCTION__ 188 | #endif 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /epage.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * Executable page hooking, see comments down below for more 6 | * information. 7 | * 8 | * This program is free software; you can redistribute it and/or modify it 9 | * under the terms and conditions of the GNU General Public License, 10 | * version 2, as published by the Free Software Foundation. 11 | * 12 | * This program is distributed in the hope it will be useful, but WITHOUT 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 | * more details. 16 | * 17 | * You should have received a copy of the GNU General Public License along with 18 | * this program; If not, see . 19 | */ 20 | #ifdef EPAGE_HOOK 21 | #ifdef __linux__ 22 | #include 23 | #else 24 | #include 25 | #endif 26 | 27 | #include "ksm.h" 28 | #include "percpu.h" 29 | 30 | /*! 
31 | * To use this interface, call ksm_hook_epage() on the target function, 32 | * e.g.: 33 | * \code 34 | * ksm_hook_epage(MmMapIoSpace, hkMmMapIoSpace); 35 | * \endcode 36 | * 37 | * And for original function call: 38 | * \code 39 | * vcpu_vmfunc(EPTP_NORMAL, 0); 40 | * void *ret = MmMapIoSpace(x, y, z); 41 | * vcpu_vmfunc(EPTP_EXHOOK, 0); 42 | * return ret; 43 | * \endcode 44 | */ 45 | void ksm_handle_epage(struct vcpu *vcpu, struct epage_info *epage) 46 | { 47 | struct ept *ept = &vcpu->ept; 48 | struct ksm *k = vcpu_to_ksm(vcpu); 49 | int mt = ept_memory_type(k, epage->dpa); 50 | 51 | /* Called from vmcall (exit.c) */ 52 | ept_alloc_page(EPT4(ept, EPTP_EXHOOK), EPT_ACCESS_EXEC, mt, epage->dpa, epage->cpa); 53 | ept_alloc_page(EPT4(ept, EPTP_RWHOOK), EPT_ACCESS_RW, mt, epage->dpa, epage->dpa); 54 | ept_alloc_page(EPT4(ept, EPTP_NORMAL), EPT_ACCESS_ALL, mt, epage->dpa, epage->dpa); 55 | 56 | cpu_invvpid(k, epage->origin); 57 | cpu_invept(k, epage->dpa, EPTP(ept, vcpu_eptp_idx(vcpu))); 58 | } 59 | 60 | void ksm_handle_epage_ve(struct epage_info *epage, struct ept_ve_around *ve) 61 | { 62 | /* called from an EPT violation */ 63 | if (ve->info->exit & EPT_ACCESS_RW) 64 | ve->eptp_next = EPTP_RWHOOK; 65 | else 66 | ve->eptp_next = EPTP_EXHOOK; 67 | } 68 | 69 | #ifndef __linux__ 70 | #include 71 | #endif 72 | struct trampoline { 73 | u8 push; 74 | u32 lo; 75 | u32 mov; 76 | u32 hi; 77 | u32 ret; 78 | } __packed; 79 | #ifndef __linux__ 80 | #include 81 | #endif 82 | 83 | static void epage_init_trampoline(struct trampoline *trampo, u64 to) 84 | { 85 | // push lo 86 | // mov dword ptr [rsp + 0x4], hi 87 | // ret 88 | trampo->push = 0x68; 89 | trampo->lo = (u32)to; 90 | trampo->mov = 0x042444C7; 91 | trampo->hi = to >> 32; 92 | trampo->ret = 0xC3; 93 | } 94 | 95 | static DEFINE_DPC(__do_hook_page, __vmx_vmcall, HCALL_HOOK, ctx); 96 | static DEFINE_DPC(__do_unhook_page, __vmx_vmcall, HCALL_UNHOOK, ctx); 97 | 98 | /* 99 | * Note!!! 
100 | * This function is not very robust, e.g. pages that are not 101 | * physically contiguous will cause havoc. On the Linux kernel 102 | * this can be a problem when hooking kernel pages, specifically 103 | * module pages as those are allocated using vmalloc() and are not 104 | * physically contiguous, so be careful. 105 | * 106 | * On Windows, kernel pages are always physically contiguous unless they are 107 | * in the PAGE section, so this will handle most cases. 108 | * 109 | * On Windows, you can lock pages using: 110 | * \code 111 | * PMDL mdl = IoAllocateMdl(original, PAGE_SIZE, FALSE, FALSE, NULL); 112 | * MmProbeAndLockPages(mdl, KernelMode, IoReadAccess); 113 | * \endcode 114 | * 115 | * Then unlock in unhook: 116 | * \code 117 | * MmUnlockPages(mdl); 118 | * IoFreeMdl(mdl); 119 | * \endcode 120 | * 121 | * Notes on hooking out-of-kernel pages (e.g. userspace pages or similar): 122 | * 123 | * When hooking a userspace specific function, you should first attach to that 124 | * specific process (if not already), to make sure that the current CR3 is 125 | * updated. Also do note that userspace pages tend to be paged out all the 126 | * time, so the above notes also apply. 127 | * 128 | * Do also note the inline-code provided above is not tested, but should work. 129 | */ 130 | struct epage_info *ksm_prepare_epage(void *original, void *redirect, bool *exist) 131 | { 132 | struct epage_info *epage; 133 | u8 *code_page; 134 | void *aligned = (void *)page_align(original); 135 | uintptr_t code_start = (uintptr_t)original - (uintptr_t)aligned; 136 | struct trampoline trampo; 137 | 138 | BUG_ON(!ksm); 139 | epage_init_trampoline(&trampo, (uintptr_t)redirect); 140 | epage = ksm_find_epage(ksm, __pa(original)); 141 | if (epage) { 142 | /* 143 | * Hooking another function in same page. 144 | * 145 | * Simply just overwrite the start of the 146 | * function with the trampoline...
147 | */ 148 | *exist = true; 149 | code_page = epage->c_va; 150 | memcpy(code_page + code_start, &trampo, sizeof(trampo)); 151 | return epage; 152 | } 153 | 154 | epage = mm_alloc_pool(sizeof(*epage)); 155 | if (!epage) 156 | return NULL; 157 | 158 | code_page = mm_alloc_page(); 159 | if (!code_page) { 160 | mm_free_pool(epage, sizeof(*epage)); 161 | return NULL; 162 | } 163 | 164 | memcpy(code_page, aligned, PAGE_SIZE); 165 | memcpy(code_page + code_start, &trampo, sizeof(trampo)); 166 | 167 | epage->c_va = code_page; 168 | epage->cpa = __pa(code_page); 169 | epage->dpa = __pa(original); 170 | epage->origin = (u64)aligned; 171 | return epage; 172 | } 173 | 174 | int ksm_hook_epage_on_cpu(struct epage_info *epage, int cpu) 175 | { 176 | CALL_DPC_ON_CPU(cpu, __do_hook_page, return -1, epage); 177 | return DPC_RET(); 178 | } 179 | 180 | int ksm_hook_epage(void *original, void *redirect) 181 | { 182 | struct epage_info *epage; 183 | bool exist = false; 184 | 185 | epage = ksm_prepare_epage(original, redirect, &exist); 186 | if (!epage) 187 | return ERR_NOMEM; 188 | 189 | if (exist) 190 | return 0; 191 | 192 | CALL_DPC(__do_hook_page, epage); 193 | spin_lock(&ksm->epage_lock); 194 | list_add_tail(&epage->link, &ksm->epage_list); 195 | spin_unlock(&ksm->epage_lock); 196 | return DPC_RET(); 197 | } 198 | 199 | static inline void ksm_free_epage(struct epage_info *epage) 200 | { 201 | mm_free_page(epage->c_va); 202 | mm_free_pool(epage, sizeof(*epage)); 203 | } 204 | 205 | int __ksm_unhook_epage(struct epage_info *epage) 206 | { 207 | CALL_DPC(__do_unhook_page, (void *)epage->dpa); 208 | spin_lock(&ksm->epage_lock); 209 | list_del(&epage->link); 210 | spin_unlock(&ksm->epage_lock); 211 | ksm_free_epage(epage); 212 | return DPC_RET(); 213 | } 214 | 215 | int ksm_unhook_epage(struct ksm *k, void *va) 216 | { 217 | struct epage_info *epage = ksm_find_epage(k, __pa(va)); 218 | if (!epage) 219 | return ERR_NOTH; 220 | 221 | return __ksm_unhook_epage(epage); 222 | } 223 | 224 | 
struct epage_info *ksm_find_epage(struct ksm *k, uintptr_t gpa) 225 | { 226 | struct epage_info *epage = NULL; 227 | struct epage_info *ret = NULL; 228 | 229 | spin_lock(&k->epage_lock); 230 | list_for_each_entry(epage, &k->epage_list, link) 231 | { 232 | if (epage->dpa >> PAGE_SHIFT == gpa >> PAGE_SHIFT) { 233 | ret = epage; 234 | break; 235 | } 236 | } 237 | 238 | spin_unlock(&k->epage_lock); 239 | return ret; 240 | } 241 | 242 | int ksm_epage_init(struct ksm *k) 243 | { 244 | INIT_LIST_HEAD(&k->epage_list); 245 | spin_lock_init(&k->epage_lock); 246 | return 0; 247 | } 248 | 249 | int ksm_epage_exit(struct ksm *k) 250 | { 251 | struct epage_info *epage = NULL; 252 | struct epage_info *next; 253 | 254 | list_for_each_entry_safe(epage, next, &k->epage_list, link) 255 | ksm_free_epage(epage); 256 | 257 | return 0; 258 | } 259 | 260 | #endif 261 | -------------------------------------------------------------------------------- /hotplug.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * Public domain 6 | */ 7 | #ifdef __linux__ 8 | #include 9 | #else 10 | #include 11 | #endif 12 | 13 | #include "ksm.h" 14 | #include "compiler.h" 15 | 16 | #ifdef __linux__ 17 | static inline void do_cpu(void *v) 18 | { 19 | int(*f) (struct ksm *) = v; 20 | int ret = f(ksm); 21 | 22 | KSM_DEBUG("On CPU calling %d\n", ret); 23 | } 24 | 25 | static int ksm_hotplug_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) 26 | { 27 | unsigned long cpu = (unsigned long)hcpu; 28 | 29 | KSM_DEBUG("CPU %ld action: %ld\n", cpu, action); 30 | switch (action) { 31 | case CPU_ONLINE: 32 | case CPU_ONLINE_FROZEN: 33 | get_online_cpus(); 34 | if (cpu_online(cpu)) 35 | smp_call_function_single(cpu, do_cpu, __ksm_init_cpu, 1); 36 | put_online_cpus(); 37 | break; 38 | case CPU_DOWN_PREPARE: 39 | case CPU_DYING: 40 | case CPU_DYING_FROZEN: 41 | 
smp_call_function_single(cpu, do_cpu, __ksm_exit_cpu, 1); 42 | break; 43 | } 44 | 45 | return NOTIFY_OK; 46 | } 47 | 48 | static struct notifier_block cpu_notify = { 49 | .notifier_call = ksm_hotplug_cpu 50 | }; 51 | 52 | int register_cpu_callback(void) 53 | { 54 | register_hotcpu_notifier(&cpu_notify); 55 | return 0; 56 | } 57 | 58 | void unregister_cpu_callback(void) 59 | { 60 | unregister_hotcpu_notifier(&cpu_notify); 61 | } 62 | #else 63 | static void *hotplug_cpu; 64 | 65 | static void ksm_hotplug_cpu(void *ctx, PKE_PROCESSOR_CHANGE_NOTIFY_CONTEXT change_ctx, PNTSTATUS op_status) 66 | { 67 | /* CPU Hotplug callback, a CPU just came online: pin to it, virtualize it, then restore affinity. */ 68 | GROUP_AFFINITY affinity = { 0 }; /* zero-init: only Group/Mask are set below; Reserved[] is documented as must-be-zero */ 69 | GROUP_AFFINITY prev; 70 | PPROCESSOR_NUMBER pnr; 71 | int status; 72 | 73 | if (change_ctx->State == KeProcessorAddCompleteNotify) { 74 | pnr = &change_ctx->ProcNumber; 75 | affinity.Group = pnr->Group; 76 | affinity.Mask = 1ULL << pnr->Number; 77 | KeSetSystemGroupAffinityThread(&affinity, &prev); 78 | 79 | KSM_DEBUG_RAW("New processor\n"); 80 | status = __ksm_init_cpu(ksm); 81 | if (!NT_SUCCESS(status)) 82 | *op_status = status; 83 | 84 | KeRevertToUserGroupAffinityThread(&prev); 85 | } 86 | } 87 | 88 | int register_cpu_callback(void) 89 | { 90 | hotplug_cpu = KeRegisterProcessorChangeCallback(ksm_hotplug_cpu, NULL, 0); 91 | if (!hotplug_cpu) 92 | return STATUS_UNSUCCESSFUL; 93 | 94 | return 0; 95 | } 96 | 97 | void unregister_cpu_callback(void) 98 | { 99 | KeDeregisterProcessorChangeCallback(hotplug_cpu); 100 | } 101 | 102 | #endif 103 | -------------------------------------------------------------------------------- /introspect.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * This program is free software; you can redistribute it and/or modify it 6 | * under the terms and conditions of the GNU General Public License, 7 | * version 2, as
published by the Free Software Foundation. 8 | * 9 | * This program is distributed in the hope it will be useful, but WITHOUT 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 | * more details. 13 | * 14 | * You should have received a copy of the GNU General Public License along with 15 | * this program; If not, see . 16 | */ 17 | #ifdef INTROSPECT_ENGINE 18 | #ifdef __linux__ 19 | #include 20 | #include 21 | #include 22 | #else 23 | #include 24 | #include 25 | #endif 26 | 27 | #include "ksm.h" 28 | #include "percpu.h" 29 | #include "um/um.h" 30 | 31 | #define INTROSPECT_WATCH 1 32 | #define INTROSPECT_UNWATCH 2 33 | 34 | struct introspect_addr { 35 | u64 gpa; 36 | u64 gva; 37 | int access; 38 | int hits; 39 | int miss; 40 | struct list_head link; 41 | }; 42 | 43 | struct introspect_call { 44 | int type; 45 | struct introspect_addr *addr; 46 | }; 47 | static DEFINE_DPC(__call_introspect, __vmx_vmcall, HCALL_INTROSPECT, ctx); 48 | 49 | int ksm_introspect_init(struct ksm *k) 50 | { 51 | spin_lock_init(&k->watch_lock); 52 | INIT_LIST_HEAD(&k->watch_list); 53 | return 0; 54 | } 55 | 56 | int ksm_introspect_exit(struct ksm *k) 57 | { 58 | int r = 0; 59 | struct introspect_addr *addr = NULL; 60 | struct introspect_addr *next = NULL; 61 | 62 | list_for_each_entry_safe(addr, next, &k->watch_list, link) { 63 | list_del(&addr->link); 64 | __mm_free_pool(addr); 65 | } 66 | 67 | return r; 68 | } 69 | 70 | static inline struct introspect_addr *__find_watched_addr(struct ksm *k, u64 gpa) 71 | { 72 | struct introspect_addr *addr = NULL; 73 | struct introspect_addr *ret = NULL; 74 | 75 | list_for_each_entry(addr, &k->watch_list, link) { 76 | if (addr->gpa >> PAGE_SHIFT == gpa >> PAGE_SHIFT) { 77 | ret = addr; 78 | break; 79 | } 80 | } 81 | 82 | return ret; 83 | } 84 | 85 | static inline struct introspect_addr *find_watched_addr(struct ksm *k, u64 gpa) 86 | { 87 | struct 
introspect_addr *ret; 88 | spin_lock(&k->watch_lock); 89 | ret = __find_watched_addr(k, gpa); 90 | spin_unlock(&k->watch_lock); 91 | return ret; 92 | } 93 | 94 | bool ksm_introspect_handle_vmcall(struct vcpu *vcpu, uintptr_t arg) 95 | { 96 | struct ept *ept = &vcpu->ept; 97 | struct ksm *k = vcpu_to_ksm(vcpu); 98 | struct introspect_call *call; 99 | struct introspect_addr *addr; 100 | u64 *epte; 101 | int mt; 102 | 103 | call = (struct introspect_call *)arg; 104 | addr = call->addr; 105 | switch (call->type) { 106 | case INTROSPECT_WATCH: 107 | /* 108 | * ->access is what they want to monitor, so take those bits 109 | * out so we can trap on that access. 110 | */ 111 | mt = ept_memory_type(k, addr->gpa); 112 | epte = ept_alloc_page(EPT4(ept, EPTP_DEFAULT), 113 | addr->access ^ EPT_ACCESS_ALL, mt, addr->gpa, addr->gpa); 114 | if (!epte) 115 | return false; 116 | 117 | cpu_invept(k, addr->gpa, EPTP(ept, EPTP_DEFAULT)); 118 | return true; 119 | case INTROSPECT_UNWATCH: 120 | epte = ept_pte(EPT4(ept, EPTP_DEFAULT), addr->gpa); 121 | if (!epte) 122 | return false; 123 | 124 | __set_epte_ar(epte, EPT_ACCESS_ALL); 125 | cpu_invept(k, addr->gpa, EPTP(ept, EPTP_DEFAULT)); 126 | return true; 127 | default: 128 | KSM_DEBUG("unknown call type %d\n", call->type); 129 | break; 130 | } 131 | 132 | return false; 133 | } 134 | 135 | bool ksm_introspect_handle_ept(struct ept_ve_around *ve) 136 | { 137 | struct vcpu *vcpu; 138 | struct ve_except_info *info; 139 | struct introspect_addr *addr; 140 | struct ksm *k; 141 | struct ept *ept; 142 | u64 *epte; 143 | 144 | info = ve->info; 145 | vcpu = ve->vcpu; 146 | ept = &vcpu->ept; 147 | k = vcpu_to_ksm(vcpu); 148 | 149 | addr = find_watched_addr(k, info->gpa); 150 | WARN_ON(!addr); 151 | if (!addr) { 152 | /* This can happen? 
*/ 153 | ve->eptp_next = EPTP_DEFAULT; 154 | return true; 155 | } 156 | 157 | epte = ept_pte(EPT4(ept, info->eptp), info->gpa); 158 | BUG_ON(!epte); 159 | 160 | if (info->exit & addr->access) { 161 | __set_epte_ar(epte, info->exit & EPT_AR_MASK); 162 | 163 | /* It's a hit only if the offset matches... */ 164 | if (addr_offset(info->gpa) >= addr_offset(addr->gpa)) { 165 | addr->hits++; 166 | KSM_DEBUG_RAW("Hit!\n"); 167 | } else { 168 | addr->miss++; 169 | KSM_DEBUG_RAW("Miss offset\n"); 170 | } 171 | } else { 172 | addr->miss++; 173 | __set_epte_ar(epte, addr->access ^ EPT_ACCESS_ALL); 174 | KSM_DEBUG_RAW("Miss!\n"); 175 | } 176 | 177 | KSM_DEBUG("Addr %p: %d hits %d miss\n", (void *)info->gpa, addr->hits, addr->miss); 178 | ve->invalidate = true; 179 | return true; 180 | } 181 | 182 | int ksm_introspect_start(struct ksm *k) 183 | { 184 | if (k->active_vcpus == 0) 185 | return ERR_NOTH; 186 | 187 | return vcpu_vmfunc(EPTP_DEFAULT, 0); 188 | } 189 | 190 | int ksm_introspect_stop(struct ksm *k) 191 | { 192 | if (k->active_vcpus == 0) 193 | return ERR_NOTH; 194 | 195 | return vcpu_vmfunc(EPTP_NORMAL, 0); 196 | } 197 | 198 | int ksm_introspect_add_watch(struct ksm *k, struct watch_ioctl *watch) 199 | { 200 | struct introspect_addr *addr; 201 | int r; 202 | 203 | addr = find_watched_addr(k, watch->addr); 204 | if (addr) 205 | return ERR_EXIST; 206 | 207 | addr = mm_alloc_pool(sizeof(*addr)); 208 | if (!addr) 209 | return ERR_NOMEM; 210 | 211 | addr->gpa = watch->addr; 212 | addr->access = watch->access; 213 | CALL_DPC(__call_introspect, &(struct introspect_call) { 214 | .type = INTROSPECT_WATCH, 215 | .addr = addr, 216 | }); 217 | r = DPC_RET(); 218 | if (r != 0) { 219 | __mm_free_pool(addr); 220 | return r; 221 | } 222 | 223 | spin_lock(&k->watch_lock); 224 | list_add(&addr->link, &k->watch_list); 225 | spin_unlock(&k->watch_lock); 226 | return 0; 227 | } 228 | 229 | int ksm_introspect_rem_watch(struct ksm *k, struct watch_ioctl *watch) 230 | { 231 | struct 
introspect_addr *addr; 232 | int ret = ERR_INVAL; 233 | 234 | addr = find_watched_addr(k, watch->addr); 235 | if (!addr) 236 | return ret; 237 | 238 | CALL_DPC(__call_introspect, &(struct introspect_call) { 239 | .type = INTROSPECT_UNWATCH, 240 | .addr = addr 241 | }); 242 | ret = DPC_RET(); 243 | 244 | spin_lock(&k->watch_lock); 245 | list_del(&addr->link); 246 | __mm_free_pool(addr); 247 | spin_unlock(&k->watch_lock); 248 | return ret; 249 | } 250 | 251 | int ksm_introspect_collect(struct ksm *k, struct watch_ioctl *watch) 252 | { 253 | struct introspect_addr *addr; 254 | void *v; 255 | 256 | addr = find_watched_addr(k, watch->addr); 257 | if (!addr) 258 | return ERR_INVAL; 259 | 260 | v = mm_remap(page_align(watch->addr), PAGE_SIZE); 261 | if (!v) 262 | return ERR_NOMEM; 263 | 264 | memcpy(watch->buf, v, PAGE_SIZE); 265 | mm_unmap(v, PAGE_SIZE); 266 | 267 | watch->hits = addr->hits; 268 | watch->miss = addr->miss; 269 | return 0; 270 | } 271 | 272 | #endif 273 | -------------------------------------------------------------------------------- /inttypes.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 1997-2001, 2004 Free Software Foundation, Inc. 2 | This file is part of the GNU C Library. 3 | 4 | The GNU C Library is free software; you can redistribute it and/or 5 | modify it under the terms of the GNU Lesser General Public 6 | License as published by the Free Software Foundation; either 7 | version 2.1 of the License, or (at your option) any later version. 8 | 9 | The GNU C Library is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | Lesser General Public License for more details. 
13 | 14 | You should have received a copy of the GNU Lesser General Public 15 | License along with the GNU C Library; if not, write to the Free 16 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 17 | 02111-1307 USA. */ 18 | 19 | /* 20 | * ISO C99: 7.8 Format conversion of integer types 21 | */ 22 | 23 | #ifndef _INTTYPES_H 24 | #define _INTTYPES_H 1 25 | 26 | # define __PRI64_PREFIX "ll" 27 | #ifdef __linux__ 28 | # define __PRIPTR_PREFIX "l" 29 | #else 30 | # define __PRIPTR_PREFIX "ll" 31 | #endif 32 | 33 | /* Macros for printing format specifiers. */ 34 | 35 | /* Decimal notation. */ 36 | # define PRId8 "d" 37 | # define PRId16 "d" 38 | # define PRId32 "d" 39 | # define PRId64 __PRI64_PREFIX "d" 40 | 41 | # define PRIdLEAST8 "d" 42 | # define PRIdLEAST16 "d" 43 | # define PRIdLEAST32 "d" 44 | # define PRIdLEAST64 __PRI64_PREFIX "d" 45 | 46 | # define PRIdFAST8 "d" 47 | # define PRIdFAST16 __PRIPTR_PREFIX "d" 48 | # define PRIdFAST32 __PRIPTR_PREFIX "d" 49 | # define PRIdFAST64 __PRI64_PREFIX "d" 50 | 51 | 52 | # define PRIi8 "i" 53 | # define PRIi16 "i" 54 | # define PRIi32 "i" 55 | # define PRIi64 __PRI64_PREFIX "i" 56 | 57 | # define PRIiLEAST8 "i" 58 | # define PRIiLEAST16 "i" 59 | # define PRIiLEAST32 "i" 60 | # define PRIiLEAST64 __PRI64_PREFIX "i" 61 | 62 | # define PRIiFAST8 "i" 63 | # define PRIiFAST16 __PRIPTR_PREFIX "i" 64 | # define PRIiFAST32 __PRIPTR_PREFIX "i" 65 | # define PRIiFAST64 __PRI64_PREFIX "i" 66 | 67 | /* Octal notation. */ 68 | # define PRIo8 "o" 69 | # define PRIo16 "o" 70 | # define PRIo32 "o" 71 | # define PRIo64 __PRI64_PREFIX "o" 72 | 73 | # define PRIoLEAST8 "o" 74 | # define PRIoLEAST16 "o" 75 | # define PRIoLEAST32 "o" 76 | # define PRIoLEAST64 __PRI64_PREFIX "o" 77 | 78 | # define PRIoFAST8 "o" 79 | # define PRIoFAST16 __PRIPTR_PREFIX "o" 80 | # define PRIoFAST32 __PRIPTR_PREFIX "o" 81 | # define PRIoFAST64 __PRI64_PREFIX "o" 82 | 83 | /* Unsigned integers. 
*/ 84 | # define PRIu8 "u" 85 | # define PRIu16 "u" 86 | # define PRIu32 "u" 87 | # define PRIu64 __PRI64_PREFIX "u" 88 | 89 | # define PRIuLEAST8 "u" 90 | # define PRIuLEAST16 "u" 91 | # define PRIuLEAST32 "u" 92 | # define PRIuLEAST64 __PRI64_PREFIX "u" 93 | 94 | # define PRIuFAST8 "u" 95 | # define PRIuFAST16 __PRIPTR_PREFIX "u" 96 | # define PRIuFAST32 __PRIPTR_PREFIX "u" 97 | # define PRIuFAST64 __PRI64_PREFIX "u" 98 | 99 | /* lowercase hexadecimal notation. */ 100 | # define PRIx8 "x" 101 | # define PRIx16 "x" 102 | # define PRIx32 "x" 103 | # define PRIx64 __PRI64_PREFIX "x" 104 | 105 | # define PRIxLEAST8 "x" 106 | # define PRIxLEAST16 "x" 107 | # define PRIxLEAST32 "x" 108 | # define PRIxLEAST64 __PRI64_PREFIX "x" 109 | 110 | # define PRIxFAST8 "x" 111 | # define PRIxFAST16 __PRIPTR_PREFIX "x" 112 | # define PRIxFAST32 __PRIPTR_PREFIX "x" 113 | # define PRIxFAST64 __PRI64_PREFIX "x" 114 | 115 | /* UPPERCASE hexadecimal notation. */ 116 | # define PRIX8 "X" 117 | # define PRIX16 "X" 118 | # define PRIX32 "X" 119 | # define PRIX64 __PRI64_PREFIX "X" 120 | 121 | # define PRIXLEAST8 "X" 122 | # define PRIXLEAST16 "X" 123 | # define PRIXLEAST32 "X" 124 | # define PRIXLEAST64 __PRI64_PREFIX "X" 125 | 126 | # define PRIXFAST8 "X" 127 | # define PRIXFAST16 __PRIPTR_PREFIX "X" 128 | # define PRIXFAST32 __PRIPTR_PREFIX "X" 129 | # define PRIXFAST64 __PRI64_PREFIX "X" 130 | 131 | 132 | /* Macros for printing `intmax_t' and `uintmax_t'. */ 133 | # define PRIdMAX __PRI64_PREFIX "d" 134 | # define PRIiMAX __PRI64_PREFIX "i" 135 | # define PRIoMAX __PRI64_PREFIX "o" 136 | # define PRIuMAX __PRI64_PREFIX "u" 137 | # define PRIxMAX __PRI64_PREFIX "x" 138 | # define PRIXMAX __PRI64_PREFIX "X" 139 | 140 | 141 | /* Macros for printing `intptr_t' and `uintptr_t'. 
*/ 142 | # define PRIdPTR __PRIPTR_PREFIX "d" 143 | # define PRIiPTR __PRIPTR_PREFIX "i" 144 | # define PRIoPTR __PRIPTR_PREFIX "o" 145 | # define PRIuPTR __PRIPTR_PREFIX "u" 146 | # define PRIxPTR __PRIPTR_PREFIX "x" 147 | # define PRIXPTR __PRIPTR_PREFIX "X" 148 | 149 | 150 | /* Macros for scanning format specifiers. */ 151 | 152 | /* Signed decimal notation. */ 153 | # define SCNd8 "hhd" 154 | # define SCNd16 "hd" 155 | # define SCNd32 "d" 156 | # define SCNd64 __PRI64_PREFIX "d" 157 | 158 | # define SCNdLEAST8 "hhd" 159 | # define SCNdLEAST16 "hd" 160 | # define SCNdLEAST32 "d" 161 | # define SCNdLEAST64 __PRI64_PREFIX "d" 162 | 163 | # define SCNdFAST8 "hhd" 164 | # define SCNdFAST16 __PRIPTR_PREFIX "d" 165 | # define SCNdFAST32 __PRIPTR_PREFIX "d" 166 | # define SCNdFAST64 __PRI64_PREFIX "d" 167 | 168 | /* Signed decimal notation. */ 169 | # define SCNi8 "hhi" 170 | # define SCNi16 "hi" 171 | # define SCNi32 "i" 172 | # define SCNi64 __PRI64_PREFIX "i" 173 | 174 | # define SCNiLEAST8 "hhi" 175 | # define SCNiLEAST16 "hi" 176 | # define SCNiLEAST32 "i" 177 | # define SCNiLEAST64 __PRI64_PREFIX "i" 178 | 179 | # define SCNiFAST8 "hhi" 180 | # define SCNiFAST16 __PRIPTR_PREFIX "i" 181 | # define SCNiFAST32 __PRIPTR_PREFIX "i" 182 | # define SCNiFAST64 __PRI64_PREFIX "i" 183 | 184 | /* Unsigned decimal notation. */ 185 | # define SCNu8 "hhu" 186 | # define SCNu16 "hu" 187 | # define SCNu32 "u" 188 | # define SCNu64 __PRI64_PREFIX "u" 189 | 190 | # define SCNuLEAST8 "hhu" 191 | # define SCNuLEAST16 "hu" 192 | # define SCNuLEAST32 "u" 193 | # define SCNuLEAST64 __PRI64_PREFIX "u" 194 | 195 | # define SCNuFAST8 "hhu" 196 | # define SCNuFAST16 __PRIPTR_PREFIX "u" 197 | # define SCNuFAST32 __PRIPTR_PREFIX "u" 198 | # define SCNuFAST64 __PRI64_PREFIX "u" 199 | 200 | /* Octal notation. 
*/ 201 | # define SCNo8 "hho" 202 | # define SCNo16 "ho" 203 | # define SCNo32 "o" 204 | # define SCNo64 __PRI64_PREFIX "o" 205 | 206 | # define SCNoLEAST8 "hho" 207 | # define SCNoLEAST16 "ho" 208 | # define SCNoLEAST32 "o" 209 | # define SCNoLEAST64 __PRI64_PREFIX "o" 210 | 211 | # define SCNoFAST8 "hho" 212 | # define SCNoFAST16 __PRIPTR_PREFIX "o" 213 | # define SCNoFAST32 __PRIPTR_PREFIX "o" 214 | # define SCNoFAST64 __PRI64_PREFIX "o" 215 | 216 | /* Hexadecimal notation. */ 217 | # define SCNx8 "hhx" 218 | # define SCNx16 "hx" 219 | # define SCNx32 "x" 220 | # define SCNx64 __PRI64_PREFIX "x" 221 | 222 | # define SCNxLEAST8 "hhx" 223 | # define SCNxLEAST16 "hx" 224 | # define SCNxLEAST32 "x" 225 | # define SCNxLEAST64 __PRI64_PREFIX "x" 226 | 227 | # define SCNxFAST8 "hhx" 228 | # define SCNxFAST16 __PRIPTR_PREFIX "x" 229 | # define SCNxFAST32 __PRIPTR_PREFIX "x" 230 | # define SCNxFAST64 __PRI64_PREFIX "x" 231 | 232 | 233 | /* Macros for scanning `intmax_t' and `uintmax_t'. */ 234 | # define SCNdMAX __PRI64_PREFIX "d" 235 | # define SCNiMAX __PRI64_PREFIX "i" 236 | # define SCNoMAX __PRI64_PREFIX "o" 237 | # define SCNuMAX __PRI64_PREFIX "u" 238 | # define SCNxMAX __PRI64_PREFIX "x" 239 | 240 | /* Macros for scanning `intptr_t' and `uintptr_t'.
*/ 241 | # define SCNdPTR __PRIPTR_PREFIX "d" 242 | # define SCNiPTR __PRIPTR_PREFIX "i" 243 | # define SCNoPTR __PRIPTR_PREFIX "o" 244 | # define SCNuPTR __PRIPTR_PREFIX "u" 245 | # define SCNxPTR __PRIPTR_PREFIX "x" 246 | 247 | #endif /* inttypes.h */ 248 | -------------------------------------------------------------------------------- /ksm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * This program is free software; you can redistribute it and/or modify it 6 | * under the terms and conditions of the GNU General Public License, 7 | * version 2, as published by the Free Software Foundation. 8 | * 9 | * This program is distributed in the hope it will be useful, but WITHOUT 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 | * more details. 13 | * 14 | * You should have received a copy of the GNU General Public License along with 15 | * this program; If not, see . 16 | */ 17 | #ifdef __linux__ 18 | #include 19 | #include 20 | #include 21 | #else 22 | #include 23 | #include 24 | #endif 25 | 26 | #include "ksm.h" 27 | #include "percpu.h" 28 | #include "bitmap.h" 29 | 30 | /* 31 | * This file mostly manages CPUs initialization and deinitialization 32 | * but is not limited to that, it also initializes shared stuff and defines 33 | * some shared functions such as ksm_read_virt()/ksm_write_virt(), which can 34 | * be called from root mode to read/write to a guest virtual address. 35 | * 36 | * For per-cpu initialization see vcpu.c. 37 | * For VM-exit handlers see exit.c. 38 | * For the macro magic (aka DEFINE_DPC, etc.) see percpu.h.
39 | * 40 | * The `ksm' structure is a shared structure, it shares common things between 41 | * all CPUs such as I/O bitmaps, MSR bitmap, etc, however, this global variable 42 | * `ksm' is not supposed to be used inside root, you should instead utilize the 43 | * function (defined in ksm.h): vcpu_to_ksm() as follows: 44 | * struct ksm *k = vcpu_to_ksm(vcpu); 45 | */ 46 | struct ksm *ksm = NULL; 47 | 48 | /* 49 | * Setup the MSR bitmap. 50 | * There are 4 things here: 51 | * - Read bitmap low (aka MSR indices of 0 to 0x1FFF) 52 | * offset: +0 53 | * - Read bitmap high (aka MSR indices of 0xC0000000 to 0xC0001FFF) 54 | * offset: +1024 55 | * - Write bitmap low (same thing as read low) 56 | * offset: +2048 57 | * - Write bitmap high (same thing as read high) 58 | * offset: +3072 59 | * 60 | * To opt-in for an MSR vm-exit, simply set the bit of it. 61 | * Note: for high msrs, subtract 0xC0000000 from it, e.g.: 62 | * set_bit(MSR_STAR - 0xC0000000, write_hi); 63 | * 64 | * We currently opt in for reads to MSRs that are VT-x related, so that we can 65 | * emulate VT-x ("nesting"). 66 | * 67 | * Note: No real reason to opt-in for writes to VT-x MSRs, those are readonly 68 | * anyway and the CPU will throw #GP to any writes there.
69 | * 70 | * See also: 71 | * vcpu_handle_rdmsr() in exit.c 72 | * vcpu_handle_wrmsr() in exit.c 73 | */ 74 | static inline void init_msr_bitmap(struct ksm *k) 75 | { 76 | unsigned long *read_lo = (unsigned long *)k->msr_bitmap; 77 | set_bit(MSR_IA32_FEATURE_CONTROL, read_lo); 78 | #ifdef NESTED_VMX 79 | for (u32 msr = MSR_IA32_VMX_BASIC; msr <= MSR_IA32_VMX_VMFUNC; ++msr) 80 | set_bit(msr, read_lo); 81 | #endif 82 | 83 | #ifdef NESTED_VMX 84 | unsigned long *write_lo = (unsigned long *)((char *)k->msr_bitmap + 2048); 85 | set_bit(MSR_IA32_FEATURE_CONTROL, write_lo); 86 | #endif 87 | } 88 | 89 | static inline void init_io_bitmaps(struct ksm *k) 90 | { 91 | /* 92 | * Set up I/O bitmaps, see: 93 | * vcpu_handle_io_instr() in exit.c 94 | */ 95 | 96 | #if 0 /* This can be annoying */ 97 | unsigned long *bitmap_a = (unsigned long *)(k->io_bitmap_a); 98 | set_bit(0x60, bitmap_a); /* PS/2 Mice */ 99 | set_bit(0x64, bitmap_a); /* PS/2 Mice and keyboard */ 100 | #endif 101 | } 102 | 103 | /* 104 | * Virtualizes current CPU.
105 | */ 106 | int __ksm_init_cpu(struct ksm *k) 107 | { 108 | struct vcpu *vcpu; 109 | int ret = ERR_NOMEM; 110 | u64 feat_ctl; 111 | u64 required_feat_bits = FEATURE_CONTROL_LOCKED | 112 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 113 | 114 | vcpu = ksm_cpu(k); 115 | if (vcpu->subverted) { 116 | KSM_DEBUG_RAW("CPU already subverted\n"); 117 | return 0; 118 | } 119 | 120 | #ifdef __linux__ 121 | if (tboot_enabled()) 122 | required_feat_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; 123 | #endif 124 | 125 | feat_ctl = __readmsr(MSR_IA32_FEATURE_CONTROL); 126 | if ((feat_ctl & required_feat_bits) != required_feat_bits) { 127 | if (feat_ctl & FEATURE_CONTROL_LOCKED) 128 | return ERR_BUSY; 129 | 130 | __writemsr(MSR_IA32_FEATURE_CONTROL, feat_ctl | required_feat_bits); 131 | feat_ctl = __readmsr(MSR_IA32_FEATURE_CONTROL); 132 | if ((feat_ctl & required_feat_bits) != required_feat_bits) 133 | return ERR_DENIED; 134 | } 135 | 136 | ret = vcpu_init(vcpu); 137 | if (ret < 0) { 138 | KSM_DEBUG_RAW("failed to create vcpu, oom?\n"); 139 | return ret; 140 | } 141 | 142 | /* Saves state and calls vcpu_run() (Defined in assembly, vmx.{S,asm} */ 143 | ret = __vmx_vminit(vcpu); 144 | KSM_DEBUG("%s: Started: %d\n", proc_name(), !ret); 145 | 146 | if (ret < 0) 147 | goto out; 148 | 149 | vcpu->subverted = true; 150 | k->active_vcpus++; 151 | return 0; 152 | 153 | out: 154 | vcpu_free(vcpu); 155 | __writecr4(__readcr4() & ~X86_CR4_VMXE); 156 | return ret; 157 | } 158 | 159 | /* 160 | * Subvert (i.e. virtualize) all processors, this should be 161 | * called on initialization or to re-virtualize. 162 | */ 163 | static DEFINE_DPC(__call_init, __ksm_init_cpu, ctx); 164 | int ksm_subvert(struct ksm *k) 165 | { 166 | CALL_DPC(__call_init, k); 167 | return DPC_RET(); 168 | } 169 | 170 | /* 171 | * Initialize and allocate the shared structure. 
172 | */ 173 | int ksm_init(struct ksm **kp) 174 | { 175 | struct mtrr_range *range; 176 | struct ksm *k; 177 | int info[4]; 178 | int ret = ERR_NOMEM; 179 | int i; 180 | u64 vpid; 181 | u64 req = KSM_EPT_REQUIRED_EPT 182 | #ifdef ENABLE_PML 183 | | VMX_EPT_AD_BIT 184 | #endif 185 | #ifdef EPAGE_HOOK 186 | | VMX_EPT_EXECUTE_ONLY_BIT 187 | #endif 188 | ; 189 | 190 | __cpuidex(info, 1, 0); 191 | if (!(info[2] & (1 << (X86_FEATURE_VMX & 31)))) 192 | return ERR_CPUID; 193 | 194 | if (__readcr4() & X86_CR4_VMXE) 195 | return ERR_BUSY; 196 | 197 | vpid = __readmsr(MSR_IA32_VMX_EPT_VPID_CAP); 198 | if ((vpid & req) != req) 199 | return ERR_FEAT; 200 | 201 | k = mm_alloc_pool(sizeof(*k)); 202 | if (!k) 203 | return ret; 204 | 205 | k->vpid_ept = vpid; 206 | KSM_DEBUG("EPT/VPID caps: 0x%016llX\n", vpid); 207 | 208 | ret = mm_cache_ram_ranges(&k->ranges[0], &k->range_count); 209 | if (ret < 0) 210 | goto out_ksm; 211 | 212 | KSM_DEBUG("%d physical memory ranges\n", k->range_count); 213 | for (i = 0; i < k->range_count; ++i) 214 | KSM_DEBUG("Range: 0x%016llX -> 0x%016llX\n", k->ranges[i].start, k->ranges[i].end); 215 | 216 | /* MTRR */ 217 | mm_cache_mtrr_ranges(&k->mtrr_ranges[0], &k->mtrr_count, &k->mtrr_def); 218 | KSM_DEBUG("%d MTRR ranges (%d default type)\n", k->mtrr_count, k->mtrr_def); 219 | for (i = 0; i < k->mtrr_count; i++) { 220 | range = &k->mtrr_ranges[i]; 221 | KSM_DEBUG("MTRR Range: 0x%016llX -> 0x%016llX fixed: %d type: %d\n", 222 | range->start, range->end, range->fixed, range->type); 223 | } 224 | 225 | #ifdef EPAGE_HOOK 226 | ret = ksm_epage_init(k); 227 | if (ret < 0) 228 | goto out_ksm; 229 | #endif 230 | 231 | #ifdef PMEM_SANDBOX 232 | ret = ksm_sandbox_init(k); 233 | if (ret < 0) 234 | goto out_epage; 235 | #endif 236 | 237 | #ifdef INTROSPECT_ENGINE 238 | ret = ksm_introspect_init(k); 239 | if (ret < 0) 240 | goto out_sbox; 241 | #endif 242 | 243 | ret = register_power_callback(); 244 | if (ret < 0) 245 | goto out_intro; 246 | 247 | ret = 
register_cpu_callback(); 248 | if (ret == 0) { 249 | init_msr_bitmap(k); 250 | init_io_bitmaps(k); 251 | *kp = k; 252 | return ret; 253 | } 254 | 255 | unregister_power_callback(); 256 | out_intro: 257 | #ifdef INTROSPECT_ENGINE 258 | ksm_introspect_exit(k); 259 | out_sbox: 260 | #endif 261 | #ifdef PMEM_SANDBOX 262 | ksm_sandbox_exit(k); 263 | out_epage: 264 | #endif 265 | #ifdef EPAGE_HOOK 266 | ksm_epage_exit(k); 267 | #endif 268 | out_ksm: 269 | mm_free_pool(k, sizeof(*k)); 270 | return ret; 271 | } 272 | 273 | /* 274 | * Devirtualizes current processor, if the current processor 275 | * is not virtualized, an error is returned. 276 | */ 277 | int __ksm_exit_cpu(struct ksm *k) 278 | { 279 | int ret = ERR_NOTH; 280 | struct vcpu *vcpu = ksm_cpu(k); 281 | if (!vcpu->subverted) 282 | return ret; 283 | 284 | ret = __vmx_vmcall(HCALL_STOP, NULL); 285 | if (ret == 0) { 286 | k->active_vcpus--; 287 | vcpu->subverted = false; 288 | vcpu_free(vcpu); 289 | __writecr4(__readcr4() & ~X86_CR4_VMXE); 290 | } 291 | 292 | return ret; 293 | } 294 | 295 | /* 296 | * Devirtualize all processors, returning an error if one or 297 | * more aren't virtualized... 298 | */ 299 | DEFINE_DPC(__call_exit, __ksm_exit_cpu, ctx); 300 | int ksm_unsubvert(struct ksm *k) 301 | { 302 | if (k->active_vcpus == 0) 303 | return ERR_NOTH; 304 | 305 | CALL_DPC(__call_exit, k); 306 | return DPC_RET(); 307 | } 308 | 309 | /* 310 | * Frees resources and devirtualizes all processors, 311 | * Only called on driver unload... 312 | */ 313 | int ksm_free(struct ksm *k) 314 | { 315 | int ret; 316 | 317 | /* These may need virtualization active... 
*/ 318 | #ifdef PMEM_SANDBOX 319 | ksm_sandbox_exit(k); 320 | #endif 321 | #ifdef INTROSPECT_ENGINE 322 | ksm_introspect_exit(k); 323 | #endif 324 | 325 | /* Desubvert all: */ 326 | ret = ksm_unsubvert(k); 327 | 328 | #ifdef EPAGE_HOOK 329 | ksm_epage_exit(k); 330 | #endif 331 | 332 | unregister_cpu_callback(); 333 | unregister_power_callback(); 334 | mm_free_pool(k, sizeof(*k)); 335 | return ret; 336 | } 337 | 338 | /* 339 | * Hook the IDT entry at index @n, and redirect it to the function 340 | * @h, should always succeed unless one of the processors are not 341 | * virtualized, may throw an exception since it does __vmx_vmcall 342 | * without checking. 343 | */ 344 | static DEFINE_DPC(__call_idt_hook, __vmx_vmcall, HCALL_IDT, ctx); 345 | int ksm_hook_idt(unsigned n, void *h) 346 | { 347 | CALL_DPC(__call_idt_hook, &(struct shadow_idt_entry) { 348 | .n = n, 349 | .h = h, 350 | }); 351 | return DPC_RET(); 352 | } 353 | 354 | /* 355 | * Unhook an IDT entry at index @n, restoring last known one. 356 | * Note: if you call `ksm_hook_idt` on same entry twice, then this will 357 | * restore the one from first call, not the original! 358 | * 359 | * IDT is always restored to the real one when devirtualization happens, 360 | * disregarding all entries that were set prior. 361 | */ 362 | static DEFINE_DPC(__call_idt_unhook, __vmx_vmcall, HCALL_UIDT, ctx); 363 | int ksm_free_idt(unsigned n) 364 | { 365 | CALL_DPC(__call_idt_unhook, &(struct shadow_idt_entry) { 366 | .n = n, 367 | .h = NULL, 368 | }); 369 | return DPC_RET(); 370 | } 371 | 372 | /* 373 | * Write @data of length @len into @gva. 374 | * If it returns false, a fault should be injected. 
*/ 376 | bool ksm_write_virt(struct vcpu *vcpu, u64 gva, const u8 *data, size_t len) 377 | { 378 | pte_t *pte; 379 | u64 hpa; 380 | size_t off; 381 | size_t copy; 382 | uintptr_t cr3; 383 | char *tmp; 384 | 385 | off = 0; 386 | cr3 = vmcs_read(GUEST_CR3); 387 | while (len) { 388 | pte = __gva_to_gpa(vcpu, cr3, gva, 389 | PAGE_PRESENT | PAGE_WRITE); 390 | if (!pte) 391 | return false; 392 | 393 | if (!gpa_to_hpa(vcpu, PAGE_PPA(pte), &hpa)) 394 | return false; 395 | 396 | tmp = mm_remap(hpa, PAGE_SIZE); 397 | if (!tmp) 398 | return false; 399 | 400 | /* Write up to remaining in the page, not in len. */ 401 | off = addr_offset(gva); 402 | copy = min(len, PAGE_SIZE - off); 403 | memcpy(tmp + off, data, copy); 404 | mm_unmap(tmp, PAGE_SIZE); 405 | 406 | /* Mark it dirty */ 407 | mark_pte_dirty(pte); 408 | 409 | len -= copy; 410 | data += copy; 411 | gva += copy; 412 | } 413 | 414 | return true; 415 | } 416 | 417 | /* 418 | * Read from @gva into @data of length @len 419 | * If it returns false, a fault should be injected. 420 | */ 421 | bool ksm_read_virt(struct vcpu *vcpu, u64 gva, u8 *data, size_t len) 422 | { 423 | pte_t *pte; 424 | u64 hpa; 425 | size_t off; 426 | size_t copy; 427 | uintptr_t cr3; 428 | u8 *tmp; 429 | u8 *d; 430 | 431 | d = data; 432 | off = 0; 433 | cr3 = vmcs_read(GUEST_CR3); 434 | while (len) { 435 | pte = __gva_to_gpa(vcpu, cr3, gva, PAGE_PRESENT); 436 | if (!pte) 437 | return false; 438 | 439 | if (!gpa_to_hpa(vcpu, PAGE_PPA(pte), &hpa)) 440 | return false; 441 | 442 | tmp = mm_remap(hpa, PAGE_SIZE); 443 | if (!tmp) 444 | return false; 445 | 446 | /* Read up to remaining in the page, not in len.
*/ 447 | off = addr_offset(gva); 448 | copy = min(len, PAGE_SIZE - off); 449 | memcpy(d, tmp + off, copy); 450 | mm_unmap(tmp, PAGE_SIZE); 451 | 452 | /* Mark it accessed */ 453 | mark_pte_accessed(pte); 454 | 455 | len -= copy; 456 | d += copy; 457 | gva += copy; 458 | } 459 | 460 | return true; 461 | } 462 | -------------------------------------------------------------------------------- /ksm/ksm.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25420.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ksm", "ksm\ksm.vcxproj", "{A6906F8A-279E-439B-8B7A-74AFF4B82502}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ksm_um", "ksm_um\ksm_um.vcxproj", "{C7188CA8-344E-4BDB-BB33-A8BA5D1A6516}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|x64 = Debug|x64 13 | Debug|x86 = Debug|x86 14 | Release|x64 = Release|x64 15 | Release|x86 = Release|x86 16 | EndGlobalSection 17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 18 | {A6906F8A-279E-439B-8B7A-74AFF4B82502}.Debug|x64.ActiveCfg = Debug|x64 19 | {A6906F8A-279E-439B-8B7A-74AFF4B82502}.Debug|x64.Build.0 = Debug|x64 20 | {A6906F8A-279E-439B-8B7A-74AFF4B82502}.Debug|x86.ActiveCfg = Debug|x64 21 | {A6906F8A-279E-439B-8B7A-74AFF4B82502}.Release|x64.ActiveCfg = Release|x64 22 | {A6906F8A-279E-439B-8B7A-74AFF4B82502}.Release|x64.Build.0 = Release|x64 23 | {A6906F8A-279E-439B-8B7A-74AFF4B82502}.Release|x86.ActiveCfg = Release|x64 24 | {C7188CA8-344E-4BDB-BB33-A8BA5D1A6516}.Debug|x64.ActiveCfg = Debug|x64 25 | {C7188CA8-344E-4BDB-BB33-A8BA5D1A6516}.Debug|x64.Build.0 = Debug|x64 26 | {C7188CA8-344E-4BDB-BB33-A8BA5D1A6516}.Debug|x86.ActiveCfg = Debug|Win32 27 | {C7188CA8-344E-4BDB-BB33-A8BA5D1A6516}.Debug|x86.Build.0 = Debug|Win32 
28 | {C7188CA8-344E-4BDB-BB33-A8BA5D1A6516}.Release|x64.ActiveCfg = Release|x64 29 | {C7188CA8-344E-4BDB-BB33-A8BA5D1A6516}.Release|x64.Build.0 = Release|x64 30 | {C7188CA8-344E-4BDB-BB33-A8BA5D1A6516}.Release|x86.ActiveCfg = Release|Win32 31 | {C7188CA8-344E-4BDB-BB33-A8BA5D1A6516}.Release|x86.Build.0 = Release|Win32 32 | EndGlobalSection 33 | GlobalSection(SolutionProperties) = preSolution 34 | HideSolutionNode = FALSE 35 | EndGlobalSection 36 | EndGlobal 37 | -------------------------------------------------------------------------------- /ksm/ksm/ksm.inf: -------------------------------------------------------------------------------- 1 | ; 2 | ; ksm.inf 3 | ; 4 | 5 | [Version] 6 | Signature="$WINDOWS NT$" 7 | Class=Sample ; TODO: edit Class 8 | ClassGuid={78A1C341-4539-11d3-B88D-00C04FAD5171} ; TODO: edit ClassGuid 9 | Provider=%ManufacturerName% 10 | CatalogFile=ksm.cat 11 | DriverVer= ; TODO: set DriverVer in stampinf property pages 12 | 13 | [DestinationDirs] 14 | DefaultDestDir = 12 15 | 16 | ; ================= Class section ===================== 17 | 18 | [ClassInstall32] 19 | Addreg=SampleClassReg 20 | 21 | [SampleClassReg] 22 | HKR,,,0,%ClassName% 23 | HKR,,Icon,,-5 24 | 25 | [SourceDisksNames] 26 | 1 = %DiskName%,,,"" 27 | 28 | [SourceDisksFiles] 29 | ksm.sys = 1,, 30 | 31 | ;***************************************** 32 | ; Install Section 33 | ;***************************************** 34 | 35 | [Manufacturer] 36 | %ManufacturerName%=Standard,NT$ARCH$ 37 | 38 | [Standard.NT$ARCH$] 39 | %ksm.DeviceDesc%=ksm_Device, Root\ksm ; TODO: edit hw-id 40 | 41 | [ksm_Device.NT] 42 | CopyFiles=Drivers_Dir 43 | 44 | [Drivers_Dir] 45 | ksm.sys 46 | 47 | ;-------------- Service installation 48 | [ksm_Device.NT.Services] 49 | AddService = ksm,%SPSVCINST_ASSOCSERVICE%, ksm_Service_Inst 50 | 51 | ; -------------- ksm driver install sections 52 | [ksm_Service_Inst] 53 | DisplayName = %ksm.SVCDESC% 54 | ServiceType = 1 ; SERVICE_KERNEL_DRIVER 55 | StartType = 3 ; 
SERVICE_DEMAND_START 56 | ErrorControl = 1 ; SERVICE_ERROR_NORMAL 57 | ServiceBinary = %12%\ksm.sys 58 | 59 | ; 60 | ;--- ksm_Device Coinstaller installation ------ 61 | ; 62 | 63 | [DestinationDirs] 64 | ksm_Device_CoInstaller_CopyFiles = 11 65 | 66 | [ksm_Device.NT.CoInstallers] 67 | AddReg=ksm_Device_CoInstaller_AddReg 68 | CopyFiles=ksm_Device_CoInstaller_CopyFiles 69 | 70 | [ksm_Device_CoInstaller_AddReg] 71 | HKR,,CoInstallers32,0x00010000, "WdfCoInstaller$KMDFCOINSTALLERVERSION$.dll,WdfCoInstaller" 72 | 73 | [ksm_Device_CoInstaller_CopyFiles] 74 | WdfCoInstaller$KMDFCOINSTALLERVERSION$.dll 75 | 76 | [SourceDisksFiles] 77 | WdfCoInstaller$KMDFCOINSTALLERVERSION$.dll=1 ; make sure the number matches with SourceDisksNames 78 | 79 | [ksm_Device.NT.Wdf] 80 | KmdfService = ksm, ksm_wdfsect 81 | [ksm_wdfsect] 82 | KmdfLibraryVersion = $KMDFVERSION$ 83 | 84 | [Strings] 85 | SPSVCINST_ASSOCSERVICE= 0x00000002 86 | ManufacturerName="" ;TODO: Replace with your manufacturer name 87 | ClassName="Samples" ; TODO: edit ClassName 88 | DiskName = "ksm Installation Disk" 89 | ksm.DeviceDesc = "ksm Device" 90 | ksm.SVCDESC = "ksm Service" 91 | -------------------------------------------------------------------------------- /ksm/ksm/ksm.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | {A6906F8A-279E-439B-8B7A-74AFF4B82502} 15 | {1bc93793-694f-48fe-9372-81e2b05556fd} 16 | v4.5 17 | 12.0 18 | Debug 19 | Win32 20 | ksm 21 | 22 | 23 | 24 | 25 | 26 | true 27 | WindowsKernelModeDriver10.0 28 | Driver 29 | KMDF 30 | Desktop 31 | false 32 | 33 | 34 | 35 | 36 | false 37 | WindowsKernelModeDriver10.0 38 | Driver 39 | KMDF 40 | Desktop 41 | false 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | DbgengKernelDebugger 53 | http://timestamp.digicert.com 54 | 55 | 56 | 57 | 58 | DbgengKernelDebugger 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | sha256 69 | 70 | 
71 | ENABLE_DBGPRINT;NESTED_VMX;%(PreprocessorDefinitions) 72 | false 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | _WIN64;_AMD64_;AMD64;ENABLE_DBGPRINT;ENABLE_RESUBV;%(PreprocessorDefinitions) 86 | false 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | __ASSEMBLY__ 122 | 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /ksm/ksm/ksm.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {8E41214B-6785-4CFE-B992-037D68949A14} 14 | inf;inv;inx;mof;mc; 15 | 16 | 17 | 18 | 19 | Driver Files 20 | 21 | 22 | 23 | 24 | Source Files 25 | 26 | 27 | Source Files 28 | 29 | 30 | Source Files 31 | 32 | 33 | Source Files 34 | 35 | 36 | Source Files 37 | 38 | 39 | Source Files 40 | 41 | 42 | Source Files 43 | 44 | 45 | Source Files 46 | 47 | 48 | Source Files 49 | 50 | 51 | Source Files 52 | 53 | 54 | Source Files 55 | 56 | 57 | 58 | 59 | Header Files 60 | 61 | 62 | Header Files 63 | 64 | 65 | Header Files 66 | 67 | 68 | Header Files 69 | 70 | 71 | Header Files 72 | 73 | 74 | Header Files 75 | 76 | 77 | Header Files 78 | 79 | 80 | Header Files 81 | 82 | 83 | Header Files 84 | 85 | 86 | 87 | 88 | Source Files 89 | 90 | 91 | -------------------------------------------------------------------------------- /ksm/ksm_tests/ksm_tests.inf: -------------------------------------------------------------------------------- 1 | ; 2 | ; ksm_tests.inf 3 | ; 4 | 5 | [Version] 6 | Signature="$WINDOWS NT$" 7 | Class=Sample ; TODO: edit Class 8 | ClassGuid={78A1C341-4539-11d3-B88D-00C04FAD5171} ; TODO: edit ClassGuid 9 | 
Provider=%ManufacturerName% 10 | CatalogFile=ksm_tests.cat 11 | DriverVer= ; TODO: set DriverVer in stampinf property pages 12 | 13 | [DestinationDirs] 14 | DefaultDestDir = 12 15 | 16 | ; ================= Class section ===================== 17 | 18 | [ClassInstall32] 19 | Addreg=SampleClassReg 20 | 21 | [SampleClassReg] 22 | HKR,,,0,%ClassName% 23 | HKR,,Icon,,-5 24 | 25 | [SourceDisksNames] 26 | 1 = %DiskName%,,,"" 27 | 28 | [SourceDisksFiles] 29 | ksm_tests.sys = 1,, 30 | 31 | ;***************************************** 32 | ; Install Section 33 | ;***************************************** 34 | 35 | [Manufacturer] 36 | %ManufacturerName%=Standard,NT$ARCH$ 37 | 38 | [Standard.NT$ARCH$] 39 | %ksm_tests.DeviceDesc%=ksm_tests_Device, Root\ksm_tests ; TODO: edit hw-id 40 | 41 | [ksm_tests_Device.NT] 42 | CopyFiles=Drivers_Dir 43 | 44 | [Drivers_Dir] 45 | ksm_tests.sys 46 | 47 | ;-------------- Service installation 48 | [ksm_tests_Device.NT.Services] 49 | AddService = ksm_tests,%SPSVCINST_ASSOCSERVICE%, ksm_tests_Service_Inst 50 | 51 | ; -------------- ksm_tests driver install sections 52 | [ksm_tests_Service_Inst] 53 | DisplayName = %ksm_tests.SVCDESC% 54 | ServiceType = 1 ; SERVICE_KERNEL_DRIVER 55 | StartType = 3 ; SERVICE_DEMAND_START 56 | ErrorControl = 1 ; SERVICE_ERROR_NORMAL 57 | ServiceBinary = %12%\ksm_tests.sys 58 | 59 | ; 60 | ;--- ksm_tests_Device Coinstaller installation ------ 61 | ; 62 | 63 | [DestinationDirs] 64 | ksm_tests_Device_CoInstaller_CopyFiles = 11 65 | 66 | [ksm_tests_Device.NT.CoInstallers] 67 | AddReg=ksm_tests_Device_CoInstaller_AddReg 68 | CopyFiles=ksm_tests_Device_CoInstaller_CopyFiles 69 | 70 | [ksm_tests_Device_CoInstaller_AddReg] 71 | HKR,,CoInstallers32,0x00010000, "WdfCoInstaller$KMDFCOINSTALLERVERSION$.dll,WdfCoInstaller" 72 | 73 | [ksm_tests_Device_CoInstaller_CopyFiles] 74 | WdfCoInstaller$KMDFCOINSTALLERVERSION$.dll 75 | 76 | [SourceDisksFiles] 77 | WdfCoInstaller$KMDFCOINSTALLERVERSION$.dll=1 ; make sure the number matches 
with SourceDisksNames 78 | 79 | [ksm_tests_Device.NT.Wdf] 80 | KmdfService = ksm_tests, ksm_tests_wdfsect 81 | [ksm_tests_wdfsect] 82 | KmdfLibraryVersion = $KMDFVERSION$ 83 | 84 | [Strings] 85 | SPSVCINST_ASSOCSERVICE= 0x00000002 86 | ManufacturerName="" ;TODO: Replace with your manufacturer name 87 | ClassName="Samples" ; TODO: edit ClassName 88 | DiskName = "ksm_tests Installation Disk" 89 | ksm_tests.DeviceDesc = "ksm_tests Device" 90 | ksm_tests.SVCDESC = "ksm_tests Service" 91 | -------------------------------------------------------------------------------- /ksm/ksm_tests/ksm_tests.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | {05C0BBF7-43B5-4056-A556-7644A9B34C1A} 15 | {1bc93793-694f-48fe-9372-81e2b05556fd} 16 | v4.5 17 | 12.0 18 | Debug 19 | Win32 20 | ksm_tests 21 | 22 | 23 | 24 | Windows7 25 | true 26 | WindowsKernelModeDriver10.0 27 | Driver 28 | KMDF 29 | Desktop 30 | false 31 | 32 | 33 | Windows10 34 | false 35 | WindowsKernelModeDriver10.0 36 | Driver 37 | KMDF 38 | Universal 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | DbgengKernelDebugger 50 | 51 | 52 | DbgengKernelDebugger 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | false 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /ksm/ksm_tests/ksm_tests.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | {8E41214B-6785-4CFE-B992-037D68949A14} 18 | 
inf;inv;inx;mof;mc; 19 | 20 | 21 | 22 | 23 | Driver Files 24 | 25 | 26 | 27 | 28 | Source Files 29 | 30 | 31 | Source Files 32 | 33 | 34 | Source Files 35 | 36 | 37 | Source Files 38 | 39 | 40 | Source Files 41 | 42 | 43 | Source Files 44 | 45 | 46 | Source Files 47 | 48 | 49 | 50 | 51 | Header Files 52 | 53 | 54 | 55 | 56 | Source Files 57 | 58 | 59 | -------------------------------------------------------------------------------- /ksm/ksm_um/ksm_um.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | {C7188CA8-344E-4BDB-BB33-A8BA5D1A6516} 29 | ksm_um 30 | 8.1 31 | 32 | 33 | 34 | Application 35 | true 36 | v140 37 | MultiByte 38 | 39 | 40 | Application 41 | false 42 | v140 43 | true 44 | MultiByte 45 | 46 | 47 | Application 48 | true 49 | v140 50 | MultiByte 51 | 52 | 53 | Application 54 | false 55 | v140 56 | true 57 | MultiByte 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | Level3 81 | Disabled 82 | true 83 | 84 | 85 | 86 | 87 | Level3 88 | Disabled 89 | true 90 | _CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) 91 | 92 | 93 | 94 | 95 | Level3 96 | MaxSpeed 97 | true 98 | true 99 | true 100 | 101 | 102 | true 103 | true 104 | 105 | 106 | 107 | 108 | Level3 109 | MaxSpeed 110 | true 111 | true 112 | true 113 | MultiThreaded 114 | _CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) 115 | 116 | 117 | true 118 | true 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /ksm/ksm_um/ksm_um.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | 
{93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | 14 | 15 | Source Files 16 | 17 | 18 | 19 | 20 | Header Files 21 | 22 | 23 | -------------------------------------------------------------------------------- /main_linux.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * Main entrypoint for the Linux kernel module. 6 | * 7 | * This program is free software; you can redistribute it and/or modify it 8 | * under the terms and conditions of the GNU General Public License, 9 | * version 2, as published by the Free Software Foundation. 10 | * 11 | * This program is distributed in the hope it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | * more details. 15 | * 16 | * You should have received a copy of the GNU General Public License along with 17 | * this program; If not, see . 
18 | */ 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "ksm.h" 26 | #include "um/um.h" 27 | 28 | static struct mm_struct *mm = NULL; 29 | static int major_no; 30 | static struct class *class; 31 | 32 | static long ksm_ioctl(struct file *filp, unsigned int cmd, unsigned long args) 33 | { 34 | int ret = -EINVAL; 35 | int __maybe_unused pid = 0; 36 | struct watch_ioctl *watch = NULL; 37 | KSM_DEBUG("ioctl from %s: cmd(0x%08X)\n", 38 | current->comm, cmd); 39 | 40 | if (mm && current->mm != mm) { 41 | KSM_DEBUG("not processing ioctl from %s\n", current->comm); 42 | goto out; 43 | } 44 | 45 | #ifdef INTROSPECT_ENGINE 46 | if (cmd >= KSM_IOCTL_INTRO_WATCH && cmd <= KSM_IOCTL_INTRO_STATS) { 47 | watch = kmalloc(sizeof(*watch), GFP_KERNEL | __GFP_ZERO); 48 | if (!watch) 49 | return -ENOMEM; 50 | } 51 | #endif 52 | 53 | switch (cmd) { 54 | #ifdef PMEM_SANDBOX 55 | case KSM_IOCTL_SANDBOX: 56 | ret = -EFAULT; 57 | if (copy_from_user(&pid, (const void __force *)args, sizeof(pid))) 58 | break; 59 | 60 | KSM_DEBUG("sandboxing %d\n", pid); 61 | ret = ksm_sandbox(ksm, pid); 62 | break; 63 | case KSM_IOCTL_UNBOX: 64 | ret = -EFAULT; 65 | if (copy_from_user(&pid, (const void __force *)args, sizeof(pid))) 66 | break; 67 | 68 | KSM_DEBUG("unsandboxing %d\n", pid); 69 | ret = ksm_unbox(ksm, pid); 70 | break; 71 | #endif 72 | case KSM_IOCTL_SUBVERT: 73 | if (!mm) { 74 | /* Steal their mm... 
*/ 75 | mm = current->active_mm; 76 | atomic_inc(&mm->mm_count); 77 | ksm->host_pgd = __pa(mm->pgd); 78 | } 79 | 80 | ret = ksm_subvert(ksm); 81 | break; 82 | case KSM_IOCTL_UNSUBVERT: 83 | ret = ksm_unsubvert(ksm); 84 | if (ret == 0 && mm) { 85 | KSM_DEBUG("derefering stolen mm\n"); 86 | mmdrop(mm); 87 | mm = NULL; 88 | } 89 | break; 90 | #ifdef INTROSPECT_ENGINE 91 | case KSM_IOCTL_INTRO_START: 92 | ret = ksm_introspect_start(ksm); 93 | break; 94 | case KSM_IOCTL_INTRO_STOP: 95 | ret = ksm_introspect_stop(ksm); 96 | break; 97 | case KSM_IOCTL_INTRO_WATCH: 98 | ret = -EFAULT; 99 | if (copy_from_user(watch, (const void __force *)args, sizeof(*watch))) 100 | break; 101 | 102 | ret = ksm_introspect_add_watch(ksm, watch); 103 | break; 104 | case KSM_IOCTL_INTRO_UNWATCH: 105 | ret = -EFAULT; 106 | if (copy_from_user(watch, (const void __force *)args, sizeof(*watch))) 107 | break; 108 | 109 | ret = ksm_introspect_rem_watch(ksm, watch); 110 | break; 111 | case KSM_IOCTL_INTRO_STATS: 112 | ret = -EFAULT; 113 | if (copy_from_user(watch, (const void __force *)args, sizeof(*watch))) 114 | break; 115 | 116 | ret = ksm_introspect_collect(ksm, watch); 117 | if (ret < 0) 118 | break; 119 | 120 | if (copy_to_user((void __force *)args, watch, sizeof(*watch))) 121 | ret = -EFAULT; 122 | break; 123 | #endif 124 | default: 125 | KSM_DEBUG("unknown ioctl code 0x%08X\n", cmd); 126 | ret = -EINVAL; 127 | break; 128 | } 129 | 130 | out: 131 | if (watch) 132 | kfree(watch); 133 | 134 | KSM_DEBUG("ioctl ret: %d\n", ret); 135 | return ret; 136 | } 137 | 138 | static int ksm_open(struct inode *node, struct file *filp) 139 | { 140 | KSM_DEBUG("open() from %s\n", current->comm); 141 | return 0; 142 | } 143 | 144 | static int ksm_release(struct inode *inode, struct file *filp) 145 | { 146 | KSM_DEBUG("release() from %s\n", current->comm); 147 | return 0; 148 | } 149 | 150 | static struct file_operations ksm_fops = { 151 | .open = ksm_open, 152 | .release = ksm_release, 153 | .unlocked_ioctl = 
ksm_ioctl, 154 | }; 155 | 156 | static int ksm_reboot(struct notifier_block *nb, unsigned long action, 157 | void *data) 158 | { 159 | ksm_unsubvert(ksm); 160 | return 0; 161 | } 162 | 163 | static struct notifier_block reboot_notify = { 164 | .notifier_call = ksm_reboot, 165 | }; 166 | 167 | static int __init ksm_start(void) 168 | { 169 | int ret = -ENOMEM; 170 | struct device *dev; 171 | 172 | ret = ksm_init(&ksm); 173 | if (ret < 0) 174 | return ret; 175 | 176 | major_no = register_chrdev(0, UM_DEVICE_NAME, &ksm_fops); 177 | if (major_no < 0) 178 | goto out_exit; 179 | 180 | ret = -ENODEV; 181 | KSM_DEBUG("Major: %d\n", major_no); 182 | 183 | class = class_create(THIS_MODULE, UM_DEVICE_NAME); 184 | if (!class) 185 | goto out_unregister; 186 | 187 | dev = device_create(class, NULL, MKDEV(major_no, 0), NULL, UM_DEVICE_NAME); 188 | if (dev) { 189 | register_reboot_notifier(&reboot_notify); 190 | KSM_DEBUG_RAW("ready\n"); 191 | return 0; 192 | } 193 | 194 | KSM_DEBUG_RAW("failed to create device\n"); 195 | class_destroy(class); 196 | 197 | out_unregister: 198 | unregister_chrdev(major_no, UM_DEVICE_NAME); 199 | out_exit: 200 | ksm_free(ksm); 201 | return ret; 202 | } 203 | 204 | static void __exit ksm_cleanup(void) 205 | { 206 | int ret, active; 207 | 208 | device_destroy(class, MKDEV(major_no, 0)); 209 | class_destroy(class); 210 | unregister_chrdev(major_no, UM_DEVICE_NAME); 211 | unregister_reboot_notifier(&reboot_notify); 212 | 213 | active = ksm->active_vcpus; 214 | ret = ksm_free(ksm); 215 | KSM_DEBUG("%d were active: ret: %d\n", active, ret); 216 | 217 | if (mm) 218 | mmdrop(mm); 219 | } 220 | 221 | module_init(ksm_start); 222 | module_exit(ksm_cleanup); 223 | 224 | MODULE_AUTHOR("Ahmed Samy"); 225 | MODULE_LICENSE("GPL"); 226 | 227 | -------------------------------------------------------------------------------- /main_nt.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 
hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * Main windows kernel driver entry point. 6 | * 7 | * This program is free software; you can redistribute it and/or modify it 8 | * under the terms and conditions of the GNU General Public License, 9 | * version 2, as published by the Free Software Foundation. 10 | * 11 | * This program is distributed in the hope it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | * more details. 15 | * 16 | * You should have received a copy of the GNU General Public License along with 17 | * this program; If not, see . 18 | */ 19 | #include 20 | #include 21 | 22 | #include "ksm.h" 23 | #include "um/um.h" 24 | 25 | typedef struct _LDR_DATA_TABLE_ENTRY { 26 | LIST_ENTRY InLoadOrderLinks; 27 | LIST_ENTRY InMemoryOrderLinks; 28 | LIST_ENTRY InInitializationOrderLinks; 29 | PVOID DllBase; 30 | PVOID EntryPoint; 31 | ULONG SizeOfImage; 32 | UNICODE_STRING FullDllName; 33 | UNICODE_STRING BaseDllName; 34 | ULONG Flags; 35 | USHORT LoadCount; 36 | USHORT TlsIndex; 37 | LIST_ENTRY HashLinks; 38 | ULONG TimeDateStamp; 39 | } LDR_DATA_TABLE_ENTRY, *PLDR_DATA_TABLE_ENTRY; 40 | 41 | #ifndef __GNUC__ 42 | DRIVER_INITIALIZE DriverEntry; 43 | #pragma alloc_text(INIT, DriverEntry) 44 | #endif 45 | 46 | PLIST_ENTRY PsLoadedModuleList; 47 | uintptr_t g_driver_base; 48 | uintptr_t g_driver_size; 49 | 50 | uintptr_t pxe_base = 0xfffff6fb7dbed000ull; 51 | uintptr_t ppe_base = 0xfffff6fb7da00000ull; 52 | uintptr_t pde_base = 0xfffff6fb40000000ull; 53 | uintptr_t pte_base = 0xfffff68000000000ull; 54 | 55 | /* Master process cr3 */ 56 | static uintptr_t caller_cr3 = 0; 57 | 58 | static inline NTSTATUS check_dynamic_pgtables(void) 59 | { 60 | /* On Windows 10 build 14316+ Page table base addresses are not static. 
*/ 61 | RTL_OSVERSIONINFOW osv; 62 | osv.dwOSVersionInfoSize = sizeof(osv); 63 | 64 | NTSTATUS status = RtlGetVersion(&osv); 65 | if (!NT_SUCCESS(status)) 66 | return status; 67 | 68 | if (osv.dwMajorVersion >= 10 && osv.dwBuildNumber >= 14316) { 69 | static const u8 pattern[] = { 70 | 0x48, 0x8b, 0x04, 0xd0, // mov rax, [rax+rdx*8] 71 | 0x48, 0xc1, 0xe0, 0x19, // shl rax, 19h 72 | 0x48, 0xba, // mov rdx, ????????`???????? ; PTE_BASE 73 | }; 74 | 75 | u8 *base = (u8 *)MmGetVirtualForPhysical; 76 | bool found = false; 77 | for (size_t i = 0; i <= 0x50 - sizeof(pattern); ++i) { 78 | if (RtlCompareMemory(pattern, &base[i], sizeof(pattern)) == sizeof(pattern)) { 79 | pte_base = *(uintptr_t *)(base + i + sizeof(pattern)); 80 | 81 | uintptr_t idx = (pte_base >> PGD_SHIFT_P) & PTX_MASK; 82 | pde_base = pte_base | (idx << PUD_SHIFT_P); 83 | ppe_base = pde_base | (idx << PMD_SHIFT_P); 84 | pxe_base = ppe_base | (idx << PTE_SHIFT_P); 85 | found = true; 86 | break; 87 | } 88 | } 89 | 90 | if (!found) 91 | return STATUS_NOT_FOUND; 92 | 93 | uintptr_t tmp = (uintptr_t)MmGetVirtualForPhysical; 94 | KSM_DEBUG("PXE: %p PPE %p PDE %p PTE %p\n", pxe_base, ppe_base, pde_base, pte_base); 95 | KSM_DEBUG("Addr 0x%X 0x%X\n", __pa((uintptr_t *)tmp), va_to_pa(tmp)); 96 | } 97 | 98 | return STATUS_SUCCESS; 99 | } 100 | 101 | static void DriverUnload(PDRIVER_OBJECT driverObject) 102 | { 103 | int ret; 104 | UNICODE_STRING deviceLink; 105 | 106 | UNREFERENCED_PARAMETER(driverObject); 107 | RtlInitUnicodeString(&deviceLink, KSM_DOS_NAME); 108 | 109 | ret = ksm_free(ksm); 110 | KSM_DEBUG("ret: 0x%08X\n", ret); 111 | #ifdef ENABLE_PRINT 112 | print_exit(); 113 | #endif 114 | IoDeleteSymbolicLink(&deviceLink); 115 | IoUnregisterShutdownNotification(driverObject->DeviceObject); 116 | IoDeleteDevice(driverObject->DeviceObject); 117 | } 118 | 119 | static NTSTATUS DriverDispatch(PDEVICE_OBJECT deviceObject, PIRP irp) 120 | { 121 | NTSTATUS status = STATUS_SUCCESS; 122 | PIO_STACK_LOCATION stack; 123 
| void *buf; 124 | u32 inlen; 125 | u32 ioctl; 126 | 127 | stack = IoGetCurrentIrpStackLocation(irp); 128 | switch (stack->MajorFunction) { 129 | case IRP_MJ_DEVICE_CONTROL: 130 | buf = irp->AssociatedIrp.SystemBuffer; 131 | ioctl = stack->Parameters.DeviceIoControl.IoControlCode; 132 | inlen = stack->Parameters.DeviceIoControl.InputBufferLength; 133 | KSM_DEBUG("%s: IOCTL: 0x%08X of length: %d\n", proc_name(), ioctl, inlen); 134 | 135 | if (caller_cr3 != 0 && caller_cr3 != __readcr3()) { 136 | KSM_DEBUG("%s: not processing ioctl\n", proc_name()); 137 | status = STATUS_ABANDONED; 138 | break; 139 | } 140 | 141 | switch (ioctl) { 142 | #ifdef PMEM_SANDBOX 143 | case KSM_IOCTL_SANDBOX: 144 | if (inlen < 4) 145 | status = STATUS_INFO_LENGTH_MISMATCH; 146 | else 147 | status = ksm_sandbox(ksm, (pid_t)(*(int *)buf)); 148 | break; 149 | case KSM_IOCTL_UNBOX: 150 | if (inlen < 4) 151 | status = STATUS_INFO_LENGTH_MISMATCH; 152 | else 153 | status = ksm_unbox(ksm, (pid_t)(*(int *)buf)); 154 | break; 155 | #endif 156 | case KSM_IOCTL_SUBVERT: 157 | caller_cr3 = __readcr3(); 158 | status = ksm_subvert(ksm); 159 | break; 160 | case KSM_IOCTL_UNSUBVERT: 161 | status = ksm_unsubvert(ksm); 162 | if (NT_SUCCESS(status)) 163 | caller_cr3 = 0; 164 | break; 165 | #ifdef INTROSPECT_ENGINE 166 | case KSM_IOCTL_INTRO_START: 167 | status = ksm_introspect_start(ksm); 168 | break; 169 | case KSM_IOCTL_INTRO_STOP: 170 | status = ksm_introspect_stop(ksm); 171 | break; 172 | case KSM_IOCTL_INTRO_WATCH: 173 | if (inlen < sizeof(struct watch_ioctl)) 174 | status = STATUS_INFO_LENGTH_MISMATCH; 175 | else 176 | status = ksm_introspect_add_watch(ksm, (struct watch_ioctl *)buf); 177 | break; 178 | case KSM_IOCTL_INTRO_UNWATCH: 179 | if (inlen < sizeof(struct watch_ioctl)) 180 | status = STATUS_INFO_LENGTH_MISMATCH; 181 | else 182 | status = ksm_introspect_rem_watch(ksm, (struct watch_ioctl *)buf); 183 | break; 184 | case KSM_IOCTL_INTRO_STATS: 185 | if (inlen < sizeof(struct watch_ioctl)) 186 | 
status = STATUS_INFO_LENGTH_MISMATCH; 187 | else if (NT_SUCCESS(status = ksm_introspect_collect(ksm, (struct watch_ioctl *)buf))) 188 | irp->IoStatus.Information = sizeof(struct watch_ioctl); 189 | break; 190 | #endif 191 | default: 192 | status = STATUS_NOT_SUPPORTED; 193 | break; 194 | } 195 | break; 196 | case IRP_MJ_SHUTDOWN: 197 | /* Ignore return value */ 198 | ksm_unsubvert(ksm); 199 | break; 200 | case IRP_MJ_CREATE: 201 | KSM_DEBUG("open from %s\n", proc_name()); 202 | break; 203 | case IRP_MJ_CLOSE: 204 | KSM_DEBUG("close from %s\n", proc_name()); 205 | break; 206 | default: 207 | KSM_DEBUG("unhandled func %X\n", stack->MajorFunction); 208 | break; 209 | } 210 | 211 | irp->IoStatus.Status = status; 212 | IoCompleteRequest(irp, IO_NO_INCREMENT); 213 | return status; 214 | } 215 | 216 | NTSTATUS DriverEntry(PDRIVER_OBJECT driverObject, PUNICODE_STRING registryPath) 217 | { 218 | NTSTATUS status; 219 | LDR_DATA_TABLE_ENTRY *entry; 220 | UNICODE_STRING deviceName; 221 | PDEVICE_OBJECT deviceObject; 222 | UNICODE_STRING deviceLink; 223 | 224 | #ifdef ENABLE_PRINT 225 | /* Stupid printing interface */ 226 | if (!NT_SUCCESS(status = print_init())) { 227 | DbgPrint("failed to initialize log: 0x%08X\n", status); 228 | return status; 229 | } 230 | #endif 231 | 232 | if (!NT_SUCCESS(status = check_dynamic_pgtables())) 233 | goto err; 234 | 235 | entry = driverObject->DriverSection; 236 | PsLoadedModuleList = entry->InLoadOrderLinks.Flink; 237 | 238 | KSM_DEBUG("We're mapped at %p (size: %d bytes (%d KB), on %d pages)\n", 239 | entry->DllBase, entry->SizeOfImage, 240 | entry->SizeOfImage / 1024, entry->SizeOfImage / PAGE_SIZE); 241 | g_driver_base = (uintptr_t)entry->DllBase; 242 | g_driver_size = entry->SizeOfImage; 243 | 244 | if (!NT_SUCCESS(status = ksm_init(&ksm))) 245 | goto err; 246 | 247 | RtlInitUnicodeString(&deviceName, KSM_DEVICE_NAME); 248 | status = IoCreateDevice(driverObject, 0, &deviceName, 249 | KSM_DEVICE_MAGIC, 0, FALSE, &deviceObject); 250 | if 
(!NT_SUCCESS(status)) 251 | goto exit; 252 | 253 | if (!NT_SUCCESS(IoRegisterShutdownNotification(deviceObject))) 254 | goto err2; 255 | 256 | driverObject->DriverUnload = DriverUnload; 257 | driverObject->MajorFunction[IRP_MJ_SHUTDOWN] = 258 | driverObject->MajorFunction[IRP_MJ_CREATE] = 259 | driverObject->MajorFunction[IRP_MJ_CLOSE] = 260 | driverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = DriverDispatch; 261 | 262 | RtlInitUnicodeString(&deviceLink, KSM_DOS_NAME); 263 | if (NT_SUCCESS(status = IoCreateSymbolicLink(&deviceLink, &deviceName))) { 264 | KSM_DEBUG_RAW("ready\n"); 265 | ksm->host_pgd = __readcr3(); 266 | goto out; 267 | } 268 | 269 | IoUnregisterShutdownNotification(deviceObject); 270 | err2: 271 | IoDeleteDevice(deviceObject); 272 | exit: 273 | ksm_free(ksm); 274 | err: 275 | #ifdef ENABLE_PRINT 276 | print_exit(); 277 | #endif 278 | out: 279 | KSM_DEBUG("ret: 0x%08X\n", status); 280 | return status; 281 | } 282 | -------------------------------------------------------------------------------- /make_nonpagable.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.2 2 | # Mark executable sections as non-pagable, this is necessary because VMX root mode runs 3 | # with interrupts off, so all pages must be physically contiguous. 4 | # This file is only for the MinGW build! 5 | import sys 6 | try: 7 | import pefile 8 | except: 9 | print "Please install pefile before running this script." 
10 | 11 | pe = pefile.PE(sys.argv[1]) 12 | for section in pe.sections: 13 | if ".text" in section.Name: 14 | section.Characteristics |= 0x68000000 15 | 16 | pe.write(sys.argv[1]) 17 | 18 | -------------------------------------------------------------------------------- /mm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * kmap_virt() from KSplice: 6 | * Copyright (C) 2007-2009 Ksplice, Inc. 7 | * Authors: Jeff Arnold, Anders Kaseorg, Tim Abbott 8 | * 9 | * This program is free software; you can redistribute it and/or modify it 10 | * under the terms and conditions of the GNU General Public License, 11 | * version 2, as published by the Free Software Foundation. 12 | * 13 | * This program is distributed in the hope it will be useful, but WITHOUT 14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 16 | * more details. 17 | * 18 | * You should have received a copy of the GNU General Public License along with 19 | * this program; If not, see . 20 | */ 21 | #ifdef __linux__ 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #else 28 | #include 29 | #endif 30 | 31 | #include "ksm.h" 32 | #include "mm.h" 33 | #include "compiler.h" 34 | 35 | #ifdef __linux__ 36 | extern struct resource iomem_resource; 37 | 38 | void *mm_remap(u64 phys, size_t size) 39 | { 40 | unsigned long offset = addr_offset(phys); 41 | struct page *page; 42 | void *ret; 43 | 44 | /* For now this supports one-page at a time. 
*/ 45 | WARN_ON(size > PAGE_SIZE); 46 | 47 | page = pfn_to_page(phys >> PAGE_SHIFT); 48 | ret = vmap(&page, 1, VM_LOCKED, PAGE_KERNEL); 49 | if (!ret) 50 | return NULL; 51 | 52 | return (void *)(ret + offset); 53 | } 54 | 55 | void mm_unmap(void *vaddr, size_t size) 56 | { 57 | void *addr = (void *)((unsigned long)vaddr & PAGE_MASK); 58 | vunmap(addr); 59 | } 60 | 61 | void *mm_remap_iomem(u64 phys, size_t size) 62 | { 63 | return ioremap(phys, size); 64 | } 65 | 66 | void mm_unmap_iomem(void *addr, size_t size) 67 | { 68 | return iounmap((void __iomem __force *)addr); 69 | } 70 | 71 | /* 72 | * Original: 73 | * * map_writable creates a shadow page mapping of the range 74 | * [addr, addr + len) so that we can write to code mapped read-only. 75 | * 76 | * It is similar to a generalized version of x86's text_poke. But 77 | * because one cannot use vmalloc/vfree() inside stop_machine, we use 78 | * map_writable to map the pages before stop_machine, then use the 79 | * mapping inside stop_machine, and unmap the pages afterwards. 80 | * 81 | * https://github.com/jirislaby/ksplice 82 | * kmodsrc/ksplice.c 83 | * 84 | * Converted to take a page protection instead. 85 | */ 86 | void *kmap_virt(void *addr, size_t len, pgprot_t prot) 87 | { 88 | int i; 89 | void *vaddr; 90 | int nr_pages = DIV_ROUND_UP(offset_in_page(addr) + len, PAGE_SIZE); 91 | struct page **pages = kmalloc(nr_pages * sizeof(*pages), GFP_KERNEL); 92 | void *page_addr = (void *)((unsigned long)addr & PAGE_MASK); 93 | 94 | if (!pages) 95 | return NULL; 96 | 97 | for (i = 0; i < nr_pages; ++i) { 98 | if (!__module_address((unsigned long)page_addr)) { 99 | pages[i] = virt_to_page(page_addr); 100 | WARN_ON(!PageReserved(pages[i])); 101 | } else { 102 | /* Modules are allocated via vmalloc() which is 103 | * non-contiguous. 
*/ 104 | pages[i] = vmalloc_to_page(page_addr); 105 | } 106 | 107 | if (!pages[i]) { 108 | kfree(pages); 109 | return NULL; 110 | } 111 | 112 | page_addr += PAGE_SIZE; 113 | } 114 | 115 | vaddr = vmap(pages, nr_pages, VM_MAP, prot); 116 | kfree(pages); 117 | if (!vaddr) 118 | return NULL; 119 | 120 | return vaddr + offset_in_page(addr); 121 | } 122 | 123 | static void iter_resource(struct pmem_range *ranges, 124 | struct resource *resource, 125 | const char *match, 126 | int *curr) 127 | { 128 | struct resource *tmp; 129 | if (*curr >= MAX_RANGES) 130 | return; 131 | 132 | for (tmp = resource; tmp && *curr < MAX_RANGES; tmp = tmp->child) { 133 | if (strcmp(tmp->name, match) == 0) { 134 | ranges[*curr].start = tmp->start; 135 | ranges[*curr].end = tmp->end; 136 | ++*curr; 137 | } 138 | 139 | if (tmp->sibling) 140 | iter_resource(ranges, tmp->sibling, match, curr); 141 | } 142 | } 143 | 144 | int mm_cache_ram_ranges(struct pmem_range *ranges, int *range_count) 145 | { 146 | iter_resource(ranges, &iomem_resource, "System RAM", range_count); 147 | return 0; 148 | } 149 | #else 150 | 151 | int mm_cache_ram_ranges(struct pmem_range *ranges, int *range_count) 152 | { 153 | int run; 154 | uintptr_t addr; 155 | uintptr_t size; 156 | PPHYSICAL_MEMORY_RANGE pm_ranges; 157 | 158 | pm_ranges = MmGetPhysicalMemoryRanges(); 159 | if (!pm_ranges) 160 | return ERR_NOMEM; 161 | 162 | for (run = 0; run < MAX_RANGES; ++run) { 163 | addr = pm_ranges[run].BaseAddress.QuadPart; 164 | size = pm_ranges[run].NumberOfBytes.QuadPart; 165 | if (!addr && !size) 166 | break; 167 | 168 | ranges[run].start = addr; 169 | ranges[run].end = addr + size; 170 | } 171 | 172 | *range_count = run; 173 | ExFreePool(pm_ranges); 174 | return 0; 175 | } 176 | #endif 177 | 178 | static inline void make_mtrr_range(struct mtrr_range *range, bool fixed, u8 type, 179 | u64 start, u64 end) 180 | { 181 | range->fixed = fixed; 182 | range->type = type; 183 | range->start = start; 184 | range->end = end; 185 | } 186 | 
187 | void mm_cache_mtrr_ranges(struct mtrr_range *ranges, int *range_count, u8 *def_type) 188 | { 189 | u64 def, cap; 190 | u64 msr; 191 | u32 val; 192 | u64 base; 193 | u64 offset; 194 | int num_var; 195 | int idx = 0; 196 | int i; 197 | u64 len; 198 | 199 | def = __readmsr(MSR_MTRRdefType); 200 | *def_type = def & 0xFF; 201 | 202 | cap = __readmsr(MSR_MTRRcap); 203 | num_var = cap & 0xFF; 204 | 205 | if ((cap >> 8) & 1 && (def >> 10) & 1) { 206 | /* Read fixed range MTRRs. */ 207 | for (msr = __readmsr(MSR_MTRRfix64K_00000), offset = 0x10000, base = 0; 208 | msr != 0; msr >>= 8, base += offset) 209 | make_mtrr_range(&ranges[idx++], true, msr & 0xff, base, base + 0x10000); 210 | 211 | for (val = MSR_MTRRfix16K_80000, offset = 0x4000; val <= MSR_MTRRfix16K_A0000; ++val) 212 | for (msr = __readmsr(val), base = 0x80000; 213 | msr != 0; msr >>= 8, base += offset) 214 | make_mtrr_range(&ranges[idx++], true, msr & 0xff, base, base + 0x4000); 215 | 216 | for (val = MSR_MTRRfix4K_C0000, offset = 0x1000; val <= MSR_MTRRfix4K_F8000; ++val) 217 | for (msr = __readmsr(val), base = 0xC0000; 218 | msr != 0; msr >>= 8, base += offset) 219 | make_mtrr_range(&ranges[idx++], true, msr & 0xff, base, base + 0x1000); 220 | } 221 | 222 | for (i = 0; i < num_var; i++) { 223 | msr = __readmsr(MSR_MTRR_PHYS_MASK + i * 2); 224 | if (!((msr >> 11) & 1)) 225 | continue; 226 | 227 | len = 1ull << __ffs64(msr & PAGE_PA_MASK); 228 | base = __readmsr(MSR_MTRR_PHYS_BASE + i * 2); 229 | make_mtrr_range(&ranges[idx++], false, 230 | base & 0xff, 231 | base & PAGE_PA_MASK, 232 | (base & PAGE_PA_MASK) + len); 233 | } 234 | 235 | *range_count = idx; 236 | } 237 | -------------------------------------------------------------------------------- /mm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * This program is free software; you can redistribute it and/or 
modify it 6 | * under the terms and conditions of the GNU General Public License, 7 | * version 2, as published by the Free Software Foundation. 8 | * 9 | * This program is distributed in the hope it will be useful, but WITHOUT 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 | * more details. 13 | * 14 | * You should have received a copy of the GNU General Public License along with 15 | * this program; If not, see . 16 | */ 17 | #ifndef __MM_H 18 | #define __MM_H 19 | 20 | #ifdef __linux__ 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #endif 28 | 29 | #define PGD_SHIFT_P 39 30 | #define PUD_SHIFT_P 30 31 | #define PMD_SHIFT_P 21 32 | #define PTE_SHIFT_P 12 33 | 34 | #define VA_BITS 48 35 | #define VA_MASK ((1ULL << VA_BITS) - 1) 36 | #define VA_SHIFT 16 37 | 38 | #ifndef PTX_MASK 39 | #define PTX_MASK 0x1FF 40 | #endif 41 | #define PUD_MASK_P 0x3FFFF 42 | #define PMD_MASK_P 0x7FFFFFF 43 | #define PTE_MASK_P 0xFFFFFFFFF 44 | 45 | /* 46 | * The traditional page table management carries on, but 47 | * the naming convention is different per-platform, so not 48 | * to be confused, here are the names: 49 | * 50 | * Regular (AMD tables): PML4, PDPT, PDT, PT 51 | * Linux tables: PML4, PGD, PUD, PMD 52 | * Windows tables: PML4, PXE, PPE, PDE 53 | * 54 | * To sync this, we just use linux convention because it's more 55 | * convenient. 
56 | */ 57 | #define PAGE_PRESENT 0x1 /* Present and locked in the lock position */ 58 | #define PAGE_WRITE 0x2 /* Writable */ 59 | #define PAGE_USER 0x4 /* User page */ 60 | #define PAGE_WRITETHRU 0x8 /* Write through */ 61 | #define PAGE_CACHEDISABLE 0x10 /* No caching */ 62 | #define PAGE_ACCESSED 0x20 /* Processor: Set when accessed */ 63 | #define PAGE_DIRTY 0x40 /* Processor: Set when wrote to */ 64 | #define PAGE_LARGE 0x80 /* Large page */ 65 | #define PAGE_GLOBAL 0x100 /* Global page, see CR4.PGE */ 66 | #define PAGE_COPYONWRITE 0x200 /* CoW mapping */ 67 | #define PAGE_PROTOTYPE 0x400 /* Linux: Used for I/O mapping */ 68 | #define PAGE_TRANSIT 0x800 /* Linux: Hidden by kmemcheck */ 69 | #define PAGE_PA_MASK (0xFFFFFFFFFULL << PAGE_SHIFT) 70 | #define PAGE_PA(page) ((page) & PAGE_PA_MASK) 71 | #define PAGE_FN(page) (((page) >> PTE_SHIFT_P) & PTE_MASK_P) 72 | #define PAGE_PPA(pte) (PAGE_PA(pte->pte)) 73 | #define PAGE_PFN(pte) (PAGE_FN(pte->pte)) 74 | #define PAGE_NX 0x8000000000000000 /* No execute */ 75 | #define PAGE_LPRESENT (PAGE_PRESENT | PAGE_LARGE) 76 | 77 | #define PGF_PRESENT 0x1 /* present fault */ 78 | #define PGF_WRITE 0x2 /* write fault */ 79 | #define PGF_SP 0x4 /* supervisor fault (SMEP, SMAP) */ 80 | #define PGF_RSVD 0x8 /* reserved bit was set fault */ 81 | #define PGF_FETCH 0x10 /* fetch fault */ 82 | #define PGF_PK 0x20 /* Protection key fault */ 83 | #define PGF_SGX 0x40 /* SGX induced fault */ 84 | 85 | #define PGD_INDEX_P(addr) (((addr) >> PGD_SHIFT_P) & PTX_MASK) 86 | #define PUD_INDEX_P(addr) (((addr) >> PUD_SHIFT_P) & PTX_MASK) 87 | #define PMD_INDEX_P(addr) (((addr) >> PMD_SHIFT_P) & PTX_MASK) 88 | #define PTE_INDEX_P(addr) (((addr) >> PTE_SHIFT_P) & PTX_MASK) 89 | 90 | #ifndef __linux__ 91 | /* be in the same boat */ 92 | typedef struct { unsigned long long pgd; } pgd_t; 93 | typedef struct { unsigned long long pud; } pud_t; 94 | typedef struct { unsigned long long pmd; } pmd_t; 95 | typedef struct { unsigned long long pte; } 
pte_t; 96 | 97 | /* Determined at runtime (on Windows 10 these are not static.) */ 98 | extern uintptr_t pxe_base; 99 | extern uintptr_t ppe_base; 100 | extern uintptr_t pde_base; 101 | extern uintptr_t pte_base; 102 | 103 | #define __pa(va) \ 104 | MmGetPhysicalAddress((void *)(va)).QuadPart 105 | #define __va(pa) \ 106 | (uintptr_t *)MmGetVirtualForPhysical((PHYSICAL_ADDRESS) { .QuadPart = (uintptr_t)(pa) }) 107 | 108 | #define pte_present(p) ((((pte_t *)(&(p)))->pte) & (PAGE_PRESENT | PAGE_GLOBAL)) 109 | 110 | static inline pgd_t *pgd_offset(uintptr_t cr3, uintptr_t va) 111 | { 112 | return (pgd_t *)__va(PAGE_PA(cr3)) + PGD_INDEX_P(va); 113 | } 114 | 115 | static inline pud_t *pud_offset(pgd_t *pgd, uintptr_t va) 116 | { 117 | return (pud_t *)__va(PAGE_PPA(pgd)) + PUD_INDEX_P(va); 118 | } 119 | 120 | static inline pmd_t *pmd_offset(pud_t *pud, uintptr_t va) 121 | { 122 | return (pmd_t *)__va(PAGE_PPA(pud)) + PMD_INDEX_P(va); 123 | } 124 | 125 | static inline pte_t *pte_offset(pmd_t *pmd, uintptr_t va) 126 | { 127 | return (pte_t *)__va(PAGE_PPA(pmd)) + PTE_INDEX_P(va); 128 | } 129 | #endif 130 | 131 | #define pte_large(p) ((((pte_t *)(&(p)))->pte) & PAGE_LARGE) 132 | #define page_align(addr) ((uintptr_t)(addr) & ~(PAGE_SIZE - 1)) 133 | 134 | static inline bool page_aligned(uintptr_t addr) 135 | { 136 | return (addr & (PAGE_SIZE - 1)) == 0; 137 | } 138 | 139 | static inline size_t round_to_pages(size_t size) 140 | { 141 | return (size >> PAGE_SHIFT) + ((size & (PAGE_SIZE - 1)) != 0); 142 | } 143 | 144 | static inline u16 addr_offset(uintptr_t addr) 145 | { 146 | return addr & (PAGE_SIZE - 1); 147 | } 148 | 149 | static inline bool same_page(uintptr_t a1, uintptr_t a2) 150 | { 151 | return page_align(a1) == page_align(a2); 152 | } 153 | 154 | static inline bool is_canonical_addr(u64 addr) 155 | { 156 | return (s64)addr >> 47 == (s64)addr >> 63; 157 | } 158 | 159 | #ifdef __linux__ 160 | static inline pgd_t *va_to_pgd(uintptr_t va) 161 | { 162 | return 
pgd_offset(current->mm, va); 163 | } 164 | 165 | static inline pud_t *va_to_pud(uintptr_t va) 166 | { 167 | return pud_offset(va_to_pgd(va), va); 168 | } 169 | 170 | static inline pmd_t *va_to_pmd(uintptr_t va) 171 | { 172 | return pmd_offset(va_to_pud(va), va); 173 | } 174 | 175 | static inline pte_t *va_to_pte(uintptr_t va) 176 | { 177 | return pte_offset_kernel(va_to_pmd(va), va); 178 | } 179 | 180 | static inline pte_t *pte_from_cr3_va(uintptr_t cr3, uintptr_t va) 181 | { 182 | pgd_t *pgd; 183 | pud_t *pud; 184 | pmd_t *pmd; 185 | 186 | pgd = pgd_offset(current->mm, va); 187 | if (pgd_none(*pgd) || pgd_bad(*pgd)) 188 | return NULL; 189 | 190 | pud = pud_offset(pgd, va); 191 | if (pud_none(*pud) || pud_bad(*pud)) 192 | return NULL; 193 | 194 | pmd = pmd_offset(pud, va); 195 | if (pmd_none(*pmd) || pmd_bad(*pmd)) 196 | return NULL; 197 | 198 | if (pmd_large(*pmd)) 199 | return (pte_t *)pmd; 200 | 201 | return pte_offset_kernel(pmd, va); 202 | } 203 | 204 | static inline void *mm_alloc_page(void) 205 | { 206 | return (void *)get_zeroed_page(GFP_KERNEL | GFP_ATOMIC); 207 | } 208 | 209 | static inline void __mm_free_page(void *v) 210 | { 211 | free_page((unsigned long)v); 212 | } 213 | 214 | static inline void *mm_alloc_pool(size_t size) 215 | { 216 | return kmalloc(size, GFP_KERNEL | GFP_ATOMIC | __GFP_ZERO); 217 | } 218 | 219 | static inline void __mm_free_pool(void *v) 220 | { 221 | kfree(v); 222 | } 223 | 224 | static inline bool mm_is_kernel_addr(void *va) 225 | { 226 | return (uintptr_t)va >= PAGE_OFFSET; 227 | } 228 | 229 | extern void *mm_remap(u64 phys, size_t size); 230 | extern void mm_unmap(void *addr, size_t size); 231 | extern void *mm_remap_iomem(u64 phys, size_t size); 232 | extern void mm_unmap_iomem(void *addr, size_t size); 233 | extern void *kmap_virt(void *addr, size_t len, pgprot_t prot); 234 | static inline void *kmap_exec(void *addr, size_t len) 235 | { 236 | return kmap_virt(addr, len, PAGE_KERNEL_EXEC); 237 | } 238 | 239 | static inline 
void *kmap_write(void *addr, size_t len) 240 | { 241 | return kmap_virt(addr, len, PAGE_KERNEL); 242 | } 243 | #else 244 | /* 245 | * You can use the following functions for address translation in general 246 | * but if you're translating a userspace address, then either make sure 247 | * the cr3 is set to that userspace process, or use pgd_offset(), etc 248 | * functions instead. 249 | */ 250 | static inline pgd_t *va_to_pgd(uintptr_t va) 251 | { 252 | uintptr_t off = (va >> PGD_SHIFT_P) & PTX_MASK; 253 | return (pgd_t *)pxe_base + off; 254 | } 255 | 256 | static inline pud_t *va_to_pud(uintptr_t va) 257 | { 258 | uintptr_t off = (va >> PUD_SHIFT_P) & PUD_MASK_P; 259 | return (pud_t *)ppe_base + off; 260 | } 261 | 262 | static inline pmd_t *va_to_pmd(uintptr_t va) 263 | { 264 | uintptr_t off = (va >> PMD_SHIFT_P) & PMD_MASK_P; 265 | return (pmd_t *)pde_base + off; 266 | } 267 | 268 | static inline pte_t *va_to_pte(uintptr_t va) 269 | { 270 | uintptr_t off = (va >> PTE_SHIFT_P) & PTE_MASK_P; 271 | return (pte_t *)pte_base + off; 272 | } 273 | 274 | static inline pte_t *pte_from_cr3_va(uintptr_t cr3, uintptr_t va) 275 | { 276 | pgd_t *pgd; 277 | pud_t *pud; 278 | pmd_t *pmd; 279 | 280 | pgd = pgd_offset(cr3, va); 281 | if (!pte_present(*pgd)) 282 | return NULL; 283 | 284 | pud = pud_offset(pgd, va); 285 | if (!pte_present(*pud)) 286 | return NULL; 287 | 288 | pmd = pmd_offset(pud, va); 289 | if (!pte_present(*pmd)) 290 | return NULL; 291 | 292 | if (pte_large(*pmd)) /* test the entry itself, not the pointer variable */ 293 | return (pte_t *)pmd; 294 | 295 | return pte_offset(pmd, va); 296 | } 297 | 298 | static inline void *mm_remap(u64 phys, size_t size) 299 | { 300 | return MmMapIoSpace((PHYSICAL_ADDRESS) { .QuadPart = phys }, 301 | size, MmNonCached); 302 | } 303 | 304 | static inline void mm_unmap(void *addr, size_t size) 305 | { 306 | return MmUnmapIoSpace(addr, size); 307 | } 308 | 309 | static inline void *mm_remap_iomem(u64 phys, size_t size) 310 | { 311 | return mm_remap(phys, size); 312 | } 313 | 314 | static 
inline void mm_unmap_iomem(void *addr, size_t size) 315 | { 316 | return mm_unmap(addr, size); 317 | } 318 | 319 | static inline void *mm_alloc_page(void) 320 | { 321 | void *v = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, 0); 322 | if (v) 323 | memset(v, 0, PAGE_SIZE); 324 | 325 | return v; 326 | } 327 | 328 | static inline void __mm_free_page(void *v) 329 | { 330 | ExFreePoolWithTag(v, 0); 331 | } 332 | 333 | static inline void *mm_alloc_pool(size_t size) 334 | { 335 | void *v = ExAllocatePoolWithTag(NonPagedPool, size, 0); 336 | if (v) 337 | memset(v, 0, size); 338 | 339 | return v; 340 | } 341 | 342 | static inline void __mm_free_pool(void *v) 343 | { 344 | ExFreePool(v); 345 | } 346 | 347 | static inline bool mm_is_kernel_addr(void *va) 348 | { 349 | return va >= MmSystemRangeStart; 350 | } 351 | #endif 352 | 353 | static inline void mm_free_pool(void *v, size_t size) 354 | { 355 | memset(v, 0, size); 356 | __mm_free_pool(v); 357 | } 358 | 359 | static inline void mm_free_page(void *v) 360 | { 361 | memset(v, 0, PAGE_SIZE); 362 | __mm_free_page(v); 363 | } 364 | 365 | static inline u64 va_to_pa(uintptr_t va) 366 | { 367 | pte_t *pte = (pte_t *)va_to_pmd(va); 368 | if (!pte_large(*pte)) 369 | pte = va_to_pte(va); 370 | 371 | if (!pte_present(*pte)) 372 | return 0; 373 | 374 | return PAGE_PPA(pte) | addr_offset(va); 375 | } 376 | 377 | static inline void set_pte_flags(pte_t *pte, int flags) 378 | { 379 | if (pte && (pte->pte & flags) != flags) 380 | pte->pte |= flags; 381 | } 382 | 383 | static inline void mark_pte_dirty(pte_t *pte) 384 | { 385 | set_pte_flags(pte, PAGE_DIRTY); 386 | } 387 | 388 | static inline void mark_pte_accessed(pte_t *pte) 389 | { 390 | set_pte_flags(pte, PAGE_ACCESSED); 391 | } 392 | 393 | struct pmem_range { 394 | u64 start; 395 | u64 end; 396 | }; 397 | 398 | #define MAX_RANGES 32 399 | extern int mm_cache_ram_ranges(struct pmem_range *ranges, int *count); 400 | 401 | struct mtrr_range { 402 | bool fixed; 403 | u8 type; 404 | u64 
start; 405 | u64 end; 406 | }; 407 | 408 | #define MAX_VAR_MTRR 255 409 | #define MAX_FIXED_MTRR (11 * 8) 410 | #define MAX_MTRR (MAX_VAR_MTRR + MAX_FIXED_MTRR) /* parenthesized so it expands safely inside larger expressions */ 411 | extern void mm_cache_mtrr_ranges(struct mtrr_range *ranges, int *count, u8 *def_type); 412 | 413 | #endif 414 | -------------------------------------------------------------------------------- /percpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * DPC is short for Deferred Procedure Call. 6 | * 7 | * Per-processor macros 8 | * Public domain. 9 | * 10 | * This file handles per-processor callbacks, on Linux it uses SMP API 11 | * to send a call-function IPI to the specified processor(s), on 12 | * Windows, this uses the DPC API. 13 | * 14 | * The following macros are defined here: 15 | * DEFINE_DPC(name, call, args...) 16 | * Example: 17 | * static DEFINE_DPC(do_something, 18 | * __vmx_vmcall, 19 | * ctx); 20 | * ctx is an optional parameter that is passed to the function 21 | * when called by the IPI, it does not have to be used. 22 | * 23 | * To call the DPC: 24 | * CALL_DPC(name, args...) 25 | * Example: 26 | * CALL_DPC(do_something, &my_context); 27 | * Which will call it on all online processors. 28 | * 29 | * To call the DPC on one CPU only: 30 | * CALL_DPC_ON_CPU(cpu, name, fail, args...) 31 | * Example: 32 | * CALL_DPC_ON_CPU(cpu, do_something, goto out, ctx); 33 | * out: 34 | * ... handle fail here ... 35 | * 36 | * To get the return value: 37 | * DPC_RET(): 38 | * This macro returns a logical OR'd variable, basically the return 39 | * value of the callback(s) OR'd. 40 | * Note: the variable returned by this macro is per-file (a static variable), so 41 | * you might want to account for that also. 
42 | */ 43 | #ifndef __PERCPU_H 44 | #define __PERCPU_H 45 | 46 | static int __g_dpc_logical_rval = 0; 47 | 48 | #ifndef __linux__ 49 | NTKERNELAPI VOID KeGenericCallDpc(PKDEFERRED_ROUTINE Routine, 50 | PVOID Context); 51 | NTKERNELAPI VOID KeSignalCallDpcDone(PVOID SystemArgument1); 52 | NTKERNELAPI LOGICAL KeSignalCallDpcSynchronize(PVOID SystemArgument2); 53 | 54 | #define DEFINE_DPC(name, call, ...) \ 55 | VOID __percpu_##name(PRKDPC dpc, void *ctx, void *sys0, void *sys1) \ 56 | { \ 57 | UNREFERENCED_PARAMETER(dpc); \ 58 | __g_dpc_logical_rval |= (call) (__VA_ARGS__); \ 59 | KeSignalCallDpcSynchronize(sys1); \ 60 | KeSignalCallDpcDone(sys0); \ 61 | } 62 | 63 | #define CALL_DPC(name, ...) do { \ 64 | __g_dpc_logical_rval = 0; \ 65 | KeGenericCallDpc(__percpu_##name, __VA_ARGS__); \ 66 | } while (0) 67 | 68 | #define CALL_DPC_ON_CPU(cpu, name, fail, ...) do { \ 69 | __g_dpc_logical_rval = 0; \ 70 | PROCESSOR_NUMBER proc_nr; \ 71 | KeGetProcessorNumberFromIndex((cpu), &proc_nr); \ 72 | PKDPC dpc = mm_alloc_pool(sizeof(*dpc)); \ 73 | if (!dpc) \ 74 | fail; \ 75 | KeInitializeDpc(dpc, __percpu_##name, __VA_ARGS__); \ 76 | KeSetImportanceDpc(dpc, HighImportance); \ 77 | KeSetTargetProcessorDpcEx(dpc, &proc_nr); \ 78 | KeInsertQueueDpc(dpc, NULL, NULL); \ 79 | } while (0) 80 | #else 81 | #define DEFINE_DPC(name, call, ...) \ 82 | void __percpu_##name(void *ctx) \ 83 | { \ 84 | __g_dpc_logical_rval |= (call) (__VA_ARGS__); \ 85 | } 86 | 87 | #define CALL_DPC(name, ...) do { \ 88 | int cpu; \ 89 | __g_dpc_logical_rval = 0; \ 90 | for_each_online_cpu(cpu) \ 91 | smp_call_function_single(cpu, __percpu_##name, __VA_ARGS__, 1); \ 92 | } while (0) 93 | 94 | #define CALL_DPC_ON_CPU(cpu, name, fail, ...) 
do { \ 95 | __g_dpc_logical_rval = 0; \ 96 | smp_call_function_single(cpu, __percpu_##name, __VA_ARGS__, 1); \ 97 | } while (0) 98 | #endif 99 | 100 | #define DPC_RET() __g_dpc_logical_rval 101 | #endif 102 | -------------------------------------------------------------------------------- /print.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * Windows only. 6 | * 7 | * This program is free software; you can redistribute it and/or modify it 8 | * under the terms and conditions of the GNU General Public License, 9 | * version 2, as published by the Free Software Foundation. 10 | * 11 | * This program is distributed in the hope it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | * more details. 15 | * 16 | * You should have received a copy of the GNU General Public License along with 17 | * this program; If not, see . 18 | */ 19 | #if defined(ENABLE_DBGPRINT) || defined(ENABLE_FILEPRINT) 20 | 21 | /* 22 | * A stupid kernel debug printing interface so that we don't hang 23 | * the kernel when we are inside VMX root. 24 | * 25 | * Fileprint: workaround stupid error due to ntifs under MinGW-w64: 26 | * ntifs.h: error flexible array in union 27 | * 28 | * As far as I know, the only fix would be editing the ntifs.h file 29 | * itself by and just replacing [] with [0] will fix it. 
30 | */ 31 | #ifdef ENABLE_FILEPRINT 32 | #include 33 | #else 34 | #include 35 | #endif 36 | 37 | #include 38 | #ifdef _MSC_VER 39 | #include 40 | #endif 41 | 42 | #include "ksm.h" 43 | 44 | #define PRINT_FLUSH_DELAY 500 45 | #define PRINT_BUF_ATONCE 512 46 | #define PRINT_BUF_BUFFERS 2 47 | #define PRINT_BUF_SHIFT PAGE_SHIFT 48 | #define PRINT_BUF_STRIDE (1 << PRINT_BUF_SHIFT) 49 | #define PRINT_BUF_SIZE (PRINT_BUF_STRIDE * PRINT_BUF_BUFFERS) 50 | 51 | #ifdef ENABLE_FILEPRINT 52 | #define FILE_PATH L"\\SystemRoot\\ksm.log" 53 | #endif 54 | 55 | /* 56 | * @head_use - points to the head of the buffer we should be buffering to 57 | * @next_use - points to next buffering location 58 | * @next - specifies next index of the buffer slice to use 59 | * @next_off - specifies next offset to print off the current buffer slice, 60 | * since DbgPrint() only allows up to 512 chars. 61 | * 62 | * head_use is switched between buf + 0 and buf + PRINT_BUF_STRIDE, to avoid 63 | * confusions and to make it better in terms of performance, between do_print() 64 | * and print_thread(). 65 | * 66 | * The spin lock is used to synchronize updates to @head_use and @next_use. 67 | * For synchronization of writes, a barrier is used to make sure that print_thread() 68 | * will see the head being updated. 69 | */ 70 | static volatile bool do_exit = false; 71 | static volatile bool exited = false; 72 | static volatile bool work = false; 73 | static char buf[PRINT_BUF_SIZE]; 74 | static char *head_use = buf; 75 | static char *next_use = buf; 76 | static size_t next = 0; 77 | static size_t next_off = 0; 78 | static KSPIN_LOCK lock; 79 | #ifdef ENABLE_FILEPRINT 80 | static ERESOURCE resource; 81 | static HANDLE file; 82 | #endif 83 | 84 | #ifndef _MSC_VER 85 | /* 86 | * Taken from: 87 | * https://searchcode.com/codesearch/view/20802857/ 88 | * 89 | * public domain. 
90 | */ 91 | typedef char *STRSAFE_LPSTR; 92 | typedef const char *STRSAFE_LPCSTR; 93 | 94 | #ifndef NTSTRSAFE_MAX_CCH 95 | #define NTSTRSAFE_MAX_CCH 2147483647 96 | #endif 97 | #define NTSTRSAFEAPI static __inline NTSTATUS NTAPI 98 | 99 | /* Exported by ntoskrnl */ 100 | extern int __cdecl _vsnprintf(char *, size_t, const char *, va_list); 101 | 102 | NTSTRSAFEAPI RtlStringVPrintfWorkerA(STRSAFE_LPSTR pszDest, size_t cchDest, STRSAFE_LPCSTR pszFormat, va_list argList) 103 | { 104 | NTSTATUS Status = STATUS_SUCCESS; 105 | if (cchDest == 0) 106 | Status = STATUS_INVALID_PARAMETER; 107 | else { 108 | int iRet; 109 | size_t cchMax; 110 | cchMax = cchDest - 1; 111 | iRet = _vsnprintf(pszDest, cchMax, pszFormat, argList); 112 | if ((iRet < 0) || (((size_t)iRet) > cchMax)) { 113 | pszDest += cchMax; 114 | *pszDest = '\0'; 115 | Status = STATUS_BUFFER_OVERFLOW; 116 | } else 117 | if (((size_t)iRet) == cchMax) { 118 | pszDest += cchMax; 119 | *pszDest = '\0'; 120 | } 121 | } 122 | return Status; 123 | } 124 | 125 | NTSTRSAFEAPI RtlStringCchVPrintfA(STRSAFE_LPSTR pszDest, size_t cchDest, STRSAFE_LPCSTR pszFormat, va_list argList) 126 | { 127 | if (cchDest > NTSTRSAFE_MAX_CCH) 128 | return STATUS_INVALID_PARAMETER; 129 | return RtlStringVPrintfWorkerA(pszDest, cchDest, pszFormat, argList); 130 | } 131 | #endif 132 | 133 | static inline int sleep_ms(s32 ms) 134 | { 135 | return KeDelayExecutionThread(KernelMode, FALSE, &(LARGE_INTEGER) { 136 | .QuadPart = -(10000 * ms) 137 | }); 138 | } 139 | 140 | static inline char *stpcpy(char *dst, const char *src) 141 | { 142 | const size_t len = strlen(src); 143 | return (char *)memcpy(dst, src, len + 1) + len; 144 | } 145 | 146 | static inline void print_flush(void) 147 | { 148 | char on_stack[PRINT_BUF_ATONCE + 1]; 149 | char *printbuf; 150 | size_t max; 151 | size_t rem; 152 | size_t len; 153 | KLOCK_QUEUE_HANDLE q; 154 | #ifdef ENABLE_FILEPRINT 155 | IO_STATUS_BLOCK sblk; 156 | #endif 157 | 158 | smp_rmb(); 159 | 
KeAcquireInStackQueuedSpinLock(&lock, &q); 160 | printbuf = buf + next_off + ((next & (PRINT_BUF_BUFFERS - 1)) << PRINT_BUF_SHIFT); 161 | max = next_use - head_use; 162 | 163 | strncpy(on_stack, printbuf, PRINT_BUF_ATONCE); 164 | on_stack[PRINT_BUF_ATONCE] = '\0'; 165 | len = strlen(on_stack); 166 | next_off += len; 167 | rem = max - next_off; 168 | 169 | if (rem == 0) { 170 | head_use = buf + ((++next & (PRINT_BUF_BUFFERS - 1)) << PRINT_BUF_SHIFT); 171 | next_use = head_use; 172 | next_off = 0; 173 | smp_wmb(); 174 | } 175 | KeReleaseInStackQueuedSpinLock(&q); 176 | 177 | #ifdef ENABLE_DBGPRINT 178 | DbgPrintEx(DPFLTR_DEFAULT_ID, DPFLTR_ERROR_LEVEL, "%s", on_stack); 179 | #endif 180 | #ifdef ENABLE_FILEPRINT 181 | ExEnterCriticalRegionAndAcquireResourceExclusive(&resource); 182 | ZwWriteFile(file, NULL, NULL, NULL, 183 | &sblk, on_stack, (u32)len, 184 | NULL, NULL); 185 | ExReleaseResourceAndLeaveCriticalRegion(&resource); 186 | #endif 187 | } 188 | 189 | static void print_thread(void) 190 | { 191 | /* 192 | * Note: This thread most of the time (if not all) will be running 193 | * on a different processor other than the caller of do_print(). 194 | * 195 | * We need this to sort of "queue" debug prints to avoid windbg 196 | * hanging around because DbgPrintEx() needs to do IPI and stuff 197 | * so it needs to be called with interrupts enabled, which in our 198 | * case, they are mostly not especially inside VM exit. 
199 | */ 200 | while (!do_exit) { 201 | while (next_use == head_use && !do_exit) 202 | sleep_ms(PRINT_FLUSH_DELAY); 203 | 204 | print_flush(); 205 | cpu_relax(); 206 | } 207 | 208 | if (next_use != head_use) 209 | print_flush(); 210 | 211 | #ifdef _MSC_VER 212 | InterlockedExchange8((char *)&exited, true); 213 | #else 214 | __sync_bool_compare_and_swap(&exited, false, true); 215 | #endif 216 | PsTerminateSystemThread(STATUS_SUCCESS); 217 | } 218 | 219 | NTSTATUS print_init(void) 220 | { 221 | HANDLE hThread; 222 | CLIENT_ID cid; 223 | NTSTATUS status; 224 | #ifdef ENABLE_FILEPRINT 225 | IO_STATUS_BLOCK sblk; 226 | OBJECT_ATTRIBUTES oa; 227 | UNICODE_STRING path; 228 | 229 | RtlInitUnicodeString(&path, FILE_PATH); 230 | InitializeObjectAttributes(&oa, &path, 231 | OBJ_KERNEL_HANDLE | OBJ_CASE_INSENSITIVE, 232 | NULL, NULL); 233 | status = ZwCreateFile(&file, FILE_APPEND_DATA | SYNCHRONIZE, 234 | &oa, &sblk, NULL, FILE_ATTRIBUTE_NORMAL, 235 | FILE_SHARE_READ, FILE_OPEN_IF, 236 | FILE_SYNCHRONOUS_IO_ALERT | FILE_NON_DIRECTORY_FILE, 237 | NULL, 0); 238 | if (!NT_SUCCESS(status)) 239 | return status; 240 | 241 | if (!NT_SUCCESS(status = ExInitializeResourceLite(&resource))) 242 | goto err_file; 243 | #endif 244 | 245 | KeInitializeSpinLock(&lock); 246 | if (NT_SUCCESS(status = PsCreateSystemThread(&hThread, STANDARD_RIGHTS_ALL, 247 | NULL, NULL, &cid, 248 | (PKSTART_ROUTINE)print_thread, NULL))) { 249 | ZwClose(hThread); 250 | return status; 251 | } 252 | 253 | #ifdef ENABLE_FILEPRINT 254 | ExDeleteResourceLite(&resource); 255 | err_file: 256 | ZwClose(file); 257 | #endif 258 | return status; 259 | } 260 | 261 | void print_exit(void) 262 | { 263 | #ifdef _MSC_VER 264 | InterlockedExchange8((char *)&do_exit, true); 265 | #else 266 | __sync_bool_compare_and_swap(&do_exit, false, true); 267 | #endif 268 | while (!exited) 269 | cpu_relax(); 270 | 271 | #ifdef ENABLE_FILEPRINT 272 | ExDeleteResourceLite(&resource); 273 | ZwClose(file); 274 | #endif 275 | } 276 | 277 | void 
do_print(const char *fmt, ...) 278 | { 279 | char buffer[PRINT_BUF_ATONCE]; 280 | va_list va; 281 | size_t len; 282 | size_t pos; 283 | NTSTATUS status; 284 | KLOCK_QUEUE_HANDLE q; 285 | 286 | va_start(va, fmt); 287 | status = RtlStringCchVPrintfA(buffer, sizeof(buffer), fmt, va); 288 | va_end(va); 289 | 290 | if (NT_SUCCESS(status)) { 291 | if (__readeflags() & X86_EFLAGS_IF) { 292 | /* 293 | * No need to queue, DbgPrint uses IPIs to do some stuff, we can 294 | * use it safely here. 295 | * 296 | * This will not branch inside a VM-exit, simply because the IF flag 297 | * is clear for obvious reasons. 298 | */ 299 | #ifdef ENABLE_FILEPRINT 300 | IO_STATUS_BLOCK sblk; 301 | if (!KeAreAllApcsDisabled() && NT_SUCCESS(ZwWriteFile(file, NULL, NULL, NULL, 302 | &sblk, buffer, (ULONG)strlen(buffer), 303 | NULL, NULL))) 304 | ZwFlushBuffersFile(file, &sblk); 305 | #endif 306 | #ifdef ENABLE_DBGPRINT 307 | DbgPrintEx(DPFLTR_DEFAULT_ID, DPFLTR_ERROR_LEVEL, "%s", buffer); 308 | #endif 309 | } else { 310 | KeAcquireInStackQueuedSpinLock(&lock, &q); 311 | 312 | len = strlen(buffer); 313 | pos = next_use - head_use; 314 | if (pos + len < PRINT_BUF_STRIDE) { 315 | next_use = stpcpy(next_use, buffer); 316 | smp_wmb(); 317 | } 318 | 319 | KeReleaseInStackQueuedSpinLock(&q); 320 | } 321 | } 322 | } 323 | #endif 324 | -------------------------------------------------------------------------------- /resubv.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * Public domain 6 | */ 7 | #ifdef ENABLE_RESUBV 8 | #ifdef __linux__ 9 | #include 10 | #else 11 | #include 12 | #endif 13 | 14 | #include "ksm.h" 15 | #include "compiler.h" 16 | 17 | #ifdef __linux__ 18 | static void ksm_resume(void) 19 | { 20 | KSM_DEBUG("in resume: %d\n", ksm_subvert(ksm)); 21 | } 22 | 23 | static int ksm_suspend(void) 24 | { 25 | KSM_DEBUG("in suspend: %d\n", 
ksm_unsubvert(ksm)); 26 | return 0; 27 | } 28 | 29 | static struct syscore_ops syscore_ops = { 30 | .resume = ksm_resume, 31 | .suspend = ksm_suspend, 32 | }; 33 | 34 | int register_power_callback(void) 35 | { 36 | register_syscore_ops(&syscore_ops); 37 | return 0; 38 | } 39 | 40 | void unregister_power_callback(void) 41 | { 42 | unregister_syscore_ops(&syscore_ops); 43 | } 44 | #else 45 | typedef struct _DEV_EXT { 46 | void *CbRegistration; 47 | PCALLBACK_OBJECT CbObject; 48 | } DEV_EXT, *PDEV_EXT; 49 | static DEV_EXT g_dev_ext; 50 | 51 | static void power_callback(PDEV_EXT ctx, void *arg0, void *arg1) 52 | { 53 | if (arg0 != (void *)PO_CB_SYSTEM_STATE_LOCK) 54 | return; 55 | 56 | if (arg1 == (void *)0) 57 | ksm_unsubvert(ksm); 58 | else if (arg1 == (void *)1) /* arg1 carries the lock state; arg0 was already matched above */ 59 | ksm_subvert(ksm); 60 | } 61 | 62 | int register_power_callback(void) 63 | { 64 | OBJECT_ATTRIBUTES obj; 65 | UNICODE_STRING name; 66 | NTSTATUS status; 67 | PDEV_EXT ext = &g_dev_ext; 68 | 69 | RtlInitUnicodeString(&name, L"\\Callback\\PowerState"); 70 | InitializeObjectAttributes(&obj, &name, OBJ_CASE_INSENSITIVE, NULL, NULL); 71 | 72 | status = ExCreateCallback(&ext->CbObject, &obj, FALSE, TRUE); 73 | if (!NT_SUCCESS(status)) 74 | return status; 75 | 76 | ext->CbRegistration = ExRegisterCallback(ext->CbObject, 77 | (PCALLBACK_FUNCTION)power_callback, 78 | ext); 79 | if (!ext->CbRegistration) { 80 | ObDereferenceObject(ext->CbObject); 81 | return STATUS_UNSUCCESSFUL; 82 | } 83 | 84 | return STATUS_SUCCESS; 85 | } 86 | 87 | void unregister_power_callback(void) 88 | { 89 | PDEV_EXT ext = &g_dev_ext; 90 | if (ext->CbRegistration) 91 | ExUnregisterCallback(ext->CbRegistration); 92 | 93 | if (ext->CbObject) 94 | ObDereferenceObject(ext->CbObject); 95 | } 96 | #endif 97 | #endif 98 | -------------------------------------------------------------------------------- /sandbox.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 
hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * Userspace physical memory sandbox. 6 | * 7 | * This program is free software; you can redistribute it and/or modify it 8 | * under the terms and conditions of the GNU General Public License, 9 | * version 2, as published by the Free Software Foundation. 10 | * 11 | * This program is distributed in the hope it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | * more details. 15 | * 16 | * You should have received a copy of the GNU General Public License along with 17 | * this program; If not, see . 18 | */ 19 | #ifdef PMEM_SANDBOX 20 | #ifdef __linux__ 21 | #include 22 | #include 23 | #include 24 | #else 25 | #include 26 | #include 27 | #endif 28 | 29 | #include "ksm.h" 30 | #include "mm.h" 31 | #include "percpu.h" 32 | 33 | /* 34 | * Note #1: 35 | * Not to be confused with full-application sandboxing (e.g. filesystem, 36 | * etc.), this is just a physical memory sandboxer. 37 | * 38 | * This is basically CoW (copy-on-write) implementation but on the 39 | * physical level, so when a registered application writes to a 40 | * memory region, another region is allocated and the original one is 41 | * redirected to that one with a copy of the contents in it, then only 42 | * that application will see that copy and not others. 43 | * 44 | * Note #2: 45 | * we can't re-use the epage-hook mechanism here, because the 46 | * pages are not known yet, they are just set to read-execute access only, no 47 | * write-access, when there is a write, we copy the page. 48 | * 49 | * Note #3: 50 | * This still needs a lot of work, and is quite "barebones" for now... 51 | * Some work would be perhaps replacing the cr3-load-exiting with something less 52 | * costly. 
53 | * 54 | * Note #4: 55 | * Be careful with this, it's not well tested and quite frankly, may not be very 56 | * good performance wise, you have been warned... 57 | */ 58 | struct cow_page { 59 | u64 gpa; 60 | u64 hpa; 61 | void *hva; 62 | struct list_head link; 63 | }; 64 | 65 | struct sa_task { 66 | pid_t pid; 67 | u64 pgd; 68 | u16 eptp[KSM_MAX_VCPUS]; 69 | spinlock_t lock; 70 | struct list_head pages; 71 | struct list_head link; 72 | }; 73 | 74 | static inline u16 task_eptp(struct sa_task *task) 75 | { 76 | return task->eptp[cpu_nr()]; 77 | } 78 | 79 | static inline void free_cow_page(struct cow_page *page) 80 | { 81 | list_del(&page->link); 82 | mm_free_page(page->hva); 83 | __mm_free_pool(page); 84 | } 85 | 86 | bool ksm_sandbox_handle_vmcall(struct vcpu *vcpu, uintptr_t arg) 87 | { 88 | struct sa_task *task = (struct sa_task *)arg; 89 | u16 eptp = task_eptp(task); 90 | if (vcpu_eptp_idx(vcpu) == eptp) { 91 | if (vcpu->last_switch) 92 | vcpu_switch_root_eptp(vcpu, vcpu->eptp_before); 93 | else 94 | vcpu_switch_root_eptp(vcpu, EPTP_DEFAULT); 95 | } 96 | 97 | vcpu->last_switch = NULL; 98 | if (eptp != EPT_MAX_EPTP_LIST) 99 | ept_free_ptr(&vcpu->ept, eptp); 100 | 101 | return true; 102 | } 103 | 104 | static DEFINE_DPC(__free_sa_task, __vmx_vmcall, HCALL_SA_TASK, ctx); 105 | static inline void __free_sa_task(struct ksm *k, struct sa_task *task) 106 | { 107 | struct cow_page *page = NULL; 108 | struct cow_page *next = NULL; 109 | 110 | list_for_each_entry_safe(page, next, &task->pages, link) 111 | free_cow_page(page); 112 | 113 | list_del(&task->link); 114 | __mm_free_pool(task); 115 | } 116 | 117 | static inline void free_sa_task(struct ksm *k, struct sa_task *task) 118 | { 119 | CALL_DPC(__free_sa_task, task); 120 | __free_sa_task(k, task); 121 | } 122 | 123 | int ksm_sandbox_init(struct ksm *k) 124 | { 125 | spin_lock_init(&k->task_lock); 126 | INIT_LIST_HEAD(&k->task_list); 127 | return 0; 128 | } 129 | 130 | int ksm_sandbox_exit(struct ksm *k) 131 | { 132 
| struct sa_task *task = NULL; 133 | struct sa_task *next = NULL; 134 | list_for_each_entry_safe(task, next, &k->task_list, link) 135 | __free_sa_task(k, task); 136 | 137 | return 0; 138 | } 139 | 140 | static inline int create_sa_task(struct ksm *k, pid_t pid, u64 pgd) 141 | { 142 | struct sa_task *task; 143 | int i; 144 | 145 | task = mm_alloc_pool(sizeof(*task)); 146 | if (!task) 147 | return ERR_NOMEM; 148 | 149 | task->pgd = pgd; 150 | task->pid = pid; 151 | INIT_LIST_HEAD(&task->pages); 152 | spin_lock_init(&task->lock); 153 | for (i = 0; i < KSM_MAX_VCPUS; ++i) 154 | task->eptp[i] = EPT_MAX_EPTP_LIST; 155 | 156 | spin_lock(&k->task_lock); 157 | list_add(&task->link, &k->task_list); 158 | spin_unlock(&k->task_lock); 159 | return 0; 160 | } 161 | 162 | static inline struct cow_page *ksm_sandbox_copy_page(struct vcpu *vcpu, 163 | struct sa_task *task, 164 | u64 gpa) 165 | { 166 | char *hva; 167 | char *h; 168 | struct cow_page *page; 169 | 170 | h = mm_remap(page_align(gpa), PAGE_SIZE); 171 | if (!h) 172 | return false; 173 | 174 | page = mm_alloc_pool(sizeof(*page)); 175 | if (!page) 176 | goto err_page; 177 | 178 | hva = mm_alloc_page(); 179 | if (!hva) 180 | goto err_cow; 181 | 182 | memcpy(hva, h, PAGE_SIZE); 183 | mm_unmap(h, PAGE_SIZE); 184 | 185 | page->gpa = gpa; 186 | page->hpa = __pa(hva); 187 | page->hva = hva; 188 | 189 | spin_lock(&task->lock); 190 | list_add(&page->link, &task->pages); 191 | spin_unlock(&task->lock); 192 | return page; 193 | 194 | err_cow: 195 | __mm_free_pool(page); 196 | err_page: 197 | mm_unmap(h, PAGE_SIZE); 198 | return NULL; 199 | } 200 | 201 | static struct sa_task *find_sa_task(struct ksm *k, pid_t pid) 202 | { 203 | struct sa_task *task = NULL; 204 | struct sa_task *ret = NULL; 205 | 206 | spin_lock(&k->task_lock); 207 | list_for_each_entry(task, &k->task_list, link) { 208 | if (task->pid == pid) { 209 | ret = task; 210 | break; 211 | } 212 | } 213 | spin_unlock(&k->task_lock); 214 | return ret; 215 | } 216 | 217 | int 
ksm_sandbox(struct ksm *k, pid_t pid) 218 | { 219 | if (find_sa_task(k, pid)) 220 | return ERR_EXIST; 221 | 222 | #ifdef __linux__ 223 | struct pid *tsk_pid = find_vpid(pid); 224 | struct task_struct *tsk; 225 | 226 | if (!tsk_pid) 227 | return -ESRCH; 228 | 229 | tsk = pid_task(tsk_pid, PIDTYPE_PID); 230 | if (!tsk) 231 | return -EINVAL; /* can this happen? */ 232 | 233 | /* Ignore anonymous processes */ 234 | WARN_ON(!tsk->mm); 235 | if (!tsk->mm) 236 | return -EFAULT; 237 | 238 | return create_sa_task(k, pid, __pa(tsk->mm->pgd) & PAGE_PA_MASK); 239 | #else 240 | NTSTATUS status; 241 | PEPROCESS process; 242 | KAPC_STATE apc; 243 | uintptr_t pgd; 244 | 245 | status = PsLookupProcessByProcessId((HANDLE)pid, &process); 246 | if (!NT_SUCCESS(status)) 247 | return status; 248 | 249 | KeStackAttachProcess(process, &apc); 250 | pgd = __readcr3() & PAGE_PA_MASK; 251 | KeUnstackDetachProcess(&apc); 252 | ObfDereferenceObject(process); 253 | return create_sa_task(k, pid, pgd); 254 | #endif 255 | } 256 | 257 | int ksm_unbox(struct ksm *k, pid_t pid) 258 | { 259 | struct sa_task *task = NULL; 260 | int ret = ERR_NOTH; 261 | 262 | spin_lock(&k->task_lock); 263 | list_for_each_entry(task, &k->task_list, link) { 264 | if (task->pid == pid) { 265 | free_sa_task(k, task); 266 | ret = 0; 267 | break; 268 | } 269 | } 270 | spin_unlock(&k->task_lock); 271 | return ret; 272 | } 273 | 274 | static struct sa_task *__find_sa_task_pgd(struct ksm *k, u64 pgd) 275 | { 276 | struct sa_task *task = NULL; 277 | 278 | list_for_each_entry(task, &k->task_list, link) 279 | if (task->pgd == pgd) 280 | return task; 281 | return NULL; 282 | } 283 | 284 | static struct sa_task *__find_sa_task_eptp(struct ksm *k, u16 eptp) 285 | { 286 | struct sa_task *task = NULL; 287 | 288 | list_for_each_entry(task, &k->task_list, link) 289 | if (task_eptp(task) == eptp) 290 | return task; 291 | return NULL; 292 | } 293 | 294 | bool ksm_sandbox_handle_ept(struct ept_ve_around *ve) 295 | { 296 | struct sa_task 
*task; 297 | struct cow_page *page; 298 | struct ve_except_info *info; 299 | struct vcpu *vcpu; 300 | struct ept *ept; 301 | struct ksm *k; 302 | u64 *epte; 303 | 304 | vcpu = ve->vcpu; 305 | ept = &vcpu->ept; 306 | info = ve->info; 307 | k = vcpu_to_ksm(vcpu); 308 | 309 | spin_lock(&k->task_lock); 310 | task = __find_sa_task_eptp(k, info->eptp); 311 | spin_unlock(&k->task_lock); 312 | if (!task) { 313 | ve->eptp_next = EPTP_DEFAULT; 314 | BREAK_ON(1); 315 | return true; 316 | } 317 | 318 | epte = ept_pte(EPT4(ept, info->eptp), info->gpa); 319 | BUG_ON(!epte); 320 | 321 | if (info->exit & EPT_ACCESS_WRITE) { 322 | KSM_DEBUG("allocating cow page for GPA %p GVA %p AC %X)\n", 323 | (void *)info->gpa, (void *)info->gla, (int)info->exit & EPT_AR_MASK); 324 | 325 | page = ksm_sandbox_copy_page(vcpu, task, info->gpa); 326 | WARN_ON(!page); 327 | if (!page) 328 | goto manually_fix; 329 | 330 | __set_epte_ar_inplace(epte, info->exit & EPT_AR_MASK); 331 | __set_epte_pfn(epte, page->hpa >> PAGE_SHIFT); 332 | } else { 333 | manually_fix: 334 | BREAK_ON(1); 335 | KSM_DEBUG("Manually fixing AR for %p (0x%X)\n", (void *)info->gpa, (int)info->exit & EPT_AR_MASK); 336 | __set_epte_ar_inplace(epte, info->exit & EPT_AR_MASK); 337 | } 338 | 339 | ve->invalidate = true; 340 | return true; 341 | } 342 | 343 | void ksm_sandbox_handle_cr3(struct vcpu *vcpu, u64 cr3) 344 | { 345 | struct ksm *k; 346 | struct sa_task *task; 347 | u16 *eptp; 348 | 349 | k = vcpu_to_ksm(vcpu); 350 | spin_lock(&k->task_lock); 351 | task = __find_sa_task_pgd(k, cr3 & PAGE_PA_MASK); 352 | spin_unlock(&k->task_lock); 353 | if (task) { 354 | eptp = &task->eptp[cpu_nr()]; 355 | if (*eptp == EPT_MAX_EPTP_LIST) 356 | BUG_ON(!ept_create_ptr(&vcpu->ept, EPT_ACCESS_RX, eptp)); 357 | 358 | vcpu->last_switch = task; 359 | vcpu->eptp_before = vcpu_eptp_idx(vcpu); 360 | vcpu_switch_root_eptp(vcpu, *eptp); 361 | } else if (vcpu->last_switch) { 362 | vcpu_switch_root_eptp(vcpu, vcpu->eptp_before); 363 | vcpu->last_switch = 
NULL; 364 | } 365 | } 366 | 367 | #endif 368 | -------------------------------------------------------------------------------- /tests/README.rst: -------------------------------------------------------------------------------- 1 | Tests 2 | ===== 3 | 4 | Currently, the tests are still "under construction", and are in-tree for future 5 | expansion, feel free to send patches. 6 | 7 | -------------------------------------------------------------------------------- /tests/allgood.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int run_allgood(void) 5 | { 6 | return 1; 7 | } 8 | -------------------------------------------------------------------------------- /tests/ept.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int run_ept(void) 5 | { 6 | return 1; 7 | } 8 | -------------------------------------------------------------------------------- /tests/export.h: -------------------------------------------------------------------------------- 1 | #ifndef __EXPORTS_H 2 | #define __EXPORTS_H 3 | 4 | extern int run_allgood(void); 5 | extern int run_fail_entry(void); 6 | extern int run_go_vmx(void); 7 | extern int run_ept(void); 8 | 9 | extern int initialize_features(void); 10 | extern int give_me_root(void *vmxon); 11 | extern int init_vmcs(void *vmcs); 12 | extern void adjust_ctl_val(u32 msr, u32 *val); 13 | extern bool setup_basic_vmcs(u32 other_primary, u32 other_secondary, uintptr_t sp, uintptr_t ip, uintptr_t stack); 14 | extern int launch_vcpu(void); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /tests/fail_entry.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int run_fail_entry(void) 5 | { 6 | return 1; 7 | } 8 | -------------------------------------------------------------------------------- 
/tests/go_vmx.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | /* Placeholder test; must be named run_go_vmx, which is what export.h declares and DriverEntry calls. */ 4 | int run_go_vmx(void) 5 | { 6 | return 1; 7 | } 8 | -------------------------------------------------------------------------------- /tests/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../compiler.h" 4 | #include "export.h" 5 | 6 | NTSTATUS DriverEntry(PDRIVER_OBJECT driverObject, PUNICODE_STRING registryPath) 7 | { 8 | int ret = 0; 9 | DbgPrint("Running tests\n"); 10 | 11 | ret |= run_allgood(); 12 | ret |= run_fail_entry(); 13 | ret |= run_go_vmx(); 14 | ret |= run_ept(); 15 | 16 | /* Just return failure anyway */ 17 | DbgPrint("Done running all tests: %d\n", ret); 18 | return STATUS_UNSUCCESSFUL; 19 | } 20 | -------------------------------------------------------------------------------- /tests/vmx_exit.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asamy/ksm/6ef52b87903b25c62560d80e8fbec0867e5c2a8b/tests/vmx_exit.c -------------------------------------------------------------------------------- /tests/vmx_setup.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "../compiler.h" 5 | #include "../asm.h" 6 | #include "../x86.h" 7 | #include "../mm.h" 8 | #include "../vmx.h" 9 | #include "../segment.h" 10 | 11 | /* assembly */ 12 | extern void vmx_ep(void); 13 | 14 | /* stack */ 15 | extern __align(PAGE_SIZE) u8 kstack[HOST_STACK_SIZE]; 16 | 17 | int initialize_features(void) 18 | { 19 | __try { 20 | /* Required MSR_IA32_FEATURE_CONTROL bits: */ 21 | const u64 required_feat_bits = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 22 | 23 | uintptr_t feat_ctl = __readmsr(MSR_IA32_FEATURE_CONTROL); 24 | if ((feat_ctl & required_feat_bits) == required_feat_bits) 25 | return 0; 26 | 27 | /* Attempt to set 
bits in place */ 28 | __writemsr(MSR_IA32_FEATURE_CONTROL, feat_ctl | required_feat_bits); 29 | 30 | feat_ctl = __readmsr(MSR_IA32_FEATURE_CONTROL); 31 | if ((feat_ctl & required_feat_bits) == required_feat_bits) 32 | return 0; 33 | } __except (EXCEPTION_EXECUTE_HANDLER) 34 | { 35 | } 36 | 37 | return -1; 38 | } 39 | 40 | /* Give me a vmxon, I'll give you root access. */ 41 | int give_me_root(void *vmxon) 42 | { 43 | u64 cr0 = __readcr0(); 44 | cr0 &= __readmsr(MSR_IA32_VMX_CR0_FIXED1); 45 | cr0 |= __readmsr(MSR_IA32_VMX_CR0_FIXED0); 46 | __writecr0(cr0); 47 | 48 | u64 cr4 = __readcr4(); 49 | cr4 &= __readmsr(MSR_IA32_VMX_CR4_FIXED1); 50 | cr4 |= __readmsr(MSR_IA32_VMX_CR4_FIXED0); 51 | __writecr4(cr4); 52 | 53 | u64 vmx = __readmsr(MSR_IA32_VMX_BASIC); 54 | *(u32 *)vmxon = (u32)vmx; 55 | 56 | uintptr_t pa = __pa(vmxon); 57 | if (__vmx_on(&pa)) 58 | return -1; 59 | 60 | return 0; 61 | } 62 | 63 | /* Give me a VMCS region, I'll initialize it for you */ 64 | int init_vmcs(void *vmcs) 65 | { 66 | u64 vmx = __readmsr(MSR_IA32_VMX_BASIC); 67 | *(u32 *)vmcs = (u32)vmx; 68 | 69 | uintptr_t pa = __pa(vmcs); 70 | if (__vmx_vmclear(&pa)) 71 | return false; 72 | 73 | return __vmx_vmptrld(&pa) == 0; 74 | } 75 | /* Adjust *val using the control MSR `msr`: AND with its high word, OR with its low word. Not static: export.h declares it extern. */ 76 | void adjust_ctl_val(u32 msr, u32 *val) 77 | { 78 | u64 v = __readmsr(msr); 79 | *val &= (u32)(v >> 32); /* bit == 0 in high word ==> must be zero */ 80 | *val |= (u32)v; /* bit == 1 in low word ==> must be one */ 81 | } 82 | 83 | static inline u32 __accessright(u16 selector) 84 | { 85 | if (selector) 86 | return (__lar(selector) >> 8) & 0xF0FF; 87 | 88 | /* unusable */ 89 | return 0x10000; 90 | } 91 | 92 | bool setup_basic_vmcs(u32 other_primary, u32 other_secondary, uintptr_t sp, uintptr_t ip) 93 | { 94 | struct gdtr gdtr; 95 | __sgdt(&gdtr); 96 | 97 | struct gdtr idtr; 98 | __sidt(&idtr); 99 | 100 | u64 cr0 = __readcr0(); 101 | u64 cr3 = __readcr3(); 102 | u64 cr4 = __readcr4(); 103 | u64 err = 0; 104 | 105 | u16 es = __reades(); 106 | u16 cs = 
__readcs(); 107 | u16 ss = __readss(); 108 | u16 ds = __readds(); 109 | u16 fs = __readfs(); 110 | u16 gs = __readgs(); 111 | u16 ldt = __sldt(); 112 | u16 tr = __str(); 113 | 114 | u32 msr_off = 0; 115 | if (__readmsr(MSR_IA32_VMX_BASIC) & VMX_BASIC_TRUE_CTLS) 116 | msr_off = 0xC; 117 | 118 | u32 vm_entry = VM_ENTRY_IA32E_MODE; 119 | adjust_ctl_val(MSR_IA32_VMX_ENTRY_CTLS + msr_off, &vm_entry); 120 | 121 | u32 vm_exit = VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_HOST_ADDR_SPACE_SIZE; 122 | adjust_ctl_val(MSR_IA32_VMX_EXIT_CTLS + msr_off, &vm_exit); 123 | 124 | u32 vm_pinctl = 0; 125 | adjust_ctl_val(MSR_IA32_VMX_PINBASED_CTLS + msr_off, &vm_pinctl); 126 | 127 | u32 vm_cpuctl = CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | other_primary; 128 | adjust_ctl_val(MSR_IA32_VMX_PROCBASED_CTLS + msr_off, &vm_cpuctl); 129 | 130 | u32 vm_2ndctl = other_secondary; 131 | adjust_ctl_val(MSR_IA32_VMX_PROCBASED_CTLS2, &vm_2ndctl); 132 | 133 | /* Processor control fields */ 134 | err |= __vmx_vmwrite(PIN_BASED_VM_EXEC_CONTROL, vm_pinctl); 135 | err |= __vmx_vmwrite(CPU_BASED_VM_EXEC_CONTROL, vm_cpuctl); 136 | err |= __vmx_vmwrite(SECONDARY_VM_EXEC_CONTROL, vm_2ndctl); 137 | err |= __vmx_vmwrite(VM_EXIT_CONTROLS, vm_exit); 138 | err |= __vmx_vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); 139 | err |= __vmx_vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); 140 | err |= __vmx_vmwrite(VM_ENTRY_CONTROLS, vm_entry); 141 | err |= __vmx_vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); 142 | err |= __vmx_vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); 143 | 144 | err |= __vmx_vmwrite(EXCEPTION_BITMAP, 0); 145 | err |= __vmx_vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); 146 | err |= __vmx_vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0); 147 | err |= __vmx_vmwrite(CR3_TARGET_COUNT, 0); 148 | err |= __vmx_vmwrite(VMCS_LINK_POINTER, -1ULL); 149 | 150 | /* CR0/CR4 controls */ 151 | err |= __vmx_vmwrite(CR0_GUEST_HOST_MASK, 0); 152 | err |= __vmx_vmwrite(CR4_GUEST_HOST_MASK, 0); 153 | err |= __vmx_vmwrite(CR0_READ_SHADOW, cr0); 154 | err |= 
__vmx_vmwrite(CR4_READ_SHADOW, cr4); 155 | 156 | /* Guest */ 157 | err |= __vmx_vmwrite(GUEST_ES_SELECTOR, es); 158 | err |= __vmx_vmwrite(GUEST_CS_SELECTOR, cs); 159 | err |= __vmx_vmwrite(GUEST_SS_SELECTOR, ss); 160 | err |= __vmx_vmwrite(GUEST_DS_SELECTOR, ds); 161 | err |= __vmx_vmwrite(GUEST_FS_SELECTOR, fs); 162 | err |= __vmx_vmwrite(GUEST_GS_SELECTOR, gs); 163 | err |= __vmx_vmwrite(GUEST_LDTR_SELECTOR, ldt); 164 | err |= __vmx_vmwrite(GUEST_TR_SELECTOR, tr); 165 | err |= __vmx_vmwrite(GUEST_ES_LIMIT, __segmentlimit(es)); 166 | err |= __vmx_vmwrite(GUEST_CS_LIMIT, __segmentlimit(cs)); 167 | err |= __vmx_vmwrite(GUEST_SS_LIMIT, __segmentlimit(ss)); 168 | err |= __vmx_vmwrite(GUEST_DS_LIMIT, __segmentlimit(ds)); 169 | err |= __vmx_vmwrite(GUEST_FS_LIMIT, __segmentlimit(fs)); 170 | err |= __vmx_vmwrite(GUEST_GS_LIMIT, __segmentlimit(gs)); 171 | err |= __vmx_vmwrite(GUEST_LDTR_LIMIT, __segmentlimit(ldt)); 172 | err |= __vmx_vmwrite(GUEST_TR_LIMIT, __segmentlimit(tr)); 173 | err |= __vmx_vmwrite(GUEST_GDTR_LIMIT, gdtr.limit); 174 | err |= __vmx_vmwrite(GUEST_IDTR_LIMIT, idtr.limit); 175 | err |= __vmx_vmwrite(GUEST_ES_AR_BYTES, __accessright(es)); 176 | err |= __vmx_vmwrite(GUEST_CS_AR_BYTES, __accessright(cs)); 177 | err |= __vmx_vmwrite(GUEST_SS_AR_BYTES, __accessright(ss)); 178 | err |= __vmx_vmwrite(GUEST_DS_AR_BYTES, __accessright(ds)); 179 | err |= __vmx_vmwrite(GUEST_FS_AR_BYTES, __accessright(fs)); 180 | err |= __vmx_vmwrite(GUEST_GS_AR_BYTES, __accessright(gs)); 181 | err |= __vmx_vmwrite(GUEST_LDTR_AR_BYTES, __accessright(ldt)); 182 | err |= __vmx_vmwrite(GUEST_TR_AR_BYTES, __accessright(tr)); 183 | err |= __vmx_vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); 184 | err |= __vmx_vmwrite(GUEST_ACTIVITY_STATE, 0); 185 | err |= __vmx_vmwrite(GUEST_IA32_DEBUGCTL, __readmsr(MSR_IA32_DEBUGCTLMSR)); 186 | err |= __vmx_vmwrite(GUEST_SYSENTER_CS, __readmsr(MSR_IA32_SYSENTER_CS)); 187 | err |= __vmx_vmwrite(GUEST_CR0, cr0); 188 | err |= __vmx_vmwrite(GUEST_CR3, cr3); 
189 | err |= __vmx_vmwrite(GUEST_CR4, cr4); 190 | err |= __vmx_vmwrite(GUEST_ES_BASE, 0); 191 | err |= __vmx_vmwrite(GUEST_CS_BASE, 0); 192 | err |= __vmx_vmwrite(GUEST_SS_BASE, 0); 193 | err |= __vmx_vmwrite(GUEST_DS_BASE, 0); 194 | err |= __vmx_vmwrite(GUEST_FS_BASE, __readmsr(MSR_IA32_FS_BASE)); 195 | err |= __vmx_vmwrite(GUEST_GS_BASE, __readmsr(MSR_IA32_GS_BASE)); 196 | err |= __vmx_vmwrite(GUEST_LDTR_BASE, __segmentbase(gdtr.base, ldt)); 197 | err |= __vmx_vmwrite(GUEST_TR_BASE, __segmentbase(gdtr.base, tr)); 198 | err |= __vmx_vmwrite(GUEST_GDTR_BASE, gdtr.base); 199 | err |= __vmx_vmwrite(GUEST_IDTR_BASE, idtr.base); 200 | err |= __vmx_vmwrite(GUEST_DR7, __readdr(7)); 201 | err |= __vmx_vmwrite(GUEST_RSP, sp); 202 | err |= __vmx_vmwrite(GUEST_RIP, ip); 203 | err |= __vmx_vmwrite(GUEST_RFLAGS, __readeflags()); 204 | err |= __vmx_vmwrite(GUEST_SYSENTER_ESP, __readmsr(MSR_IA32_SYSENTER_ESP)); 205 | err |= __vmx_vmwrite(GUEST_SYSENTER_EIP, __readmsr(MSR_IA32_SYSENTER_EIP)); 206 | 207 | /* Host */ 208 | err |= __vmx_vmwrite(HOST_ES_SELECTOR, es & 0xf8); 209 | err |= __vmx_vmwrite(HOST_CS_SELECTOR, cs & 0xf8); 210 | err |= __vmx_vmwrite(HOST_SS_SELECTOR, ss & 0xf8); 211 | err |= __vmx_vmwrite(HOST_DS_SELECTOR, ds & 0xf8); 212 | err |= __vmx_vmwrite(HOST_FS_SELECTOR, fs & 0xf8); 213 | err |= __vmx_vmwrite(HOST_GS_SELECTOR, gs & 0xf8); 214 | err |= __vmx_vmwrite(HOST_TR_SELECTOR, tr & 0xf8); 215 | err |= __vmx_vmwrite(HOST_CR0, cr0); 216 | err |= __vmx_vmwrite(HOST_CR3, cr3); 217 | err |= __vmx_vmwrite(HOST_CR4, cr4); 218 | err |= __vmx_vmwrite(HOST_FS_BASE, __readmsr(MSR_IA32_FS_BASE)); 219 | err |= __vmx_vmwrite(HOST_GS_BASE, __readmsr(MSR_IA32_GS_BASE)); 220 | err |= __vmx_vmwrite(HOST_TR_BASE, __segmentbase(gdtr.base, tr)); 221 | err |= __vmx_vmwrite(HOST_GDTR_BASE, gdtr.base); 222 | err |= __vmx_vmwrite(HOST_IDTR_BASE, idtr.base); 223 | err |= __vmx_vmwrite(HOST_IA32_SYSENTER_CS, __readmsr(MSR_IA32_SYSENTER_CS)); 224 | err |= 
__vmx_vmwrite(HOST_IA32_SYSENTER_ESP, __readmsr(MSR_IA32_SYSENTER_ESP)); 225 | err |= __vmx_vmwrite(HOST_IA32_SYSENTER_EIP, __readmsr(MSR_IA32_SYSENTER_EIP)); 226 | err |= __vmx_vmwrite(HOST_RSP, (uintptr_t)kstack + HOST_STACK_SIZE); 227 | err |= __vmx_vmwrite(HOST_RIP, (uintptr_t)vmx_ep); 228 | 229 | return err == 0; 230 | } 231 | /* Run __vmx_vmlaunch() and return its status byte (non-zero on failure); VM_INSTRUCTION_ERROR is logged when an intrinsic fails. */ 232 | int launch_vcpu(void) 233 | { 234 | size_t vmerr = 0; /* stays 0 if the vmread itself fails, so nothing uninitialized is printed */ 235 | uint8_t err = __vmx_vmread(VM_INSTRUCTION_ERROR, &vmerr); 236 | if (err) 237 | DbgPrint("VM_INSTRUCTION_ERROR: %u\n", (unsigned int)vmerr); 238 | 239 | err = __vmx_vmlaunch(); 240 | if (err) { 241 | __vmx_vmread(VM_INSTRUCTION_ERROR, &vmerr); 242 | DbgPrint("__vmx_vmlaunch(): failed %d %u\n", err, (unsigned int)vmerr); 243 | } 244 | 245 | return err; 246 | } 247 | -------------------------------------------------------------------------------- /tests/x64.asm: -------------------------------------------------------------------------------- 1 | EXTERN init_vcpu:PROC 2 | EXTERN handle_exit:PROC 3 | EXTERN handle_fail:PROC 4 | 5 | .CONST 6 | 7 | ; Saves all general purpose registers to the stack 8 | PUSHAQ MACRO 9 | push r15 10 | push r14 11 | push r13 12 | push r12 13 | push r11 14 | push r10 15 | push r9 16 | push r8 17 | push rdi 18 | push rsi 19 | push rbp 20 | push -1 21 | push rbx 22 | push rdx 23 | push rcx 24 | push rax 25 | ENDM 26 | 27 | POPAQ MACRO 28 | pop rax 29 | pop rcx 30 | pop rdx 31 | pop rbx 32 | add rsp, 8 33 | pop rbp 34 | pop rsi 35 | pop rdi 36 | pop r8 37 | pop r9 38 | pop r10 39 | pop r11 40 | pop r12 41 | pop r13 42 | pop r14 43 | pop r15 44 | ENDM 45 | 46 | .CODE 47 | 48 | vmx_init PROC 49 | pushfq 50 | PUSHAQ ; -8 * 16 51 | 52 | ; rcx contains vcpu 53 | mov rdx, rsp ; SP 54 | mov r8, do_resume ; IP after success 55 | 56 | sub rsp, 20h 57 | call init_vcpu 58 | add rsp, 20h 59 | 60 | ; if we get here, we failed 61 | POPAQ 62 | popfq 63 | xor al, al 64 | ret 65 | 66 | do_resume: 67 | POPAQ 68 | popfq 69 | mov al, 1 70 | ret 71 | vmx_init ENDP 72 | 73 | vmx_ep PROC 74 | ; This is the 
VM entry point, aka root mode. 75 | ; This saves guest registers (as they are untouched for now) 76 | ; and restores control to guest if all good, otherwise, fail. 77 | ; 78 | ; All interrupts are disabled at this point. 79 | PUSHAQ 80 | mov rcx, rsp 81 | 82 | sub rsp, 48h 83 | call handle_exit 84 | add rsp, 48h 85 | 86 | test al, al 87 | jz exit 88 | 89 | POPAQ 90 | vmresume ; give them control 91 | jmp error ; something went wrong. 92 | 93 | exit: 94 | ; at this point: 95 | ; rax = eflags 96 | ; rdx = rsp 97 | ; rcx = return address (aka RIP prior to this call plus instruction length) 98 | POPAQ 99 | vmxoff 100 | jna error 101 | 102 | push rax 103 | popfq ; eflags to indicate success 104 | 105 | mov rsp, rdx ; stack pointer 106 | push rcx ; return address (rip + instr len) 107 | ret 108 | 109 | error: 110 | ; otherwise, we are rip. 111 | pushfq 112 | PUSHAQ 113 | mov rcx, rsp 114 | 115 | sub rsp, 28h 116 | call handle_fail 117 | add rsp, 28h 118 | 119 | do_hlt: 120 | hlt ; not reached 121 | jmp do_hlt 122 | vmx_ep ENDP 123 | 124 | __vmx_vmcall PROC 125 | ; assumes: 126 | ; rcx = hypercall 127 | ; rdx = data 128 | vmcall 129 | setna al 130 | ret 131 | __vmx_vmcall ENDP 132 | 133 | ; it's unsafe to call this function directly, so unless 134 | ; you're 100% sure the CPU supports it, use vcpu_vmfunc instead. 
135 | __vmx_vmfunc PROC 136 | ; assumes: 137 | ; ecx = EPTP index 138 | ; edx = function 139 | mov eax, edx 140 | db 0fh, 01h, 0d4h 141 | setna al 142 | ret 143 | __vmx_vmfunc ENDP 144 | 145 | __lgdt PROC 146 | lgdt fword ptr [rcx] 147 | ret 148 | __lgdt ENDP 149 | 150 | __sgdt PROC 151 | sgdt [rcx] 152 | ret 153 | __sgdt ENDP 154 | 155 | __lldt PROC 156 | lldt cx 157 | ret 158 | __lldt ENDP 159 | 160 | __sldt PROC 161 | sldt ax 162 | ret 163 | __sldt ENDP 164 | 165 | __ltr PROC 166 | ltr cx 167 | ret 168 | __ltr ENDP 169 | 170 | __str PROC 171 | str ax 172 | ret 173 | __str ENDP 174 | 175 | __reades PROC 176 | mov ax, es 177 | ret 178 | __reades ENDP 179 | 180 | __readcs PROC 181 | mov ax, cs 182 | ret 183 | __readcs ENDP 184 | 185 | __readss PROC 186 | mov ax, ss 187 | ret 188 | __readss ENDP 189 | 190 | __readds PROC 191 | mov ax, ds 192 | ret 193 | __readds ENDP 194 | 195 | __readfs PROC 196 | mov ax, fs 197 | ret 198 | __readfs ENDP 199 | 200 | __readgs PROC 201 | mov ax, gs 202 | ret 203 | __readgs ENDP 204 | 205 | __lar PROC 206 | lar rax, rcx 207 | ret 208 | __lar ENDP 209 | 210 | __writecr2 PROC 211 | mov cr2, rcx 212 | ret 213 | __writecr2 ENDP 214 | 215 | __invd PROC 216 | invd 217 | ret 218 | __invd ENDP 219 | 220 | __invept PROC 221 | invept ecx, oword ptr [rdx] 222 | setna al 223 | ret 224 | __invept ENDP 225 | 226 | __invvpid PROC 227 | invvpid ecx, oword ptr [rdx] 228 | setna al 229 | ret 230 | __invvpid ENDP 231 | 232 | PURGE PUSHAQ 233 | PURGE POPAQ 234 | END 235 | -------------------------------------------------------------------------------- /um/um.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef _WIN32 10 | #include 11 | #include 12 | #include 13 | #else 14 | #include 15 | #include 16 | 17 | typedef unsigned char u8; 18 | typedef unsigned int u32; 19 | #endif 20 | 21 | #define UM 22 | #include "um.h" 23 | 
#include "../compiler.h" 24 | 25 | #ifdef _WIN32 26 | extern NTSTATUS NTAPI ZwDeviceIoControlFile(HANDLE h, HANDLE hEvent, PIO_APC_ROUTINE apc, void *apc_ctx, 27 | PIO_STATUS_BLOCK status_block, u32 ctl_code, 28 | void *input, u32 in_len, void *output, u32 out_len); 29 | #pragma comment(lib, "ntdll.lib") 30 | typedef HANDLE devfd_t; 31 | typedef DWORD pidtype_t; 32 | #else 33 | typedef int devfd_t; 34 | typedef int pidtype_t; 35 | #endif 36 | 37 | static inline devfd_t open_device(void) 38 | { 39 | #ifdef _WIN32 40 | return CreateFileW(UM_DEVICE_PATH, 41 | GENERIC_READ | GENERIC_WRITE, 42 | FILE_SHARE_READ | FILE_SHARE_WRITE, 43 | NULL, OPEN_EXISTING, 0, NULL); 44 | #else 45 | return open(UM_DEVICE_PATH, O_RDWR); 46 | #endif 47 | } 48 | 49 | static inline void close_device(devfd_t device) 50 | { 51 | #ifdef _WIN32 52 | CloseHandle(device); 53 | #else 54 | close(device); 55 | #endif 56 | } 57 | 58 | static inline int do_ioctl(devfd_t device, unsigned int cmd, void *param, u32 len) 59 | { 60 | #ifdef _WIN32 61 | IO_STATUS_BLOCK blk; 62 | return ZwDeviceIoControlFile(device, NULL, NULL, NULL, &blk, 63 | cmd, param, len, 64 | param, len); 65 | #else 66 | return ioctl(device, cmd, param); 67 | #endif 68 | } 69 | 70 | static inline pidtype_t __get_pid(void) 71 | { 72 | #ifdef _WIN32 73 | return (pidtype_t)GetCurrentProcessId(); 74 | #else 75 | return getpid(); 76 | #endif 77 | } 78 | 79 | static inline bool getchr(char *o) 80 | { 81 | while (scanf("%c", o) == 1) { 82 | if (*o == '\n' || isspace(*o)) 83 | continue; 84 | 85 | return true; 86 | } 87 | 88 | return false; 89 | } 90 | /* Print one row of a hex + ASCII dump: the offset, len bytes as hex (padded when len < 16), then the same bytes as text ('.' if unprintable). */ 91 | static void print_hex_ascii_line(const u8 *payload, size_t len, size_t offset) 92 | { 93 | size_t i, gap; 94 | /* Bytes are fetched inside the loop bodies so that payload[len] is never read. */ 95 | 96 | printf("%05zd ", offset); 97 | /* hex */ 98 | for (i = 0; i < len; ++i) { 99 | printf("%02X ", payload[i]); 100 | if (i == 7) 101 | putchar(' '); 102 | } 103 | if (len < 8) 104 | putchar(' '); 105 | 106 | if (len < 16) { 107 | gap = 16 - len; 
108 | for (i = 0; i < gap; ++i) 109 | printf(" "); 110 | } 111 | printf(" "); 112 | 113 | /* ascii (if printable) */ 114 | for (i = 0; i < len; ++i) { 115 | if (isprint(payload[i])) 116 | putchar(payload[i]); 117 | else 118 | putchar('.'); 119 | } 120 | 121 | putchar('\n'); 122 | } 123 | 124 | static void print_payload(const u8 *payload, size_t len) 125 | { 126 | size_t len_rem = len; 127 | size_t line_width = 16; 128 | size_t line_len; 129 | size_t offset = 0; 130 | const u8 *ch = payload; 131 | 132 | if (len <= 0) 133 | return; 134 | 135 | if (len <= line_width) { 136 | print_hex_ascii_line(ch, len, offset); 137 | return; 138 | } 139 | 140 | for (;;) { 141 | line_len = line_width % len_rem; 142 | print_hex_ascii_line(ch, line_len, offset); 143 | len_rem -= line_len; 144 | ch += line_len; 145 | offset += line_width; 146 | if (len_rem <= line_width) { 147 | print_hex_ascii_line(ch, len_rem, offset); 148 | break; 149 | } 150 | } 151 | } 152 | 153 | int main(int ac, char *av[]) 154 | { 155 | devfd_t dev; 156 | int ret = -1; 157 | int pid; 158 | u32 cmd; 159 | char c; 160 | struct watch_ioctl *w = malloc(sizeof(*w)); 161 | 162 | dev = open_device(); 163 | if (dev < 0) { 164 | perror("open device"); 165 | goto err; 166 | } 167 | 168 | ret = do_ioctl(dev, KSM_IOCTL_SUBVERT, NULL, 0); 169 | if (ret < 0) { 170 | perror("subvert"); 171 | goto out; 172 | } 173 | 174 | printf("Our pid: %d\n", __get_pid()); 175 | printf("i = introspect, s = sandbox, q = quit\n"); 176 | while (1) { 177 | printf("Say request> "); 178 | if (!getchr(&c)) 179 | goto unsub; /* EOF or read error: leave instead of spinning on the prompt forever */ 180 | 181 | switch (c) { 182 | case 'q': 183 | puts("Quit"); 184 | goto unsub; 185 | case 'i': 186 | printf("s = start, d = stop, a = add, r = remove, e = stats\n"); 187 | printf("Introspect> "); 188 | if (!getchr(&c)) 189 | goto unsub; 190 | 191 | switch (c) { 192 | case 's': 193 | ret = do_ioctl(dev, KSM_IOCTL_INTRO_START, NULL, 0); 194 | break; 195 | case 'd': 196 | ret = do_ioctl(dev, KSM_IOCTL_INTRO_STOP, NULL, 
0); 197 | break; 198 | case 'a': 199 | printf("Type addr then access (i.e. 0x1000 0x2 to remove write-access): "); 200 | if (scanf("%llX %hX", &w->addr, &w->access) != 2) 201 | break; 202 | 203 | printf("Watching 0x%016llX on %hX access\n", w->addr, w->access); 204 | if (w->access & ~7) { 205 | printf("invalid access bits: 0x%02hX\n", w->access); 206 | break; 207 | } 208 | 209 | ret = do_ioctl(dev, KSM_IOCTL_INTRO_WATCH, w, sizeof(*w)); 210 | break; 211 | case 'r': 212 | printf("Address> "); 213 | if (!scanf("%llX", &w->addr)) 214 | break; 215 | 216 | printf("Unwatching 0x%016llX\n", w->addr); 217 | ret = do_ioctl(dev, KSM_IOCTL_INTRO_UNWATCH, w, sizeof(*w)); 218 | break; 219 | case 'e': 220 | printf("Address> "); 221 | if (!scanf("%llX", &w->addr)) 222 | break; 223 | 224 | ret = do_ioctl(dev, KSM_IOCTL_INTRO_STATS, w, sizeof(*w)); 225 | if (ret == 0) { 226 | printf("Stats for 0x%016llX\n", w->addr); 227 | printf("\tHits: %d\n", w->hits); 228 | printf("\tMisses: %d\n", w->miss); 229 | printf("Buffer:\n"); 230 | print_payload((const u8 *)w->buf, 0x1000); 231 | } 232 | break; 233 | default: 234 | ret = -EINVAL; 235 | printf("unknown request: %c\n", c); 236 | break; 237 | } 238 | 239 | printf("ret: 0x%08X\n", ret); 240 | break; 241 | case 's': 242 | printf("Pid (Negative to unbox)> "); 243 | if (!scanf("%d", &pid)) 244 | goto unsub; 245 | 246 | if (pid < 0) { 247 | pid = -pid; 248 | cmd = KSM_IOCTL_UNBOX; 249 | printf("Unsandboxing %d... ", pid); 250 | } else { 251 | cmd = KSM_IOCTL_SANDBOX; 252 | printf("Sandboxing %d... ", pid); 253 | } 254 | 255 | ret = do_ioctl(dev, cmd, &pid, sizeof(pid)); 256 | printf("0x%08X (%s)\n", ret, ret == 0 ? 
"OK" : "FAILED"); 257 | break; 258 | default: 259 | printf("unknown request: %c\n", c); 260 | break; 261 | } 262 | } 263 | 264 | unsub: 265 | printf("unsubverting now\n"); 266 | ret = do_ioctl(dev, KSM_IOCTL_UNSUBVERT, NULL, 0); 267 | out: 268 | close_device(dev); 269 | err: 270 | free(w); 271 | printf("ret: 0x%08X\n", ret); 272 | return ret; 273 | } 274 | -------------------------------------------------------------------------------- /um/um.h: -------------------------------------------------------------------------------- 1 | /* 2 | * User mode definitions for IOCTL codes, and such. 3 | * Stuff that are shared between both usermode and the kernel. 4 | */ 5 | #ifndef __UM_H 6 | #define __UM_H 7 | 8 | #ifndef __linux__ 9 | #include 10 | #endif 11 | struct watch_ioctl { 12 | unsigned long long addr; 13 | unsigned short access; 14 | int hits; 15 | int miss; 16 | char buf[0x1000]; 17 | } __packed; 18 | #ifndef __linux__ 19 | #include 20 | #endif 21 | 22 | #ifdef __linux__ 23 | #include 24 | #include 25 | 26 | #define UM_DEVICE_NAME "ksm" 27 | #define UM_DEVICE_PATH "/dev/" UM_DEVICE_NAME 28 | 29 | #define KSM_DEVICE_MAGIC 'K' 30 | #define KSM_IOCTL_SANDBOX _IOW(KSM_DEVICE_MAGIC, 0, int) 31 | #define KSM_IOCTL_UNBOX _IOW(KSM_DEVICE_MAGIC, 1, int) 32 | #define KSM_IOCTL_SUBVERT _IO(KSM_DEVICE_MAGIC, 2) 33 | #define KSM_IOCTL_UNSUBVERT _IO(KSM_DEVICE_MAGIC, 3) 34 | #define KSM_IOCTL_INTRO_START _IO(KSM_DEVICE_MAGIC, 4) 35 | #define KSM_IOCTL_INTRO_STOP _IO(KSM_DEVICE_MAGIC, 5) 36 | #define KSM_IOCTL_INTRO_WATCH _IOW(KSM_DEVICE_MAGIC, 6, struct watch_ioctl) 37 | #define KSM_IOCTL_INTRO_UNWATCH _IOW(KSM_DEVICE_MAGIC, 7, struct watch_ioctl) 38 | #define KSM_IOCTL_INTRO_STATS _IOWR(KSM_DEVICE_MAGIC, 8, struct watch_ioctl) 39 | #else 40 | #define UM_DEVICE_NAME L"ksm" 41 | #define UM_DEVICE_PATH L"\\\\.\\" UM_DEVICE_NAME 42 | 43 | #define KSM_DEVICE_NAME L"\\Device\\" UM_DEVICE_NAME 44 | #define KSM_DOS_NAME L"\\DosDevices\\" UM_DEVICE_NAME 45 | 46 | #define KSM_DEVICE_MAGIC 
0x00008008 47 | #define KSM_IOCTL_SANDBOX (ULONG)CTL_CODE(KSM_DEVICE_MAGIC, 0x800, \ 48 | METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) 49 | #define KSM_IOCTL_UNBOX (ULONG)CTL_CODE(KSM_DEVICE_MAGIC, 0x801, \ 50 | METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) 51 | #define KSM_IOCTL_SUBVERT (ULONG)CTL_CODE(KSM_DEVICE_MAGIC, 0x802, \ 52 | METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) 53 | #define KSM_IOCTL_UNSUBVERT (ULONG)CTL_CODE(KSM_DEVICE_MAGIC, 0x803, \ 54 | METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) 55 | #define KSM_IOCTL_INTRO_START (ULONG)CTL_CODE(KSM_DEVICE_MAGIC, 0x804, \ 56 | METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) 57 | #define KSM_IOCTL_INTRO_STOP (ULONG)CTL_CODE(KSM_DEVICE_MAGIC, 0x805, \ 58 | METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) 59 | #define KSM_IOCTL_INTRO_WATCH (ULONG)CTL_CODE(KSM_DEVICE_MAGIC, 0x806, \ 60 | METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) 61 | #define KSM_IOCTL_INTRO_UNWATCH (ULONG)CTL_CODE(KSM_DEVICE_MAGIC, 0x807, \ 62 | METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) 63 | #define KSM_IOCTL_INTRO_STATS (ULONG)CTL_CODE(KSM_DEVICE_MAGIC, 0x808, \ 64 | METHOD_BUFFERED, FILE_READ_ACCESS | FILE_WRITE_ACCESS) 65 | #endif 66 | #endif 67 | -------------------------------------------------------------------------------- /vmx.S: -------------------------------------------------------------------------------- 1 | /* 2 | * ksm - a really simple and fast x64 hypervisor 3 | * Copyright (C) 2016, 2017 Ahmed Samy 4 | * 5 | * Assembly stuff for GCC (AT&T aka GAS) only. See vmx.asm for MASM. 6 | * 7 | * This program is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation; either version 2 of the License, or 10 | * (at your option) any later version. 
11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License along 18 | * with this program; if not, write to the Free Software Foundation, Inc., 19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 20 | */ 21 | #ifndef __ASSEMBLY__ 22 | #error __ASSEMBLY__ is not defined... 23 | #endif 24 | 25 | #include "compiler.h" 26 | 27 | #ifdef __linux__ 28 | /* Arguments */ 29 | #define REG_A1 %rdi 30 | #define REG_A2 %rsi 31 | #define REG_A3 %rdx 32 | #define REG_A4 %rcx 33 | /* Volatile registers (caller-saved) */ 34 | #define REG_V1 %rdx 35 | #define REG_V2 %rcx 36 | #define REG_V3 %rax 37 | #define REG_V4 %r8 38 | #define REG_V5 %r9 39 | #define REG_V6 %r10 40 | #define REG_V7 %r11 41 | #define REG_V8 %rdi 42 | #define REG_V9 %rsi 43 | #else 44 | /* Arguments */ 45 | #define REG_A1 %rcx 46 | #define REG_A2 %rdx 47 | #define REG_A3 %r8 48 | #define REG_A4 %r9 49 | /* Volatile registers (caller-saved) */ 50 | #define REG_V1 %rax 51 | #define REG_V2 %rcx 52 | #define REG_V3 %rdx 53 | #define REG_V4 %r8 54 | #define REG_V5 %r9 55 | #define REG_V6 %r10 56 | #define REG_V7 %r11 57 | #endif 58 | 59 | .macro PUSH_REGS 60 | pushq %r15 61 | pushq %r14 62 | pushq %r13 63 | pushq %r12 64 | pushq %r11 65 | pushq %r10 66 | pushq %r9 67 | pushq %r8 68 | pushq %rdi 69 | pushq %rsi 70 | pushq %rbp 71 | subq $8, %rsp /* placeholder */ 72 | pushq %rbx 73 | pushq %rdx 74 | pushq %rcx 75 | pushq %rax 76 | .endm 77 | 78 | .macro POP_REGS 79 | popq %rax 80 | popq %rcx 81 | popq %rdx 82 | popq %rbx 83 | addq $8, %rsp 84 | popq %rbp 85 | popq %rsi 86 | popq %rdi 87 | popq %r8 88 | popq %r9 89 | popq %r10 90 | popq %r11 91 | popq %r12 92 | popq %r13 93 | popq %r14 94 | popq %r15 95 | .endm 96 | 97 | 
/*
 * Trap frame layout.  All offsets are relative to %rbp as set up by
 * TRAP_SAVE_GPR (%rbp = %rsp + 0x80 after the frame has been allocated):
 *
 *	KFRAME_RPL		bit 0 of the interrupted CS selector (TRAP_ENTER)
 *	KFRAME_CSR		saved MXCSR (TRAP_SAVE_XMM)
 *	KFRAME_V1..V9		saved REG_V1..REG_V9
 *	KFRAME_XMM0..XMM5	saved %xmm0..%xmm5
 *	0xD8			%rbp of the interrupted code (pushed first)
 *	KFRAME_EC		error code, or the dummy slot reserved by irq_entry
 *	KFRAME_IP .. KFRAME_SS	interrupt frame: IP, CS, RFLAGS, RSP, SS
 */
#define KFRAME_RPL	-0x66
#define KFRAME_CSR	-0x64
#define KFRAME_V1	-0x60
#define KFRAME_V2	-0x58
#define KFRAME_V3	-0x50
#define KFRAME_V4	-0x48
#define KFRAME_V5	-0x40
#define KFRAME_V6	-0x38
#define KFRAME_V7	-0x30
#define KFRAME_V8	-0x28
#define KFRAME_V9	-0x20
#define KFRAME_XMM0	-0x10
#define KFRAME_XMM1	0x00
#define KFRAME_XMM2	0x10
#define KFRAME_XMM3	0x20
#define KFRAME_XMM4	0x30
#define KFRAME_XMM5	0x40
#define KFRAME_SS	0x108
#define KFRAME_RSP	0x100
#define KFRAME_FLGS	0x0F8
#define KFRAME_CS	0x0F0
#define KFRAME_EC	0x0E0
#define KFRAME_IP	0x0E8

/*
 * TRAP_SAVE_GPR: build the trap frame and save the volatile registers.
 *
 * Expects the error code (or a dummy slot) on top of the stack.  Saves the
 * interrupted %rbp, allocates 0x158 bytes and points %rbp 0x80 bytes above
 * the new %rsp, so that the KFRAME_* offsets address the frame.
 */
.macro TRAP_SAVE_GPR
	pushq	%rbp			/* ends up at 0xD8(%rbp) */
	subq	$0x158, %rsp
	leaq	0x80(%rsp), %rbp
	movq	REG_V1, KFRAME_V1(%rbp)
	movq	REG_V2, KFRAME_V2(%rbp)
	movq	REG_V3, KFRAME_V3(%rbp)
	movq	REG_V4, KFRAME_V4(%rbp)
	movq	REG_V5, KFRAME_V5(%rbp)
	movq	REG_V6, KFRAME_V6(%rbp)
	movq	REG_V7, KFRAME_V7(%rbp)
	/* REG_V8/REG_V9 are only defined in the Linux register set */
#ifdef REG_V8
	movq	REG_V8, KFRAME_V8(%rbp)
#endif
#ifdef REG_V9
	movq	REG_V9, KFRAME_V9(%rbp)
#endif
.endm

/*
 * TRAP_REST_GPR: undo TRAP_SAVE_GPR.
 *
 * Reloads the volatile registers, restores the interrupted %rbp from
 * 0xD8(%rbp) and leaves %rsp at frame + 0xE8, i.e. on the IP slot
 * (KFRAME_IP), past the saved %rbp and the error code, ready for iretq.
 */
.macro TRAP_REST_GPR
#ifdef REG_V9
	movq	KFRAME_V9(%rbp), REG_V9
#endif
#ifdef REG_V8
	movq	KFRAME_V8(%rbp), REG_V8
#endif
	movq	KFRAME_V7(%rbp), REG_V7
	movq	KFRAME_V6(%rbp), REG_V6
	movq	KFRAME_V5(%rbp), REG_V5
	movq	KFRAME_V4(%rbp), REG_V4
	movq	KFRAME_V3(%rbp), REG_V3
	movq	KFRAME_V2(%rbp), REG_V2
	movq	KFRAME_V1(%rbp), REG_V1
	movq	%rbp, %rsp
	movq	0xD8(%rbp), %rbp
	addq	$0xE8, %rsp
.endm

/*
 * TRAP_SAVE_XMM: save MXCSR and %xmm0-%xmm5 into the frame.
 * Expands to nothing on Linux.
 */
.macro TRAP_SAVE_XMM
#ifndef __linux__
	stmxcsr	KFRAME_CSR(%rbp)
	/* NOTE(review): presumably the kernel's per-processor MXCSR value - confirm */
	ldmxcsr	%gs:0x180
	movaps	%xmm0, KFRAME_XMM0(%rbp)
	movaps	%xmm1, KFRAME_XMM1(%rbp)
	movaps	%xmm2, KFRAME_XMM2(%rbp)
	movaps	%xmm3, KFRAME_XMM3(%rbp)
	movaps	%xmm4, KFRAME_XMM4(%rbp)
	movaps	%xmm5, KFRAME_XMM5(%rbp)
#endif
.endm

/*
 * TRAP_REST_XMM: restore what TRAP_SAVE_XMM saved.
 * Expands to nothing on Linux.
 */
.macro TRAP_REST_XMM
#ifndef __linux__
	ldmxcsr	KFRAME_CSR(%rbp)
	movaps	KFRAME_XMM0(%rbp), %xmm0
	movaps	KFRAME_XMM1(%rbp), %xmm1
	movaps	KFRAME_XMM2(%rbp), %xmm2
	movaps	KFRAME_XMM3(%rbp), %xmm3
	movaps	KFRAME_XMM4(%rbp), %xmm4
	movaps	KFRAME_XMM5(%rbp), %xmm5
#endif
.endm

/*
 * TRAP_ENTER: common trap prologue.
 *
 * Builds the frame, records bit 0 of the interrupted CS selector in
 * KFRAME_RPL and does swapgs when that bit is set (trap came from user
 * mode), then clears the direction flag and saves the XMM state.
 * %rax is used as scratch here; its original value is already in the frame.
 */
.macro TRAP_ENTER
	TRAP_SAVE_GPR
	movw	KFRAME_CS(%rbp), %ax
	andb	$1, %al			/* sets ZF for the jz below */
	movb	%al, KFRAME_RPL(%rbp)
	jz	1f
	swapgs
1:
	cld
	TRAP_SAVE_XMM
.endm

/*
 * TRAP_EXIT: common trap epilogue.
 *
 * Restores the XMM state and registers, swaps GS back if TRAP_ENTER swapped
 * it, and returns with iretq.  The KFRAME_RPL test has to happen before
 * TRAP_REST_GPR because the latter restores %rbp; hence the two copies of
 * the restore sequence.
 */
.macro TRAP_EXIT
	testb	$1, KFRAME_RPL(%rbp)
	jz	1f

	TRAP_REST_XMM
	TRAP_REST_GPR
	swapgs
	iretq

1:
	TRAP_REST_XMM
	TRAP_REST_GPR
	iretq
.endm

/*
 * irq_entry: emit a global interrupt entry point.
 *
 *	symbol		name of the entry point to define
 *	handler		function to call; receives the interrupted CS as the
 *			first argument and the interrupted IP as the second
 *	no_error_code	non-zero if no error code is pushed for this vector;
 *			a dummy 8-byte slot is then reserved so that the
 *			KFRAME_* offsets still line up
 */
.macro irq_entry symbol:req handler:req no_error_code:req
	.globl	\symbol
\symbol:
	.if \no_error_code
	subq	$8, %rsp
	.endif
	TRAP_ENTER
	movq	KFRAME_CS(%rbp), REG_A1
	movq	KFRAME_IP(%rbp), REG_A2
	subq	$0x20, %rsp		/* 0x20 bytes of scratch space for the callee */
	call	\handler
	addq	$0x20, %rsp
	TRAP_EXIT
.endm

	.text

	/* EPT violations  */
	irq_entry __ept_violation __ept_handle_violation 1

/*
 * __vmx_vminit: save the current context and hand it to vcpu_run().
 *
 * The first argument (vcpu) is passed through untouched; the second is the
 * stack pointer after the register save and the third is the address of
 * do_resume.  Control reaches do_resume on success (returns 0); if
 * vcpu_run() returns instead, the attempt failed.
 */
	.globl __vmx_vminit
__vmx_vminit:
	pushfq
	PUSH_REGS

	/* parameter one is pass-through (vcpu).  */
	movq	%rsp, REG_A2
	movabs	$do_resume, REG_A3

	subq	$0x20, %rsp
	call	vcpu_run
	addq	$0x20, %rsp

	/* If we get here, we failed  */
	POP_REGS
	popfq

	/*
	 * NOTE(review): there is no `$' on this operand, so unless the
	 * ERR_DENIED macro itself expands to an immediate, GAS assembles this
	 * as a load from memory rather than a constant.  The MASM version of
	 * this routine (vmx.asm) returns the immediate -1 here.  Confirm
	 * against the definition in compiler.h.
	 */
	movl	ERR_DENIED, %eax
	ret

do_resume:
	/* Succeeded  */
	POP_REGS
	popfq

	xorl	%eax, %eax
	ret

	.globl __vmx_entrypoint
__vmx_entrypoint:
	/*
	 * Host entry point (aka VMX root mode).
	 * Note: all interrupts are disabled at this point.
	 *
	 * Save all guest general purpose registers, then let the C handler do
	 * the rest.
	 *
	 * This is what the stack looks like after PUSH_REGS:
	 *		+0   = %rax
	 *		+8   = %rcx
	 *		+16  = %rdx
	 *		+24  = %rbx
	 *		+32  = %rsp (garbage, overwritten with real value in
	 *			     vcpu_handle_exit())
	 *		+40  = %rbp
	 *		+48  = %rsi
	 *		+56  = %rdi
	 *		+64  = %r8
	 *		+72  = %r9
	 *		+80  = %r10
	 *		+88  = %r11
	 *		+96  = %r12
	 *		+104 = %r13
	 *		+112 = %r14
	 *		+120 = %r15
	 *		+128 = vcpu (set in vcpu_create())
	 *
	 * Note: we pass vcpu->stack - 8 to HOST_RSP, because that slot holds
	 * the vcpu pointer.  So basically, a pop here (before PUSH_REGS) would
	 * pop the vcpu pointer, but the registers in this specific context are
	 * guest registers; we need their values as-is, untouched, and we may
	 * also modify them later on depending on the event (e.g. cpuid, etc.)
	 */
	PUSH_REGS
	movq	%rsp, REG_A1		/* pointer to the saved guest registers */

	subq	$0x20, %rsp
	call	vcpu_handle_exit
	addq	$0x20, %rsp

	/* Zero return value: leave VMX operation (label 1); otherwise resume. */
	testb	%al, %al
	jz	1f

	POP_REGS
	vmresume

	/* Something went wrong during re-entry to guest...  */
	jmp	2f

1:
	/* See exit.c (vcpu_do_exit)  */
	POP_REGS
	vmxoff
	jna	2f			/* CF or ZF set: vmxoff failed */

	/* Give them their stack pointer  */
	movq	%rdx, %rsp

	/* and their rflags (adjusted to indicate successful vmcall)  */
	pushq	%rax
	popfq

	/*
	 * rcx contains return address, which is guest's rip + instr_len (aka
	 * VMCALL length)
	 */
	pushq	%rcx
	ret

2:
	/* Either vmresume or vmxoff failure...  */
	nop
	pushfq
	PUSH_REGS
	movq	%rsp, REG_A1		/* saved registers, with RFLAGS above them */
	subq	$0x20, %rsp
	call	vcpu_handle_fail
	addq	$0x20, %rsp


3:	/* shouldn't come here, but in case it does, hlt forever:  */
	hlt
	jmp	3b

--------------------------------------------------------------------------------
/vmx.asm:
--------------------------------------------------------------------------------
; Copyright (c) 2015, 2016 tandasat. All rights reserved.
;	Original initialization code
; Copyright (C) 2016, 2017 asamy
;	improvements and added support for IDT #VE handling,
;	and optimized some operations
;
; This file is specific to Windows, it only compiles with MASM and thus should
; only be used with the VS project.
;
; For GCC (or general AT&T aka GAS) assembly, you should look at vmx.S and various
; inlined assembly in x86.h/vmx.h
EXTERN vcpu_run : PROC
EXTERN vcpu_handle_exit : PROC
EXTERN vcpu_handle_fail : PROC
EXTERN __ept_handle_violation : PROC

; Trap frame offsets, relative to rbp as set up by TRAP_SAVE_GPR
; (rbp = rsp + 80h once the frame has been allocated).
KFRAME_RPL  = -56h
KFRAME_CSR  = -54h
KFRAME_RAX  = -50h
KFRAME_RCX  = -48h
KFRAME_RDX  = -40h
KFRAME_R8   = -38h
KFRAME_R9   = -30h
KFRAME_R10  = -28h
KFRAME_R11  = -20h
KFRAME_XMM0 = -10h
KFRAME_XMM1 = +0h
KFRAME_XMM2 = +10h
KFRAME_XMM3 = +20h
KFRAME_XMM4 = +30h
KFRAME_XMM5 = +40h
KFRAME_SS   = +108h
KFRAME_RSP  = +100h
KFRAME_FLGS = +0F8h
KFRAME_CS   = +0F0h
KFRAME_EC   = +0E0h
KFRAME_IP   = +0E8h

.CONST

; Saves all general purpose registers to the stack
; RSP is read from VMCS.
PUSHAQ MACRO
	; Push order: r15 ... r8, rdi, rsi, rbp, <8-byte hole>, rbx, rdx, rcx, rax.
	; Afterwards rax is at [rsp + 0] and r15 at [rsp + 120]; the hole at
	; [rsp + 32] is the rsp slot, which is only reserved here, never written.
	FOR gpr, <r15, r14, r13, r12, r11, r10, r9, r8, rdi, rsi, rbp>
	push	gpr
	ENDM
	sub	rsp, 8			; reserve the rsp slot (placeholder)
	FOR gpr, <rbx, rdx, rcx, rax>
	push	gpr
	ENDM
ENDM

; Exact inverse of PUSHAQ: pops in the opposite order and steps over the
; reserved rsp slot without reading it.
POPAQ MACRO
	FOR gpr, <rax, rcx, rdx, rbx>
	pop	gpr
	ENDM
	add	rsp, 8			; skip the rsp slot
	FOR gpr, <rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15>
	pop	gpr
	ENDM
ENDM

; Builds the trap frame and stores rax, rcx, rdx and r8-r11 in it.
TRAP_SAVE_GPR MACRO
	; On entry the stack holds:
	;	ss		(+40)
	;	rsp		(+32)
	;	rflags		(+24)
	;	cs		(+16)
	;	ip		(+8)
	;	ec		(+0)	<-- rsp

	push	rbp			; keep the interrupted rbp
	sub	rsp, 158h		; room for the whole frame
	lea	rbp, [rsp + 80h]	; KFRAME_* offsets are relative to this

	; The stack now holds:
	;	ss		(+188h)
	;	rsp		(+180h)
	;	rflags		(+178h)
	;	cs		(+170h)
	;	ip		(+168h)
	;	ec		(+160h)
	;	rbp		(+158h)	<- interrupted rbp saved here
	;	frame		(+080h)	<- rbp points here
	;	data		(+000h)	<- rsp
	mov	qword ptr [rbp + KFRAME_RAX], rax
	mov	qword ptr [rbp + KFRAME_RCX], rcx
	mov	qword ptr [rbp + KFRAME_RDX], rdx
	mov	qword ptr [rbp + KFRAME_R8], r8
	mov	qword ptr [rbp + KFRAME_R9], r9
	mov	qword ptr [rbp + KFRAME_R10], r10
	mov	qword ptr [rbp + KFRAME_R11], r11
ENDM

; Undoes TRAP_SAVE_GPR: reloads the saved registers and unwinds the frame.
TRAP_REST_GPR MACRO
	; Reload the registers TRAP_SAVE_GPR stored, in reverse order.
	mov	r11, [rbp + KFRAME_R11]
	mov	r10, [rbp + KFRAME_R10]
	mov	r9,  [rbp + KFRAME_R9]
	mov	r8,  [rbp + KFRAME_R8]
	mov	rdx, [rbp + KFRAME_RDX]
	mov	rcx, [rbp + KFRAME_RCX]
	mov	rax, [rbp + KFRAME_RAX]

	; Restore the interrupted rbp (saved at frame + 0D8h) and leave rsp on
	; the IP slot (frame + 0E8h), past the saved rbp and the error code.
	mov	rsp, rbp
	mov	rbp, qword ptr [rbp + 0D8h]
	add	rsp, 0E8h
ENDM

; Saves MXCSR and xmm0-xmm5 into the trap frame, then loads MXCSR from gs:[180h].
; NOTE(review): gs:[180h] is presumably the kernel's per-processor MXCSR value - confirm.
TRAP_SAVE_XMM MACRO
	stmxcsr	dword ptr [rbp + KFRAME_CSR]
	ldmxcsr	dword ptr gs:[180h]
	movaps	[rbp + KFRAME_XMM0], xmm0
	movaps	[rbp + KFRAME_XMM1], xmm1
	movaps	[rbp + KFRAME_XMM2], xmm2
	movaps	[rbp + KFRAME_XMM3], xmm3
	movaps	[rbp + KFRAME_XMM4], xmm4
	movaps	[rbp + KFRAME_XMM5], xmm5
ENDM

; Restores MXCSR and xmm0-xmm5 from the trap frame.
TRAP_REST_XMM MACRO
	ldmxcsr	dword ptr[rbp + KFRAME_CSR]
	movaps	xmm0, xmmword ptr[rbp + KFRAME_XMM0]
	movaps	xmm1, xmmword ptr[rbp + KFRAME_XMM1]
	movaps	xmm2, xmmword ptr[rbp + KFRAME_XMM2]
	movaps	xmm3, xmmword ptr[rbp + KFRAME_XMM3]
	movaps	xmm4, xmmword ptr[rbp + KFRAME_XMM4]
	movaps	xmm5, xmmword ptr[rbp + KFRAME_XMM5]
ENDM

; General IDT trap handler (entry).
;
; Parameters:
;   NO_SWAP_LABEL  - name of the label this macro defines; it is the branch
;                    target when the trap came from kernel mode and GS does
;                    not need swapping.  Must be unique per expansion.
;   NO_ERROR_CODE  - optional.  Pass any non-blank value when no error code is
;                    pushed for this vector; a dummy 8-byte slot is then
;                    reserved so that the KFRAME_* offsets still line up.
;
; Builds the frame and saves the volatile registers (TRAP_SAVE_GPR), records
; bit 0 of the interrupted CS in KFRAME_RPL, does swapgs if that bit is set
; (MSR_IA32_KERNEL_GS_BASE <-> MSR_IA32_GS_BASE), clears the direction flag and
; saves the XMM state (TRAP_SAVE_XMM).  rax is used as scratch after it has
; been saved.
;
; See __ept_violation below on how this is used.
TRAP_ENTER MACRO	NO_SWAP_LABEL, NO_ERROR_CODE
	; The argument to IFNB is required: without it the dummy error code
	; slot is never reserved and every interrupt frame offset is off by 8.
	IFNB <NO_ERROR_CODE>
	sub	rsp, 8
	ENDIF

	; build the frame, then save general purpose registers.
	TRAP_SAVE_GPR

	; see if we're coming from usermode, if so, swap gs.
	mov	ax, word ptr [rbp + KFRAME_CS]
	and	al, 1			; sets ZF for the jz below
	mov	[rbp + KFRAME_RPL], al
	jz	&NO_SWAP_LABEL&

	; ok we're coming from usermode, swap to kernel gs.
	swapgs

&NO_SWAP_LABEL&:
	; clear direction flag
	cld

	; save XMM
	TRAP_SAVE_XMM
ENDM

; General IDT trap handler (exit).
;
; NO_SWAP_LABEL is the name of the label this macro defines for the path that
; skips swapgs; it must be unique per expansion.  The KFRAME_RPL test has to
; come before TRAP_REST_GPR because the latter restores rbp, hence the two
; copies of the restore sequence.
TRAP_EXIT MACRO		NO_SWAP_LABEL
	; see if we're coming from usermode, if so, swap back gs
	test	byte ptr [rbp + KFRAME_RPL], 1
	jz	&NO_SWAP_LABEL&

	; ok we're coming from usermode
	TRAP_REST_XMM
	TRAP_REST_GPR
	swapgs
	iretq

&NO_SWAP_LABEL&:
	TRAP_REST_XMM
	TRAP_REST_GPR
	iretq
ENDM

.CODE

; Saves the current context and hands it to vcpu_run().
;	rcx = vcpu (passed through untouched)
; Returns 0 on success, -1 on failure
__vmx_vminit PROC
	pushfq
	PUSHAQ				; -8 * 16

	; rcx contains vcpu
	mov	rdx, rsp		; SP
	mov	r8, do_resume		; IP after success

	sub	rsp, 20h
	call	vcpu_run
	add	rsp, 20h

	; if we get here, we failed
	POPAQ
	popfq

	mov	eax, -1
	ret

do_resume:
	; reached on success with the stack exactly as it was saved above
	POPAQ
	popfq

	xor	eax, eax
	ret
__vmx_vminit ENDP

__vmx_entrypoint PROC
	; This is the VM entry point, aka root mode.
	; This saves guest registers (as they are untouched for now)
	; and restores control to guest if all good, otherwise, fail.
	;
	; All interrupts are disabled at this point.
	PUSHAQ
	mov	rcx, rsp		; pointer to the saved guest registers

	sub	rsp, 20h
	call	vcpu_handle_exit
	add	rsp, 20h

	; zero return value: leave VMX operation, otherwise resume the guest
	test	al, al
	jz	exit

	POPAQ
	vmresume			; give them control
	jmp	error			; something went wrong.

exit:
	; once the registers have been popped:
	;	rax = eflags
	;	rdx = rsp
	;	rcx = return address (aka RIP prior to this call plus instruction length)
	POPAQ
	vmxoff
	jna	error			; CF or ZF set: vmxoff failed

	; Give them their stack pointer
	mov	rsp, rdx

	push	rax
	popfq				; eflags to indicate success

	push	rcx			; return address (rip + instr len)
	ret

error:
	; vmresume or vmxoff failed: hand the register state (with RFLAGS
	; above it) to vcpu_handle_fail.
	pushfq
	PUSHAQ
	mov	rcx, rsp

	; NOTE(review): 28h here, while every other call site in this file
	; reserves 20h; left as-is, confirm whether the extra 8 bytes are an
	; intentional alignment fix-up.
	sub	rsp, 28h
	call	vcpu_handle_fail
	add	rsp, 28h

do_hlt:
	hlt				; not reached
	jmp	do_hlt
__vmx_entrypoint ENDP

; Issues a hypercall.  al = 1 if VMCALL left CF or ZF set (failure), else 0.
__vmx_vmcall PROC
	; assumes:
	;	rcx = hypercall
	;	rdx = data
	vmcall
	setna	al
	ret
__vmx_vmcall ENDP

; it's unsafe to call this function directly, so unless
; you're 100% sure the CPU supports it, use vcpu_vmfunc instead.
; al = 1 if the instruction left CF or ZF set (failure), else 0.
__vmx_vmfunc PROC
	; assumes:
	;	ecx = EPTP index
	;	edx = function
	mov	eax, edx		; the function number goes in eax
	db	0fh, 01h, 0d4h		; VMFUNC, emitted as raw opcode bytes
	setna	al
	ret
__vmx_vmfunc ENDP

; Loads GDTR from the descriptor at [rcx].
__lgdt PROC
	lgdt	fword ptr [rcx]
	ret
__lgdt ENDP

; Stores GDTR to the buffer at [rcx].
__sgdt PROC
	sgdt	[rcx]
	ret
__sgdt ENDP

; Loads LDTR with the selector in cx.
__lldt PROC
	lldt	cx
	ret
__lldt ENDP

; Returns the LDTR selector in ax.
__sldt PROC
	sldt	ax
	ret
__sldt ENDP

; Loads the task register with the selector in cx.
__ltr PROC
	ltr	cx
	ret
__ltr ENDP

; Returns the task register selector in ax.
__str PROC
	str	ax
	ret
__str ENDP

; Segment register readers: each returns the selector in ax.
__reades PROC
	mov	ax, es
	ret
__reades ENDP

__readcs PROC
	mov	ax, cs
	ret
__readcs ENDP

__readss PROC
	mov	ax, ss
	ret
__readss ENDP

__readds PROC
	mov	ax, ds
	ret
__readds ENDP

__readfs PROC
	mov	ax, fs
	ret
__readfs ENDP

__readgs PROC
	mov	ax, gs
	ret
__readgs ENDP

; Returns in rax the access rights of the selector in rcx (LAR).
__lar PROC
	lar	rax, rcx
	ret
__lar ENDP

; Writes rcx to CR2.
__writecr2 PROC
	mov	cr2, rcx
	ret
__writecr2 ENDP

; Executes INVD.
__invd PROC
	invd
	ret
__invd ENDP

; INVEPT with type in rcx and the 128-bit descriptor at [rdx].
; al = 1 if the instruction left CF or ZF set (failure), else 0.
__invept PROC
	invept	rcx, oword ptr [rdx]
	setna	al
	ret
__invept ENDP

; INVVPID with type in rcx and the 128-bit descriptor at [rdx].
; al = 1 if the instruction left CF or ZF set (failure), else 0.
__invvpid PROC
	invvpid	rcx, oword ptr [rdx]
	setna	al
	ret
__invvpid ENDP

__ept_violation PROC
	; #VE handler, standard interrupt handling then
	; calls C handler aka __ept_handle_violation, see ept.c
	;
	; The second argument to TRAP_ENTER requests the dummy error code
	; slot.  The handler receives the interrupted CS in rcx and the
	; interrupted IP in rdx.
	TRAP_ENTER	ept_no_swap, 1

	mov	rcx, [rbp + KFRAME_CS]
	mov	rdx, [rbp + KFRAME_IP]

	sub	rsp, 20h
	call	__ept_handle_violation
	add	rsp, 20h

	TRAP_EXIT	ept_ret_no_swap
__ept_violation ENDP

PURGE PUSHAQ
PURGE POPAQ
PURGE TRAP_ENTER
PURGE TRAP_EXIT
PURGE TRAP_SAVE_GPR
PURGE TRAP_REST_GPR
PURGE TRAP_SAVE_XMM
PURGE TRAP_REST_XMM
END
--------------------------------------------------------------------------------