├── .gitignore ├── Archive ├── .gitmodules ├── 2020-11-23_CMOs-TG--minutes--surrender.mediawiki ├── CMOs-proposal-spreadsheet.xlsx ├── Makefile ├── Original-README.md ├── Ri5-CMOs-proposal.html ├── Ri5-CMOs-proposal.pdf ├── admin │ ├── README.admin.md │ └── WIP │ │ └── separating_riscv-CMOs_and_riscv-CMOs-discuss.md ├── agendas-and-minutes │ ├── 2020-09-23_CMOs-TG_meeting │ ├── 2020-09-23_CMOs-TG_meeting_out-of-cycle │ ├── 2020-09-23_CMOs_TG_meeting │ ├── 2020-09-28_CMOs-TG_meeting_recurring │ ├── 2020-10-12_CMOs-TG_meeting │ ├── 2020-10-26_CMOs-TG_meeting │ ├── 2020-11-09_CMOs-TG_cache_index_ops │ ├── 2020-11-23_CMOs-TG--agenda--surrender.mediawiki │ ├── 2020-11-23_CMOs-TG--minutes--surrender.mediawiki │ └── README.md ├── discussion-files │ └── RISC_V_range_CMOs_bad_v1.00.pdf ├── hack-relative-URLs-in-github-project-main-repo.md ├── old-specs │ ├── Zicmobase.adoc │ └── background.adoc └── wiki │ ├── ======================================TOC-spacer.asciidoc │ ├── Actual-CMO-Operations.md │ ├── Administrivia---CMOS-TG.md │ ├── Administrivia.md │ ├── Agenda-for-CMOs-TG.md │ ├── An-even-quicker-and-dirtier-summary-of-proposed-instruction-encodings-for-RISC-V-CMOs.md │ ├── Arguments-against-address-range-CMO.AR.md │ ├── Block-memory-operations---such-as-MEMSET-and-MEMCOPY.md │ ├── CBO.UX-vs-CMO.ALL-vs-CMO.UR.asciidoc.md │ ├── CMO-goals.md │ ├── CMO-operation-list-for-encodings.md │ ├── CMOs-(Cache-Management-Operations).md │ ├── CMOs-Not-Based-on-Memory-Address.md │ ├── CMOs-WG-Draft-Proposed-Charter.md │ ├── CMOs-cut-across-many-fields.md │ ├── CMOs-proportional-to-cache-size-rather-than-address-range.md │ ├── CMOs-proposal.md │ ├── Draft-CMO-proposals.md │ ├── Example-of-Config-WG-charter.md │ ├── Examples-of-other-Working-Group-charters.md │ ├── Extended-CMO-types.md │ ├── Fixed-Block-Size-Prefetches-and-CMOs.md │ ├── Home.md │ ├── How-to-search-this-wiki,-repo,-issues,-etc..md │ ├── 
I-am-frustrated-that-we-are-going-around-in-circles--with-respect-to--modulation-of-CMOs.md │ ├── ISSUE---process-migration-argues-for-whole-cache-invalidation-operations-and-against-the-partial-progress-loop-construct.md │ ├── Instructions-that-Support-Partial-Progress.md │ ├── Mailing-lists-interested-in-CMOs.md │ ├── Makefile.OBSOLETE │ ├── Mandatory-versus-Optional-CMOs,-PREFETCHES,-and-CPHs.md │ ├── Meeting-11-09-2020.md │ ├── Microarchitecture-Structure-Range-CMOs.md │ ├── Non-CMO-stuff-to-be-deleted.md │ ├── Overview-of-CMO-operations.md │ ├── Privilege-for-CMOs.md │ ├── Problems-editing-GitHub-wiki-using-speech-recognition.md │ ├── Quantization,-dequantization,-and-interpolation-instructions--for-DL,-math,-etc..md │ ├── Quick-and-Dirty-Proposal-for-RISC-V-CMOs.md │ ├── Quick-and-dirty-list-of-Actual-CMOs.md │ ├── RISC-V-CMO-proposal.md │ ├── RISC-V-needs-CMOs,-and-hence-a-CMO-Working-Group.md │ ├── RISC-V-standard-disclaimer.md │ ├── Ri5-CMOs-proposal.asciidoc │ ├── STATUS---almost-done---maybe.md │ ├── Sharing-Drawings-and-Diagrams.md │ ├── SourceDest-to-support-Exception-Transparency.asciidoc │ ├── TOC---Table-of-Contents.md │ ├── Terminology-for-instructions-that-manage-microarchitecture-state-such-as-caches,-prefetchers-and-predictors.md │ ├── Variable-Address-Range-CMOs.md │ ├── Variable-Address-Range-Instructions-like-CMOs,-MEMSET,-MEMZERO,-and-MEMCOPY.md │ ├── Virtual-or-Physical-CMO-instruction-flavor.md │ ├── Why-CMOs.xlsx-was-written-in-Excel.md │ ├── Wiki-and-Repo-crosslink-issues.md │ ├── _Footer.md │ ├── _Sidebar.md │ ├── cmo_type-CMO-instruction-flavor.md │ ├── draft-CMO-domains-and-levels.asciidoc │ ├── draft-CMO-instruction-formats.asciidoc │ ├── draft-CMO-issues.asciidoc │ ├── draft-CMO-type-spreadsheet.asciidoc │ ├── draft-Fixed-Block-Size-Prefetches-and-CMOs.asciidoc │ ├── draft-Microarchitecture-Cache-Index-CMO.UR---CBO.UX.asciidoc │ ├── draft-Privilege-for-CMOs.asciidoc │ ├── draft-Variable-Address-Range-CMOs.asciidoc │ ├── 
draft-actual-CMO-operations.asciidoc │ ├── draft-microarchitecture-timing-state-flushes.asciidoc │ ├── files │ ├── Ri5-CMOs-proposal.html │ └── Ri5-CMOs-proposal.pdf │ ├── generated-HTML-and-PDF-for-CMOs-proposal.md │ ├── hack-relative-URLs-in-github-project-wiki-repo.md │ ├── in-band-tagging-pointers.md │ ├── interception,-modulation,-and-mapping-of-CMOs.md │ ├── issues-wrt-repo-and-wiki-split.md │ ├── microarchitecture-range-loop.asciidoc │ ├── skins │ ├── asciidoc-classic-ag.css │ └── asciidoctor.css │ ├── techpubs-info.asciidoc │ ├── techpubs.asciidoc │ ├── terminology-notation.asciidoc │ ├── variable-address-range-loop.asciidoc │ ├── voice-typos-editing-this-wiki.md │ └── wiki-stuff---Notes-on-GitHub-wiki-pages.md ├── CMO-Phase-1-Scope.md ├── LICENSE.md ├── Makefile ├── Makefile.pwsh ├── README.md ├── cmobase ├── Zicbom.adoc ├── Zicbop.adoc ├── Zicboz.adoc ├── acknowledgements.adoc ├── background.adoc ├── cache_behavior.adoc ├── cmobase.adoc ├── colophon.adoc ├── csr_state.adoc ├── extensions.adoc ├── images │ ├── RISC-V-logo.svg │ └── circuit.png ├── insns │ ├── cbo.clean.adoc │ ├── cbo.flush.adoc │ ├── cbo.inval.adoc │ ├── cbo.zero.adoc │ ├── prefetch.i.adoc │ ├── prefetch.r.adoc │ └── prefetch.w.adoc ├── instructions.adoc └── introduction.adoc ├── resources ├── fonts │ ├── DroidSans-Bold.ttf │ ├── DroidSans.ttf │ ├── cmunbbx.ttf │ ├── cmunbmo.ttf │ ├── cmunbmr.ttf │ ├── cmunbso.ttf │ ├── cmunbtl.ttf │ ├── cmunbto.ttf │ ├── cmunbxo.ttf │ ├── cmunsi.ttf │ ├── cmunso.ttf │ ├── cmunss.ttf │ ├── cmunsx.ttf │ ├── droid-sans-fallback.ttf │ ├── mplus-1mn-bold.ttf │ ├── mplus-1mn-light.ttf │ ├── mplus-1mn-medium.ttf │ ├── mplus-1mn-regular.ttf │ ├── mplus-1mn-thin.ttf │ ├── mplus-1p-black.ttf │ ├── mplus-1p-bold.ttf │ ├── mplus-1p-heavy.ttf │ ├── mplus-1p-light.ttf │ ├── mplus-1p-medium.ttf │ ├── mplus-1p-regular-fallback.ttf │ ├── mplus-1p-regular.ttf │ └── mplus-1p-thin.ttf └── themes │ └── risc-v_spec-pdf.yml └── specifications ├── cmobase-v0.3.pdf ├── 
cmobase-v0.4.pdf ├── cmobase-v0.5.0.pdf ├── cmobase-v0.5.1.pdf ├── cmobase-v0.5.2.pdf ├── cmobase-v0.6.pdf ├── cmobase-v1.0-rc1.pdf ├── cmobase-v1.0-rc2.pdf ├── cmobase-v1.0.1.pdf └── cmobase-v1.0.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.html 3 | diag-* 4 | *.txt 5 | *~ 6 | \#*\# 7 | build/ 8 | .asciidoctor 9 | cmobase-*.pdf 10 | autogenerated/ -------------------------------------------------------------------------------- /Archive/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "riscv-CMOs.wiki"] 2 | path = riscv-CMOs.wiki 3 | url = git@github.com:riscv/riscv-CMOs.wiki.git 4 | [submodule "riscv-CMOs-discuss"] 5 | path = riscv-CMOs-discuss 6 | url = git@github.com:riscv/riscv-CMOs-discuss.git 7 | -------------------------------------------------------------------------------- /Archive/2020-11-23_CMOs-TG--minutes--surrender.mediawiki: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Archive/CMOs-proposal-spreadsheet.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/Archive/CMOs-proposal-spreadsheet.xlsx -------------------------------------------------------------------------------- /Archive/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for riscv/riscv-CMOs 2 | 3 | # Main purposes 4 | 5 | # (1) Encapsulate knowledge of how to update submodule after git clone 6 | # see make rule git-post-clone 7 | 8 | # (2) Generate HTML and PDF documentation from wiki pages 9 | # see make rules such as generated-docs and open-docs-in-browser 10 | 11 | ######################################################################################## 12 
| 13 | default: open-docs-in-browser 14 | 15 | always: 16 | 17 | ######################################################################################## 18 | # As of <2020-08-12 Wednesday, August 12, WW33> first main purpose is 19 | # to encapsulate knowledge of how to update submodule for wiki 20 | # corresponding to repo (since document may be produced from wiki 21 | # files) 22 | 23 | # git stuff 24 | # mainly to remind me about git submodule commands 25 | # that I do not know by heart 26 | # (and think are kluges anyway) 27 | 28 | # submodule 29 | # git submodule add git@github.com:riscv/riscv-CMOs.wiki.git 30 | # TBD: this is imperfect: clone of a clone does not clone clone's submodule(s) 31 | 32 | # run `make git-post-clone' right after git clone of Ri5-stuff 33 | # to update submodules (currently only Ri5-stuff.wiki) 34 | git-post-clone: 35 | git submodule init 36 | git submodule update 37 | 38 | 39 | git-diff: 40 | git diff --submodule 41 | 42 | 43 | # echo DEBUG: - I'm not really debugging 44 | # I just want these messages colorized (in emacs) 45 | # and I already have colorization cvode for DEBUG:.* 46 | # whereas my attempt at colorizing INFO:.* failed 47 | # <2020-05-14> 48 | git-status: 49 | @echo DEBUG: $C;git status 50 | @(echo DEBUG: $C/$W ;cd $W; git status) 51 | 52 | 53 | 54 | 55 | 56 | 57 | ######################################################################################## 58 | # As of Wednesday, May 6, 2020-05-06 a main purpose is to run a 59 | # command to expand the AsciiDoc include directives so that you can 60 | # get a better idea what will actually look like 61 | 62 | # currently AsciiDoc,since supposedly RISC-V standard. 
I would prefer 63 | # Markdown or RST (since AsciiDoc does not play well on GitHub wiki)) 64 | 65 | # TBD: ifdef for Linux as well as WindowsCygwin 66 | # TBD: run GitHub server side as well as checked out workspace 67 | 68 | HTML_VIEWER=/cygdrive/c/Windows/explorer.exe 69 | PDF_VIEWER=/cygdrive/c/Windows/explorer.exe 70 | WEB_VIEWER=start URL... 71 | 72 | C:=$(shell basename `pwd`) 73 | 74 | 75 | W=riscv-CMOs.wiki 76 | 77 | # TBD: auto-deduce wiki submodule directory 78 | 79 | # TBD: make this into a project in a box template 80 | 81 | 82 | 83 | # checking in generated docs 84 | # a) to make visible on web/GitHub 85 | # b) because toolchain fragile 86 | # TBD: checking in redundant copies, in wiki and parent, 87 | # mostly because belongs and should be versioned with wiki, 88 | # but displays only in parent. 89 | 90 | git-ci: git-ci-generated-docs 91 | @echo 'Only doing git-ci-generated-docs' 92 | @echo 'checkin non-generated stuff by hand' 93 | 94 | M='committing generated HTML and PDF files' 95 | git-ci-generated-docs: 96 | -git ci -m $M Ri5-CMOs-proposal.html 97 | -git ci -m $M Ri5-CMOs-proposal.pdf 98 | -(cd $W;git ci -m $M Ri5-CMOs-proposal.html) 99 | -(cd $W;git ci -m $M Ri5-CMOs-proposal.pdf) 100 | 101 | 102 | 103 | # Make and display proposal draft 104 | 105 | open-docs-in-browser: open-local-docs-in-browser 106 | open-local-docs-in-browser: open-html-in-browser open-pdf-in-browser 107 | 108 | open-github-docs-in-browser: 109 | 110 | 111 | open-html-in-browser: $W/Ri5-CMOs-proposal.html 112 | @# KLUGE: Windows HTML viewer does not understand / paths 113 | @# either need to convert / --> /, cd, or cygpath 114 | -(cd $W;$(HTML_VIEWER) Ri5-CMOs-proposal.html) 115 | open-pdf-in-browser: $W/Ri5-CMOs-proposal.pdf 116 | @# KLUGE: Windows PDF viewer does not understand / paths 117 | @# either need to convert / --> /, cd, or cygpath 118 | -(cd $W;$(PDF_VIEWER) Ri5-CMOs-proposal.pdf) 119 | 120 | ASCIIDOCTOR=/home/glew/bin/asciidoctor 121 | 
ASCIIDOCTOR_PDF=/home/glew/bin/asciidoctor-pdf 122 | #TBD: Move asciidoctor to standard location, not my ~glew user directory 123 | # TBD: Linux tools 124 | 125 | generated-docs: ./Ri5-CMOs-proposal.html ./Ri5-CMOs-proposal.pdf 126 | 127 | ./Ri5-CMOs-proposal.html $W/Ri5-CMOs-proposal.html: always $W/Ri5-CMOs-proposal.asciidoc $W/*.asciidoc 128 | $(ASCIIDOCTOR) -b html $W/Ri5-CMOs-proposal.asciidoc -o $W/Ri5-CMOs-proposal.html 129 | cp $W/Ri5-CMOs-proposal.html . 130 | ./Ri5-CMOs-proposal.pdf $W/Ri5-CMOs-proposal.pdf: always $W/Ri5-CMOs-proposal.asciidoc $W/*.asciidoc 131 | $(ASCIIDOCTOR_PDF) -b pdf $W/Ri5-CMOs-proposal.asciidoc -o $W/Ri5-CMOs-proposal.pdf 132 | cp $W/Ri5-CMOs-proposal.pdf . 133 | 134 | # TBD: should I eliminate one of the generated .html files - likely will cause problems since redundant 135 | # But... I really want to have the generated HTML in the wiki, not the parent. 136 | 137 | 138 | ######################################################################################## 139 | 140 | # Make utilities 141 | 142 | # TBD: make clean ... cleanest 143 | # TBD: BOM (Bill of Materials) 144 | 145 | # While it would be nice to have real tags for the documents, and wiki pages, e.g. for sections 146 | # at the moment all I am really using the tags for is to do global tags-query-replace in emacs 147 | # so I only need the filenames, not any patterns. 148 | 149 | # TBD: Some will object to such make targets for editing convenience, 150 | # especially for a minority editor like emacs. When there is proper 151 | # Makefile BOM support these targets may no longer be necessary, but 152 | # it would be better if they were augmented to provide more complete 153 | # tag functionality. 
154 | 155 | tags-ad TAGS: always 156 | cp /dev/null TAGS 157 | etags --append --langdef=asciidoc --langmap=asciidoc:.asciidoc --regex-asciidoc='/^=+\\(.*\\)/\\1/' $W/*.asciidoc 158 | 159 | tags tags-all: tags-ad 160 | etags --append --langdef=markdown --langmap=markdown:.md --regex-markdown='/^=+\\(.*\\)/\\1/' $W/*.md 161 | -------------------------------------------------------------------------------- /Archive/Original-README.md: -------------------------------------------------------------------------------- 1 | # riscv-CMOs 2 | 3 | Cache Management Operations (CMOs) for RISC-V 4 | 5 | * Created by: Stephano Cetola 6 | * Requested by: Andy Glew 7 | 8 | 9 | * TBD: working group 10 | * TBD: riscv mailing list 11 | 12 | ## Charter 13 | 14 | The Cache Management Operation, or CMO, task group intends to define data cache 15 | management operations for the RISC-V architecture, providing support for use-cases 16 | such as software-managed cache coherence, power management, persistent storage, 17 | security, and RAS. In the process, a data cache model will be developed, and the 18 | interactions of CMOs with the memory ordering model will be specified. In addition, 19 | the CMO specification will attempt to minimize the requirements on system design 20 | and will not prescribe a specific cache state model or cache coherence protocol. 21 | The CMO TG will coordinate with other RISC-V committees and task groups and with 22 | external parties to ensure consistency and interoperability with respect to any 23 | cache-related features and extensions. 
24 | 25 | ## related GitHub repos and wikis for CMOs TG 26 | 27 | * top: https://github.com/riscv/riscv-CMOs 28 | * for admin stuff like minutes, drafts 29 | * top-wiki: https://github.com/riscv/riscv-CMOs/wiki 30 | * discuss: https://github.com/riscv/riscv-CMOs-discuss 31 | * members can add/change 32 | * mainly in wiki: top-wiki: https://github.com/riscv/riscv-CMOs-discuss/wiki 33 | 34 | * git clone --recurse git@github.com:riscv/riscv-CMOs.git 35 | => 36 | ``` 37 | $> tree -d riscv-CMOs/ 38 | riscv-CMOs/ 39 | |-- admin 40 | |-- agendas-and-minutes 41 | |-- discussion-files 42 | |-- riscv-CMOs-discuss 43 | | |-- discussion-files 44 | | `-- riscv-CMOs-discuss.wiki 45 | `-- riscv-CMOs.wiki 46 | |-- files 47 | `-- skins 48 | ``` 49 | 50 | Note that riscv-CMOs/wiki and riscv-CMOs-discuss/wiki are duplicated (artifact of original creation 2020-11-13, should be cleaned up soon), 51 | as are some related files referred to by wiki. 52 | 53 | ## Wiki-centric 54 | 55 | The active work on the proposal is in the wiki. 56 | Eventually it may be moved to the main repository, 57 | although there are tools to assemble the actual proposed spec for 58 | publication from the wiki directly. 59 | Such tools, 60 | also things like highlight unfinished parts of the proposal on the wiki, 61 | will be placed in the main repository, i.e. here. 62 | 63 | Q: is there a way to treat this project on github, both "main git repo" and "wiki git repo", as the same object? Otherwise will just check out the repos separately, and coordinate. 
64 | * 5/7/2020: set up wiki as a submodule of repo 65 | 66 | ## Key wiki pages [@](https://github.com/riscv/riscv-CMOs/wiki) 67 | * [RISC-V needs CMOs, and hence a CMO Working Group](https://github.com/riscv/riscv-CMOs/wiki/RISC-V-needs-CMOs%2C-and-hence-a-CMO-Working-Group) 68 | * email seeking WG approval and call for participation 69 | * [CMOs WG Draft Proposed Charter](https://github.com/riscv/riscv-CMOs/wiki/CMOs-WG-Draft-Proposed-Charter) 70 | * [Draft CMO proposals](https://github.com/riscv/riscv-CMOs/wiki/Draft-CMO-proposals) 71 | (for that matter, also the other parts of the project, like issues) 72 | * [[generated-HTML-and-PDF-for-CMOs-proposal]] (local) 73 | * on web: https://github.com/riscv/riscv-CMOs/wiki/generated-HTML-and-PDF-for-CMOs-proposal 74 | 75 | 76 | 77 | 78 | 79 | ## Links 80 | 81 | Project on GitHub: 82 | * https://github.com/riscv/riscv-CMOs 83 | * https://github.com/riscv/riscv-CMOs/wiki 84 | * TBD: links that work when checked out locally as well as on GitHub 85 | 86 | Relative, when checked out 87 | * relative href="../../wiki", 88 | * if you have cloned both project git repos, code and wiki 89 | * this may link to your local clone, rather than back to github 90 | 91 | 92 | ## Originally from 93 | 94 | * https://github.com/AndyGlew/Ri5-stuff 95 | * https://github.com/AndyGlew/Ri5-stuff.wiki 96 | -------------------------------------------------------------------------------- /Archive/Ri5-CMOs-proposal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/Archive/Ri5-CMOs-proposal.pdf -------------------------------------------------------------------------------- /Archive/admin/README.admin.md: -------------------------------------------------------------------------------- 1 | riscv-CMOs/admin/README.admin 2 | 3 | this directory for "admin", "sysadmin", "webmastery" stuff 4 | 5 | for the RISC-V CMOs TG GitHub 
repos 6 | 7 | https://github.com/riscv/riscv-CMOs 8 | and 9 | https://github.com/riscv/riscv-CMOs-discuss 10 | 11 | including their associated wikis, etc. 12 | 13 | 14 | See subdirectories such as WIP (Work In Progress) 15 | and (eventually) COMPLETED or the like. 16 | -------------------------------------------------------------------------------- /Archive/admin/WIP/separating_riscv-CMOs_and_riscv-CMOs-discuss.md: -------------------------------------------------------------------------------- 1 | Whining: 2 | 3 | Should this WIP entry be in the issue tracker, wiki, or repo admin/WIP? yes/no/maybe to all. 4 | 5 | Damn, I hate GitHub's limitations! 6 | 7 | The wiki doesn't have subdirectories, so gets messy. 8 | The repo supports Markdown, but not wiki style [link targets that do not exist yet]. 9 | The issue tracker gets messy, like so many issue trackers. 10 | Oh, for twiki or foswiki... Or for MEMEX, as we may think 11 | 12 | --- 13 | 14 | Originally: single repo+wiki riscv-CMOs, containing both proposal and wiki. 15 | 16 | Want: 17 | * TG members able to post to wiki 18 | * but draft proposals, minutes, etc., writeable by only a few people. 19 | 20 | Since GitHub access control seems to be per-repo, we are splitting the original repo into two: 21 | 22 | riscv-CMOs 23 | * to contain protected stuff like drafts and minutes 24 | 25 | riscv-CMOs-discuss 26 | * writeable by TG members 27 | * e.g. wiki 28 | * TBD: publish how TG members can gain wiki post access 29 | 30 | # DONE 31 | 32 | 2020-11-09 ... 
10: Ag requested new repo, waffled on names 33 | 34 | 2020-11-11: Stephano Cetola set up new repo, named the riscv-CMOs-members 35 | 36 | 2020-11-12: 37 | * Ag renamed it riscv-CMOs-discuss 38 | * duplicated old->new, both repo and wiki (full git history)\ 39 | * fixed submodules so that old repo->old wiki, new repo->new wiki 40 | 41 | # To Do 42 | 43 | * Split content - deleting and/or disentangling -discuss and non-discuss content 44 | 45 | * most repo files will stay in riscv-CMOs, non-discuss 46 | * leave README, etc., in riscv-CMOs-discuss repo and wiki pointing to the old repo (and vice versa) 47 | 48 | * wiki files 49 | * some will stay in the old riscv-CMOs wiki, some in the new 50 | * some will need to be edited, fixed up, disentangled 51 | 52 | * issue tracker 53 | * fortunately did not propagate when repo+wiki history transferred. 54 | * good for this task, but sometimes wanted in other situations. 55 | 56 | * draft proposal 57 | * currently in wiki - duplicated old and new 58 | * verify can still build in old place 59 | 60 | * decide if should be removed from new, and from wiki overall 61 | * ==> will break wiki links all over 62 | * GitHub wiki not good for tracking wiki page renaming and deletions :-( 63 | 64 | * once the basic admin stuff is removed from riscv-CMOs-discuss, open it up to TG members to use 65 | * publish how to get access to wiki 66 | 67 | * update crosslinks in other CMO TG places 68 | * old and new repo + wiki on GitHub 69 | * RISC-V Confluence wiki 70 | * RISC-V Google drive pages 71 | * groups.io mailing list pages, files, wiki etc. (yet another wiki :-( ) 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /Archive/agendas-and-minutes/2020-09-23_CMOs-TG_meeting: -------------------------------------------------------------------------------- 1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename. 
2 | 3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes. 4 | 5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives. 6 | -------------------------------------------------------------------------------- /Archive/agendas-and-minutes/2020-09-23_CMOs-TG_meeting_out-of-cycle: -------------------------------------------------------------------------------- 1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename. 2 | 3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes. 4 | 5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives. 6 | -------------------------------------------------------------------------------- /Archive/agendas-and-minutes/2020-09-23_CMOs_TG_meeting: -------------------------------------------------------------------------------- 1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename. 2 | 3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes. 4 | 5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives. 6 | -------------------------------------------------------------------------------- /Archive/agendas-and-minutes/2020-09-28_CMOs-TG_meeting_recurring: -------------------------------------------------------------------------------- 1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename. 2 | 3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes. 4 | 5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives. 
6 | -------------------------------------------------------------------------------- /Archive/agendas-and-minutes/2020-10-12_CMOs-TG_meeting: -------------------------------------------------------------------------------- 1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename. 2 | 3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes. 4 | 5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives. 6 | -------------------------------------------------------------------------------- /Archive/agendas-and-minutes/2020-10-26_CMOs-TG_meeting: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Archive/agendas-and-minutes/2020-11-09_CMOs-TG_cache_index_ops: -------------------------------------------------------------------------------- 1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename. 2 | 3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes. 4 | 5 | TBD: move the actual minutes and notes here, and/or link to wherever else, e.g. email archives. 6 | -------------------------------------------------------------------------------- /Archive/agendas-and-minutes/2020-11-23_CMOs-TG--agenda--surrender.mediawiki: -------------------------------------------------------------------------------- 1 | Agenda suggested by DK and Ag for meeting. 2 | 3 | [[2020-11-23_CMOs-TG--agenda--surrender.mediawiki|agenda]] 4 | [[2020-11-23_CMOs-TG--minutes--surrender.mediawiki|minutes]] 5 | 6 | __TOC__ 7 | 8 | = Item #0 - administrivia = 9 | 10 | Separate GitHub repos have been set up 11 | * https://github.com/riscv/riscv-CMOs 12 | * https://github.com/riscv/riscv-CMOs-discuss 13 | 14 | To post to -discuss you need a GitHub account, and then email Ag or DK. 
15 | 16 | Repos originally cloned, WIP to diverge. 17 | 18 | 19 | 20 | = Item #1 - Surrender wrt Range Oriented CMO.UR and CMO.AR = 21 | 22 | In the interests of making progress on other issues, and unblocking I/D consistency (DW) J-extension 23 | 24 | Am reworking spec for CBO.EA, CBO.UX, CMO.ALL 25 | 26 | Will leave CMO.UR and CMO.UX in for now, but heavily flag - just to collect input. 27 | 28 | Encodings: would like to set RD=X0, to permit CMO.UR or CMO.AR in future (if we eventually go that way, as Ag/KA/AW hope.) 29 | 30 | TBD: discovery. 31 | 32 | Fall-out: CMO.ALL perf 33 | 34 | == Item #1' - different block sizes for INVAL and FLUSH == 35 | 36 | 4-sector cache -- CBO.INVAL.{EA,UX} could easily be faster than CBO.FLUSH.{EA,UX}. 37 | Performance matters... especially if used for security 38 | 39 | DK thinks Ag is just trying to sneak ranges in by the back door. 40 | 41 | 42 | 43 | == Item #2 - Use case for Power Mgmt/Persistence == 44 | 45 | Flush vs Clean ... to whatever level will persist 46 | * e.g. to battery backed DRAM 47 | * e.g. to persistence cache, if removing power from some (but not all) CPUs 48 | * NVRAM 49 | ** multiple levels... 1st copy, RAID, Remote 50 | ** 51 | 52 | 53 | Q: does this mean more levels than 54 | * POC(P*) <-- SW coherency 55 | * POC(P*,IO*) <-- IO 56 | 57 | 58 | = Bonus Time = 59 | 60 | == legal transformations of CMOs == 61 | 62 | Is it legal for an implementation to transform CLEAN --> FLUSH? 63 | * AFAIK yes 64 | 65 | Is it legal for an implementation to transform INVAL --> FLUSH? 66 | * Yes, for DMA I/O 67 | * Defeats purpose for performance. 
68 | 69 | Separate instances within use case 70 | * Flush dirty data, so that it doesn't overwrite non-coherent writes from others 71 | ** FLUSH 72 | ** INVAL - if you don't care about data in cache 73 | *** Advanced: Safer Inval / Clean and Zero 74 | * Eliminate Stale Data (should be no dirty data) 75 | ** CLEAN 76 | ** INVAL - if you don't care about data in cache 77 | ** Advanced: INVAL-CLEAN (not dirty) 78 | 79 | On a WT cache 80 | * FLUSH == INVAL 81 | * CLEAN == NOP 82 | 83 | When using CMOs for performance, can they be ignored? 84 | * A: Yes? ... but would require separate instruction encodings 85 | * advisory/hint vs mandatory 86 | 87 | == local/global == 88 | * "local" => flush path from P to $ specified... 89 | * "global" == flush all in coherence domain 90 | 91 | * obviously, noncoherent => "local" 92 | * obviously(?), noncoherent I/O use cases => global (flush all in coherence domain) 93 | 94 | Q: do we provide separate encodings? ... for local/global 95 | 96 | Who might ever want to have both global and local? 97 | * Global: NC I/O 98 | * Local: 99 | ** performance (performance wants full control) 100 | *** e.g. producer/consumer performance through shared coherent cache 101 | 102 | Observation: no local operations on coherent system => non-coherent may be faster 103 | * Glew opinion: 104 | ** I would like to make coherent as fast as possible, approaching non-coherent in as many cases as possible 105 | ** I would like to remove performance as a reason not to do coherent. 106 | -------------------------------------------------------------------------------- /Archive/agendas-and-minutes/README.md: -------------------------------------------------------------------------------- 1 | Minutes for the RISC-V CMOs TG 2 | Cache Management Operations 3 | 4 | 2020-11-11: Ri5 TG leadership announced policy/standard for storing meeting minutes so that people can find them easily. 
5 | * in GitHub repo for TG 6 | * not wiki :-( 7 | * subfolder agendas-and-minutes 8 | * datestamped filename prefix YYYY-MM-DD 9 | 10 | Further rules for this CMOs TG: 11 | 12 | * YYYY-MM-DD_CMOs-TG_meeting 13 | * _meeting, to distinguish from other items that may be stored here 14 | * _meeting, not _minutes, because more often informal and incomplete notes rather than formal and complete minutes 15 | * YYYY-MM-DD_CMOs-TG_agenda 16 | * if we announce an agenda in advance - if by email, please also record here 17 | * it is OK to place both agenda and notes/minutes in the same _meeting file 18 | * YYYY-MM-DD_CMOs-TG_meeting_OTHER_STUFF 19 | * can add OTHER_STUFF, like main topic, presentation, to datestamped filename - useful in browsing 20 | 21 | * if more than one meeting in a day, add timestamp YYYY-MM-DD_hhmm_CMOs-TG_meeting... 22 | * still sorts with other datestamped but not timestamped filenames 23 | * not ISO8601 - e.g. colons : not legal in Windows filenames. T reduces legibility. 24 | * hhmm - not hh only - following at least that part of ISO8601 25 | 26 | * Examples: 27 | * 28 | 29 | * Least Common Denominator filenames 30 | * brief 31 | * alpha (a-z A-Z), numeric (0-9), punctuation -_ ... 32 | * avoid whitespace, non-windows characters <>:"/\|?* 33 | * typically use _ or - instead of whitespace 34 | 35 | TBD: move/link elsewhere more detailed LCD filenames stuff 36 | * why 37 | * Users/members have already had problems cloning repo filenames containing characters like : that are illegal on Windows 38 | * Filename length limits: ??? - keep short, but not too short ... 39 | * Avoid characters 40 | * Windows: <>:"/\|?* 41 | * Convenience: no whitespace (including space and newline) 42 | * use underscore _ where whitespace would be natural 43 | * avoid the usual special filenames such as . or .., initial ~, ... 44 | * try to avoid filenames that can be used for exploits, like `"'{}[]() ... 
45 | * regrets 46 | * common punctuation can really improve readability - but is often problem. E.g. ?!()[]{}... 47 | 48 | 49 | 50 | Prior minutes for this TG are in email, and certain other places. 51 | 52 | 2020-11-11: creating placeholders for old minutes - basically empty files 53 | * recording that meeting was held 54 | * TBD: copy/move, and/or provide links to existing minutes/material 55 | -------------------------------------------------------------------------------- /Archive/discussion-files/RISC_V_range_CMOs_bad_v1.00.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/Archive/discussion-files/RISC_V_range_CMOs_bad_v1.00.pdf -------------------------------------------------------------------------------- /Archive/hack-relative-URLs-in-github-project-main-repo.md: -------------------------------------------------------------------------------- 1 | This github project has at least two git repos, the main repo and the wiki repo. 2 | * main: https://github.com/AndyGlew/Ri5-stuff.git 3 | * wiki https://github.com/AndyGlew/Ri5-stuff.wiki.git 4 | 5 | 6 | I want to use relative links between the workspaces that ordinarily correspond to these repos, 7 | * both on the github website and when I have cloned elsewhere 8 | * e.g. so that I can clone both together and be consistent 9 | * TBD: exact (automated) procedure to clone both repos and stay relatively consistent 10 | 11 | Markdown syntax relative links fail: 12 | * [[..]] - \[[..]] 13 | * [[../../wiki]] - \[[../../wiki]] 14 | 15 | Trying HTML relative links: 16 | * FAIL: href="wiki" relative - \href="wiki" relative]\ 17 | * fails because it resolves to https://github.com/AndyGlew/Ri5-stuff/blob/master/wiki, 18 | * i.e. the relative position is not https://github.com/AndyGlew/Ri5-stuff but is blob/master/wiki 19 | * which tells us what we need to know 20 | * href="." 
relative underneath that. 21 | * href=".." relative - I doubt that such an "escape upwards" will work, but... WOW! it works 22 | * href="../.." relative 23 | * href="../../wiki" relative YIPPEE! can link from main to wiki 24 | * href="../../.." relative https://github.com/AndyGlew/, 25 | * href="../../../.." relative https://github.com, 26 | 27 | I am so used to websites not allowing ascending relative components in URLs that I wonder if there is a security hole here... Should not be as long as cannot actually escape an areas mapped to the logged in user or guest. 28 | 29 | Recording this in two places: 30 | * main: https://github.com/AndyGlew/Ri5-stuff/blob/master/hack-relative-URLs-in-github-project-main-repo.md 31 | * wiki: https://github.com/AndyGlew/Ri5-stuff/wiki/hack-relative-URLs-in-github-project-wiki-repo 32 | TBD: can I CSE this stuff, transclude, to reduce duplication? 33 | 34 | 35 | Bottom line: relative links 36 | * from wiki 37 | * to project "root" from wiki: href=".." 38 | * to main from wiki: href="../blob/master/README.md" 39 | * to user "root" from wiki: href="../.." 40 | * from main 41 | * to project "root" from main: href="../.." 42 | * to user "root" from main: href="../../.." 43 | * to wiki from main: href="../../wiki" 44 | -------------------------------------------------------------------------------- /Archive/wiki/======================================TOC-spacer.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | // strange name ================================TOC-space.asciidoc 3 | // so that it appears as visual space in the file that does the includes of the subfiles. 4 | 5 | 6 | :leveloffset: 0 7 | 8 | 9 | :!sectnums: 10 | 11 | 12 | == . 
13 | 14 | 15 | // this section serves solely as a space in the TOC (Table Of Contents), between numbered content and appendixes 16 | 17 | :sectnums: 18 | -------------------------------------------------------------------------------- /Archive/wiki/Actual-CMO-Operations.md: -------------------------------------------------------------------------------- 1 | # 2 | 3 | The spreadsheet [CMOs.xlsx](https://github.com/riscv/riscv-CMOs/commits/master/CMOs.xlsx) 4 | is a list of some of the desired CMO operation. It is by no means a complete list. 5 | 6 | The version uploaded as of 2020-04-30_08.04.31 (TBD: provide link to GitHub version) counts these. 7 | These counts suggest the regular format In the next [section](#Regular-format) 8 | 9 | # Regular format 10 | 11 | | por | bits | name | description | 12 | | --- | --- | --- | --- | 13 | | n? | 1 | LG | 0=>local, 1=> global | 14 | | y | 3 | scope | e.g. cache level to flush to
although sometimes not strictly a level
(8 encodings used) | 15 | | y | 4 | cmop | operation type
?? encodings used | 16 | | n | 1 | sec | security related, 0=no, 1=> flush predictors and prefetchers | 17 | 18 | Issue: 19 | * LG: should we just assume that all CMOs must be "shoot down", applicable to all of a coherence domain? 20 | * saves one bit, at the cost of performance for some "advanced" cases (like some supercomputers) 21 | * sec: do we need the sec bit for address range CMO.VAR, or only for "whole cache"? 22 | * this saves one bit for the most expensive CMO.VAR instruction format 23 | 24 | Bottom line: we can fit into 7 bits by making compromises, 8 bits fairly easily, although 9 bits is all rows above, and I would prefer 10 bits. 25 | 26 | Not orthogonal: a very few operations require write permission, but not enough to warrant an orthogonal bit. 27 | 28 | ## Scope encodings 29 | 30 | We get away with "only" eight encodings, three bits, by overloading - using the same encoding to indicate slightly different things for outbound operations (pushes/flushes) and inbound operations. 31 | 32 | | for push CMOs | for pull CMOs (prefetches) | 33 | | --| -- | 34 | | to pou(I$,D$) | to I$ | 35 | | to pou coherent processor caches | to L1 D$ 36 | | to pou non-coherent processor caches | to L2$ pou(I,D ) 37 | | to pou non-coherent I/O | to L3$ 38 | | to ordinary DRAM | from NVRAM to DRAM 39 | | to battery backed up DRAM 40 | | to NVRAM ( first point of persistence) 41 | | to all NVRAM (full persistence) | 42 | 43 | we can of course argue about details, to try to reduce the count 44 | * do we need to have two points of NVRAM persistence, first and all? 45 | * e.g. Keith Packard 46 | * e.g. HP "Machine" (TBD: ref) 47 | * do we need to distinguish DRAM from battery backed DRAM 48 | * there are existence proofs, but we don't necessarily need to order them 49 | * do we need to distinguish processor coherence from I/O coherence? 
50 | * could I/O coherence be just DRAM 51 | 52 | But at the very least, I am sure that most people agree that we need at least four scopes, and probably more. => 3 bits. My biggest concern is that we should probably provide four bits rather than three. 53 | 54 | NOT HANDLED: 55 | * Prefetch operations might want to "skip" certain cache levels 56 | * e.g. fetch into L1 but no other levels 57 | * e.g. fetch into L1 and L3 but not L2 58 | * Prefetch operations that want to stop - may want only to prefetch into L1 from L2 or L3, 59 | but not from DRAM if missing L3 (to avoid saturating DRAM bus) 60 | * CMOs that specify remote caches 61 | * e.g. P1 executes a CMO to prefetch/flush into some other processor P2's cache 62 | * like ARM stashing 63 | 64 | 65 | ## [[CMO operation list for encodings]] 66 | 67 | Placing this into a separate wiki page to make the table easier to edit. 68 | * too hard to edit in long page 69 | * would use section editing, except that GitHub wiki does not have that 70 | * would use transclusion, except GitHub wiki does not have that 71 | 72 | The table uses B+x? syntax to indicate priority classes 73 | 74 | | Count | Priority / Extension 75 | | --- | --- | 76 | | 5 | Base 77 | | 1 | +xIO | invalidate clean
better / more secure way for noncoherent I/O 78 | | 2 | + xD+ | safer discards - easier to secure
safest discard is ZALLOC/DCBZ without a cache target and bus support 79 | | 1 | +xLRU | 80 | | 1 | +xPE | PREFETCH-E 81 | | 2 | +xL | fetch and lock 82 | | 2 | +xxLP | private RAM/ROM versions of fetch and lock 83 | | 1 | +xA | no-fill ALLOC, like DCBA (security hole, but some still want speed) 84 | | 2 | +xZ | ZALLOC
... + LOCK | zero allocate, e.g. DCBZ 85 | | 1 | +xW | way locking ... 86 | 87 | Bottom line: 5 base CMO types => 3 bits. 88 | 89 | 17 with all of the above => 5 bits (i.e. more than 4) 90 | 91 | B+xZ+xL+xLRU gets us to 9 encodings => highly likely that we will need more than 3 bits. 92 | 93 | # Excel spreadsheet "CMOS.xlsx" 94 | Thursday, April 9, 2020-04-09: 95 | * originally (2020-04-09) in personal in GitHub repo at https://github.com/AndyGlew/Ri5-stuff/blob/master/CMOs.xlsx 96 | * now (2020-08-12) in official location https://github.com/riscv/riscv-CMOs/blob/master/CMOs.xlsx 97 | * (probably has more recent copies elsewhere, e.g. personal machine or cloud Drive) 98 | is a "list" of CMOs. Not exactly a list, more like a table from which the actual list can be generated. 99 | Many rows of the table can be expanded into several different CMO operations 100 | with different privilege requirements, caches affected, etc. 101 | 102 | TBD: actually generate a "flat" list. Preferably by script, so that I can automatically go back between the expanded list and a compact form that is folded with common sub expressions that is easier to understand. 
103 | 104 | [[Why CMOs.xlsx was written in Excel]] 105 | 106 | 107 | # OLD, Obsolete 108 | [[Quick and dirty list of Actual CMOs]] -------------------------------------------------------------------------------- /Archive/wiki/Administrivia---CMOS-TG.md: -------------------------------------------------------------------------------- 1 | Sept 2020: [[Administrivia - CMOS TG]] 2 | * mailing list running - lots of traffic 3 | * archives at https://lists.riscv.org/g/tech-cmo/topics 4 | * first meetings scheduled 5 | * see RISC-V TG calendar https://sites.google.com/a/riscv.org/risc-v-staff/home/tech-groups-cal 6 | * no, there is no way you can get a filtered view just for the groups you are a member of 7 | * not in this new Google based calendar system - unlike the older RISC-V calendar system 8 | * regular recurring meetings: starting Monday Sept 28, 2020, 9am US Pacific time, every 2nd Monday thereafter 9 | * [Email announcing Regular meetings, Mon 9am (US Pacific), starting 9/28](https://lists.riscv.org/g/tech-cmo/topics?p=recentpostdate/sticky,,,100,1,0,76916100#thread76916100) 10 | * first meeting, earlier, out-of-cycle: 11 | * [Email announcing first meeting of CMO TG, out-of-cycle, Wed 9-23, 5pm ](https://lists.riscv.org/g/tech-cmo/topic/out_of_cycle_meeting_rv/76917401?p=,,,100,0,0,0::recentpostdate%2Fsticky,,,100,1,0,76917401) 12 | * testing to see how/if notifications of repo and wiki posts get sent to list (and if that will be too annoying) 13 | * it seems that main repo integrations will get email notifications, but not wiki checkins 14 | * both look too annoying - I will probably disable them by default. Folks can add them themselves if they wish. 
15 | 16 | 17 | -------------------------------------------------------------------------------- /Archive/wiki/Administrivia.md: -------------------------------------------------------------------------------- 1 | 2 | Cache Management Operations (CMOs) for RISC-V 3 | 4 | * Created by: Stephano Cetola 5 | * Requested by: Andy Glew 6 | 7 | TBD: working group 8 | 9 | TBD: riscv group, including mailing list 10 | * should eventually have the typical riscv.org setup, 11 | probably https://lists.riscv.org/g/tech-CMOs 12 | but not set up yet 13 | 14 | GitHub locations 15 | * https://github.com/riscv/riscv-CMOs 16 | * https://github.com/riscv/riscv-CMOs/wiki 17 | * TBD: links that work when checked out locally as well as on GitHub 18 | 19 | 20 | Note: wiki more than repo: 21 | * at the moment / at start, much of this will be in the wiki rather than the repo 22 | * since much of my (Glew's) initial proposal was arranged in such a wiki-centric manner, with the repo mainly for build tools 23 | * if this continues, the repo will mainly hold support stuff such as Makefile and tools to generate documents 24 | * TBD: set up git modules so that repo has wiki as a submodule, and versioned together 25 | * see https://github.com/AndyGlew/Test-GitHub-stuff/wiki/Q%3A-why-am-I-trying-to-do-non-wiki-stuff-in-the-GitHub-wiki%3F 26 | -------------------------------------------------------------------------------- /Archive/wiki/Agenda-for-CMOs-TG.md: -------------------------------------------------------------------------------- 1 | This is a suggested list of agenda items, topics, for the RISC-V CMOs Tech Group. 2 | 3 | This is NOT supposed to be a [[List of interesting topics related to CMOs]]. That should live elsewhere, although it will undoubtedly be crosslinked with this agenda. 4 | 5 | This agenda is intended to reflect technical decisions and issues that must be settled in order to achieve consensus as to one or more RISC-V extensions for CMOs. 
6 | 7 | # Location History of CMOs TG Agenda 8 | 9 | Current home of agenda - see [CMOs TG agenda on Google Drive](https://docs.google.com/document/d/1--__AiQkusBoIGCLiXfod_yXCfjwQTmdWEnvCa3hbBw/edit#) 10 | 11 | Originally created in https://github.com/riscv/riscv-CMOs/wiki/Agenda-for-CMOs-TG 12 | 13 | Then moved to https://lists.riscv.org/g/tech-cmo/wiki/23454 14 | when I realized that the GitHub wiki did not allow copy/paste of HTML text with links 15 | 16 | And now moved to Google Docs https://docs.google.com/document/d/1--__AiQkusBoIGCLiXfod_yXCfjwQTmdWEnvCa3hbBw/edit# 17 | 18 | When I started worrying about continuous incremental backup. 19 | 20 | I then quickly learned how much I hate the fact that Google Docs does not allow you to create links to pages that do not exist yet the way wiki does. This was almost enough to make me switch over to using Microsoft OneNote using OneDrive - but the lack of nested sections within sections sent me back to Google Drive. 21 | 22 | There is no single satisfactory solution, that matches all of my criteria. 23 | -------------------------------------------------------------------------------- /Archive/wiki/Arguments-against-address-range-CMO.AR.md: -------------------------------------------------------------------------------- 1 | As expected, address range CMO.AR has been quite contentious. See the mailing list archives for lots of discussion. 2 | 3 | Derek Williams of IBM prepared a large slide set arguing that address range is a bad idea. 
4 | * [Derek Williams (IBM) has prepared a large slide set arguing that address range CMO.AR is a bad idea](https://github.com/riscv/riscv-CMOs/blob/master/discussion-files/RISC_V_range_CMOs_bad_v1.00.pdf): https://github.com/riscv/riscv-CMOs/blob/master/discussion-files/RISC_V_range_CMOs_bad_v1.00.pdf 5 | 6 | 7 | -------------------------------------------------------------------------------- /Archive/wiki/CBO.UX-vs-CMO.ALL-vs-CMO.UR.asciidoc.md: -------------------------------------------------------------------------------- 1 | Some traditional RISC ISAs have instructions that invalidate by (set,way). 2 | [[if bound to an instruction]] we call this CBO.UX.?? -- CBO standing for "Cache Block Operation", UX standing for " microarchitecture index" e.g. (set,way), ?? being other fields such as the actual operation (CLEAN, DISCARD, INVALIDATE, INVALIDATE-S), and cache(s) involved. 3 | 4 | Problems with CBO.UX include: 5 | 6 | * exposing microarchitecture details to code that might otherwise be portable 7 | * inability to take advantage of hardware optimizations like bulk invalidates and state machines 8 | 9 | Nevertheless, this is in many ways the simplest possible approach 10 | 11 | Code that uses this operation to invalidate an entire cache looks like 12 | 13 | nEntries := read # of entries from config ... 14 | FOR n FROM 0 to nEntries DO 15 | CBO.UX rs1:nEntries 16 | 17 | Code that uses this operation to invalidate a single cache line, e.g. as read from a machine check error report 18 | 19 | numEntry := read error CSR 20 | CBO.UX rs1:numEntry 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | Many machines have FSMs that iterate over the entire cache specified, and/or bulk invalidates that "instantaneously" invalidate a cache for some operations and/or some entries. [[If bound to an instruction]] we call this CMO.ALL.$id. 
29 | 30 | Problems with CMO.ALL include 31 | 32 | * interruptability/restartability with partial progress 33 | ** frequently CMO.ALL implementations are not interruptible. 34 | *** This is not acceptable for many systems, especially real-time. 35 | ** if interruptible, issues with restartability 36 | *** CMO.ALL can be made restart with partial progress if there is state like a CSR from which it resumes on return from an interrupt. 37 | **** but we dislike adding new state 38 | *** or, CMO.ALL may be interruptible but may have to resume from the beginning on return from interrupt 39 | **** forward progress problems => highly undesirable 40 | 41 | 42 | This proposal defines a CMO.UR instruction in such a way that allows <>, 43 | with a loop such as that below: 44 | 45 | include::microarchitecture-range-loop.asciidoc[] 46 | 47 | -------------------------------------------------------------------------------- /Archive/wiki/CMO-goals.md: -------------------------------------------------------------------------------- 1 | Goals: 2 | * it should be possible for some or even most CMOs to be invoked from user mode unprivileged code, but privileged code must be able to control or forbid unprivileged access to CMOs 3 | * implementations can range from simple, one cache line at a time, to more complicated state machines 4 | * transparent support for events such as page faults, debug exceptions, machine check error exceptions, etc. 5 | * no virtualization holes - e.g. CMOs do not allow the user to observe page faults, except by timing as is already possible 6 | * long-duration CMOs can be interrupted, i.e. are nonblocking to the hart that is running them. They can be resumed partway along, and do not have to restart from scratch. 
Conversely, if such interruptability interferes with the guarantees that security usage models require, this must be exposed and possibly prevented if privilege allows 7 | * transparency/interruptability/resumability means that 8 | * on an exception the PC points to the CMO instruction, not the instruction after it 9 | * the OS is not required to parse the CMO instruction in order to determine how to handle exceptions such as page faults 10 | * ordinary X scalar registers are modified to indicate partial progress, and are read back on exception return. 11 | -------------------------------------------------------------------------------- /Archive/wiki/CMO-operation-list-for-encodings.md: -------------------------------------------------------------------------------- 1 | (This page [[CMO operation list for encodings]] 2 | extracted from parent [[Actual CMO Operations]] 3 | because GitHub wiki doesn't have section editing 4 | (but also lacks transclusion :-( ) ) 5 | 6 | 7 | The list below ... + annotated according to priority / possible extensions (trying to diet, reduce to <= 8, 3 bits) 8 | * B = base - surely must have 9 | * +x?? - possible extensions 10 | 11 | 12 | 13 | | priority | rw | name | detail | 14 | | --- | --- | --- | --- | 15 | | | 16 | | B | r | WRITEBACK
IBM: CLEAN | dirty --wb--> clean,
clean-->unaffected 17 | | B | r | WB-INVALIDATE
IBM: FLUSH | dirty --wb--> clean,
clean-->unaffected 18 | | +xIO | r | INVALIDATE CLEAN | clean --> invalid
dirty --> unaffected
secure
suitable for NC I/O 19 | | B | w | INVALIDATE
IBM: DISCARD | clean --> invalid,
dirty -- no wb --> invalid
e.g. non-coherent I/O, reset 20 | | +xD+ | w | safer discards | see elsewhere 21 | | | 22 | | +xLRU| r | Set LRU | wish: prefetches/loads/stores that have LRU / not MRU / non-temporal hints 23 | | | 24 | | B | r | PREFETCH-R | PREFETCH-X has I$ target | ?? eliminate by making PREFETCH-R with I$ target
multilevel I$ 25 | | B | r | PREFETCH-W | prefetch to write, may be clean or dirty 26 | | +xPE| r | ? PREFETCH-E | prefetch as if to write, but must be clean
may need to update outer $/DRAM on way 27 | | | 28 | | +xL | r | FETCH-W + LOCK | like creating local writable copy of shared RAM 29 | | +xL | r | FETCH-R + LOCK | like creating local copy of shared ROM 30 | | +xxLP | r | FETCH-E + LOCK | like creating private ROM 31 | | +xxLP | r | FETCH-EW + LOCK | like creating private RAM 32 | | | 33 | | .xA | w | NO-FILL ALLOC | like DCBA (security hole) 34 | | .xZ | w | ZALLOC | like DCBZ 35 | | .xZ | w | ZALLOC + LOCK | like creating local RAM
TBD: private / shared 36 | | | 37 | | .xW | r | way locking | beyond scope, way mask separate 38 | 39 | -------------------------------------------------------------------------------- /Archive/wiki/CMOs-(Cache-Management-Operations).md: -------------------------------------------------------------------------------- 1 | ## Recent 2 | 3 | Soon: RISC-V Foundation Working Group TBD 4 | 5 | [[Ri5-CMOs-proposal]] 6 | * See [[generated HTML and PDF for CMOs proposal]] 7 | 8 | ### History 9 | 10 | This history section it is very much out of date, see instead that within [[Ri5-CMOs-proposal]]. 11 | 12 | Wednesday, May 6, 2020-05-06: 13 | * switching to use asciidoc for actual proposal: [[CMOs-proposal]] links to [[Ri5-CMOs-proposal]] 14 | * See [[generated HTML and PDF for CMOs proposal]] 15 | * actual proposal WIP: [[Ri5-CMOs-proposal]] 16 | * files converted from wiki to asciidoc ("draft" prefix distinguishes) 17 | * [[draft Privilege for CMOs]] <-- [[Privilege for CMOs]] 18 | * [[draft-Fixed-Block-Size-Prefetches-and-CMOs]] <-- [[Fixed-Block-Size-Prefetches-and-CMOs]] 19 | 20 | 21 | Finished stuff? - to be converted to asciidoc / draft 22 | 23 | * [[Privilege for CMOs]] 24 | * finish the [[Actual CMO operations]] list 25 | * finish the [[Semi-formal Abstract Model for CMOs]] 26 | * TBD: transcribe to wiki from the OneNote notebook and email where this was written up. 27 | * needs: How don cache flushes on non-inclusive caches wArm 28 | * lots of rationale and explanation 29 | 30 | 31 | ## Terminology 32 | 33 | Briefly: this document, at this time, uses the term "CMO" (Cache Management Operation) generically for operations that have mandatory semantics (like cache flushes for purposes of software managed consistency or security timing channel mitigation) but also operations that have optional semantics (such as prefetch instructions as well as hints that a cache line is no longer needed). 
See [[Terminology for instructions that manage microarchitecture state such as caches, prefetchers and predictors]]. 34 | 35 | 36 | ## Converging on Proposals 37 | 38 | It is eventually necessary to converge on a single proposal. While this proposal may not be final, and different parts may be at different stages of maturity, the links here are to what I believe are the latest and greatest. 39 | 40 | 41 | ## CMO Instruction Formats 42 | 43 | * [[Fixed Block Size Prefetches and CMOs]] 44 | * [[STATUS: almost done - maybe]] 45 | * AW OK, most reviewers so far okay 46 | * Instruction encodings chosen 47 | * [[Instruction Name Choice]] - my suggestions, but I expect to be overruled 48 | * SUMMARY: 49 | * 64 byte fixed size block 50 | * PREFETCH.64B.R and PREFETCH.64B.W: Memory[reg+imm12], i.e. I–format with RD=0 51 | * CMO.64B.CLEAN, CMO.64B.FLUSH: Memory[reg], e.g. R–format, but only need one register 52 | 53 | * [[Variable Address Range CMOs]] 54 | * STATUS: converging, expect arguments 55 | * 01-23-2020: reviewers have accepted explanation of register definitions suitable for interruptability, but still think the 2 acceptable definitions are "strange". I am trying to guess which one will be most acceptable. 
56 | * ISSUE: [[CMO-types issue]]: abstraction, efficiency, extensibility 57 | 58 | * [[Microarchitecture Structure Range CMOs]] 59 | * STATUS: 60 | * Recent 61 | * 03-02-2020: changes after AW discussion 62 | * 01-16-2020: reviewers rejected overloading address range CMOs for efficiency :-( 63 | * 01-20-2020: new proposal [[Non-Address Based CMOs for Abstraction and Efficiency]] 64 | * 01-22-2020: first SW/OS reviewer okay on concept, hardware reviewer interested but questioning 65 | * ISSUE: [[CMO-types issue]]: abstraction, efficiency, extensibility 66 | 67 | ## [[Actual CMO Operations]] 68 | 69 | The section and linked pages above discuss the CMO instruction formats 70 | 71 | The page [[Actual CMO Operations]] discusses the actual cache management operations such as: 72 | * CLEAN: write back dirty data, but leave clean data behind in structure 73 | * FLUSH: writeback dirty data, and invalidate all data in structure 74 | * Invalidate Branch Predictors and Prefetchers: e.g. for timing channel mitigation 75 | 76 | ## [[Privilege for CMOs]] 77 | 78 | Actual proposal: [[Privilege for CMOs]] 79 | 80 | Further discussion and/or rationale 81 | * [[I am frustrated that we are going around in circles with respect to modulation of CMOs]] - I hope the new subproposal [[Privilege for CMOs]] breaks us out of this nonproductive spin loop 82 | * [[interception and modulation of CMOs]] 83 | 84 | ## [[Semi-formal Abstract Model for CMOs]] 85 | 86 | TBD: transcribe to wiki from the OneNote notebook and email where this was written up. 87 | 88 | Overview: 89 | * most abstract: the operations a user wishes to perform 90 | * implementation dependent: the operations that HW provides, including, e.g., arbitrary numbers and levels of caches 91 | * intermediate level of abstraction between the above: abstract HW CMO operations 92 | * restricting levels of the memory hierarchy 93 | ... 
94 | 95 | 96 | ## Stuff along the way 97 | 98 | Some of this stuff along the way will be rejected alternatives, nevertheless preserved, e.g. in case they need to be revived. Other of this stuff along the way constitutes rationales and explanations, which may be used, rewritten, or reorganized in support of the converged proposal. 99 | 100 | TBD: eliminate obviously dated and obsolete stuff, which can always be obtained from the get history, or at least tag it as dated and obsolete with references to the up-to-date stuff. TBD: separate final or near final from historical stuff. 101 | 102 | [[Overview of CMO operations]] - why needed, goals, etc. 103 | 104 | [[Quick and Dirty Proposal for RISC-V CMOs]] 105 | 106 | * [[An even quicker and dirtier summary of proposed instruction encodings for RISC-V CMOs]] 107 | -------------------------------------------------------------------------------- /Archive/wiki/CMOs-Not-Based-on-Memory-Address.md: -------------------------------------------------------------------------------- 1 | Some applications only need to flush known addresses or address ranges out of their caches. 2 | 3 | * E.g. a JIT code generator may know precisely what instructions it has generated, and conversely which old instructions it has zeroed or overwritten, so it knows precisely what address range needs to be invalidated from a noncoherent I cache. 4 | 5 | Other applications don't know the address ranges. 6 | 7 | * E.g. security code targeting timing channels may not know what addresses the user program it is switching between has touched. It is unrealistic to flush all possible addresses, O(size of address space). Such security code only really needs to touch the caches that it is worried about. E.g. in the seL4 example from Gernot Heiser, the microkernel only needs to flush all of the L1 I$ and D$, not the L2. 8 | * E.g. even security code that is doing its own flushes, e.g. 
of a lookup table and memory that might provide a cache residency channel, while it might know the size of the lookup table, it also has to flush all other addresses that map to the same sets in the cache as its own data. This suggests a hybrid... That I will go into right now. 9 | 10 | * E.g. software coherency management by the operating system for user processes that touch a lot of memory. As in HPC systems. 11 | 12 | Reviewers of the early versions of this CMO proposal emphasized that it was important to have such "whole cache invalidates" as well as address range invalidates. In fact, for security, they said address range invalidates were useless. 13 | 14 | I had hoped that an address range invalidate that was larger than an entire cache might be optimized to invalidate the cache, not every cache line in the address space. However, other reviewers prefer not to have that optimization. 15 | 16 | --- 17 | 18 | Briefly: the possibility of monolithic instructions like Intel x86 WBINVD and INVD 19 | * WBINVD is typically a microcode scan, and inherently O(number of dirty lines) if not O(number of lines in cache) 20 | * INVD may be O(number of lines in cache), or it may be O(1) complexity if there is a [[bulk invalidate]] operation 21 | O(N) scans that are not interruptible or a problem. 22 | 23 | 24 | The traditional way of doing efficient, interruptible, non-address range cache invalidates is to do something like 25 | 26 | LOOP over caches and predictors 27 | Read the particular cache parameters, number of sets, number of ways, from something like CPUID 28 | FOR s FROM 0 TO number of sets 29 | FOR w FROM 0 TO number of ways 30 | flush or invalidate (set,way) 31 | 32 | Obviously this has many issues: 33 | * it exposes the microarchitecture 34 | * you may need to do this for multiple caches, and all software may not be aware of new caches 35 | * the very concept of way associativity is questionable in some modern computer architecture work, e.g. 
skewed associativity 36 | * skewed associativity does not break things if a loop such as the above is used to invalidate the entire cache 37 | * but skewed associativity breaks things if the user assumes that it understands the function that hashes address lines to sets within the cache, and tries to be smart and save work by only invalidating particular sets. 38 | * Privilege issues 39 | * we want to be able to do invalidates in user mode. The above cannot be allowed in general. ... TBD: I must be faster 40 | * set/way locking 41 | 42 | --- 43 | 44 | # My proposal for non-address range CMOs 45 | 46 | Early in time, near boot 47 | OS is assumed to have investigated the CPUID cache configuration 48 | (especially if it were in some format like XML the way I would prefer to be in order to be extensible) 49 | 50 | Early in program, or near boot time 51 | cmo_handle <-- syscall by user to OS saying "this is what I want to invalidate" 52 | user may have inspected cache configuration from CPUID 53 | or OS may have done so, and have heuristics that give user more abstraction 54 | 55 | 56 | At point where the CMO is needed 57 | ... 58 | t0 <-- read _time 59 | regCmoIndex := maximum positive signed integer, E.g. 0x7FFF.FFFF on RV32 60 | LOOP 61 | CMO.UR( src regH:=cmo_handle, src_dst regIndex ) 62 | BAD: rd: regIndex_end <--- CMO.UR( rs1 src regH:=cmo_handle, src rs2 regIndex_start ) 63 | UNTIL regIndex <= 0 64 | 65 | GH: FENCE here until all done 66 | 67 | WAIT until t0+delta 68 | 69 | 70 | 71 | CMO.UR( src regH:=cmo_handle, rd:start_addr rs1:end_addr ) 72 | 73 | CMO.UR.( src regH:=cmo_handle, rd:start_addr rs1:end_addr ) 74 | 75 | * 1 flush/clean 76 | * 1 mandatory/advisory 77 | * bitmask 78 | * I1, I0, D1, BP, BTB, RSI 79 | * on chui predictors 80 | * exteernal 81 | 82 | O(1) 83 | 84 | 85 | 86 | 87 | 88 | Expect: e.g. if invalidated a cache with 256 entries 89 | 90 | CMO.UR called with regIndex = 0x7F... 
91 | => map to 255 92 | => then decrement 93 | 94 | GH: I asked Gernot about exposing the index space 95 | 96 | 97 | 98 | Multiple Caches / predictors ... 99 | 100 | 101 | Index space 102 | 103 | 0-255 L1$ I cache 104 | 105 | 256-512 D$ 106 | 107 | 1G-1G+256M => outermost 108 | 109 | 110 | 111 | GH: initiate WB 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | GH: flush D-ccahe concurrently with any others. 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | GH: I asked if final regIndex < 0 ==> errors is a piotebtial hole 137 | GH: doesn't want user 138 | GH: 139 | 140 | 141 | 142 | 143 | cmo_handle 144 | bit 0 = 0 => abstract as above 145 | =1 hardwired parameter 146 | bitmap of which caches and predictors 147 | 148 | 149 | defaults? 150 | r0 => a reasonable default 151 | -------------------------------------------------------------------------------- /Archive/wiki/CMOs-WG-Draft-Proposed-Charter.md: -------------------------------------------------------------------------------- 1 | [[Examples of other Working Group charters]] 2 | - CMO group charter modelled on ... 3 | * [[Example: Config WG charter]] 4 | 5 | 6 | The following proposed charter is probably too long for the Technical steering committee. 7 | 8 | Some, but probably not all, of these details, explanations, and requirements about what is and is not in scope for the CMO working group may be worked out once the CMO working group has started. 
9 | 10 | 11 | # CMO Task Group Charter 12 | 13 | Acronym: CMOs = Cache Management Operations 14 | 15 | The CMO Task Group will: 16 | * define instructions (and CSRs if necessary) performing cache management operations 17 | 18 | 19 | Requirement: 20 | * CMO instructions may be executed by user mode (if system software permits) 21 | * however, system software must have the ability to prevent less privileged software from executing CMO instructions 22 | 23 | Therefore, it is proposed that the CMO working group will be a subgroup reporting to the RISC-V Privileged Architecture task group. 24 | 25 | 26 | Use cases for CMO instructions include: 27 | * security 28 | * e.g. flushing microarchitecture state to mitigate timing channel security vulnerabilities such as Spectre 29 | * hence "CMOs" will be extended to cover branch predictors, prefetchers, and other microarchitecture state that affects performance 30 | * software managed cache coherence when hardware cache coherence is not available or incomplete 31 | * e.g. incoherent I/O DMAs 32 | * e.g. multiprocessor systems where cache coherence is not available between all nodes 33 | * e.g. interaction with external hardware accelerators that may not implement hardware cache coherence 34 | * performance tuning 35 | * e.g. evicting data no longer needed between program phases, to avoid thrashing data that is needed across program phases 36 | * *possibly* cache prefetch instructions and/or cache usage pattern hints 37 | * power management 38 | * e.g. flushing caches to battery backed-up DRAM, or NVRAM 39 | * persistence for reliability 40 | * e.g. flushing caches to RAID NVRAM and/or remote state 41 | * e.g. cache flushes for checkpointing of long-running applications in HPC systems 42 | * debugging 43 | * e.g. 
external hardware debuggers may need to write instructions or memory in systems lacking cache coherence 44 | 45 | CMOs cut across many domains, ranging from simple microcontroller systems with no hardware cache coherency, 46 | through cache coherent application and server processors, through HPC systems. 47 | The CMO working group will coordinate with the task groups and working groups and standing committees for these areas of overlap. 48 | 49 | The goal of any CMO ISA extension proposals will be to permit portable software in all or most of the above use cases. 50 | The CMO task group will only define a set of CMO instructions that can reasonably be expected to be portable. 51 | If not applicable to an implementation, such CMO instructions will do nothing (e.g. flushing dirty data in a system that does not have writeback caches). 52 | 53 | It is expected that implementations may have cache microarchitecture and hence cache flushes that will not be part of the standard CMO instruction set. 54 | However, there will be worst-case maximally conservative CMO instructions that can flush all caches including such implementation specific caches. 55 | Implementations are expected to have less conservative more precise cache flushes that are not part of the standard CMO instruction set. 56 | 57 | 58 | The CMO working group will not: 59 | * define the instruction/data coherence instructions necessary for on-the-fly code generation, e.g. in the J extension 60 | * however, the CMO working group will coordinate with the working groups defining instruction/data coherence 61 | * certain CMO instructions will probably overlap, e.g. flushing the instruction caches 62 | * the CMO working group will NOT address TLB shootdown or ASID coherency 63 | * the CMO working group will NOT define config/discovery mechanisms to allow software (system or user) to determine the cache microarchitecture 64 | * the CMO working group will NOT define cache protocols 65 | * e.g. 
CMOs will assume that caches can contain clean and/or dirty data, but no more states than that 66 | * the CMO working group will *probably NOT* define cache modes such as no-fill, which may be required to perform reliable hardware reset 67 | 68 | Requirement: CMO instructions *must* work with the most common cache microarchitectures, including 69 | * strictly inclusive and exclusive 70 | * non-strictly inclusive and exclusive hierarchies 71 | * 72 | 73 | Requirement: implementations of varying levels of sophistication 74 | * it *must* be possible to implement CMO instructions a cache line at a time 75 | * it must be possible (and reasonably good performance) to implement CMO instructions by trapping to M mode 76 | * desirable: bulk flush, e.g. invalidating clean data without writing back 77 | * desirable: implementations using hardware state machines 78 | 79 | It is expected that the CPU will not necessarily know in advance all of the caches in a system. Requirement: it must be reasonable to interface CPU CMO instructions to control external caches (e.g. so that portable software can reliably do things like mitigate cache timing channels for security). Example of such an interface: Trapping the CMO instructions to M mode and emulating them via system specific mechanisms to flush external caches. -------------------------------------------------------------------------------- /Archive/wiki/CMOs-cut-across-many-fields.md: -------------------------------------------------------------------------------- 1 | We have explained that CMOs are a crosscutting issue in many places, listing examples of domains involved, including 2 | * the initial email (TBD: link) 3 | * the proposed charter 4 | * many wiki pages 5 | 6 | It would be nice to have a master list of such examples. 
7 | This is in many ways a first cut at a list of use cases - 8 | although I would want such a list to be inclusive, including even wild ass examples, 9 | whereas eventually the list of use cases will undoubtedly be pruned. 10 | [TBD: link to Glew rant about how use cases can lead to blinkered thinking] 11 | 12 | 13 | Along this line: 14 | 15 | * [[Mailing lists interested in CMOs]] 16 | 17 | -------------------------------------------------------------------------------- /Archive/wiki/CMOs-proportional-to-cache-size-rather-than-address-range.md: -------------------------------------------------------------------------------- 1 | CMOs based on virtual or physical addresses, whether fixed size or variable ranges, are easy to express in a portable manner. It is also easy to make such memory address based CMOs available to user code - perform the permission checks implied by page tables for virtual addresses and/or physical memory permission structures like the RISC-V PMPs/sPMPs. 2 | 3 | The big problem with memory address range based CMOs is that they are often significantly less efficient than CMOs based on cache microarchitecture. For example, it is horribly wasteful to have to scan an address range of size 4 GB when you know that the largest cache of concern is only 4 MB in size. If we assume that an operation has to be done for every 64B cache line, the address based scan touches 2^26 cache lines, whereas the cache size based scan touches only 2^16 cache lines. 4 | 5 | However, it is TBD whether we can architect a reasonably portable solution for CMOs based on cache microarchitecture, which I might call CMOs proportional to cache size rather than address range. See that last page for a tentative proposal. 6 | 7 | # Optimizing large address range CMOs into efficient cache size proportional CMOs 8 | 9 | One possible approach is to allow an implementation of a variable range CMO.VAR.* over [lo,hi) to perform an efficient cache size based scan 10 | 11 | * e.g. 
if hi-lo, the size of the region, is less than the size of the cache 12 | * i.e. if we can guarantee that there are no lines that need to be flushed that are not in the cache 13 | * although this might fail for some noninclusive cache architectures (such as Intel L1 and L2 (or MLC) caches; although modern Intel LLCs or snoop filters are inclusive) 14 | * and for "funky" mappings of memory addresses to cache (set,way) locations 15 | 16 | Reviewers of this CMO proposal were surprisingly resistant to allowing this optimization. Partly because of justifiable FUD of unanticipated consequences. Partly because some such reviewers anticipated implementing the variable range CMO.VAR.* in terms of per cache line CMO operations, so would not have the opportunity to perform these "physical cache parameter optimizations". Indeed, the possibility of such optimizations is one of the big motivations for implementing variable address range CMOs by a state machine (or equivalently by smart software aware of the cache structure). 17 | 18 | # [[CMOs based on cache microarchitecture]] 19 | 20 | ... TBD ... loop based on (set,way) structure, 21 | i.e. addressing cache lines directly 22 | 23 | Obviously exposes microarchitecture. Probably not desirable to expose to user mode. 24 | 25 | Problematic when there are multiple levels of cache: 26 | 27 | May need to loop over caches, and within each cache over all possible lines within the cache 28 | 29 | inclusive cache architectures with back-invalidate can eliminate some but not all of that complexity 30 | 31 | 32 | # Abstracting Efficient Cache Size Proportional CMOs 33 | 34 | ... I think we can do this. But I know that I'm going to be crucified for "complexity". Although that just might be my Intel PTSD speaking. 
35 | -------------------------------------------------------------------------------- /Archive/wiki/CMOs-proposal.md: -------------------------------------------------------------------------------- 1 | See [[Ri5-CMOs-proposal]] 2 | 3 | 4 | -------------------------------------------------------------------------------- /Archive/wiki/Draft-CMO-proposals.md: -------------------------------------------------------------------------------- 1 | * [[Draft CMO proposals]] 2 | * I (Glew) created a draft CMO proposal 3 | * old: originally in my personal GitHub https://github.com/AndyGlew/Ri5-stuff/wiki/Ri5-CMOs-proposal 4 | * TBD: remove reference 5 | * new: https://github.com/riscv/riscv-CMOs/wiki/Ri5-CMOs-proposal 6 | * [[generated-HTML-and-PDF-for-CMOs-proposal]] (local) 7 | * on web: https://github.com/riscv/riscv-CMOs/wiki/generated-HTML-and-PDF-for-CMOs-proposal 8 | 9 | * See issue https://github.com/riscv/riscv-CMOs/issues/2 10 | * update issue with migration status 11 | 12 | I expect other working group members will have their own proposals, whether full or in part, and will provide links here as they become available. 13 | 14 | TBD: links to generated versions 15 | * OLD: Glew personal wiki: https://github.com/AndyGlew/Ri5-stuff/wiki/generated-HTML-and-PDF-for-CMOs-proposal 16 | * NEW: official : https://github.com/riscv/riscv-CMOs/wiki/generated-HTML-and-PDF-for-CMOs-proposal 17 | -------------------------------------------------------------------------------- /Archive/wiki/Example-of-Config-WG-charter.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | > From: Tim Newsome 4 | > 5 | > Sent: Monday, July 27, 2020 12:09PM 6 | > 7 | > To: Tech-Config 8 | > 9 | > Subject: Re: [RISC-V] [tech-config] updated charter 10 | > 11 | >   12 | > On 7/27/2020 12:09 PM, Tim Newsome wrote: 13 | > 14 | > The proposal is in the pull request on github.  
15 | 16 | > https://github.com/riscv/configuration-structure/pull/5 17 | 18 | > (You can see the actual text by clicking the "Files changed" link.) 19 | > 20 | > If anybody disagrees with that language, please comment on github or 21 | > start an e-mail discussion with your proposed change and 22 | > reasoning. Until you see your new text appear in that pull request, 23 | > it will not be voted on at the next meeting, so speak up repeatedly 24 | > if you have to. All I've seen so far is people here and there 25 | > mentioning that maybe something would be nice or is also 26 | > relevant. If I missed something, please repeat it. 27 | > 28 | > Tim 29 | 30 | # Task Group Charter 31 | 32 | The Configuration Structure Task Group will: 33 | * Specify syntax and semantics for a static data structure that can accommodate 34 | all implementation parameters of RISC-V standards: the configuration 35 | structure. There will be two configuration structure formats: a 36 | machine-readable format intended to be embedded in hardware, and a 37 | human-readable format intended for people to work with directly. 38 | * Specify how M-mode software can discover and access any present 39 | machine-readable configuration structures. 40 | * Provide a tool that can translate between the machine-readable and 41 | human-readable formats. 42 | 43 | Implementation parameters are details that a RISC-V specification explicitly 44 | leaves up to an implementation. This includes hart-specific details like the 45 | kinds of hardware triggers supported, as well as details that are outside 46 | harts such as the supported abstract debug commands. 47 | 48 | The configuration structure should: 49 | * be flexible enough that future task groups won’t feel the need to 50 | create another structure used to describe implementation parameters. 51 | * be easy to translate into other data structures. 
52 | 53 | The configuration structure is intended to be used: 54 | * to describe RISC-V hardware profiles 55 | * by firmware and BIOSes during the boot process 56 | * by debuggers 57 | * by a tool chain to build software tailored to a configuration profile 58 | -------------------------------------------------------------------------------- /Archive/wiki/Examples-of-other-Working-Group-charters.md: -------------------------------------------------------------------------------- 1 | [[Example: Config WG charter]] 2 | -------------------------------------------------------------------------------- /Archive/wiki/Extended-CMO-types.md: -------------------------------------------------------------------------------- 1 | There are more types of CMOs 2 | * than are represented in computer architecture textbooks 3 | * than can be fit into a small number of instruction encodings. 4 | 5 | Therefore, I propose 6 | 7 | a) that can be placed in an encoding that maps to one of 2 CSRs that contains the 8 | 9 | or 10 | 11 | b) that can be placed in a register operand passed to the CMO instruction. 12 | 13 | 14 | The first approach, using CSRs to hold the , is IMHO preferred, because it exposes less of the microarchitecture while supporting greater architectural flexibility, and requires less complexity to make secure (non-forgeable). The latter approach, placing the in a register operand is correspondingly deprecated, and is not part of the active proposal. 15 | 16 | 17 | = in a CSR 18 | 19 | The last is used to say "use the specified in CSR_TBD". 20 | 21 | The ISA does not define the format of the , although this proposal provides a basic recommendation. 22 | 23 | I propose that OS or platform specific software abstract things as follows: 24 | * User code makes a system call that tells the OS, e.g. 
"I am only trying to synchronize with threads/processes running on other harts/CPUs with which I share an L3$, so flush/invalidate the L1$, L2$, and everything all the way to the L3$, but don't flush the L3$ or L4$." - when the standard flush operations would also flush the L4. 25 | * OS determines if the user is allowed to do the operation, error if not 26 | * OS determines the implementation dependent encodings to be placed in the CSR 27 | * OS returns to user 28 | * user can now use the CMO.* instructions with =use CSR that contains 29 | * OS knows that the user is allowed to use the CMO, because it tested it at the time it was set up. 30 | 31 | = in a register input 32 | 33 | Instead of 34 | * CMO.VAR.. rd, rs1 35 | * rd=nbytes, rs1=hi_addr 36 | * CMO.FSZ... rs1 37 | * rs1=addr 38 | 39 | Use an additional register 40 | * CMO.VAR.. rd, rs1, rs2 41 | * rd=nbytes, rs1=hi_addr, rs2=cmo_type 42 | * CMO.FSZ... rs1, rs2 43 | * rs1=addr, rs2=cmo_type 44 | 45 | I would prefer that the ISA did NOT define the format. 46 | I propose that OS or platform specific software abstract things as follows: 47 | * User code makes a system call that tells the OS, e.g. "I am only trying to synchronize with threads/processes running on other harts/CPUs with which I share an L3$, so flush/invalidate the L1$, L2$, and everything all the way to the L3$, but don't flush the L3$ or L4$." - when the standard flush operations would also flush the L4. 48 | * OS determines if the user is allowed to do the operation, error if not 49 | * OS returns to the user an encoding that it can pass as the rs2 cmo_type value above. 50 | * user can now use the CMO.* instructions with rs2=value returned by OS 51 | * However, OS must prevent user from forging access to CMOs that they should not be allowed. 52 | * e.g. it may be a handle number, mapped to a full CMO encoding in a table, with table index checks 53 | * or OS may have loaded a list of permitted encodings, that HW must check user provided value against. 
54 | 55 | = encoding - reference implementation 56 | 57 | I would prefer that the ISA did NOT define the format. 58 | 59 | But nevertheless I want to provide a reference example. 60 | 61 | Bits in an XLEN register value 62 | 63 | * 1-bit: 64 | * writeback dirty data 65 | * invalidate dirty data without writing back - security sensitive!!! 66 | * 1-bit: 67 | * invalidate all lines scanned 68 | * leave clean lines 69 | * 1-bit I: applies to all caches that can hold instructions 70 | * 1-bit D: applies to all caches that can hold data 71 | * note: bitmask, so can CMO I-only, D-only, or both 72 | * 3-bits: cache depth 73 | * systems with L0..L4 caches are available nowadays - this allows up to 8 levels of hierarchy 74 | * cache numbering is system specific, e.g. the L1/L2 may be exclusive 75 | * 3-bits: virtual/physical guest/host ... 76 | * 000 = (guest) virtual 77 | * 001 = (guest) physical 78 | * 010 = host virtual 79 | * 011 = host physical 80 | * ... reserved 81 | * 1-bit: use cache uarch parameters 82 | * 3-bits: cache number 83 | * 1-bit: flush all 84 | * 16-bits: way mask <-- e.g. if user is given only certain ways for isolation 85 | * ?? 86 | * 8-bits: value to be placed on a bus transaction to flush external caches outside the CPU. 87 | 88 | 89 | * pou = I & D 90 | * poc 91 | * pop = point of persistence (battery backed-up DRAM) 92 | * pop = point of persistence (NVRAM) 93 | 94 | 95 | 96 | 97 | It can be seen that this can quickly exceed 32-bits. And I am not trying very hard. 98 | 99 | Nevertheless, this format is NOT part of the architecture. Just a suggestion. 100 | -------------------------------------------------------------------------------- /Archive/wiki/How-to-search-this-wiki,-repo,-issues,-etc..md: -------------------------------------------------------------------------------- 1 | I was remarkably slow figuring out how to search this wiki. 
2 | 3 | See [HOW-TO: search wiki on GitHub](https://1drv.ms/u/s!AsM0rpNELR4xgQm9sCzSiwsl_KjJ?wd=target%28HOW-TO.one%7C9FD63E37-17E4-4075-AEB2-593259BBE1C5%2FHOW-TO%3A%20search%20wiki%20on%20GitHub%7CAC5F362B-239A-416C-8405-D59AD13F1791%2F%29) 4 | 5 | BRIEF: standard GitHub search using the search box in the upper left-hand corner works, but you must remember to change the "view", to look at the wiki items returned. By default you will only be looking at the code items, and there may be none. WARNING: if you have a narrow window, you may not see the count of wiki items returned by the search. if you miss the horizontal scrollbar ... 6 | 7 | The [search](../search?q=ENTER-QUERY-HERE&type=wikis) link here and in the sidebar is an attempt to make this friendlier, by setting the view returned by the link to be wikis, but you will have to fill in the search term in the new page. 8 | 9 | --- 10 | 11 | In fact, I was on the verge of giving up on the GitHub wiki, instead migrating to RISC V's new official Confluence wiki, https://wiki.riscv.org. 12 | See [CMO TG page on wiki.riscv.org](https://wiki.riscv.org/display/TECH/CMO+%28Cache+Management+Operations%29+TG). 13 | 14 | I may still migrate to Confluence, to get other good features like copy/paste of links and formatted text and images, and better access control. 15 | But at least now I figured out how to search this GitHub wiki. -------------------------------------------------------------------------------- /Archive/wiki/I-am-frustrated-that-we-are-going-around-in-circles--with-respect-to--modulation-of-CMOs.md: -------------------------------------------------------------------------------- 1 | Current proposal, hoping to break this deadlock: [[Privilege for CMOs]] 2 | 3 | # Interception, Modulation, and Mapping of CMOs 4 | 5 | # Original Proposal: CSRs ansd system calls. 
6 | 7 | My (Ag's) original proposal looked something like this: 8 | 9 | * CMO instructions that contain a CSR operand, along with an address or a (set,way) cache entry number that indicated what to flush 10 | * A CSR operand for each such CMO instruction, that contained an encoding that indicated which caches and branch predictors need to be flushed 11 | 12 | However, it is necessary to accompany this with a system call: 13 | 1. since the user cannot write such a CSR directly 14 | 2. since different software systems may allow (some) users to perform a CMO, while the same or other software systems may disallow (some) users from performing that same CMO 15 | * i.e. the privilege required for a CMO depends on the system software. It is NOT KNOWN to CPU hardware or the ISA 16 | 3. since there needs to be a mapping between abstract user level CMOs and the operations that the hardware actually performs 17 | 18 | Mapping 19 | 20 | local cluster 21 | HW coherent MOESI 22 | SW coherence 23 | between clusters 24 | 25 | SW P -> C 26 | 27 | MOESI 28 | * flush all dirty data in local cluster to the poc(P,C) 29 | MESI 30 | * no thread migration 31 | * flush local CPU only 32 | * thread - flush all cluster 33 | 34 | 35 | Point_of_Unification = pocvg(P.I,P.D) 36 | * pocvg(P*.I,P*.D) 37 | 38 | Point_of_Coherence = pocvg(P1.D,P2.D;address) 39 | 40 | Point_of_Persistence = pocvg(P1,NVRAM) or pocvg( 41 | * 42 | 43 | Point_of_Serialization = per address 44 | * FENCE.COMPLETION = persistence / SW coherency / MMIO 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /Archive/wiki/ISSUE---process-migration-argues-for-whole-cache-invalidation-operations-and-against-the-partial-progress-loop-construct.md: -------------------------------------------------------------------------------- 1 | // TBD: BUG: the filename with a colon in it seems to cause some tools problem, like emacs tags-query-replace 2 | // next-file: Opening input file: No such 
file or directory, /cygdrive/c/Users/glew/Documents/GitHub/Ri5-stuff/Ri5-stuff.wiki/ISSUE!-process-migration-argues-for-whole-cache-invalidation-operations-and-against-the-partial-progress-loop-construct.md 3 | // TBD: rename 4 | 5 | The [[parent page||Non-Address-Based-CMOs-for-Abstraction-and-Efficiency]] 6 | from which this issue was created 7 | said: 8 | 9 | > A thread might migrate from one CPU to another while the CMO loop construct is in progress. If this is done it is the responsibility of the system performing the migration to ensure that the desired semantics are obtained. For example, the code that is being migrated might be restricted to only apply to cache levels common to all processors migrated across. Or similarly the runtime performing the migration might be required to ensure that all necessary caches are consistent. 10 | 11 | Referring to the [[CMO.UR loop construct]]: 12 | 13 | ~~~~~~ 14 | reg_for_cmo_index := 1<<(XLEN-1)-1 15 | LOOP 16 | CMO.UR RD:reg_for_cmo_index, RS1:reg_for_cmo_descriptor 17 | UNTIL reg_for_cmo_index <= 0 18 | ~~~~~~ 19 | 20 | The definition of the CMO.UR instruction in the parent page, with RD as a source/destination register holding the CMO UR index, allows the CMO.UR instruction to be interruptible and restartable. Interruptability and restartability does not depend on the loop construct above. 21 | 22 | What the loop construct gives us is that it permits non-state machine implementations. E.g. CMO.UR might touch one and only one cache line on each invocation. 23 | 24 | In many situations this CMO.UR loop construct will be executed by privileged code. Probably locked onto a single processor. Not subject to process migration. If this is the case, the loop construct causes no problems. 25 | 26 | However, it is desirable that such cache management operations be performed by code that has the least privilege possible. 
For example, a user level web browser implementation of a sandbox might wish to flush L1 I cache and D cache timing channels when transitioning between code inside the sandbox and code outside the sandbox. Obviously this would be simplest if the caches involved had no dirty data, e.g. if the L1 data cache were write through, and if there were bulk invalidates. But even caches that contain no dirty data sometimes have no bulk invalidates, and need to sequence over the entries in the cache. 27 | 28 | The possibility of a thread migration while user code is executing the CMO.UR loop construct raises some issues. 29 | (Or, equivalently, a guest OS being migrated by a hypervisor.) 30 | 31 | If the thread that is performing the CMO loop construct is migrated, and if it is invalidating or flushing a cache that is local to its original processor, and not shared, then the semantics are completely ambiguous. Half of the cache flush might be performed on the first processor, half on the second. 32 | 33 | (Note that Derek Williams of IBM has resolved similar issues for the export.I and import.I instruction sequences related to dynamic codegeneration for the J extension. However, as far as I can tell this resolution depends on nonlocal effects for the export.I instruction. That might not be possible for CMOs in general.) 34 | 35 | This page does not propose to resolve this problem. 36 | 37 | This page only wishes to point out that the partial completion loop construct is itself part of the problem. 38 | 39 | If the CMO.UR instruction did not need to be wrapped in the partial completion loop construct then it might be possible for the runtime code that is performing the thread migration to observe the program counter at which the thread that is being migrated lies, determine that it is a CMO.UR instruction, and take the necessary steps. 
This is because, if the CMO.UR instruction were "whole cache", the PC at the time of migration would unambiguously indicate that a cache management operation is in flight. 40 | 41 | Note that "whole cache" does not mean non-interruptible. The interruptibility of the CMO.UR instruction is not at all related to the loop. The interruptibility is based on actually being interruptible, and also having source/dest operands so that no special treatment is needed by the interrupt handler. All the loop construct provides is the ability for an implementation not to have a sequencer. 42 | 43 | If the CMO.UR instruction is embedded in the loop construct, it may be difficult for the runtime that is performing the thread migration to determine that a cache management operation is in flight. Certainly the PC does not necessarily point to the CMO.UR instruction. It might be possible to require that the loop be very specific, potentially only the CMO.UR instruction and the end of loop branch. If that were the case, the runtime might be able to detect the CMO loop construct. However, we are on a slippery slope. The CMO loop construct might be very compact, but there could be other operations interleaved in the middle of the loop. Indeed, the CMO loop construct might be compact, but a binary rewriting tool may have inserted other instructions, e.g. for timing, between the instructions. Any deviations make it more and more difficult for the runtime to detect that a cache management operation is in flight. 44 | 45 | If the runtime can detect that the cache management operation is in flight, and if that operation's semantics are affected by the migration, the runtime has several options 46 | 1. Perhaps the runtime could defer the migration until after the CMO has completed 47 | 2. Perhaps the runtime could complete the operation itself on behalf of the thread, before the thread is migrated (e.g. a hypervisor might complete the operation before migrating a guest OS) 48 | 3. 
The runtime could complete the operation, but still let the migrated code also think that it is completing the operation. That would lead to redundant invalidations or flushes. 49 | 50 | These options are not available if the runtime cannot easily detect the cache management operation is in flight. 51 | 52 | MORAL: the partial completion loop, a.k.a. the CMO.UR loop construct, can make things more difficult, compared to a sequencer that does a "whole cache operation". 53 | 54 | 55 | -- 56 | 57 | Similar problems occur for variable address range based CMOs, CMO.VAR. And indeed, for loops wrapping around fixed block size CMO.FSZ. However, the microarchitecture based invalidations of CMO.UR are inherently more subject to local interpretations than are the address based invalidations of CMO.VAR and CMO.FSZ. 58 | -------------------------------------------------------------------------------- /Archive/wiki/Instructions-that-Support-Partial-Progress.md: -------------------------------------------------------------------------------- 1 | # Resume vs Restart 2 | 3 | The instructions of most modern computers are "all or nothing". They either execute completely, or if they cannot complete they are stopped, the problem is cleared up, and the instructions are **restarted** as if from the very beginning. 4 | 5 | This has not always been the case. E.g. the Motorola 68000 family of microprocessors had some relatively CISCy instructions, implemented using not just microcode but also nano code, and famously could receive an exception, e.g. a page fault, in the middle of one of these microcode operations. The microcode/microarchitecture state was saved, including in a not publicly documented "stack puke" area on the processor stack. The exception handler could do its job, and then return to the partially completed instruction, picking up where it had left off from the "stack puke" area. 
6 | 7 | (I (Ag) vividly remember meeting a Motorola kernel developer when Motorola acquired the OS group that I was then working at. This Motorola kernel developer was adamant that the most reliable way to program an exception handler was to rely only on information in the stack puke area - which was documented to Motorola internal developers, although not to the outside world. He said that there were so many errors in the control and status registers of devices such as the I/O MMU that they could not be relied on.) 8 | 9 | TBD: other examples of resumable instructions. 10 | 11 | # Partial Progress is not necessarily "resume from microcode puke" 12 | 13 | This topic page discusses instructions that are not "all or nothing". Instructions that can support partial progress, permanently commit as much work as possible, and then save state in a form such that the instruction can be resumed without having to repeat any extra work already completed. 14 | 15 | This is not necessarily "resume from microcode or microarchitecture state". For the purposes of this topic page it is emphatically not. 16 | 17 | In fact, for the purposes of this page the distinction between resume and restart is blurred. The instructions discussed here accomplish their "partial progress" by modifying architectural state. On an exception or other circumstance in which the instruction execution is interrupted, ordinary registers are written. On exception return ordinary registers are read. In some circumstances the registers involved are source/destination; in some circumstances, the instruction is "restarted as if from the beginning", however the starting point, the initial state for the instruction, has been modified, so that it does not need to repeat work already done. Therefore the term "partial progress" as in "instructions that support partial progress" is used rather than "resumable". "Partial progress" instructions may be considered to be either restarted or resumed, or something in between. 
18 | 19 | Moreover, this is not an issue of RISC versus CISC. Some of the instructions described here are arguably RISC instructions. 20 | 21 | # Examples of "Partial Progress" - x86 REP STOS and REP MOVS 22 | 23 | Probably the most familiar modern examples (in 2020) of instructions that make partial progress are the x86 block memory operations, REP MOVS and REP STOS. REP STOS fills a block of memory with a value from a register. REP MOVS copies one memory block to another. 24 | 25 | STOS and MOVS are the most prominent members of a family of x86 "string" operations that include CMPS (compare), SCAS (scan), and LODS (load). These "string" operations are composed with repeat prefixes REP (repeat while count not zero), REPE/REPZ and REPNE/REPNZ (repeat while equal/zero or while not-equal/non-zero). The string operations are provided in flavors of different sizes - 8-bit byte, 16-bit word, 32-bit double word. 26 | 27 | Architecturally, the string operations such as STOS and MOVS are simple instructions, that are repeated automatically by the REP repeat instruction. 28 | 29 | STOSB performs the following operation 30 | ~~~~~~ 31 | STORE.BYTE Memory[ DI ] := AL 32 | DI := DI + (1 IF DF == 0 ELSE -1) 33 | ~~~~~~ 34 | 35 | MOVSB performs the following operation 36 | ~~~~~~ 37 | tmp := LOAD.BYTE Memory[ SI ] 38 | STORE.BYTE Memory[ DI ] := tmp 39 | SI := SI + (1 IF DF == 0 ELSE -1) 40 | DI := DI + (1 IF DF == 0 ELSE -1) 41 | ~~~~~~ 42 | 43 | The repeat prefix REP repeats the string operation to which it is applied, e.g. STOSB or MOVSB, decrements a counter (in register CX/ECX/RCX), and repeats until the counter reaches zero. The conditional versions REP[EZ] and REPN[EZ] can terminate early if a condition is met. 44 | 45 | STOSx and MOVSx can be used as independent instructions. 46 | 47 | REP STOSx and REP MOVSx can be viewed as loops around the "simple" instructions STOSx and MOVSx. 
48 | 49 | But most modern x86 systems use "fast strings", and implement REP STOSx and REP MOVSx as if they were combined or fused into a single instruction that performs many simple operations. E.g. instead of REP STOSx storing a byte at a time, the optimize version can store 16, 32 or more bits at a time. The optimized version may use cache protocol operations not available to ordinary instructions. The optimized version behaves as if it were a loop around the simple version, but is optimized to be efficient as possible. The optimizations may be accomplished by microcode, or by hardware state machines, or by a combination of both. 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /Archive/wiki/Makefile.OBSOLETE: -------------------------------------------------------------------------------- 1 | # Makefile for Ri5-CMOs-proposal 2 | # in Ri5-stuff.wiki 3 | # https://github.com/AndyGlew/Ri5-stuff/wiki 4 | 5 | # this Makefile is now obsolete 6 | # and should be deleted. 7 | # It's functionality has been moved to 8 | # https://github.com/AndyGlew/Ri5-stuff/Makefile 9 | # where 10 | # https://github.com/AndyGlew/Ri5-stuff 11 | # gets https://github.com/AndyGlew/Ri5-stuff/wiki 12 | # as a submodule. 
13 | 14 | 15 | 16 | # As of Wednesday, May 6, 2020-05-06 17 | # The main purpose is to run a command to expand the AsciiDoc include directives 18 | # so that you can get a better idea what will actually look like 19 | 20 | HTML_VIEWER=/cygdrive/c/Windows/explorer.exe 21 | 22 | open-html-in-browser: Ri5-CMOs-proposal.html 23 | # KLUGE: Windows HTML viewer does not understand / paths 24 | # either need to convert / --> /, cd, or cygpath 25 | $(HTML_VIEWER) Ri5-CMOs-proposal.html 26 | 27 | ASCIIDOCTOR=/home/glew/bin/asciidoctor 28 | #TBD: Move asciidoctor to standard location 29 | 30 | Ri5-CMOs-proposal.html: Ri5-CMOs-proposal.asciidoc 31 | $(ASCIIDOCTOR) -b html Ri5-CMOs-proposal.asciidoc -o Ri5-CMOs-proposal.html 32 | -------------------------------------------------------------------------------- /Archive/wiki/Mandatory-versus-Optional-CMOs,-PREFETCHES,-and-CPHs.md: -------------------------------------------------------------------------------- 1 | NOTE: in my terminology CMO is a generic term, that includes both optional or advisory operations such as PREFETCH instructions and CPH (Cache Prefetch Hint) such as post-store/push-out instructions, but also includes mandatory operations such as cache flushes and invalidations for security timing channel mitigation, software-managed coherence, and persistence. 2 | 3 | If you use an alternate terminology where PREFETCH and CMO and CPH instructions are mutually exclusive categories, the concern still arises. 4 | 5 | It is traditional that it should be possible to treat performance-related instructions such as PREFETCH and CPH instructions as NOPs. They are optional, and only influence timing, not time-free program semantics. However, mandatory CMOs such as cache flushes for software-managed coherence cannot be treated as NOPs. Arguably, on a machine that does not implement the CMOs, they should be trapped as illegal instructions.
Better to trap, and possibly emulate, than to not accomplish what they are supposed to do, and have the program break, although possibly not in obvious ways. 6 | 7 | Unfortunately there is a middle ground: software coherence. On a system that truly lacks some if not all hardware coherence features, the cache flushes and other CMOs required to enable software coherence absolutely must be performed. However, it has happened more than once that such a system was created long ago, and that eventually hardware cache coherence was implemented. In which case such CMOs might be ignored. I.e. whether a CMO is mandatory or optional may depend on the platform configuration e.g. whether hardware cache coherence is implemented or not. (Note: this applies to software coherence, and possibly some forms of power management. It probably does not apply to persistence to NVRAM.) 8 | 9 | A case in point is the EXPORT.I instruction proposed to support dynamic code generation on RISC-V. Some traditional RISC instruction sets do not support I cache consistency with the data cache. On these instruction sets EXPORT.I is required to perform a cache action, essentially invalidating I cache lines. (Complementary instruction IMPORT.I might flush post cache instruction pipelines). However, some CPUs have decided that it is just plain easier to support I cache consistency. On such machines it may not be necessary for EXPORT.I to invalidate I cache lines. Arguably, EXPORT.I might still need to do stuff related to data stores and instruction fetch pipeline consistency, in conjunction with IMPORT.I. However still other systems have made both EXPORT.I and IMPORT.I unnecessary, and can treat both as NOPs. 10 | 11 | The point here is that there are both instruction set architecture and microarchitecture considerations relevant to whether a CMO is mandatory. 12 | 13 | E.g. the EXPORT.I and IMPORT.I functionality is mandatory from an instruction set architecture point of view.
But some microarchitectures might make it unnecessary. 14 | 15 | -- 16 | 17 | Also, similar cache invalidate and flush operations may be optional for some purposes and mandatory for others. 18 | 19 | For example, cache flushes when treated as CPH (Cache Performance Hints) may be ignored on a system that is hardware consistent. After all, they should only influence performance. In fact, it is probably desirable to have a control that allows them to be enabled or disabled, since quite often cache performance hints and prefetches turn out to be less effective than the predictors and prefetchers of an advanced microarchitecture. However, cache flushes should never be disabled for security related timing channel mitigation. 20 | 21 | I have considered having a mandatory/optional bit in any that is passed to CMO instructions, and possibly also PREFETCH instructions and CPH instructions. (But probably only if in a general-purpose register, or a CSR implicit input operand, for such instructions. It is unlikely that we have enough instruction encoding space to provide such an orthogonal bit if the [[]] is encoded in the instruction itself.) 22 | 23 | Considerations such as the above - the fact that on some microarchitectures CMO optional/mandatory depends on both usage and microarchitecture - suggest that a single mandatory/optional bit is not sufficient. There probably need to be more types of discretion. 24 | 25 | Possibly: 26 | * optional, for performance only. Can always be made into a no-op 27 | * mandatory if no hardware cache coherence, optional (possibly always disabled) if hardware cache coherence 28 | * although note: computer architects often delude themselves into thinking that their system is 100% hardware cache coherent, when in reality the platform in which it is embedded may make it not always hardware cache coherent 29 | * mandatory, e.g.
for a cache push out or flush, if the CPU or whatever caches the data is being pushed out from are not retained in the power saving mode. Optional if they are retained. 30 | * note: this is of questionable value, since many systems have multiple power saving modes, some of which retain state in devices such as CPUs, some of which do not. 31 | * always mandatory 32 | * I suspect that security related timing channel mitigation flushes will always be mandatory. Although they may be selective, only applying to certain levels of the cache. And they will probably apply to hardware data structures such as branch predictors as well as to caches. 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /Archive/wiki/Meeting-11-09-2020.md: -------------------------------------------------------------------------------- 1 | Github wiki 2 | - minutes 3 | 4 | Github access 5 | * readable to the world 6 | * write access 7 | * must have GitHub account 8 | 9 | ... AB: wants restricted wiki .... 10 | 11 | ... TBD: Ag fix up ... 12 | 13 | More agenda 14 | 15 | 16 | * Cache index / (set,way) 17 | 18 | --- 19 | 20 | * CMO.SEC 21 | 22 | * CMO.AR -------------------------------------------------------------------------------- /Archive/wiki/Non-CMO-stuff-to-be-deleted.md: -------------------------------------------------------------------------------- 1 | 2 | The https://github.com/riscv/riscv-CMOs/ repo and wiki 3 | were forked from a personal repo and wiki 4 | https://github.com/AndyGlew/Ri5-stuff 5 | that also contained non-CMO stuff. 6 | 7 | The non-CMO stuff should be deleted. 
8 | 9 | ## Other - non-CMO topics (to be deleted from riscv-CMOs repo+wiki) 10 | 11 | 12 | ### Aside: 13 | * [[Problems editing GitHub wiki using speech recognition]] 14 | 15 | 16 | ### Side: multipart LUT4 instruction in RV32 for the crypto extension 17 | * looks like this is now part of the crypto proposal 18 | 19 | ### Supporter: 20 | * [[in-band tagging pointers]] 21 | * I am very interested in proposals for using some bits of pointers to improve... stuff like security, sandboxing, etc. 22 | * although: in my own work, capabilities inspired, 23 | * I have deprecated some such approaches as only 15/16ths secure (4 bits, get it?) 24 | * since my own capabilities projects were either cancelled or emasculated, I'll take 15/16ths over 0/16ths 25 | 26 | ### Possible future topics for RISC-V 27 | 28 | * [[Quantization, dequantization, and interpolation instructions for DL, math, etc.]] 29 | -------------------------------------------------------------------------------- /Archive/wiki/Overview-of-CMO-operations.md: -------------------------------------------------------------------------------- 1 | 2 | RISC-V systems need cache management operations, aka CMOs. As far as I know, so far such operations have been defined in an implementation specific manner. Other computer architectures define cache management operations, often via a mixture of user level instructions, privileged instructions, and platform specific operations accessed via MMIO control registers. See [[Survey of CMOs in Modern Computer Architectures]]. 3 | 4 | Purposes of CMOs include: 5 | * performance tuning 6 | * security, e.g. mitigating Spectre-like information leak security vulnerabilities 7 | * persistence, e.g. nonvolatile RAM in the memory hierarchy 8 | * power management, e.g. flushing caches before removing their power 9 | * software managed cache coherence, e.g. non-coherent I and D caches 10 | * bank switching of physical memory, e.g. 
HP's "Machine" 11 | * reset, hot plug (not necessarily current high priority) 12 | 13 | These use cases have different needs. 14 | * User level access to these CMOs is desirable in some cases, but not required for all 15 | * Some affect only data and/or instruction caches and related parts of the memory system 16 | * Others, e.g. security, need to influence other microarchitecture state like branch predictors 17 | * Some need to interact with other CPUs, not necessarily RISC-V or from the same vendor, and possibly non-CPU devices 18 | * Some CMOs may be ignored (performance), while others are required for correctness (SW coherency, power management, security) 19 | * Scope 20 | * Some CMOs affect only a smallish fixed size block like a cache line 21 | * Others affect a range of physical or virtual addresses 22 | * Others want to affect an entire cache, or a partition thereof 23 | * some CMOs may be optimized, e.g. performed in the background 24 | * whereas other CMO use cases may require control over timing 25 | 26 | The biggest problem with CMOs in general is that cache architectures in particular, and microarchitecture state in general, can be highly diverse. 27 | * Cache architecture 28 | * How many levels of I and D? Are I and D unified at some level? 29 | * How many levels, and how big? 30 | * Associativity, skewed 31 | * LRU policy... 32 | * What caches are shared between separate CPUs/harts/other smart devices? 33 | * Mesh versus hierarchical? 34 | * Virtual versus physical 35 | * inclusive versus exclusive versus neither inclusive nor exclusive 36 | * clean/write-through vs dirty/write-back 37 | * does hardware support "flash invalidate", or is it necessary to scan the cache either in software or hardware? 38 | * other microarchitecture 39 | * there are more forms of microarchitecture state, branch predictors, prefetchers, etc.
than are imagined by any computer architecture textbook 40 | * security timing channel mitigation requires the ability to flush or reset nearly all such microarchitecture state that influences execution timing. 41 | * Most other applications do not 42 | 43 | The term "CMO (Cache Management Operation)" may be too specific. A more generic term may describe the needs of security and performance management - "microarchitecture state management operations (uSMO)"? Unfortunately, I do not have a good more generic term. For that matter, it is not clear that non-cache state 44 | 45 | This CMO proposal 46 | * defines a small standard set of targeted cache operations 47 | * but also provides a standard way to invoke nonstandard implementation specific cache operations 48 | * e.g. figure out what your application needs to do, which may require knowledge of the CPU and platform architecture 49 | * if nonstandard, ask the OS for permission to do these actions 50 | * use the standard CMO instructions defined here to invoke the nonstandard actions described and encoded above 51 | 52 | 53 | -------------------------------------------------------------------------------- /Archive/wiki/Privilege-for-CMOs.md: -------------------------------------------------------------------------------- 1 | Aside: I apologize for [[voice typos editing this wiki]] 2 | 3 | Actual proposal/draft [[draft-Privilege-for-CMOs]] 4 | forked from discussion [[Privilege-for-CMOs]] 5 | 6 | 7 | # PROPOSAL: 8 | 9 | Each CMO.VAR.* and CMO.UR.* is mapped to a number 0..Ncmo-1, where Ncmo is the Number of CMO instruction encodings. 10 | 11 | (Note: the encodings do not necessarily have a contiguous field that corresponds to these values.) 12 | 13 | CSR [[CMO_Privilege]] contains Ncmo 2-bit fields where bitfield CMO_Privilege.2b[J] indicates the privilege required to perform the corresponding CMO operation J. 14 | 15 | The 2-bit fields are encoded as follows: 16 | * 00 => disabled.
17 | * 01 => traps to M mode 18 | * 10 => reserved 19 | * 11 => can execute in any mode, including user mode 20 | 21 | The disabled behavior is as follows: 22 | 23 | CMO_Privilege.2b[J] => CMO.#J 24 | * the instruction does not actually perform any cache maintenance operation. 25 | * but it returns a value such that the [[canonical range CMO loop]] exits 26 | * CMO.VAR rd:next_addr, rs1=rd:start_addr, rs2:stop_addr 27 | * sets RD to stop_addr 28 | * CMO.UR rd:next_entry, rs1:start_entry 29 | * sets RD to -1 30 | 31 | # RATIONALE: 32 | 33 | Requirement: in some CPU implementations all or some CMOs *must* be trapped to M-mode and emulated. E.g. caches that require MMIOs or CSR actions to flush, which are not directly connected to 34 | 35 | Requirement: in some platform configurations some CMOs may *optionally* be trapped to M-mode and emulated. E.g. [[CMOs involving idiosyncratic external caches and devices]], devices that use MMIOs or CSRs to perform CMOs, and which are not (yet?) directly connected to whatever 36 | 37 | Requirement: it is highly desirable to be able to perform CMOs in user mode. E.g. for performance. But also for security, persistence, since everywhere the [[Principle of Least Privilege]] should apply: e.g. the cache management may be performed by a privileged user process, i.e. a process that is part of the operating system but which is running at reduced privilege. In such a system the operating system or hypervisor may choose to context switch the CMO_Privilege CSR, or bitfields therein. 38 | 39 | Requirement: even though it is highly desirable to be able to perform CMOs in user mode, in some situations allowing arbitrary user mode code to perform CMOs is a security vulnerability. Vulnerability possibilities include: information leaks, denial of service, and facilitating RowHammer attacks. 40 | 41 | Requirement: many CMOs should be permitted to user code, e.g.
flush dirty data, since they do nothing that user code cannot itself do using ordinary load and store instructions. Such CMOs are typically advisory or performance related. Note that doing this using ordinary load and store instructions might require detailed microarchitecture knowledge, or might be unreliable in the presence of speculation that can affect things like LRU bits. 42 | 43 | Requirement: some CMOs should *not* be permitted to user code. E.g. discard or forget dirty data without writing it back. This is a security vulnerability in most situations. (But not all - although the situations in which it is not a security vulnerability are quite rare, e.g. certain varieties of supercomputers, although possibly also privileged software, parts of the OS, running in user mode.) 44 | 45 | Requirement: some CMOs may usefully be disabled. 46 | * Typically performance related CMOs, such as flushing to a shared cache level, or prefetching using the range CMOs. Software is notorious for thinking that it knows the best thing to do. 47 | * Also possibly software based on assumptions that do not apply to the current system 48 | * e.g. system software may be written so that it can work with incoherent MMIO 49 | but may be running on a system that has coherent MMIO 50 | * e.g. persistence software written so that it can work with limited nonvolatile storage 51 | running on a system where all memory is nonvolatile 52 | 53 | Requirement: Sometimes there needs to be a mapping between the CMO that a user wants and the CMOs that hardware provides, where the mapping is not known to CPU hardware, not known to user code, but depends on the operating system and/or runtime, and might dynamically depend on the operating system and/or runtime. 54 | * e.g. For performance related CMOs, the user may only know that she wants to flush whatever caches are smaller than a particular size like 32K. The user does not know which caches those are on a particular system. 55 | * e.g.
in software coherence all dirty data written by the sending process P_producer may need to be flushed to a shared cache level so that it can be read by the consuming process P_consumer 56 | * consider if the sending process P_producer is part of a HW coherent cache consistency domain, but the receiving process P_consumer is part of a different such domain 57 | * if the hardware cache consistency domain permits cache-to-cache migration of dirty data, then all caches in that dirty domain must be flushed. 58 | * however, if the hardware cache consistency domain does NOT permit cache-to-cache migration, then 59 | * if the system software performs thread or process migration between CPUs that do not share caches 60 | * without cache flushes => THEN this SW dirty domain must be flushed 61 | * but if the system software performs cache flushes on thread migration, 62 | => THEN only the local processor cache need be flushed. 63 | * if the system software does not perform thread or process migration, 64 | then only the local processor cache need be flushed. 65 | Other processor caches in the HW clean consistency domain do not need to be flushed. 66 | 67 | Optionally trapping such CMOs allows the system or runtime software to choose the most appropriate hardware CMO for the users' need. 68 | 69 | WHINING: 70 | * I had originally planned to define CSR operands for the CMO instructions, both to provide the privilege modulation (trapping, disabling) and mapping functionalities of the requirements listed above. 71 | * key reviewers reject this possibility, and/or suggest providing it only later if the need is proven 72 | * however, these key reviewers CANNOT deny the requirements of enabling or disabling CMOs listed above 73 | * therefore, providing this compact privilege mechanism. 74 | * I am actually just as happy not to define the CSR operand to contain an encoding of CMO operations desired, since I can easily imagine that in some circumstances more than one CSR will be required.
E.g. a CSR that might contain a way mask. Therefore, this " permission vector" approach allows the actual CSR is to be defined later, while enabling [[privilege modulation]] today. 75 | -------------------------------------------------------------------------------- /Archive/wiki/Problems-editing-GitHub-wiki-using-speech-recognition.md: -------------------------------------------------------------------------------- 1 | TBD: move to https://github.com/AndyGlew/Ag-stuff/wiki 2 | 3 | I am having a bit of writer's block composing 4 | 5 | [[CMOs (Cache Management Operations)]], 6 | specifically the list of [[Actual CMO Operations]] for consideration. 7 | 8 | Not only is this long list something I want to be able to automatically extract tables from, keeping in think 9 | 10 | But I'm also just plain having trouble composing this text using Dragon speech recognition, which I use because of my computeritis. 11 | 12 | Oh, shit: I think the problem was that I was using the wrong web browser. 13 | 14 | Dragon speech control was not working in Microsoft Edge. I was confused, hitting my head against the wall, since these were things that I had already been doing! Ironically, I am/was using Microsoft Edge because supposedly Microsoft Edge has better speech support. Most annoying, the extension had been somehow disabled. 15 | 16 | Similarly, speech control is not working in Firefox. Which I was also trying to switch to. 17 | 18 | But... The Dragon extension for chrome is working in my personal chrome profile. 19 | 20 | It was not working in my Si5 chrome profile. I suppose it is a good thing that the different profiles have different sets of extensions, but it tripped me out. It's working now that the extension is installed. 21 | 22 | Yes, now I can... 23 | 24 | Shit, I was pulling out my hair over this for several days !! :-) I would have investigated it earlier, but there was always email that I could quickly reply to. 
25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /Archive/wiki/Quantization,-dequantization,-and-interpolation-instructions--for-DL,-math,-etc..md: -------------------------------------------------------------------------------- 1 | STATUS: TBD: not proposed yet for RISC-V, but I expect to do so when the time comes 2 | 3 | It is often advantageous in deep learning to *quantize* the data. E.g. to represent 16 or 32 bit data in memory using only two or four bits. 4 | * E.g. dequantizing (expanding or unpacking) the two bit numbers into 16-bit data to perform computations, and then quantizing (compressing or packing) the 16-bit data into two bits to store back in memory. Thereby saving memory bandwidth but not compute bandwidth. 5 | * Some systems can actually do computations in the narrower widths - which essentially amounts to having the dequantization and quantization logic in the pipeline to the arithmetic units. Thereby saving both memory bandwidth and computational unit bandwidth. 6 | 7 | Dequantization at its simplest is essentially a classic indexed lookup table. The vector instruction set has a VRGATHER instruction that accomplishes this, although IIRC its smallest index is eight bits wide. Conceptually it is not difficult to imagine extending VRGATHER to use vectors of two bit or four bit values to perform the indexing. For that matter, memory lookup tables... Although two and four bit quantized values don't really need memory lookup tables. 8 | 9 | Another use case: mapping one of the several varieties of 8-bit floating-point or 8-bit LNS to standard 16-bit or 32-bit floating-point. 10 | 11 | The obvious counterpart to such a dequantization instruction is a quantization instruction. 12 | 13 | Anyway, to quantize, you map something like a 16-bit number to a 2 or 4 bit number. 14 | 15 | Linear quantization is usually not the right thing to do. I.e. it is not just extracting the higher order bits.
16 | 17 | Nonlinear quantization is essentially determining in which interval the wider number resides. 18 | 19 | E.g. to quantize an unsigned 16-bit number N to a 2 bit number M, you do 20 | if 0 <= N < T0 then M = 00 21 | else if T0 <= N < T1 then M = 01 22 | else if T1 <= N < T2 then M = 10 23 | else M = 11 24 | and it is convenient to pack the 3 16-bit values T0,T1,T2 into a 64-bit register 25 | RANGE_LUT_64 = ( T0, T1, T2 ) 26 | leaving 16 bits unused. 27 | 28 | I call this RANGE_LUT_64 or INTERVAL_LUT – it is not exactly the same as an ordinary indexed LUT, such as is used in cryptography or dequantization. I call the latter INDEXED_LUTs. Some computer arithmetic subdisciplines call this sort of comparison based thing a LUT, and also have other LUTs that are similar to ternary CAMs, sometimes conceptually in ROM (which synthesize to less regular but more compact logic). 29 | 30 | I will certainly be proposing this instruction at some point to assist RISC-V deep learning. Probably as part of some V-DL extension - deep learning in the vector register file - but also possibly a scalar register file version. As you can imagine, the [[multipart instruction approach]] can also be used here if there are too many conceptual operands to fit in the classic RISC two or three input model. 31 | 32 | In fact, the most annoying thing about this sort of quantization instruction using a RANGE_LUT is that the RANGE_LUT does not make use of a full vector operand for typical quantizations like 16 bits to 2 bits. Even when doing something like quantizing 32 bits to 4 bits, it really wants to have two vector operands of different lengths. Which as far as I can tell is something that the vector instruction set is not naturally suited for. 33 | 34 | -- 35 | 36 | Piling on: 37 | 38 | Some math dequantization instructions are not just a simple indexed LUT operation. E.g.
they may LUT a smallish number of the top bits, and concatenate with or otherwise combine with low bits of the value to be looked up (which is no longer an index). GPU texture units do something like this, although usually in 2D or 3D, not a single dimension, and combined with a special cache for the values that are looked up, at different resolutions (MIP mapping levels). 39 | 40 | Furthermore, a rather common operation in numerically intensive code is to do piecewise linear interpolation. E.g. have a set of data breakpoints like in the INTERVAL_LUT, and if the value does not exactly match one of those breakpoints, then interpolate. Sometimes linearly sometimes with fancier interpolation functions. 41 | 42 | But, again, I cannot imagine uses for these operations in cryptography. I don't think the cryptography will normally want to use interpolation, and especially not any form of approximate arithmetic. I sometimes wonder if it could be useful for curves in cryptography. But the piecewise intervals are far too many and should not be compressible. 43 | 44 | -- 45 | 46 | Generalized: 47 | * INDEX_LUT 48 | * INTERVAL_LUT 49 | * GPU texture lookup = INDEX_LUT and interpolation (typically in two or more dimensions) 50 | * piecewise interpolation = RANGE_LUT and interpolation 51 | 52 | -------------------------------------------------------------------------------- /Archive/wiki/Quick-and-dirty-list-of-Actual-CMOs.md: -------------------------------------------------------------------------------- 1 | 2 | # CLEAN 3 | also known as: write back 4 | 5 | writes dirty data out of the cache, leaving clean data behind. 6 | 7 | I.e. the cache contents after this operation should be all have been written back. 
8 | 9 | [[Issue: Q: should CMOs like clean and flush update LRU]] 10 | 11 | [[CMO Scope]]: local, 12 | 13 | [[CMO target]]: 14 | 15 | Examples: 16 | * flush to point of persistence (NVRAM) 17 | * flush to point of persistence 18 | 19 | 20 | * flush to point of persistence (battery backed up DRAM) 21 | * flush to point of coherence (SW managed cache consistency) 22 | * flush to shared cache level 23 | 24 | issue: vocabulary/terminology: I am very much used to saying "flush to DRAM" indicating that all dirty accesses should be sent to DRAM. I am not at all used to saying "clean to DRAM". 25 | 26 | 27 | 28 | # Flush 29 | a.k.a. write back and invalidate 30 | 31 | Writes dirty data out of the cache. 32 | 33 | I.e. the cache contents after this operation have all been invalidated. 34 | 35 | # DISCARD 36 | 37 | a.k.a. Invalidate, Forget 38 | 39 | Actually throws away dirty data in the cache, to the extent that is permissible by the cache protocol. 40 | 41 | Motivation: once temporary memory buffers are no longer needed, it is "wasteful" to write the temporary values back to memory. Of interest mostly for really large caches, or to avoid writing back unneeded dirty data to NV RAM in a persistent memory system. 42 | 43 | Analogy: SSD TRIM commands. 44 | 45 | Unsafe: 46 | * this can expose old data in memory that was overwritten by values that are now being forgotten. 47 | * note that system code may 48 | -------------------------------------------------------------------------------- /Archive/wiki/RISC-V-CMO-proposal.md: -------------------------------------------------------------------------------- 1 | This proposal for CMOs was prepared by Andy Glew, 2 | and is a start for discussion of the CMO TG. 3 | 4 | There may be other proposals from other TG members. 5 | TBD: copy here and/or link there. 6 | 7 | The https://github.com/riscv/riscv-CMOs/ repo and wiki were forked from a personal repo and wiki https://github.com/AndyGlew/Ri5-stuff that also contained non-CMO stuff.
8 | 9 | 10 | ## "Released" Proposal - what you are probably looking for 11 | 12 | As of Sept 2020 there is not really a "release" procedure. 13 | PDF and HTML are prepared in an offline clone of the repo+wiki, and pushed to GitHub. 14 | 15 | TBD: imitate the main and V specs, automatically generating PDF and HTML on the GitHub servers on checkin/push. 16 | (Not just push, as long as we continue to use a proposal embedded in the wiki.) 17 | 18 | TBD: fix the tools so that they work on systems other than Andy Glew's personal PC with cygwin. 19 | 20 | [[generated-HTML-and-PDF-for-CMOs-proposal]] (local) 21 | * on web: https://github.com/riscv/riscv-CMOs/wiki/generated-HTML-and-PDF-for-CMOs-proposal 22 | 23 | 24 | 25 | 26 | ## Work in Progress (WIP) 27 | 28 | Sept 2020: [[Administrivia - CMOS TG]] 29 | * mailing list running - lots of traffic 30 | * first meetings scheduled (see page) 31 | * testing to see how/if notifications of repo and wiki posts get sent to list (and if that will be too annoying) 32 | 33 | Jan-Aug 2020: [[CMOs (Cache Management Operations)]] 34 | 35 | * [[RISC-V needs CMOs, and hence a CMO Working Group]] 36 | * [[CMOs WG Draft Proposed Charter]] 37 | * [[Mailing lists interested in CMOs]] 38 | 39 | * [[Draft CMO proposals]] 40 | * I (Glew) created a draft CMO proposal 41 | * originally in my personal GitHub https://github.com/AndyGlew/Ri5-stuff/wiki/Ri5-CMOs-proposal 42 | * eventually in official GitHub https://github.com/riscv/riscv-CMOs/wiki/Ri5-CMOs-proposal 43 | * TBD: migrate that to riscv.org wiki 44 | * See issue https://github.com/riscv/riscv-CMOs/issues/2 45 | * [[CMOs cut across many fields]] 46 | 47 | * [[Ri5-CMOs-proposal]] 48 | * See [[generated HTML and PDF for CMOs proposal]] 49 | 50 | * [[Use Cases for CMOs and Prefetches]] 51 | * See above, in particular [[Mailing lists interested in CMOs]] 52 | * TBD: formalize CMO use cases for use in tracking progress, ensuring coverage, and restricting scope 53 | 54 | 55 | 56 | 57 | ##
[[Administrivia]] 58 | 59 | Network locations - GitHub repo, wiki, mailing lists, etc. 60 | 61 | See [[TOC - Table of Contents]] 62 | * almost certainly out of date 63 | * TBD: [[automate generation and update of wiki TOC]] as wiki evolves 64 | -------------------------------------------------------------------------------- /Archive/wiki/RISC-V-needs-CMOs,-and-hence-a-CMO-Working-Group.md: -------------------------------------------------------------------------------- 1 | All successful computer instruction sets have Cache Management Operations (CMOs). 2 | 3 | Several RISC-V systems have already defined implementation specific CMO instructions. 4 | It is desirable to have standard CMO instructions to facilitate portable software. 5 | 6 | CMOs do things like flushing dirty data and invalidating clean data for use cases that include 7 | non-coherent DMA I/O, 8 | security (e.g. Spectre), 9 | power management (flush to battery backed-up DRAM), 10 | persistence (flush to NVRAM), 11 | and more. 12 | 13 | CMOs cut across several problem domains. It is desirable to have a consistent approach, rather than different idiosyncratic instructions for different problem domains. 14 | RISC-V therefore needs a CMO working group that will coordinate with any working groups in those overlapping domains. 15 | 16 | ### Administrivia 17 | 18 | 2020/8/5: Email proposing this will soon be sent to the RISC-V Technical Steering Committee 19 | and other mailing lists, seeking approval of the formation of such a CMO working group. 20 | 21 | Here linked is a wiki version of the WG proposal [[RISC V needs CMOs, and hence a CMO Working Group]]. 22 | Also a [[CMOs WG Draft Proposed Charter]] - although probably too long. 23 | 24 | **Assuming the CMO WG is approved:** 25 | 26 | Please indicate if you are interested by replying to this email (to me, Andy Glew). 27 | To facilitate scheduling of meetings, please indicate timezone. 28 | 29 | A riscv.org mailing list should be set up soon.
30 | 31 | We have already set up https://github.com/riscv/riscv-CMOs, 32 | and will arrange permissions for working group members as soon as possible. 33 | 34 | Here linked is a [[CMOs WG Draft Proposed Charter]]. 35 | 36 | Proposals: 37 | * At least one CMO proposal has been developed in some detail. It is linked to from https://github.com/riscv/riscv-CMOs, and may soon be moved to this official place. 38 | * We welcome: Other proposals, and/or examples of implementation specific CMO extensions already implemented 39 | 40 | -------------------------------------------------------------------------------- /Archive/wiki/RISC-V-standard-disclaimer.md: -------------------------------------------------------------------------------- 1 | Here is the RISC-V standard disclaimer 2 | 3 | I am sending this as email, so that I can pin it to the top of our message archive. 4 | TBD: link. 5 | 6 | I am also posting it on our wiki https://github.com/riscv/riscv-CMOs/wiki/RISC-V-standard-disclaimer 7 | 8 | I will post or link to it from our meeting announcements and agendae. 9 | TBD: link. 10 | 11 | 12 | -------- Forwarded Message -------- 13 | 14 | Subject: [RISC-V] [tech-chairs] disclaimer slides to add to your group meeting agendas 15 | Date: Mon, 28 Sep 2020 09:12:25 -0700 16 | From: mark 17 | To: chairs , tsc 18 | 19 | 20 | https://drive.google.com/file/d/1FmXDqa20NNjtfFyPdcT7__-AfO7ke9J_/view?usp=sharing 21 | 22 | Because I am a bit obsessive about this sort of thing, I will here extract the text of this disclaimer. However, note that this extracted text may be obsoleted if the disclaimer in an official place is edited, so please refer to the Google Drive link if necessary, and to any other official place it may be moved to in the future. 
NOTE: Google search did not find this for me (Does Google search ever index Google Drive?), but did return similar documents Such as those whose URLs are in the below: 23 | 24 | # Antitrust Policy Notice 25 | 26 | RISC-V International meetings involve participation by industry competitors, and it 27 | is the intention of RISC-V International to conduct all its activities in accordance 28 | with applicable antitrust and competition laws. It is therefore extremely important 29 | that attendees adhere to meeting agendas, and be aware of, and not participate 30 | in, any activities that are prohibited under applicable US state, federal or foreign 31 | antitrust and competition laws. 32 | 33 | Examples of types of actions that are prohibited at RISC-V International meetings 34 | and in connection with RISC-V International activities are described in the RISC-V 35 | International Regulations Article 7 available here: https://riscv.org/regulations/ 36 | 37 | If you have questions about these matters, please contact your company counsel. 38 | 39 | # RISC-V International 40 | 41 | RISC-V is a free and open ISA enabling a new era of processor innovation 42 | through open standard collaboration. Born in academia and research, RISC-V ISA 43 | delivers a new level of free, extensible software and hardware freedom on 44 | architecture, paving the way for the next 50 years of computing design and 45 | innovation. 46 | 47 | We are a transparent, collaborative community where all are welcomed, and all 48 | members are encouraged to participate. 49 | 50 | We as members, contributors, and leaders pledge to make participation in our 51 | community a harassment-free experience for everyone. 
52 | 53 | https://riscv.org/risc-v-international-community-code-of-conduct/ -------------------------------------------------------------------------------- /Archive/wiki/STATUS---almost-done---maybe.md: -------------------------------------------------------------------------------- 1 | This wiki is for work on a proposal for RISC-V. 2 | 3 | Therefore, any status along the lines of "almost done?" is always contingent on whether it will be accepted by the working group(s) to which it is being applied. 4 | 5 | Therefore, [[STATUS: almost done - maybe]] means that from my point of view, the author's point of view, it is reasonably complete and has started being acceptable to key bottleneck reviewers. 6 | 7 | TBD: [[Proposal Status Tags]]. -------------------------------------------------------------------------------- /Archive/wiki/Sharing-Drawings-and-Diagrams.md: -------------------------------------------------------------------------------- 1 | Drawings and diagrams are good for technical work, like computer architecture, like RISC-V instruction set design and platform architecture. 2 | 3 | Unfortunately, there doesn't seem to be a ubiquitous standard for exchanging drawings. At least not diagrams with smart objects like glue and connectors. SVG may be good enough for simple 2D drawings, but as far as I know the SVGConnector standard has been stalled since 2011. 4 | 5 | I asked about what people use on the RISC-V crypto mailing list, and got answers that I will summarize as follows 6 | 7 | * Visio 8 | * proprietary 9 | * exports/imports to SVG, etc. 
10 | * the .VSD / .VSDX proprietary file formats seem to be the most common exchange format for diagrams 11 | 12 | * LucidChart 13 | * proprietary 14 | * exports/imports to SVG, Visio file formats 15 | * however, round tripping Visio --> LucidChart --> Visio --> LucidChart is reported to be unreliable 16 | 17 | * Markdeep 18 | * MJO, Markku on crypto list 19 | * MarkDeep http://casual-effects.com/markdeep/ internally for CPU documentation. In addition to your easy MarkDown tables and code snippets, it has ASCII type block graphics. Has been sufficient for this particular purpose, but certainly has limits. 20 | 21 | * https://www.draw.io/ 22 | * Ben Marshall, University of Bristol 23 | 24 | * It's free, works online, or as a wrapped up desktop app for offline use. 25 | * It saves things in a proprietary format unfortunately, but has all the usual export targets: pdf/jpeg/png/svg. Even experimental support for VSDX, which I think is Visio? 26 | * I can also send you a link which entirely encodes the diagram[1] so others can copy/edit it. It's not the live sharing/collaboration thing which google docs does so well, but it's a good alternative. 27 | 28 | * LibreOffice Draw 29 | * Free, glue dots, connectors and SVG export. 30 | 31 | 32 | 33 | * Google Docs Drawing??? 34 | * Be refers to it 35 | * IIRC I have tried, but was unhappy. Probably because of the lack of off-line behavior, but I don't remember exactly. -------------------------------------------------------------------------------- /Archive/wiki/SourceDest-to-support-Exception-Transparency.asciidoc: -------------------------------------------------------------------------------- 1 | === *_Source/dest_* to support *_exception transparency_* 2 | 3 | This instruction family is *_restartable after partial 4 | completion_*. E.g. 
on an exception such as a page fault or debug 5 | address breakpoint the output register RD is set to the data address 6 | of the exception, and since the instruction is *_source/dest_*, with 7 | the register numbers in RD and RS1 required to be the same, returning 8 | from the exception to the CMO.UR instruction will pick up execution 9 | where it left off. 10 | 11 | [NOTE] 12 | .Rationale: source/dest by requiring RD=RS1 13 | ==== 14 | This proposal has chosen to implement *_source/dest_* by 15 | requiring separate register fields RD and RS1 to contain the same 16 | value. An alternative was to make register field RD both an input and 17 | an output, allowing RS1 and RS2 to be used for other inputs. Separate 18 | RD=RS1 source/dest is more natural for a RISC instruction decoder, and 19 | detecting RD=RS1 has already been performed for other RISC-V 20 | instructions, e.g. in the V extension. However separate RD=RS1 21 | "wastes" instruction encodings by making RD!=RS1 illegal, and leaves 22 | no register free in the CMO.VAR instruction format for any 3rd operand such as the CMO type, hence 23 | requiring . in the instruction encoding. 24 | 25 | TBD: see *_who cares about RD=RS1 source/dest?_* 26 | ==== -------------------------------------------------------------------------------- /Archive/wiki/TOC---Table-of-Contents.md: -------------------------------------------------------------------------------- 1 | The github wiki apparently has online macros such as TOC (Table Of Contents) disabled. 2 | 3 | That's okay, I've written TOCs for other wikis that operate offline. Not yet using here. 4 | 5 | This just a baby first step: links extracted. 6 | 7 | * TBD: hand edit to logical structure. 8 | * TBD: delete non-TOC links 9 | * TBD: collect non-linked / non-TOC'ed pages 10 | 11 | === 12 | 13 | Issues 14 | 15 | Dang, no easy crosslinking between Github wiki and issues. 
16 | 17 | TBD: auto generate issue summaries 18 | 19 | * not for all, but for important pending issues 20 | * TBD: mark in the issue database? 21 | 22 | 23 | Official GitHub repo issues: 24 | 25 | * https://github.com/riscv/riscv-CMOs/issues/2 26 | 27 | Original/personal issues: 28 | 29 | * https://github.com/AndyGlew/Ri5-stuff/issues/2 30 | * Verify that the recommended [[partial instruction completion loop constructs]] for CMOs operate correctly if optional prefetches or hints are treated as NOPs. #2 31 | # TBD: migrate pending issues from personal to official CMOs depository 32 | 33 | === 34 | 35 | TBD: this is badly out of datae - so out of date, it should be abandoned if the scripts are not rerun soon. 2020-08-12 36 | 37 | * [[An-even-quicker-and-dirtier-summary-of-proposed-instruction-encodings-for-RISC-V-CMOs]] 38 | * [[Fixed Block Size Prefetches and CMOs]] 39 | * [[Block-memory-operations:-such-as-MEMSET-and-MEMCOPY]] 40 | * [[C library block memory operations]] 41 | * [[exception transparency for instructions with partial completion]] 42 | * [[partial completion]] 43 | * [[CMO-goals]] 44 | * [[CMOs-(Cache-Management-Operations)]] 45 | * [[An even quicker and dirtier summary of proposed instruction encodings for RISC-V CMOs]] 46 | * [[CMO-types issue]] 47 | * [[Consensus Work in Progress]] 48 | * [[Fixed Block Size Prefetches and CMOs]] 49 | * [[Instruction Name Choice]] 50 | * [[Non-Address Based CMOs for Abstraction and Efficiency]] 51 | * [[Overview of CMO operations]] 52 | * [[Quick and Dirty Proposal for RISC-V CMOs]] 53 | * [[STATUS: almost done? 
- maybe]] 54 | * [[Terminology for instructions that manage microarchitecture state such as caches, prefetchers and predictors]] 55 | * [[Variable Address Range CMOs]] 56 | * [[CMOs-Not-Based-on-Memory-Address]] 57 | * [[bulk invalidate]] 58 | * [[CMOs-proportional-to-cache-size-rather-than-address-range]] 59 | * [[CMOs based on cache microarchitecture]] 60 | * [[Extended-CMO-types]] 61 | * [[Fixed-Block-Size-Prefetches-and-CMOs]] 62 | * [[An even quicker and dirtier summary of proposed instruction encodings for RISC-V CMOs]] 63 | * [[Extended CMO Type]] 64 | * [[Mnemonics and Names]] 65 | * [[STATUS: almost done - maybe]] 66 | * [[Home]] 67 | * [[CMOs (Cache Management Operations)]] 68 | * [[Quantization, dequantization, and interpolation instructions for DL, math, etc.]] 69 | * [[Sharing Drawings and Diagrams]] 70 | * [[hack-relative-URLs-in-github-project-wiki-repo]] 71 | * [[ISSUE:-process-migration-argues-for-whole-cache-invalidation-operations-and-against-the-partial-progress-loop-construct]] 72 | * [[CMO.UR loop construct]] 73 | * [[parent page||Non-Address-Based-CMOs-for-Abstraction-and-Efficiency]] 74 | * [[Instructions-that-Support-Partial-Progress]] 75 | * [[Mandatory-versus-Optional-CMOs,-PREFETCHES,-and-CPHs]] 76 | * [[]] 77 | * [[Non-Address-Based-CMOs-for-Abstraction-and-Efficiency]] 78 | * [[(see issue)|ISSUE: process migration argues for whole cache invalidation operations and against the partial progress loop construct]] 79 | * [[CMO UR descriptor operand]] 80 | * [[CMO UR index]] 81 | * [[CMO descriptor]] 82 | * [[CMO loop construct]] 83 | * [[CMO.UR loop construct]] 84 | * [[Instructions that Support Partial Progress]] 85 | * [[RISC-V hardware error reporting]] 86 | * [[machine check]] 87 | * [[system description such as CPUID or config string]] 88 | * [[trap or use for other instructions]] 89 | * [[Overview-of-CMO-operations]] 90 | * [[Survey of CMOs in Modern Computer Architectures]] 91 | * 
[[Quantization,-dequantization,-and-interpolation-instructions--for-DL,-math,-etc.]] 92 | * [[multipart instruction approach]] 93 | * [[Quick-and-Dirty-Proposal-for-RISC-V-CMOs]] 94 | * [[|cmo_type CMO instruction flavor]] 95 | * [[|Virtual or Physical CMO instruction flavor]] 96 | * [[CMO goals]] 97 | * [[CMO variable address range alternatives]] 98 | * [[CMOs Not Based on Memory Address]] 99 | * [[CMOs based on cache microarchitecture]] 100 | * [[CMOs proportional to cache size rather than address range]] 101 | * [[Instantaneous Flushes of Predictor and Cache State]] 102 | * [[Transparent Resumeability Prefers SrcDst Register Operands]] 103 | * [[full memory addressing mode rs1+imm12 for prefetches and CMOs]] 104 | * [[STATUS:-almost-done - maybe]] 105 | * [[Proposal Status Tags]] 106 | * [[STATUS: almost done - maybe]] 107 | * [[Sharing-Drawings-and-Diagrams]] 108 | * [[Some-Page]] 109 | * [[Terminology-for-instructions-that-manage-microarchitecture-state-such-as-caches,-prefetchers-and-predictors]] 110 | * [[At the time of writing pages in this document (wiki)]] 111 | * [[BTB]] 112 | * [[CMOs (Cache Management Operations)]] 113 | * [[CMOs-(Cache-Management-Operations)]] 114 | * [[CPH (Cache Performance Hints)]] 115 | * [[CPH (Cache Performance Hints) instructions]] 116 | * [[HWDS (Hardware Data Structure)]] 117 | * [[List of optional versus mandatory CMOs]] 118 | * [[Mandatory versus Optional CMOs, PREFETCHES, and CPHs]] 119 | * [[PREFETCH instructions]] 120 | * [[Performance Related Hardware Data Structure]] 121 | * [[Performance Related Hardware Data Structures]] 122 | * [[TLB]] 123 | * [[at the time of writing]] 124 | * [[branch predictor stew|https://patents.google.com/patent/US7143273B2/en]] 125 | * [[cache push out or post-store]] 126 | * [[clean or flush]] 127 | * [[optional prefetches or post-store]] 128 | * [[pHWDS]] 129 | * [[partial instruction completion loop constructs]] 130 | * [[partial instruction 
completion|Instructions-that-Support-Partial-Progress]] 131 | * [[Virtual-or-Physical-CMO-instruction-flavor]] 132 | * [[cmo_type-CMO-instruction-flavor]] 133 | * [[|Extended CMO types]] 134 | * [[Point of Long Term Persistence]] 135 | * [[Point of Short Term Persistence]] 136 | * [[Point of Unification]] 137 | * [[Survey of CMOs in Modern Computer Architectures]] 138 | * [[hack-relative-URLs-in-github-project-wiki-repo]] 139 | * [[..]] 140 | * [[../../wiki]] 141 | -------------------------------------------------------------------------------- /Archive/wiki/Virtual-or-Physical-CMO-instruction-flavor.md: -------------------------------------------------------------------------------- 1 | Both the variable range and fixed size CMO instructions specify an address, or a range of addresses. 2 | -------------------------------------------------------------------------------- /Archive/wiki/Why-CMOs.xlsx-was-written-in-Excel.md: -------------------------------------------------------------------------------- 1 | CMOs.xlsx, in GitHub repo at https://github.com/riscv/riscv-CMOs/blob/master/CMOs.xlsx 2 | is a "list" of CMOs. Not exactly a list, more like a table from which the actual list can be generated. 3 | 4 | I am in bit embarrassed that I wrote this in Excel, since I prefer to use open tools, easily parseable by scripts etc. 5 | 6 | The main reason I wrote this in Excel is that I wanted arrows in my spreadsheet stuck to cells of the spreadsheet. Similarly diagrams. 7 | 8 | (Oh, f***: I would have pasted a screen-snip of the diagrams here. But this wiki does not support pasting of images, instead you have to save the image as a separate file. This is another shortcoming of a tool that would make me like to switch to something more capable.) 9 | 10 | Google Sheets does not seem to have this feature. In Google sheets you can draw arrows etc. on top of your spreadsheet, but if the cells are resized the arrow stayed the same size and you have to manually adjust them. 
11 | 12 | TBD: can OpenOffice provide this feature? I have not yet tried. 13 | 14 | 15 | Similarly, I rely on the ability to collapse groups of rows and/or columns. 16 | 17 | I would actually like to use pivot charts for this, but Excel pivot charts are insufficiently powerful. 18 | -------------------------------------------------------------------------------- /Archive/wiki/Wiki-and-Repo-crosslink-issues.md: -------------------------------------------------------------------------------- 1 | Relative links can cross-link between GitHub wikis and repo on GitHub itself. 2 | 3 | However, when cloned, the wiki and the repo are different git archives. If care is taken the relative cross-links will work. I have made the wiki a submodule of the repo, so that if the repo is cloned recursively the links should still work. 4 | 5 | The wiki links will nearly always work, whether on GitHub itself or in a clone, but the links from wiki to repo (and back again) may not always work if cloned without taking the nesting into account. -------------------------------------------------------------------------------- /Archive/wiki/_Footer.md: -------------------------------------------------------------------------------- 1 | Jump to: [wiki](Home), [TOC](TOC---Table-of-Contents), [repo](..) 2 | -------------------------------------------------------------------------------- /Archive/wiki/_Sidebar.md: -------------------------------------------------------------------------------- 1 | | [This wiki](./Home), [search](../search?q=ENTER-QUERY-HERE&type=wikis)
[repo](..), [issues](../issues)
| 2 | |---| 3 | | [TOC](TOC---Table-of-Contents) | 4 | | GH abs | 5 | |[[Wiki and Repo crosslink issues]]| 6 | | [Edit this Sidebar](_Sidebar/_edit) | 7 | | [Generic Sidebar](https://github.com/AndyGlew/Ag-stuff/wiki/Ag-Common-Sidebar-and-Footer) | 8 | -------------------------------------------------------------------------------- /Archive/wiki/cmo_type-CMO-instruction-flavor.md: -------------------------------------------------------------------------------- 1 | There are more possible configurations of caches than are conceived of by computer architecture textbooks. E.g. number of I/D/unified levels, inclusive/exclusive/neither, virtual/physical, etc. Let alone prefetchers, victim choosers (LRU), etc. 2 | 3 | In general portable software does not want to need to know about cache microarchitecture details. 4 | But sometimes software needs to, or benefits from, knowing microarchitecture details. 5 | * performance: 6 | * "I probably don't need this data again, so you might as well get it out of the cache" 7 | * vs "this loop nest has one operand that fits in a 32K L1$, and one that doesn't fit in a 4MB cache, 8 | so use LRU for the first and MRU for the second" 9 | * power management: 10 | * "I am putting the whole system to sleep and will not be retaining data in the caches, so flush all data to battery backed up DRAM" 11 | * vs "I am powering off CPU core #1 but not #2, so flush all dirty data in the caches that will be powered off" 12 | * vs "I happen to know that I can power off the L2$ and still operate the L1$ and the L3$, so do that" 13 | * security 14 | * "flush/invalidate/reset all possible microarchitecture state that might be a timing channel" 15 | * vs "I am using way partitioning to isolate users in the large L2$, so flush the L1$ completely but do not touch the L2$" 16 | * SW coherence 17 | * "flush/invalidate all caches between me and DRAM" 18 | * vs "I am only trying to synchronize with threads/processes running on other harts/CPUs with which I 
share an L3$, so flush/invalidate the L1$, L2$, and everything all the way to the L3$, but don't flush the L3$ or L4$. 19 | * how might SW know this? Not on a general purpose OS with process migration. But perhaps in an embedded/HPC system, or via processor affinity. 20 | 21 | Many more examples are not just possible, but have been built in the real world, requested of CPU vendors, or proposed by academics. 22 | 23 | 24 | This proposal does NOT try to comprehend or represent all possible such CMO types. 25 | 26 | 27 | This proposal places a small number of such possibilities in the instruction encoding. 28 | 29 | WARNING: terminology confusion: Intel and IBM define "flush" oppositely. In Intel x86, "flush" means "evict dirty data", maybe/maybe not leaving clean data behind. In IBM POWER, "flush" means invalidate data without writing it back. What Intel calls a flush IBM calls a clean. What IBM calls a flush Intel calls an invalidate. TBD: what terminology should RISC-V use? Until determined, I will write out verbosely 30 | 31 | * Flush - write out dirty data 32 | * what is left behind 33 | * leaving clean data behind, e.g. in S state 34 | * leaving invalid cache lines behind 35 | * depth 36 | * to "[[Point of Unification]]" 37 | * to DRAM 38 | * to battery backed up DRAM 39 | * to non-volatile storage (NVRAM) 40 | * which: data and/or instruction [see note 2] 41 | * Prefetch [see note 3] 42 | * prefetch type 43 | * prefetch data to read 44 | * prefetch data to write 45 | * prefetch instructions 46 | * prefetch bias 47 | * place in LRU, i.e. expect temporal locality 48 | * place in MRU, i.e. expect non-temporal locality 49 | 50 | Even the list above expands to 2*4*2 + 4*2 = 24 possibilities. Probably more that we want to spend opcode space on. 51 | 52 | Enumerating by priority 53 | 1. D, writeout dirty, leave clean behind, to [[Point of Unification]] 54 | * use: performance 55 | 1. 
D, writeout dirty, invalidate all, to [[Point of Unification]] 56 | * use: SW coherence 57 | 1. D, writeout dirty, leave clean behind, to [[Point of Long Term Persistence]] (NVRAM) 58 | * use: persistence 59 | 1. D, writeout dirty, leave clean behind, to [[Point of Short Term Persistence]] (e.g. battery backed up DRAM in a phone) 60 | * use: power management 61 | 62 | 1. Prefetch D to read, LRU 63 | 1. Prefetch D to write, LRU 64 | 1. Prefetch D to read, MRU 65 | 1. Prefetch I, LRU 66 | 67 | ==> 8 encodings. 68 | 69 | Actually, I would prefer to have 1 or 2 less than a power of two in-instruction encodings. 70 | 71 | In general, for all of the CMO types that cannot be represented in that small set, I propose to reserve encodings and/or instruction formats for [[|Extended CMO types]] 72 | 73 | 74 | 75 | 76 | 77 | TBD: compare to a [[Survey of CMOs in Modern Computer Architectures]] 78 | 79 | Note 1: in this small in-instruction-encoding set we are NOT including destructive and security damaging operations like "invalidate cache line even if dirty", as in Intel's INVD instruction or IBM's DCBA. Nor are we including operations like "allocate zero filled cache line without read-for-ownership", as in IBM's DCBZ, which are secure, but which may expose the cache line size. (However, I expect that customers will strongly request DCBZ, so I consider it wise to reserve instruction encoding space.) 80 | 81 | Note 2: the EXPORT.I instruction proposed by the J extension WG essentially is equivalent to 82 | CMO.VAR.VA.. 83 | In general, this CMO proposal defers to that EXPORT.I proposal, and will not provide any instruction related CMOs. I am listing them here only to ensure coverage. 84 | 85 | Note 3: it is TBD whether cache prefetches will be part of the CMO proposal. Prefetch instructions usually want to have addressing modes comparable to normal memory reference instructions, e.g. Memory[reg+offset], where the prefetch offset is increased by a fetch-ahead delta. 
Therefore, if prefetches are included, the CMO.FSZ.* format should be extended to have a memory addressing mode. There may not be enough instruction encoding space in ILEN=32 to allow this. For that matter, certain 86 | -------------------------------------------------------------------------------- /Archive/wiki/draft-CMO-domains-and-levels.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | === CMO memory hierarchy domains and levels .. 3 | 4 | The . property specified the domains and levels involved in CMO operations. 5 | 6 | "Domains" refers to CMOs that flush data from not just one cache, but from several layers of cache. 7 | Sometimes by flushing an outer inclusive layer. 8 | Sometimes by traversing multiple levels. 9 | 10 | Actual implementations may have many idiosyncratic caches and other parts of the memory hierarchy. 11 | 12 | There should be a standard RISC-V way to flush such non-standard implementation specific cache levels, but that is not part of this proposal. 13 | 14 | Instead this proposal defines a small(?) number of abstract cache layers. Implementation cache layers will be mapped onto these layers. 15 | 16 | These pseudo-abstract layers are 17 | 18 | Cache levels and domains 19 | 20 | * POC(I,D) 21 | ** the Point of Consistency for Instructions and Data, for the common case of inconsistent instruction and data caches 22 | ** ARM calls this the Point of Unification 23 | * The POC(ID) defines two domains that may need to be flushed 24 | ** I-->POC(ID) - the path from processor through I$ to the Point of ID consistency 25 | ** D-->POC(ID) - the path from processor through D$ to the Point of ID consistency 26 | 27 | * POC(D*), domain P*-->POC(D*) 28 | ** the path from any or all of a set of processors to the common level for all processors in that set. 
29 | ** ARM calls this the Point of Inner Consistency 30 | ** assumed cache coherent in this domain 31 | ** used for performance optimizations, not correctness 32 | 33 | * POC(Unc), domain P*-->POC(Unc) 34 | ** the path from any or all of a set of non-cache-coherent processors to a common point 35 | ** SW managed consistency works if this domain is flushed to POC(Unc) 36 | 37 | * POC(Uio), domain P*-->POC(Uio) 38 | ** the path from any or all of a set of non-cache-coherent processors to a point in common with non-coherent I/O 39 | ** SW managed consistency for I/O devices works if this domain is flushed to POC(Uio) 40 | 41 | Frequently, POC(Unc), POC(Uio) are identical. Frequently, POC(Unc), POC(Uio) are DRAM. But not always, therefore distinguished. 42 | 43 | Memory, Volatile and Non-Volatile 44 | 45 | * M, domain P*-->M 46 | ** memory, eg DRAM 47 | ** not necessarily battery backed up 48 | 49 | * BM, domain P*-->BM 50 | ** memory that survives power removal from system parts such as harts 51 | ** frequently the same as main memory, but not always. May be a subset. 52 | 53 | * NV1, domain P*-->MN/BM-->NV 54 | ** memory that survives even when batteries fail 55 | *** i.e., lasts years, not days 56 | * NVR, domain P*-->MN/BM-->NV-->NVR 57 | ** all, last, or redundant/reliable level of nonvolatile memory 58 | ** memory that tolerates failures of other NV1 components 59 | *** i.e., lasts years, not days 60 | 61 | 62 | *_Count_*: 9 - 4 bits 63 | 64 | Unfortunately, would like local/global flavors of the above. => 5 bits => exceeds 256 encodings. 65 | 66 | So need to compress more. 67 | 68 | TBD... 
69 | -------------------------------------------------------------------------------- /Archive/wiki/draft-CMO-issues.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | # Other issues for this CMO Proposal 3 | 4 | [NOTE] 5 | ==== 6 | Discussion, issues, and rationale, have been embedded in this document in such NOTE sections, interleaved with normative text. 7 | 8 | This section serves to capture such issues that did not naturally get interleaved elsewhere. 9 | ==== 10 | 11 | [NOTE] 12 | .Extensibility Limitation: Non-address tag specific invalidations 13 | ==== 14 | Many computer systems have "special tags" - non-address tags - in their caches - e.g. security domains - and want selective cache invalidations and flushes for such special-tags. 15 | 16 | The instruction format in this current CMO proposal cannot be extended to do this. The address range CMO.AR already uses all three register fields in the standard RISC-V R format, so there is no free register operand to specify the special-tag. The microarchitecture index CMO.UR only uses 2 register fields, but its encoding is packed such that CMO.UR = CMO.AR with rs2=x0, so again there is no free register operand to specify the special-tag. 17 | 18 | This is acceptable for the use case of security information leak mitigation, which requires the entire cache to be invalidated or flushed. 19 | 20 | But there are other use cases which can benefit from selective special-tag invalidations. In particular, when the special-tag is being recycled, when it was used for an old process that is no longer running, and is needed for a new process. 21 | 22 | Also, it seems natural to extend this CMO proposal to TLB invalidation, but it is quite common in computer instruction sets to provide PID or ASID or VMID specific invalidations. Not just when recycling such a special-tag, but also when translations are changed. 
23 | ==== 24 | 25 | [NOTE] 26 | .CPU hardware may not be aware of system configuration 27 | ==== 28 | Operations such as "flush to the point of I/O coherence" 29 | are dependent not on CPU microarchitecture but on system architecture. 30 | E.g. the point of I/O coherence may be DRAM, or it may be a last level cache, if the I/O device can do cache line injection. 31 | Indeed, the point of I/O coherence may be different for different devices in the same system. 32 | SW may only want to do the minimum necessary for the device it is working with. 33 | There is no provision in this CMO proposal for that. 34 | 35 | Similarly, cache flushes for security related information channel mitigation 36 | may in general need to flush all cache levels, L1-L2-L3 37 | (or at least up to the cache level where the bandwidth of the channels is acceptably low). 38 | However, in other situations some of the outer cache levels may be partitioned and not require flushing, e.g. by cache ways. 39 | 40 | Exactly which levels of cache need to be flushed for any particular operation is not known to the CPU, may be system hardware dependent, but may also be system software dependent. 41 | 42 | In general, what CMO should be used, .>., should be mapped from abstract CMO concepts to which caches actually must be involved. 43 | There is no provision in this memo for such mapping in this proposal, except for trapping and emulating by M-mode. 44 | 45 | Realistically this will probably mean that the abstract CMO operations in this proposal are useless. Programmers will need to figure out which caches actually get modified by any of the instructions, and will probably ignore the abstractions. 46 | This is no better than the current state of the art. 
47 | ==== 48 | -------------------------------------------------------------------------------- /Archive/wiki/draft-CMO-type-spreadsheet.asciidoc: -------------------------------------------------------------------------------- 1 | === CMO type spreadsheet 2 | 3 | A spreadsheet CMOs.xlsx presents the desired CMO types in a format 4 | more convenient than this asciidoc text. 5 | 6 | This spreadsheet is available on GitHub at https://github.com/riscv/riscv-CMOs/blob/master/CMOs.xlsx 7 | 8 | TBD: ensure that the latest version of the spreadsheet has been uploaded. As of <2020-06-11 Thursday, June 11, WW24> the version online is dated April 30th. 9 | -------------------------------------------------------------------------------- /Archive/wiki/draft-actual-CMO-operations.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | === Actual CMO operations .. 3 | 4 | ==== Actual CMO operations - flushes and prefetches, etc. 5 | 6 | This proposal includes the following actual CMO operations. Short names are listed here - more complete descriptions in a section below. 7 | 8 | * Traditional CMOs: CLEAN, FLUSH, INVALIDATE-I$, DISCARD 9 | 10 | * Less Common: INVALIDATE-CLEAN, SET-LRU, LOCK-LINE. 11 | 12 | Space should be reserved for more operations, including SAFER_DISCARD_1 and SAFER_DISCARD_2, that remedy the security deficiencies of the DISCARD operation (the well known PowerPC DCBA) while preserving much of the performance advantage. 13 | 14 | In addition to these CBOs that perform various forms of flushes and invalidates, 15 | this proposal includes operations that are often not called CMOs. 16 | 17 | * Prefetches: PREFETCH-R, PREFETCH-EW, PREFETCH-X - using the variable address range approach. 18 | 19 | * Destructive: ZALLOC - allocate a zero-filled cache line. 20 | 21 | Some have requested locking versions: ZALLOC-and-LOCK, and FETCH-R/W/X-and-LOCK. 22 | 23 | *_COUNT_*: 13 encodings: 4 bits. 
24 | 25 | ==== Security / Timing Channel Bit 26 | 27 | Requirement: in addition to flushing caches, it is also required, for 28 | timing channel mitigation such as in Spectre, to flush 29 | microarchitecture mechanisms that can provide timing channels, such as 30 | LRU bits, predictors and prefetchers. Some of these are associated 31 | with cache entries - hence the security/timing channel "bit". 32 | Not actually a bit - applied only to 2 CMOs. 33 | 34 | The security property is applied to the CMO.UR variants that leave no data behind: 35 | FLUSH and INVALIDATE. 36 | 37 | This increases the *_COUNT_* to 15 encodings: 4 bits. 38 | 39 | ==== Detailed description of CMO operations 40 | 41 | Unfortunately, there is no widespread agreement as to what CMO names should be. It is therefore necessary to define their behavior more completely according to cache states. 42 | 43 | Without loss of generality we will mention only two cache states, 44 | Clean and Dirty, relevant to writeback caches. Writethrough and 45 | instruction caches contain only clean data, so may map to more than 46 | one operation that handles dirty data. 
47 | 48 | Traditional CMOs 49 | 50 | * CLEAN 51 | ** Dirty-->WB-->Clean 52 | ** Clean-->Clean 53 | * FLUSH 54 | ** Dirty-->WB-->Invalid 55 | ** Clean-->Invalid 56 | ** Alternate names 57 | ** Intel calls this WBINVD 58 | ** Special considerations: security/timing channel variant for CMO.UR 59 | * DISCARD 60 | ** Dirty-->no WB-->Invalid 61 | ** Clean-->Invalid 62 | ** Alternate names 63 | ** Intel calls this INVD 64 | ** Special considerations: 65 | *** security/timing channel variant for CMO.UR 66 | *** security hole 67 | **** there are several safer variants of DISCARD, reserving space for but not actually part of this proposal 68 | * DISCARD-CLEAN 69 | ** Dirty-->unaffected 70 | ** Clean-->Invalid 71 | ** Special considerations: 72 | *** can be used in some incoherent I/O use cases 73 | *** remedies the security problems of DISCARD - safe for user mode 74 | * SET-LRU 75 | ** CMO.VAR only 76 | ** most useful special case of the class of replacement algorithm manipulation CMOs 77 | 78 | Operations not typically considered CMOs: 79 | 80 | * PREFETCH-R 81 | * PREFETCH-W 82 | ** prefetches in exclusive clean or dirty state - ready for writes with least possible expense 83 | * PREFETCH-X 84 | ** prefetch code, to execute 85 | ** like PREFETCH-R, except targeting I$ level(s) 86 | 87 | Destructive 88 | 89 | * ZALLOC 90 | ** allocate cache line without reading - zero filling 91 | ** PowerPC DCBZ 92 | * ALLOC 93 | ** allocate cache line without reading - using whatever was there before 94 | ** security hole - but still sometimes used 95 | ** PowerPC DCBA 96 | 97 | Locking variants of the above 98 | * FETCH-R-and-LOCK 99 | * FETCH-W-and-LOCK 100 | * FETCH-X-and-LOCK 101 | * ZALLOC-and-LOCK 102 | * ALLOC-and-LOCK 103 | 104 | 105 | *_Count_*: 15 operations - 4 bits 106 | -------------------------------------------------------------------------------- /Archive/wiki/draft-microarchitecture-timing-state-flushes.asciidoc: 
-------------------------------------------------------------------------------- 1 | 2 | === Flushes of Microarchitecture State that Affects Timing Channels 3 | 4 | Requirement: *_all_* microarchitecture state that influences timing, such as predictors, prefetchers, cache LRU bits, etc., 5 | should be invalidated by the most global CMO.UR.ALL.TC instruction, i.e. with the timing_channel enabled property indicated by the .. 6 | 7 | It is expected that subsets of such microarchitecture state will be associated with other CMO.UR.*.timing_channel instructions. 8 | 9 | NOTE: E.g. the instruction cache invalidation CMO.UR.I.TC may invalidate simple branch predictors, 10 | but not the L2 cache LRU bits. 11 | Which microarchitecture timing state is associated with which CMO.UR.* instructions is implementation dependent. 12 | There should be a way to discover such associations, but that is not part of this proposal. 13 | 14 | 15 | 16 | The phrasing "all microarchitecture timing state ... should be invalidated" 17 | is defined to mean "within the implementation dependent security model of an implementation". 18 | Some implementations may not invalidate any microarchitecture state. 19 | and should therefore be considered insecure for use cases that involve untrusted users. 20 | Other implementations may invalidate some but not all. 21 | These limitations should be documented so that users can determine if an implementation is suitable for their security requirements. 22 | Such documentation is not part of this proposal. 23 | 24 | Permission: CMO.UR.* without the TC property may invalidate such microarchitectures timing channel state. I.e. it is permitted to be more conservative than is required. 25 | 26 | TIP: however, it is expected that use cases such as software managed cache coherency will require invalidating caches, but will not require invalidating timing state, so performance would benefit by distinguishing CMO.*.TC=1 from CMO.*.TC=0. 
27 | 28 | Permission: CMO.VAR.* instructions, i.e. memory address range based instructions, may invalidate microarchitecture timing state, but are not required to do so. 29 | 30 | NOTE: ISSUE: should we provide orthogonal encodings CMO.VAR.*.TC (currently proposed), or should we save encoding space by not providing them? 31 | 32 | Requirement: either the CMO.*.TC instructions unconditionally trap, or the <> is implemented, allowing system software to enforce trapping if desired. 33 | 34 | NOTE: There is no requirement to unconditionally trap unimplemented 35 | CMO.*.TC instructions, even on implementations that do not make any attempt to 36 | invalidate microarchitecture timing state. This allows code that uses CMO.*.TC 37 | to run portably on such systems. 38 | But such code on such systems is only secure if the system makes guarantees such as not having untrusted users. 39 | System software such as an OS is encouraged to use the <> to trap such instructions when the guarantee is not met. 40 | 41 | 42 | 43 | 44 | 45 | 46 | [NOTE] 47 | ==== 48 | Microarchitecture timing channels data structures 49 | are inherently implementation dependent. 50 | 51 | Some of these structures can be "instantaneously" invalidated, i.e. in O(1) time, not proportional to size or number of elements. 52 | 53 | However, some of these structures cannot be instantaneously invalidated, and must be scanned or iterated over. 54 | 55 | Different implementations may implement conceptually similar structures in either way. 56 | E.g. a branch predictor might be O(1) invalidated inside the CPU; 57 | but some components of some branch predictors are implemented outside the CPU and must be scanned 58 | e.g. several companies have placed branch predictor information in the L2 cache. 
59 | 60 | Some of these structures, such as LRU bits and some large branch 61 | predictors, are associated with memory addresses, and are invalidated 62 | by the CMO.* range instructions when the appropriate bit in 63 | the . funct7 is set, aka the "security" bit 64 | 65 | Some of these mechanisms are not naturally associated with caches explicitly managed by the CMO.* instructions' . 66 | E.g. while it might be reasonable to associate fully tagged BTBs with branch addresses in memory, 67 | branch predictor pattern history tables (PHTs) are usually hashed and have no tags. 68 | 69 | Nevertheless, it is required that CMO.UR.ALL.TC will invalidate all microarchitecture timing channels state, 70 | ranging from branch predictors inside the CPU to LRU bits in external caches. 71 | ==== 72 | 73 | [NOTE] 74 | ==== 75 | ISSUE: this proposal does not provide any ability to invalidate microarchitecture timing state such as branch predictors 76 | independent of the instruction cache, or some other cache. Should it? 77 | 78 | CMO.UR.*.TC invalidations of microarchitecture timing state 79 | are required to mitigate timing channels for security - e.g. to mitigate security flaws such as Spectre. 80 | They are occasionally also desired to improve reproducibility of benchmarks and tests. 81 | 82 | As far as we know, security timing channel mitigation nearly always requires invalidating caches - instruction and data cache timing channels are ubiquitous. 83 | Such caches need not be invalidated for timing channels mitigation only where (a) there are no caches, or (b) the caches are strictly partitioned. 84 | Therefore, for security, it seems reasonable to always couple branch predictor invalidation to cache invalidation/flushing. 85 | 86 | Non-security purposes, such as testability and benchmarking, may prefer not to invalidate microarchitecture timing state, but that is not part of this proposal. 
87 | ==== 88 | 89 | 90 | 91 | 92 | [WARNING] 93 | ==== 94 | Unfortunately, in many implementations CMO.UR.ALL.TC cannot guarantee that all microarchitecture timing channels state has been invalidated, 95 | for the same reasons that CMO.UR.* cannot guarantee that a cache is entirely invalid after the instruction. 96 | Except for strictly inclusive caches. 97 | In the presence of non strictly inclusive caches, 98 | e.g. exclusive L1/L2 cache hierarchies 99 | a CMO.UR.* a line may be in the L2 cache when the L1 cache is scanned, 100 | but may migrate to the L1 cache before the set it resides in is scanned in the L2 cache. 101 | Such behavior is implementation dependent. 102 | Implementations may provide special cache modes such as "no fill cache mode" 103 | that permit complete invalidation to be guaranteed, 104 | but such modes typically are not allowed to user mode. 105 | 106 | The conditions in which CMO.UR.*.TC can guarantee complete invalidation must be documented, 107 | and should be discoverable, although such discovery mechanisms are not part of this proposal. 108 | ==== 109 | -------------------------------------------------------------------------------- /Archive/wiki/files/Ri5-CMOs-proposal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/Archive/wiki/files/Ri5-CMOs-proposal.pdf -------------------------------------------------------------------------------- /Archive/wiki/generated-HTML-and-PDF-for-CMOs-proposal.md: -------------------------------------------------------------------------------- 1 | Asciidoctor [[Ri5-CMOs-proposal]] is used to generate HTML and PDF 2 | from .asciidoc source files which are pages on this wiki. 
3 | 4 | The generated files can be found in the parent project repo: 5 | * [HTML-rendered](https://htmlpreview.github.io/?https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.html) - as rendered by the htmlpreview proxy 6 | * [HTML-source](https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.html) - GitHub renders as plain text if not proxied 7 | * [PDF](https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.pdf) 8 | * PDF displays 9 | 10 | It would be better to have the generated HTML and PDF on the wiki 11 | because that's where it belongs, since generated from wiki. 12 | Unfortunately HTML and PDF do not display properly in a GitHub wiki. 13 | Raw HTML displayed as text, not rendered; PDF downloads. 14 | In the product repo, since HTML and PDF are displayed there. 15 | 16 | Because the wiki and project have separate git repos, they may not match, 17 | i.e. the repo HTML and PDF may be stale. 18 | 19 | Even in the wiki the HTML and PDF may be out of date, since scripts must be run to generate. 20 | But more likely to be consistent. -------------------------------------------------------------------------------- /Archive/wiki/hack-relative-URLs-in-github-project-wiki-repo.md: -------------------------------------------------------------------------------- 1 | This github project has at least two git repos, the main repo and the wiki repo. 2 | * main: https://github.com/riscv/riscv-CMOs.git 3 | * wiki https://github.com/riscv/riscv-CMOs.wiki.git 4 | 5 | I want to use relative links between the workspaces that ordinarily correspond to these repos, 6 | * both on the github website and when I have cloned elsewhere 7 | * e.g. 
so that I can clone both together and be consistent 8 | * TBD: exact (automated) procedure to clone both repos and stay relatively consistent 9 | 10 | Markdown syntax relative links fail: 11 | * [[..]] - \[[..]] 12 | * [[../../wiki]] - \[[../../wiki]] 13 | 14 | Trying HTML relative links: 15 | * FAIL: href="wiki" relative - \href="wiki" relative]\ 16 | * fails because it resolves to https://github.com/riscv/riscv-CMOs/blob/master/wiki, 17 | * i.e. the relative position is not https://github.com/riscv/riscv-CMOs but is blob/master/wiki 18 | * which tells us what we need to know 19 | * href="." relative underneath that. 20 | * href=".." 21 | * href="../.." relative 22 | * href="../blob/master/hack-relative-URLs-in-github-project-main-repo.md" 23 | * https:../blob/master/hack-relative-URLs-in-github-project-main-repo.md 24 | 25 | 26 | I am so used to websites not allowing ascending relative components in URLs that I wonder if there is a security hole here... Should not be as long as cannot actually escape an areas mapped to the logged in user or guest. 27 | 28 | Recording this in two places: 29 | * main: https://github.com/riscv/riscv-CMOs/blob/master/hack-relative-URLs-in-github-project-main-repo.md 30 | * wiki: https://github.com/riscv/riscv-CMOs/wiki/hack-relative-URLs-in-github-project-wiki-repo 31 | TBD: can I CSE this stuff, transclude, to reduce duplication? 32 | 33 | Bottom line: relative links 34 | * from wiki 35 | * to project "root" from wiki: href=".." 36 | * to main from wiki: href="../blob/master/README.md" 37 | * to user "root" from wiki: href="../.." 38 | * from main 39 | * to project "root" from main: href="../.." 40 | * to user "root" from main: href="../../.." 
41 | * to wiki from main: href="../../wiki" -------------------------------------------------------------------------------- /Archive/wiki/in-band-tagging-pointers.md: -------------------------------------------------------------------------------- 1 | RISC-V is considering in-band tagging of pointers. Workgroups involved include 2 | * J extension (dynamic languages like JavaScript) 3 | * TEE Trusted Execution Environment security 4 | although IMHO (Glew opinion) we are missing a group concerned specifically with preventing 5 | 6 | People involved 7 | * Kostya Serebryany (Google) 8 | * Lee Campbell (Google) 9 | * ... ?? Nvidia ?? ... 10 | * ... ?? Russian group that built TBI and MTE on RISC-V FPGA ??... 11 | 12 | More accurately, RISC-V is considering pre Virtual Address Transformations 13 | since Lee Campbell has proposed a transformation 14 | 15 | ~~~ 16 | address <-- (pointer&mask) | (substitute&~mask) 17 | ~~~ 18 | -------------------------------------------------------------------------------- /Archive/wiki/interception,-modulation,-and-mapping-of-CMOs.md: -------------------------------------------------------------------------------- 1 | See [[Privilege for CMOs]]. This privilege mechanism provides a basic way of trapping CMOs, e.g. to prevent users or guest OSes from performing operations that might be security holes, and also software mapping, e.g. to M-mode which might use idiosyncratic MMIO locations to manage external caches that are not fully integrated with the CPU instruction set or bus transactions. 2 | 3 | I have been unable to persuade people that there is need for a more general mapping mechanism (even though there is). 4 | 5 | Therefore, if you want to do things like 6 | * map user operation "flush all made by this thread to NVRAM persistent storage" 7 | NOT to the "CMO shootdown" operation 8 | * "flush all dirty data from all CPU caches in the coherence domain ..." 9 | to the more efficient 10 | * "flush only writes made on the local hart..." 
11 | Because system software knows that there is no hardware cache to cache migration of dirty cache lines, 12 | and no software thread migration between harts/processors 13 | 14 | This ISA provides no such ability to do such mapping cheaply. 15 | 16 | If you want to do this, then you've got to trap and emulate. 17 | -------------------------------------------------------------------------------- /Archive/wiki/issues-wrt-repo-and-wiki-split.md: -------------------------------------------------------------------------------- 1 | * which files go where? 2 | * it might have been better to split before propagating 3 | * too late - now I will just have to make opposite actions in each repo+wiki 4 | 5 | * branch structure / names -- main/master 6 | 7 | -------------------------------------------------------------------------------- /Archive/wiki/microarchitecture-range-loop.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | // The CMO.UR instruction is intended to be used in a software loop such as that below: 3 | 4 | In pseudocode: 5 | 6 | ---- 7 | x11 := 0 8 | LOOP 9 | CMO.UR.<> x11,x11 10 | WHILE X11 > 0 11 | ---- 12 | 13 | In assembly code: 14 | 15 | ---- 16 | ADDI x11,x0,x0 17 | L: CMO.UR.<> x11,x11 18 | BGEZ L 19 | ---- 20 | -------------------------------------------------------------------------------- /Archive/wiki/techpubs-info.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | [appendix] 3 | == Techpubs Information 4 | 5 | === Conventions specific to this document. 6 | 7 | 8 | ==== GitHub wiki markdown \[[links]]` are broken 9 | Bold italic *_links_* indicate text that should be links to pages in the original wiki. 10 | The tools used to generate this document HTML and PDF from asciidoc and markdown 11 | do not handle these links (yet). 
12 | 13 | ==== Rationale using AsciiDoctor NOTE admonition 14 | [NOTE] 15 | ==== 16 | _Rationale_ and other background information is indicated by AsciiDoctor NOTE sections such as this. 17 | 18 | TBD: I would really prefer to design an explicit RATIONALE style or admonition, but I do not know how to do this in AsciiDoctor yet. 19 | Therefore, repurposed AsciiDoctor's existing NOTE admonition style. 20 | Unfortunately, this has problems such as section headers not being allowed in the note/rationale text, 21 | and conversely that note/rationale text does not appear in the Table of Contents (TOC). 22 | Therefore, section headers do not appear as these rationale/notes, even if the entire section is rationale, not normative. 23 | 24 | ==== 25 | 26 | === Techpubs Information 27 | 28 | This source document: {docname}{docfilesuffix} 29 | 30 | * docdatetime: {docdatetime} - last modified date and time 31 | ** unfortunately, this is only for the topmost file, NOT across all of the included files 32 | * localdatetime: {localdatetime} - when generated 33 | 34 | 35 | Revisions - manually maintained, frequently obsolete: 36 | 37 | * revdate: {revdate} 38 | * revnumber: {revnumber} 39 | * revremark: {revremark} 40 | 41 | More techpubs information, including history thrashing as to how and where to build and store, on wiki page 42 | link:techpub[] / link:file:techpubs.asciidoc[] 43 | (TBD: fix so that works both checked out as file: links and on GitHub wiki). 
44 | 45 | ==== Locations 46 | 47 | You may be reading this in any of several different places: 48 | 49 | ===== On GitHub - Official 50 | 51 | ** This document's source files (mostly asciidoc) on its Github repository wiki: 52 | 53 | *** https://github.com/riscv/riscv-CMOs/wiki 54 | *** top of document: https://github.com/riscv/riscv-CMOs/wiki/{docname} 55 | **** this is a .asciidoc file, rendered by GitHub's wiki 56 | **** asciidoc includes link to other parts of the document 57 | **** the wiki contains other pages, not part of the document, some of which provide more background 58 | 59 | ** Generated HTML and PDF files on GitHub: 60 | 61 | *** https://github.com/riscv/riscv-CMOs/ 62 | **** HTML: https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.html 63 | ***** displays raw, does not render 64 | **** PDF: https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.pdf 65 | ***** displays - in GitHub's ugly way 66 | *** https://github.com/riscv/riscv-CMOs/wiki 67 | **** PDF: https://github.com/riscv/riscv-CMOs/wiki/Ri5-CMOs-proposal.pdf 68 | ***** downloads, does not display 69 | **** HTML: https://github.com/riscv/riscv-CMOs/wiki/Ri5-CMOs-proposal.html 70 | ***** displays raw, does not render 71 | 72 | 73 | 74 | ===== On GitHub - Original 75 | 76 | TBD: remove these references when official locations fully verified 77 | 78 | Originally written in author's personal GitHub repo+wiki: 79 | 80 | ** This document's source files (mostly asciidoc) on its Github repository wiki: 81 | 82 | *** https://github.com/AndyGlew/Ri5-stuff/wiki 83 | *** top of document: https://github.com/AndyGlew/Ri5-stuff/wiki/{docname} 84 | **** this is a .asciidoc file, rendered by GitHub's wiki 85 | **** asciidoc includes link to other parts of the document 86 | **** the wiki contains other pages, not part of the document, some of which provide more background 87 | 88 | ** Generated HTML and PDF files on GitHub: 89 | 90 | *** https://github.com/AndyGlew/Ri5-stuff/ 91 | 
**** HTML: https://github.com/AndyGlew/Ri5-stuff/blob/master/Ri5-CMOs-proposal.html 92 | ***** displays raw, does not render 93 | **** PDF: https://github.com/AndyGlew/Ri5-stuff/blob/master/Ri5-CMOs-proposal.pdf 94 | ***** displays - in GitHub's ugly way 95 | *** https://github.com/AndyGlew/Ri5-stuff/wiki 96 | **** PDF: https://github.com/AndyGlew/Ri5-stuff/wiki/Ri5-CMOs-proposal.pdf 97 | ***** downloads, does not display 98 | **** HTML: https://github.com/AndyGlew/Ri5-stuff/wiki/Ri5-CMOs-proposal.html 99 | ***** displays raw, does not render 100 | 101 | ===== Local Workspace/clone 102 | 103 | * On your local system, where you may have cloned the GitHub parent and wiki repositories, and where you may have built the document: 104 | 105 | ** local where built: 106 | *** won't work from web 107 | *** link:file:{docname}.html[] 108 | *** link:file:{docname}.pdf[] 109 | *** link:file:{docname}.asciidoc[] 110 | 111 | When and where converted (i.e. when asciidoctor was run, to generate this file): 112 | 113 | ** docfile: {docfile} - full path 114 | ** localdatetime: {localdatetime} - when generated 115 | ** outfile: {outfile} - full path of the output file 116 | ** TBD: what system (PC, Linux system) was asciidoctor run on? 117 | 118 | Note: paths local to system document generated on are mostly meaningless to others, 119 | but have already been helpful finding source for orphaned drafts generated as PDF and HTML. 
120 | -------------------------------------------------------------------------------- /Archive/wiki/techpubs.asciidoc: -------------------------------------------------------------------------------- 1 | == TechPubs Thrashing 2 | === Wiki --> AsciiDoc 3 | This proposal was developed on a wiki, 4 | originally https://github.com/AndyGlew/Ri5-stuff/wiki/CMOs-proposal, 5 | later https://github.com/riscv/riscv-CMOs/wiki/CMOs-proposal, 6 | as a set of interlinked pages, 7 | and is in the process of being converted to a more compact AsciiDoc proposal. 8 | 9 | Why?: 10 | 11 | * Wikis get messy. 12 | * Proliferation of pages. 13 | * Hard to tell which pages are part of the official proposal and which pages are just random commentary 14 | * at least it's hard unless you develop tools to do this, I don't want to spend the time to do it 15 | * AsciiDoc seems to be the closest thing RISC-V has to a standard documentation format 16 | * Although it is by no means standard as of 2020-05-06 17 | * the original manuals were written in TeX 18 | * some AsciiDoc 19 | * some in Word, PowerPoint, ... 20 | 21 | === Redundant AsciiDoc files? 22 | 23 | * link:CMOs-proposal[] - original, then thrashed trying to get asciidoc to build 24 | * link:Ri5-CMOs-proposal[] - asciidoc created elsewhere 25 | 26 | 27 | === Thrashed wrt Location 28 | 29 | * Originally in wiki https://github.com/AndyGlew/Ri5-stuff/wiki/CMOs-proposal 30 | * Moved to repo 31 | * repo: https://github.com/AndyGlew/Ri5-CMOs-proposal/blob/master/Ri5-CMOs-proposal 32 | * document: https://github.com/AndyGlew/Ri5-CMOs-proposal/blob/master/Ri5-CMOs-proposal.adoc 33 | 34 | Why? 35 | 36 | * GitHub wiki does not support includes 37 | * Git does not support ad-hoc subrepo checkouts 38 | * although I expect some idiot is going to tell me that sparse checkouts are good enough. They are not. 
39 | 40 | Then moved back to original repo/wiki 41 | * the wiki is a submodule of the repo 42 | * wiki https://github.com/AndyGlew/Ri5-stuff/wiki/CMOs-proposal 43 | * repo https://github.com/AndyGlew/Ri5-stuff 44 | * where tools such as Makefile are kept 45 | * Makefile lives in repo 46 | * HTML is generated in wiki 47 | * where can be linked 48 | 49 | 50 | --- 51 | 52 | 05:37 PDT, 2020-05-07 Thursday, May 7, WW19 53 | 54 | moved stuff from git@github.com:AndyGlew/Ri5-CMOs-proposal.git to git@github.com:AndyGlew/Ri5-stuff.wiki.git 55 | 56 | moved stuff from 57 | * git@github.com:AndyGlew/Ri5-CMOs-proposal.git 58 | to 59 | * git@github.com:AndyGlew/Ri5-stuff.wiki.git 60 | and also 61 | * git@github.com:AndyGlew/Ri5-stuff.git 62 | 63 | with 64 | * git@github.com:AndyGlew/Ri5-stuff.wiki.git 65 | a submodule of 66 | * git@github.com:AndyGlew/Ri5-stuff.git 67 | 68 | In this, Ri5-stuff.wiki 69 | 70 | 71 | AsciiDoc source files for proposal document: 72 | * Ri5-CMOs-proposal.adoc 73 | * techpubs.adoc 74 | 75 | Generated proposal: 76 | * Ri5-CMOs-proposal.html 77 | Generated by: 78 | * Makefile 79 | -------------------------------------------------------------------------------- /Archive/wiki/terminology-notation.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | [appendix] 3 | == Terminology 4 | 5 | TBD: automatically generate from marks in line as in LaTeX (AsciiDoc is probably incapable). 6 | 7 | CMO:: 8 | Cache Management Operation. A superset of prefetches, post-stores, and other operations. 9 | 10 | Post-Store:: 11 | A cute Berkeleyism for CMOs that push data from a near cache to a further cache, 12 | contrasting with Pre-Fetch. 13 | 14 | 15 | Pre-Fetch, PREFETCH:: 16 | Move data within cache hierarchy, typically from further out (e.g. memory) to closer in. (Possibly from one processor's cache to another.) 17 | 18 | Ri5:: 19 | shorthand for RISC-V. 
TBD change from Glew-specific Ri5 to more standard RV 20 | 21 | RV:: standard shorthand for RISC-V (?). 22 | 23 | 24 | 25 | [appendix] 26 | == Notation 27 | 28 | === Encoding Bitstring Notation 29 | 30 | See https://github.com/AndyGlew/comp-arch.net/wiki/Instruction-Encoding-Notation 31 | 32 | This document uses notation such as this below for instruction encoding bit strings: 33 | 34 | PREFETCH.64B.R: `imm12.rs1:5.110.rd=00000.0010011`, e.g. ORI with RD=x0 35 | 36 | In these bitstrings 37 | 38 | * 0 and 1 correspond to bit values 39 | 40 | * fields are specified by rs2:5, rd=00000, etc 41 | * i.e. fieldname:width 42 | * e.g. fieldname=value (width implied) 43 | 44 | * punctuation is used to improve readability, such as period ".", underscore "_", and comma "," 45 | 46 | Bit positions are numbered with bit 0 on the right, in the usual RISC-V manner. 47 | 48 | Elsewhere in the RISC-V toolchain a similar notation is used, 49 | with additions such as allowing blanks to separate fields, 50 | and allowing fields to be specified out of order by specifying bit positions such as 51 | `rd=5..9` 52 | 53 | AW: https://github.com/riscv/riscv-opcodes is where the current instruction encoding metadata lives. 54 | See comment at the top of https://github.com/riscv/riscv-opcodes/blob/master/opcodes-rvv 55 | for description of notation. 56 | 57 | 58 | TBD: unify these notations. 59 | 60 | TBD: generate pretty human readable tables ("opcode maps") from these notations. 61 | 62 | (The RISC-V toolchain already generates "tables" which are essentially lists of encodings. By "pretty" I mean the sort of table that looks like a hierarchy of Karnaugh maps, as is traditional. Other tools also generate nice diagrams of per-instruction encodings and fields.) 
63 | -------------------------------------------------------------------------------- /Archive/wiki/variable-address-range-loop.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | // The CMO.VAR instruction is intended to be used in a software loop such as that below: 3 | 4 | 5 | In pseudocode: 6 | 7 | ---- 8 | x11 := lwb 9 | x12 := upb (= lwb + size_in_bytes) 10 | LOOP 11 | CMO.VAR.<> x11,x11,x12 12 | UNTIL x11 == x12 13 | ---- 14 | 15 | In assembly code: 16 | 17 | ---- 18 | x11 := lwb 19 | x12 := upb 20 | L: CMO.VAR.<> x11,x11,x12 21 | bne x11,x12,L 22 | ---- 23 | -------------------------------------------------------------------------------- /Archive/wiki/voice-typos-editing-this-wiki.md: -------------------------------------------------------------------------------- 1 | I like wikis. 2 | 3 | however, I find it necessary to use voice recognition (Dragon on a PC) to enter text because of computeritis 4 | 5 | unfortunately, the GitHub/Gollum wiki, edited online in an editor like Google Chrome, has many problems when edited using voice recognition. 6 | 7 | For example: The double letters ii and oo highlighted below 8 | 9 | > I am actually just as happy not to **defiine** the CSR operand to **coontain ** an encoding of CMO operations desired, 10 | 11 | I apologize for such errors in the current wiki text. I will correct them as I find them during review. I can avoid them if I edit off-line using software that is more ready for speech recognition. However sometimes I make these edits online, just for efficieency. ( There is another typo **ee** :-( ) 12 | 13 | -------------------------------------------------------------------------------- /Archive/wiki/wiki-stuff---Notes-on-GitHub-wiki-pages.md: -------------------------------------------------------------------------------- 1 | Is it possible that this GitHub wiki does not allow me to create a reference to a page that does not yet exist? 
2 | 3 | To me, that is the essence of wiki. 4 | 5 | False alarm: links to pages that do not exist yet work. At least in Markdown. I think also in Asciidoc wiki pages. Mostly with the [[Markdown link syntax in Asciidoc wiki pages]]. But I believe also with traditional Asciidoc links - they are just harder to type. 6 | 7 | Also, annoyingly, Asciidoc wiki pages do not color links that do not exist distinct from links that do exist. That is probably my second most important wiki property. -------------------------------------------------------------------------------- /CMO-Phase-1-Scope.md: -------------------------------------------------------------------------------- 1 | # CMO Phase 1 Scope 2 | 3 | ## Introduction 4 | 5 | This document summarizes the current plan-of-record for Phase 1 of the CMO 6 | extension. Additional details will be provided by various extension proposal 7 | documents. 8 | 9 | ## Sub-extensions 10 | 11 | Phase 1 is divided into three sub-extensions that add the following 12 | instructions and features: 13 | 14 | * Cache Block Management Operations (Zicbom) 15 | * CBO.INVAL - Invalidate Cache Block (at effective address) 16 | * CBO.CLEAN - Clean Cache Block (at effective address) 17 | * CBO.FLUSH - Flush Cache Block (at effective address) 18 | * Memory ordering with respect to other memory accesses 19 | * Cache Block Zero Operations (Zicboz) 20 | * CBO.ZERO - Zero Cache Block (at effective address) 21 | * Memory ordering with respect to other memory accesses 22 | * Cache Block Prefetch Operations (Zicbop) 23 | * PREFETCH.R - Prefetch Cache Block for Read (at effective address) 24 | * PREFETCH.W - Prefetch Cache Block for Write (at effective address) 25 | * PREFETCH.I - Prefetch Cache Block for Instruction Fetch (at effective 26 | address) 27 | 28 | Instructions in the Zicbom and Zicboz sub-extensions support a [rs1] addressing 29 | mode. Instructions in the Zicbop sub-extension may support a modified form of a 30 | [rs1+imm12] addressing mode. 
31 | 32 | For Phase 1, Zicbom instructions operate to the copy of data in memory, while 33 | Zicboz updates the values of memory corresponding to a memory location like 34 | stores. Zicbop instructions may allocate in any cache or in none. 35 | 36 | ## Closed Issues 37 | 38 | _Note:_ "Closed" implies that a given issue has been documented in the 39 | specification 40 | 41 | * Permissions, protection, and access control (i.e. relationship to translation, 42 | PMAs, PMPs, and privilege level) 43 | * Safe transformations of operations 44 | * HW: safe transformations enable implementation choices 45 | * SW: safe transformations allow less privileged software to use ops 46 | * Memory ordering model (i.e. relationship to loads, stores, FENCEs, SFENCEs, etc.) 47 | * Discovery of block sizes and types 48 | * Final instruction encodings 49 | 50 | ## Open Issues 51 | 52 | 53 | ## Roadmap 54 | 55 | These topics are deferred from Phase 1 and may be considered in Phase 2 and 56 | onward: 57 | 58 | * Effects of mismatched PMA/VA attributes and changing attributes 59 | * Additional levels or points of convergence for system optimization 60 | * Levels (Points of Persistence) beyond the POC for all harts and devices 61 | * Temporality (reuse) hints for PREFETCH operations 62 | * Cache Block Operations by Index 63 | * CBO.INVAL.IX - Invalidate Cache Block at Index 64 | * CBO.CLEAN.IX - Clean Cache Block at Index 65 | * CBO.FLUSH.IX - Flush Cache Block at Index 66 | * CMO.ALL 67 | * DEMOTE 68 | * Completion semantics 69 | * Security-related CMOs, e.g. CMO.ALL.SEC 70 | * Non-uniform (i.e. mixed) block sizes 71 | * Operations that return values, i.e. ranges, e.g. CMO.op.AR and CMO.op.UR 72 | * Others?
73 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile 3 | # 4 | # Makefile for linux; at present, this file is used primarily to enable building pdfs in local linux 5 | # systems (Makefile.pwsh is the approved publishing flow); please contact the CMO TG chairs for the 6 | # latest publishing methodology 7 | # 8 | 9 | SPEC=cmobase 10 | # version is TG version number; should be updated in both Makefiles 11 | VERSION=1.0.1 12 | DATE=$(shell date +%Y.%m.%d) 13 | COMMITDATE=$(shell git show -s --format=%ci | cut -d ' ' -f 1) 14 | GITVERSION=$(shell git describe --tag --always --dirty) 15 | # moved STAGE/revremark to cmobase.adoc 16 | 17 | 18 | $(SPEC)-v$(VERSION).pdf: $(SPEC)/$(SPEC).adoc \ 19 | $(SPEC)/*.adoc \ 20 | $(SPEC)/insns/*.adoc \ 21 | $(SPEC)/autogenerated/revision.adoc-snippet 22 | asciidoctor-pdf -r asciidoctor-diagram \ 23 | -D . \ 24 | -a toc \ 25 | -a compress \ 26 | -a pdf-style=resources/themes/risc-v_spec-pdf.yml \ 27 | -a pdf-fontsdir=resources/fonts \ 28 | -o $@ \ 29 | $< 30 | gs -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/printer -sOutputFile=opt-$@ $@ && mv opt-$@ $@ 31 | 32 | $(SPEC)/autogenerated: 33 | -mkdir $@ 34 | 35 | 36 | $(SPEC)/autogenerated/revision.adoc-snippet: Makefile $(SPEC)/autogenerated FORCE 37 | echo ":revdate: ${COMMITDATE}" > $@-tmp 38 | echo ":revnumber: ${VERSION}-${GITVERSION}" >> $@-tmp 39 | # echo ":revremark: ${STAGE}" >> $@-tmp 40 | diff $@ $@-tmp || mv $@-tmp $@ 41 | 42 | clean: 43 | rm -f $(SPEC)-*.pdf 44 | 45 | FORCE: 46 | -------------------------------------------------------------------------------- /Makefile.pwsh: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile.pwsh 3 | # 4 | # Makefile for Windows powershell; assumes make has been installed (in addition to the standard doc 5 | # flow 
tools): 6 | # http://gnuwin32.sourceforge.net/packages/make.htm 7 | # 8 | # This file is the currently approved pdf publishing flow 9 | # 10 | 11 | PWSHCMD=powershell -noprofile -command 12 | SPEC=cmobase 13 | # version is TG version number; should be updated in both Makefiles 14 | VERSION=1.0.1 15 | DATE=$(shell date -UFormat \"%Y.%m.%d\") 16 | COMMITDATE=$(shell git log -1 --format=%cs) 17 | GITVERSION=$(shell git describe --tag --always --dirty) 18 | # moved STAGE/revremark to cmobase.adoc 19 | 20 | 21 | $(SPEC)-v$(VERSION).pdf: $(SPEC)/$(SPEC).adoc \ 22 | $(SPEC)/*.adoc \ 23 | $(SPEC)/insns/*.adoc \ 24 | $(SPEC)/autogenerated/revision.adoc-snippet 25 | asciidoctor-pdf -r asciidoctor-diagram \ 26 | -D . \ 27 | -a toc \ 28 | -a compress \ 29 | -a pdf-style=resources/themes/risc-v_spec-pdf.yml \ 30 | -a pdf-fontsdir=resources/fonts \ 31 | -o $@ \ 32 | $< 33 | # ignore ghostscript optimization for now 34 | # gs -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/printer -sOutputFile=opt-$@ $@ && mv opt-$@ $@ 35 | 36 | 37 | $(SPEC)/autogenerated: 38 | $(PWSHCMD) mkdir $@ 39 | 40 | 41 | # the original Makefile wrote to a temp file ($@-tmp) and compared with the 42 | # previous version; ignore that for now 43 | $(SPEC)/autogenerated/revision.adoc-snippet: Makefile $(SPEC)/autogenerated FORCE 44 | $(PWSHCMD) echo (':revdate: ' + '${COMMITDATE}') > $@ 45 | $(PWSHCMD) echo (':revnumber: ' + '${VERSION}' + '-' + '${GITVERSION}') >> $@ 46 | # $(PWSHCMD) echo (':revremark: ' + '${STAGE}') >> $@ 47 | # $(PWSHCMD) diff (cat $@) (cat $@-tmp) || $(PWSHCMD) mv $@-tmp $@ 48 | 49 | 50 | clean: 51 | $(PWSHCMD) rm $(SPEC)-*.pdf 52 | 53 | 54 | FORCE: 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cache Management Operations for RISC-V 2 | 3 | _Note: The RISC-V CMO TG is currently in maintenance mode. 
Updates to the existing extensions, Zicbom, Zicboz, and Zicbop, will be made as necessary. Please contact help@riscv.org with any questions._ 4 | 5 | This repository is the working area for the RISC-V Cache Management Operation 6 | Task Group (CMO TG). The TG is primarily responsible for developing ISA 7 | extensions in the area of cache management. 8 | 9 | The repo is organized as follows: 10 | 11 | * Archive - contains old TG information including Andy Glew's initial proposals 12 | and wiki pages 13 | * cmobase - this is the main working area for Phase 1 TG documentation 14 | * specifications - the latest published spec is here 15 | * CMO-Phase-1-Scope.md - provides a summary of the current TG goals and roadmap 16 | * LICENSE.md - license information 17 | * README.md - this file 18 | 19 | ## Other Resources 20 | 21 | Administrative information, such as the TG charter, meeting agendas and minutes, 22 | etc., can be found here: 23 | 24 | https://github.com/riscv-admin-docs/cache-management-ops 25 | 26 | Additional TG information can be found here (RISC-V members only): 27 | 28 | https://lists.riscv.org/g/tech-cmo 29 | 30 | Email can be sent to the TG mailing list (RISC-V members only): 31 | 32 | tech-cmo@lists.riscv.org 33 | 34 | ## Licensing 35 | 36 | The files in this repository are licensed under the Creative Commons Attribution 37 | 4.0 International License (CC-BY 4.0). 38 | 39 | The full license text is available at 40 | https://creativecommons.org/licenses/by/4.0/. 
41 | -------------------------------------------------------------------------------- /cmobase/Zicbom.adoc: -------------------------------------------------------------------------------- 1 | [#Zicbom,reftext="Cache-Block Management Instructions"] 2 | === Cache-Block Management Instructions 3 | 4 | Cache-block management instructions enable software running on a set of coherent 5 | agents to communicate with a set of non-coherent agents by performing one of the 6 | following operations: 7 | 8 | * An invalidate operation makes data from store operations performed by a set of 9 | non-coherent agents visible to the set of coherent agents at a point common to 10 | both sets by deallocating all copies of a cache block from the set of coherent 11 | caches up to that point 12 | 13 | * A clean operation makes data from store operations performed by the set of 14 | coherent agents visible to a set of non-coherent agents at a point common to 15 | both sets by performing a write transfer of a copy of a cache block to that 16 | point provided a coherent agent performed a store operation that modified the 17 | data in the cache block since the previous invalidate, clean, or flush 18 | operation on the cache block 19 | 20 | * A flush operation atomically performs a clean operation followed by an 21 | invalidate operation 22 | 23 | In the Zicbom extension, the instructions operate to a point common to _all_ 24 | agents in the system. In other words, an invalidate operation ensures that store 25 | operations from all non-coherent agents are visible to agents in the set of coherent 26 | agents, and a clean operation ensures that store operations from coherent agents 27 | are visible to all non-coherent agents.
28 | 29 | **** 30 | 31 | _The Zicbom extension does not prohibit agents that fall outside of the above 32 | architectural definition; however, software cannot rely on the defined cache 33 | operations to have the desired effects with respect to those agents._ 34 | 35 | _Future extensions may define different sets of agents for the purposes of 36 | performance optimization._ 37 | 38 | **** 39 | 40 | These instructions operate on the cache block whose effective address is 41 | specified in _rs1_. The effective address is translated into a corresponding 42 | physical address by the appropriate translation mechanisms. 43 | 44 | The following instructions comprise the Zicbom extension: 45 | 46 | [%header,cols="^1,^1,4,8"] 47 | |=== 48 | |RV32 49 | |RV64 50 | |Mnemonic 51 | |Instruction 52 | 53 | |✓ 54 | |✓ 55 | |cbo.clean _base_ 56 | |<<#insns-cbo_clean>> 57 | 58 | |✓ 59 | |✓ 60 | |cbo.flush _base_ 61 | |<<#insns-cbo_flush>> 62 | 63 | |✓ 64 | |✓ 65 | |cbo.inval _base_ 66 | |<<#insns-cbo_inval>> 67 | 68 | |=== 69 | -------------------------------------------------------------------------------- /cmobase/Zicbop.adoc: -------------------------------------------------------------------------------- 1 | [#Zicbop,reftext="Cache-Block Prefetch Instructions"] 2 | === Cache-Block Prefetch Instructions 3 | 4 | Cache-block prefetch instructions are HINTs to the hardware to indicate that 5 | software intends to perform a particular type of memory access in the near 6 | future. The types of memory accesses are instruction fetch, data read (i.e. 7 | load), and data write (i.e. store). 8 | 9 | These instructions operate on the cache block whose effective address is the sum 10 | of the base address specified in _rs1_ and the sign-extended offset encoded in 11 | _imm[11:0]_, where _imm[4:0]_ shall equal `0b00000`. The effective address is 12 | translated into a corresponding physical address by the appropriate translation 13 | mechanisms. 
14 | 15 | **** 16 | 17 | _Cache-block prefetch instructions are encoded as ORI instructions with rd equal 18 | to `0b00000`; however, for the purposes of effective address calculation, this 19 | field is also interpreted as imm[4:0] like a store instruction._ 20 | 21 | **** 22 | 23 | The following instructions comprise the Zicbop extension: 24 | 25 | [%header,cols="^1,^1,4,8"] 26 | |=== 27 | |RV32 28 | |RV64 29 | |Mnemonic 30 | |Instruction 31 | 32 | |✓ 33 | |✓ 34 | |prefetch.i _offset_(_base_) 35 | |<<#insns-prefetch_i>> 36 | 37 | |✓ 38 | |✓ 39 | |prefetch.r _offset_(_base_) 40 | |<<#insns-prefetch_r>> 41 | 42 | |✓ 43 | |✓ 44 | |prefetch.w _offset_(_base_) 45 | |<<#insns-prefetch_w>> 46 | 47 | |=== 48 | -------------------------------------------------------------------------------- /cmobase/Zicboz.adoc: -------------------------------------------------------------------------------- 1 | [#Zicboz,reftext="Cache-Block Zero Instructions"] 2 | === Cache-Block Zero Instructions 3 | 4 | Cache-block zero instructions store zeros to the set of bytes corresponding to a 5 | cache block. An implementation may update the bytes in any order and with any 6 | granularity and atomicity, including individual bytes. 7 | 8 | **** 9 | 10 | _Cache-block zero instructions store zeros independently of whether data from 11 | the underlying memory locations are cacheable. In addition, this specification 12 | does not constrain how the bytes are written._ 13 | 14 | **** 15 | 16 | These instructions operate on the cache block, or the memory locations 17 | corresponding to the cache block, whose effective address is specified in _rs1_. 18 | The effective address is translated into a corresponding physical address by the 19 | appropriate translation mechanisms. 
20 | 21 | The following instructions comprise the Zicboz extension: 22 | 23 | [%header,cols="^1,^1,4,8"] 24 | |=== 25 | |RV32 26 | |RV64 27 | |Mnemonic 28 | |Instruction 29 | 30 | |✓ 31 | |✓ 32 | |cbo.zero _base_ 33 | |<<#insns-cbo_zero>> 34 | 35 | |=== 36 | -------------------------------------------------------------------------------- /cmobase/acknowledgements.adoc: -------------------------------------------------------------------------------- 1 | [acknowledgments] 2 | == Acknowledgments 3 | 4 | Contributors to this specification (in alphabetical order) include: + 5 | Allen Baum, 6 | Paul Donahue, 7 | Greg Favor, 8 | Andy Glew, 9 | John Ingalls, 10 | David Kruckemyer, 11 | Josh Scheid, 12 | Philipp Tomsich, 13 | Paul Walmsley, 14 | and 15 | Derek Williams 16 | 17 | We express our gratitude to everyone that contributed to, reviewed, or improved 18 | this specification through their comments and questions. 19 | -------------------------------------------------------------------------------- /cmobase/cmobase.adoc: -------------------------------------------------------------------------------- 1 | [[cmobase]] 2 | = RISC-V Base Cache Management Operation ISA Extensions 3 | :description: Base Cache Management Operation ISA Extensions 4 | :company: RISC-V 5 | // Pull in variable with revision info 6 | include::autogenerated/revision.adoc-snippet[] 7 | :revremark: Ratified 8 | :url-riscv: http://riscv.org 9 | :doctype: book 10 | //:doctype: report 11 | :preface-title: Licensing and Acknowledgments 12 | :colophon: 13 | :appendix-caption: Appendix 14 | :imagesdir: images 15 | :title-logo-image: image:RISC-V-logo.svg[pdfwidth=3.25in,align=center] 16 | //:page-background-image: image:draft.svg[opacity=20%] 17 | //:title-page-background-image: none 18 | :back-cover-image: image:circuit.png[opacity=25%] 19 | // Settings: 20 | :experimental: 21 | :reproducible: 22 | // needs to be changed 23 | :WaveDromEditorApp: ~/wavedrom-cli/cli/wavedrom-cli.js 24 | :imagesoutdir: images 
25 | :icons: font 26 | :lang: en 27 | :listing-caption: Listing 28 | :sectnums: 29 | :toc: left 30 | :toclevels: 4 31 | :source-highlighter: pygments 32 | ifdef::backend-pdf[] 33 | :source-highlighter: coderay 34 | endif::[] 35 | :data-uri: 36 | :hide-uri-scheme: 37 | :stem: 38 | :footnote: 39 | :xrefstyle: short 40 | :csrname: envcfg 41 | 42 | // Front-Matters 43 | include::colophon.adoc[] 44 | include::acknowledgements.adoc[] 45 | 46 | [preface] 47 | == Pseudocode for instruction semantics 48 | 49 | The semantics of each instruction in the <<#insns>> chapter is expressed in a 50 | SAIL-like syntax. 51 | 52 | // Chapters 53 | include::introduction.adoc[] 54 | include::background.adoc[] 55 | include::csr_state.adoc[] 56 | include::extensions.adoc[] 57 | include::instructions.adoc[] 58 | 59 | [appendix] 60 | == Software guide 61 | -------------------------------------------------------------------------------- /cmobase/colophon.adoc: -------------------------------------------------------------------------------- 1 | [colophon] 2 | == Colophon 3 | 4 | This document is in the link:http://riscv.org/spec-state[Ratified] state. No 5 | changes are allowed. Any desired or needed changes can be the subject of a 6 | follow-on new extension. Ratified extensions are never revised. For more 7 | information, see link:http://riscv.org/spec-state[here]. 8 | 9 | [NOTE] 10 | .Copyright and licensure: 11 | This document is released under the 12 | https://creativecommons.org/licenses/by/4.0/[Creative Commons Attribution 4.0 13 | International License]. 
14 | -------------------------------------------------------------------------------- /cmobase/csr_state.adoc: -------------------------------------------------------------------------------- 1 | [#csr_state,reftext="Control and Status Register State"] 2 | == Control and Status Register State 3 | 4 | **** 5 | 6 | _The CMO extensions rely on state in {csrname} CSRs that will be defined in a 7 | future update to the privileged architecture. If this CSR update is not 8 | ratified, the CMO extension will define its own CSRs._ 9 | 10 | **** 11 | 12 | Three CSRs control the execution of CMO instructions: 13 | 14 | * `m{csrname}` 15 | * `s{csrname}` 16 | * `h{csrname}` 17 | 18 | The `s{csrname}` register is used by all supervisor modes, including VS-mode. A 19 | hypervisor is responsible for saving and restoring `s{csrname}` on guest context 20 | switches. The `h{csrname}` register is only present if the H-extension is 21 | implemented and enabled. 22 | 23 | Each `x{csrname}` register (where `x` is `m`, `s`, or `h`) has the following 24 | generic format: 25 | 26 | .Generic Format for x{csrname} CSRs 27 | [cols="^10,^10,80a"] 28 | |=== 29 | | Bits | Name | Description 30 | 31 | | [5:4] | `CBIE` | Cache Block Invalidate instruction Enable 32 | 33 | Enables the execution of the cache block invalidate instruction, `CBO.INVAL`, in 34 | a lower privilege mode: 35 | 36 | * `00`: The instruction raises an illegal instruction or virtual instruction 37 | exception 38 | * `01`: The instruction is executed and performs a flush operation 39 | * `10`: _Reserved_ 40 | * `11`: The instruction is executed and performs an invalidate operation 41 | 42 | | [6] | `CBCFE` | Cache Block Clean and Flush instruction Enable 43 | 44 | Enables the execution of the cache block clean instruction, `CBO.CLEAN`, and the 45 | cache block flush instruction, `CBO.FLUSH`, in a lower privilege mode: 46 | 47 | * `0`: The instruction raises an illegal instruction or virtual instruction 48 | exception 49 | * 
`1`: The instruction is executed 50 | 51 | | [7] | `CBZE` | Cache Block Zero instruction Enable 52 | 53 | Enables the execution of the cache block zero instruction, `CBO.ZERO`, in a 54 | lower privilege mode: 55 | 56 | * `0`: The instruction raises an illegal instruction or virtual instruction 57 | exception 58 | * `1`: The instruction is executed 59 | 60 | |=== 61 | 62 | The x{csrname} registers control CBO instruction execution based on the current 63 | privilege mode and the state of the appropriate CSRs, as detailed below. 64 | 65 | A `CBO.INVAL` instruction executes or raises either an illegal instruction 66 | exception or a virtual instruction exception based on the state of the 67 | `x{csrname}.CBIE` fields: 68 | 69 | [source,sail,subs="attributes+"] 70 | -- 71 | 72 | // illegal instruction exceptions 73 | if (((priv_mode != M) && (m{csrname}.CBIE == 00)) || 74 | ((priv_mode == U) && (s{csrname}.CBIE == 00))) 75 | { 76 | 77 | } 78 | // virtual instruction exceptions 79 | else if (((priv_mode == VS) && (h{csrname}.CBIE == 00)) || 80 | ((priv_mode == VU) && ((h{csrname}.CBIE == 00) || (s{csrname}.CBIE == 00)))) 81 | { 82 | 83 | } 84 | // execute instruction 85 | else 86 | { 87 | if (((priv_mode != M) && (m{csrname}.CBIE == 01)) || 88 | ((priv_mode == U) && (s{csrname}.CBIE == 01)) || 89 | ((priv_mode == VS) && (h{csrname}.CBIE == 01)) || 90 | ((priv_mode == VU) && ((h{csrname}.CBIE == 01) || (s{csrname}.CBIE == 01)))) 91 | { 92 | 93 | } 94 | else 95 | { 96 | 97 | } 98 | } 99 | 100 | 101 | -- 102 | 103 | **** 104 | 105 | _Until a modified cache block has updated memory, a `CBO.INVAL` instruction may 106 | expose stale data values in memory if the CSRs are programmed to perform an 107 | invalidate operation. 
This behavior may result in a security hole if lower 108 | privileged level software performs an invalidate operation and accesses 109 | sensitive information in memory._ 110 | 111 | _To avoid such holes, higher privileged level software must perform either a 112 | clean or flush operation on the cache block before permitting lower privileged 113 | level software to perform an invalidate operation on the block. Alternatively, 114 | higher privileged level software may program the CSRs so that `CBO.INVAL` 115 | either traps or performs a flush operation in a lower privileged level._ 116 | 117 | **** 118 | 119 | A `CBO.CLEAN` or `CBO.FLUSH` instruction executes or raises an illegal 120 | instruction or virtual instruction exception based on the state of the 121 | `x{csrname}.CBCFE` bits: 122 | 123 | [source,sail,subs="attributes+"] 124 | -- 125 | 126 | // illegal instruction exceptions 127 | if (((priv_mode != M) && !m{csrname}.CBCFE) || 128 | ((priv_mode == U) && !s{csrname}.CBCFE)) 129 | { 130 | 131 | } 132 | // virtual instruction exceptions 133 | else if (((priv_mode == VS) && !h{csrname}.CBCFE) || 134 | ((priv_mode == VU) && !(h{csrname}.CBCFE && s{csrname}.CBCFE))) 135 | { 136 | 137 | } 138 | // execute instruction 139 | else 140 | { 141 | 142 | } 143 | 144 | -- 145 | 146 | Finally, a `CBO.ZERO` instruction executes or raises an illegal instruction or 147 | virtual instruction exception based on the state of the `x{csrname}.CBZE` bits: 148 | 149 | [source,sail,subs="attributes+"] 150 | -- 151 | 152 | // illegal instruction exceptions 153 | if (((priv_mode != M) && !m{csrname}.CBZE) || 154 | ((priv_mode == U) && !s{csrname}.CBZE)) 155 | { 156 | 157 | } 158 | // virtual instruction exceptions 159 | else if (((priv_mode == VS) && !h{csrname}.CBZE) || 160 | ((priv_mode == VU) && !(h{csrname}.CBZE && s{csrname}.CBZE))) 161 | { 162 | 163 | } 164 | // execute instruction 165 | else 166 | { 167 | 168 | } 169 | 170 | -- 171 | 172 | Each `x{csrname}` register is WARL; 
however, software should determine the legal 173 | values from the execution environment discovery mechanism. 174 | -------------------------------------------------------------------------------- /cmobase/extensions.adoc: -------------------------------------------------------------------------------- 1 | [#extensions,reftext="Extensions"] 2 | == Extensions 3 | 4 | CMO instructions are defined in the following extensions: 5 | 6 | * <<#Zicbom>> 7 | * <<#Zicboz>> 8 | * <<#Zicbop>> 9 | 10 | include::Zicbom.adoc[] 11 | include::Zicboz.adoc[] 12 | include::Zicbop.adoc[] 13 | -------------------------------------------------------------------------------- /cmobase/images/RISC-V-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | RISC-V logoimage/svg+xml 3 | -------------------------------------------------------------------------------- /cmobase/images/circuit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/cmobase/images/circuit.png -------------------------------------------------------------------------------- /cmobase/insns/cbo.clean.adoc: -------------------------------------------------------------------------------- 1 | [#insns-cbo_clean,reftext="Cache Block Clean"] 2 | === cbo.clean 3 | 4 | Synopsis:: 5 | Perform a clean operation on a cache block 6 | 7 | Mnemonic:: 8 | cbo.clean _offset_(_base_) 9 | 10 | Encoding:: 11 | [wavedrom, , svg] 12 | .... 13 | {reg:[ 14 | { bits: 7, name: 0xF, attr: ['MISC-MEM'] }, 15 | { bits: 5, name: 0x0 }, 16 | { bits: 3, name: 0x2, attr: ['CBO'] }, 17 | { bits: 5, name: 'rs1', attr: ['base'] }, 18 | { bits: 12, name: 0x001, attr: ['CBO.CLEAN'] }, 19 | ]} 20 | .... 21 | 22 | Description:: 23 | 24 | A *cbo.clean* instruction performs a clean operation on the cache block whose 25 | effective address is the base address specified in _rs1_. 
The offset operand may 26 | be omitted; otherwise, any expression that computes the offset shall evaluate to 27 | zero. The instruction operates on the set of coherent caches accessed by the 28 | agent executing the instruction. 29 | 30 | Operation:: 31 | [source,sail] 32 | -- 33 | TODO 34 | -- 35 | -------------------------------------------------------------------------------- /cmobase/insns/cbo.flush.adoc: -------------------------------------------------------------------------------- 1 | [#insns-cbo_flush,reftext="Cache Block Flush"] 2 | === cbo.flush 3 | 4 | Synopsis:: 5 | Perform a flush operation on a cache block 6 | 7 | Mnemonic:: 8 | cbo.flush _offset_(_base_) 9 | 10 | Encoding:: 11 | [wavedrom, , svg] 12 | .... 13 | {reg:[ 14 | { bits: 7, name: 0xF, attr: ['MISC-MEM'] }, 15 | { bits: 5, name: 0x0 }, 16 | { bits: 3, name: 0x2, attr: ['CBO'] }, 17 | { bits: 5, name: 'rs1', attr: ['base'] }, 18 | { bits: 12, name: 0x002, attr: ['CBO.FLUSH'] }, 19 | ]} 20 | .... 21 | 22 | Description:: 23 | 24 | A *cbo.flush* instruction performs a flush operation on the cache block whose 25 | effective address is the base address specified in _rs1_. The offset operand may 26 | be omitted; otherwise, any expression that computes the offset shall evaluate to 27 | zero. The instruction operates on the set of coherent caches accessed by the 28 | agent executing the instruction. 29 | 30 | Operation:: 31 | [source,sail] 32 | -- 33 | TODO 34 | -- 35 | -------------------------------------------------------------------------------- /cmobase/insns/cbo.inval.adoc: -------------------------------------------------------------------------------- 1 | [#insns-cbo_inval,reftext="Cache Block Invalidate"] 2 | === cbo.inval 3 | 4 | Synopsis:: 5 | Perform an invalidate operation on a cache block 6 | 7 | Mnemonic:: 8 | cbo.inval _offset_(_base_) 9 | 10 | Encoding:: 11 | [wavedrom, , svg] 12 | .... 
13 | {reg:[ 14 | { bits: 7, name: 0xF, attr: ['MISC-MEM'] }, 15 | { bits: 5, name: 0x0 }, 16 | { bits: 3, name: 0x2, attr: ['CBO'] }, 17 | { bits: 5, name: 'rs1', attr: ['base'] }, 18 | { bits: 12, name: 0x000, attr: ['CBO.INVAL'] }, 19 | ]} 20 | .... 21 | 22 | Description:: 23 | 24 | A *cbo.inval* instruction performs an invalidate operation on the cache block 25 | whose effective address is the base address specified in _rs1_. The offset 26 | operand may be omitted; otherwise, any expression that computes the offset shall 27 | evaluate to zero. The instruction operates on the set of coherent caches 28 | accessed by the agent executing the instruction. Depending on CSR programming, 29 | the instruction may perform a flush operation instead of an invalidate 30 | operation. 31 | 32 | Operation:: 33 | [source,sail] 34 | -- 35 | TODO 36 | -- 37 | -------------------------------------------------------------------------------- /cmobase/insns/cbo.zero.adoc: -------------------------------------------------------------------------------- 1 | [#insns-cbo_zero,reftext="Cache Block Zero"] 2 | === cbo.zero 3 | 4 | Synopsis:: 5 | Store zeros to the full set of bytes corresponding to a cache block 6 | 7 | Mnemonic:: 8 | cbo.zero _offset_(_base_) 9 | 10 | Encoding:: 11 | [wavedrom, , svg] 12 | .... 13 | {reg:[ 14 | { bits: 7, name: 0xF, attr: ['MISC-MEM'] }, 15 | { bits: 5, name: 0x0 }, 16 | { bits: 3, name: 0x2, attr: ['CBO'] }, 17 | { bits: 5, name: 'rs1', attr: ['base'] }, 18 | { bits: 12, name: 0x004, attr: ['CBO.ZERO'] }, 19 | ]} 20 | .... 21 | 22 | Description:: 23 | 24 | A *cbo.zero* instruction performs stores of zeros to the full set of bytes 25 | corresponding to the cache block whose effective address is the base address 26 | specified in _rs1_. The offset operand may be omitted; otherwise, any expression 27 | that computes the offset shall evaluate to zero. An implementation may or may 28 | not update the entire set of bytes atomically. 
29 | 30 | Operation:: 31 | [source,sail] 32 | -- 33 | TODO 34 | -- 35 | -------------------------------------------------------------------------------- /cmobase/insns/prefetch.i.adoc: -------------------------------------------------------------------------------- 1 | [#insns-prefetch_i,reftext="Cache Block Prefetch for Instruction Fetch"] 2 | === prefetch.i 3 | 4 | Synopsis:: 5 | Provide a HINT to hardware that a cache block is likely to be accessed by an 6 | instruction fetch in the near future 7 | 8 | Mnemonic:: 9 | prefetch.i _offset_(_base_) 10 | 11 | Encoding:: 12 | [wavedrom, , svg] 13 | .... 14 | {reg:[ 15 | { bits: 7, name: 0x13, attr: ['OP-IMM'] }, 16 | { bits: 5, name: 0x0, attr: ['offset[4:0]'] }, 17 | { bits: 3, name: 0x6, attr: ['ORI'] }, 18 | { bits: 5, name: 'rs1', attr: ['base'] }, 19 | { bits: 5, name: 0x0, attr: ['PREFETCH.I'] }, 20 | { bits: 7, name: 'imm[11:5]', attr: ['offset[11:5]'] }, 21 | ]} 22 | .... 23 | 24 | Description:: 25 | 26 | A *prefetch.i* instruction indicates to hardware that the cache block whose 27 | effective address is the sum of the base address specified in _rs1_ and the 28 | sign-extended offset encoded in _imm[11:0]_, where _imm[4:0]_ equals `0b00000`, 29 | is likely to be accessed by an instruction fetch in the near future. 
30 | 31 | **** 32 | 33 | _An implementation may opt to cache a copy of the cache block in a cache 34 | accessed by an instruction fetch in order to improve memory access latency, but 35 | this behavior is not required._ 36 | 37 | **** 38 | 39 | Operation:: 40 | [source,sail] 41 | -- 42 | TODO 43 | -- 44 | -------------------------------------------------------------------------------- /cmobase/insns/prefetch.r.adoc: -------------------------------------------------------------------------------- 1 | [#insns-prefetch_r,reftext="Cache Block Prefetch for Data Read"] 2 | === prefetch.r 3 | 4 | Synopsis:: 5 | Provide a HINT to hardware that a cache block is likely to be accessed by a data 6 | read in the near future 7 | 8 | Mnemonic:: 9 | prefetch.r _offset_(_base_) 10 | 11 | Encoding:: 12 | [wavedrom, , svg] 13 | .... 14 | {reg:[ 15 | { bits: 7, name: 0x13, attr: ['OP-IMM'] }, 16 | { bits: 5, name: 0x0, attr: ['offset[4:0]'] }, 17 | { bits: 3, name: 0x6, attr: ['ORI'] }, 18 | { bits: 5, name: 'rs1', attr: ['base'] }, 19 | { bits: 5, name: 0x1, attr: ['PREFETCH.R'] }, 20 | { bits: 7, name: 'imm[11:5]', attr: ['offset[11:5]'] }, 21 | ]} 22 | .... 23 | 24 | Description:: 25 | 26 | A *prefetch.r* instruction indicates to hardware that the cache block whose 27 | effective address is the sum of the base address specified in _rs1_ and the 28 | sign-extended offset encoded in _imm[11:0]_, where _imm[4:0]_ equals `0b00000`, 29 | is likely to be accessed by a data read (i.e. load) in the near future. 
30 | 31 | **** 32 | 33 | _An implementation may opt to cache a copy of the cache block in a cache 34 | accessed by a data read in order to improve memory access latency, but this 35 | behavior is not required._ 36 | 37 | **** 38 | 39 | Operation:: 40 | [source,sail] 41 | -- 42 | TODO 43 | -- 44 | -------------------------------------------------------------------------------- /cmobase/insns/prefetch.w.adoc: -------------------------------------------------------------------------------- 1 | [#insns-prefetch_w,reftext="Cache Block Prefetch for Data Write"] 2 | === prefetch.w 3 | 4 | Synopsis:: 5 | Provide a HINT to hardware that a cache block is likely to be accessed by a data 6 | write in the near future 7 | 8 | Mnemonic:: 9 | prefetch.w _offset_(_base_) 10 | 11 | Encoding:: 12 | [wavedrom, , svg] 13 | .... 14 | {reg:[ 15 | { bits: 7, name: 0x13, attr: ['OP-IMM'] }, 16 | { bits: 5, name: 0x0, attr: ['offset[4:0]'] }, 17 | { bits: 3, name: 0x6, attr: ['ORI'] }, 18 | { bits: 5, name: 'rs1', attr: ['base'] }, 19 | { bits: 5, name: 0x3, attr: ['PREFETCH.W'] }, 20 | { bits: 7, name: 'imm[11:5]', attr: ['offset[11:5]'] }, 21 | ]} 22 | .... 23 | 24 | Description:: 25 | 26 | A *prefetch.w* instruction indicates to hardware that the cache block whose 27 | effective address is the sum of the base address specified in _rs1_ and the 28 | sign-extended offset encoded in _imm[11:0]_, where _imm[4:0]_ equals `0b00000`, 29 | is likely to be accessed by a data write (i.e. store) in the near future. 
30 | 31 | **** 32 | 33 | _An implementation may opt to cache a copy of the cache block in a cache 34 | accessed by a data write in order to improve memory access latency, but this 35 | behavior is not required._ 36 | 37 | **** 38 | 39 | Operation:: 40 | [source,sail] 41 | -- 42 | TODO 43 | -- 44 | -------------------------------------------------------------------------------- /cmobase/instructions.adoc: -------------------------------------------------------------------------------- 1 | [#insns,reftext="Instructions"] 2 | == Instructions 3 | include::insns/cbo.clean.adoc[] 4 | <<< 5 | include::insns/cbo.flush.adoc[] 6 | <<< 7 | include::insns/cbo.inval.adoc[] 8 | <<< 9 | include::insns/cbo.zero.adoc[] 10 | <<< 11 | include::insns/prefetch.i.adoc[] 12 | <<< 13 | include::insns/prefetch.r.adoc[] 14 | <<< 15 | include::insns/prefetch.w.adoc[] 16 | -------------------------------------------------------------------------------- /cmobase/introduction.adoc: -------------------------------------------------------------------------------- 1 | [#intro,reftext="Introduction"] 2 | == Introduction 3 | 4 | _Cache-management operation_ (or _CMO_) instructions perform operations on 5 | copies of data in the memory hierarchy. In general, CMO instructions operate on 6 | cached copies of data, but in some cases, a CMO instruction may operate on 7 | memory locations directly. 
CMO instructions are grouped by 8 | operation into the following classes: 9 | 10 | * A _management_ instruction manipulates cached copies of data with respect to a 11 | set of agents that can access the data 12 | * A _zero_ instruction zeros out a range of memory locations, potentially 13 | allocating cached copies of data in one or more caches 14 | * A _prefetch_ instruction indicates to hardware that data at a given memory 15 | location may be accessed in the near future, potentially allocating cached 16 | copies of data in one or more caches 17 | 18 | This document introduces a base set of CMO ISA extensions that operate 19 | specifically on cache blocks or the memory locations corresponding to a cache 20 | block; these are known as _cache-block operation_ (or _CBO_) instructions. Each 21 | of the above classes of instructions represents an extension in this 22 | specification: 23 | 24 | * The _Zicbom_ extension defines a set of cache-block management instructions: 25 | `CBO.INVAL`, `CBO.CLEAN`, and `CBO.FLUSH` 26 | * The _Zicboz_ extension defines a cache-block zero instruction: `CBO.ZERO` 27 | * The _Zicbop_ extension defines a set of cache-block prefetch instructions: 28 | `PREFETCH.R`, `PREFETCH.W`, and `PREFETCH.I` 29 | 30 | The execution behavior of the above instructions is also modified by CSR state 31 | added by this specification. 32 | 33 | The remainder of this document provides general background information on CMO 34 | instructions and describes each of the above ISA extensions. 35 | 36 | **** 37 | 38 | _The term CMO encompasses all operations on caches or resources related to 39 | caches. The term CBO represents a subset of CMOs that operate only on cache 40 | blocks.
The first CMO extensions only define CBOs._ 41 | 42 | **** 43 | -------------------------------------------------------------------------------- /resources/fonts/DroidSans-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/DroidSans-Bold.ttf -------------------------------------------------------------------------------- /resources/fonts/DroidSans.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/DroidSans.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunbbx.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbbx.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunbmo.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbmo.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunbmr.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbmr.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunbso.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbso.ttf 
-------------------------------------------------------------------------------- /resources/fonts/cmunbtl.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbtl.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunbto.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbto.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunbxo.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbxo.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunsi.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunsi.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunso.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunso.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunss.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunss.ttf -------------------------------------------------------------------------------- /resources/fonts/cmunsx.ttf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunsx.ttf -------------------------------------------------------------------------------- /resources/fonts/droid-sans-fallback.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/droid-sans-fallback.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1mn-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-bold.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1mn-light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-light.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1mn-medium.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-medium.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1mn-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-regular.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1mn-thin.ttf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-thin.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1p-black.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-black.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1p-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-bold.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1p-heavy.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-heavy.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1p-light.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-light.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1p-medium.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-medium.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1p-regular-fallback.ttf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-regular-fallback.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1p-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-regular.ttf -------------------------------------------------------------------------------- /resources/fonts/mplus-1p-thin.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-thin.ttf -------------------------------------------------------------------------------- /specifications/cmobase-v0.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.3.pdf -------------------------------------------------------------------------------- /specifications/cmobase-v0.4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.4.pdf -------------------------------------------------------------------------------- /specifications/cmobase-v0.5.0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.5.0.pdf -------------------------------------------------------------------------------- /specifications/cmobase-v0.5.1.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.5.1.pdf -------------------------------------------------------------------------------- /specifications/cmobase-v0.5.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.5.2.pdf -------------------------------------------------------------------------------- /specifications/cmobase-v0.6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.6.pdf -------------------------------------------------------------------------------- /specifications/cmobase-v1.0-rc1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v1.0-rc1.pdf -------------------------------------------------------------------------------- /specifications/cmobase-v1.0-rc2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v1.0-rc2.pdf -------------------------------------------------------------------------------- /specifications/cmobase-v1.0.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v1.0.1.pdf -------------------------------------------------------------------------------- /specifications/cmobase-v1.0.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v1.0.pdf --------------------------------------------------------------------------------