├── .gitignore
├── Archive
├── .gitmodules
├── 2020-11-23_CMOs-TG--minutes--surrender.mediawiki
├── CMOs-proposal-spreadsheet.xlsx
├── Makefile
├── Original-README.md
├── Ri5-CMOs-proposal.html
├── Ri5-CMOs-proposal.pdf
├── admin
│ ├── README.admin.md
│ └── WIP
│ │ └── separating_riscv-CMOs_and_riscv-CMOs-discuss.md
├── agendas-and-minutes
│ ├── 2020-09-23_CMOs-TG_meeting
│ ├── 2020-09-23_CMOs-TG_meeting_out-of-cycle
│ ├── 2020-09-23_CMOs_TG_meeting
│ ├── 2020-09-28_CMOs-TG_meeting_recurring
│ ├── 2020-10-12_CMOs-TG_meeting
│ ├── 2020-10-26_CMOs-TG_meeting
│ ├── 2020-11-09_CMOs-TG_cache_index_ops
│ ├── 2020-11-23_CMOs-TG--agenda--surrender.mediawiki
│ ├── 2020-11-23_CMOs-TG--minutes--surrender.mediawiki
│ └── README.md
├── discussion-files
│ └── RISC_V_range_CMOs_bad_v1.00.pdf
├── hack-relative-URLs-in-github-project-main-repo.md
├── old-specs
│ ├── Zicmobase.adoc
│ └── background.adoc
└── wiki
│ ├── ======================================TOC-spacer.asciidoc
│ ├── Actual-CMO-Operations.md
│ ├── Administrivia---CMOS-TG.md
│ ├── Administrivia.md
│ ├── Agenda-for-CMOs-TG.md
│ ├── An-even-quicker-and-dirtier-summary-of-proposed-instruction-encodings-for-RISC-V-CMOs.md
│ ├── Arguments-against-address-range-CMO.AR.md
│ ├── Block-memory-operations---such-as-MEMSET-and-MEMCOPY.md
│ ├── CBO.UX-vs-CMO.ALL-vs-CMO.UR.asciidoc.md
│ ├── CMO-goals.md
│ ├── CMO-operation-list-for-encodings.md
│ ├── CMOs-(Cache-Management-Operations).md
│ ├── CMOs-Not-Based-on-Memory-Address.md
│ ├── CMOs-WG-Draft-Proposed-Charter.md
│ ├── CMOs-cut-across-many-fields.md
│ ├── CMOs-proportional-to-cache-size-rather-than-address-range.md
│ ├── CMOs-proposal.md
│ ├── Draft-CMO-proposals.md
│ ├── Example-of-Config-WG-charter.md
│ ├── Examples-of-other-Working-Group-charters.md
│ ├── Extended-CMO-types.md
│ ├── Fixed-Block-Size-Prefetches-and-CMOs.md
│ ├── Home.md
│ ├── How-to-search-this-wiki,-repo,-issues,-etc..md
│ ├── I-am-frustrated-that-we-are-going-around-in-circles--with-respect-to--modulation-of-CMOs.md
│ ├── ISSUE---process-migration-argues-for-whole-cache-invalidation-operations-and-against-the-partial-progress-loop-construct.md
│ ├── Instructions-that-Support-Partial-Progress.md
│ ├── Mailing-lists-interested-in-CMOs.md
│ ├── Makefile.OBSOLETE
│ ├── Mandatory-versus-Optional-CMOs,-PREFETCHES,-and-CPHs.md
│ ├── Meeting-11-09-2020.md
│ ├── Microarchitecture-Structure-Range-CMOs.md
│ ├── Non-CMO-stuff-to-be-deleted.md
│ ├── Overview-of-CMO-operations.md
│ ├── Privilege-for-CMOs.md
│ ├── Problems-editing-GitHub-wiki-using-speech-recognition.md
│ ├── Quantization,-dequantization,-and-interpolation-instructions--for-DL,-math,-etc..md
│ ├── Quick-and-Dirty-Proposal-for-RISC-V-CMOs.md
│ ├── Quick-and-dirty-list-of-Actual-CMOs.md
│ ├── RISC-V-CMO-proposal.md
│ ├── RISC-V-needs-CMOs,-and-hence-a-CMO-Working-Group.md
│ ├── RISC-V-standard-disclaimer.md
│ ├── Ri5-CMOs-proposal.asciidoc
│ ├── STATUS---almost-done---maybe.md
│ ├── Sharing-Drawings-and-Diagrams.md
│ ├── SourceDest-to-support-Exception-Transparency.asciidoc
│ ├── TOC---Table-of-Contents.md
│ ├── Terminology-for-instructions-that-manage-microarchitecture-state-such-as-caches,-prefetchers-and-predictors.md
│ ├── Variable-Address-Range-CMOs.md
│ ├── Variable-Address-Range-Instructions-like-CMOs,-MEMSET,-MEMZERO,-and-MEMCOPY.md
│ ├── Virtual-or-Physical-CMO-instruction-flavor.md
│ ├── Why-CMOs.xlsx-was-written-in-Excel.md
│ ├── Wiki-and-Repo-crosslink-issues.md
│ ├── _Footer.md
│ ├── _Sidebar.md
│ ├── cmo_type-CMO-instruction-flavor.md
│ ├── draft-CMO-domains-and-levels.asciidoc
│ ├── draft-CMO-instruction-formats.asciidoc
│ ├── draft-CMO-issues.asciidoc
│ ├── draft-CMO-type-spreadsheet.asciidoc
│ ├── draft-Fixed-Block-Size-Prefetches-and-CMOs.asciidoc
│ ├── draft-Microarchitecture-Cache-Index-CMO.UR---CBO.UX.asciidoc
│ ├── draft-Privilege-for-CMOs.asciidoc
│ ├── draft-Variable-Address-Range-CMOs.asciidoc
│ ├── draft-actual-CMO-operations.asciidoc
│ ├── draft-microarchitecture-timing-state-flushes.asciidoc
│ ├── files
│ ├── Ri5-CMOs-proposal.html
│ └── Ri5-CMOs-proposal.pdf
│ ├── generated-HTML-and-PDF-for-CMOs-proposal.md
│ ├── hack-relative-URLs-in-github-project-wiki-repo.md
│ ├── in-band-tagging-pointers.md
│ ├── interception,-modulation,-and-mapping-of-CMOs.md
│ ├── issues-wrt-repo-and-wiki-split.md
│ ├── microarchitecture-range-loop.asciidoc
│ ├── skins
│ ├── asciidoc-classic-ag.css
│ └── asciidoctor.css
│ ├── techpubs-info.asciidoc
│ ├── techpubs.asciidoc
│ ├── terminology-notation.asciidoc
│ ├── variable-address-range-loop.asciidoc
│ ├── voice-typos-editing-this-wiki.md
│ └── wiki-stuff---Notes-on-GitHub-wiki-pages.md
├── CMO-Phase-1-Scope.md
├── LICENSE.md
├── Makefile
├── Makefile.pwsh
├── README.md
├── cmobase
├── Zicbom.adoc
├── Zicbop.adoc
├── Zicboz.adoc
├── acknowledgements.adoc
├── background.adoc
├── cache_behavior.adoc
├── cmobase.adoc
├── colophon.adoc
├── csr_state.adoc
├── extensions.adoc
├── images
│ ├── RISC-V-logo.svg
│ └── circuit.png
├── insns
│ ├── cbo.clean.adoc
│ ├── cbo.flush.adoc
│ ├── cbo.inval.adoc
│ ├── cbo.zero.adoc
│ ├── prefetch.i.adoc
│ ├── prefetch.r.adoc
│ └── prefetch.w.adoc
├── instructions.adoc
└── introduction.adoc
├── resources
├── fonts
│ ├── DroidSans-Bold.ttf
│ ├── DroidSans.ttf
│ ├── cmunbbx.ttf
│ ├── cmunbmo.ttf
│ ├── cmunbmr.ttf
│ ├── cmunbso.ttf
│ ├── cmunbtl.ttf
│ ├── cmunbto.ttf
│ ├── cmunbxo.ttf
│ ├── cmunsi.ttf
│ ├── cmunso.ttf
│ ├── cmunss.ttf
│ ├── cmunsx.ttf
│ ├── droid-sans-fallback.ttf
│ ├── mplus-1mn-bold.ttf
│ ├── mplus-1mn-light.ttf
│ ├── mplus-1mn-medium.ttf
│ ├── mplus-1mn-regular.ttf
│ ├── mplus-1mn-thin.ttf
│ ├── mplus-1p-black.ttf
│ ├── mplus-1p-bold.ttf
│ ├── mplus-1p-heavy.ttf
│ ├── mplus-1p-light.ttf
│ ├── mplus-1p-medium.ttf
│ ├── mplus-1p-regular-fallback.ttf
│ ├── mplus-1p-regular.ttf
│ └── mplus-1p-thin.ttf
└── themes
│ └── risc-v_spec-pdf.yml
└── specifications
├── cmobase-v0.3.pdf
├── cmobase-v0.4.pdf
├── cmobase-v0.5.0.pdf
├── cmobase-v0.5.1.pdf
├── cmobase-v0.5.2.pdf
├── cmobase-v0.6.pdf
├── cmobase-v1.0-rc1.pdf
├── cmobase-v1.0-rc2.pdf
├── cmobase-v1.0.1.pdf
└── cmobase-v1.0.pdf
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.html
3 | diag-*
4 | *.txt
5 | *~
6 | \#*\#
7 | build/
8 | .asciidoctor
9 | cmobase-*.pdf
10 | autogenerated/
--------------------------------------------------------------------------------
/Archive/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "riscv-CMOs.wiki"]
2 | path = riscv-CMOs.wiki
3 | url = git@github.com:riscv/riscv-CMOs.wiki.git
4 | [submodule "riscv-CMOs-discuss"]
5 | path = riscv-CMOs-discuss
6 | url = git@github.com:riscv/riscv-CMOs-discuss.git
7 |
--------------------------------------------------------------------------------
/Archive/2020-11-23_CMOs-TG--minutes--surrender.mediawiki:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Archive/CMOs-proposal-spreadsheet.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/Archive/CMOs-proposal-spreadsheet.xlsx
--------------------------------------------------------------------------------
/Archive/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for riscv/riscv-CMOs
2 |
3 | # Main purposes
4 |
5 | # (1) Encapsulate knowledge of how to update submodule after git clone
6 | # see make rule git-post-clone
7 |
8 | # (2) Generate HTML and PDF documentation from wiki pages
9 | # see make rules such as generated-docs and open-docs-in-browser
10 |
11 | ########################################################################################
12 |
13 | default: open-docs-in-browser
14 |
15 | always:
16 |
17 | ########################################################################################
18 | # As of <2020-08-12 Wednesday, August 12, WW33> first main purpose is
19 | # to encapsulate knowledge of how to update submodule for wiki
20 | # corresponding to repo (since document may be produced from wiki
21 | # files)
22 |
23 | # git stuff
24 | # mainly to remind me about git submodule commands
25 | # that I do not know by heart
26 | # (and think are kluges anyway)
27 |
28 | # submodule
29 | # git submodule add git@github.com:riscv/riscv-CMOs.wiki.git
30 | # TBD: this is imperfect: clone of a clone does not clone clone's submodule(s)
31 |
32 | # run `make git-post-clone' right after git clone of Ri5-stuff
33 | # to update submodules (currently only Ri5-stuff.wiki)
34 | git-post-clone:
35 | git submodule init
36 | git submodule update
37 |
38 |
39 | git-diff:
40 | git diff --submodule
41 |
42 |
43 | # echo DEBUG: - I'm not really debugging
44 | # I just want these messages colorized (in emacs)
45 | # and I already have colorization code for DEBUG:.*
46 | # whereas my attempt at colorizing INFO:.* failed
47 | # <2020-05-14>
48 | git-status:
49 | @echo DEBUG: $C;git status
50 | @(echo DEBUG: $C/$W ;cd $W; git status)
51 |
52 |
53 |
54 |
55 |
56 |
57 | ########################################################################################
58 | # As of Wednesday, May 6, 2020-05-06 a main purpose is to run a
59 | # command to expand the AsciiDoc include directives so that you can
60 | # get a better idea what it will actually look like
61 |
62 | # currently AsciiDoc, since supposedly RISC-V standard. I would prefer
63 | # Markdown or RST (since AsciiDoc does not play well on GitHub wiki)
64 |
65 | # TBD: ifdef for Linux as well as Windows/Cygwin
66 | # TBD: run GitHub server side as well as checked out workspace
67 |
68 | HTML_VIEWER=/cygdrive/c/Windows/explorer.exe
69 | PDF_VIEWER=/cygdrive/c/Windows/explorer.exe
70 | WEB_VIEWER=start URL...
71 |
72 | C:=$(shell basename `pwd`)
73 |
74 |
75 | W=riscv-CMOs.wiki
76 |
77 | # TBD: auto-deduce wiki submodule directory
78 |
79 | # TBD: make this into a project in a box template
80 |
81 |
82 |
83 | # checking in generated docs
84 | # a) to make visible on web/GitHub
85 | # b) because toolchain fragile
86 | # TBD: checking in redundant copies, in wiki and parent,
87 | # mostly because belongs and should be versioned with wiki,
88 | # but displays only in parent.
89 |
90 | git-ci: git-ci-generated-docs
91 | @echo 'Only doing git-ci-generated-docs'
92 | @echo 'checkin non-generated stuff by hand'
93 |
94 | M='committing generated HTML and PDF files'
95 | git-ci-generated-docs:
96 | -git ci -m $M Ri5-CMOs-proposal.html
97 | -git ci -m $M Ri5-CMOs-proposal.pdf
98 | -(cd $W;git ci -m $M Ri5-CMOs-proposal.html)
99 | -(cd $W;git ci -m $M Ri5-CMOs-proposal.pdf)
100 |
101 |
102 |
103 | # Make and display proposal draft
104 |
105 | open-docs-in-browser: open-local-docs-in-browser
106 | open-local-docs-in-browser: open-html-in-browser open-pdf-in-browser
107 |
108 | open-github-docs-in-browser:
109 |
110 |
111 | open-html-in-browser: $W/Ri5-CMOs-proposal.html
112 | @# KLUGE: Windows HTML viewer does not understand / paths
113 | @# either need to convert / --> /, cd, or cygpath
114 | -(cd $W;$(HTML_VIEWER) Ri5-CMOs-proposal.html)
115 | open-pdf-in-browser: $W/Ri5-CMOs-proposal.pdf
116 | @# KLUGE: Windows PDF viewer does not understand / paths
117 | @# either need to convert / --> /, cd, or cygpath
118 | -(cd $W;$(PDF_VIEWER) Ri5-CMOs-proposal.pdf)
119 |
120 | ASCIIDOCTOR=/home/glew/bin/asciidoctor
121 | ASCIIDOCTOR_PDF=/home/glew/bin/asciidoctor-pdf
122 | #TBD: Move asciidoctor to standard location, not my ~glew user directory
123 | # TBD: Linux tools
124 |
125 | generated-docs: ./Ri5-CMOs-proposal.html ./Ri5-CMOs-proposal.pdf
126 |
127 | ./Ri5-CMOs-proposal.html $W/Ri5-CMOs-proposal.html: always $W/Ri5-CMOs-proposal.asciidoc $W/*.asciidoc
128 | $(ASCIIDOCTOR) -b html $W/Ri5-CMOs-proposal.asciidoc -o $W/Ri5-CMOs-proposal.html
129 | cp $W/Ri5-CMOs-proposal.html .
130 | ./Ri5-CMOs-proposal.pdf $W/Ri5-CMOs-proposal.pdf: always $W/Ri5-CMOs-proposal.asciidoc $W/*.asciidoc
131 | $(ASCIIDOCTOR_PDF) -b pdf $W/Ri5-CMOs-proposal.asciidoc -o $W/Ri5-CMOs-proposal.pdf
132 | cp $W/Ri5-CMOs-proposal.pdf .
133 |
134 | # TBD: should I eliminate one of the generated .html files - likely will cause problems since redundant
135 | # But... I really want to have the generated HTML in the wiki, not the parent.
136 |
137 |
138 | ########################################################################################
139 |
140 | # Make utilities
141 |
142 | # TBD: make clean ... cleanest
143 | # TBD: BOM (Bill of Materials)
144 |
145 | # While it would be nice to have real tags for the documents, and wiki pages, e.g. for sections
146 | # at the moment all I am really using the tags for is to do global tags-query-replace in emacs
147 | # so I only need the filenames, not any patterns.
148 |
149 | # TBD: Some will object to such make targets for editing convenience,
150 | # especially for a minority editor like emacs. When there is proper
151 | # Makefile BOM support these targets may no longer be necessary, but
152 | # it would be better if they were augmented to provide more complete
153 | # tag functionality.
154 |
155 | tags-ad TAGS: always
156 | cp /dev/null TAGS
157 | etags --append --langdef=asciidoc --langmap=asciidoc:.asciidoc --regex-asciidoc='/^=+\\(.*\\)/\\1/' $W/*.asciidoc
158 |
159 | tags tags-all: tags-ad
160 | etags --append --langdef=markdown --langmap=markdown:.md --regex-markdown='/^=+\\(.*\\)/\\1/' $W/*.md
161 |
--------------------------------------------------------------------------------
/Archive/Original-README.md:
--------------------------------------------------------------------------------
1 | # riscv-CMOs
2 |
3 | Cache Management Operations (CMOs) for RISC-V
4 |
5 | * Created by: Stephano Cetola
6 | * Requested by: Andy Glew
7 |
8 |
9 | * TBD: working group
10 | * TBD: riscv mailing list
11 |
12 | ## Charter
13 |
14 | The Cache Management Operation, or CMO, task group intends to define data cache
15 | management operations for the RISC-V architecture, providing support for use-cases
16 | such as software-managed cache coherence, power management, persistent storage,
17 | security, and RAS. In the process, a data cache model will be developed, and the
18 | interactions of CMOs with the memory ordering model will be specified. In addition,
19 | the CMO specification will attempt to minimize the requirements on system design
20 | and will not prescribe a specific cache state model or cache coherence protocol.
21 | The CMO TG will coordinate with other RISC-V committees and task groups and with
22 | external parties to ensure consistency and interoperability with respect to any
23 | cache-related features and extensions.
24 |
25 | ## related GitHub repos and wikis for CMOs TG
26 |
27 | * top: https://github.com/riscv/riscv-CMOs
28 | * for admin stuff like minutes, drafts
29 | * top-wiki: https://github.com/riscv/riscv-CMOs/wiki
30 | * discuss: https://github.com/riscv/riscv-CMOs-discuss
31 | * members can add/change
32 | * mainly in wiki: top-wiki: https://github.com/riscv/riscv-CMOs-discuss/wiki
33 |
34 | * git clone --recurse git@github.com:riscv/riscv-CMOs.git
35 | =>
36 | ```
37 | $> tree -d riscv-CMOs/
38 | riscv-CMOs/
39 | |-- admin
40 | |-- agendas-and-minutes
41 | |-- discussion-files
42 | |-- riscv-CMOs-discuss
43 | | |-- discussion-files
44 | | `-- riscv-CMOs-discuss.wiki
45 | `-- riscv-CMOs.wiki
46 | |-- files
47 | `-- skins
48 | ```
49 |
50 | Note that riscv-CMOs/wiki and riscv-CMOs-discuss/wiki are duplicated (artifact of original creation 2020-11-13, should be cleaned up soon),
51 | as are some related files referred to by wiki.
52 |
53 | ## Wiki-centric
54 |
55 | The active work on the proposal is in the wiki.
56 | Eventually it may be moved to the main repository,
57 | although there are tools to assemble the actual proposed spec for
58 | publication from the wiki directly.
59 | Such tools,
60 | as well as things like highlighting unfinished parts of the proposal on the wiki,
61 | will be placed in the main repository, i.e. here.
62 |
63 | Q: is there a way to treat this project on github, both "main git repo" and "wiki git repo", as the same object? Otherwise will just check out the repos separately, and coordinate.
64 | * 5/7/2020: set up wiki as a submodule of repo
65 |
66 | ## Key wiki pages [@](https://github.com/riscv/riscv-CMOs/wiki)
67 | * [RISC-V needs CMOs, and hence a CMO Working Group](https://github.com/riscv/riscv-CMOs/wiki/RISC-V-needs-CMOs%2C-and-hence-a-CMO-Working-Group)
68 | * email seeking WG approval and call for participation
69 | * [CMOs WG Draft Proposed Charter](https://github.com/riscv/riscv-CMOs/wiki/CMOs-WG-Draft-Proposed-Charter)
70 | * [Draft CMO proposals](https://github.com/riscv/riscv-CMOs/wiki/Draft-CMO-proposals)
71 | (for that matter, also the other parts of the project, like issues)
72 | * [[generated-HTML-and-PDF-for-CMOs-proposal]] (local)
73 | * on web: https://github.com/riscv/riscv-CMOs/wiki/generated-HTML-and-PDF-for-CMOs-proposal
74 |
75 |
76 |
77 |
78 |
79 | ## Links
80 |
81 | Project on GitHub:
82 | * https://github.com/riscv/riscv-CMOs
83 | * https://github.com/riscv/riscv-CMOs/wiki
84 | * TBD: links that work when checked out locally as well as on GitHub
85 |
86 | Relative, when checked out
87 | * relative href="../../wiki",
88 | * if you have cloned both project git repos, code and wiki
89 | * this may link to your local clone, rather than back to github
90 |
91 |
92 | ## Originally from
93 |
94 | * https://github.com/AndyGlew/Ri5-stuff
95 | * https://github.com/AndyGlew/Ri5-stuff.wiki
96 |
--------------------------------------------------------------------------------
/Archive/Ri5-CMOs-proposal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/Archive/Ri5-CMOs-proposal.pdf
--------------------------------------------------------------------------------
/Archive/admin/README.admin.md:
--------------------------------------------------------------------------------
1 | riscv-CMOs/admin/README.admin
2 |
3 | this directory for "admin", "sysadmin", "webmastery" stuff
4 |
5 | for the RISC-V CMOs TG GitHub repos
6 |
7 | https://github.com/riscv/riscv-CMOs
8 | and
9 | https://github.com/riscv/riscv-CMOs-discuss
10 |
11 | including their associated wikis, etc.
12 |
13 |
14 | See subdirectories such as WIP (Work In Progress)
15 | and (eventually) COMPLETED or the like.
16 |
--------------------------------------------------------------------------------
/Archive/admin/WIP/separating_riscv-CMOs_and_riscv-CMOs-discuss.md:
--------------------------------------------------------------------------------
1 | Whining:
2 |
3 | Should this WIP entry be in the issue tracker, wiki, or repo admin/WIP? yes/no/maybe to all.
4 |
5 | Damn, I hate GitHub's limitations!
6 |
7 | The wiki doesn't have subdirectories, so gets messy.
8 | The repo supports Markdown, but not wiki style [link targets that do not exist yet].
9 | The issue tracker gets messy, like so many issue trackers.
10 | Oh, for twiki or foswiki... Or for MEMEX, as we may think
11 |
12 | ---
13 |
14 | Originally: single repo+wiki riscv-CMOs, containing both proposal and wiki.
15 |
16 | Want:
17 | * TG members able to post to wiki
18 | * but draft proposals, minutes, etc., writeable by only a few people.
19 |
20 | Since GitHub access control seems to be per-repo, we are splitting the original repo into two:
21 |
22 | riscv-CMOs
23 | * to contain protected stuff like drafts and minutes
24 |
25 | riscv-CMOs-discuss
26 | * writeable by TG members
27 | * e.g. wiki
28 | * TBD: publish how TG members can gain wiki post access
29 |
30 | # DONE
31 |
32 | 2020-11-09 ... 10: Ag requested new repo, waffled on names
33 |
34 | 2020-11-11: Stephano Cetola set up new repo, named the riscv-CMOs-members
35 |
36 | 2020-11-12:
37 | * Ag renamed it riscv-CMOs-discuss
38 | * duplicated old->new, both repo and wiki (full git history)\
39 | * fixed submodules so that old repo->old wiki, new repo->new wiki
40 |
41 | # To Do
42 |
43 | * Split content - deleting and/or disentangling -discuss and non-discuss content
44 |
45 | * most repo files will stay in riscv-CMOs, non-discuss
46 | * leave README, etc., in riscv-CMOs-discuss repo and wiki pointing to the old repo (and vice versa)
47 |
48 | * wiki files
49 | * some will stay in the old riscv-CMOs wiki, some in the new
50 | * some will need to be edited, fixed up, disentangled
51 |
52 | * issue tracker
53 | * fortunately did not propagate when repo+wiki history transferred.
54 | * good for this task, but sometimes wanted in other situations.
55 |
56 | * draft proposal
57 | * currently in wiki - duplicated in old and new
58 | * verify can still build in old place
59 |
60 | * decide if should be removed from new, and from wiki overall
61 | * ==> will break wiki links all over
62 | * GitHub wiki not good for tracking wiki page renaming and deletions :-(
63 |
64 | * once the basic admin stuff is removed from riscv-CMOs-discuss, open it up to TG members to use
65 | * publish how to get access to wiki
66 |
67 | * update crosslinks in other CMO TG places
68 | * old and new repo + wiki on GitHub
69 | * RISC-V Confluence wiki
70 | * RISC-V Google drive pages
71 | * groups.io mailing list pages, files, wiki etc. (yet another wiki :-( )
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/Archive/agendas-and-minutes/2020-09-23_CMOs-TG_meeting:
--------------------------------------------------------------------------------
1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename.
2 |
3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes.
4 |
5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives.
6 |
--------------------------------------------------------------------------------
/Archive/agendas-and-minutes/2020-09-23_CMOs-TG_meeting_out-of-cycle:
--------------------------------------------------------------------------------
1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename.
2 |
3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes.
4 |
5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives.
6 |
--------------------------------------------------------------------------------
/Archive/agendas-and-minutes/2020-09-23_CMOs_TG_meeting:
--------------------------------------------------------------------------------
1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename.
2 |
3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes.
4 |
5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives.
6 |
--------------------------------------------------------------------------------
/Archive/agendas-and-minutes/2020-09-28_CMOs-TG_meeting_recurring:
--------------------------------------------------------------------------------
1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename.
2 |
3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes.
4 |
5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives.
6 |
--------------------------------------------------------------------------------
/Archive/agendas-and-minutes/2020-10-12_CMOs-TG_meeting:
--------------------------------------------------------------------------------
1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename.
2 |
3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes.
4 |
5 | TBD: move the actual minutes and notes here, and/or link to else-wherever, e.g. email archives.
6 |
--------------------------------------------------------------------------------
/Archive/agendas-and-minutes/2020-10-26_CMOs-TG_meeting:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Archive/agendas-and-minutes/2020-11-09_CMOs-TG_cache_index_ops:
--------------------------------------------------------------------------------
1 | This is a placeholder for minutes wrt the meeting of the CMOs TG whose date corresponds to the filename.
2 |
3 | Placeholder created as soon as I saw announcement of Ri5 standard for TG minutes.
4 |
5 | TBD: move the actual minutes and notes here, and/or link to wherever else, e.g. email archives.
6 |
--------------------------------------------------------------------------------
/Archive/agendas-and-minutes/2020-11-23_CMOs-TG--agenda--surrender.mediawiki:
--------------------------------------------------------------------------------
1 | Agenda suggested by DK and Ag for meeting.
2 |
3 | [[2020-11-23_CMOs-TG--agenda--surrender.mediawiki|agenda]]
4 | [[2020-11-23_CMOs-TG--minutes--surrender.mediawiki|minutes]]
5 |
6 | __TOC__
7 |
8 | = Item #0 - administrivia =
9 |
10 | Separate GitHub repos have been set up
11 | * https://github.com/riscv/riscv-CMOs
12 | * https://github.com/riscv/riscv-CMOs-discuss
13 |
14 | To post to -discuss you need a GitHub account, and then email Ag or DK.
15 |
16 | Repos originally cloned, WIP to diverge.
17 |
18 |
19 |
20 | = Item #1 - Surrender wrt Range Oriented CMO.UR and CMO.AR =
21 |
22 | In the interests of making progress on other issues, and unblocking I/D consistency (DW) J-extension
23 |
24 | Am reworking spec for CBO.EA, CBO.UX, CMO.ALL
25 |
26 | Will leave CMO.UR and CMO.UX in for now, but heavily flag - just to collect input.
27 |
28 | Encodings: would like to set RD=X0, to permit CMO.UR or CMO.AR in future (if we eventually go that way, as Ag/KA/AW hope.)
29 |
30 | TBD: discovery.
31 |
32 | Fall-out: CMO.ALL perf
33 |
34 | == Item #1' - different block sizes for INVAL and FLUSH ==
35 |
36 | 4-sector cache -- CBO.INVAL.{EA,UX} could easily be faster than CBO.FLUSH.{EA,UX}.
37 | Performance matters... especially if used for security
38 |
39 | DK thinks Ag is just trying to sneak ranges in by the back door.
40 |
41 |
42 |
43 | == Item #2 - Use case for Power Mgmt/Persistence ==
44 |
45 | Flush vs Clean ... to whatever level will persist
46 | * e.g. to battery backed DRAM
47 | * e.g. to persistence cache, if removing power from some (but not all) CPUs
48 | * NVRAM
49 | ** multiple levels... 1st copy, RAID, Remote
50 | **
51 |
52 |
53 | Q: does this mean more levels than
54 | * POC(P*) <-- SW coherency
55 | * POC(P*,IO*) <-- IO
56 |
57 |
58 | = Bonus Time =
59 |
60 | == legal transformations of CMOs ==
61 |
62 | Is it legal for an implementation to transform CLEAN --> FLUSH?
63 | * AFAIK yes
64 |
65 | Is it legal for an implementation to transform INVAL --> FLUSH?
66 | * Yes, for DMA I/O
67 | * Defeats purpose for performance.
68 |
69 | Separate instances within use case
70 | * Flush dirty data, so that it doesn't overwrite non-coherent writes from others
71 | ** FLUSH
72 | ** INVAL - if you don't care about data in cache
73 | *** Advanced: Safer Inval / Clean and Zero
74 | * Eliminate Stale Data (should be no dirty data)
75 | ** CLEAN
76 | ** INVAL - if you don't care about data in cache
77 | ** Advanced: INVAL-CLEAN (not dirty)
78 |
79 | On a WT cache
80 | * FLUSH == INVAL
81 | * CLEAN == NOP
82 |
83 | When using CMOs for performance, can they be ignored?
84 | * A: Yes? ... but would require separate instruction encodings
85 | * advisory/hint vs mandatory
86 |
87 | == local/global ==
88 | * "local" => flush path from P to $ specified...
89 | * "global" == flush all in coherence domain
90 |
91 | * obviously, noncoherent => "local"
92 | * obviously(?), noncoherent I/O use cases => global (flush all in coherence domain)
93 |
94 | Q: do we provide separate encodings? ... for local/global
95 |
96 | Who might ever want to have both global and local?
97 | * Global: NC I/O
98 | * Local:
99 | ** performance (performance wants full control)
100 | *** e.g. producer/consumer performance through shared coherent cache
101 |
102 | Observation: no local operations on coherent system => non-coherent may be faster
103 | * Glew opinion:
104 | ** I would like to make coherent as fast as possible, approaching non-coherent in as many cases as possible
105 | ** I would like to remove performance as a reason not to do coherent.
106 |
--------------------------------------------------------------------------------
/Archive/agendas-and-minutes/README.md:
--------------------------------------------------------------------------------
1 | Minutes for the RISC-V CMOs TG
2 | Cache Management Operations
3 |
4 | 2020-11-11: Ri5 TG leadership announced policy/standard for storing meeting minutes so that people can find them easily.
5 | * in GitHub repo for TG
6 | * not wiki :-(
7 | * subfolder agendas-and-minutes
8 | * datestamped filename prefix YYYY-MM-DD
9 |
10 | Further rules for this CMOs TG:
11 |
12 | * YYYY-MM-DD_CMOs-TG_meeting
13 | * _meeting, to distinguish from other items that may be stored here
14 | * _meeting, not _minutes, because more often informal and incomplete notes rather than formal and complete minutes
15 | * YYYY-MM-DD_CMOs-TG_agenda
16 | * if we announce an agenda in advance - if by email, please also record here
17 | * it is OK to place both agenda and notes/minutes in the same _meeting file
18 | * YYYY-MM-DD_CMOs-TG_meeting_OTHER_STUFF
19 | * can add OTHER_STUFF, like main topic, presentation, to datestamped filename - useful in browsing
20 |
21 | * if more than one meeting in a day, add timestamp YYYY-MM-DD_hhmm_CMOs-TG_meeting...
22 | * still sorts with other datestamped but not timestamped filenames
23 | * not ISO8601 - e.g. colons : not legal in Windows filenames. T reduces legibility.
24 | * hhmm - not hh only - following at least that part of ISO8601
25 |
26 | * Examples:
27 | *
28 |
29 | * Least Common Denominator filenames
30 | * brief
31 | * alpha (a-z A-Z), numeric (0-9), punctuation -_ ...
32 | * avoid whitespace, non-windows characters <>:"/\|?*
33 | * typically use _ or - instead of whitespace
34 |
35 | TBD: move/link elsewhere the more detailed LCD filenames stuff
36 | * why
37 | * Users/members have already had problems cloning repo filenames containing characters like : that are illegal on Windows
38 | * Filename length limits: ??? - keep short, but not too short ...
39 | * Avoid characters
40 | * Windows: <>:"/\|?*
41 | * Convenience: no whitespace (including space and newline)
42 | * use underscore _ where whitespace would be natural
43 | * avoid the usual special filenames such as . or .., initial ~, ...
44 | * try to avoid filenames that can be used for exploits, like `"'{}[]() ...
45 | * regrets
46 | * common punctuation can really improve readability - but is often a problem. E.g. ?!()[]{}...
47 |
48 |
49 |
50 | Prior minutes for this TG are in email, and certain other places.
51 |
52 | 2020-11-11: creating placeholders for old minutes - basically empty files
53 | * recording that meeting was held
54 | * TBD: copy/move, and/or provide links to existing minutes/material
55 |
--------------------------------------------------------------------------------
/Archive/discussion-files/RISC_V_range_CMOs_bad_v1.00.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/Archive/discussion-files/RISC_V_range_CMOs_bad_v1.00.pdf
--------------------------------------------------------------------------------
/Archive/hack-relative-URLs-in-github-project-main-repo.md:
--------------------------------------------------------------------------------
1 | This github project has at least two git repos, the main repo and the wiki repo.
2 | * main: https://github.com/AndyGlew/Ri5-stuff.git
3 | * wiki https://github.com/AndyGlew/Ri5-stuff.wiki.git
4 |
5 |
6 | I want to use relative links between the workspaces that ordinarily correspond to these repos,
7 | * both on the github website and when I have cloned elsewhere
8 | * e.g. so that I can clone both together and be consistent
9 | * TBD: exact (automated) procedure to clone both repos and stay relatively consistent
10 |
11 | Markdown syntax relative links fail:
12 | * [[..]] - \[[..]]
13 | * [[../../wiki]] - \[[../../wiki]]
14 |
15 | Trying HTML relative links:
16 | * FAIL: href="wiki" relative - \href="wiki" relative]\
17 | * fails because it resolves to https://github.com/AndyGlew/Ri5-stuff/blob/master/wiki,
18 | * i.e. the relative position is not https://github.com/AndyGlew/Ri5-stuff but is blob/master/wiki
19 | * which tells us what we need to know
20 | * href="." relative underneath that.
21 | * href=".." relative - I doubt that such an "escape upwards" will work, but... WOW! it works
22 | * href="../.." relative
23 | * href="../../wiki" relative YIPPEE! can link from main to wiki
24 | * href="../../.." relative https://github.com/AndyGlew/,
25 | * href="../../../.." relative https://github.com,
26 |
27 | I am so used to websites not allowing ascending relative components in URLs that I wonder if there is a security hole here... There should not be, as long as one cannot actually escape an area mapped to the logged in user or guest.
28 |
29 | Recording this in two places:
30 | * main: https://github.com/AndyGlew/Ri5-stuff/blob/master/hack-relative-URLs-in-github-project-main-repo.md
31 | * wiki: https://github.com/AndyGlew/Ri5-stuff/wiki/hack-relative-URLs-in-github-project-wiki-repo
32 | TBD: can I CSE this stuff, transclude, to reduce duplication?
33 |
34 |
35 | Bottom line: relative links
36 | * from wiki
37 | * to project "root" from wiki: href=".."
38 | * to main from wiki: href="../blob/master/README.md"
39 | * to user "root" from wiki: href="../.."
40 | * from main
41 | * to project "root" from main: href="../.."
42 | * to user "root" from main: href="../../.."
43 | * to wiki from main: href="../../wiki"
44 |
--------------------------------------------------------------------------------
/Archive/wiki/======================================TOC-spacer.asciidoc:
--------------------------------------------------------------------------------
1 |
2 | // strange name ================================TOC-space.asciidoc
3 | // so that it appears as visual space in the file that does the includes of the subfiles.
4 |
5 |
6 | :leveloffset: 0
7 |
8 |
9 | :!sectnums:
10 |
11 |
12 | == .
13 |
14 |
15 | // this section serves solely as a space in the TOC (Table Of Contents), between numbered content and appendixes
16 |
17 | :sectnums:
18 |
--------------------------------------------------------------------------------
/Archive/wiki/Actual-CMO-Operations.md:
--------------------------------------------------------------------------------
1 | #
2 |
3 | The spreadsheet [CMOs.xlsx](https://github.com/riscv/riscv-CMOs/commits/master/CMOs.xlsx)
4 | is a list of some of the desired CMO operations. It is by no means a complete list.
5 |
6 | The version uploaded as of 2020-04-30_08.04.31 (TBD: provide link to GitHub version) counts these.
7 | These counts suggest the regular format in the next [section](#Regular-format).
8 |
9 | # Regular format
10 |
11 | | por | bits | name | description |
12 | | --- | --- | --- | --- |
13 | | n? | 1 | LG | 0=>local, 1=> global |
14 | | y | 3 | scope | e.g. cache level to flush to although sometimes not strictly a level (8 encodings used) |
15 | | y | 4 | cmop | operation type ?? encodings used |
16 | | n | 1 | sec | security related, 0=no, 1=> flush predictors and prefetchers |
17 |
18 | Issue:
19 | * LG: should we just assume that all CMOs must be "shoot down", applicable to all of a coherence domain?
20 | * saves one bit, at the cost of performance for some "advanced" cases (like some supercomputers)
21 | * sec: do we need the sec bit for address range CMO.VAR, or only for "whole cache"?
22 | * this saves one bit for the most expensive CMO.VAR instruction format
23 |
24 | Bottom line: we can fit into 7 bits by making compromises, 8 bits fairly easily, although 9 bits is all rows above, and I would prefer 10 bits.
25 |
26 | Not orthogonal: a very few operations require write permission, but not enough to warrant an orthogonal bit.
27 |
28 | ## Scope encodings
29 |
30 | We get away with "only" eight encodings, three bits, by overloading - using the same encoding to indicate slightly different things for outbound operations (pushes/flushes) and inbound operations (pulls/prefetches).
31 |
32 | | for push CMOs | for pull CMOs (prefetches) |
33 | | --| -- |
34 | | to pou(I$,D$) | to I$ |
35 | | to pou coherent processor caches | to L1 D$
36 | | to pou non-coherent processor caches | to L2$ pou(I,D )
37 | | to pou non-coherent I/O | to L3$
38 | | to ordinary DRAM | from NVRAM to DRAM
39 | | to battery backed up DRAM
40 | | to NVRAM ( first point of persistence)
41 | | to all NVRAM (full persistence) |
42 |
43 | we can of course argue about details, to try to reduce the count
44 | * do we need to have two points of NVRAM persistence, first and all?
45 | * e.g. Keith Packard
46 | * e.g. HP "Machine" (TBD: ref)
47 | * do we need to distinguish DRAM from battery backed DRAM
48 | * there are existence proofs, but we don't necessarily need to order them
49 | * do we need to distinguish processor coherence from I/O coherence?
50 | * could I/O coherence be just DRAM
51 |
52 | But at the very least, I am sure that most people agree that we need at least four scopes, and probably more. => 3 bits. My biggest concern is that we should probably provide four bits rather than three.
53 |
54 | NOT HANDLED:
55 | * Prefetch operations might want to "skip" certain cache levels
56 | * e.g. fetch into L1 but no other levels
57 | * e.g. fetch into L1 and L3 but not L2
58 | * Prefetch operations that want to stop - may want only to prefetch into L1 from L2 or L3,
59 | but not from DRAM if missing L3 (to avoid saturating DRAM bus)
60 | * CMOs that specify remote caches
61 | * e.g. P1 executes a CMO to prefetch/flush into some other processor P2's cache
62 | * like ARM stashing
63 |
64 |
65 | ## [[CMO operation list for encodings]]
66 |
67 | Placing this into a separate wiki page to make the table easier to edit.
68 | * too hard to edit in long page
69 | * would use section editing, except that GitHub wiki does not have that
70 | * would use transclusion, except GitHub wiki does not have that
71 |
72 | The table uses B+x? syntax to indicate priority classes
73 |
74 | | Count | Priority / Extension
75 | | --- | --- |
76 | | 5 | Base
77 | | 1 | +xIO | invalidate clean better / more secure way for noncoherent I/O
78 | | 2 | + xD+ | safer discards - easier to secure safest discard is ZALLOC/DCBZ without a cache target and bus support
79 | | 1 | +xLRU |
80 | | 1 | +xPE | PREFETCH-E
81 | | 2 | +xL | fetch and lock
82 | | 2 | +xxLP | private RAM/ROM versions of fetch and lock
83 | | 1 | +xA | no-fill ALLOC, like DCBA (security hole, but some still want speed)
84 | | 2 | +xZ | ZALLOC ... + LOCK | zero allocate, e.g. DCBZ
85 | | 1 | +xW | way locking ...
86 |
87 | Bottom line: 5 base CMO types => 3 bits.
88 |
89 | 17 with all of the above => 5 bits (i.e. more than 4)
90 |
91 | B+xZ+xL+xLRU gets us to 9 encodings => highly likely that we will need more than 3 bits.
92 |
93 | # Excel spreadsheet "CMOS.xlsx"
94 | Thursday, April 9, 2020-04-09:
95 | * originally (2020-04-09) in personal in GitHub repo at https://github.com/AndyGlew/Ri5-stuff/blob/master/CMOs.xlsx
96 | * now (2020-08-12) in official location https://github.com/riscv/riscv-CMOs/blob/master/CMOs.xlsx
97 | * (probably has more recent copies elsewhere, e.g. personal machine or cloud Drive)
98 | is a "list" of CMOs. Not exactly a list, more like a table from which the actual list can be generated.
99 | Many rows of the table can be expanded into several different CMO operations
100 | with different privilege requirements, caches affected, etc.
101 |
102 | TBD: actually generate a "flat" list. Preferably by script, so that I can automatically go back between the expanded list and a compact form that is folded with common sub expressions that is easier to understand.
103 |
104 | [[Why CMOs.xlsx was written in Excel]]
105 |
106 |
107 | # OLD, Obsolete
108 | [[Quick and dirty list of Actual CMOs]]
--------------------------------------------------------------------------------
/Archive/wiki/Administrivia---CMOS-TG.md:
--------------------------------------------------------------------------------
1 | Sept 2020: [[Administrivia - CMOS TG]]
2 | * mailing list running - lots of traffic
3 | * archives at https://lists.riscv.org/g/tech-cmo/topics
4 | * first meetings scheduled
5 | * see RISC-V TG calendar https://sites.google.com/a/riscv.org/risc-v-staff/home/tech-groups-cal
6 | * no, there is no way you can get a filtered view just for the groups you are a member of
7 | * not in this new Google based calendar system - unlike the older RISC-V calendar system
8 | * regular recurring meetings: starting Monday Sept 28, 2020, 9am US Pacific time, every 2nd Monday thereafter
9 | * [Email announcing Regular meetings, Mon 9am (US Pacific), starting 9/28](https://lists.riscv.org/g/tech-cmo/topics?p=recentpostdate/sticky,,,100,1,0,76916100#thread76916100)
10 | * first meeting, earlier, out-of-cycle:
11 | * [Email announcing first meeting of CMO TG, out-of-cycle, Wed 9-23, 5pm ](https://lists.riscv.org/g/tech-cmo/topic/out_of_cycle_meeting_rv/76917401?p=,,,100,0,0,0::recentpostdate%2Fsticky,,,100,1,0,76917401)
12 | * testing to see how/if notifications of repo and wiki posts get sent to list (and if that will be too annoying)
13 | * it seems that main repo integrations will get email notifications, but not wiki checkins
14 | * both look too annoying - I will probably disable them by default. Folks can add them themselves if they wish.
15 |
16 |
17 |
--------------------------------------------------------------------------------
/Archive/wiki/Administrivia.md:
--------------------------------------------------------------------------------
1 |
2 | Cache Management Operations (CMOs) for RISC-V
3 |
4 | * Created by: Stephano Cetola
5 | * Requested by: Andy Glew
6 |
7 | TBD: working group
8 |
9 | TBD: riscv group, including mailing list
10 | * should eventually have the typical riscv.org setup,
11 | probably https://lists.riscv.org/g/tech-CMOs
12 | but not set up yet
13 |
14 | GitHub locations
15 | * https://github.com/riscv/riscv-CMOs
16 | * https://github.com/riscv/riscv-CMOs/wiki
17 | * TBD: links that work when checked out locally as well as on GitHub
18 |
19 |
20 | Note: wiki more than repo:
21 | * at the moment / at start, much of this will be in the wiki rather than the repo
22 | * since much of my (Glew's) initial proposal was arranged in such a wiki-centric manner, with the repo mainly for build tools
23 | * if this continues, the repo will mainly hold support stuff such as Makefile and tools to generate documents
24 | * TBD: set up git modules so that repo has wiki as a submodule, and versioned together
25 | * see https://github.com/AndyGlew/Test-GitHub-stuff/wiki/Q%3A-why-am-I-trying-to-do-non-wiki-stuff-in-the-GitHub-wiki%3F
26 |
--------------------------------------------------------------------------------
/Archive/wiki/Agenda-for-CMOs-TG.md:
--------------------------------------------------------------------------------
1 | This is a suggested list of agenda items, topics, for the RISC-V CMOs Tech Group.
2 |
3 | This is NOT supposed to be a [[List of interesting topics related to CMOs]]. That should live elsewhere, although it will undoubtedly be crosslinked with this agenda.
4 |
5 | This agenda is intended to reflect technical decisions and issues that must be settled in order to achieve consensus as to one or more RISC-V extensions for CMOs.
6 |
7 | # Location History of CMOs TG Agenda
8 |
9 | Current home of agenda - see [CMOs TG agenda on Google Drive](https://docs.google.com/document/d/1--__AiQkusBoIGCLiXfod_yXCfjwQTmdWEnvCa3hbBw/edit#)
10 |
11 | Originally created in https://github.com/riscv/riscv-CMOs/wiki/Agenda-for-CMOs-TG
12 |
13 | Then moved to https://lists.riscv.org/g/tech-cmo/wiki/23454
14 | when I realized that the GitHub wiki did not allow copy/paste of HTML text with links
15 |
16 | And now moved to Google Docs https://docs.google.com/document/d/1--__AiQkusBoIGCLiXfod_yXCfjwQTmdWEnvCa3hbBw/edit#
17 |
18 | When I started worrying about continuous incremental backup.
19 |
20 | I then quickly learned how much I hate the fact that Google Docs does not allow you to create links to pages that do not exist yet the way wiki does. This was almost enough to make me switch over to using Microsoft OneNote using OneDrive - but the lack of nested sections within sections sent me back to Google Drive.
21 |
22 | There is no single satisfactory solution, that matches all of my criteria.
23 |
--------------------------------------------------------------------------------
/Archive/wiki/Arguments-against-address-range-CMO.AR.md:
--------------------------------------------------------------------------------
1 | As expected, address range CMO.AR has been quite contentious. See the mailing list archives for lots of discussion.
2 |
3 | Derek Williams of IBM prepared a large slide set arguing that address range is a bad idea.
4 | * [Derek Williams (IBM) has prepared a large slide set arguing that address range CMO.AR is a bad idea](https://github.com/riscv/riscv-CMOs/blob/master/discussion-files/RISC_V_range_CMOs_bad_v1.00.pdf): https://github.com/riscv/riscv-CMOs/blob/master/discussion-files/RISC_V_range_CMOs_bad_v1.00.pdf
5 |
6 |
7 |
--------------------------------------------------------------------------------
/Archive/wiki/CBO.UX-vs-CMO.ALL-vs-CMO.UR.asciidoc.md:
--------------------------------------------------------------------------------
1 | Some traditional RISC ISAs have instructions that invalidate by (set,way).
2 | [[if bound to an instruction]] we call this CBO.UX.?? -- CBO standing for "Cache Block Operation", UX standing for " microarchitecture index" e.g. (set,way), ?? being other fields such as the actual operation (CLEAN, DISCARD, INVALIDATE, INVALIDATE-S), and cache(s) involved.
3 |
4 | Problems with CBO.UX include:
5 |
6 | * exposing microarchitecture details to code that might otherwise be portable
7 | * inability to take advantage of hardware optimizations like bulk invalidates and state machines
8 |
9 | Nevertheless, this is in many ways the simplest possible approach
10 |
11 | Code that uses this operation to invalidate an entire cache looks like
12 |
13 | nEntries := read # of entries from config ...
14 | FOR n FROM 0 to nEntries DO
15 | CBO.UX rs1:nEntries
16 |
17 | Code that uses this operation to invalidate a single cache line, e.g. as read from a machine check error report
18 |
19 | numEntry := read error CSR
20 | CBO.UX rs1:numEntry
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 | Many machines have FSMs that iterate over the entire cache specified, and/or bulk invalidates that "instantaneously" invalidate a cache for some operations and/or some entries. [[If bound to an instruction]] we call this CMO.ALL.$id.
29 |
30 | Problems with CMO.ALL include
31 |
32 | * interruptability/restartability with partial progress
33 | ** frequently CMO.ALL implementations are not interruptible.
34 | *** This is not acceptable for many systems, especially real-time.
35 | ** if interruptible, issues with restartability
36 | *** CMO.ALL can be made restartable with partial progress if there is state like a CSR from which it resumes on return from an interrupt.
37 | **** but we dislike adding new state
38 | *** or, CMO.ALL may be interruptible but may have to resume from the beginning on return from interrupt
39 | **** forward progress problems => highly undesirable
40 |
41 |
42 | This proposal defines a CMO.UR instruction in such a way that allows <>,
43 | with a loop such as that below:
44 |
45 | include::microarchitecture-range-loop.asciidoc[]
46 |
47 |
--------------------------------------------------------------------------------
/Archive/wiki/CMO-goals.md:
--------------------------------------------------------------------------------
1 | Goals:
2 | * it should be possible for some or even most CMOs to be invoked from user mode unprivileged code, but privileged code must be able to control or forbid unprivileged access to CMOs
3 | * implementations can range from simple, one cache line at a time, to more complicated state machines
4 | * transparent support for events such as page faults, debug exceptions, machine check error exceptions, etc.
5 | * no virtualization holes - e.g. CMOs do not allow the user to observe page faults, except by timing as is already possible
6 | * long-duration CMOs can be interrupted, i.e. are nonblocking to the hart that is running them. They can be resumed partway along, and do not have to restart from scratch. Conversely, if such interruptability interferes with the guarantees that security usage models require, this must be exposed and possibly prevented if privilege allows
7 | * transparency/interruptability/resumability means that
8 | * on an exception the PC points to the CMO instruction, not the instruction after it
9 | * the OS is not required to parse the CMO instruction in order to determine how to handle exceptions such as page faults
10 | * ordinary X scalar registers are modified to indicate partial progress, and are read back on exception return.
11 |
--------------------------------------------------------------------------------
/Archive/wiki/CMO-operation-list-for-encodings.md:
--------------------------------------------------------------------------------
1 | (This page [[CMO operation list for encodings]]
2 | extracted from parent [[Actual CMO Operations]]
3 | because GitHub wiki doesn't have section editing
4 | (but also lacks transclusion :-( ) )
5 |
6 |
7 | The list below ... + annotated according to priority / possible extensions (trying to diet, reduce to <= 8, 3 bits)
8 | * B = base - surely must have
9 | * +x?? - possible extensions
10 |
11 |
12 |
13 | | priority | rw | name | detail |
14 | | --- | --- | --- | --- |
15 | | |
16 | | B | r | WRITEBACK IBM: CLEAN | dirty --wb--> clean, clean-->unaffected
17 | | B | r | WB-INVALIDATE IBM: FLUSH | dirty --wb--> invalid, clean --> invalid
18 | | +xIO | r | INVALIDATE CLEAN | clean --> invalid dirty --> unaffected secure suitable for NC I/O
19 | | B | w | INVALIDATE IBM: DISCARD | clean --> invalid, dirty -- no wb --> invalid e.g. non-coherent I/O, reset
20 | | +xD+ | w | safer discards | see elsewhere
21 | | |
22 | | +xLRU| r | Set LRU | wish: prefetches/loads/stores that have LRU / not MRU / non-temporal hints
23 | | |
24 | | B | r | PREFETCH-R | PREFETCH-X has I$ target | ?? eliminate by making PREFETCH-R with I$ target multilevel I$
25 | | B | r | PREFETCH-W | prefetch to write, may be clean or dirty
26 | | +xPE| r | ? PREFETCH-E | prefetch as if to write, but must be clean may need to update outer $/DRAM on way
27 | | |
28 | | +xL | r | FETCH-W + LOCK | like creating local writable copy of shared RAM
29 | | +xL | r | FETCH-R + LOCK | like creating local copy of shared ROM
30 | | +xxLP | r | FETCH-E + LOCK | like creating private ROM
31 | | +xxLP | r | FETCH-EW + LOCK | like creating private RAM
32 | | |
33 | | .xA | w | NO-FILL ALLOC | like DCBA (security hole)
34 | | .xZ | w | ZALLOC | like DCBZ
35 | | .xZ | w | ZALLOC + LOCK | like creating local RAM TBD: private / shared
36 | | |
37 | | .xW | r | way locking | beyond scope, way mask separate
38 |
39 |
--------------------------------------------------------------------------------
/Archive/wiki/CMOs-(Cache-Management-Operations).md:
--------------------------------------------------------------------------------
1 | ## Recent
2 |
3 | Soon: RISC-V Foundation Working Group TBD
4 |
5 | [[Ri5-CMOs-proposal]]
6 | * See [[generated HTML and PDF for CMOs proposal]]
7 |
8 | ### History
9 |
10 | This history section is very much out of date, see instead that within [[Ri5-CMOs-proposal]].
11 |
12 | Wednesday, May 6, 2020-05-06:
13 | * switching to use asciidoc for actual proposal: [[CMOs-proposal]] links to [[Ri5-CMOs-proposal]]
14 | * See [[generated HTML and PDF for CMOs proposal]]
15 | * actual proposal WIP: [[Ri5-CMOs-proposal]]
16 | * files converted from wiki to asciidoc ("draft" prefix distinguishes)
17 | * [[draft Privilege for CMOs]] <-- [[Privilege for CMOs]]
18 | * [[draft-Fixed-Block-Size-Prefetches-and-CMOs]] <-- [[Fixed-Block-Size-Prefetches-and-CMOs]]
19 |
20 |
21 | Finished stuff? - to be converted to asciidoc / draft
22 |
23 | * [[Privilege for CMOs]]
24 | * finish the [[Actual CMO operations]] list
25 | * finish the [[Semi-formal Abstract Model for CMOs]]
26 | * TBD: transcribe to wiki from the OneNote notebook and email where this was written up.
27 | * needs: How don cache flushes on non-inclusive caches wArm
28 | * lots of rationale and explanation
29 |
30 |
31 | ## Terminology
32 |
33 | Briefly: this document, at this time, uses the term "CMO" (Cache Management Operation) generically for operations that have mandatory semantics (like cache flushes for purposes of software managed consistency or security timing channel mitigation) but also operations that have optional semantics (such as prefetch instructions as well as hints that a cache line is no longer needed). See [[Terminology for instructions that manage microarchitecture state such as caches, prefetchers and predictors]].
34 |
35 |
36 | ## Converging on Proposals
37 |
38 | It is eventually necessary to converge on a single proposal. While this proposal may not be final, and different parts may be at different stages of maturity, the links here are to what I believe are the latest and greatest.
39 |
40 |
41 | ## CMO Instruction Formats
42 |
43 | * [[Fixed Block Size Prefetches and CMOs]]
44 | * [[STATUS: almost done - maybe]]
45 | * AW OK, most reviewers so far okay
46 | * Instruction encodings chosen
47 | * [[Instruction Name Choice]] - my suggestions, but I expect to be overruled
48 | * SUMMARY:
49 | * 64 byte fixed size block
50 | * PREFETCH.64B.R and PREFETCH.64B.W: Memory[reg+imm12], i.e. I–format with RD=0
51 | * CMO.64B.CLEAN, CMO.64B.FLUSH: Memory[reg], e.g. R–format, but only need one register
52 |
53 | * [[Variable Address Range CMOs]]
54 | * STATUS: converging, expect arguments
55 | * 01-23-2020: reviewers have accepted explanation of register definitions suitable for interruptability, but still think the 2 acceptable definitions are "strange". I am trying to guess which one will be most acceptable.
56 | * ISSUE: [[CMO-types issue]]: abstraction, efficiency, extensibility
57 |
58 | * [[Microarchitecture Structure Range CMOs]]
59 | * STATUS:
60 | * Recent
61 | * 03-02-2020: changes after AW discussion
62 | * 01-16-2020: reviewers rejected overloading address range CMOs for efficiency :-(
63 | * 01-20-2020: new proposal [[Non-Address Based CMOs for Abstraction and Efficiency]]
64 | * 01-22-2020: first SW/OS reviewer okay on concept, hardware reviewer interested but questioning
65 | * ISSUE: [[CMO-types issue]]: abstraction, efficiency, extensibility
66 |
67 | ## [[Actual CMO Operations]]
68 |
69 | The section and linked pages above discuss the CMO instruction formats
70 |
71 | The page [[Actual CMO Operations]] discusses the actual cache management operations such as:
72 | * CLEAN: write back dirty data, but leave clean data behind in structure
73 | * FLUSH: writeback dirty data, and invalidate all data in structure
74 | * Invalidate Branch Predictors and Prefetchers: e.g. for timing channel mitigation
75 |
76 | ## [[Privilege for CMOs]]
77 |
78 | Actual proposal: [[Privilege for CMOs]]
79 |
80 | Further discussion and/or rationale
81 | * [[I am frustrated that we are going around in circles with respect to modulation of CMOs]] - I hope the new subproposal [[Privilege for CMOs]] breaks us out of this nonproductive spin loop
82 | * [[interception and modulation of CMOs]]
83 |
84 | ## [[Semi-formal Abstract Model for CMOs]]
85 |
86 | TBD: transcribed to wiki from the OneNote notebook and email where this was written up.
87 |
88 | Overview:
89 | * most abstract: the operations a user wishes to perform
90 | * implementation dependent: the operations that HW provides. including, e.g., arbitrary numbers and levels of caches
91 | * intermediate level of abstraction between the above: abstract HW CMO operations
92 | * restricting levels of the memory hierarchy
93 | ...
94 |
95 |
96 | ## Stuff along the way
97 |
98 | Some of this stuff along the way will be rejected alternatives, nevertheless preserved, e.g. in case they need to be revived. Other of this stuff along the way constitutes rationales and explanations, which may be used, rewritten, or reorganized in support of the converged proposal.
99 |
100 | TBD: eliminate obviously dated and obsolete stuff, which can always be obtained from the git history, or at least tag it as dated and obsolete with references to the up-to-date stuff. TBD: separate final or near final from historical stuff.
101 |
102 | [[Overview of CMO operations]] - why needed, goals, etc.
103 |
104 | [[Quick and Dirty Proposal for RISC-V CMOs]]
105 |
106 | * [[An even quicker and dirtier summary of proposed instruction encodings for RISC-V CMOs]]
107 |
--------------------------------------------------------------------------------
/Archive/wiki/CMOs-Not-Based-on-Memory-Address.md:
--------------------------------------------------------------------------------
1 | Some applications only need to flush known addresses or address ranges out of their caches.
2 |
3 | * E.g. a JIT code generator may know precisely what instructions it has generated, and conversely which old instructions it has zeroed or overwritten, so it knows precisely what address range needs to be invalidated from a noncoherent I cache.
4 |
5 | Other applications don't know the address ranges.
6 |
7 | * E.g. security code targeting timing channels may not know what addresses the user program it is switching between has touched. It is unrealistic to flush all possible addresses, O(size of address space). Such security code only really needs to touch the caches that it is worried about. E.g. in the seL4 example from Gernot Heiser, the microkernel only needs to flush all of the L1 I$ and D$, not the L2.
8 | * E.g. even security code that is doing its own flushes, e.g. of a lookup table and memory that might provide a cache residency channel, while it might know the size of the lookup table, it also has to flush all other addresses that map to the same sets in the cache as its own data. This suggests a hybrid... That I will go into right now.
9 |
10 | * E.g. software coherency management by the operating system for user processes that touch a lot of memory. As in HPC systems.
11 |
12 | Reviewers of the early versions of this CMO proposal emphasized that it was important to have such "whole cache invalidates" as well as address range invalidates. In fact, for security, they said address range invalidates were useless.
13 |
14 | I had hoped that an address range invalidate that was larger than an entire cache might be optimized to invalidate the cache, not every cache line in the address space. However, other reviewers prefer not to have that optimization.
15 |
16 | ---
17 |
18 | Briefly: the possibility of monolithic instructions like Intel x86 WBINVD and INVD
19 | * WBINVD is typically a microcode scan, and inherently O(number of dirty lines) if not O(number of lines in cache)
20 | * INVD may be O(number of lines in cache), or it may be O(1) complexity if there is a [[bulk invalidate]] operation
21 | O(N) scans that are not interruptible are a problem.
22 |
23 |
24 | The traditional way of doing efficient, interruptible, non-address range cache invalidates is to do something like
25 |
26 | LOOP over caches and predictors
27 | Read the particular cache parameters, number of sets, number of ways, from something like CPUID
28 | FOR s FROM 0 TO number of sets
29 | FOR w FROM 0 TO number of ways
30 | flush or invalidate (set,way)
31 |
32 | Obviously this has many issues:
33 | * it exposes the microarchitecture
34 | * you may need to do this for multiple caches, and all software may not be aware of new caches
35 | * the very concept of way associativity is questionable in some modern computer architecture work, e.g. skewed associativity
36 | * skewed associativity does not break things if a loop such as the above is used to invalidate the entire cache
37 | * but skewed associativity breaks things if the user assumes that it understands the function that hashes address lines to sets within the cache, and tries to be smart and save work by only invalidating particular sets.
38 | * Privilege issues
39 | * we want to be able to do invalidates in user mode. The above cannot be allowed in general. ... TBD: I must be faster
40 | * set/way locking
41 |
42 | ---
43 |
44 | # My proposal for non-address range CMOs
45 |
46 | Early in time, near boot
47 | OS is assumed to have investigated the CPUID cache configuration
48 | (especially if it were in some format like XML the way I would prefer to be in order to be extensible)
49 |
50 | Early in program, or near boot time
51 | cmo_handle <-- syscall by user to OS saying "this is what I want to invalidate"
52 | user may have inspected cache configuration from CPUID
53 | or OS may have done so, and have heuristics that give user more abstraction
54 |
55 |
56 | At point where the CMO is needed
57 | ...
58 | t0 <-- read _time
59 | regCmoIndex := maximum positive signed integer, E.g. 0x7FFF.FFFF on RV32
60 | LOOP
61 | CMO.UR( src regH:=cmo_handle, src_dst regIndex )
62 | BAD: rd: regIndex_end <--- CMO.UR( rs1 src regH:=cmo_handle, src rs2 regIndex_start )
63 | UNTIL regIndex <= 0
64 |
65 | GH: FENCE here until all done
66 |
67 | WAIT until t0+delta
68 |
69 |
70 |
71 | CMO.UR( src regH:=cmo_handle, rd:start_addr rs1:end_addr )
72 |
73 | CMO.UR.( src regH:=cmo_handle, rd:start_addr rs1:end_addr )
74 |
75 | * 1 flush/clean
76 | * 1 mandatory/advisory
77 | * bitmask
78 | * I1, I0, D1, BP, BTB, RSI
79 | * on chip predictors
80 | * external
81 |
82 | O(1)
83 |
84 |
85 |
86 |
87 |
88 | Expect: e.g. if invalidated a cache with 256 entries
89 |
90 | CMO.UR called with regIndex = 0x7F...
91 | => map to 255
92 | => then decrement
93 |
94 | GH: I asked Gernot about exposing the index space
95 |
96 |
97 |
98 | Multiple Caches / predictors ...
99 |
100 |
101 | Index space
102 |
103 | 0-255 L1$ I cache
104 |
105 | 256-512 D$
106 |
107 | 1G-1G+256M => outermost
108 |
109 |
110 |
111 | GH: initiate WB
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 | GH: flush D-cache concurrently with any others.
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 | GH: I asked if final regIndex < 0 ==> errors is a potential hole
137 | GH: doesn't want user
138 | GH:
139 |
140 |
141 |
142 |
143 | cmo_handle
144 | bit 0 = 0 => abstract as above
145 | =1 hardwired parameter
146 | bitmap of which caches and predictors
147 |
148 |
149 | defaults?
150 | r0 => a reasonable default
151 |
--------------------------------------------------------------------------------
/Archive/wiki/CMOs-WG-Draft-Proposed-Charter.md:
--------------------------------------------------------------------------------
1 | [[Examples of other Working Group charters]]
2 | - CMO group charter modelled on ...
3 | * [[Example: Config WG charter]]
4 |
5 |
6 | The following proposed charter is probably too long for the Technical steering committee.
7 |
8 | Some, but probably not all, of these details, explanations, and requirements about what is and is not in scope for the CMO working group may be worked out once the CMO working group has started.
9 |
10 |
11 | # CMO Task Group Charter
12 |
13 | Acronym: CMOs = Cache Management Operations
14 |
15 | The CMO Task Group will:
16 | * define instructions (and CSRs if necessary) performing cache management operations
17 |
18 |
19 | Requirement:
20 | * CMO instructions may be executed by user mode (if system software permits)
21 | * however, system software must have the ability to prevent less privileged software from executing CMO instructions
22 |
23 | Therefore, it is proposed that the CMO working group will be a subgroup reporting to the RISC-V Privileged Architecture task group.
24 |
25 |
26 | Use cases for CMO instructions include:
27 | * security
28 | * e.g. flushing microarchitecture state to mitigate timing channel security vulnerabilities such as Spectre
29 | * hence "CMOs" will be extended to cover branch predictors, prefetchers, and other microarchitecture state that affects performance
30 | * software managed cache coherence when hardware cache coherence is not available or incomplete
31 | * e.g. incoherent I/O DMAs
32 | * e.g. multiprocessor systems where cache coherence is not available between all nodes
33 | * e.g. interaction with external hardware accelerators that may not implement hardware cache coherence
34 | * performance tuning
35 | * e.g. evicting data no longer needed between program phases, to avoid thrashing data that is needed across program phases
36 | * *possibly* cache prefetch instructions and/or cache usage pattern hints
37 | * power management
38 | * e.g. flushing caches to battery backed-up DRAM, or NVRAM
39 | * persistence for reliability
40 | * e.g. flushing caches to RAID NVRAM and/or remote state
41 | * e.g. cache flushes for checkpointing of long-running applications in HPC systems
42 | * debugging
43 | * e.g. external hardware debuggers may need to write instructions or memory in systems lacking cache coherence
44 |
45 | CMOs cut across many domains, ranging from simple microcontroller systems with no hardware cache coherency,
46 | through cache coherent application and server processors, through HPC systems.
47 | The CMO working group will coordinate with the task groups and working groups and standing committees for these areas of overlap.
48 |
49 | The goal of any CMO ISA extension proposals will be to permit portable software in all or most of the above use cases.
50 | The CMO task group will only define a set of CMO instructions that can reasonably be expected to be portable.
51 | If not applicable to an implementation such CMO instructions will do nothing. (e.g. flushing dirty data in a system that does not have writeback caches).
52 |
53 | It is expected that implementations may have cache microarchitecture and hence cache flushes that will not be part of the standard CMO instruction set.
54 | However, there will be worst-case maximally conservative CMO instructions that can flush all caches including such implementation specific caches.
55 | Implementations are expected to have less conservative more precise cache flushes that are not part of the standard CMO instruction set.
56 |
57 |
58 | The CMO working group will not:
59 | * define the instruction/data coherence instructions necessary for on-the-fly code generation, e.g. in the J extension
60 | * however, the CMO working group will coordinate with the working groups defining instruction/data coherence
61 | * certain CMO instructions will probably overlap, e.g. flushing the instruction caches
62 | * the CMO working group will NOT address TLB shootdown or ASID coherency
63 | * the CMO working group will NOT define config/discovery mechanisms to allow software (system or user) to determine the cache microarchitecture
64 | * the CMO working group will NOT define cache protocols
65 | * e.g. CMOs will assume that caches can contain clean and/or dirty data, but no more states than that
66 | * the CMO working group will *probably NOT* define cache modes such as no-fill, which may be required to perform reliable hardware reset
67 |
68 | Requirement: CMO instructions *must* work with the most common cache microarchitectures, including
69 | * strictly inclusive and exclusive
70 | * non-strictly inclusive and exclusive hierarchies
71 | *
72 |
73 | Requirement: implementations of varying levels of sophistication
74 | * it *must* be possible to implement CMO instructions a cache line at a time
75 | * it must be possible (and reasonably good performance) to implement CMO instructions by trapping to M mode
76 | * desirable: bulk flush, e.g. invalidating clean data without writing back
77 | * desirable: implementations using hardware state machines
78 |
79 | It is expected that the CPU will not necessarily know in advance all of the caches in a system. Requirement: it must be reasonable to interface CPU CMO instructions to control external caches (e.g. so that portable software can reliably do things like mitigate cache timing channels for security). Example of such an interface: Trapping the CMO instructions to M mode and emulating them via system specific mechanisms to flush external caches.
--------------------------------------------------------------------------------
/Archive/wiki/CMOs-cut-across-many-fields.md:
--------------------------------------------------------------------------------
1 | We have explained that CMOs are a crosscutting issue in many places, listing examples of domains involved, including
2 | * the initial email (TBD: link)
3 | * the proposed charter
4 | * many wiki pages
5 |
6 | It would be nice to have a master list of such examples.
7 | This is in many ways a first cut at a list of use cases -
8 | although I would want such a list to be inclusive, including even wild ass examples,
9 | whereas eventually the list of use cases will undoubtedly be pruned.
10 | [TBD: link to Glew rant about how use cases can lead to blinkered thinking]
11 |
12 |
13 | Along this line:
14 |
15 | * [[Mailing lists interested in CMOs]]
16 |
17 |
--------------------------------------------------------------------------------
/Archive/wiki/CMOs-proportional-to-cache-size-rather-than-address-range.md:
--------------------------------------------------------------------------------
1 | CMOs based on virtual or physical addresses, whether fixed size or variable ranges, are easy to express in a portable manner. It is also easy to make such memory address based CMOs available to user code - perform the permission checks implied by page tables virtual addresses and/or physical memory permission structures like the RISC-V PMPs/sPMPs.
2 |
3 | The big problem with memory address range based CMOs is that they are often significantly less efficient than CMOs based on cache microarchitecture. For example, it is horribly wasteful to have to scan an address range of size 4 GB when you know that the largest cache of concern is only 4 MB in size. If we assume that an operation has to be done for every 64B cache line, the address based scan touches 2^26 cache lines, whereas the cache size based scan touches only 2^16 cache lines.
4 |
5 | However, it is TBD whether we can architect a reasonably portable solution for CMOs based on cache microarchitecture, which I might call CMOs proportional to cache size rather than address range. See that last page for a tentative proposal.
6 |
7 | # Optimizing large address range CMOs into efficient cache size proportional CMOs
8 |
9 | One possible approach is to allow an implementation of a variable range CMO.VAR.* over [lo,hi) to perform an efficient cache size based scan
10 |
11 | * e.g. if hi-lo, the size of the region, is less than the size of the cache
12 | * i.e. if we can guarantee that there are no lines that need to be flushed that are not in the cache
13 | * although this might fail for some noninclusive cache architectures (such as Intel L1 and L2 (or MLC) caches; although modern Intel LLCs or snoop filters are inclusive)
14 | * and for "funky" mappings of memory addresses to cache (set,way) locations
15 |
16 | Reviewers of this CMO proposal were surprisingly resistant to allowing this optimization. Partly because of justifiable FUD of unanticipated consequences. Partly because some such reviewers anticipated implementing the variable range CMO.VAR.* in terms of per cache line CMO operations, so would not have the opportunity to perform these "physical cache parameter optimizations". Indeed, the possibility of such optimizations is one of the big motivations for implementing variable address range CMOs by a state machine (or equivalently by smart software aware of the cache structure).
17 |
18 | # [[CMOs based on cache microarchitecture]]
19 |
20 | ... TBD ... loop based on (set,way) structure,
21 | i.e. addressing cache lines directly
22 |
23 | Obviously exposes microarchitecture. Probably not desirable to expose to user mode.
24 |
25 | Problematic when there are multiple levels of cache:
26 |
27 | May need to loop over caches, and within each cache over all possible lines within the cache
28 |
29 | Inclusive cache architectures with backwards invalidate can eliminate some but not all of that complexity
30 |
31 |
32 | # Abstracting Efficient Cache Size Proportional CMOs
33 |
34 | ... I think we can do this. But I know that I'm going to be crucified for "complexity". Although that just might be my Intel PTSD speaking.
35 |
--------------------------------------------------------------------------------
/Archive/wiki/CMOs-proposal.md:
--------------------------------------------------------------------------------
1 | See [[Ri5-CMOs-proposal]]
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Archive/wiki/Draft-CMO-proposals.md:
--------------------------------------------------------------------------------
1 | * [[Draft CMO proposals]]
2 | * I (Glew) created a draft CMO proposal
3 | * old: originally in my personal GitHub https://github.com/AndyGlew/Ri5-stuff/wiki/Ri5-CMOs-proposal
4 | * TBD: remove reference
5 | * new: https://github.com/riscv/riscv-CMOs/wiki/Ri5-CMOs-proposal
6 | * [[generated-HTML-and-PDF-for-CMOs-proposal]] (local)
7 | * on web: https://github.com/riscv/riscv-CMOs/wiki/generated-HTML-and-PDF-for-CMOs-proposal
8 |
9 | * See issue https://github.com/riscv/riscv-CMOs/issues/2
10 | * update issue with migration status
11 |
12 | I expect other working group members will have their own proposals, whether full or in part, and will provide links here as they become available.
13 |
14 | TBD: links to generated versions
15 | * OLD: Glew personal wiki: https://github.com/AndyGlew/Ri5-stuff/wiki/generated-HTML-and-PDF-for-CMOs-proposal
16 | * NEW: official : https://github.com/riscv/riscv-CMOs/wiki/generated-HTML-and-PDF-for-CMOs-proposal
17 |
--------------------------------------------------------------------------------
/Archive/wiki/Example-of-Config-WG-charter.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | > From: Tim Newsome
4 | >
5 | > Sent: Monday, July 27, 2020 12:09PM
6 | >
7 | > To: Tech-Config
8 | >
9 | > Subject: Re: [RISC-V] [tech-config] updated charter
10 | >
11 | >
12 | > On 7/27/2020 12:09 PM, Tim Newsome wrote:
13 | >
14 | > The proposal is in the pull request on github.
15 |
16 | > https://github.com/riscv/configuration-structure/pull/5
17 |
18 | > (You can see the actual text by clicking the "Files changed" link.)
19 | >
20 | > If anybody disagrees with that language, please comment on github or
21 | > start an e-mail discussion with your proposed change and
22 | > reasoning. Until you see your new text appear in that pull request,
23 | > it will not be voted on at the next meeting, so speak up repeatedly
24 | > if you have to. All I've seen so far is people here and there
25 | > mentioning that maybe something would be nice or is also
26 | > relevant. If I missed something, please repeat it.
27 | >
28 | > Tim
29 |
30 | # Task Group Charter
31 |
32 | The Configuration Structure Task Group will:
33 | * Specify syntax and semantics for a static data structure that can accommodate
34 | all implementation parameters of RISC-V standards: the configuration
35 | structure. There will be two configuration structure formats: a
36 | machine-readable format intended to be embedded in hardware, and a
37 | human-readable format intended for people to work with directly.
38 | * Specify how M-mode software can discover and access any present
39 | machine-readable configuration structures.
40 | * Provide a tool that can translate between the machine-readable and
41 | human-readable formats.
42 |
43 | Implementation parameters are details that a RISC-V specification explicitly
44 | leaves up to an implementation. This includes hart-specific details like the
45 | kinds of hardware triggers supported, as well as details that are outside
46 | harts such as the supported abstract debug commands.
47 |
48 | The configuration structure should:
49 | * be flexible enough that future task groups won’t feel the need to
50 | create another structure used to describe implementation parameters.
51 | * be easy to translate into other data structures.
52 |
53 | The configuration structure is intended to be used:
54 | * to describe RISC-V hardware profiles
55 | * by firmware and BIOSes during the boot process
56 | * by debuggers
57 | * by a tool chain to build software tailored to a configuration profile
58 |
--------------------------------------------------------------------------------
/Archive/wiki/Examples-of-other-Working-Group-charters.md:
--------------------------------------------------------------------------------
1 | [[Example: Config WG charter]]
2 |
--------------------------------------------------------------------------------
/Archive/wiki/Extended-CMO-types.md:
--------------------------------------------------------------------------------
1 | There are more types of CMOs
2 | * than are represented in computer architecture textbooks
3 | * than can be fit into small number of instruction encodings.
4 |
5 | Therefore, I propose
6 |
7 | a) that can be placed in a encoding that maps to one of 2 CSRs that contains the
8 |
9 | or
10 |
11 | b) that can be placed in a register operand passed to the CMO instruction.
12 |
13 |
14 | The first approach, using CSRs to hold the cmo_type, is IMHO preferred, because it exposes less of the microarchitecture while supporting greater architectural flexibility, and requires less complexity to make secure (non-forgeable). The latter approach, placing the cmo_type in a register operand is correspondingly deprecated, and is not part of the active proposal.
15 |
16 |
17 | = in a CSR
18 |
19 | The last is used to say "use the specified in CSR_TBD".
20 |
21 | The ISA does not define the format of the , although this proposal provides a basic recommendation.
22 |
23 | I propose that OS or platform specific software abstract things as follows:
24 | * User code makes a system call that tells the OS, e.g. "I am only trying to synchronize with threads/processes running on other harts/CPUs with which I share an L3$, so flush/invalidate the L1$, L2$, and everything all the way to the L3$, but don't flush the L3$ or L4$." - when the standard flush operations would also flush the L4.
25 | * OS determines if the user is allowed to do the operation, error if not
26 | * OS determines the implementation dependent encodings to be placed in the CSR
27 | * OS returns to user
28 | * user can now use the CMO.* instructions with =use CSR that contains
29 | * OS knows that the user is allowed to use the CMO, because it tested it at the time it was set up.
30 |
31 | = in a register input
32 |
33 | Instead of
34 | * CMO.VAR.. rd, rs1
35 | * rd=nbytes, rs1=hi_addr
36 | * CMO.FSZ... rs1
37 | * rs1=addr
38 |
39 | Use an additional register
40 | * CMO.VAR.. rd, rs1, rs2
41 | * rd=nbytes, rs1=hi_addr, rs2=
42 | * CMO.FSZ... rs1, rs2
43 | * rs1=addr, rs2=
44 |
45 | I would prefer that the ISA did NOT define the format.
46 | I propose that OS or platform specific software abstract things as follows:
47 | * User code makes a system call that tells the OS, e.g. "I am only trying to synchronize with threads/processes running on other harts/CPUs with which I share an L3$, so flush/invalidate the L1$, L2$, and everything all the way to the L3$, but don't flush the L3$ or L4$." - when the standard flush operations would also flush the L4.
48 | * OS determines if the user is allowed to do the operation, error if not
49 | * OS returns to the user an encoding that it can pass as the rs2 cmo_type value above.
50 | * user can now use the CMO.* instructions with rs2=value returned by OS
51 | * However, OS must prevent user from forging access to CMOs that they should not be allowed.
52 | * e.g. it may be a handle number, mapped to a full CMO encoding in a table, with table index checks
53 | * or OS may have loaded a list of permitted encodings, that HW must check user provided value against.
54 |
55 | = encoding - reference implementation
56 |
57 | I would prefer that the ISA did NOT define the format.
58 |
59 | But nevertheless I want to provide a reference example.
60 |
61 | Bits in an XLEN register value
62 |
63 | * 1-bit:
64 | * writeback dirty data
65 | * invalidate dirty data without writing back - security sensitive!!!
66 | * 1-bit:
67 | * invalidate all lines scanned
68 | * leave clean lines
69 | * 1-bit: I: applies to all caches that can hold instructions
70 | * 1-bit D: applies to all caches that can hold data
71 | * note: bitmask, so can CMO I-only, D-only, or both
72 | * 3-bits: cache depth
73 | * systems with L0..L4 caches are available nowadays - this allows up to 8 levels of hierarchy
74 | * cache numbering is system specific, e.g. the L1/L2 may be exclusive
75 | * 3-bits: virtual/physical guest/host ...
76 | * 000 = (guest) virtual
77 | * 001 = (guest) physical
78 | * 010 = host virtual
79 | * 011 = host physical
80 | * ... reserved
81 | * 1-bit: use cache uarch parameters
82 | * 3-bits: cache number
83 | * 1-bit: flush all
84 | * 16-bits: way mask <-- e.g. if user is given only certain ways for isolation
85 | * ??
86 | * 8-bits: value to be placed on a bus transaction to flush external caches outside the CPU.
87 |
88 |
89 | * pou = I & D
90 | * poc
91 | * pop = point of persistence (battery backed-up DRAM)
92 | * pop = point of persistence (NVRAM)
93 |
94 |
95 |
96 |
97 | It can be seen that this can quickly exceed 32-bits. And I am not trying very hard.
98 |
99 | Nevertheless, this format is NOT part of the architecture. Just a suggestion.
100 |
--------------------------------------------------------------------------------
/Archive/wiki/How-to-search-this-wiki,-repo,-issues,-etc..md:
--------------------------------------------------------------------------------
1 | I was remarkably slow figuring out how to search this wiki.
2 |
3 | See [HOW-TO: search wiki on GitHub](https://1drv.ms/u/s!AsM0rpNELR4xgQm9sCzSiwsl_KjJ?wd=target%28HOW-TO.one%7C9FD63E37-17E4-4075-AEB2-593259BBE1C5%2FHOW-TO%3A%20search%20wiki%20on%20GitHub%7CAC5F362B-239A-416C-8405-D59AD13F1791%2F%29)
4 |
5 | BRIEF: standard GitHub search using the search box in the upper left-hand corner works, but you must remember to change the "view", to look at the wiki items returned. By default you will only be looking at the code items, and there may be none. WARNING: if you have a narrow window, you may not see the count of wiki items returned by the search. if you miss the horizontal scrollbar ...
6 |
7 | The [search](../search?q=ENTER-QUERY-HERE&type=wikis) link here and in the sidebar is an attempt to make this friendlier, by setting the view returned by the link to be wikis, but you will have to fill in the search term in the new page.
8 |
9 | ---
10 |
11 | In fact, I was on the verge of giving up on the GitHub wiki, instead migrating to RISC V's new official Confluence wiki, https://wiki.riscv.org.
12 | See [CMO TG page on wiki.riscv.org](https://wiki.riscv.org/display/TECH/CMO+%28Cache+Management+Operations%29+TG).
13 |
14 | I may still migrate to Confluence, to get other good features like copy/paste of links and formatted text and images, and better access control.
15 | But at least now I figured out how to search this GitHub wiki.
--------------------------------------------------------------------------------
/Archive/wiki/I-am-frustrated-that-we-are-going-around-in-circles--with-respect-to--modulation-of-CMOs.md:
--------------------------------------------------------------------------------
1 | Current proposal, hoping to break this deadlock: [[Privilege for CMOs]]
2 |
3 | # Interception, Modulation, and Mapping of CMOs
4 |
5 | # Original Proposal: CSRs and system calls.
6 |
7 | My (Ag's) original proposal looked something like this:
8 |
9 | * CMO instructions that contain a CSR operand, along with an address or a (set,way) cache entry number that indicated what to flush
10 | * A CSR operand for each such CMO instruction, that contained an encoding that indicated which caches and branch predictors need to be flushed
11 |
12 | however, it is necessary to accompany this with a system call:
13 | 1. since the user cannot write such a CSR directly
14 | 2. since different software systems may allow (some) users to perform a CMO, while the same or other software systems may disallow (some) users from performing that same CMO
15 | * i.e. the privilege required for a CMO depends on the system software. It is NOT KNOWN to CPU hardware or the ISA
16 | 3. since there needs to be a mapping between abstract user level CMO's and the operations that the hardware actually performs
17 |
18 | Mapping
19 |
20 | local cluster
21 | HW coherent MOESI
22 | SW coherence
23 | between clusters
24 |
25 | SW P -> C
26 |
27 | MOESI
28 | * flush all dirty data in local cluster to the poc(P,C)
29 | MESI
30 | * no thread migration
31 | * flush local CPU only
32 | * thread - flush all cluster
33 |
34 |
35 | Point_of_Unification = pocvg(P.I,P.D)
36 | * pocvg(P*.I,P*.D)
37 |
38 | Point_of_Coherence = pocvg(P1.D,p2.D;address)
39 |
40 | Point_of_Persistence = pocvg(P1,NVRAM) or pocvg(
41 | *
42 |
43 | Point_of_Serialization = per address
44 | * FENCE.COMPLETION = persistence / SW coherency / MMIO
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/Archive/wiki/ISSUE---process-migration-argues-for-whole-cache-invalidation-operations-and-against-the-partial-progress-loop-construct.md:
--------------------------------------------------------------------------------
1 | // TBD: BUG: the filename with a colon in it seems to cause problems for some tools, like emacs tags-query-replace
2 | // next-file: Opening input file: No such file or directory, /cygdrive/c/Users/glew/Documents/GitHub/Ri5-stuff/Ri5-stuff.wiki/ISSUE!-process-migration-argues-for-whole-cache-invalidation-operations-and-against-the-partial-progress-loop-construct.md
3 | // TBD: rename
4 |
5 | The [[parent page||Non-Address-Based-CMOs-for-Abstraction-and-Efficiency]]
6 | from which this issue was created
7 | said:
8 |
9 | > A thread might migrate from one CPU to another while the CMO loop construct is in progress. If this is done it is the responsibility of the system performing the migration to ensure that the desired semantics are obtained. For example, the code that is being migrated might be restricted to only apply to cache levels common to all processors migrated across. Or similarly the runtime performing the migration might be required to ensure that all necessary caches are consistent.
10 |
11 | Referring to the [[CMO.UR loop construct]]:
12 |
13 | ~~~~~~
14 | reg_for_cmo_index := 1<<(XLEN-1)-1
15 | LOOP
16 | CMO.UR RD:reg_for_cmo_index, RS1:reg_for_cmo_descriptor
17 | UNTIL reg_for_cmo_index <= 0
18 | ~~~~~~
19 |
20 | The definition of the CMO.UR instruction in the parent page, with RD as a source/destination register holding the CMO UR index, allows the CMO.UR instruction to be interruptible and restartable. Interruptability and restartability does not depend on the loop construct above.
21 |
22 | What the loop construct gives us is that it permits non-state machine implementations. E.g. CMO.UR might touch one and only one cache line on each invocation.
23 |
24 | In many situations this CMO.UR loop construct will be executed by privileged code. Probably locked onto a single processor. Not subject to process migration. If this is the case, the loop construct causes no problems.
25 |
26 | However, it is desirable that such cache management operations be performed by code that has the least privilege possible. For example, a user level web browser implementation of a sandbox might wish to flush L1 I cache and D cache timing channels when transitioning between code inside the sandbox and code outside the sandbox. Obviously this would be simplest if the caches involved had no dirty data, e.g. if the L1 data cache were write through, and if there were bulk invalidates. But even caches that contain no dirty data sometimes have no bulk invalidates, and need to sequence over the entries in the cache.
27 |
28 | The possibility of a thread migration while user code is executing the CMO.UR loop construct raises some issues.
29 | (Or, equivalently, a guest OS being migrated by a hypervisor.)
30 |
31 | If the thread that is performing the CMO loop construct is migrated, and if it is invalidating or flushing a cache that is local to its original processor, and not shared, then the semantics are completely ambiguous. Half of the cache flush might be performed on the first processor, half on the second.
32 |
33 | (Note that Derek Williams of IBM has resolved similar issues for the export.I and import.I instruction sequences related to dynamic codegeneration for the J extension. However, as far as I can tell this resolution depends on nonlocal effects for the export.I instruction. That might not be possible for CMOs in general.)
34 |
35 | This page does not propose to resolve this problem.
36 |
37 | This page only wishes to point out that the partial completion loop construct is itself part of the problem.
38 |
39 | If the CMO.UR instruction did not need to be wrapped in the partial completion loop construct then it might be possible for the runtime code that is performing the thread migration to observe the program counter at which the thread that is being migrated lies, determine that it is a CMO.UR instruction, and take the necessary steps. This is because, if the CMO.UR instruction were "whole cache", the PC at the time of migration would unambiguously indicate that a cache management operation is in flight.
40 |
41 | Note that "whole cache" does not mean non-interruptible. The interruptability of the CMO.UR instruction is not at all related to the loop. The interruptability is based on actually being interruptible, and also having source/dest operands so that no special treatment is needed by the interrupt handler. All the loop construct provides is the ability for an implementation not to have a sequencer.
42 |
43 | If the CMO.UR instruction is embedded in the loop construct, it may be difficult for the runtime that is performing the thread migration to determine that a cache management operation is in flight. Certainly the PC does not necessarily point to the CMO.UR instruction. It might be possible to require that the loop be very specific, potentially only the CMO.UR instruction and the end of loop branch. If that were the case, the runtime might be able to detect the CMO loop construct. However, we are on a slippery slope. The CMO loop construct might be very compact, but there could be other operations interleaved in the middle of the loop. Indeed, the CMO loop construct might be compact, but a binary rewriting tool may have inserted other instructions, e.g. for timing, between the instructions. Any deviations make it more and more difficult for the runtime to detect that a cache management operation is in flight.
44 |
45 | If the runtime can detect the cache management operation is in flight, and if that operation semantics is affected by the migration, the runtime has several options
46 | 1. Perhaps the runtime could defer the migration until after the CMOs completed
47 | 2. Perhaps the runtime could complete the operation itself on behalf of the thread, before the thread is migrated (e.g. a hypervisor might complete the operation before migrating a guest OS)
48 | 3. The runtime could complete the operation, but still let the migrated code also think that it is completing the operation. That would lead to redundant invalidations or flushes.
49 |
50 | These options are not available if the runtime cannot easily detect the cache management operation is in flight.
51 |
52 | MORAL: the partial completion loop, a.k.a. the CMO.UR loop construct, can make things more difficult, compared to a sequencer that does a "whole cache operation".
53 |
54 |
55 | --
56 |
57 | Similar problems occur for variable address range based CMOs, CMO.VAR. And indeed, for loops wrapping around fixed block size CMO.FSZ. However, the microarchitecture based invalidations of CMO.UR are inherently more subject to local interpretations than are the address based invalidations of CMO.VAR and CMO.FSZ.
58 |
--------------------------------------------------------------------------------
/Archive/wiki/Instructions-that-Support-Partial-Progress.md:
--------------------------------------------------------------------------------
1 | # Resume vs Restart
2 |
3 | The instructions of most modern computers are "all or nothing". They either execute completely, or if they cannot complete they are stopped, the problem is cleared up, and the instructions are **restarted** as if from the very beginning.
4 |
5 | This has not always been the case. E.g. the Motorola 68000 family of microprocessors had some relatively CISCy instructions, implemented using not just microcode but also nano code, and famously could receive an exception, e.g. a page fault, in the middle of one of these microcode operations. The microcode/microarchitecture state was saved, including in a not publicly documented "stack puke" area on the processor stack. The exception handler could do its job, and then return to the partially completed instruction, picking up where it had left off from the "stack puke" area.
6 |
7 | (I (Ag) vividly remember meeting a Motorola kernel developer when Motorola acquired the OS group that I was then working at. This Motorola kernel developer was adamant that the most reliable way to program an exception handler was to rely only on information in the stack puke area - which was documented to Motorola internal developers, although not to the outside world. He said that there were so many errors in the control and status registers of devices such as the I/O MMU that they could not be relied on.)
8 |
9 | TBD: other examples of resumable instructions.
10 |
11 | # Partial Progress is not necessarily "resume from microcode puke"
12 |
13 | This topic page discusses instructions that are not "all or nothing". Instructions that can support partial progress, permanently commit as much work as possible, and then save state in a form such that the instruction can be resumed without having to repeat any extra work already completed.
14 |
15 | This is not necessarily "resume from microcode or microarchitecture" state. For the purposes of this topic page it is emphatically not.
16 |
17 | In fact, for the purposes of this page the distinction between resume and restart is blurred. The instructions discussed here accomplish their "partial progress" by modifying architectural state. On an exception or other circumstance in which the instruction execution is interrupted, ordinary registers are written. On exception return ordinary registers are read. In some circumstances the registers involved are source/destination; in some circumstances, the instruction is "restarted as if from the beginning", however the starting point, the initial state for the instruction, has been modified, so that it does not need to repeat work already done. Therefore the term "partial progress" as in "instructions that support partial progress" is used rather than "resumable". "Partial progress" instructions may be considered to be either restarted or resumed, or something in between.
18 |
19 | Moreover, this is not an issue of RISC versus CISC. Some of the instructions described here are arguably RISC instructions.
20 |
21 | # Examples of "Partial Progress" - x86 REP STOS and REP MOVS
22 |
23 | Probably the most familiar modern examples (in 2020) of instructions that make partial progress are the x86 block memory operations, REP MOVS and REP STOS. REP STOS fills a block of memory with a value from a register. REP MOVS copies one memory block to another.
24 |
25 | STOS and MOVS are the most prominent members of a family of x86 "string" operations that include CMPS (compare), SCAS (scan), and LODS (load). These "string" operations are composed with repeat prefixes REP (repeat while count not zero), REPE/REPZ and REPNE/REPNZ (repeat while equal/zero or not-equal/non-zero). The string operations are provided in flavors of different sizes - 8-bit byte, 16-bit word, 32-bit double word.
26 |
27 | Architecturally, the string operations such as STOS and MOVS are simple instructions, that are repeated automatically by the REP repeat instruction.
28 |
29 | STOSB performs the following operation
30 | ~~~~~~
31 | STORE.BYTE Memory[ DI ] := AL
32 | DI := DI + (1 IF DF == 0 ELSE -1)
33 | ~~~~~~
34 |
35 | MOVSB performs the following operation
36 | ~~~~~~
37 | tmp := LOAD.BYTE Memory[ SI ]
38 | STORE.BYTE Memory[ DI ] := tmp
39 | SI := SI + (1 IF DF == 0 ELSE -1)
40 | DI := DI + (1 IF DF == 0 ELSE -1)
41 | ~~~~~~
42 |
43 | The repeat prefix REP repeats the string operation to which it is applied, e.g. STOSB or MOVSB, decrements a counter (in register CX/ECX/RCX), and repeats until the counter reaches zero. The conditional versions REP[EZ] and REPN[EZ] can terminate early if a condition is met.
44 |
45 | STOSx and MOVSx can be used as independent instructions.
46 |
47 | REP STOSx and REP MOVSx can be viewed as loops around the "simple" instructions STOSx and MOVSx.
48 |
49 | But most modern x86 systems use "fast strings", and implement REP STOSx and REP MOVSx as if they were combined or fused into a single instruction that performs many simple operations. E.g. instead of REP STOSx storing a byte at a time, the optimized version can store 16, 32 or more bits at a time. The optimized version may use cache protocol operations not available to ordinary instructions. The optimized version behaves as if it were a loop around the simple version, but is optimized to be as efficient as possible. The optimizations may be accomplished by microcode, or by hardware state machines, or by a combination of both.
50 |
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/Archive/wiki/Makefile.OBSOLETE:
--------------------------------------------------------------------------------
1 | # Makefile for Ri5-CMOs-proposal
2 | # in Ri5-stuff.wiki
3 | # https://github.com/AndyGlew/Ri5-stuff/wiki
4 |
5 | # this Makefile is now obsolete
6 | # and should be deleted.
7 | # Its functionality has been moved to
8 | # https://github.com/AndyGlew/Ri5-stuff/Makefile
9 | # where
10 | # https://github.com/AndyGlew/Ri5-stuff
11 | # gets https://github.com/AndyGlew/Ri5-stuff/wiki
12 | # as a submodule.
13 |
14 |
15 |
16 | # As of Wednesday, May 6, 2020-05-06
17 | # The main purpose is to run a command to expand the AsciiDoc include directives
18 | # so that you can get a better idea what it will actually look like
19 |
20 | HTML_VIEWER=/cygdrive/c/Windows/explorer.exe
21 |
22 | open-html-in-browser: Ri5-CMOs-proposal.html
23 | # KLUGE: Windows HTML viewer does not understand / paths
24 | # either need to convert / --> backslash, cd, or cygpath
25 | $(HTML_VIEWER) Ri5-CMOs-proposal.html
26 |
27 | ASCIIDOCTOR=/home/glew/bin/asciidoctor
28 | #TBD: Move asciidoctor to standard location
29 |
30 | Ri5-CMOs-proposal.html: Ri5-CMOs-proposal.asciidoc
31 | $(ASCIIDOCTOR) -b html Ri5-CMOs-proposal.asciidoc -o Ri5-CMOs-proposal.html
32 |
--------------------------------------------------------------------------------
/Archive/wiki/Mandatory-versus-Optional-CMOs,-PREFETCHES,-and-CPHs.md:
--------------------------------------------------------------------------------
1 | NOTE: in my terminology CMO is a generic term, that includes both optional or advisory operations such as PREFETCH instructions and CPH (Cache Prefetch Hint) such as post-store/push-out instructions, but also includes mandatory operations such as cache flushes and invalidations for security timing channels mitigation, software managed coherence, and persistence.
2 |
3 | If you use an alternate terminology where PREFETCH and CMO and CPH instructions are mutually exclusive categories, the concern still arises
4 |
5 | It is traditional that it should be possible to treat performance-related instructions such as PREFETCH and CPH instructions as NOPs. They are optional, and only influence timing, not timing-free program semantics. However, mandatory CMOs such as cache flushes for software managed coherence cannot be treated as NOPs. Arguably, on a machine that does not implement the CMOs, they should be trapped as illegal instructions. Better to trap, and possibly emulate, than to not accomplish what they are supposed to do, and have the program break, although possibly not in obvious ways.
6 |
7 | Unfortunately there is a middle ground: software coherence. On a system that truly lacks some if not all hardware coherence features, the cache flushes and other CMOs required to enable software coherence absolutely must be performed. However, it has happened more than once that such a system was created long ago, and that eventually hardware cache coherence was implemented. In which case such CMOs might be ignored. I.e. whether a CMO is mandatory or optional may depend on the platform configuration e.g. whether hardware cache coherence is implemented or not. (Note: this applies to software coherence, and possibly some forms of power management. It probably does not apply to persistence to NVRAM.)
8 |
9 | A case in point is the EXPORT.I instruction proposed to support dynamic code generation on RISC-V. Some traditional RISC instruction sets do not support I cache consistency with the data cache. On these instruction sets EXPORT.I is required to perform a cache action, essentially invalidating I cache lines. (Complementary instruction IMPORT.I might flush post-cache instruction pipelines). However, some CPUs have decided that it is just plain easier to support I cache consistency. On such machines it may not be necessary for EXPORT.I to invalidate I cache lines. Arguably, EXPORT.I might still need to do stuff related to data stores and instruction fetch pipeline consistency, in conjunction with IMPORT.I. However still other systems have made both EXPORT.I and IMPORT.I unnecessary, and can treat both as NOPs.
10 |
11 | The point here is that there are both instruction set architecture and microarchitecture considerations relevant to mandatory.
12 |
13 | E.g. the EXPORT.I and IMPORT.I functionality is mandatory from an instruction set architecture point of view. But some microarchitectures might make it unnecessary.
14 |
15 | --
16 |
17 | Also, similar cache invalidate and flush operations may be optional for some purposes and mandatory for others.
18 |
19 | For example, cache flushes when treated as CPH (Cache Performance Hints) may be ignored on a system that is hardware consistent. After all, they should only influence performance. In fact, it is probably desirable to have a control that allows them to be enabled or disabled, since quite often cache performance hints and prefetches turn out to be less effective than the predictors and prefetchers of an advanced microarchitecture. However, cache flushes should never be disabled for security related timing channel mitigation.
20 |
21 | I have considered having a mandatory/optional bit in any that is passed to CMO instructions, and possibly also PREFETCH instructions and CPH instructions. (But probably only if in a general-purpose register, or a CSR implicit input operand, for such instructions. It is unlikely that we have enough instruction encoding space to provide such an orthogonal bit if the [[]] is encoded in the instruction itself.)
22 |
23 | Considerations such as the above - the fact that on some microarchitectures CMO optional/mandatory depends on both usage and microarchitecture - suggest that a single mandatory/optional bit is not sufficient. There probably need to be more types of discretion.
24 |
25 | Possibly:
26 | * optional, for performance only. Can always be made into a no-op
27 | * mandatory if no hardware cache coherence, optional (possibly always disabled) if hardware cache coherence
28 | * although note: computer architects often delude themselves into thinking that their system is 100% hardware cache coherent, when in reality the platform in which it is embedded may make it not always hardware cache coherent
29 | * mandatory, e.g. for a cache push out or flush, if the CPU or whatever caches the data is being pushed out from are not retained in the power saving mode. Optional if they are retained.
30 | * note: this is of questionable value, since many systems have multiple power saving modes, some of which retain state in devices such as CPUs, some of which do not.
31 | * always mandatory
32 | * I suspect that security related timing channel mitigation flushes will always be mandatory. Although they may be selective, only applying to certain levels of the cache. And they will probably apply to hardware data structures such as branch predictors as well as to caches.
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/Archive/wiki/Meeting-11-09-2020.md:
--------------------------------------------------------------------------------
1 | Github wiki
2 | - minutes
3 |
4 | Github access
5 | * readable to the world
6 | * write access
7 | * must have GitHub account
8 |
9 | ... AB: wants restricted wiki ....
10 |
11 | ... TBD: Ag fix up ...
12 |
13 | More agenda
14 |
15 |
16 | * Cache index / (set,way)
17 |
18 | ---
19 |
20 | * CMO.SEC
21 |
22 | * CMO.AR
--------------------------------------------------------------------------------
/Archive/wiki/Non-CMO-stuff-to-be-deleted.md:
--------------------------------------------------------------------------------
1 |
2 | The https://github.com/riscv/riscv-CMOs/ repo and wiki
3 | were forked from a personal repo and wiki
4 | https://github.com/AndyGlew/Ri5-stuff
5 | that also contained non-CMO stuff.
6 |
7 | The non-CMO stuff should be deleted.
8 |
9 | ## Other - non-CMO topics (to be deleted from riscv-CMOs repo+wiki)
10 |
11 |
12 | ### Aside:
13 | * [[Problems editing GitHub wiki using speech recognition]]
14 |
15 |
16 | ### Side: multipart LUT4 instruction in RV32 for the crypto extension
17 | * looks like this is now part of the crypto proposal
18 |
19 | ### Supporter:
20 | * [[in-band tagging pointers]]
21 | * I am very interested in proposals for using some bits of pointers to improve... stuff like security, sandboxing, etc.
22 | * although: in my own work, capabilities inspired,
23 | * I have deprecated some such approaches as only 15/16ths secure (4 bits, get it?)
24 | * since my own capabilities projects were either cancelled or emasculated, I'll take 15/16ths over 0/16ths
25 |
26 | ### Possible future topics for RISC-V
27 |
28 | * [[Quantization, dequantization, and interpolation instructions for DL, math, etc.]]
29 |
--------------------------------------------------------------------------------
/Archive/wiki/Overview-of-CMO-operations.md:
--------------------------------------------------------------------------------
1 |
2 | RISC-V systems need cache management operations, aka CMOs. As far as I know, so far such operations have been defined in an implementation specific manner. Other computer architectures define cache management operations, often via a mixture of user level instructions, privileged instructions, and platform specific operations accessed via MMIO control registers. See [[Survey of CMOs in Modern Computer Architectures]].
3 |
4 | Purposes of CMOs include:
5 | * performance tuning
6 | * security, e.g. mitigating Spectre-like information leak security vulnerabilities
7 | * persistence, e.g. nonvolatile RAM in the memory hierarchy
8 | * power management, e.g. flushing caches before removing their power
9 | * software managed cache coherence, e.g. non-coherent I and D caches
10 | * bank switching of physical memory, e.g. HP's "Machine"
11 | * reset, hot plug (not necessarily current high priority)
12 |
13 | These use cases have different needs.
14 | * User level access to these CMOs is desirable in some cases, but not required for all
15 | * Some affect only data and/or instruction caches and related parts of the memory system
16 | * Others, e.g. security, need to influence other microarchitecture state like branch predictors
17 | * Some need to interact with other CPUs, not necessarily RISC-V or from the same vendor, and possibly non-CPU devices
18 | * Some CMOs may be ignored (performance), while others are required for correctness (SW coherency, power management, security)
19 | * Scope
20 | * Some CMOs affect only a smallish fixed-size block like a cache line
21 | * Others affect a range of physical or virtual addresses
22 | * Others want to affect an entire cache, or a partition thereof
23 | * some CMOs may be optimized, e.g. performed in the background
24 | * whereas other CMO use cases may require control over timing
25 |
26 | The biggest problem with CMOs in general is that cache architectures in particular, and microarchitecture state in general, can be highly diverse.
27 | * Cache architecture
28 | * How many levels of I and D? Are I and D unified at some level?
29 | * How many levels, and how big?
30 | * Associativity, skewed
31 | * LRU policy...
32 | * What caches are shared between separate CPUs/harts/other smart devices?
33 | * Mesh versus hierarchical?
34 | * Virtual versus physical
35 | * inclusive versus exclusive versus neither inclusive nor exclusive
36 | * clean/write-through vs dirty/write-back
37 | * does hardware support "flash invalidate", or is it necessary to scan the cache either in software or hardware?
38 | * other microarchitecture
39 | * there are more forms of microarchitecture state, branch predictors, prefetchers, etc. than are imagined by any computer architecture textbook
40 | * security timing channel mitigation requires the ability to flush or reset nearly all such microarchitecture state that influences execution timing.
41 | * Most other applications do not
42 |
43 | The term "CMO (Cache Management Operation)" may be too specific. A more generic term may describe the needs of security and performance management - "microarchitecture state management operations (uSMO)"? Unfortunately, I do not have a good more generic term. For that matter, it is not clear that non-cache state
44 |
45 | This CMO proposal
46 | * defines a small standard set of targeted cache operations
47 | * but also provides a standard way to invoke nonstandard implementation specific cache operations
48 | * e.g. figure out what your application needs to do, which may require knowledge of the CPU and platform architecture
49 | * if nonstandard, ask the OS for permission to do these actions
50 | * use the standard CMO instructions defined here to invoke the nonstandard actions described and encoded above
51 |
52 |
53 |
--------------------------------------------------------------------------------
/Archive/wiki/Privilege-for-CMOs.md:
--------------------------------------------------------------------------------
1 | Aside: I apologize for [[voice typos editing this wiki]]
2 |
3 | Actual proposal/draft [[draft-Privilege-for-CMOs]]
4 | forked from discussion [[Privilege-for-CMOs]]
5 |
6 |
7 | # PROPOSAL:
8 |
9 | Each CMO.VAR.* and CMO.UR.* is mapped to a number 0..Ncmo-1, where Ncmo is the Number of CMO instruction encodings.
10 |
11 | (Note: the encodings do not necessarily have a contiguous field that corresponds to these values.)
12 |
13 | CSR [[CMO_Privilege]] contains Ncmo 2-bit fields where bitfield CMO_Privilege.2b[J] indicates the privilege required to perform the corresponding CMO operation J.
14 |
15 | The 2-bit fields are encoded as follows:
16 | * 00 => disabled.
17 | * 01 => traps to M mode
18 | * 10 => reserved
19 | * 11 => can execute in any mode, including user mode
20 |
21 | The disabled behavior is as follows:
22 |
23 | CMO_Privilege.2b[J] == 00 => CMO.#J
24 | * the instruction does not actually perform any cache maintenance operation.
25 | * but it returns a value such that the [[canonical range CMO loop]] exits
26 | * CMO.VAR rd:next_addr, rs1=rd:start_addr, rs2:stop_addr
27 | * sets RD to stop_addr
28 | * CMO.UR rd:next_entry, rs1:start_entry
29 | * sets RD to -1
30 |
31 | # RATIONALE:
32 |
33 | Requirement: in some CPU implementations all or some CMOs *must* be trapped to M-mode and emulated. E.g. caches that require MMIOs or CSR actions to flush, which are not directly connected to
34 |
35 | Requirement: in some platform configurations some CMOs may *optionally* be trapped to M-mode and emulated. E.g. [[CMOs involving idiosyncratic external caches and devices]], devices that use MMIOs or CSRs to perform CMOs, and which are not (yet?) directly connected to whatever
36 |
37 | Requirement: it is highly desirable to be able to perform CMOs in user mode. E.g. for performance. But also for security, persistence, since everywhere the [[Principle of Least Privilege]] should apply: e.g. the cache management may be performed by a privileged user process, i.e. a process that is part of the operating system but which is running at reduced privilege. In such a system the operating system or hypervisor may choose to context switch the CMO_Privilege CSR, or bitfields therein.
38 |
39 | Requirement: even though it is highly desirable to be able to perform CMOs in user mode, in some situations allowing arbitrary user mode code to perform CMOs is a security vulnerability. Vulnerability possibilities include: information leaks, denial of service, and facilitating RowHammer attacks.
40 |
41 | Requirement: many CMOs should be permitted to user code, e.g. flush dirty data, since they do nothing that user code cannot itself do using ordinary load and store instructions. Such CMOs are typically advisory or performance related. note that doing this using ordinary load and store instructions might require detailed microarchitecture knowledge, or might be unreliable in the presence of speculation that can affect things like LRU bits.
42 |
43 | Requirement: some CMOs should *not* be permitted to user code. E.g. discard or forget dirty data without writing it back. This is a security vulnerability in most situations. (But not all - although the situations in which it is not a security vulnerability are quite rare, e.g. certain varieties of supercomputers, although possibly also privileged software, parts of the OS, running in user mode.)
44 |
45 | Requirement: some CMOs may usefully be disabled.
46 | * Typically performance related CMOs, such as flushing to a shared cache level, or prefetching using the range CMOs. Software is notorious for thinking that it knows the best thing to do.
47 | * Also possibly software based on assumptions that do not apply to the current system
48 | * e.g. system software may be written so that it can work with incoherent MMIO
49 | but may be running on a system that has coherent MMIO
50 | * e.g. persistence software written so that it can work with limited nonvolatile storage
51 | running on a system where all memory is nonvolatile
52 |
53 | Requirement: Sometimes there needs to be a mapping between the CMO that a user wants and the CMOs that hardware provides, where the mapping is not known to CPU hardware, not known to user code, but depends on the operating system and/or runtime, and might dynamically depend on the operating system and/or runtime.
54 | * e.g. For performance related CMOs, the user may only know that she wants to flush whatever caches are smaller than a particular size like 32K. The user does not know which caches those are on a particular system.
55 | * e.g. in software coherence all dirty data written by the sending process P_producer may need to be flushed to a shared cache level so that it can be read by the consuming process P_consumer
56 | * consider if the sending process P_producer is part of a HW coherent cache consistency domain, but the receiving process P_consumer is part of a different such domain
57 | * if the hardware cache consistency domain permits cache-to-cache migration of dirty data, then all caches in that dirty domain must be flushed.
58 | * however, if the hardware cache consistency domain does NOT permit cache-to-cache migration, then
59 | * if the system software performs thread or process migration between CPUs that do not share caches
60 | * without cache flushes => THEN this SW dirty domain must be flushed
61 | * but if the system software performs cache flushes on thread migration,
62 | => THEN only the local processor cache need be flushed.
63 | * if the system software does not perform thread or process migration,
64 | then only the local processor cache need be flushed.
65 | Other processor caches in the HW clean consistency domain do not need to be flushed.
66 |
67 | Optionally trapping such CMOs allows the system or runtime software to choose the most appropriate hardware CMO for the users' need.
68 |
69 | WHINING:
70 | * I had originally planned to define CSR operands for the CMO instructions, both to provide the privilege modulation (trapping, disabling) and mapping functionalities of the requirements listed above.
71 | * key reviewers reject this possibility, and/or suggest providing it only later if the need is proven
72 | * however, these key reviewers CANNOT deny the requirements of enabling or disabling CMOs listed above
73 | * therefore, providing this compact privilege mechanism.
74 | * I am actually just as happy not to define the CSR operand to contain an encoding of CMO operations desired, since I can easily imagine that in some circumstances more than one CSR will be required. E.g. a CSR that might contain a way mask. Therefore, this "permission vector" approach allows the actual CSRs to be defined later, while enabling [[privilege modulation]] today.
75 |
--------------------------------------------------------------------------------
/Archive/wiki/Problems-editing-GitHub-wiki-using-speech-recognition.md:
--------------------------------------------------------------------------------
1 | TBD: move to https://github.com/AndyGlew/Ag-stuff/wiki
2 |
3 | I am having a bit of writer's block composing
4 |
5 | [[CMOs (Cache Management Operations)]],
6 | specifically the list of [[Actual CMO Operations]] for consideration.
7 |
8 | Not only is this long list something I want to be able to automatically extract tables from, keeping in think
9 |
10 | But I'm also just plain having trouble composing this text using Dragon speech recognition, which I use because of my computeritis.
11 |
12 | Oh, shit: I think the problem was that I was using the wrong web browser.
13 |
14 | Dragon speech control was not working in Microsoft Edge. I was confused, hitting my head against the wall, since these were things that I had already been doing! Ironically, I am/was using Microsoft Edge because supposedly Microsoft Edge has better speech support. Most annoying, the extension had been somehow disabled.
15 |
16 | Similarly, speech control is not working in Firefox. Which I was also trying to switch to.
17 |
18 | But... The Dragon extension for chrome is working in my personal chrome profile.
19 |
20 | It was not working in my Si5 chrome profile. I suppose it is a good thing that the different profiles have different sets of extensions, but it tripped me out. It's working now that the extension is installed.
21 |
22 | Yes, now I can...
23 |
24 | Shit, I was pulling out my hair over this for several days !! :-) I would have investigated it earlier, but there was always email that I could quickly reply to.
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/Archive/wiki/Quantization,-dequantization,-and-interpolation-instructions--for-DL,-math,-etc..md:
--------------------------------------------------------------------------------
1 | STATUS: TBD: not proposed yet for RISC-V, but I expect to do so when the time comes
2 |
3 | It is often advantageous in deep learning to *quantize* the data. E.g. to represent 16 or 32 bit data in memory using only two or four bits.
4 | * E.g. dequantizing (expanding or unpacking) the two bit numbers into 16-bit data to perform computations, and then quantizing (compressing or packing) the 16-bit data into two bits to store back in memory. Thereby saving memory bandwidth but not compute bandwidth.
5 | * Some systems can actually do computations in the narrower widths - which essentially amounts to having the dequantization and quantization logic in the pipeline to the arithmetic units. Thereby saving both memory bandwidth and computational unit bandwidth.
6 |
7 | Dequantization at its simplest is essentially a classic indexed lookup table. The vector instruction set has a VRGATHER instruction that accomplishes this, although IIRC its smallest index is eight bits wide. Conceptually it is not difficult to imagine extending VRGATHER to use vectors of two bit or four bit values to perform the indexing. For that matter, memory lookup tables... Although two and four bit quantized values don't really need memory lookup tables.
8 |
9 | Another use case: mapping one of the several varieties of 8-bit floating-point or 8-bit LNS to standard 16-bit or 32-bit floating-point.
10 |
11 | The obvious counterpart to such a dequantization instruction is a quantization instruction.
12 |
13 | Anyway, to quantize, you map something like a 16-bit number to a 2 or 4 bit number.
14 |
15 | Linear quantization is usually not the right thing to do. I.e. it is not just extracting the higher order bits.
16 |
17 | Nonlinear quantization is essentially determining in which interval the wider number resides.
18 |
19 | E.g. to quantize an unsigned 16-bit number N to a 2 bit number M, you do
20 | if 0 <= N < T0 then M = 00
21 | else if T0 <= N < T1 then M = 01
22 | else if T1 <= N < T2 then M = 10
23 | else M = 11
24 | and it is convenient to pack the 3 16-bit values T0,T1,T2 into a 64-bit register
25 | RANGE_LUT_64 = ( T0, T1, T2 )
26 | leaving 16 bits unused.
27 |
28 | I call this RANGE_LUT_64 or INTERVAL_LUT – it is not exactly the same as an ordinary indexed LUTs, such as is used in cryptography or dequantization. I call the latter INDEXED_LUTs. Some computer arithmetic subdisciplines call this sort of comparison based thing a LUT, and also have other LUTs that are similar to ternary CAMs, sometimes conceptually in ROM (which synthesize to less regular but more compact logic).
29 |
30 | I will certainly be proposing this instruction at some point to assist RISC-V deep learning. Probably as part of some V-DL extension - deep learning in the vector register file - but also possibly a scalar register file version. As you can imagine, the [[multipart instruction approach]] can also be used here if there are too many conceptual operands to fit in the classic RISC two or three input model.
31 |
32 | In fact, the most annoying thing about this sort of quantization instruction using a RANGE_LUT is that the RANGE_LUT does not make use of a full vector operand for typical quantizations like 16 bits to 2 bits. Even when doing something like quantizing 32 bits to 4 bits, it really wants to have two vector operands of different lengths. Which as far as I can tell is something that the vector instruction set is not naturally suited for.
33 |
34 | --
35 |
36 | Piling on:
37 |
38 | Some math dequantization instructions are not just a simple indexed LUT operation. E.g. they may LUT a smallish number of the top bits, and concatenate with or otherwise combine with low bits of the value to be looked up (which is no longer an index). GPU texture units do something like this, although usually in 2D or 3D, not a single dimension, and combined with a special cache for the values that are looked up, at different resolutions (MIP mapping levels).
39 |
40 | Furthermore, a rather common operation in numerically intensive code is to do piecewise linear interpolation. E.g. have a set of data breakpoints like in the INTERVAL_LUT, and if the value does not exactly match one of those breakpoints, then interpolate. Sometimes linearly sometimes with fancier interpolation functions.
41 |
42 | But, again, I cannot imagine uses for these operations in cryptography. I don't think the cryptography will normally want to use interpolation, and especially not any form of approximate arithmetic. I sometimes wonder if it could be useful for curves in cryptography. But the piecewise intervals are far too many and should not be compressible.
43 |
44 | --
45 |
46 | Generalized:
47 | * INDEX_LUT
48 | * INTERVAL_LUT
49 | * GPU texture lookup = INDEX_LUT and interpolation (typically in two or more dimensions)
50 | * piecewise interpolation = RANGE_LUT and interpolation
51 |
52 |
--------------------------------------------------------------------------------
/Archive/wiki/Quick-and-dirty-list-of-Actual-CMOs.md:
--------------------------------------------------------------------------------
1 |
2 | # CLEAN
3 | also known as: write back
4 |
5 | writes dirty data out of the cache, leaving clean data behind.
6 |
7 | I.e. the cache contents after this operation should all have been written back.
8 |
9 | [[Issue: Q: should CMOs like clean and flush update LRU]]
10 |
11 | [[CMO Scope]]: local,
12 |
13 | [[CMO target]]:
14 |
15 | Examples:
16 | * flush to point of persistence (NVRAM)
17 | * flush to point of persistence
18 |
19 |
20 | * flush to point of persistence (battery backed up DRAM)
21 | * flush to point of coherence (SW managed cache consistency)
22 | * flush to shared cache level
23 |
24 | issue: vocabulary/terminology: I am very much used to saying "flush to DRAM" indicating that all dirty accesses should be sent to DRAM. I am not at all used to saying "clean to DRAM".
25 |
26 |
27 |
28 | # FLUSH
29 | a.k.a. write back and invalidate
30 |
31 | Writes dirty data out of the cache.
32 |
33 | I.e. the cache contents after this operation all have been invalidated.
34 |
35 | # DISCARD
36 |
37 | a.k.a. Invalidate, Forget
38 |
39 | Actually throws away dirty data in the cache, to the extent that is permissible by the cache protocol.
40 |
41 | Motivation: once temporary memory buffers are no longer needed, it is "wasteful" to write the temporary values back to memory. Of interest mostly for really large caches, or to avoid writing back unneeded dirty data to NV RAM in a persistent memory system.
42 |
43 | Analogy: SSD TRIM commands.
44 |
45 | Unsafe:
46 | * this can expose old data in memory that was overwritten by values that are now being forgotten.
47 | * note that system code may
48 |
--------------------------------------------------------------------------------
/Archive/wiki/RISC-V-CMO-proposal.md:
--------------------------------------------------------------------------------
1 | This proposal for CMOs was prepared by Andy Glew,
2 | and is a start for discussion of the CMO TG.
3 |
4 | There may be other proposals from other TG members.
5 | TBD: copy here and/or link there.
6 |
7 | The https://github.com/riscv/riscv-CMOs/ repo and wiki were forked from a personal repo and wiki https://github.com/AndyGlew/Ri5-stuff that also contained non-CMO stuff.
8 |
9 |
10 | ## "Released" Proposal - what you are probably looking for
11 |
12 | As of Sept 2020 there is not really a "release" procedure.
13 | PDF and HTML are prepared in an offline clone of the repo+wiki, and pushed to GitHub.
14 |
15 | TBD: imitate the main and V specs, automatically generating PDF and HTML on the GitHub servers on checkin/push.
16 | (Not just push, as long as we continue to use a proposal embedded in the wiki.)
17 |
18 | TBD: fix the tools so that they work on systems other than Andy Glew's personal PC with cygwin.
19 |
20 | [[generated-HTML-and-PDF-for-CMOs-proposal]] (local)
21 | * on web: https://github.com/riscv/riscv-CMOs/wiki/generated-HTML-and-PDF-for-CMOs-proposal
22 |
23 |
24 |
25 |
26 | ## Work in Progress (WIP)
27 |
28 | Sept 2020: [[Administrivia - CMOS TG]]
29 | * mailing list running - lots of traffic
30 | * first meetings scheduled (see page)
31 | * testing to see how/if notifications of repo and wiki posts get sent to list (and if that will be too annoying)
32 |
33 | Jan-Aug 2020: [[CMOs (Cache Management Operations)]]
34 |
35 | * [[RISC-V needs CMOs, and hence a CMO Working Group]]
36 | * [[CMOs WG Draft Proposed Charter]]
37 | * [[Mailing lists interested in CMOs]]
38 |
39 | * [[Draft CMO proposals]]
40 | * I (Glew) created a draft CMO proposal
41 | * originally in my personal GitHub https://github.com/AndyGlew/Ri5-stuff/wiki/Ri5-CMOs-proposal
42 | * eventually in official GitHub https://github.com/riscv/riscv-CMOs/wiki/Ri5-CMOs-proposal
43 | * TBD: migrate that to riscv.org wiki
44 | * See issue https://github.com/riscv/riscv-CMOs/issues/2
45 | * [[CMOs cut across many fields]]
46 |
47 | * [[Ri5-CMOs-proposal]]
48 | * See [[generated HTML and PDF for CMOs proposal]]
49 |
50 | * [[Use Cases for CMOs and Prefetches]]
51 | * See above, in particular [[Mailing lists interested in CMOs]]
52 | * TBD: formalize CMO use cases for use in tracking progress, ensuring coverage, and restricting scope
53 |
54 |
55 |
56 |
57 | ## [[Administrivia]]
58 |
59 | Network locations - GitHub repo, wiki, mailing lists, etc.
60 |
61 | See [[TOC - Table of Contents]]
62 | * almost certainly out of date
63 | * TBD: [[automate generation and update of wiki TOC]] as wiki evolves
64 |
--------------------------------------------------------------------------------
/Archive/wiki/RISC-V-needs-CMOs,-and-hence-a-CMO-Working-Group.md:
--------------------------------------------------------------------------------
1 | All successful computer instruction sets have Cache Management Operations (CMOs).
2 |
3 | Several RISC-V systems have already defined implementation specific CMO instructions.
4 | It is desirable to have standard CMO instructions to facilitate portable software.
5 |
6 | CMOs do things like flushing dirty data and invalidating clean data for use cases that include
7 | non-coherent DMA I/O,
8 | security (e.g. Spectre),
9 | power management (flush to battery backed-up DRAM),
10 | persistence (flush to NVRAM),
11 | and more.
12 |
13 | CMOs cut across several problem domains. It is desirable to have a consistent approach, rather than different idiosyncratic instructions for different problem domains.
14 | RISC-V therefore needs a CMO working group that will coordinate with any working groups in those overlapping domains.
15 |
16 | ### Administrivia
17 |
18 | 2020/8/5: Email proposing this will soon be sent to the RISC-V Technical Steering Committee
19 | and other mailing lists, seeking approval of the formation of such a CMO working group.
20 |
21 | Here linked is a wiki version of the WG proposal [[RISC V needs CMOs, and hence a CMO Working Group]].
22 | Also a [[CMOs WG Draft Proposed Charter]] - although probably too long.
23 |
24 | **Assuming the CMO WG is approved:**
25 |
26 | Please indicate if you are interested by replying to this email (to me, Andy Glew).
27 | To facilitate scheduling of meetings, please indicate timezone.
28 |
29 | A riscv.org mailing list should be set up soon.
30 |
31 | We have already set up https://github.com/riscv/riscv-CMOs,
32 | and will arrange permissions for working group members as soon as possible.
33 |
34 | Here linked is a [[CMOs WG Draft Proposed Charter]].
35 |
36 | Proposals:
37 | * At least one CMO proposal has been developed in some detail. It is linked to from https://github.com/riscv/riscv-CMOs, and may soon be moved to this official place.
38 | * We welcome: Other proposals, and/or examples of implementation specific CMO extensions already implemented
39 |
40 |
--------------------------------------------------------------------------------
/Archive/wiki/RISC-V-standard-disclaimer.md:
--------------------------------------------------------------------------------
1 | Here is the RISC-V standard disclaimer
2 |
3 | I am sending this as email, so that I can pin it to the top of our message archive.
4 | TBD: link.
5 |
6 | I am also posting it on our wiki https://github.com/riscv/riscv-CMOs/wiki/RISC-V-standard-disclaimer
7 |
8 | I will post or link to it from our meeting announcements and agendae.
9 | TBD: link.
10 |
11 |
12 | -------- Forwarded Message --------
13 |
14 | Subject: [RISC-V] [tech-chairs] disclaimer slides to add to your group meeting agendas
15 | Date: Mon, 28 Sep 2020 09:12:25 -0700
16 | From: mark
17 | To: chairs , tsc
18 |
19 |
20 | https://drive.google.com/file/d/1FmXDqa20NNjtfFyPdcT7__-AfO7ke9J_/view?usp=sharing
21 |
22 | Because I am a bit obsessive about this sort of thing, I will here extract the text of this disclaimer. However, note that this extracted text may be obsoleted if the disclaimer in an official place is edited, so please refer to the Google Drive link if necessary, and to any other official place it may be moved to in the future. NOTE: Google search did not find this for me (Does Google search ever index Google Drive?), but did return similar documents Such as those whose URLs are in the below:
23 |
24 | # Antitrust Policy Notice
25 |
26 | RISC-V International meetings involve participation by industry competitors, and it
27 | is the intention of RISC-V International to conduct all its activities in accordance
28 | with applicable antitrust and competition laws. It is therefore extremely important
29 | that attendees adhere to meeting agendas, and be aware of, and not participate
30 | in, any activities that are prohibited under applicable US state, federal or foreign
31 | antitrust and competition laws.
32 |
33 | Examples of types of actions that are prohibited at RISC-V International meetings
34 | and in connection with RISC-V International activities are described in the RISC-V
35 | International Regulations Article 7 available here: https://riscv.org/regulations/
36 |
37 | If you have questions about these matters, please contact your company counsel.
38 |
39 | # RISC-V International
40 |
41 | RISC-V is a free and open ISA enabling a new era of processor innovation
42 | through open standard collaboration. Born in academia and research, RISC-V ISA
43 | delivers a new level of free, extensible software and hardware freedom on
44 | architecture, paving the way for the next 50 years of computing design and
45 | innovation.
46 |
47 | We are a transparent, collaborative community where all are welcomed, and all
48 | members are encouraged to participate.
49 |
50 | We as members, contributors, and leaders pledge to make participation in our
51 | community a harassment-free experience for everyone.
52 |
53 | https://riscv.org/risc-v-international-community-code-of-conduct/
--------------------------------------------------------------------------------
/Archive/wiki/STATUS---almost-done---maybe.md:
--------------------------------------------------------------------------------
1 | This wiki is for work on a proposal for RISC-V.
2 |
3 | Therefore, any status along the lines of "almost done?" is always contingent on whether it will be accepted by the working group(s) to which it is being applied.
4 |
5 | Therefore, [[STATUS: almost done - maybe]] means that from my point of view, the author's point of view, it is reasonably complete and has started being accepted by key bottleneck reviewers.
6 |
7 | TBD: [[Proposal Status Tags]].
--------------------------------------------------------------------------------
/Archive/wiki/Sharing-Drawings-and-Diagrams.md:
--------------------------------------------------------------------------------
1 | Drawings and diagrams are good for technical work, like computer architecture, like RISC-V instruction set design and platform architecture.
2 |
3 | Unfortunately, there doesn't seem to be a ubiquitous standard for exchanging drawings. At least not diagrams with smart objects like glue and connectors. SVG may be good enough for simple 2D drawings, but as far as I know the SVGConnector standard has been stalled since 2011.
4 |
5 | I asked about what people use on the RISC-V crypto mailing list, and got answers that I will summarize as follows
6 |
7 | * Visio
8 | * proprietary
9 | * exports/imports to SVG, etc.
10 | * the .VSD / .VSDX proprietary file formats seem to be the most common exchange format for diagrams
11 |
12 | * LucidChart
13 | * proprietary
14 | * exports/imports to SVG, Visio file formats
15 | * however, round tripping Visio --> LucidChart --> Visio --> LucidChart is reported to be unreliable
16 |
17 | * Markdeep
18 | * MJO, Markku on crypto list
19 | * MarkDeep http://casual-effects.com/markdeep/ internally for CPU documentation. In addition to your easy MarkDown tables and code snippets, it has ASCII type block graphics. Has been sufficient for this particular purpose, but certainly has limits.
20 |
21 | * https://www.draw.io/
22 | * Ben Marshall, University of Bristol
23 |
24 | * It's free, works online, or as a wrapped up desktop app for offline use.
25 | * It saves things in a proprietary format unfortunately, but has all the usual export targets: pdf/jpeg/png/svg. Even experimental support for VSDX, which I think is viseo?
26 | * I can also send you a link which entirely encodes the diagram[1] so others can copy/edit it. It's not the live sharing/collaboration thing which google docs does so well, but it's a good alternative.
27 |
28 | * LibreOffice Draw
29 | * Free, glue dots, connectors and SVG export.
30 |
31 |
32 |
33 | * Google Docs Drawing???
34 | * Be refers to it
35 | * IIRC I have tried, but was unhappy. Probably because of off-line behavior, lack of, but I don't remember exactly.
--------------------------------------------------------------------------------
/Archive/wiki/SourceDest-to-support-Exception-Transparency.asciidoc:
--------------------------------------------------------------------------------
1 | === *_Source/dest_* to support *_exception transparency_*
2 |
3 | This instruction family is *_restartable after partial
4 | completion_*. E.g. on an exception such as a page fault or debug
5 | address breakpoint the output register RD is set to the data address
6 | of the exception, and since the instruction is *_source/dest_*, with
7 | the register numbers in RD and RS1 required to be the same, returning
8 | from the exception to the CMO.UR instruction will pick up execution
9 | where it left off.
10 |
11 | [NOTE]
12 | .Rationale: source/dest by requiring RD=RS1
13 | ====
14 | This proposal has chosen to implement *_source/dest_* by
15 | requiring separate register fields RD and RS1 to contain the same
16 | value. An alternative was to make register field RD both an input and
17 | an output, allowing RS1 and RS2 to be used for other inputs. Separate
18 | RD=RS1 source/dest is more natural for a RISC instruction decoder, and
19 | detecting RD=RS1 has already been performed for other RISC-V
20 | instructions, e.g. in the V extension. However separate RD=RS1
21 | "wastes" instruction encodings by making RD!=RS1 illegal, and leaves
22 | no register free in the CMO.VAR instruction format for any 3rd operand such as the CMO type, hence
23 | requiring . in the instruction encoding.
24 |
25 | TBD: see *_who cares about RD=RS1 source/dest?_*
26 | ====
--------------------------------------------------------------------------------
/Archive/wiki/TOC---Table-of-Contents.md:
--------------------------------------------------------------------------------
1 | The github wiki apparently has online macros such as TOC (Table Of Contents) disabled.
2 |
3 | That's okay, I've written TOCs for other wikis that operate offline. Not yet using here.
4 |
5 | This is just a baby first step: links extracted.
6 |
7 | * TBD: hand edit to logical structure.
8 | * TBD: delete non-TOC links
9 | * TBD: collect non-linked / non-TOC'ed pages
10 |
11 | ===
12 |
13 | Issues
14 |
15 | Dang, no easy crosslinking between Github wiki and issues.
16 |
17 | TBD: auto generate issue summaries
18 |
19 | * not for all, but for important pending issues
20 | * TBD: mark in the issue database?
21 |
22 |
23 | Official GitHub repo issues:
24 |
25 | * https://github.com/riscv/riscv-CMOs/issues/2
26 |
27 | Original/personal issues:
28 |
29 | * https://github.com/AndyGlew/Ri5-stuff/issues/2
30 | * Verify that the recommended [[partial instruction completion loop constructs]] for CMOs operate correctly if optional prefetches or hints are treated as NOPs. #2
31 | # TBD: migrate pending issues from personal to official CMOs depository
32 |
33 | ===
34 |
35 | TBD: this is badly out of date - so out of date, it should be abandoned if the scripts are not rerun soon. 2020-08-12
36 |
37 | * [[An-even-quicker-and-dirtier-summary-of-proposed-instruction-encodings-for-RISC-V-CMOs]]
38 | * [[Fixed Block Size Prefetches and CMOs]]
39 | * [[Block-memory-operations:-such-as-MEMSET-and-MEMCOPY]]
40 | * [[C library block memory operations]]
41 | * [[exception transparency for instructions with partial completion]]
42 | * [[partial completion]]
43 | * [[CMO-goals]]
44 | * [[CMOs-(Cache-Management-Operations)]]
45 | * [[An even quicker and dirtier summary of proposed instruction encodings for RISC-V CMOs]]
46 | * [[CMO-types issue]]
47 | * [[Consensus Work in Progress]]
48 | * [[Fixed Block Size Prefetches and CMOs]]
49 | * [[Instruction Name Choice]]
50 | * [[Non-Address Based CMOs for Abstraction and Efficiency]]
51 | * [[Overview of CMO operations]]
52 | * [[Quick and Dirty Proposal for RISC-V CMOs]]
53 | * [[STATUS: almost done? - maybe]]
54 | * [[Terminology for instructions that manage microarchitecture state such as caches, prefetchers and predictors]]
55 | * [[Variable Address Range CMOs]]
56 | * [[CMOs-Not-Based-on-Memory-Address]]
57 | * [[bulk invalidate]]
58 | * [[CMOs-proportional-to-cache-size-rather-than-address-range]]
59 | * [[CMOs based on cache microarchitecture]]
60 | * [[Extended-CMO-types]]
61 | * [[Fixed-Block-Size-Prefetches-and-CMOs]]
62 | * [[An even quicker and dirtier summary of proposed instruction encodings for RISC-V CMOs]]
63 | * [[Extended CMO Type]]
64 | * [[Mnemonics and Names]]
65 | * [[STATUS: almost done - maybe]]
66 | * [[Home]]
67 | * [[CMOs (Cache Management Operations)]]
68 | * [[Quantization, dequantization, and interpolation instructions for DL, math, etc.]]
69 | * [[Sharing Drawings and Diagrams]]
70 | * [[hack-relative-URLs-in-github-project-wiki-repo]]
71 | * [[ISSUE:-process-migration-argues-for-whole-cache-invalidation-operations-and-against-the-partial-progress-loop-construct]]
72 | * [[CMO.UR loop construct]]
73 | * [[parent page||Non-Address-Based-CMOs-for-Abstraction-and-Efficiency]]
74 | * [[Instructions-that-Support-Partial-Progress]]
75 | * [[Mandatory-versus-Optional-CMOs,-PREFETCHES,-and-CPHs]]
76 | * [[]]
77 | * [[Non-Address-Based-CMOs-for-Abstraction-and-Efficiency]]
78 | * [[(see issue)|ISSUE: process migration argues for whole cache invalidation operations and against the partial progress loop construct]]
79 | * [[CMO UR descriptor operand]]
80 | * [[CMO UR index]]
81 | * [[CMO descriptor]]
82 | * [[CMO loop construct]]
83 | * [[CMO.UR loop construct]]
84 | * [[Instructions that Support Partial Progress]]
85 | * [[RISC-V hardware error reporting]]
86 | * [[machine check]]
87 | * [[system description such as CPUID or config string]]
88 | * [[trap or use for other instructions]]
89 | * [[Overview-of-CMO-operations]]
90 | * [[Survey of CMOs in Modern Computer Architectures]]
91 | * [[Quantization,-dequantization,-and-interpolation-instructions--for-DL,-math,-etc.]]
92 | * [[multipart instruction approach]]
93 | * [[Quick-and-Dirty-Proposal-for-RISC-V-CMOs]]
94 | * [[|cmo_type CMO instruction flavor]]
95 | * [[|Virtual or Physical CMO instruction flavor]]
96 | * [[CMO goals]]
97 | * [[CMO variable address range alternatives]]
98 | * [[CMOs Not Based on Memory Address]]
99 | * [[CMOs based on cache microarchitecture]]
100 | * [[CMOs proportional to cache size rather than address range]]
101 | * [[Instantaneous Flushes of Predictor and Cache State]]
102 | * [[Transparent Resumeability Prefers SrcDst Register Operands]]
103 | * [[full memory addressing mode rs1+imm12 for prefetches and CMOs]]
104 | * [[STATUS:-almost-done - maybe]]
105 | * [[Proposal Status Tags]]
106 | * [[STATUS: almost done - maybe]]
107 | * [[Sharing-Drawings-and-Diagrams]]
108 | * [[Some-Page]]
109 | * [[Terminology-for-instructions-that-manage-microarchitecture-state-such-as-caches,-prefetchers-and-predictors]]
110 | * [[At the time of writing pages in this document (wiki)]]
111 | * [[BTB]]
112 | * [[CMOs (Cache Management Operations)]]
113 | * [[CMOs-(Cache-Management-Operations)]]
114 | * [[CPH (Cache Performance Hints)]]
115 | * [[CPH (Cache Performance Hints) instructions]]
116 | * [[HWDS (Hardware Data Structure)]]
117 | * [[List of optional versus mandatory CMOs]]
118 | * [[Mandatory versus Optional CMOs, PREFETCHES, and CPHs]]
119 | * [[PREFETCH instructions]]
120 | * [[Performance Related Hardware Data Structure]]
121 | * [[Performance Related Hardware Data Structures]]
122 | * [[TLB]]
123 | * [[at the time of writing]]
124 | * [[branch predictor stew|https://patents.google.com/patent/US7143273B2/en]]
125 | * [[cache push out or post-store]]
126 | * [[clean or flush]]
127 | * [[optional prefetches or post-store]]
128 | * [[pHWDS]]
129 | * [[partial instruction completion loop constructs]]
130 | * [[partial instruction completion|Instructions-that-Support-Partial-Progress]]
131 | * [[Virtual-or-Physical-CMO-instruction-flavor]]
132 | * [[cmo_type-CMO-instruction-flavor]]
133 | * [[|Extended CMO types]]
134 | * [[Point of Long Term Persistence]]
135 | * [[Point of Short Term Persistence]]
136 | * [[Point of Unification]]
137 | * [[Survey of CMOs in Modern Computer Architectures]]
138 | * [[hack-relative-URLs-in-github-project-wiki-repo]]
139 | * [[..]]
140 | * [[../../wiki]]
141 |
--------------------------------------------------------------------------------
/Archive/wiki/Virtual-or-Physical-CMO-instruction-flavor.md:
--------------------------------------------------------------------------------
1 | Both the variable range and fixed size CMO instructions specify an address, or a range of addresses.
2 |
--------------------------------------------------------------------------------
/Archive/wiki/Why-CMOs.xlsx-was-written-in-Excel.md:
--------------------------------------------------------------------------------
1 | CMOs.xlsx, in GitHub repo at https://github.com/riscv/riscv-CMOs/blob/master/CMOs.xlsx
2 | is a "list" of CMOs. Not exactly a list, more like a table from which the actual list can be generated.
3 |
4 | I am a bit embarrassed that I wrote this in Excel, since I prefer to use open tools, easily parseable by scripts etc.
5 |
6 | The main reason I wrote this in Excel is that I wanted arrows in my spreadsheet stuck to cells of the spreadsheet. Similarly diagrams.
7 |
8 | (Oh, f***: I would have pasted a screen-snip of the diagrams here. But this wiki does not support pasting of images, instead you have to save the image as a separate file. This is another shortcoming of a tool that would make me like to switch to something more capable.)
9 |
10 | Google Sheets does not seem to have this feature. In Google Sheets you can draw arrows etc. on top of your spreadsheet, but if the cells are resized the arrows stay the same size and you have to manually adjust them.
11 |
12 | TBD: can OpenOffice provide this feature? I have not yet tried.
13 |
14 |
15 | Similarly, I rely on the ability to collapse groups of rows and/or columns.
16 |
17 | I would actually like to use pivot charts for this, but Excel pivot charts are insufficiently powerful.
18 |
--------------------------------------------------------------------------------
/Archive/wiki/Wiki-and-Repo-crosslink-issues.md:
--------------------------------------------------------------------------------
1 | Relative links can cross-link between GitHub wikis and repo on GitHub itself.
2 |
3 | However, when cloned, the wiki and the repo are different git archives. If care is taken the relative cross-links will work. I have made the wiki a submodule of the repo, so that if the repo is cloned recursively the links should still work.
4 |
5 | The wiki links will nearly always work, whether on GitHub itself or in a clone, but the links from wiki to repo (and back again) may not always work if cloned without taking the nesting into account.
--------------------------------------------------------------------------------
/Archive/wiki/_Footer.md:
--------------------------------------------------------------------------------
1 | Jump to: [wiki](Home), [TOC](TOC---Table-of-Contents), [repo](..)
2 |
--------------------------------------------------------------------------------
/Archive/wiki/_Sidebar.md:
--------------------------------------------------------------------------------
1 | | [This wiki](./Home), [search](../search?q=ENTER-QUERY-HERE&type=wikis) [repo](..), [issues](../issues) |
2 | |---|
3 | | [TOC](TOC---Table-of-Contents) |
4 | | GH abs |
5 | |[[Wiki and Repo crosslink issues]]|
6 | | [Edit this Sidebar](_Sidebar/_edit) |
7 | | [Generic Sidebar](https://github.com/AndyGlew/Ag-stuff/wiki/Ag-Common-Sidebar-and-Footer) |
8 |
--------------------------------------------------------------------------------
/Archive/wiki/cmo_type-CMO-instruction-flavor.md:
--------------------------------------------------------------------------------
1 | There are more possible configurations of caches than are conceived of by computer architecture textbooks. E.g. number of I/D/unified levels, inclusive/exclusive/neither, virtual/physical, etc. Let alone prefetchers, victim choosers (LRU), etc.
2 |
3 | In general portable software does not want to need to know about cache microarchitecture details.
4 | But sometimes software needs to, or benefits from, knowing microarchitecture details.
5 | * performance:
6 | * "I probably don't need this data again, so you might as well get it out of the cache"
7 | * vs "this loop nest has one operand that fits in a 32K L1$, and one that doesn't fit in a 4MB cache,
8 | so use LRU for the first and MRU for the second"
9 | * power management:
10 | * "I am putting the whole system to sleep and will not be retaining data in the caches, so flush all data to battery backed up DRAM"
11 | * vs "I am powering off CPU core #1 but not #2, so flush all dirty data in the caches that will be powered off"
12 | * vs "I happen to know that I can power off the L2$ and still operate the L1$ and the L3$, so do that"
13 | * security
14 | * "flush/invalidate/reset all possible microarchitecture state that might be a timing channel"
15 | * vs "I am using way partitioning to isolate users in the large L2$, so flush the L1$ completely but do not touch the L2$"
16 | * SW coherence
17 | * "flush/invalidate all caches between me and DRAM"
18 | * vs "I am only trying to synchronize with threads/processes running on other harts/CPUs with which I share an L3$, so flush/invalidate the L1$, L2$, and everything all the way to the L3$, but don't flush the L3$ or L4$."
19 | * how might SW know this? Not on a general purpose OS with process migration. But perhaps in an embedded/HPC system, or via processor affinity.
20 |
21 | Many more examples are not just possible, but have been built in the real world, requested of CPU vendors, or proposed by academics.
22 |
23 |
24 | This proposal does NOT try to comprehend or represent all possible such CMO types.
25 |
26 |
27 | This proposal places a small number of such possibilities in the instruction encoding.
28 |
29 | WARNING: terminology confusion: Intel and IBM define "flush" oppositely. In Intel x86, "flush" means "evict dirty data", maybe/maybe not leaving clean data behind. In IBM POWER, "flush" means invalidate data without writing it back. What Intel calls a flush IBM calls a clean. What IBM calls a flush Intel calls an invalidate. TBD: what terminology should RISC-V use? Until determined, I will write out verbosely
30 |
31 | * Flush - write out dirty data
32 | * what is left behind
33 | * leaving clean data behind, e.g. in S state
34 | * leaving invalid cache lines behind
35 | * depth
36 | * to "[[Point of Unification]]"
37 | * to DRAM
38 | * to battery backed up DRAM
39 | * to non-volatile storage (NVRAM)
40 | * which: data and/or instruction [see note 2]
41 | * Prefetch [see note 3]
42 | * prefetch type
43 | * prefetch data to read
44 | * prefetch data to write
45 | * prefetch instructions
46 | * prefetch bias
47 | * place in LRU, i.e. expect temporal locality
48 | * place in MRU, i.e. expect non-temporal locality
49 |
50 | Even the list above expands to 2*4*2 + 4*2 = 24 possibilities. Probably more than we want to spend opcode space on.
51 |
52 | Enumerating by priority
53 | 1. D, writeout dirty, leave clean behind, to [[Point of Unification]]
54 | * use: performance
55 | 1. D, writeout dirty, invalidate all, to [[Point of Unification]]
56 | * use: SW coherence
57 | 1. D, writeout dirty, leave clean behind, to [[Point of Long Term Persistence]] (NVRAM)
58 | * use: persistence
59 | 1. D, writeout dirty, leave clean behind, to [[Point of Short Term Persistence]] (e.g. battery backed up DRAM in a phone)
60 | * use: power management
61 |
62 | 1. Prefetch D to read, LRU
63 | 1. Prefetch D to write, LRU
64 | 1. Prefetch D to read, MRU
65 | 1. Prefetch I, LRU
66 |
67 | ==> 8 encodings.
68 |
69 | Actually, I would prefer to have 1 or 2 less than a power of two in-instruction encodings.
70 |
71 | In general, for all of those that cannot be represented in that small set, I propose to reserve encodings and/or instruction formats for [[|Extended CMO types]]
72 |
73 |
74 |
75 |
76 |
77 | TBD: compare to a [[Survey of CMOs in Modern Computer Architectures]]
78 |
79 | Note 1: in this small in-instruction-encoding set we are NOT including destructive and security damaging operations like "invalidate cache line even if dirty", as in Intel's INVD instruction or IBM's DCBA. Nor are we including operations like "allocate zero filled cache line without read-for-ownership", as in IBM's DCBZ, which are secure, but which may expose the cache line size. (However, I expect that customers will strongly request DCBZ, so I consider it wise to reserve instruction encoding space.)
80 |
81 | Note 2: the EXPORT.I instruction proposed by the J extension WG essentially is equivalent to
82 | CMO.VAR.VA..
83 | in general, this CMO proposal defers to that EXPORT.I proposal, and will not provide any instruction related CMOs. I am listing them here only to ensure coverage.
84 |
85 | Note 3: it is TBD whether cache prefetches will be part of the CMO proposal. Prefetch instructions usually want to have addressing modes comparable to normal memory reference instructions, e.g. Memory[reg+offset], where the prefetch offset is increased by a fetch-ahead delta. Therefore, if prefetches are included, the CMO.FSZ.* format should be extended to have a memory addressing mode. There may not be enough instruction encoding space in ILEN=32 to allow this. For that matter, certain
86 |
--------------------------------------------------------------------------------
/Archive/wiki/draft-CMO-domains-and-levels.asciidoc:
--------------------------------------------------------------------------------
1 |
2 | === CMO memory hierarchy domains and levels ..
3 |
4 | The . property specified the domains and levels involved in CMO operations.
5 |
6 | "Domains" refers to CMOs that flush data from not just one cache, but from several layers of cache.
7 | Sometimes by flushing an outer inclusive layer.
8 | Sometimes by traversing multiple levels.
9 |
10 | Actual implementations may have many idiosyncratic caches and other parts of the memory hierarchy.
11 |
12 | There should be a standard RISC-V way to flush such non-standard implementation specific cache levels, but that is not part of this proposal.
13 |
14 | Instead this proposal defines a small(?) number of abstract cache layers. Implementation cache layers will be mapped onto these layers.
15 |
16 | These pseudo-abstract layers are
17 |
18 | Cache levels and domains
19 |
20 | * POC(I,D)
21 | ** the Point of Consistency for Instructions and Data, for the common case of inconsistent instruction and data caches
22 | ** ARM calls this the Point of Unification
23 | * The POC(ID) defines two domains that may need to be flushed
24 | ** I-->POC(ID) - the path from processor through I$ to the Point of ID consistency
25 | ** D-->POC(ID) - the path from processor through D$ to the Point of ID consistency
26 |
27 | * POC(D*), domain P*-->POC(D*)
28 | ** the path from any or all of a set of processors to the common level for all processors in that set.
29 | ** ARM calls this the Point of Inner Consistency
30 | ** assumed cache coherent in this domain
31 | ** used for performance optimizations, not correctness
32 |
33 | * POC(Unc), domain P*-->POC(Unc)
34 | ** the path from any or all of a set of non-cache-coherent processors to a common point
35 | ** SW managed consistency works if this domain is flushed to POC(Unc)
36 |
37 | * POC(Uio), domain P*-->POC(Uio)
38 | ** the path from any or all of a set of non-cache-coherent processors to a point in common with non-coherent I/O
39 | ** SW managed consistency for I/O devices works if this domain is flushed to POC(Uio)
40 |
41 | Frequently, POC(Unc), POC(Uio) are identical. Frequently, POC(Unc), POC(Uio) are DRAM. But not always, therefore distinguished.
42 |
43 | Memory, Volatile and Non-Volatile
44 |
45 | * M, domain P*-->M
46 | ** memory, eg DRAM
47 | ** not necessarily battery backed up
48 |
49 | * BM, domain P*-->BM
50 | ** memory that survives power removal from system parts such as harts
51 | ** frequently the same as main memory, but not always. May be a subset.
52 |
53 | * NV1, domain P*-->MN/BM-->NV
54 | ** memory that survives even when batteries fail
55 | *** i.e., lasts years, not days
56 | * NVR, domain P*-->MN/BM-->NV-->NVR
57 | ** all, last, or redundant/reliable level of nonvolatile memory
58 | ** memory that tolerates failures of other NV1 components
59 | *** i.e., lasts years, not days
60 |
61 |
62 | *_Count_*: 9 - 4 bits
63 |
64 | Unfortunately, would like local/global flavors of the above. => 5 bits => exceeds 256 encodings.
65 |
66 | So need to compress more.
67 |
68 | TBD...
69 |
--------------------------------------------------------------------------------
/Archive/wiki/draft-CMO-issues.asciidoc:
--------------------------------------------------------------------------------
1 |
2 | # Other issues for this CMO Proposal
3 |
4 | [NOTE]
5 | ====
6 | Discussion, issues, and rationale, have been embedded in this in such NOTE sections, interleaved with normative text.
7 |
8 | This section serves to capture such issues that did not naturally get interleaved elsewhere.
9 | ====
10 |
11 | [NOTE]
12 | .Extensibility Limitation: Non-address tag specific invalidations
13 | ====
14 | Many computer systems have "special tags" - non-address tags - in their caches - e.g. security domains - and want selective cache invalidations and flushes for such special-tags.
15 |
16 | The instruction format in this current CMO proposal cannot be extended to do this. The address range CMO.AR already uses all three register fields in the standard RISC-V R format, so there is no free register operand to specify the special-tag. The microarchitecture index CMO.UR only uses 2 register fields, but its encoding is packed such that CMO.UR = CMO.AR with rs2=x0, so again there is no free register operand to specify the special-tag.
17 |
18 | This is acceptable for use case of security information leak mitigation, which requires the entire cache to be invalidated or flushed.
19 |
20 | But there are other use cases which can benefit from selective special-tag invalidations. In particular, when the special-tag is being recycled, when it was used for an old process that is no longer running, and is needed for a new process.
21 |
22 | Also, it seems natural to extend this CMO proposal to TLB invalidation, but it is quite common in computer instruction sets to provide PID or ASID or VMID specific invalidations. Not just when recycling such a special-tag, but also when translations are changed.
23 | ====
24 |
25 | [NOTE]
26 | .CPU hardware may not be aware of system configuration
27 | ====
28 | Operations such as "flush to the point of I/O coherence"
29 | are dependent not on CPU microarchitecture but on system architecture.
30 | E.g. the point of I/O coherence may be DRAM, or it may be a last level cache, if the I/O device can do cache line injection.
31 | Indeed, the point of I/O coherence may be different for different devices in the same system.
32 | SW may only want to do the minimum necessary for the device it is working with.
33 | There is no provision in this CMO proposal for that.
34 |
35 | Similarly, cache flushes for security related information channel mitigation
36 | may in general need to flush all cache levels, L1-L2-L3
37 | (or at least up to the cache level where the bandwidth of the channels is acceptably low).
38 | However, in other situations some of the outer cache levels may be partitioned and not require flushing, e.g. by cache ways.
39 |
40 | Exactly which levels of cache need to be flushed for any particular operation is not known to the CPU, may be system hardware dependent, but may also be system software dependent.
41 |
42 | In general, what CMO should be used, .>., should be mapped from abstract CMO concepts to which caches actually must be involved.
43 | There is no provision in this proposal for such mapping, except for trapping and emulating by M-mode.
44 |
45 | Realistically this will probably mean that the abstract CMO operations in this proposal are useless. Programmers will need to figure out which caches actually get modified by any of the instructions, and will probably ignore the abstractions.
46 | This is no better than the current state of the art.
47 | ====
48 |
--------------------------------------------------------------------------------
/Archive/wiki/draft-CMO-type-spreadsheet.asciidoc:
--------------------------------------------------------------------------------
1 | === CMO type spreadsheet
2 |
3 | A spreadsheet CMOs.xlsx presents the desired CMO types in a format
4 | more convenient than this asciidoc text.
5 |
6 | This spreadsheet is available on GitHub at https://github.com/riscv/riscv-CMOs/blob/master/CMOs.xlsx
7 |
8 | TBD: ensure that the latest version of the spreadsheet has been uploaded. As of <2020-06-11 Thursday, June 11, WW24> the version online is dated April 30th.
9 |
--------------------------------------------------------------------------------
/Archive/wiki/draft-actual-CMO-operations.asciidoc:
--------------------------------------------------------------------------------
1 |
2 | === Actual CMO operations ..
3 |
4 | ==== Actual CMO operations- flushes and prefetches, etc.
5 |
6 | This proposal includes the following actual CMO operations. Short names are listed here - more complete descriptions are in a section below.
7 |
8 | * Traditional CMOs: CLEAN, FLUSH, INVALIDATE-I$, DISCARD
9 |
10 | * Less Common: INVALIDATE-CLEAN, SET-LRU, LOCK-LINE.
11 |
12 | Space should be reserved for more operations, including SAFER_DISCARD_1 and SAFER_DISCARD_2, that remedy the security deficiencies of the DISCARD operation (the well known PowerPC DCBA) while preserving much of the performance advantage.
13 |
14 | In addition to these CBOs that perform various forms of flushes and invalidates,
15 | this proposal includes operations that are often not called CMOs.
16 |
17 | * Prefetches: PREFETCH-R, PREFETCH-EW, PREFETCH-X - using the variable address range approach.
18 |
19 | * Destructive: ZALLOC - allocate a zero-filled-cache line.
20 |
21 | Some have requested locking versions: ZALLOC-and-LOCK, and FETCH-R/W/X-and-LOCK.
22 |
23 | *_COUNT_*: 13 encodings: 4 bits.
24 |
25 | ==== Security / Timing Channel Bit
26 |
27 | Requirement: in addition to flushing caches, it is also required, for
28 | timing channel mitigation such as in Spectre, to flush
29 | microarchitecture mechanisms that can provide timing channels, such as
30 | LRU bits, predictors and prefetchers. Some of these are associated
31 | with cache entries - hence the security/timing channel "bit".
32 | Not actually a bit - applied only to 2 CMOs.
33 |
34 | The security property is applied to the CMO.UR variants that leave no data behind:
35 | FLUSH and INVALIDATE.
36 |
37 | This increases the *_COUNT_* to 15 encodings: 4 bits.
38 |
39 | ==== Detailed description of CMO operations
40 |
41 | Unfortunately, there is no widespread agreement as to what CMO names should be. It is therefore necessary to define their behavior more completely according to cache states.
42 |
43 | Without loss of generality we will mention only two cache states,
44 | Clean and Dirty, relevant to writeback caches. Writethrough and
45 | instruction caches contain only clean data, so may map to more than
46 | one operation that handles dirty data.
47 |
48 | Traditional CMOs
49 |
50 | * CLEAN
51 | ** Dirty-->WB-->Clean
52 | ** Clean-->Clean
53 | * FLUSH
54 | ** Dirty-->WB-->Invalid
55 | ** Clean-->Invalid
56 | ** Alternate names
57 | ** Intel calls this WBINVD
58 | ** Special considerations: security/timing channel variant for CMO.UR
59 | * DISCARD
60 | ** Dirty-->no WB-->Invalid
61 | ** Clean-->Invalid
62 | ** Alternate names
63 | ** Intel calls this INVD
64 | ** Special considerations:
65 | *** security/timing channel variant for CMO.UR
66 | *** security hole
67 | **** there are several safer variants of DISCARD; space is reserved for them, but they are not actually part of this proposal
68 | * DISCARD-CLEAN
69 | ** Dirty-->unaffected
70 | ** Clean-->Invalid
71 | ** Special considerations:
72 | *** can be used in some incoherent I/O use cases
73 | *** remedies the security problems of DISCARD - safe for user mode
74 | * SET-LRU
75 | ** CMO.VAR only
76 | ** most useful special case of the class of replacement algorithm manipulation CMOs
77 |
78 | Operations not typically considered CMOs:
79 |
80 | * PREFETCH-R
81 | * PREFETCH-W
82 | ** prefetches in exclusive clean or dirty state - ready for writes with least possible expense
83 | * PREFETCH-X
84 | ** prefetch code, to execute
85 | ** like PREFETCH-R, except targetting I$ level(s)
86 |
87 | Destructive
88 |
89 | * ZALLOC
90 | ** allocate cache line without reading - zero filling
91 | ** PowerPC DCBZ
92 | * ALLOC
93 | ** allocate cache line without reading - using whatever was there before
94 | ** security hole - but still sometimes used
95 | ** PowerPC DCBA
96 |
97 | Locking variants of the above
98 | * FETCH-R-and-LOCK
99 | * FETCH-W-and-LOCK
100 | * FETCH-X-and-LOCK
101 | * ZALLOC-and-LOCK
102 | * ALLOC-and-LOCK
103 |
104 |
105 | *_Count_*: 15 operations - 4 bits
106 |
--------------------------------------------------------------------------------
/Archive/wiki/draft-microarchitecture-timing-state-flushes.asciidoc:
--------------------------------------------------------------------------------
1 |
2 | === Flushes of Microarchitecture State that Affects Timing Channels
3 |
4 | Requirement: *_all_* microarchitecture state that influences timing, such as predictors, prefetchers, cache LRU bits, etc.,
5 | should be invalidated by the most global CMO.UR.ALL.TC instruction, i.e. with the timing_channel enabled property indicated by the ..
6 |
7 | It is expected that subsets of such microarchitecture state will be associated with other CMO.UR.*.timing_channel instructions.
8 |
9 | NOTE: E.g. the instruction cache invalidation CMO.UR.I.TC may invalidate simple branch predictors,
10 | but not the L2 cache LRU bits.
11 | Which microarchitecture timing state is associated with which CMO.UR.* instructions is implementation dependent.
12 | There should be a way to discover such associations, but that is not part of this proposal.
13 |
14 |
15 |
16 | The phrasing "all microarchitecture timing state ... should be invalidated"
17 | is defined to mean "within the implementation dependent security model of an implementation".
18 | Some implementations may not invalidate any microarchitecture state,
19 | and should therefore be considered insecure for use cases that involve untrusted users.
20 | Other implementations may invalidate some but not all.
21 | These limitations should be documented so that users can determine if an implementation is suitable for their security requirements.
22 | Such documentation is not part of this proposal.
23 |
24 | Permission: CMO.UR.* without the TC property may invalidate such microarchitecture timing channel state. I.e. it is permitted to be more conservative than is required.
25 |
26 | TIP: however, it is expected that use cases such as software managed cache coherency will require invalidating caches, but will not require invalidating timing state, so performance would benefit by distinguishing CMO.*.TC=1 from CMO.*.TC=0.
27 |
28 | Permission: CMO.VAR.* instructions, i.e. memory address range based instructions, may invalidate microarchitecture timing state, but are not required to do so.
29 |
30 | NOTE: ISSUE: should we provide orthogonal encodings CMO.VAR.*.TC (currently proposed), or should we save encoding space by not providing them?
31 |
32 | Requirement: either the CMO.*.TC instructions unconditionally trap, or the <> is implemented, allowing system software to enforce trapping if desired.
33 |
34 | NOTE: There is no requirement to unconditionally trap unimplemented
35 | CMO.*.TC instructions, even on implementations that do not make any attempt to
36 | invalidate microarchitecture timing state. This allows code that uses CMO.*.TC
37 | to run portably on such systems.
38 | But such code on such systems is only secure if the system makes guarantees such as not having untrusted users.
39 | System software such as an OS is encouraged to use the <> to trap such instructions when the guarantee is not met.
40 |
41 |
42 |
43 |
44 |
45 |
46 | [NOTE]
47 | ====
48 | Microarchitecture timing channels data structures
49 | are inherently implementation dependent.
50 |
51 | Some of these structures can be "instantaneously" invalidated, i.e. in O(1) time, not proportional to size or number of elements.
52 |
53 | However, some of these structures cannot be instantaneously invalidated, and must be scanned or iterated over.
54 |
55 | Different implementations may implement conceptually similar structures in either way.
56 | E.g. a branch predictor might be O(1) invalidated inside the CPU;
57 | but some components of some branch predictors are implemented outside the CPU and must be scanned
58 | e.g. several companies have placed branch predictor information in the L2 cache.
59 |
60 | Some of these structures, such as LRU bits and some large branch
61 | predictors, are associated with memory addresses, and are invalidated
62 | by the CMO.* range instructions when the appropriate bit in
63 | the . funct7 is set, aka the "security" bit
64 |
65 | Some of these mechanisms are not naturally associated with caches explicitly managed by the CMO.* instructions' .
66 | E.g. while it might be reasonable to associate fully tagged BTBs with branch addresses in memory,
67 | branch predictor pattern history tables (PHTs) are usually hashed and have no tags.
68 |
69 | Nevertheless, it is required that CMO.UR.ALL.TC will invalidate all microarchitecture timing channels state,
70 | ranging from branch predictors inside the CPU to LRU bits in external caches.
71 | ====
72 |
73 | [NOTE]
74 | ====
75 | ISSUE: this proposal does not provide any ability to invalidate microarchitecture timing state such as branch predictors
76 | independent of the instruction cache, or some other cache. Should it?
77 |
78 | CMO.UR.*.TC invalidations of microarchitecture timing state
79 | are required to mitigate timing channels for security - e.g. to mitigate security flaws such as Spectre.
80 | They are occasionally also desired to improve reproducibility of benchmarks and tests.
81 |
82 | As far as we know, security timing channel mitigation nearly always requires invalidating caches - instruction and data cache timing channels are ubiquitous.
83 | Such caches need not be invalidated for timing channels mitigation only where (a) there are no caches, or (b) the caches are strictly partitioned.
84 | Therefore, for security, it seems reasonable to always couple branch predictor invalidation to cache invalidation/flushing.
85 |
86 | Non-security purposes, such as testability and benchmarking, may prefer not to invalidate microarchitecture timing state, but that is not part of this proposal.
87 | ====
88 |
89 |
90 |
91 |
92 | [WARNING]
93 | ====
94 | Unfortunately, in many implementations CMO.UR.ALL.TC cannot guarantee that all microarchitecture timing channels state has been invalidated,
95 | for the same reasons that CMO.UR.* cannot guarantee that a cache is entirely invalid after the instruction.
96 | Except for strictly inclusive caches.
97 | In the presence of non strictly inclusive caches,
98 | e.g. exclusive L1/L2 cache hierarchies
99 | a CMO.UR.* a line may be in the L2 cache when the L1 cache is scanned,
100 | but may migrate to the L1 cache before the set it resides in is scanned in the L2 cache.
101 | Such behavior is implementation dependent.
102 | Implementations may provide special cache modes such as "no fill cache mode"
103 | that permit complete invalidation to be guaranteed,
104 | but such modes typically are not allowed to user mode.
105 |
106 | The conditions in which CMO.UR.*.TC can guarantee complete invalidation must be documented,
107 | and should be discoverable, although such discovery mechanisms are not part of this proposal.
108 | ====
109 |
--------------------------------------------------------------------------------
/Archive/wiki/files/Ri5-CMOs-proposal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/Archive/wiki/files/Ri5-CMOs-proposal.pdf
--------------------------------------------------------------------------------
/Archive/wiki/generated-HTML-and-PDF-for-CMOs-proposal.md:
--------------------------------------------------------------------------------
1 | Asciidoctor [[Ri5-CMOs-proposal]] is used to generate HTML and PDF
2 | from .asciidoc source files which are pages on this wiki.
3 |
4 | The generated files can be found in the parent project repo:
5 | * [HTML-rendered](https://htmlpreview.github.io/?https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.html) - as rendered by the htmlpreview proxy
6 | * [HTML-source](https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.html) - GitHub renders as plain text if not proxied
7 | * [PDF](https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.pdf)
8 | * PDF displays
9 |
10 | It would be better to have the generated HTML and PDF on the wiki
11 | because that's where it belongs, since generated from wiki.
12 | Unfortunately HTML and PDF do not display properly in a GitHub wiki.
13 | Raw HTML displayed as text, not rendered; PDF downloads.
14 | Therefore they are kept in the project repo, since HTML and PDF are displayed there.
15 |
16 | Because the wiki and project have separate git repos, they may not match,
17 | i.e. the repo HTML and PDF may be stale.
18 |
19 | Even in the wiki the HTML and PDF may be out of date, since scripts must be run to generate.
20 | But more likely to be consistent.
--------------------------------------------------------------------------------
/Archive/wiki/hack-relative-URLs-in-github-project-wiki-repo.md:
--------------------------------------------------------------------------------
1 | This github project has at least two git repos, the main repo and the wiki repo.
2 | * main: https://github.com/riscv/riscv-CMOs.git
3 | * wiki https://github.com/riscv/riscv-CMOs.wiki.git
4 |
5 | I want to use relative links between the workspaces that ordinarily correspond to these repos,
6 | * both on the github website and when I have cloned elsewhere
7 | * e.g. so that I can clone both together and be consistent
8 | * TBD: exact (automated) procedure to clone both repos and stay relatively consistent
9 |
10 | Markdown syntax relative links fail:
11 | * [[..]] - \[[..]]
12 | * [[../../wiki]] - \[[../../wiki]]
13 |
14 | Trying HTML relative links:
15 | * FAIL: href="wiki" relative - \href="wiki" relative]\
16 | * fails because it resolves to https://github.com/riscv/riscv-CMOs/blob/master/wiki,
17 | * i.e. the relative position is not https://github.com/riscv/riscv-CMOs but is blob/master/wiki
18 | * which tells us what we need to know
19 | * href="." relative underneath that.
20 | * href=".."
21 | * href="../.." relative
22 | * href="../blob/master/hack-relative-URLs-in-github-project-main-repo.md"
23 | * https:../blob/master/hack-relative-URLs-in-github-project-main-repo.md
24 |
25 |
26 | I am so used to websites not allowing ascending relative components in URLs that I wonder if there is a security hole here... There should not be, as long as one cannot actually escape an area mapped to the logged in user or guest.
27 |
28 | Recording this in two places:
29 | * main: https://github.com/riscv/riscv-CMOs/blob/master/hack-relative-URLs-in-github-project-main-repo.md
30 | * wiki: https://github.com/riscv/riscv-CMOs/wiki/hack-relative-URLs-in-github-project-wiki-repo
31 | TBD: can I CSE this stuff, transclude, to reduce duplication?
32 |
33 | Bottom line: relative links
34 | * from wiki
35 | * to project "root" from wiki: href=".."
36 | * to main from wiki: href="../blob/master/README.md"
37 | * to user "root" from wiki: href="../.."
38 | * from main
39 | * to project "root" from main: href="../.."
40 | * to user "root" from main: href="../../.."
41 | * to wiki from main: href="../../wiki"
--------------------------------------------------------------------------------
/Archive/wiki/in-band-tagging-pointers.md:
--------------------------------------------------------------------------------
1 | RISC-V is considering in-band tagging of pointers. Workgroups involved include
2 | * J extension (dynamic languages like JavaScript)
3 | * TEE Trusted Execution Environment security
4 | although IMHO (Glew opinion) we are missing a group concerned specifically with preventing
5 |
6 | People involved
7 | * Kostya Serebryany (Google)
8 | * Lee Campbell (Google)
9 | * ... ?? Nvidia ?? ...
10 | * ... ?? Russian group that built TBI and MTE on RISC-V FPGA ??...
11 |
12 | More accurately, RISC-V is considering pre Virtual Address Transformations
13 | since Lee Campbell has proposed a transformation
14 |
15 | ~~~
16 | address <-- (pointer&mask) | (substitute&~mask)
17 | ~~~
18 |
--------------------------------------------------------------------------------
/Archive/wiki/interception,-modulation,-and-mapping-of-CMOs.md:
--------------------------------------------------------------------------------
1 | See [[Privilege for CMOs]]. This privilege mechanism provides a basic way of trapping CMOs, e.g. to prevent users or guest OSes from performing operations that might be security holes, and also software mapping, e.g. to M-mode which might use idiosyncratic MMIO locations to manage external caches that are not fully integrated with the CPU instruction set or bus transactions.
2 |
3 | I have been unable to persuade people that there is need for a more general mapping mechanism (even though there is).
4 |
5 | Therefore, if you want to do things like
6 | * map user operation "flush all made by this thread to NVRAM persistent storage"
7 | NOT to the "CMO shootdown" operation
8 | * "flush all dirty data from all CPU caches in the coherence domain ..."
9 | to the more efficient
10 | * "flush only writes made on the local hart..."
11 | Because system software knows that there is no hardware cache to cache migration of dirty cache lines,
12 | and no software thread migration between harts/processors
13 |
14 | This ISA provides no such ability to do such mapping cheaply.
15 |
16 | If you want to do this, then you've got to trap and emulate.
17 |
--------------------------------------------------------------------------------
/Archive/wiki/issues-wrt-repo-and-wiki-split.md:
--------------------------------------------------------------------------------
1 | * which files go where?
2 | * it might have been better to split before propagating
3 | * too late - now I will just have to make opposite actions in each repo+wiki
4 |
5 | * branch structure / names -- main/master
6 |
7 |
--------------------------------------------------------------------------------
/Archive/wiki/microarchitecture-range-loop.asciidoc:
--------------------------------------------------------------------------------
1 |
2 | // The CMO.UR instruction is intended to be used in a software loop such as that below:
3 |
4 | In pseudocode:
5 |
6 | ----
7 | x11 := 0
8 | LOOP
9 | CMO.UR.<> x11,x11
10 | WHILE X11 > 0
11 | ----
12 |
13 | In assembly code:
14 |
15 | ----
16 | ADDI x11,x0,x0
17 | L: CMO.UR.<> x11,x11
18 | BGEZ L
19 | ----
20 |
--------------------------------------------------------------------------------
/Archive/wiki/techpubs-info.asciidoc:
--------------------------------------------------------------------------------
1 |
2 | [appendix]
3 | == Techpubs Information
4 |
5 | === Conventions specific to this document.
6 |
7 |
8 | ==== GitHub wiki markdown \[[links]]` are broken
9 | Bold italic *_links_* indicate text that should be links to pages in the original wiki.
10 | The tools used to generate this document HTML and PDF from asciidoc and markdown
11 | do not handle these links (yet).
12 |
13 | ==== Rationale using AsciiDoctor NOTE admonition
14 | [NOTE]
15 | ====
16 | _Rationale_ and other background information is indicated by AsciiDoctor NOTE sections such as this.
17 |
18 | TBD: I would really prefer to design an explicit RATIONALE style or admonition, but I do not know how to do this in AsciiDoctor yet.
19 | Therefore, I have repurposed AsciiDoctor's existing NOTE admonition style.
20 | Unfortunately, this has problems such as section headers not being allowed in the note/rationale text,
21 | and conversely that note/rationale text does not appear in the Table of Contents (TOC).
22 | Therefore, section headers do not appear as these rationale/notes, even if the entire section is rationale, not normative.
23 |
24 | ====
25 |
26 | === Techpubs Information
27 |
28 | This source document: {docname}{docfilesuffix}
29 |
30 | * docdatetime: {docdatetime} - last modified date and time
31 | ** unfortunately, this is only for the topmost file, NOT across all of the included files
32 | * localdatetime: {localdatetime} - when generated
33 |
34 |
35 | Revisions - manually maintained, frequently obsolete:
36 |
37 | * revdate: {revdate}
38 | * revnumber: {revnumber}
39 | * revremark: {revremark}
40 |
41 | More techpubs information, including history thrashing as to how and where to build and store, on wiki page
42 | link:techpub[] / link:file:techpubs.asciidoc[]
43 | (TBD: fix so that works both checked out as file: links and on GitHub wiki).
44 |
45 | ==== Locations
46 |
47 | You may be reading this in any of several different places:
48 |
49 | ===== On GitHub - Official
50 |
51 | ** This document's source files (mostly asciidoc) on its Github repository wiki:
52 |
53 | *** https://github.com/riscv/riscv-CMOs/wiki
54 | *** top of document: https://github.com/riscv/riscv-CMOs/wiki/{docname}
55 | **** this is a .asciidoc file, rendered by GitHub's wiki
56 | **** asciidoc includes link to other parts of the document
57 | **** the wiki contains other pages, not part of the document, some of which provide more background
58 |
59 | ** Generated HTML and PDF files on GitHub:
60 |
61 | *** https://github.com/riscv/riscv-CMOs/
62 | **** HTML: https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.html
63 | ***** displays raw, does not render
64 | **** PDF: https://github.com/riscv/riscv-CMOs/blob/master/Ri5-CMOs-proposal.pdf
65 | ***** displays - in GitHub's ugly way
66 | *** https://github.com/riscv/riscv-CMOs/wiki
67 | **** PDF: https://github.com/riscv/riscv-CMOs/wiki/Ri5-CMOs-proposal.pdf
68 | ***** downloads, does not display
69 | **** HTML: https://github.com/riscv/riscv-CMOs/wiki/Ri5-CMOs-proposal.html
70 | ***** displays raw, does not render
71 |
72 |
73 |
74 | ===== On GitHub - Original
75 |
76 | TBD: remove these references when official locations fully verified
77 |
78 | Originally written in author's personal GitHub repo+wiki:
79 |
80 | ** This document's source files (mostly asciidoc) on its Github repository wiki:
81 |
82 | *** https://github.com/AndyGlew/Ri5-stuff/wiki
83 | *** top of document: https://github.com/AndyGlew/Ri5-stuff/wiki/{docname}
84 | **** this is a .asciidoc file, rendered by GitHub's wiki
85 | **** asciidoc includes link to other parts of the document
86 | **** the wiki contains other pages, not part of the document, some of which provide more background
87 |
88 | ** Generated HTML and PDF files on GitHub:
89 |
90 | *** https://github.com/AndyGlew/Ri5-stuff/
91 | **** HTML: https://github.com/AndyGlew/Ri5-stuff/blob/master/Ri5-CMOs-proposal.html
92 | ***** displays raw, does not render
93 | **** PDF: https://github.com/AndyGlew/Ri5-stuff/blob/master/Ri5-CMOs-proposal.pdf
94 | ***** displays - in GitHub's ugly way
95 | *** https://github.com/AndyGlew/Ri5-stuff/wiki
96 | **** PDF: https://github.com/AndyGlew/Ri5-stuff/wiki/Ri5-CMOs-proposal.pdf
97 | ***** downloads, does not display
98 | **** HTML: https://github.com/AndyGlew/Ri5-stuff/wiki/Ri5-CMOs-proposal.html
99 | ***** displays raw, does not render
100 |
101 | ===== Local Workspace/clone
102 |
103 | * On your local system, where you may have cloned the GitHub parent and wiki repositories, and where you may have built the document:
104 |
105 | ** local where built:
106 | *** won't work from web
107 | *** link:file:{docname}.html[]
108 | *** link:file:{docname}.pdf[]
109 | *** link:file:{docname}.asciidoc[]
110 |
111 | When and where converted (i.e. when asciidoctor was run, to generate this file):
112 |
113 | ** docfile: {docfile} - full path
114 | ** localdatetime: {localdatetime} - when generated
115 | ** outfile: {outfile} - full path of the output file
116 | ** TBD: what system (PC, Linux system) was asciidoctor run on?
117 |
118 | Note: paths local to system document generated on are mostly meaningless to others,
119 | but have already been helpful finding source for orphaned drafts generated as PDF and HTML.
120 |
--------------------------------------------------------------------------------
/Archive/wiki/techpubs.asciidoc:
--------------------------------------------------------------------------------
1 | == TechPubs Thrashing
2 | === Wiki --> AsciiDoc
3 | This proposal was developed on a wiki,
4 | originally https://github.com/AndyGlew/Ri5-stuff/wiki/CMOs-proposal,
5 | later https://github.com/riscv/riscv-CMOs/wiki/CMOs-proposal,
6 | as a set of interlinked pages,
7 | and is in the process of being converted to a more compact AsciiDoc proposal.
8 |
9 | Why?:
10 |
11 | * Wikis get messy.
12 | * Proliferation of pages.
13 | * Hard to tell which pages are part of the official proposal and which pages are just random commentary
14 | * at least it's hard unless you develop tools to do this, I don't want to spend the time to do it
15 | * AsciiDoc seems to be the closest thing RISC-V has to a standard documentation format
16 | * Although it is by no means standard as of 2020-05-06
17 | * the original manuals were written in TeX
18 | * some AsciiDoc
19 | * some in Word, PowerPoint, ...
20 |
21 | === Redundant AsciiDoc files?
22 |
23 | * link:CMOs-proposal[] - original, then thrashed trying to get asciidoc to build
24 | * link:Ri5-CMOs-proposal[] - asciidoc created elsewhere
25 |
26 |
27 | === Thrashed wrt Location
28 |
29 | * Originally in wiki https://github.com/AndyGlew/Ri5-stuff/wiki/CMOs-proposal
30 | * Moved to repo
31 | * repo: https://github.com/AndyGlew/Ri5-CMOs-proposal/blob/master/Ri5-CMOs-proposal
32 | * document: https://github.com/AndyGlew/Ri5-CMOs-proposal/blob/master/Ri5-CMOs-proposal.adoc
33 |
34 | Why?
35 |
36 | * GitHub wiki does not support includes
37 | * Git does not support ad-hoc subrepo checkouts
38 | * although I expect some idiot is going to tell me that sparse checkouts are good enough. They are not.
39 |
40 | Then moved back to original repo/wiki
41 | * the wiki is a submodule of the repo
42 | * wiki https://github.com/AndyGlew/Ri5-stuff/wiki/CMOs-proposal
43 | * repo https://github.com/AndyGlew/Ri5-stuff
44 | * where tools such as Makefile are kept
45 | * Makefile lives in repo
46 | * HTML is generated in wiki
47 | * where can be linked
48 |
49 |
50 | ---
51 |
52 | 05:37 PDT, 2020-05-07 Thursday, May 7, WW19
53 |
54 | moved stuff from git@github.com:AndyGlew/Ri5-CMOs-proposal.git to git@github.com:AndyGlew/Ri5-stuff.wiki.git
55 |
56 | moved stuff from
57 | * git@github.com:AndyGlew/Ri5-CMOs-proposal.git
58 | to
59 | * git@github.com:AndyGlew/Ri5-stuff.wiki.git
60 | and also
61 | * git@github.com:AndyGlew/Ri5-stuff.git
62 |
63 | with
64 | * git@github.com:AndyGlew/Ri5-stuff.wiki.git
65 | a submodule of
66 | * git@github.com:AndyGlew/Ri5-stuff.git
67 |
68 | In this, Ri5-stuff.wiki
69 |
70 |
71 | AsciiDoc source files for proposal document:
72 | * Ri5-CMOs-proposal.adoc
73 | * techpubs.adoc
74 |
75 | Generated proposal:
76 | * Ri5-CMOs-proposal.html
77 | Generated by:
78 | * Makefile
79 |
--------------------------------------------------------------------------------
/Archive/wiki/terminology-notation.asciidoc:
--------------------------------------------------------------------------------
1 |
2 | [appendix]
3 | == Terminology
4 |
5 | TBD: automatically generate from marks in line as in LaTeX (AsciiDoc is probably incapable).
6 |
7 | CMO::
8 | Cache Management Operation. A superset of prefetches, post-stores, and other operations.
9 |
10 | Post-Store::
11 | A cute Berkeleyism for CMOs that push data from a near cache to a further cache,
12 | contrasting with Pre-Fetch.
13 |
14 |
15 | Pre-Fetch, PREFETCH::
16 | Move data within cache hierarchy, typically from further out (e.g. memory) to closer in. (Possibly from one processor's cache to another.)
17 |
18 | Ri5::
19 | shorthand for RISC-V. TBD change from Glew-specific Ri5 to more standard RV
20 |
21 | RV:: standard shorthand for RISC-V (?).
22 |
23 |
24 |
25 | [appendix]
26 | == Notation
27 |
28 | === Encoding Bitstring Notation
29 |
30 | See https://github.com/AndyGlew/comp-arch.net/wiki/Instruction-Encoding-Notation
31 |
32 | This document uses notation such as this below for instruction encoding bit strings:
33 |
34 | PREFETCH.64B.R: `imm12.rs1:5.110.rd=00000.0010011`, e.g. ORI with RD=x0
35 |
36 | In these bitstrings
37 |
38 | * 0 and 1 correspond to bit values
39 |
40 | * fields are specified by rs2:5, rd=00000, etc
41 | * i.e. fieldname:width
42 | * e.g. fieldname=value (width implied)
43 |
44 | * punctuation is used to improve readability, such as period ".", underscore "_", and comma ","
45 |
46 | Bit positions are numbered with bit 0 on the right, in the usual RISC-V manner.
47 |
48 | Elsewhere in the RISC-V toolchain a similar notation is used,
49 | with additions such as allowing blanks to separate fields,
50 | and allowing fields to be specified out of order by specifying bit positions such as
51 | `rd=5..9`
52 |
53 | AW: https://github.com/riscv/riscv-opcodes is where the current instruction encoding metadata lives.
54 | See comment at the top of https://github.com/riscv/riscv-opcodes/blob/master/opcodes-rvv
55 | for description of notation.
56 |
57 |
58 | TBD: unify these notations.
59 |
60 | TBD: generate pretty human readable tables ("opcode maps") from these notations.
61 |
62 | (The RISC-V toolchain already generates "tables" which are essentially lists of encodings. By "pretty" I mean the sort of table that looks like a hierarchy of Karnaugh maps, as is traditional. Other tools also generate nice diagrams of per-instruction encodings and fields.)
63 |
--------------------------------------------------------------------------------
/Archive/wiki/variable-address-range-loop.asciidoc:
--------------------------------------------------------------------------------
1 |
2 | // The CMO.VAR instruction is intended to be used in a software loop such as that below:
3 |
4 |
5 | In pseudocode:
6 |
7 | ----
8 | x11 := lwb
9 | x12 := upb (= lwb + size_in_bytes)
10 | LOOP
11 | CMO.VAR.<> x11,x11,x12
12 | UNTIL x11 == x12
13 | ----
14 |
15 | In assembly code:
16 |
17 | ----
18 | x11 := lwb
19 | x12 := upb
20 | L: CMO.VAR.<> x11,x11,x12
21 | bne x11,x12,L
22 | ----
23 |
--------------------------------------------------------------------------------
/Archive/wiki/voice-typos-editing-this-wiki.md:
--------------------------------------------------------------------------------
1 | I like wikis.
2 |
3 | however, I find it necessary to use voice recognition (Dragon on a PC) to enter text because of computeritis
4 |
5 | unfortunately, the GitHub/Gollum wiki, edited online in an editor like Google Chrome, has many problems when edited using voice recognition.
6 |
7 | For example: The double letters ii and oo highlighted below
8 |
9 | > I am actually just as happy not to **defiine** the CSR operand to **coontain ** an encoding of CMO operations desired,
10 |
11 | I apologize for such errors in the current wiki text. I will correct them as I find them during review. I can avoid them if I edit off-line using software that is more ready for speech recognition. However sometimes I make these edits online, just for efficieency. ( There is another typo **ee** :-( )
12 |
13 |
--------------------------------------------------------------------------------
/Archive/wiki/wiki-stuff---Notes-on-GitHub-wiki-pages.md:
--------------------------------------------------------------------------------
1 | Is it possible that this GitHub wiki does not allow me to create a reference to a page that does not yet exist?
2 |
3 | To me, that is the essence of wiki.
4 |
5 | False alarm: links to pages that do not exist yet work. At least in Markdown. I think also in Asciidoc wiki pages. Mostly with the [[Markdown link syntax in Asciidoc wiki pages]]. But I believe also with traditional Asciidoc links - they are just harder to type.
6 |
7 | Also, annoyingly, Asciidoc wiki pages do not color links that do not exist distinct from links that do exist. That is probably my second most important wiki property.
--------------------------------------------------------------------------------
/CMO-Phase-1-Scope.md:
--------------------------------------------------------------------------------
1 | # CMO Phase 1 Scope
2 |
3 | ## Introduction
4 |
5 | This document summarizes the current plan-of-record for Phase 1 of the CMO
6 | extension. Additional details will be provided by various extension proposal
7 | documents.
8 |
9 | ## Sub-extensions
10 |
11 | Phase 1 is divided into three sub-extensions that add the following
12 | instructions and features:
13 |
14 | * Cache Block Management Operations (Zicbom)
15 | * CBO.INVAL - Invalidate Cache Block (at effective address)
16 | * CBO.CLEAN - Clean Cache Block (at effective address)
17 | * CBO.FLUSH - Flush Cache Block (at effective address)
18 | * Memory ordering with respect to other memory accesses
19 | * Cache Block Zero Operations (Zicboz)
20 | * CBO.ZERO - Zero Cache Block (at effective address)
21 | * Memory ordering with respect to other memory accesses
22 | * Cache Block Prefetch Operations (Zicbop)
23 | * PREFETCH.R - Prefetch Cache Block for Read (at effective address)
24 | * PREFETCH.W - Prefetch Cache Block for Write (at effective address)
25 | * PREFETCH.I - Prefetch Cache Block for Instruction Fetch (at effective
26 | address)
27 |
28 | Instructions in the Zicbom and Zicboz sub-extensions support a [rs1] addressing
29 | mode. Instructions in the Zicbop sub-extension may support a modified form of a
30 | [rs1+imm12] addressing mode.
31 |
32 | For Phase 1, Zicbom instructions operate to the copy of data in memory, while
33 | Zicboz updates the values of memory corresponding to a memory location like
34 | stores. Zicbop instructions may allocate in any cache, or in none at all.
35 |
36 | ## Closed Issues
37 |
38 | _Note:_ "Closed" implies that a given issue has been documented in the
39 | specification.
40 |
41 | * Permissions, protection, and access control (i.e. relationship to translation,
42 | PMAs, PMPs, and privilege level)
43 | * Safe transformations of operations
44 | * HW: safe transformations enable implementation choices
45 | * SW: safe transformations allow less privileged software to use ops
46 | * Memory ordering model (i.e. relationship to loads, stores, FENCEs, SFENCEs, etc.)
47 | * Discovery of block sizes and types
48 | * Final instruction encodings
49 |
50 | ## Open Issues
51 |
52 |
53 | ## Roadmap
54 |
55 | These topics are deferred from Phase 1 and may be considered in Phase 2 and
56 | onward:
57 |
58 | * Effects of mismatched PMA/VA attributes and changing attributes
59 | * Additional levels or points of convergence for system optimization
60 | * Levels (Points of Persistence) beyond the POC for all harts and devices
61 | * Temporality (reuse) hints for PREFETCH operations
62 | * Cache Block Operations by Index
63 | * CBO.INVAL.IX - Invalidate Cache Block at Index
64 | * CBO.CLEAN.IX - Clean Cache Block at Index
65 | * CBO.FLUSH.IX - Flush Cache Block at Index
66 | * CMO.ALL
67 | * DEMOTE
68 | * Completion semantics
69 | * Security-related CMOs, e.g. CMO.ALL.SEC
70 | * Non-uniform (i.e. mixed) block sizes
71 | * Operations that return values, i.e. ranges, e.g. CMO.op.AR and CMO.op.UR
72 | * Others?
73 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | #
2 | # Makefile
3 | #
4 | # Makefile for linux; at present, this file is used primarily to enable building pdfs in local linux
5 | # systems (Makefile.pwsh is the approved publishing flow); please contact the CMO TG chairs for the
6 | # latest publishing methodology
7 | #
8 |
9 | SPEC=cmobase
10 | # version is TG version number; should be updated in both Makefiles
11 | VERSION=1.0.1
12 | DATE=$(shell date +%Y.%m.%d)
13 | COMMITDATE=$(shell git show -s --format=%ci | cut -d ' ' -f 1)
14 | GITVERSION=$(shell git describe --tag --always --dirty)
15 | # moved STAGE/revremark to cmobase.adoc
16 | # NOTE(review): DATE is not referenced by any rule in this Makefile.
17 | # Render the spec PDF with asciidoctor-pdf, then rewrite it in place through Ghostscript (gs).
18 | $(SPEC)-v$(VERSION).pdf: $(SPEC)/$(SPEC).adoc \
19 | $(SPEC)/*.adoc \
20 | $(SPEC)/insns/*.adoc \
21 | $(SPEC)/autogenerated/revision.adoc-snippet
22 | asciidoctor-pdf -r asciidoctor-diagram \
23 | -D . \
24 | -a toc \
25 | -a compress \
26 | -a pdf-style=resources/themes/risc-v_spec-pdf.yml \
27 | -a pdf-fontsdir=resources/fonts \
28 | -o $@ \
29 | $<
30 | gs -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/printer -sOutputFile=opt-$@ $@ && mv opt-$@ $@
31 | # Directory that receives the generated revision snippet; the leading '-' makes make ignore a failing mkdir.
32 | $(SPEC)/autogenerated:
33 | -mkdir $@
34 |
35 | # Rewrites the snippet into $@-tmp on every run (FORCE prerequisite) and only moves it over $@ when diff reports a difference.
36 | $(SPEC)/autogenerated/revision.adoc-snippet: Makefile $(SPEC)/autogenerated FORCE
37 | echo ":revdate: ${COMMITDATE}" > $@-tmp
38 | echo ":revnumber: ${VERSION}-${GITVERSION}" >> $@-tmp
39 | # echo ":revremark: ${STAGE}" >> $@-tmp
40 | diff $@ $@-tmp || mv $@-tmp $@
41 | # Remove every generated $(SPEC)-*.pdf from the current directory.
42 | clean:
43 | rm -f $(SPEC)-*.pdf
44 | # Target with no recipe or prerequisites; presumably no file named FORCE exists, so rules that list it always run.
45 | FORCE:
46 |
--------------------------------------------------------------------------------
/Makefile.pwsh:
--------------------------------------------------------------------------------
1 | #
2 | # Makefile.pwsh
3 | #
4 | # Makefile for Windows powershell; assumes make has been installed (in addition to the standard doc
5 | # flow tools):
6 | # http://gnuwin32.sourceforge.net/packages/make.htm
7 | #
8 | # This file is the currently approved pdf publishing flow
9 | #
10 | # PWSHCMD is the PowerShell invocation prefixed to the shell commands in the rules below.
11 | PWSHCMD=powershell -noprofile -command
12 | SPEC=cmobase
13 | # version is TG version number; should be updated in both Makefiles
14 | VERSION=1.0.1
15 | DATE=$(shell date -UFormat \"%Y.%m.%d\")
16 | COMMITDATE=$(shell git log -1 --format=%cs)
17 | GITVERSION=$(shell git describe --tag --always --dirty)
18 | # moved STAGE/revremark to cmobase.adoc
19 | # NOTE(review): DATE is not referenced by any rule in this Makefile.
20 | # Render the spec PDF with asciidoctor-pdf; the Ghostscript pass is commented out in this flow.
21 | $(SPEC)-v$(VERSION).pdf: $(SPEC)/$(SPEC).adoc \
22 | $(SPEC)/*.adoc \
23 | $(SPEC)/insns/*.adoc \
24 | $(SPEC)/autogenerated/revision.adoc-snippet
25 | asciidoctor-pdf -r asciidoctor-diagram \
26 | -D . \
27 | -a toc \
28 | -a compress \
29 | -a pdf-style=resources/themes/risc-v_spec-pdf.yml \
30 | -a pdf-fontsdir=resources/fonts \
31 | -o $@ \
32 | $<
33 | # ignore ghostscript optimization for now
34 | # gs -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/printer -sOutputFile=opt-$@ $@ && mv opt-$@ $@
35 |
36 | # Create the directory that receives the generated revision snippet.
37 | $(SPEC)/autogenerated:
38 | $(PWSHCMD) mkdir $@
39 |
40 | # NOTE(review): the rule below lists Makefile, not Makefile.pwsh, as a prerequisite - confirm this is intended.
41 | # the original Makefile wrote to a temp file ($@-tmp) and compared with the
42 | # previous version; ignore that for now
43 | $(SPEC)/autogenerated/revision.adoc-snippet: Makefile $(SPEC)/autogenerated FORCE
44 | $(PWSHCMD) echo (':revdate: ' + '${COMMITDATE}') > $@
45 | $(PWSHCMD) echo (':revnumber: ' + '${VERSION}' + '-' + '${GITVERSION}') >> $@
46 | # $(PWSHCMD) echo (':revremark: ' + '${STAGE}') >> $@
47 | # $(PWSHCMD) diff (cat $@) (cat $@-tmp) || $(PWSHCMD) mv $@-tmp $@
48 |
49 | # Remove every generated $(SPEC)-*.pdf from the current directory.
50 | clean:
51 | $(PWSHCMD) rm $(SPEC)-*.pdf
52 |
53 | # Target with no recipe or prerequisites; presumably no file named FORCE exists, so rules that list it always run.
54 | FORCE:
55 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Cache Management Operations for RISC-V
2 |
3 | _Note: The RISC-V CMO TG is currently in maintenance mode. Updates to the existing extensions, Zicbom, Zicboz, and Zicbop, will be made as necessary. Please contact help@riscv.org with any questions._
4 |
5 | This repository is the working area for the RISC-V Cache Management Operation
6 | Task Group (CMO TG). The TG is primarily responsible for developing ISA
7 | extensions in the area of cache management.
8 |
9 | The repo is organized as follows:
10 |
11 | * Archive - contains old TG information including Andy Glew's initial proposals
12 | and wiki pages
13 | * cmobase - this is the main working area for Phase 1 TG documentation
14 | * specifications - the latest published spec is here
15 | * CMO-Phase-1-Scope.md - provides a summary of the current TG goals and roadmap
16 | * LICENSE.md - license information
17 | * README.md - this file
18 |
19 | ## Other Resources
20 |
21 | Administrative information, such as the TG charter, meeting agendas and minutes,
22 | etc., can be found here:
23 |
24 | https://github.com/riscv-admin-docs/cache-management-ops
25 |
26 | Additional TG information can be found here (RISC-V members only):
27 |
28 | https://lists.riscv.org/g/tech-cmo
29 |
30 | Email can be sent to the TG mailing list (RISC-V members only):
31 |
32 | tech-cmo@lists.riscv.org
33 |
34 | ## Licensing
35 |
36 | The files in this repository are licensed under the Creative Commons Attribution
37 | 4.0 International License (CC-BY 4.0).
38 |
39 | The full license text is available at
40 | https://creativecommons.org/licenses/by/4.0/.
41 |
--------------------------------------------------------------------------------
/cmobase/Zicbom.adoc:
--------------------------------------------------------------------------------
1 | [#Zicbom,reftext="Cache-Block Management Instructions"]
2 | === Cache-Block Management Instructions
3 |
4 | Cache-block management instructions enable software running on a set of coherent
5 | agents to communicate with a set of non-coherent agents by performing one of the
6 | following operations:
7 |
8 | * An invalidate operation makes data from store operations performed by a set of
9 | non-coherent agents visible to the set of coherent agents at a point common to
10 | both sets by deallocating all copies of a cache block from the set of coherent
11 | caches up to that point
12 |
13 | * A clean operation makes data from store operations performed by the set of
14 | coherent agents visible to a set of non-coherent agents at a point common to
15 | both sets by performing a write transfer of a copy of a cache block to that
16 | point provided a coherent agent performed a store operation that modified the
17 | data in the cache block since the previous invalidate, clean, or flush
18 | operation on the cache block
19 |
20 | * A flush operation atomically performs a clean operation followed by an
21 | invalidate operation
22 |
23 | In the Zicbom extension, the instructions operate to a point common to _all_
24 | agents in the system. In other words, an invalidate operation ensures that store
25 | operations from all non-coherent agents are visible to agents in the set of
26 | coherent agents, and a clean operation ensures that store operations from
27 | coherent agents are visible to all non-coherent agents.
28 |
29 | ****
30 |
31 | _The Zicbom extension does not prohibit agents that fall outside of the above
32 | architectural definition; however, software cannot rely on the defined cache
33 | operations to have the desired effects with respect to those agents._
34 |
35 | _Future extensions may define different sets of agents for the purposes of
36 | performance optimization._
37 |
38 | ****
39 |
40 | These instructions operate on the cache block whose effective address is
41 | specified in _rs1_. The effective address is translated into a corresponding
42 | physical address by the appropriate translation mechanisms.
43 |
44 | The following instructions comprise the Zicbom extension:
45 |
46 | [%header,cols="^1,^1,4,8"]
47 | |===
48 | |RV32
49 | |RV64
50 | |Mnemonic
51 | |Instruction
52 |
53 | |✓
54 | |✓
55 | |cbo.clean _base_
56 | |<<#insns-cbo_clean>>
57 |
58 | |✓
59 | |✓
60 | |cbo.flush _base_
61 | |<<#insns-cbo_flush>>
62 |
63 | |✓
64 | |✓
65 | |cbo.inval _base_
66 | |<<#insns-cbo_inval>>
67 |
68 | |===
69 |
--------------------------------------------------------------------------------
/cmobase/Zicbop.adoc:
--------------------------------------------------------------------------------
1 | [#Zicbop,reftext="Cache-Block Prefetch Instructions"]
2 | === Cache-Block Prefetch Instructions
3 |
4 | Cache-block prefetch instructions are HINTs to the hardware to indicate that
5 | software intends to perform a particular type of memory access in the near
6 | future. The types of memory accesses are instruction fetch, data read (i.e.
7 | load), and data write (i.e. store).
8 |
9 | These instructions operate on the cache block whose effective address is the sum
10 | of the base address specified in _rs1_ and the sign-extended offset encoded in
11 | _imm[11:0]_, where _imm[4:0]_ shall equal `0b00000`. The effective address is
12 | translated into a corresponding physical address by the appropriate translation
13 | mechanisms.
14 |
15 | ****
16 |
17 | _Cache-block prefetch instructions are encoded as ORI instructions with rd equal
18 | to `0b00000`; however, for the purposes of effective address calculation, this
19 | field is also interpreted as imm[4:0] like a store instruction._
20 |
21 | ****
22 |
23 | The following instructions comprise the Zicbop extension:
24 |
25 | [%header,cols="^1,^1,4,8"]
26 | |===
27 | |RV32
28 | |RV64
29 | |Mnemonic
30 | |Instruction
31 |
32 | |✓
33 | |✓
34 | |prefetch.i _offset_(_base_)
35 | |<<#insns-prefetch_i>>
36 |
37 | |✓
38 | |✓
39 | |prefetch.r _offset_(_base_)
40 | |<<#insns-prefetch_r>>
41 |
42 | |✓
43 | |✓
44 | |prefetch.w _offset_(_base_)
45 | |<<#insns-prefetch_w>>
46 |
47 | |===
48 |
--------------------------------------------------------------------------------
/cmobase/Zicboz.adoc:
--------------------------------------------------------------------------------
1 | [#Zicboz,reftext="Cache-Block Zero Instructions"]
2 | === Cache-Block Zero Instructions
3 |
4 | Cache-block zero instructions store zeros to the set of bytes corresponding to a
5 | cache block. An implementation may update the bytes in any order and with any
6 | granularity and atomicity, including individual bytes.
7 |
8 | ****
9 |
10 | _Cache-block zero instructions store zeros independently of whether data from
11 | the underlying memory locations are cacheable. In addition, this specification
12 | does not constrain how the bytes are written._
13 |
14 | ****
15 |
16 | These instructions operate on the cache block, or the memory locations
17 | corresponding to the cache block, whose effective address is specified in _rs1_.
18 | The effective address is translated into a corresponding physical address by the
19 | appropriate translation mechanisms.
20 |
21 | The following instructions comprise the Zicboz extension:
22 |
23 | [%header,cols="^1,^1,4,8"]
24 | |===
25 | |RV32
26 | |RV64
27 | |Mnemonic
28 | |Instruction
29 |
30 | |✓
31 | |✓
32 | |cbo.zero _base_
33 | |<<#insns-cbo_zero>>
34 |
35 | |===
36 |
--------------------------------------------------------------------------------
/cmobase/acknowledgements.adoc:
--------------------------------------------------------------------------------
1 | [acknowledgments]
2 | == Acknowledgments
3 |
4 | Contributors to this specification (in alphabetical order) include: +
5 | Allen Baum,
6 | Paul Donahue,
7 | Greg Favor,
8 | Andy Glew,
9 | John Ingalls,
10 | David Kruckemyer,
11 | Josh Scheid,
12 | Philipp Tomsich,
13 | Paul Walmsley,
14 | and
15 | Derek Williams
16 |
17 | We express our gratitude to everyone that contributed to, reviewed, or improved
18 | this specification through their comments and questions.
19 |
--------------------------------------------------------------------------------
/cmobase/cmobase.adoc:
--------------------------------------------------------------------------------
1 | [[cmobase]]
2 | = RISC-V Base Cache Management Operation ISA Extensions
3 | :description: Base Cache Management Operation ISA Extensions
4 | :company: RISC-V
5 | // Pull in variable with revision info
6 | include::autogenerated/revision.adoc-snippet[]
7 | :revremark: Ratified
8 | :url-riscv: http://riscv.org
9 | :doctype: book
10 | //:doctype: report
11 | :preface-title: Licensing and Acknowledgments
12 | :colophon:
13 | :appendix-caption: Appendix
14 | :imagesdir: images
15 | :title-logo-image: image:RISC-V-logo.svg[pdfwidth=3.25in,align=center]
16 | //:page-background-image: image:draft.svg[opacity=20%]
17 | //:title-page-background-image: none
18 | :back-cover-image: image:circuit.png[opacity=25%]
19 | // Settings:
20 | :experimental:
21 | :reproducible:
22 | // needs to be changed
23 | :WaveDromEditorApp: ~/wavedrom-cli/cli/wavedrom-cli.js
24 | :imagesoutdir: images
25 | :icons: font
26 | :lang: en
27 | :listing-caption: Listing
28 | :sectnums:
29 | :toc: left
30 | :toclevels: 4
31 | :source-highlighter: pygments
32 | ifdef::backend-pdf[]
33 | :source-highlighter: coderay
34 | endif::[]
35 | :data-uri:
36 | :hide-uri-scheme:
37 | :stem:
38 | :footnote:
39 | :xrefstyle: short
40 | :csrname: envcfg
41 |
42 | // Front-Matters
43 | include::colophon.adoc[]
44 | include::acknowledgements.adoc[]
45 |
46 | [preface]
47 | == Pseudocode for instruction semantics
48 |
49 | The semantics of each instruction in the <<#insns>> chapter is expressed in a
50 | SAIL-like syntax.
51 |
52 | // Chapters
53 | include::introduction.adoc[]
54 | include::background.adoc[]
55 | include::csr_state.adoc[]
56 | include::extensions.adoc[]
57 | include::instructions.adoc[]
58 |
59 | [appendix]
60 | == Software guide
61 |
--------------------------------------------------------------------------------
/cmobase/colophon.adoc:
--------------------------------------------------------------------------------
1 | [colophon]
2 | == Colophon
3 |
4 | This document is in the link:http://riscv.org/spec-state[Ratified] state. No
5 | changes are allowed. Any desired or needed changes can be the subject of a
6 | follow-on new extension. Ratified extensions are never revised. For more
7 | information, see link:http://riscv.org/spec-state[here].
8 |
9 | [NOTE]
10 | .Copyright and licensure:
11 | This document is released under the
12 | https://creativecommons.org/licenses/by/4.0/[Creative Commons Attribution 4.0
13 | International License].
14 |
--------------------------------------------------------------------------------
/cmobase/csr_state.adoc:
--------------------------------------------------------------------------------
1 | [#csr_state,reftext="Control and Status Register State"]
2 | == Control and Status Register State
3 |
4 | ****
5 |
6 | _The CMO extensions rely on state in {csrname} CSRs that will be defined in a
7 | future update to the privileged architecture. If this CSR update is not
8 | ratified, the CMO extension will define its own CSRs._
9 |
10 | ****
11 |
12 | Three CSRs control the execution of CMO instructions:
13 |
14 | * `m{csrname}`
15 | * `s{csrname}`
16 | * `h{csrname}`
17 |
18 | The `s{csrname}` register is used by all supervisor modes, including VS-mode. A
19 | hypervisor is responsible for saving and restoring `s{csrname}` on guest context
20 | switches. The `h{csrname}` register is only present if the H-extension is
21 | implemented and enabled.
22 |
23 | Each `x{csrname}` register (where `x` is `m`, `s`, or `h`) has the following
24 | generic format:
25 |
26 | .Generic Format for x{csrname} CSRs
27 | [cols="^10,^10,80a"]
28 | |===
29 | | Bits | Name | Description
30 |
31 | | [5:4] | `CBIE` | Cache Block Invalidate instruction Enable
32 |
33 | Enables the execution of the cache block invalidate instruction, `CBO.INVAL`, in
34 | a lower privilege mode:
35 |
36 | * `00`: The instruction raises an illegal instruction or virtual instruction
37 | exception
38 | * `01`: The instruction is executed and performs a flush operation
39 | * `10`: _Reserved_
40 | * `11`: The instruction is executed and performs an invalidate operation
41 |
42 | | [6] | `CBCFE` | Cache Block Clean and Flush instruction Enable
43 |
44 | Enables the execution of the cache block clean instruction, `CBO.CLEAN`, and the
45 | cache block flush instruction, `CBO.FLUSH`, in a lower privilege mode:
46 |
47 | * `0`: The instruction raises an illegal instruction or virtual instruction
48 | exception
49 | * `1`: The instruction is executed
50 |
51 | | [7] | `CBZE` | Cache Block Zero instruction Enable
52 |
53 | Enables the execution of the cache block zero instruction, `CBO.ZERO`, in a
54 | lower privilege mode:
55 |
56 | * `0`: The instruction raises an illegal instruction or virtual instruction
57 | exception
58 | * `1`: The instruction is executed
59 |
60 | |===
61 |
62 | The x{csrname} registers control CBO instruction execution based on the current
63 | privilege mode and the state of the appropriate CSRs, as detailed below.
64 |
65 | A `CBO.INVAL` instruction executes or raises either an illegal instruction
66 | exception or a virtual instruction exception based on the state of the
67 | `x{csrname}.CBIE` fields:
68 |
69 | [source,sail,subs="attributes+"]
70 | --
71 |
72 | // illegal instruction exceptions
73 | if (((priv_mode != M) && (m{csrname}.CBIE == 00)) ||
74 | ((priv_mode == U) && (s{csrname}.CBIE == 00)))
75 | {
76 | <raise illegal instruction exception>
77 | }
78 | // virtual instruction exceptions
79 | else if (((priv_mode == VS) && (h{csrname}.CBIE == 00)) ||
80 | ((priv_mode == VU) && ((h{csrname}.CBIE == 00) || (s{csrname}.CBIE == 00))))
81 | {
82 | <raise virtual instruction exception>
83 | }
84 | // execute instruction
85 | else
86 | {
87 | if (((priv_mode != M) && (m{csrname}.CBIE == 01)) ||
88 | ((priv_mode == U) && (s{csrname}.CBIE == 01)) ||
89 | ((priv_mode == VS) && (h{csrname}.CBIE == 01)) ||
90 | ((priv_mode == VU) && ((h{csrname}.CBIE == 01) || (s{csrname}.CBIE == 01))))
91 | {
92 | <execute CBO.INVAL and perform flush operation>
93 | }
94 | else
95 | {
96 | <execute CBO.INVAL and perform invalidate operation>
97 | }
98 | }
99 |
100 |
101 | --
102 |
103 | ****
104 |
105 | _Until a modified cache block has updated memory, a `CBO.INVAL` instruction may
106 | expose stale data values in memory if the CSRs are programmed to perform an
107 | invalidate operation. This behavior may result in a security hole if lower
108 | privileged level software performs an invalidate operation and accesses
109 | sensitive information in memory._
110 |
111 | _To avoid such holes, higher privileged level software must perform either a
112 | clean or flush operation on the cache block before permitting lower privileged
113 | level software to perform an invalidate operation on the block. Alternatively,
114 | higher privileged level software may program the CSRs so that `CBO.INVAL`
115 | either traps or performs a flush operation in a lower privileged level._
116 |
117 | ****
118 |
119 | A `CBO.CLEAN` or `CBO.FLUSH` instruction executes or raises an illegal
120 | instruction or virtual instruction exception based on the state of the
121 | `x{csrname}.CBCFE` bits:
122 |
123 | [source,sail,subs="attributes+"]
124 | --
125 |
126 | // illegal instruction exceptions
127 | if (((priv_mode != M) && !m{csrname}.CBCFE) ||
128 | ((priv_mode == U) && !s{csrname}.CBCFE))
129 | {
130 | <raise illegal instruction exception>
131 | }
132 | // virtual instruction exceptions
133 | else if (((priv_mode == VS) && !h{csrname}.CBCFE) ||
134 | ((priv_mode == VU) && !(h{csrname}.CBCFE && s{csrname}.CBCFE)))
135 | {
136 | <raise virtual instruction exception>
137 | }
138 | // execute instruction
139 | else
140 | {
141 | <execute CBO.CLEAN or CBO.FLUSH>
142 | }
143 |
144 | --
145 |
146 | Finally, a `CBO.ZERO` instruction executes or raises an illegal instruction or
147 | virtual instruction exception based on the state of the `x{csrname}.CBZE` bits:
148 |
149 | [source,sail,subs="attributes+"]
150 | --
151 |
152 | // illegal instruction exceptions
153 | if (((priv_mode != M) && !m{csrname}.CBZE) ||
154 | ((priv_mode == U) && !s{csrname}.CBZE))
155 | {
156 | <raise illegal instruction exception>
157 | }
158 | // virtual instruction exceptions
159 | else if (((priv_mode == VS) && !h{csrname}.CBZE) ||
160 | ((priv_mode == VU) && !(h{csrname}.CBZE && s{csrname}.CBZE)))
161 | {
162 | <raise virtual instruction exception>
163 | }
164 | // execute instruction
165 | else
166 | {
167 | <execute CBO.ZERO>
168 | }
169 |
170 | --
171 |
172 | Each `x{csrname}` register is WARL; however, software should determine the legal
173 | values from the execution environment discovery mechanism.
174 |
--------------------------------------------------------------------------------
/cmobase/extensions.adoc:
--------------------------------------------------------------------------------
1 | [#extensions,reftext="Extensions"]
2 | == Extensions
3 |
4 | CMO instructions are defined in the following extensions:
5 |
6 | * <<#Zicbom>>
7 | * <<#Zicboz>>
8 | * <<#Zicbop>>
9 |
10 | include::Zicbom.adoc[]
11 | include::Zicboz.adoc[]
12 | include::Zicbop.adoc[]
13 |
--------------------------------------------------------------------------------
/cmobase/images/RISC-V-logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/cmobase/images/circuit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/cmobase/images/circuit.png
--------------------------------------------------------------------------------
/cmobase/insns/cbo.clean.adoc:
--------------------------------------------------------------------------------
1 | [#insns-cbo_clean,reftext="Cache Block Clean"]
2 | === cbo.clean
3 |
4 | Synopsis::
5 | Perform a clean operation on a cache block
6 |
7 | Mnemonic::
8 | cbo.clean _offset_(_base_)
9 |
10 | Encoding::
11 | [wavedrom, , svg]
12 | ....
13 | {reg:[
14 | { bits: 7, name: 0xF, attr: ['MISC-MEM'] },
15 | { bits: 5, name: 0x0 },
16 | { bits: 3, name: 0x2, attr: ['CBO'] },
17 | { bits: 5, name: 'rs1', attr: ['base'] },
18 | { bits: 12, name: 0x001, attr: ['CBO.CLEAN'] },
19 | ]}
20 | ....
21 |
22 | Description::
23 |
24 | A *cbo.clean* instruction performs a clean operation on the cache block whose
25 | effective address is the base address specified in _rs1_. The offset operand may
26 | be omitted; otherwise, any expression that computes the offset shall evaluate to
27 | zero. The instruction operates on the set of coherent caches accessed by the
28 | agent executing the instruction.
29 |
30 | Operation::
31 | [source,sail]
32 | --
33 | TODO
34 | --
35 |
--------------------------------------------------------------------------------
/cmobase/insns/cbo.flush.adoc:
--------------------------------------------------------------------------------
1 | [#insns-cbo_flush,reftext="Cache Block Flush"]
2 | === cbo.flush
3 |
4 | Synopsis::
5 | Perform a flush operation on a cache block
6 |
7 | Mnemonic::
8 | cbo.flush _offset_(_base_)
9 |
10 | Encoding::
11 | [wavedrom, , svg]
12 | ....
13 | {reg:[
14 | { bits: 7, name: 0xF, attr: ['MISC-MEM'] },
15 | { bits: 5, name: 0x0 },
16 | { bits: 3, name: 0x2, attr: ['CBO'] },
17 | { bits: 5, name: 'rs1', attr: ['base'] },
18 | { bits: 12, name: 0x002, attr: ['CBO.FLUSH'] },
19 | ]}
20 | ....
21 |
22 | Description::
23 |
24 | A *cbo.flush* instruction performs a flush operation on the cache block whose
25 | effective address is the base address specified in _rs1_. The offset operand may
26 | be omitted; otherwise, any expression that computes the offset shall evaluate to
27 | zero. The instruction operates on the set of coherent caches accessed by the
28 | agent executing the instruction.
29 |
30 | Operation::
31 | [source,sail]
32 | --
33 | TODO
34 | --
35 |
--------------------------------------------------------------------------------
/cmobase/insns/cbo.inval.adoc:
--------------------------------------------------------------------------------
1 | [#insns-cbo_inval,reftext="Cache Block Invalidate"]
2 | === cbo.inval
3 |
4 | Synopsis::
5 | Perform an invalidate operation on a cache block
6 |
7 | Mnemonic::
8 | cbo.inval _offset_(_base_)
9 |
10 | Encoding::
11 | [wavedrom, , svg]
12 | ....
13 | {reg:[
14 | { bits: 7, name: 0xF, attr: ['MISC-MEM'] },
15 | { bits: 5, name: 0x0 },
16 | { bits: 3, name: 0x2, attr: ['CBO'] },
17 | { bits: 5, name: 'rs1', attr: ['base'] },
18 | { bits: 12, name: 0x000, attr: ['CBO.INVAL'] },
19 | ]}
20 | ....
21 |
22 | Description::
23 |
24 | A *cbo.inval* instruction performs an invalidate operation on the cache block
25 | whose effective address is the base address specified in _rs1_. The offset
26 | operand may be omitted; otherwise, any expression that computes the offset shall
27 | evaluate to zero. The instruction operates on the set of coherent caches
28 | accessed by the agent executing the instruction. Depending on CSR programming,
29 | the instruction may perform a flush operation instead of an invalidate
30 | operation.
31 |
32 | Operation::
33 | [source,sail]
34 | --
35 | TODO
36 | --
37 |
--------------------------------------------------------------------------------
/cmobase/insns/cbo.zero.adoc:
--------------------------------------------------------------------------------
1 | [#insns-cbo_zero,reftext="Cache Block Zero"]
2 | === cbo.zero
3 |
4 | Synopsis::
5 | Store zeros to the full set of bytes corresponding to a cache block
6 |
7 | Mnemonic::
8 | cbo.zero _offset_(_base_)
9 |
10 | Encoding::
11 | [wavedrom, , svg]
12 | ....
13 | {reg:[
14 | { bits: 7, name: 0xF, attr: ['MISC-MEM'] },
15 | { bits: 5, name: 0x0 },
16 | { bits: 3, name: 0x2, attr: ['CBO'] },
17 | { bits: 5, name: 'rs1', attr: ['base'] },
18 | { bits: 12, name: 0x004, attr: ['CBO.ZERO'] },
19 | ]}
20 | ....
21 |
22 | Description::
23 |
24 | A *cbo.zero* instruction performs stores of zeros to the full set of bytes
25 | corresponding to the cache block whose effective address is the base address
26 | specified in _rs1_. The offset operand may be omitted; otherwise, any expression
27 | that computes the offset shall evaluate to zero. An implementation may or may
28 | not update the entire set of bytes atomically.
29 |
30 | Operation::
31 | [source,sail]
32 | --
33 | TODO
34 | --
35 |
--------------------------------------------------------------------------------
/cmobase/insns/prefetch.i.adoc:
--------------------------------------------------------------------------------
1 | [#insns-prefetch_i,reftext="Cache Block Prefetch for Instruction Fetch"]
2 | === prefetch.i
3 |
4 | Synopsis::
5 | Provide a HINT to hardware that a cache block is likely to be accessed by an
6 | instruction fetch in the near future
7 |
8 | Mnemonic::
9 | prefetch.i _offset_(_base_)
10 |
11 | Encoding::
12 | [wavedrom, , svg]
13 | ....
14 | {reg:[
15 | { bits: 7, name: 0x13, attr: ['OP-IMM'] },
16 | { bits: 5, name: 0x0, attr: ['offset[4:0]'] },
17 | { bits: 3, name: 0x6, attr: ['ORI'] },
18 | { bits: 5, name: 'rs1', attr: ['base'] },
19 | { bits: 5, name: 0x0, attr: ['PREFETCH.I'] },
20 | { bits: 7, name: 'imm[11:5]', attr: ['offset[11:5]'] },
21 | ]}
22 | ....
23 |
24 | Description::
25 |
26 | A *prefetch.i* instruction indicates to hardware that the cache block whose
27 | effective address is the sum of the base address specified in _rs1_ and the
28 | sign-extended offset encoded in _imm[11:0]_, where _imm[4:0]_ equals `0b00000`,
29 | is likely to be accessed by an instruction fetch in the near future.
30 |
31 | ****
32 |
33 | _An implementation may opt to cache a copy of the cache block in a cache
34 | accessed by an instruction fetch in order to improve memory access latency, but
35 | this behavior is not required._
36 |
37 | ****
38 |
39 | Operation::
40 | [source,sail]
41 | --
42 | TODO
43 | --
44 |
--------------------------------------------------------------------------------
/cmobase/insns/prefetch.r.adoc:
--------------------------------------------------------------------------------
1 | [#insns-prefetch_r,reftext="Cache Block Prefetch for Data Read"]
2 | === prefetch.r
3 |
4 | Synopsis::
5 | Provide a HINT to hardware that a cache block is likely to be accessed by a data
6 | read in the near future
7 |
8 | Mnemonic::
9 | prefetch.r _offset_(_base_)
10 |
11 | Encoding::
12 | [wavedrom, , svg]
13 | ....
14 | {reg:[
15 | { bits: 7, name: 0x13, attr: ['OP-IMM'] },
16 | { bits: 5, name: 0x0, attr: ['offset[4:0]'] },
17 | { bits: 3, name: 0x6, attr: ['ORI'] },
18 | { bits: 5, name: 'rs1', attr: ['base'] },
19 | { bits: 5, name: 0x1, attr: ['PREFETCH.R'] },
20 | { bits: 7, name: 'imm[11:5]', attr: ['offset[11:5]'] },
21 | ]}
22 | ....
23 |
24 | Description::
25 |
26 | A *prefetch.r* instruction indicates to hardware that the cache block whose
27 | effective address is the sum of the base address specified in _rs1_ and the
28 | sign-extended offset encoded in _imm[11:0]_, where _imm[4:0]_ equals `0b00000`,
29 | is likely to be accessed by a data read (i.e., a load) in the near future.
30 |
31 | ****
32 |
33 | _An implementation may opt to cache a copy of the cache block in a cache
34 | accessed by a data read in order to improve memory access latency, but this
35 | behavior is not required._
36 |
37 | ****
38 |
39 | Operation::
40 | [source,sail]
41 | --
42 | TODO
43 | --
44 |
--------------------------------------------------------------------------------
/cmobase/insns/prefetch.w.adoc:
--------------------------------------------------------------------------------
1 | [#insns-prefetch_w,reftext="Cache Block Prefetch for Data Write"]
2 | === prefetch.w
3 |
4 | Synopsis::
5 | Provide a HINT to hardware that a cache block is likely to be accessed by a data
6 | write in the near future
7 |
8 | Mnemonic::
9 | prefetch.w _offset_(_base_)
10 |
11 | Encoding::
12 | [wavedrom, , svg]
13 | ....
14 | {reg:[
15 | { bits: 7, name: 0x13, attr: ['OP-IMM'] },
16 | { bits: 5, name: 0x0, attr: ['offset[4:0]'] },
17 | { bits: 3, name: 0x6, attr: ['ORI'] },
18 | { bits: 5, name: 'rs1', attr: ['base'] },
19 | { bits: 5, name: 0x3, attr: ['PREFETCH.W'] },
20 | { bits: 7, name: 'imm[11:5]', attr: ['offset[11:5]'] },
21 | ]}
22 | ....
23 |
24 | Description::
25 |
26 | A *prefetch.w* instruction indicates to hardware that the cache block whose
27 | effective address is the sum of the base address specified in _rs1_ and the
28 | sign-extended offset encoded in _imm[11:0]_, where _imm[4:0]_ equals `0b00000`,
29 | is likely to be accessed by a data write (i.e., a store) in the near future.
30 |
31 | ****
32 |
33 | _An implementation may opt to cache a copy of the cache block in a cache
34 | accessed by a data write in order to improve memory access latency, but this
35 | behavior is not required._
36 |
37 | ****
38 |
39 | Operation::
40 | [source,sail]
41 | --
42 | TODO
43 | --
44 |
--------------------------------------------------------------------------------
/cmobase/instructions.adoc:
--------------------------------------------------------------------------------
1 | [#insns,reftext="Instructions"]
2 | == Instructions
3 | include::insns/cbo.clean.adoc[]
4 | <<<
5 | include::insns/cbo.flush.adoc[]
6 | <<<
7 | include::insns/cbo.inval.adoc[]
8 | <<<
9 | include::insns/cbo.zero.adoc[]
10 | <<<
11 | include::insns/prefetch.i.adoc[]
12 | <<<
13 | include::insns/prefetch.r.adoc[]
14 | <<<
15 | include::insns/prefetch.w.adoc[]
16 |
--------------------------------------------------------------------------------
/cmobase/introduction.adoc:
--------------------------------------------------------------------------------
1 | [#intro,reftext="Introduction"]
2 | == Introduction
3 |
4 | _Cache-management operation_ (or _CMO_) instructions perform operations on
5 | copies of data in the memory hierarchy. In general, CMO instructions operate on
6 | cached copies of data, but in some cases, a CMO instruction may operate on
7 | memory locations directly. Furthermore, CMO instructions are grouped by
8 | operation into the following classes:
9 |
10 | * A _management_ instruction manipulates cached copies of data with respect to a
11 | set of agents that can access the data
12 | * A _zero_ instruction zeros out a range of memory locations, potentially
13 | allocating cached copies of data in one or more caches
14 | * A _prefetch_ instruction indicates to hardware that data at a given memory
15 | location may be accessed in the near future, potentially allocating cached
16 | copies of data in one or more caches
17 |
18 | This document introduces a base set of CMO ISA extensions that operate
19 | specifically on cache blocks or the memory locations corresponding to a cache
20 | block; these are known as _cache-block operation_ (or _CBO_) instructions. Each
21 | of the above classes of instructions represents an extension in this
22 | specification:
23 |
24 | * The _Zicbom_ extension defines a set of cache-block management instructions:
25 | `CBO.INVAL`, `CBO.CLEAN`, and `CBO.FLUSH`
26 | * The _Zicboz_ extension defines a cache-block zero instruction: `CBO.ZERO`
27 | * The _Zicbop_ extension defines a set of cache-block prefetch instructions:
28 | `PREFETCH.R`, `PREFETCH.W`, and `PREFETCH.I`
29 |
30 | The execution behavior of the above instructions is also modified by CSR state
31 | added by this specification.
32 |
33 | The remainder of this document provides general background information on CMO
34 | instructions and describes each of the above ISA extensions.
35 |
36 | ****
37 |
38 | _The term CMO encompasses all operations on caches or resources related to
39 | caches. The term CBO represents a subset of CMOs that operate only on cache
40 | blocks. The first CMO extensions define only CBOs._
41 |
42 | ****
43 |
--------------------------------------------------------------------------------
/resources/fonts/DroidSans-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/DroidSans-Bold.ttf
--------------------------------------------------------------------------------
/resources/fonts/DroidSans.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/DroidSans.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunbbx.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbbx.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunbmo.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbmo.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunbmr.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbmr.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunbso.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbso.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunbtl.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbtl.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunbto.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbto.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunbxo.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunbxo.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunsi.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunsi.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunso.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunso.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunss.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunss.ttf
--------------------------------------------------------------------------------
/resources/fonts/cmunsx.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/cmunsx.ttf
--------------------------------------------------------------------------------
/resources/fonts/droid-sans-fallback.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/droid-sans-fallback.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1mn-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-bold.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1mn-light.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-light.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1mn-medium.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-medium.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1mn-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-regular.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1mn-thin.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1mn-thin.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1p-black.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-black.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1p-bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-bold.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1p-heavy.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-heavy.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1p-light.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-light.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1p-medium.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-medium.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1p-regular-fallback.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-regular-fallback.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1p-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-regular.ttf
--------------------------------------------------------------------------------
/resources/fonts/mplus-1p-thin.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/resources/fonts/mplus-1p-thin.ttf
--------------------------------------------------------------------------------
/specifications/cmobase-v0.3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.3.pdf
--------------------------------------------------------------------------------
/specifications/cmobase-v0.4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.4.pdf
--------------------------------------------------------------------------------
/specifications/cmobase-v0.5.0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.5.0.pdf
--------------------------------------------------------------------------------
/specifications/cmobase-v0.5.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.5.1.pdf
--------------------------------------------------------------------------------
/specifications/cmobase-v0.5.2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.5.2.pdf
--------------------------------------------------------------------------------
/specifications/cmobase-v0.6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v0.6.pdf
--------------------------------------------------------------------------------
/specifications/cmobase-v1.0-rc1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v1.0-rc1.pdf
--------------------------------------------------------------------------------
/specifications/cmobase-v1.0-rc2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v1.0-rc2.pdf
--------------------------------------------------------------------------------
/specifications/cmobase-v1.0.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v1.0.1.pdf
--------------------------------------------------------------------------------
/specifications/cmobase-v1.0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/riscv/riscv-CMOs/8e8a8e01e1e2761579eaaa0f1c0f0c1efac9eddb/specifications/cmobase-v1.0.pdf
--------------------------------------------------------------------------------