├── README.md ├── ipd ├── 0001 │ └── README.md ├── 0002 │ └── README.md ├── 0003 │ └── README.adoc ├── 0004 │ └── README.md ├── 0005 │ └── README.md ├── 0006 │ └── README.md ├── 0007 │ └── README.md ├── 0008 │ └── README.md ├── 0009 │ └── README.md ├── 0010 │ └── README.md ├── 0011 │ └── README.md ├── 0012 │ └── README.md ├── 0013 │ ├── README.md │ └── ddi_dma_cookie_iter.9f.pdf ├── 0014 │ ├── README.md │ └── Y2038-warning.png ├── 0015 │ └── README.md ├── 0016 │ └── README.md ├── 0017 │ └── README.md ├── 0018 │ └── README.md ├── 0019 │ └── README.md ├── 0020 │ └── README.adoc ├── 0021 │ └── README.md ├── 0022 │ └── README.md ├── 0023 │ └── README.md ├── 0024 │ └── README.md ├── 0025 │ └── README.adoc ├── 0026 │ └── README.md ├── 0027 │ └── README.md ├── 0028 │ └── README.md ├── 0029 │ └── README.md ├── 0030 │ └── README.md ├── 0031 │ └── README.md ├── 0032 │ └── README.md ├── 0033 │ └── README.md ├── 0034 │ └── README.md ├── 0035 │ └── README.md ├── 0036 │ └── README.md ├── 0037 │ └── README.md ├── 0038 │ └── README.adoc ├── 0039 │ └── README.adoc ├── 0040 │ └── README.md ├── 0041 │ └── README.adoc ├── 0042 │ └── README.md ├── 0043 │ └── README.adoc ├── 0044 │ └── README.adoc ├── 0045 │ └── README.adoc ├── 0046 │ └── README.adoc ├── 0047 │ └── README.adoc ├── 0048 │ └── README.adoc ├── 0049 │ └── README.adoc ├── 0050 │ └── README.adoc ├── 0051 │ └── README.adoc ├── 0052 │ └── README.adoc ├── 0053 │ └── README.adoc └── 0054 │ ├── README.md │ └── schema-revs.sql └── prototypes └── README.adoc /README.md: -------------------------------------------------------------------------------- 1 | # illumos Project Discussion 2 | 3 | IPDs are a shrink-to-fit description of larger scale (in either code size or 4 | impact) project wishing to make changes to illumos. They should describe the 5 | what and the why and give people the opportunity to comment on both. 
6 | 7 | An IPD is at its heart, just a README.md in a numbered directory in this 8 | repository, existing IPDs are enumerated below for easy reference. Further 9 | information is available at the end of this document. 10 | 11 | ## IPDs 12 | 13 | | state | IPD | 14 | | --------- | ------------------------------------------------------------- | 15 | | predraft | [IPD 1 Virtual Environment for Jenkins Build and Test](./ipd/0001/README.md) | 16 | | published | [IPD 2 Running smatch for illumos builds](./ipd/0002/README.md) | 17 | | published | [IPD 3 Link management improvements](./ipd/0003/README.adoc) | 18 | | published | [IPD 4 Manual Page Section Renumbering](./ipd/0004/README.md) | 19 | | published | [IPD 5 Rationalize SPARC platform support](./ipd/0005/README.md) | 20 | | draft | [IPD 6 allocb(): The `pri` argument, and use of KM_NORMALPRI](./ipd/0006/README.md) | 21 | | published | [IPD 7 illumos GCC maintenance](./ipd/0007/README.md) | 22 | | published | [IPD 8 EOF NCA/NL7C](./ipd/0008/README.md) | 23 | | published | [IPD 9 PCI Alias Disambiguation](./ipd/0009/README.md) | 24 | | published | [IPD 10 full argv in ps](./ipd/0010/README.md) | 25 | | published | [IPD 11 NFS Server for Zones (NFS-Zone)](./ipd/0011/README.md) | 26 | | published | [IPD 12 /proc/_PID_/fdinfo/](./ipd/0012/README.md) | 27 | | published | [IPD 13 Safer DDI DMA Cookie Functions](./ipd/0013/README.md) | 28 | | predraft | [IPD 14 illumos and Y2038](./ipd/0014/README.md) | 29 | | published | [IPD 15 bhyve integration/upstream](./ipd/0015/README.md) | 30 | | published | [IPD 16 EOF SunOS 4 binary compatibility](./ipd/0016/README.md) | 31 | | draft | [IPD 17 SMF Runtime Directory Creation Support](./ipd/0017/README.md) 32 | | published | [IPD 18 overlay network integration/upstream](./ipd/0018/README.md) 33 | | published | [IPD 19 Sunset SPARC](./ipd/0019/README.md) 34 | | published | [IPD 20 Kernel Test Facility](./ipd/0020/README.adoc) 35 | | published | [IPD 21 PCI Platform 
Unification](./ipd/0021/README.md) 36 | | draft | [IPD 22 Unsharing shared Libraries](./ipd/0022/README.md) 37 | | predraft | [IPD 23 Xen and the Art of Operating System Maintenance: A Removal of a Platform](./ipd/0023/README.md) 38 | | predraft | [IPD 24 Support for 64-bit ARM](./ipd/0024/README.md) 39 | | draft | [IPD 25 Authenticated pfexec](./ipd/0025/README.adoc) 40 | | draft | [IPD 26 Sunset CardBus and PC Card](./ipd/0026/README.md) 41 | | published | [IPD 27 Sunset TNF](./ipd/0027/README.md) 42 | | draft | [IPD 28 EOF Legacy Network Driver interfaces](./ipd/0028/README.md) 43 | | published | [IPD 29 Sunset Sockets Direct Protocol](./ipd/0029/README.md) 44 | | draft | [IPD 30 Remove obsolete SCSA functions](./ipd/0030/README.md) 45 | | published | [IPD 31 Kernel interface stability documentation](./ipd/0031/README.md) 46 | | draft | [IPD 32 Introduce scsi_hba_pkt_mapin](./ipd/0032/README.md) 47 | | predraft | [IPD 33 Obsolete legacy SCSI HBA API](./ipd/0033/README.md) 48 | | draft | [IPD 34 Rationalize Kernel Architecture Module Paths](./ipd/0034/README.md) 49 | | draft | [IPD 35 Sunset VTOC - SPARC](./ipd/0035/README.md) 50 | | draft | [IPD 36 Rationalize $(MACH64) Command Paths](./ipd/0036/README.md) 51 | | published | [IPD 37 Vendor-specific Command, Log, and Feature Support in nvmeadm(8)](./ipd/0037/README.md) 52 | | published | [IPD 38 Signal Handling, Extended FPU State, ucontexts, x86, and You](./ipd/0038/README.adoc) 53 | | published | [IPD 39 Datalink Media Types](./ipd/0039/README.adoc) 54 | | draft | [IPD 40 Cross compilation for illumos](./ipd/0040/README.md) 55 | | published | [IPD 41 Improving PCI devinfo Naming and Future Platforms](./ipd/0041/README.adoc) 56 | | draft | [IPD 42 Sunset native printing](./ipd/0042/README.md) 57 | | published | [IPD 43 NVMe 2.0, libnvme, and the nvme(4D) ioctl interface](./ipd/0043/README.adoc) 58 | | predraft | [IPD 44 Distribution as a first class concept](./ipd/0044/README.adoc) 59 | | draft | [IPD 45 Flow 
trees in the MAC datapath](./ipd/0045/README.adoc) 60 | | predraft | [IPD 46 IP Tunnel and IPsec ergonomics](./ipd/0046/README.adoc) 61 | | predraft | [IPD 47 Trust, but VERIFY(): Assertions in the Kernel](./ipd/0047/README.adoc) 62 | | predraft | [IPD 48 Improving Illumos on IPv6-primary and IPv6-only networks](./ipd/0048/README.adoc) 63 | | published | [IPD 49 Advancing the C Standard in illumos](./ipd/0049/README.adoc) 64 | | predraft | [IPD 50 ZFS Maintenance and Consumption of OpenZFS Technology](./ipd/0050/README.adoc) 65 | | published | [IPD 51 Time Zone Information Maintenance](./ipd/0051/README.adoc) 66 | | draft | [IPD 52 Extensible Boot Image Support](./ipd/0052/README.adoc) 67 | | draft | [IPD 53 Retiring `fipe(4D)`](./ipd/0053/README.adoc) 68 | | predraft | [IPD 54 Upgrading Illumos system sqlite to version 3](./ipd/0054/README.md) 69 | 70 | ## Contributing 71 | 72 | Contributions are welcome. A good rule of thumb as to whether you _should_ 73 | have an IPD is whether you are making a change with high impact to other 74 | developers or users (introducing or removing a supported platform, doing 75 | something with non-obvious compatibility constraints), or engaging in a 76 | long-term project that will likely integrate in pieces, to provide the overall 77 | picture. 78 | 79 | For your first contribution, you might want to just submit a pull request to 80 | this repository. Going forward if this is a thing that you will do again, 81 | we'll probably give you write access to this repository so you can just add 82 | your new IPDs as they come up. 83 | 84 | ## Format 85 | 86 | An IPD has a short header block indicating authorship (that's you), 87 | sponsorship (we'll get to that), and state. 88 | 89 | ### States 90 | 91 | #### predraft 92 | 93 | You've started writing your IPD and you want to share it narrowly, or even 94 | just to reserve a number in this repository.
You're _predraft_, maybe 95 | you only have a title and a short paragraph right now, that's fine. 96 | 97 | #### draft 98 | 99 | You've finished writing and explaining, and now you're going to send your IPD 100 | to the [developer mailing list](mailto:developer@lists.illumos.org), this is a 101 | draft, you're going to receive feedback so it's not complete, but it's close. 102 | 103 | #### published 104 | 105 | One or more people from the [illumos core 106 | team](https://illumos.org/docs/about/leadership/) have agreed that what you've 107 | described is a good thing, and that we should do it. Your IPD is done and 108 | published (though is not immutable! If you find more information would be 109 | useful later, please add it!) 110 | 111 | ### Sponsorship 112 | 113 | "Sponsor" is a weird word here, it's just the person or people on the illumos 114 | core team who were ok with your IPD. Don't worry about it. 115 | -------------------------------------------------------------------------------- /ipd/0001/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Joshua M. Clulow 3 | state: predraft 4 | --- 5 | 6 | # IPD 1 Virtual Environment for Jenkins Build and Test 7 | 8 | To aid in improved efficiency and consistency in the process of [integrating 9 | changes](https://wiki.illumos.org/display/illumos/How+To+Contribute) into 10 | [illumos-gate](https://github.com/illumos/illumos-gate), it would help to have 11 | the project provide central infrastructure that can run a full 12 | [nightly](https://illumos.org/man/1ONBLD/nightly) build of any particular 13 | change. It would also aid in testing to be able to take bits built from that 14 | change, boot them in a virtual machine, and run some of our automated test 15 | suites. 16 | 17 | This project will explore the provision of such infrastructure at 18 | https://illumos.org and how to fold it in to our integration process. 
19 | 20 | ## Operating System Issues 21 | 22 | There are a number of paper cuts that stand in the way of a stream-lined 23 | process, some of which represent operating system bugs -- or at least areas 24 | where we could make enhancements. A non-exhaustive list appears below: 25 | 26 | * [Bug 9985](https://www.illumos.org/issues/9985) blkdev devices can have an invalid devid 27 | * [Bug 10012](https://www.illumos.org/issues/10012) vioblk should not accept an all-zero serial number 28 | * [Bug 7119](https://www.illumos.org/issues/7119) boot should be more resilient to physical path to bootfs changing 29 | * [Bug 1857](https://www.illumos.org/issues/1857) "No SOF interrupts have been received..USB UHCI is unusable" under KVM 30 | -------------------------------------------------------------------------------- /ipd/0002/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: John Levon 3 | state: published 4 | --- 5 | 6 | # IPD 2 Running smatch for Illumos builds 7 | 8 | ## Introduction 9 | 10 | As part of Illumos's historical inheritance we, until very recently, ran lint 11 | against a significant portion of the source code (as mostly defined by 12 | [Makefile.lint](https://github.com/illumos/illumos-gate/blob/master/usr/src/Makefile.lint)). 13 | 14 | This was a continuing pain point for Illumos developers: we don't have the lint 15 | source, and the current version we have access to is gradually getting less and 16 | less able to compile the source it's given. Use of lint was also a blocker for 17 | other improvements, such as an improved definition of `NULL`. 18 | 19 | Recent consensus has led to us removing the requirement for developers to lint 20 | their changes. 21 | 22 | However, lint was still finding real bugs. 
In particular, complaining about code 23 | that fails to check the return value of functions still seems useful; it's 24 | highlighted real bugs in the past, and seems likely to continue to do so. 25 | 26 | While newer GCC versions have greatly improved checks, this particular checking 27 | behaviour is not supported by any warning option. GCC can only check return 28 | values for functions explicitly marked, and does not respect cast-to-void, which 29 | we use to silence lint right now. 30 | 31 | There is an alternative: [smatch](https://repo.or.cz/w/smatch.git). 32 | This is a [sparse](https://sparse.wiki.kernel.org/index.php/Main_Page)-based 33 | static checker, mainly aimed at the Linux kernel. While it has a large number of 34 | Linux-specific checks, it's also usable as a general static checker. What is 35 | particularly interesting about smatch is that it is written in C, and is easily 36 | hackable. This should be considered a great advantage over many other checkers, 37 | which are either closed source, written in a language understood by few, or 38 | both. 39 | 40 | We will use smatch to replace at least 41 | the checked-return functionality of lint across the Illumos source base. In 42 | fact, smatch's checking is already far superior to that of lint: the parser catches calls 43 | through function pointers that lint does not, does not complain about `memset()` 44 | or `printf()` etc. 45 | 46 | The approachability of smatch is also appealing for other reasons, as it would 47 | also allow us to add source-specific checks relatively easily. For example, unchecked 48 | `kmem_alloc(..., KM_NOSLEEP)`, unchecked user-supplied integers, Spectre gadget 49 | discovery, etc. 50 | 51 | In addition, a range of existing smatch checks are already catching real, new, bugs in the 52 | Illumos code base. 53 | 54 | ## Implementation 55 | 56 | Currently smatch is able to compile all of the Illumos gate, modulo some code 57 | that uses `_Complex` and related types.
This is implemented by defining smatch 58 | as a shadow compiler: since smatch is designed to effectively take all of GCC's 59 | options, this works relatively well. 60 | 61 | A number of options were considered for disabling or modifying smatch checks for 62 | parts of the source. For example, it makes little sense for us to run smatch 63 | against some 3rd-party source integrated into `illumos-gate`. A source base 64 | with some ... history ... uncovered quite a few peculiarities that required 65 | smatch changes. Code like: 66 | 67 | ``` 68 | #define elink_cb_get_friendly_name(cb) '' 69 | ``` 70 | 71 | or 72 | 73 | ``` 74 | char * 75 | copyn(s1, s2, n) 76 | register char *s1, *s2; 77 | { 78 | ``` 79 | 80 | requires either disabling smatch for that code, or disabling one or more of the 81 | smatch checks. Some of the latter are sparse-level, and may lack a disabling 82 | flag in upstream; these are being added to smatch as needed. 83 | 84 | As smatch is a shadow compiler, it runs against *all the code*, as opposed to 85 | lint, which was a separate pass defined in Makefiles. The approach being taken 86 | is to modify the Makefiles as needed. For example, to completely disable smatch 87 | in a sub-directory: 88 | 89 | ``` 90 | SMATCH=off 91 | ``` 92 | 93 | which becomes `-_smatch=off`. *cw* will spot this and not run smatch against 94 | those source files. `usr/src/Makefile.smatch` also defines a few default flags, 95 | where the checks are triggered by too many false positives, or too much legacy 96 | code. 97 | 98 | Specific checks can also be disabled (or enabled) like this: 99 | 100 | ``` 101 | SMOFF += uninitialized,check_check_deref,unreachable 102 | SMOFF += -_smatch=-Wno-vla 103 | ``` 104 | 105 | (The latter is an example of a sparse-level check.) 106 | 107 | This will mean a large number of one-line changes to Makefiles, but ultimately 108 | seems preferable to disabling large sections of the source base like 109 | `Makefile.lint` does.
Where infeasible, we will still be disabling smatch for 110 | particular sub-directories. 111 | 112 | A related question is how to integrate smatch itself into the build environment. 113 | 114 | smatch itself ships with data files that are closely tied to the source base 115 | under inspection. The current version defines two different projects, 116 | `illumos_kernel` for `usr/src/uts` and `illumos_user` for the rest of Illumos, 117 | and specific function names are listed there for various reasons. We also 118 | anticipate some source-specific checks being added as described above. 119 | 120 | For these reasons, it seems preferable to ship a version of smatch source under 121 | `usr/src/tools`, and build and run it directly from there. This is a local copy 122 | of `github.com/illumos/smatch/tree/illumos`. 123 | 124 | ## Upstreaming changes 125 | 126 | As mentioned above, there have already been several changes as part of the proof 127 | of concept, and upstreaming has gone well. There will inevitably be some 128 | changes not relevant for upstreaming though. In particular, it doesn't seem to 129 | make sense to upstream the Illumos data files themselves, as they are tied to 130 | the source revision, not smatch itself. There is also at least one change 131 | rejected by upstream that we rely on. 132 | 133 | ## Updating smatch 134 | 135 | If we need to resync with upstream smatch, the procedure is as follows: 136 | 137 | 1. Pull upstream into the `master` branch of `https://github.com/illumos/smatch` 138 | 1. Merge into the `illumos` branch. 139 | 1. Tag as e.g. `0.9.1-il-1` 140 | 1. Copy sources over to `illumos-gate` `usr/src/tools/smatch/src` 141 | 1. Update `usr/src/tools/smatch/Makefile` with the new tag information 142 | 1. RTI 143 | 144 | ## Caveats and Risks 145 | 146 | smatch does not cover C++ code or SPARC code, unlike lint. Other architectures 147 | are unknown. 148 | 149 | smatch integration/compatibility with clang/LLVM is unknown. 
150 | 151 | smatch cannot parse everything in our gate, and has known deficiencies (for 152 | example, `__NORETURN` is not properly respected). 153 | 154 | Several locations cause smatch to time out (after 60 seconds typically). We 155 | should investigate why, and potentially fix smatch. 156 | 157 | sparse's handling of the default macro definitions is extremely basic: essentially, 158 | we define just enough of the expected compiler and hardware environment to enable us 159 | to compile the sources by hand. Even the `cgcc` wrapper provided with sparse hard-codes 160 | a bunch of these macros. The risk here is that we miss significant checks by not defining 161 | the right set of macros we expect. 162 | 163 | If the upstream project dies for whatever reason, we will have the burden of 164 | maintaining smatch, and potentially sparse, ourselves. However, if needed, the 165 | size and scope of these projects mean this is fairly doable. 166 | 167 | A larger risk is the upstream sparse project taking a radical new direction that 168 | does not suit our needs. 169 | 170 | ## Policy changes 171 | 172 | It's anticipated that at some point we would require a clean smatch build for 173 | changes submitted to RTI. 174 | 175 | ## Future work 176 | 177 | As mentioned, there are a lot of additional checks that could be added. 178 | 179 | smatch can also be used in a looser analysis sense, for investigating properties 180 | of the source. For example, it's possible to use smatch for tainting data. 181 | 182 | We could and should gradually enable more smatch across the source base. 183 | -------------------------------------------------------------------------------- /ipd/0004/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Joshua M. 
Clulow 3 | sponsor: Richard Lowe 4 | state: published 5 | --- 6 | 7 | # IPD 4 Manual Page Section Renumbering 8 | 9 | According to the [Wikipedia article on manual pages][wp-man], most operating 10 | systems with manual pages use the following section numbering scheme: 11 | 12 | | Section | Description | 13 | | ------- | ----------- | 14 | | 1 | General commands | 15 | | 2 | System calls | 16 | | 3 | Library functions, covering in particular the C standard library | 17 | | 4 | Special files (usually devices, those found in /dev) and drivers | 18 | | 5 | File formats and conventions | 19 | | 6 | Games and screensavers | 20 | | 7 | Miscellanea | 21 | | 8 | System administration commands and daemons | 22 | | 9 | Kernel routines | 23 | 24 | See the _Other Operating Systems_ section below for a more comprehensive survey 25 | of modern operating systems. 26 | 27 | The article also notes that _System V_-derived platforms have historically used 28 | a different scheme. This scheme is, for historical reasons, the one presently 29 | used by illumos today: 30 | 31 | | Section | Description | 32 | | ------- | ----------- | 33 | | 1 | General commands | 34 | | 1M | System administration commands and daemons | 35 | | 2 | System calls | 36 | | 3 | C library functions | 37 | | 4 | File formats and conventions | 38 | | 5 | Miscellanea | 39 | | 6 | Games and screensavers | 40 | | 7 | Special files (usually devices, those found in /dev) and drivers | 41 | | 9 | Kernel routines | 42 | 43 | This scheme presents a number of challenges when dealing with software 44 | primarily developed for other platforms. Using the section numbers they expect 45 | results in pages ending up in the wrong section when installed on an illumos 46 | system -- of particular note is section 5 for file formats, like 47 | _rsyncd.conf(5)_; and section 8, like _zpool(8)_. 
48 | 49 | Even if it were a reasonable proposition for every software package in the 50 | world to parameterise their manual page build process, that would still make it 51 | hard to refer to a particular page in documentation not delivered with the 52 | installed software. An article about configuring _rsync_ might reasonably 53 | reference _rsyncd.conf(5)_, even though an idiomatically delivered page would 54 | be _rsyncd.conf(4)_ on an illumos system today. As ZFS sees increasing use and 55 | development on other operating systems, more articles will be written that 56 | refer to _zpool(8)_ and _zfs(8)_, rather than _zpool(1M)_ and _zfs(1M)_. 57 | 58 | ## Proposed Renumbering 59 | 60 | | Current Section | Proposed New Section | 61 | | --------------- | -------------------- | 62 | | 1M | 8 | 63 | | 4 | 5 | 64 | | 5 | 7 | 65 | | 7\* | 4\* | 66 | 67 | Administrative commands are presently documented in a subsection, 1M. The 68 | contents of this subsection would move to the top level of the new section 8. 69 | 70 | The subsections of section 7 (e.g., 7D, 7FS, 7I, etc) would become subsections 71 | of the new section 4 (i.e., 4D, 4FS, 4I, etc). Sections 4 and 5 do not appear to 72 | have subsections today, though there is an apparently vestigial 4B which we would 73 | likely just discard as part of this transition. 74 | 75 | ## Manual Page Search Order 76 | 77 | Several accommodations should be made to improve the user experience through 78 | this transition. The approach described below is similar to the one described 79 | in a [blog post about the section renumbering in Solaris 11.4][alanc]. 80 | 81 | ### Backwards Compatibility 82 | 83 | The `man` command should be made aware of the mapping from old to new section 84 | names, in order to aid users in the transition.
85 | 86 | If a user requests a manual page from one of the renumbered sections (e.g., 87 | _ip(7P)_) but that page is not found on disk by `man`, a fallback search will 88 | be attempted in the new section (i.e., _ip(4P)_). In practice there are few 89 | manual pages which actually overlap between the sections we seek to renumber, 90 | so this seems likely to help most people most of the time. 91 | 92 | ### Automatic Subsection Search 93 | 94 | Users from other platforms are likely less used to the presence of subsections 95 | in the manual. In many cases this isn't a problem: `man malloc` will find the 96 | correct page, _malloc(3C)_. When no specific section is requested, `man` will 97 | look in each section and subsection in turn and display the first match. 98 | 99 | In some cases it's more complicated. A user looking for the `basename()` 100 | library routine will probably start with `man basename`, hitting the 101 | manual page for the `basename` _command_. Realising their mistake, they 102 | will perhaps reflexively check in section 3; alas: 103 | 104 | ``` 105 | $ man -s 3 basename 106 | No manual entry for basename in section(s) 3 107 | ``` 108 | 109 | The manual page for the `basename()` routine actually appears (with other C 110 | library routines) in 3C. The `man` command could, upon not finding a relevant 111 | page in the top-level section, attempt a search of any relevant subsections. 112 | This would use the same search order as if the user had provided no `-s` option 113 | to `man`, except constrained to subsections of the nominated top-level section. 114 | 115 | ### Transition For Unbundled Software 116 | 117 | Most distributions of illumos ship some quantity of software from 118 | consolidations other than `illumos-gate`. An inexhaustive survey suggests that 119 | a variety of choices have been made about whether to, or how to, transform 120 | third party pages; e.g., to take a section 8 page and ship it in section 1M. 
121 | Where transformation has occurred through patches or otherwise, it hasn't been 122 | strictly uniform or even necessarily correct. 123 | 124 | There isn't a strategy that can completely avoid a flag day for sections 4, 5, 125 | and 7, as they overlap before and after the transition. As there is presently 126 | no section 8 and there will be no section 1M in the future, we can keep 1M in 127 | the appropriate position in the search order so that at least administrative 128 | command pages will still be found by `man` prior to being moved. 129 | 130 | ## Other Operating Systems 131 | 132 | A survey of several actively maintained operating systems in the UNIX family 133 | suggests that manual page section numbering is indeed effectively uniform. A 134 | review of the specifics, using phrasing from each platform's documentation, 135 | appears below with references. 136 | 137 | ### Linux 138 | 139 | According to [man(1)][linux-man1] at the [Linux man-pages project][lmpp], the 140 | following section numbers are in use: 141 | 142 | | Section | Description | 143 | | ------- | ----------- | 144 | | 1 | Executable programs or shell commands | 145 | | 2 | System calls (functions provided by the kernel) | 146 | | 3 | Library calls (functions within program libraries) | 147 | | 4 | Special files (usually found in `/dev`) | 148 | | 5 | File formats and conventions; e.g., `/etc/passwd` | 149 | | 6 | Games | 150 | | 7 | Miscellaneous (including macro packages and conventions); e.g., _man(7)_, _groff(7)_ | 151 | | 8 | System administration commands (usually only for root) | 152 | | 9 | Kernel routines [Non standard] | 153 | 154 | ### FreeBSD 155 | 156 | According to [man(1)][freebsd-man1] in the [FreeBSD manual pages][freebsd-man] 157 | for FreeBSD 12, the following section numbers are in use: 158 | 159 | | Section | Description | 160 | | ------- | ----------- | 161 | | 1 | General Commands Manual | 162 | | 2 | System Calls Manual | 163 | | 3 | Library Functions Manual | 164 
| | 4 | Kernel Interfaces Manual | 165 | | 5 | File Formats Manual | 166 | | 6 | Games Manual | 167 | | 7 | Miscellaneous Information Manual | 168 | | 8 | System Manager's Manual | 169 | | 9 | Kernel Developer's Manual | 170 | 171 | ### OpenBSD 172 | 173 | According to [man(1)][openbsd-man1] from OpenBSD, the following section numbers are in use: 174 | 175 | | Section | Description | 176 | | ------- | ----------- | 177 | | 1 | General commands (tools and utilities) | 178 | | 2 | System calls and error numbers | 179 | | 3 | Library functions | 180 | | 3p | perl(1) programmer's reference guide | 181 | | 4 | Device drivers | 182 | | 5 | File formats | 183 | | 6 | Games | 184 | | 7 | Miscellaneous information | 185 | | 8 | System maintenance and operation commands | 186 | | 9 | Kernel internals | 187 | 188 | Notably, the OpenBSD manual has at least one documented subsection: 3P for Perl libraries. 189 | 190 | ### NetBSD 191 | 192 | The [NetBSD manual][netbsd-man] appears to contain at least the following sections: 193 | 194 | | Section | Description | 195 | | ------- | ----------- | 196 | | 1 | General commands | 197 | | 2 | System calls and error numbers | 198 | | 3 | C library functions | 199 | | 3f | FORTRAN library functions | 200 | | 3lua | Lua modules | 201 | | 4 | Special files and hardware support | 202 | | 5 | File formats | 203 | | 6 | Games and demos | 204 | | 7 | Miscellaneous information pages | 205 | | 8 | System maintenance commands | 206 | | 9 | Kernel internals | 207 | | 9lua | Lua kernel bindings | 208 | 209 | Notably, the NetBSD manual has several subsections. 
210 | 211 | ### Solaris 11.4 212 | 213 | According to [man(1)][sol114-man1] from [Oracle Solaris 11.4], the following sections are in use: 214 | 215 | | Section | Description | 216 | | ------- | ----------- | 217 | | 1 | Commands available with the operating system | 218 | | 2 | System calls | 219 | | 2D | DTrace Providers | 220 | | 3 | Functions found in various libraries | 221 | | 3\* | Collections of related libraries | 222 | | 4 | Various device and network interfaces | 223 | | 4D | Special files that refer to specific hardware peripherals and device drivers | 224 | | 4FS | Programmatic interface for several file systems supported by Oracle Solaris | 225 | | 4I | Ioctl requests which apply to a class of drivers or subsystems | 226 | | 4M | STREAMS modules | 227 | | 4P | Network protocols available in Oracle Solaris | 228 | | 5 | Formats of various files | 229 | | 6 | Games and screensavers | 230 | | 7 | Miscellaneous documentation such as character-set tables | 231 | | 8 | Commands primarily used for system maintenance | 232 | | 8S | SMF services | 233 | | 9 | Reference information needed to write device drivers | 234 | | 9E | Entry-point routines a developer can include in a device driver | 235 | | 9F | Kernel functions available for use by device drivers | 236 | | 9P | Driver properties | 237 | | 9S | Data structures used by drivers to share information between the driver and the kernel | 238 | 239 | Oracle Solaris shares a common heritage with the illumos code base, as 240 | evidenced by the similarly prolific use of subsections throughout the manual. 241 | Note that Oracle Solaris performed a [similar renumbering of their manual 242 | sections][alanc] with the release of version 11.4. 
243 | 244 | 245 | 246 | 247 | [lmpp]: https://www.kernel.org/doc/man-pages/ 248 | [linux-man1]: http://man7.org/linux/man-pages/man1/man.1.html 249 | [freebsd-man]: https://www.freebsd.org/cgi/man.cgi 250 | [freebsd-man1]: https://www.freebsd.org/cgi/man.cgi?query=man&apropos=0&sektion=0&manpath=FreeBSD+12.0-RELEASE+and+Ports&arch=default&format=html 251 | [openbsd-man1]: https://man.openbsd.org/man.1 252 | [netbsd-man]: http://man.netbsd.org 253 | [sol114-man1]: https://docs.oracle.com/cd/E88353_01/html/E37839/man-1.html 254 | [alanc]: https://blogs.oracle.com/solaris/normalizing-man-page-section-numbers-in-solaris-114-v2 255 | [wp-man]: https://en.wikipedia.org/wiki/Man_page 256 | -------------------------------------------------------------------------------- /ipd/0005/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Peter Tribble 3 | sponsor: Joshua M. Clulow 4 | state: published 5 | --- 6 | 7 | # IPD 5 Rationalize SPARC platform support 8 | 9 | 10 | ## Introduction 11 | 12 | The illumos codebase contains support for a large variety of Sun 13 | desktops and servers. Given that some models are old, rare, and expensive, 14 | there's a likelihood that nobody running illumos will ever possess some 15 | of these hardware models, and some of the code we have may not only be 16 | useless, but also untestable. 17 | 18 | What of this code is useful and worth keeping and fixing, and what should 19 | be dropped? 20 | 21 | The aim of this project is twofold: to reduce the maintenance burden by 22 | removing code that has no utility, and enabling better code quality and 23 | support for the platforms that remain. 
24 | 25 | This project takes place in the context of significant changes within the 26 | illumos ecosystem, namely: the addition of gcc7 as a shadow compiler (and 27 | its potential promotion to the primary compiler), the replacement of lint 28 | by smatch, redefinition of NULL, and other potential modernizations of the 29 | toolchain (amongst which, on SPARC, we might include replacement of the 30 | old Sun assembler with the GNU assembler). Clearly, reducing the volume 31 | of code to be modernized would be a benefit. 32 | 33 | The plan is to keep support largely as-is for the sun4v platform, but to 34 | limit sun4u support to those systems which current users either have or 35 | might easily be able to obtain. This essentially means that we will support 36 | desktop or volume server systems, but remove support for the specialist 37 | high-end server ranges. 38 | 39 | An informal survey of SPARC models known to be currently running an illumos 40 | distribution, or that have run illumos in the past, generated no surprises: 41 | 42 | * Ultra 5 43 | * Ultra 60 44 | * Sun Blade 1000 45 | * Sun Blade 1500 46 | * Sun Blade 2000 47 | * Sun Blade 2500 48 | * V210 49 | * V240 50 | * V245 51 | * V490 52 | * T1000 53 | * T2000 54 | * T5120 55 | * T5220 56 | * T5140 57 | * T5240 58 | 59 | ## Candidates for removal include: 60 | 61 | The starfire range - the venerable Sun E10K. This has already been removed 62 | in [10318](https://www.illumos.org/issues/10318). 63 | 64 | The V880z, a dedicated graphics variant of the V880 with an XVR-4000 graphics 65 | card, was removed in [6027](https://www.illumos.org/issues/6027). 66 | 67 | The sunfire servers - E3000-E6000, E3500-E6500. While not a terribly complex 68 | platform, there are no known users, it's 2 decades old, and ties us to sbus, 69 | sf, and socal. 70 | 71 | The starcat range - The F15K and variants. Like the starfire, these were big 72 | expensive systems requiring dedicated controller hardware.
See issue 73 | [10864](https://www.illumos.org/issues/10864). 74 | 75 | The serengeti range, which are the newer Sun-Fire E2900-E6800 systems. Although 76 | more modern, there are no known users, and there's a big blob of complex 77 | code. 78 | 79 | The Lightweight 8, or V1280, which is some serengeti boards in a 80 | volume server chassis. 81 | 82 | Certain Netra systems, specifically the NetraCT compactPCI blade chassis 83 | systems. (Code names are montecarlo for the SUNW,UltraSPARC-IIi-Netract; 84 | makaha for SUNW,UltraSPARC-IIe-NetraCT-40; sputnik for 85 | SUNW,UltraSPARC-IIe-NetraCT-60; and snowbird for SUNW,Netra-CP2300.) 86 | 87 | The B100s server blade. 88 | 89 | ## Candidates not marked for removal at this time 90 | 91 | Certain platforms bring in a certain amount of code complexity which 92 | would qualify them for removal, but there are good reasons for keeping 93 | them. 94 | 95 | The opl, or Olympus platform, which is the Fujitsu-derived M-series. 96 | The reason here is that the M3000 model is readily available and cheap. 97 | It may be possible to thin out the opl support to exclude the complex 98 | domain and DR operations. 99 | 100 | The Ultra-2, which would be the last remaining sbus system. The issue here 101 | is that the Ultra-2 is the base platform from which many of the smaller 102 | desktop systems are inherited. 103 | 104 | ## Implementation 105 | 106 | The proposed plan for implementation is to work through the platforms, 107 | removing one at a time (similar to starfire removal). Breaking the work 108 | up like this makes each step more manageable, and also allows easier triage 109 | if anything does get broken. 110 | 111 | It's likely that there will need to be final cleanup of unused modules, as 112 | there are some modules that are shared between platforms. 
113 | 114 | It is planned to defer other work, such as gcc7, NULL, and smatch, until 115 | this project has completed, to avoid having to clean up code that is then 116 | removed. 117 | -------------------------------------------------------------------------------- /ipd/0006/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Dan McDonald 3 | state: draft 4 | --- 5 | 6 | # IPD 6 `allocb`(9F): The `pri` argument, and use of KM_NORMALPRI 7 | 8 | ## Introduction 9 | 10 | Today, the `allocb`(9F) kernel function documents a priority field (`pri`), 11 | but literally states in its documentation that it is, "no longer used". 12 | Indeed, a [source 13 | inspection](http://src.illumos.org/source/xref/illumos-gate/usr/src/uts/common/io/stream.c#414) 14 | indicates that `pri` is ignored. In spite of this, the approximately 250 15 | callers of `allocb`(9F) use all three different priority values, in vain. 16 | 17 | Possibly independent of the priority parameter, the kernel memory flags used 18 | for `allocb`(9F) are always KM_NOSLEEP, that is, non-blocking. This makes 19 | sense, given `allocb`(9F) can be called in interrupt context. An 20 | OpenSolaris-era bugfix, 21 | [6675738](https://github.com/illumos/illumos-gate/commit/23a80de1aec78d238d06caf311eaceb81dd5a440), 22 | introduced KM_NORMALPRI, requesting to use a less-persistent allocation for 23 | non-blocking allocations. DTrace adopted this as [illumos issue 24 | 1452](https://github.com/illumos/illumos-gate/commit/6fb4854bed54ce82bd8610896b64ddebcd4af706#diff-64e6f1587817235d06f7d2db19a97967) 25 | early in the life of illumos. 26 | 27 | Three questions fall out of the prior two observations: 28 | 29 | 1.) Should `allocb`(9F) exploit KM_NORMALPRI? 30 | 31 | 2.) If the answer to #1 is "maybe", should the priority argument in 32 | `allocb`(9F) have meaning again? 33 | 34 | 3.)
If via certain answers to the prior two questions priority remains 35 | unused, should it be removed outright? 36 | 37 | ## Measurements and observations needed 38 | 39 | The `allocb`(9F) function should be measured and observed in a way similar to 40 | illumos 1452. A loaded system should be able to trigger an aggressive 41 | reclaim, and DTrace can likely be employed to detect it. 42 | 43 | ## Implementation 44 | 45 | An initial implementation would find the places in `allocb`(9F) that use 46 | KM_NOSLEEP and, depending on design decisions surrounding the priority 47 | argument, logical-or the KM_NORMALPRI flag as well. 48 | 49 | ## Scope of fix 50 | 51 | While this IPD focusses on allocation flags solely on the `allocb`(9F) 52 | function, other STREAMS mblk allocators like `esballoc`(9F) (and variants) 53 | could also benefit from KM_NORMALPRI. 54 | -------------------------------------------------------------------------------- /ipd/0007/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Richard Lowe, John Levon, Andy Fiddaman 3 | state: published 4 | --- 5 | 6 | # IPD 7 illumos GCC maintenance 7 | 8 | ## Introduction 9 | 10 | This IPD describes the use and maintenance of the illumos-specific GCC branch 11 | used for building, found on GitHub at http://github.com/illumos/gcc. 12 | 13 | ## Status Quo 14 | 15 | The currently used branches of `illumos/gcc` are: 16 | 17 | | name | description | 18 | | ----------- | -------------------------------------------------------------| 19 | | `il-14_2_0` | GCC 14.2 branch: current primary compiler | 20 | | `il-10_4_0` | GCC 10.4 branch: current shadow compiler | 21 | 22 | Each version of the compiler we intended to endorse/insist upon the use of 23 | during the RTI process is tagged in the form `gcc-X.Y.Z-il-N` where _X.Y.Z_ is 24 | the GCC version, and _N_ is a monotonically increasing integer to 25 | differentiate versions of our patches.
26 | 27 | Other branches may exist in the form of work-in-progress, candidate builds or 28 | older compiler versions no longer in use. 29 | 30 | Note that OpenIndiana, OmniOS and SmartOS have some slight differences around 31 | library search paths so usually apply an additional patch on top of those 32 | present in the branch. 33 | 34 | ## Method of Shipping the Patched GCC 35 | 36 | Distributions can take the tarball provided from the github tags and versions, 37 | and integrate that into their build systems as the upstream tarball, not 38 | needing to maintain a patch set based on our git repository. 39 | 40 | Alternatively, distributions wishing to update or further patch GCC can easily 41 | take a fork of `illumos/gcc`, work within it, and use that tree with their build 42 | system, allowing much easier contribution of those changes upstream. The intent 43 | is that any suitable changes are folded back into the official branch and 44 | release tags as needed. 45 | 46 | ## Developing the illumos GCC 47 | 48 | People wishing to work on newer versions of gcc may have the appropriate 49 | branch created in `illumos/gcc` early in the development cycle to facilitate 50 | cooperation with anyone else who may be planning a similar update. 51 | 52 | Changes intended for integration into illumos/gcc should be submitted in the 53 | form of pull requests where that is possible, or github issues requesting a 54 | branch be created onto which pull requests may be submitted, or a branch be 55 | pulled up into `illumos/gcc` to establish a new version branch. 56 | 57 | ## Endorsing a new version of GCC for use with `illumos-gate` 58 | 59 | New versions of GCC for `illumos-gate` need to be discussed with the 60 | core team. In general, moving to a newer version of GCC involves co-ordination 61 | between the main stakeholders (OpenIndiana, MNX, OmniOS and Oxide, most 62 | usually), and a set of testing/validation.
63 | 64 | ### Testing 65 | 66 | Updating the compiler, especially over a major version, has historically been 67 | a tricky proposition, often involving new optimizations that break code 68 | (admittedly, usually code relying on undefined behaviour). Careful testing 69 | should be done of any change. 70 | 71 | Test results from the GCC suite should show no regressions from a mainline 72 | GCC of equivalent version, and any regressions relative to the last endorsed 73 | GCC must be carefully evaluated (hopefully, there would be none). 74 | 75 | Test results from the illumos tests should be favourably comparable to a 76 | baseline with the current compiler, manual testing of debug facilities 77 | (`-msave-args`/`libsaveargs`) should show no regressions. The DTrace test 78 | suite in particular is relevant here. 79 | 80 | A special class of bug which has proved difficult in the past is one which 81 | influences _observability_ but not necessarily correctness. Special care must 82 | be taken to verify that the compiler has not regressed; some specific things 83 | to look out for are: 84 | 85 | 1. No fbt or pid probe previously visible to DTrace should now be invisible. 86 | 87 | 1. Care should be taken that GCC has not produced cloned special purpose 88 | versions of symbols (these tend to be named in the form `foo.xy.N` where 89 | _foo_ is the original symbol, _xy_ is an optimizer pass, and _N_ a sequence 90 | number). 91 | 92 | 1. CTF type information should be checked compared to the old compiler (diffs 93 | of `ctfdump -c` are helpful here). There is often some natural churn, but 94 | any significant differences in missing or different types should be evaluated 95 | carefully. 96 | 97 | 1. Some spot checking of fbt probe location is good: check that fbt begin 98 | probes are placed before any branches at the start of the function. This is 99 | what `-fno-shrink-wrap` is for. 100 | 101 | 1. 
It's worth reviewing the list of 102 | https://gcc.gnu.org/onlinedocs/gcc-10.4.0/gcc/Optimize-Options.html for any 103 | new compiler version, to see if there's any pernicious things we might want 104 | to disable. The list of optimizations we need to disable seems to grow every 105 | time we update. 106 | 107 | 1. Check through the list of changes (e.g. 108 | https://gcc.gnu.org/gcc-10/changes.html) for anything that might affect us. 109 | 110 | The testing that was performed as part of moving to gcc10 as the primary 111 | compiler may be a useful reference, although every compiler change will likely 112 | bring its own challenges - See . 113 | 114 | ## Submitting Patches to GCC 115 | 116 | It would be good to begin the process of submitting patches upstream to GCC, 117 | though historically we have not for various reasons. Some of our patches are 118 | particularly opinionated and unsuitable for general use, and will likely be 119 | our own forever. Patches not in this category should at least be considered 120 | for submitting upstream. 121 | -------------------------------------------------------------------------------- /ipd/0008/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Peter Tribble 3 | sponsors: Garrett D'Amore 4 | state: published 5 | --- 6 | 7 | # IPD 8 EOF NCA/NL7C 8 | 9 | NCA/NL7C is a kernel-based web cache accelerator. 10 | 11 | The current implementation in illumos is a compatible Network Layer 7 Cache 12 | (NL7C) as part of SOCKFS. It's a replacement for the older SNCA product that 13 | was designed to be more generic while implementing the same interfaces. 14 | 15 | However, that generic extension never happened. So we have a subsystem that 16 | only caches http, not https or http/2. Not only that, it doesn't work in 17 | zones, which is where users expect applications to run, and it only supports 18 | IPv4. 19 | 20 | The web has moved on. 
Secure http is now expected and ubiquitous. Load 21 | balancers and reverse proxies are commonplace. Modern web servers are 22 | much more performant than in the past. Content Delivery Networks shoulder 23 | the main burden of accelerating delivery of static assets to end users. 24 | 25 | NCA in its current form is obsolete, misaligned with current practice, and 26 | should be removed. 27 | 28 | Removing NCA from the path also simplifies the implementation of sockfs and 29 | sendfile. 30 | 31 | ## Implementation Tickets 32 | 33 | Removing these bits is occurring in at least the following changes: 34 | 35 | - [14767 retire kssl](https://www.illumos.org/issues/14767) 36 | - [14768 retire nca](https://www.illumos.org/issues/14768) 37 | -------------------------------------------------------------------------------- /ipd/0010/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: John Levon 3 | state: draft 4 | --- 5 | 6 | # IPD 10 full argv in ps 7 | 8 | Currently, `ps -ef` limits the displayed argv string to 80 characters maximum. 9 | This is because it gets this value from `/proc/pid/psinfo`'s `pr_psargs[]`. 10 | 11 | This value is populated at `exec()` time, and does not reflect any changes the 12 | process may make to its argv subsequently. This is both a bug and a feature. 13 | 14 | The only way to get a longer argv is via `pargs` or `ps auxww`, both of which 15 | require permissions to read the target process. This longer value reflects the 16 | current process arguments, which may have changed. 17 | 18 | This IPD is proposing a few changes: 19 | 20 | ## /proc/pid/cmdline 21 | 22 | We'll introduce a new `proc(4)` file, `/proc/pid/cmdline`. This has been in SmartOS 23 | for a long time under the `lx` brand. 
It's a `\0`-separated string of the 24 | current `argv` of the process, equivalent to that reported after the first line 25 | of `pargs`: 26 | 27 | ``` 28 | # pargs 7228 29 | 7228: /usr/lib/smtp/sendmail/sendmail -Ac -q15m 30 | argv[0]: sendmail: Queue runner@00:15:00 for /var/spool/clientmqueue 31 | argv[1]: 32 | argv[2]: /var/spool/clientmqueue 33 | # cat /proc/7228/cmdline 34 | sendmail: Queue runner@00:15:00 for /var/spool/clientmqueue 35 | # 36 | ``` 37 | 38 | Permissions on this file are `0444`. Note that the usual security boundaries around `/proc`, 39 | such as zones, missing `proc_info` privilege, etc. are sufficient to hide this 40 | file in the same way as other `/proc/` files on a per-process basis. 41 | 42 | This new file is explicitly Linux-compatible, on the basis that it is what 43 | most software these days is likely to be expecting. In particular: 44 | 45 | - instead of looking at the `argv[]` array itself, it records the original 46 | argv string area, and exposes *that*. For example, we wouldn't see `argv[1]` 47 | and `argv[2]` as seen in pargs above in `/proc/pid/cmdline`. The process 48 | did not intend to expose those. 49 | 50 | - there is a `setproctitle()` hack: essentially, if the last byte in the argv 51 | string area is no longer `'\0'`, then we assume that the application has 52 | modified its argv (under `lx` brand, this would be via `setproctitle()`). 53 | In that case, we will happily read and display the string beyond the confines 54 | of the original argv area (up to a page in size). 55 | 56 | ## `ps -ef` 57 | 58 | While total consensus is not going to happen here, probably the majority view was 59 | that it was preferable to expand the output of `ps -ef` and `ps auxww` by default 60 | as part of these changes, thus using the current process argv as discussed above: 61 | 62 | ``` 63 | # ps -ef 64 | root 347644 22491 0 Aug 06 ?
0:00 /home/gk/src/smartos-jlevon/projects/illumos/usr/src/cmd/svc/configd/svc.configd-native -p -d /var/run/scfdrAAAaSG.nv -r /tmp/build_live-1001.306619/a/usr/lib/brand/joyent-minimal/repository.db 65 | ... 66 | 0000907 98875 96779 0 Aug 09 ? 17:45 postgres: moray moray 172.27.15.15(56902) idle 67 | ... 68 | # ps -o pid,args -p 119010 69 | PID COMMAND 70 | 119010 /opt/marlin/build/node/bin/node --abort-on-uncaught-exception /opt/marlin/lib/a 71 | # ps -f -o pid,args -p 119010 72 | PID COMMAND 73 | 119010 /opt/marlin/build/node/bin/node --abort-on-uncaught-exception /opt/marlin/lib/agent/lackey.js /var/run/.marlin.1422402e-cc98-44d9-9adb-f963cfcdfe15.sock 74 | # ps -f -o pid,args,pid -p 119010 75 | PID COMMAND PID 76 | 119010 /opt/marlin/build/node/bin/node --abort-on-uncaught-exception /opt/marlin/lib/ag 119010 77 | # ps -o pid,args -p 98875 78 | PID COMMAND 79 | 98875 /opt/postgresql/9.2.4/bin/postgres -D /manatee/pg/data 80 | # ps -f -o pid,args -p 98875 81 | PID COMMAND 82 | 98875 postgres: moray moray 172.27.15.15(56902) idle 83 | ``` 84 | 85 | There was concern over scripts incorrectly doing `ps -ef | grep ...` changing behaviour 86 | due to additional arguments being visible, or a replaced argv no longer matching. The 87 | same could potentially apply to `pgrep(1)` and especially `pkill(1)`. 88 | 89 | While this is definitely a concern, ultimately most people felt this improvement is 90 | worth the risk here. However, I'm proposing introducing a safety valve: 91 | 92 | ``` 93 | # ps -ef | grep lackey 94 | root 164522 162737 0 Aug 10 ? 0:02 /opt/marlin/build/node/bin/node --abort-on-uncaught-exception /opt/marlin/lib/agent/lackey.js /var/run/.marlin.34e2146e-ce75-455a-9c36-ccf37446f553.sock 95 | ... 96 | # SHORT_PSARGS=1 ps -ef | grep lackey 97 | # 98 | # SHORT_PSARGS=1 pgrep lackey 99 | # 100 | ``` 101 | 102 | Feel free to bikeshed this name. Like POSIXLY_CORRECT, the value is ignored, as long 103 | as it's set. 
104 | 105 | ## `ps(1b)` 106 | 107 | The existing behaviour of `ps(1b)` is a little obscure. Without permissions, 108 | `pr_psargs[]` is used. Presuming `ps(1b)` can read the target process, however, 109 | then: 110 | 111 | - if the terminal width is < 132, the `w` options behave as described in the 112 | man page: a single `w` provides 132 characters of argv (wrapping past the 113 | terminal edge), and two or more show the whole argv. 114 | 115 | - otherwise, without any `w`, whatever fits in the terminal is shown (this is 116 | similar to how Linux `ps(1)` works). 117 | 118 | - with one or more `w`, the entire argv is shown. 119 | 120 | This behaviour appears to simply be a bug in the way the arguments are handled. 121 | We'll fix this as part of these changes. 122 | 123 | With two or more `w` flags, `ps(1b)` will now report the whole argv, regardless 124 | of whether we can read the target process or not. 125 | 126 | ## `pgrep(1)` 127 | 128 | `pgrep(1)` and its nom de guerre `pkill(1)` currently only match against 129 | `pr_psargs[]`. We'll also change those so they use `/proc/pid/cmdline`, obeying 130 | `SHORT_PSARGS` if set. 131 | 132 | ## Security issues 133 | 134 | The effect of these changes is that characters 80 onwards of a process's argv 135 | are globally visible (modulo the existing security boundaries as mentioned). 136 | There was some concern over programs taking secrets on the command line that may 137 | now be exposed. 138 | 139 | However, such programs were always totally broken: they are exposed on other 140 | operating systems, and nobody knows of one that's careful enough to respect and 141 | refuse to accept secrets in the existing public 80-character space. 142 | 143 | ## Future enhancements 144 | 145 | In the future, we may record the whole argv at `exec()` time in the kernel. We 146 | could then add options to report that instead of the current argv. (This would 147 | probably be most useful in `pargs(1)` though.) 
148 | 149 | `pargs(1)` currently works by inspecting the target process address space. It could 150 | be changed to use `/proc/pid/cmdline` instead. However, it's full of scary code 151 | about translating between locales, so nobody really wants to go there. One for 152 | the intrepid? 153 | -------------------------------------------------------------------------------- /ipd/0011/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Dan McDonald 3 | state: published 4 | --- 5 | 6 | # IPD 11 NFS Server for Zones (NFS-Zone) 7 | 8 | ## Introduction 9 | 10 | Formerly the Network File System (NFS) server could only be instantiated in 11 | the global zone. This document describes how NFS is now able to instantiate 12 | in any non-global zone. This document assumes the reader has some 13 | understanding of NFS. 14 | 15 | illumos issue [11083](https://illumos.org/issues/11083/) tracks the main 16 | thrust of this effort. Precursor issues 17 | [2988](https://illumos.org/issues/2988/) and 18 | [11945](https://illumos.org/issues/11945) are also part of the fallout from 19 | this project, and have been integrated into illumos-gate as of December 2019. 20 | 21 | ## History and Fundamentals 22 | 23 | Before Oracle closed OpenSolaris, there was a work-in-progress to solve this 24 | problem. Unfortunately, even a larval version of this work did not escape 25 | into the open-source world. Post-illumos, multiple attempts tried to make 26 | NFS service in a zone (abbreviated to NFS-Zone for the rest of this 27 | document). 28 | 29 | One distribution, NexentaStor, managed to implement a version in their child 30 | of illumos. This work from Nexenta forms the basis for what is planned to be 31 | brought to illumos-gate. 32 | 33 | Prior to this project, NFS instantiated its state assuming it would only run 34 | in the global zone. 
This state spans three different kernel modules: 35 | `nfssrv` (the NFS server kernel module), `klmmod` (kernel lock manager 36 | module), and `sharefs` (the kernel "sharetab" filesystem). 37 | 38 | ### NFS Server State 39 | 40 | A majority of the NFS server state can be instantiated into Zone-Specific 41 | Data (ZSD). Prior to this project, modules instantiated ZSD for each of: 42 | 43 | - NFS Export Table (`nfs_export_t`) 44 | 45 | - NFS server instances for NFSv2 (`struct nfs_srv`), NFSv3 (`struct 46 | nfs3_srv`), and NFSv4 (`struct nfs4_srv`) 47 | 48 | - NFS Authentication data 49 | 50 | These are all now pointers from a single `nfs_globals_t` structure, which 51 | contains a zone ID, and a link in a list of all per-zone NFS globals. 52 | 53 | One structure needs to be globally tracked, because each structure instance 54 | directly references vnodes, which are only scoped globally. Each NFS Export 55 | Information (`exportinfo_t`) is kept in a global-zone tree. With this 56 | project, each `exportinfo_t` also includes a zone ID, AND a backpointer to 57 | its zone-specific NFS Export Table. The Implementation section will discuss 58 | this linkage further. 59 | 60 | ### Kernel Lock Manager State 61 | 62 | The kernel lock manager's `struct nlm_globals` are already instantiated 63 | per-zone. This project introduces a cached zone ID (`nlm_zoneid`) to make 64 | other operations simpler, especially those that use other modules' per-zone 65 | data structures. 66 | 67 | ### ShareFS State 68 | 69 | ShareFS now instantiates its globals per-zone in `sharetab_globals_t`. 70 | Unlike the lock manager, sharefs does not depend directly on data structure 71 | in NFS itself. 72 | 73 | ## Implementation 74 | 75 | As mentioned in the Fundamentals section, to create per-zone NFS services, 76 | data structures that were global-zone exclusive must become instantiable 77 | per-zone. 
The Zone-Specific Data (ZSD) mechanism for illumos zones provides 78 | a clean interface to perform straightforward per-zone instantiations of what 79 | would be considered global state in a single-global-zone machine. 80 | 81 | Complicating the use of ZSD, however, is that especially during bring-up and 82 | tear-down, some of the functionality of NFS assumed its state was always run 83 | in the same zone context as the data itself. Experimentation during bring-up 84 | showed that not to be the case. To see the problem, and its solution, we 85 | must first examine the workings of Zone-Specific Data (ZSD). 86 | 87 | ### Zone-Specific Data (ZSD) 88 | 89 | Zone-Specific Data (ZSD) for a kernel module is well-described. Every zone 90 | instance will have ZSD associated for the module if it so chooses. When a 91 | kernel module's init() function gets called, it can choose to register ZSD by 92 | calling `zone_key_create()` and providing three callback functions: 93 | zone_init(), zone_shutdown(), and zone_fini(). The zone_init function 94 | returns a pointer to module-allocated ZSD, which are then passed to the 95 | zone_shutdown and zone_fini functions. 96 | 97 | ### ZSD Handling Outside Zone Context 98 | 99 | What is not obvious is that whatever zone's thread loads the kernel module 100 | will be the thread that also invokes zone_init() for ALL RUNNING ZONES. This 101 | means any function called by zone_init() MUST NOT assume its ZSD is part of 102 | the thread manipulating it. The initial work on this project made this 103 | mistake. It is hard to detect this condition because most of the time the 104 | global zone loads the kernel module prior to zones bringup. Only if a zone 105 | kicks off a kernel module load can this condition occur. 106 | 107 | ### Distribution Differences Matter 108 | 109 | During bringup of this project, bringing SmartOS in for testing illustrated 110 | the above ZSD handling issues. 
Since the global zone on SmartOS is almost 111 | always minimally used, the NFS server modules are almost always brought up by 112 | the first zone that shares via NFS. 113 | 114 | SmartOS zones ("joyent" or "joyent-minimal" brand) do NOT have their zone's 115 | root at a proper filesystem boundary. In SmartOS a zones/$ZONE_UUID dataset 116 | gets created, and /zones/$ZONE_UUID/root is merely a directory in that 117 | filesystem. This means NFS code that traverses a directory tree upwards 118 | until a filesystem boundary or "/" must not only check for a filesystem 119 | boundary (vnode's VROOT flag set), but also check for a vnode that is the 120 | zone's root. 121 | 122 | ### Data Structure Linkages 123 | 124 | This project brings NFS "globals" under per-zone structures, and may augment 125 | other "globals" already instantiated per-zone. They are laid out in the 126 | following sets of ZSD: 127 | 128 | #### ALREADY EXISTING 129 | 130 | - struct nlm_globals (`klmmod`): Lock Manager ZSD 131 | 132 | - struct flock_globals (`genunix`): File-locking state used exclusively by klmmod 133 | 134 | #### NEW WITH THIS PROJECT 135 | 136 | - nfs_globals_t (`nfssrv`): NFS server ZSD; contains other sub-fields which are 137 | per-zone: 138 | - nfs_export_t: NFS Export Table 139 | - struct nfs_srv: NFSv2 server state 140 | - struct nfs3_srv: NFSv3 server state 141 | - struct nfs4_srv: NFSv4 server state 142 | - struct nfsauth_globals: NFS Authentication state 143 | 144 | - nfscmd_globals_t (`nfs`): NFS command state for in-zone nfsd. 145 | 146 | Because of the Lock Manager globals, the NFS Server globals, and the 147 | needs-to-be-globally-indexed `exportinfo_t` are all in 148 | different sets of ZSD, each of these contains a Zone ID in them. When 149 | searching both sets of structures from an arbitrary zone context, 150 | correspondence can be done with Zone ID comparisons. 
151 | 152 | In some cases (usually involving `exportinfo_t`), a few pointer dereferences 153 | can determine a zone's root vnode. It is important to track this, because as 154 | pointed out in the Distribution Differences Matter section, in some zone 155 | brands, the zone's root vnode is NOT a filesystem boundary (i.e. the VROOT 156 | flag is not necessarily set), and in-zone NFS share must stop at the zone 157 | root, instead of the global zone's root. 158 | 159 | ## Man page changes 160 | 161 | The initial version of this project made no manual page changes, and the 162 | initial push to illumos-gate will not either. A survey of manual pages that 163 | reference two or more of NFS, zones, or sharefs yielded the following list of 164 | potential man pages: 165 | 166 | - `dfshares`(1m) 167 | - `dfshares_nfs`(1m) 168 | - `kadmin`(1m) 169 | - `kclient`(1m) 170 | - `mount`(1m) 171 | - `mount_nfs`(1m) 172 | - `mountd`(1m) 173 | - `nfs4cbd`(1m) 174 | - `nfsd`(1m) 175 | - `nfslogd`(1m) 176 | - `nfsmapid`(1m) 177 | - `nfsstat`(1m) 178 | - `rquotad`(1m) 179 | - `share`(1m) 180 | - `share_nfs`(1m) 181 | - `shareall`(1m) 182 | - `sharectl`(1m) 183 | - `sharemgr`(1m) 184 | - `statd`(1m) 185 | - `unshare`(1m) 186 | - `unshare_nfs`(1m) 187 | - `zfs`(1m) 188 | - `dfstab`(4) 189 | - `nfs`(4) 190 | - `nfslog.conf`(4) 191 | - `nfssec.conf`(4) 192 | - `rpc`(4) 193 | - `sharetab`(4) 194 | - `nfssec`(5) 195 | - `zones`(5) 196 | - `sharefs`(7fs) 197 | 198 | These will be further audited for possible changes to make administrators 199 | aware of per-zone NFS service. illumos issue 200 | [12278](https://illumos.org/issues/12278/) tracks the subsequent man page 201 | changes. 202 | 203 | ## Testing 204 | 205 | Testing has included a series of smoke, use, and mild-stress testing on a 206 | SmartOS compute node that is serving NFS both from its global zone and a 207 | non-global zone.
It has been done so under both DEBUG and non-DEBUG kernels, 208 | the former of which found several issues after the initial code drop which 209 | are now fixed. Some of those same tests were done on an OmniOSce bloody with 210 | this project, running on VMware Fusion. 211 | 212 | The Linux "nfstest" package: https://wiki.linux-nfs.org/wiki/index.php/NFStest 213 | can be used (as long as NFSv4.1 is excluded) as a regression and 214 | interoperability test. As of December, 2019, this project's changes have 215 | not affected the results of the Linux NFS tests. See 216 | http://kebe.com/~danmcd/webrevs/nfs-zone/linux-nfs-test/ for results and 217 | details. (NOTE: Later these may be moved to the `old` directory inside 218 | Dan's webrevs directory, insert "old/" between "webrevs/" and "nfs-zone".) 219 | 220 | illumos issue [11083](https://illumos.org/issues/11083/) has additional 221 | testing details. 222 | 223 | ## Potential Future Issues 224 | 225 | The sharefs filesystem does not have its own set of privileges that can be 226 | delegated into a zone. The sharetab.c source file has a block comment 227 | describing this: 228 | 229 | ``` 230 | * TODO: This basically overloads the definition/use of 231 | * PRIV_SYS_NFS to work around the limitation of PRIV_SYS_CONFIG 232 | * in a zone. Solaris 11 solved this by implementing a PRIV_SYS_SHARE 233 | * we should do the same and replace the use of PRIV_SYS_NFS here and 234 | * in zfs_secpolicy_share. 235 | ``` 236 | 237 | And this should be addressed as a separate bug as well. 238 | -------------------------------------------------------------------------------- /ipd/0014/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Peter Tribble, Dan McDonald, Rich Lowe (will need others...) 
3 | state: predraft 4 | --- 5 | 6 | # IPD 14 illumos and Y2038 7 | 8 | ![Y2038 is coming, pardon the US-centric date layout and assumption of UTC.](Y2038-warning.png "Y2038 is coming, pardon the US-centric date layout and assumption of UTC.") 9 | 10 | ## Introduction 11 | 12 | The Year 2038 problem (Y2038) is a shorthand for the class of problems 13 | related to the use of a 32-bit `time_t` in 32-bit illumos programs, 14 | libraries, and even some kernel modules designed and implemented when 15 | `time_t` was actually 32-bit. 2038 is the year 2^31-1 seconds after the UNIX 16 | epoch of GMT-midnight January 1, 1970. 17 | 18 | While illumos has eliminated 32-bit kernels in x86/amd64 environments, 32-bit 19 | userland code (commands and libraries) are still supported. Indeed, the vast 20 | majority, approximately 90%, of illumos utilities are only built 32-bit. 21 | 22 | The use of 32-bit code may impact other areas beyond time_t. For example, 23 | there may be applications that have not been updated to deal with large 24 | files. Another problem may be the use of a 32-bit ino_t, as zfs supports 25 | many more files on a single filesystem. 26 | 27 | ## Possible solutions 28 | 29 | The problem of large files was addressed within a 32-bit system by adding 30 | a new transitional compilation environment for 32-bit software, see 31 | lfcompile(5). This approach was specifically to allow code to access 64-bit 32 | quantities at a time when a full 64-bit option was not available. It 33 | would be possible to define an additional transitional compilation 34 | environment for a larger time_t (or any other properties). Such an 35 | approach has recently been added to Linux, for example. However, that 36 | is aimed at supporting 32-bit hardware beyond 2038. 37 | 38 | However, any approach of that type requires software to opt-in, and requires 39 | applications to be rebuilt from source. Existing binaries cannot be fixed, as 40 | the 32-bit quantities are embedded. 
41 | 42 | Given that recompilation is required anyway, and we only support 64-bit 43 | hardware, it seems much easier to rebuild as 64-bit and be done with it. 44 | Such an approach would also address any other 32-bit limitations. 45 | 46 | ## Other areas impacted 47 | 48 | It's possible that there may be areas beyond userland applications that 49 | are impacted. Such as: 50 | 51 | * 32-bit timestamps in the UFS on-disk format 52 | * 32-bit timestamps in utmp, wtmp, lastlog, information (w(1), uptime(1) etc.) 53 | * 32-bit timestamps used for "uniqueness" via uniqtime32() in the kernel 54 | * ZFS clamps times to 32bit in zfs_setattr(), but says the problem year is 2039 55 | * `KRB5_KDB_EXPIRATION` is suspiciously Jan 1st 2038 56 | * NFS `nfs_allow_preepoch_time` tunable uses 57 | * Olson zic/zoneinfo needs updating to the "new" format (I think libc 58 | `localtime.c` talks about this) 59 | * illumos BPF has `struct bpf_timeval` which is an "on the wire" 32-bit 60 | `struct timeval`-alike. 61 | 62 | ## Implementation 63 | 64 | The basic plan is to simply rebuild all libraries and utilities as 64-bit. 65 | This approach is operationally simple, most utilities can be attacked 66 | independently, in any order, potentially in parallel, and the work spread 67 | across multiple contributors. 68 | 69 | In most cases, it is envisaged that a new 64-bit utility be created, in 70 | the relevant 64-bit subdirectory, so that it can be tested. Once found 71 | acceptable, the 32-bit utility will be removed and replaced by the new 72 | version. 73 | 74 | ## Issues 75 | 76 | Utilities built as 64-bit will need to be tested functionally, independent 77 | of testing for Y2038 itself. 78 | 79 | In some cases, 64-bit applications can cause problems as they are not 80 | constrained by the 32-bit address space, and will consume excessive 81 | resources rather than fail (the 64-bit ls is not shipped in SmartOS for 82 | this reason, it appears). 
83 | 84 | Oracle Solaris are somewhat ahead of us here, and we can take note of 85 | issues they have reported. For example, issues hit during the 32->64 86 | conversion were people who had installed 32-bit PAM modules without 64-bit 87 | versions (though this could happen with any interface that allows for 88 | external plugin modules), and impacts of the larger fd limit in 64-bit 89 | programs (such as huge select masks or loops over all possible fd's needing 90 | to be converted to closefrom()/fdwalk() calls). There's a [public 91 | discussion](https://blogs.oracle.com/solaris/moving-oracle-solaris-to-lp64-bit-by-bit-v2) 92 | available. 93 | 94 | Known items with issues: 95 | 96 | * pam_smb_passwd 97 | * topo plugins 98 | * `vuid_event.h`'s `struct firm_event` uses `struct timeval32` even in LP64 environments. (It might, however, be used as a pre-hrtime_t interval timer; hard to say without further investigation.) 99 | 100 | ## Legacy applications 101 | 102 | The plan above only covers the artefacts from illumos-gate. Going beyond 103 | that requires involvement of the various illumos distributions. 104 | 105 | The distributions will need to rebuild the applications and packages they 106 | ship (in many cases the applications will already be 64-bit clean, due to 107 | exposure to other 64-bit operating systems). 108 | 109 | In addition, the distributions and their communities will be a conduit for 110 | identifying any applications that cannot be recompiled. This information 111 | would give visibility into the scale of the remaining problem, which would 112 | inform any decisions as to whether any further work is necessary. 113 | 114 | ## Workarounds 115 | 116 | For those applications that cannot be rebuilt, some workarounds have been 117 | suggested. They're mentioned here for completeness. 
118 | 119 | From Andrew Gabriel: 120 | 121 | I remember thinking about this some years ago, and had some ideas to 122 | keep some existing programs working, although nothing works for all 123 | possible use cases. These would work for programs that simply use it to 124 | create timestamped log records, but not if they write time_t values out 125 | to files for storage and use much later, or exchange values with other 126 | programs. Might still work in some cases if they do calculations on 127 | time_t too. 128 | 129 | One was to have an environment variable which allowed you to respecify 130 | what date the 32 bit (time_t)0 is. 131 | 132 | Another was to assume that it is unsigned, so you lose the ability to 133 | represent 1901-1969 in exchange for being able to represent 2035-2106. 134 | Again, might want an environment variable to enable or disable this 135 | behaviour, although not having it enabled after the 32 bit cutoff date 136 | would seem pointless. 137 | 138 | These should be relatively trivial to implement. (I did implement 139 | something similar with an LD_PRELOAD interceptor shim in Solaris 8 and 140 | Solaris 10, although it was for a slightly different purpose.) 141 | 142 | 143 | ## Testing 144 | -------------------------------------------------------------------------------- /ipd/0014/Y2038-warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/illumos/ipd/5f6d353603a3853d84eaf5d07ad25c7d250f1420/ipd/0014/Y2038-warning.png -------------------------------------------------------------------------------- /ipd/0015/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Andy Fiddaman, Hans Rosenfeld, Patrick Mooney 3 | sponsor: Joshua M. 
Clulow 4 | state: published 5 | mail: https://illumos.topicbox.com/groups/developer/Tcc767c8497fb4c78/ipd-15-bhyve-integration-upstreaming 6 | --- 7 | 8 | # IPD 15 bhyve integration/upstream 9 | 10 | ## Introduction 11 | 12 | bhyve, pronounced beehive, is a hypervisor/virtual machine manager that 13 | supports most processors which have hardware virtualisation support. 14 | 15 | bhyve was originally integrated into FreeBSD by NetApp in around 2011 where it 16 | became part of the base system with FreeBSD 10.0-RELEASE. It continued to 17 | evolve and was ported to illumos by Pluribus Networks in around 2013 and they 18 | contributed the resulting code to the illumos community in late 2017. From 19 | there, Joyent worked on integrating bhyve into their illumos fork, bringing it 20 | up-to-date with bhyve from FreeBSD-11.1 and making many improvements along the 21 | way. 22 | 23 | Some slides on Joyent's work in this area were presented 24 | [at bhyvecon 2018](https://www.youtube.com/watch?v=90ihmO281GE) 25 | 26 | bhyve has also been successfully side-ported from SmartOS into OmniOS and has 27 | been available there since release r151028 (November 2018). 28 | 29 | This IPD is for discussion around upstreaming bhyve to illumos-gate. 30 | 31 | ## Approach 32 | 33 | As part of their porting effort, the OmniOS developers tagged the relevant 34 | commits that were taken from SmartOS. This means that it is relatively 35 | easy to identify the commits that need to be considered as part of the 36 | upstreaming work, and it will be possible to compare the resulting branches 37 | with both SmartOS and OmniOS. OmniOS is closer to gate which will help in 38 | finding any conflicts or missing pieces. 39 | 40 | It is proposed to upstream code in several phases: 41 | 42 | 1. Bhyve pre-requisites 43 | 2. Bhyve (driver and userland component) 44 | 3. Viona pre-requisites 45 | 4. Viona driver 46 | 5. PCI Pass-through support 47 | 6. MDB support 48 | 7. 
Bhyve zone brand (TBC) 49 | 50 | Hans has been working on collating the list of required commits, and the 51 | current state of play is available at 52 | https://us-east.manta.joyent.com/hrosenfeld/public/bhyve.html 53 | 54 | ### 1. Bhyve pre-requisites 55 | 56 | There are a number of SmartOS features and changes which are necessary to 57 | support bhyve. At the time of writing, 10 have been identified which are 58 | self-contained and can be integrated independently. When integrated, 59 | not all will have any current consumers but they lay the groundwork for 60 | the next phase. 61 | 62 | Note - some of the text here is taken directly from the associated Joyent 63 | issue. 64 | 65 | 1. sdev plugin framework 66 | 67 | This is a generic sdev (/dev) plugin framework from Joyent, part of their 68 | Bardiche/vnd networking project that has not been upstreamed. We need this 69 | for bhyve since it uses a dynamic sdev plugin to manage entries within 70 | /dev/vmm/ 71 | 72 | 2. [OS-6549](https://smartos.org/bugview/OS-6549) vmm segment driver 73 | 74 | Applications such as the coming bhyve will want userspace access to the 75 | regions of kmem allocated to be the guest memory. While `seg_umap` exists to 76 | achieve a similar purpose, it was designed to be very constrained (a single 77 | page) in its capabilities. Rather than updating that for now, a new segment 78 | driver with lessened restrictions could be created. 79 | 80 | 3. [OS-6627](https://smartos.org/bugview/OS-6627) increase `get_max_pages` 81 | 82 | This increases the maximum pages that can be retrieved in order to allow 83 | virtual machines to consume a significant portion of the system's memory. 84 | 85 | 4. [OS-6688](https://smartos.org/bugview/OS-6688) combine `misc_link_i386` handlers 86 | 87 | Several handlers in usr/src/cmd/devfsadm/i386/misc\_link\_i386.c are 88 | essentially the same: they name their /dev/ entry after the minor name.
In 89 | preparation for some new bhyve entries, let's use common code for these. 90 | 91 | 5. [OS-6633](https://smartos.org/bugview/OS-6633) add `cyclic_move_here()` 92 | 93 | When they are initially created, cyclics are generally placed on the cpu 94 | which was running the thread which allocated them. Reprogramming operations 95 | for the cyclic result in cross-call-like behavior when performed from other 96 | CPUs. This is fine for many cases, but certain applications may wish to 97 | localize that cyclic for better reprogramming performance. 98 | 99 | 6. [OS-6684](https://smartos.org/bugview/OS-6684) cyclic reprogramming can race with removal 100 | 101 | Bug fix for cyclic reprogramming 102 | 103 | 7. [OS-7034](https://smartos.org/bugview/OS-7034) ctxops should use stack ordering for save/restore 104 | 105 | bhyve uses ctxop functions to ensure that guest FPU state is maintained on 106 | the CPU when the thread is running (and is properly stashed when a context 107 | switch occurs). In the past (prior to bhyve and eagerFPU, especially) there 108 | weren't ordering constraints between ctxop handlers, since they were largely 109 | independent of one another. Bhyve makes the case that now, they should be 110 | associated with the thread in such a way that allows stack-like traversal for 111 | savectx/restorectx. That is: most->least recent for save, 112 | least->most recent for restore. 113 | 114 | 8. [OS-7096](https://smartos.org/bugview/OS-7096) `installctx` needs `kpreempt_disable` protection 115 | 116 | Fix race in ctxops 117 | 118 | 9. [OS-7104](https://smartos.org/bugview/OS-7104) export hrtime params for pvclock impls 119 | 120 | There is an associated commit for KVM that goes along with this. 121 | Any distribution using KVM will need to consider this in conjunction with the 122 | update to gate. 123 | 124 | 10. Add HMA framework 125 | 126 | Add a hypervisor management framework to allow KVM and bhyve to co-exist.
127 | 128 | This will require distributions to make concurrent changes to KVM and 129 | Virtualbox if they ship them. 130 | 131 | ### 2. Bhyve (driver and userland component) 132 | 133 | With the pre-requisites in place, the main bhyve component can be integrated. 134 | To allow proper attribution we'll do this in two changesets: 135 | 136 | 1. [OS-6409](https://smartos.org/bugview/OS-6409) import Pluribus bhyve port 137 | 138 | This is the original code drop that Joyent received from Pluribus Networks, Inc. 139 | It is not wired up for build or any checks, it's essentially dead code without 140 | the following commit. 141 | 142 | 2. Everything else that is currently in the branch 143 | [bhyve/bhyve](https://github.com/hrosenfeld/illumos-gate/commits/bhyve/bhyve), 144 | squashed together as one big commit. 145 | 146 | This will update bhyve to the state of illumos-joyent as of late January 147 | 2020. It will be wired up for building and packaging. At this point it will 148 | be usable from the global zone. 149 | 150 | 151 | ### 3. Viona pre-requisites 152 | 153 | The accelerated viona network driver for bhyve does have a few prerequisites, 154 | too. They can all be found in the branch 155 | [bhyve/viona-prereq](https://github.com/hrosenfeld/illumos-gate/commits/bhyve/viona-prereq). 156 | 157 | 1. [OS-6761](https://smartos.org/bugview/OS-6761) hcksum routines are too verbose\ 158 | [OS-6762](https://smartos.org/bugview/OS-6762) want `mac_hcksum_clone` function 159 | 2. [OS-4600](https://smartos.org/bugview/OS-4600) vnd can receive packets without checksums 160 | 3. [OS-7727](https://smartos.org/bugview/OS-7727) want mac rx barrier function 161 | 4. [OS-5845](https://smartos.org/bugview/OS-5845) lx aio performance improvements and move into kernel 162 | 5. 
[OS-2340](https://smartos.org/bugview/OS-2340) vnics should support LSO\ 163 | [OS-6778](https://smartos.org/bugview/OS-6778) MAC loopback traffic should avoid cksum work\ 164 | [OS-6794](https://smartos.org/bugview/OS-6794) want LSO support in viona\ 165 | [OS-7319](https://smartos.org/bugview/OS-7319) dangling ref in `mac_sw_cksum()`\ 166 | [OS-7331](https://smartos.org/bugview/OS-7331) `mac_sw_cksum()` drops valid UDP traffic 167 | 6. [OS-7556](https://smartos.org/bugview/OS-7556) IPv6 packets dropped after crossing MAC-loopback 168 | 7. [OS-7564](https://smartos.org/bugview/OS-7564) panic in `mac_hw_emul()` 169 | 8. [OS-7520](https://smartos.org/bugview/OS-7520) OS-6778 broke IPv4 forwarding\ 170 | [OS-6878](https://smartos.org/bugview/OS-6878) `mac_fix_cksum` is incomplete\ 171 | [OS-7806](https://smartos.org/bugview/OS-7806) cannot move link from NGZ to GZ 172 | 9. [OS-7924](https://smartos.org/bugview/OS-7924) OS-7520 regressed some instances of IP forwarding 173 | 10. [OS-8027](https://smartos.org/bugview/OS-8027) reinstate mac-loopback hardware emulation on Tx (undo OS-6778) 174 | 11. [OS-7904](https://smartos.org/bugview/OS-7904) simnet has bogus `mi_tx_cksum_flags`\ 175 | [OS-7905](https://smartos.org/bugview/OS-7905) `mac_tx()` is too eager to emulate hardware offloads 176 | 177 | The first four changes can be upstreamed independently. The individual changes 178 | beginning at OS-2340 and ending at OS-8027 need to be squashed and upstreamed 179 | as one change, the initial work caused issues that were fixed or partially 180 | backed out in later commits. The remaining commit for OS-7904 and OS-7905 seems 181 | to depend on those changes, even if only to avoid merge conflicts. 182 | 183 | ### 4. Viona driver 184 | 185 | The accelerated viona network driver for bhyve will be integrated separately. 
186 | The changes in the branch 187 | [bhyve/viona](https://github.com/hrosenfeld/illumos-gate/commits/bhyve/viona) 188 | will be squashed into one commit with Patrick Mooney set as the author. 189 | All other contributors will be listed in the commit message. 190 | 191 | ### 5. PCI Pass-through support 192 | 193 | Support for passing PCI devices through to Bhyve guests. The changes in the 194 | branch [bhyve/passthru](https://github.com/hrosenfeld/illumos-gate/commits/bhyve/passthru) 195 | will be squashed into one commit with Hans Rosenfeld as the author. All other 196 | contributors will be listed in the commit message. 197 | 198 | ### 6. MDB support 199 | 200 | Add bhyve target support to mdb. The changes in the branch 201 | [bhyve/mdb-bhyve](https://github.com/hrosenfeld/illumos-gate/commits/bhyve/mdb-bhyve) 202 | will be squashed into one commit with Hans Rosenfeld as the author. All other 203 | contributors will be listed in the commit message. 204 | 205 | ### 7. Bhyve zone brand 206 | 207 | A zone brand for deploying bhyve VMs in non-global zones. 208 | 209 | SmartOS and OmniOS each have a bhyve zone brand, but they are not the same. 210 | If a brand is upstreamed to gate, it is likely to be based on the OmniOS one, 211 | perhaps with useful additions from SmartOS. 212 | 213 | 214 | ## Additional components 215 | 216 | Unlike FreeBSD, where a specialized loader (bhyveload) is able to boot FreeBSD 217 | guests directly, bhyve on illumos typically makes use of a boot ROM for initial 218 | VM start-up and loading of the guest bootloader/OS. There is a bhyve-specific 219 | fork of the uefi-edk2 repository bearing the necessary patches to make it 220 | functional under the hvm environment. For now, it will be left up to the 221 | downstream distributions to decide on how (if at all) they wish to build and 222 | ship such a ROM. 
(Integrating that build process into smartos-live proved to be 223 | a challenge at Joyent, so the boot ROM artifacts were periodically built "by 224 | hand" and stashed as binaries in the repository.) 225 | -------------------------------------------------------------------------------- /ipd/0016/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Peter Tribble, Toomas Soome 3 | sponsor: Joshua M. Clulow 4 | state: published 5 | --- 6 | 7 | # IPD 16 EOF SunOS 4 binary compatibility 8 | 9 | On SPARC (but not intel), illumos retains support for running binaries 10 | from SunOS 4. This involves 2 components: 11 | 12 | * aoutexec kernel support 13 | * libbc library emulation 14 | 15 | There appears to be little demand for this capability - few or no users have 16 | any such binaries. 17 | 18 | While the aoutexec component is relatively trivial, libbc contains a lot 19 | of code that is difficult to test. There is also a significant quantity of 20 | assembler. The maintenance burden associated with this old code is 21 | considerable, especially in the context of the various projects to 22 | modernize the codebase and the toolchain. We're carrying around a lot of 23 | dead weight here. 24 | 25 | This subsystem also requires ucblib, preventing its removal. 26 | Removal of ucblib is not within the scope of this project. 27 | 28 | This project proposes to remove both libbc and aoutexec. 29 | 30 | It has subsequently come to light that the file libmp.so.1 is also 31 | present for SunOS 4 binary compatibility, and can be removed. This file 32 | is shipped for both SPARC and intel. 
33 | 34 | ## Related issues: 35 | 36 | * [Bug 12292](https://www.illumos.org/issues/12292) retire libbc 37 | -------------------------------------------------------------------------------- /ipd/0018/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Andy Fiddaman 3 | sponsor: Dan McDonald 4 | state: published 5 | --- 6 | 7 | # IPD 18 overlay network integration/upstream 8 | 9 | ## Introduction 10 | 11 | In 2014, illumos-joyent gained support for a new dladm device called an 12 | `overlay`. In 2020, illumos-omnios imported this feature which formed part 13 | of their r151034 release. This IPD is to discuss how this feature can be 14 | upstreamed to illumos-gate. 15 | 16 | Overlay networks are a form of network virtualisation which, in illumos terms, 17 | can most simply be thought of as an Etherstub which can span multiple hosts. 18 | 19 | From the man page: 20 | 21 | ``` 22 | Overlay devices are a GLDv3 device that allows users to create overlay 23 | networks that can be used to form the basis of network virtualization and 24 | software defined networking. Overlay networks allow a single physical 25 | network, often called an underlay network, to provide the means for 26 | creating multiple logical, isolated, and discrete layer two and layer 27 | three networks on top of it. 28 | 29 | Overlay devices are administered through dladm(1M). Overlay devices 30 | themselves cannot be plumbed up with IP, vnd, or any other protocol. 31 | Instead, like an etherstub, they allow for VNICs to be created on top of 32 | them. Like an etherstub, an overlay device acts as a local switch; 33 | however, when it encounters a non-local destination address, it instead 34 | looks up where it should send the packet, encapsulates it, and sends it 35 | out another interface in the system. 36 | 37 | A single overlay device encapsulates the logic to answer two different, 38 | but related, questions: 39 | 40 | 1. 
How should a packet be transformed and put on the wire? 41 | 2. Where should a transformed packet be sent? 42 | 43 | Each of these questions is answered by a plugin. The first question is 44 | answered by what's called an encapsulation plugin. The second question 45 | is answered by what's called a search plugin. Packets are encapsulated 46 | and decapsulated using the encapsulation plugin by the kernel. The 47 | search plugins are all user land plugins that are consumed by the varpd 48 | service whose FMRI is svc:/network/varpd:default. This separation allows 49 | for the kernel to be responsible for the data path, while having the 50 | search plugins in userland allows the system to provide a much more 51 | expressive interface. 52 | ``` 53 | 54 | In the illumos-joyent implementation, there is a single encapsulation plugin, 55 | VXLAN, providing the Virtual eXtensible Local Area Network protocol, 56 | [RFC7348](https://tools.ietf.org/html/rfc7348) 57 | 58 | Three search plugins are implemented: 59 | 60 | * direct - 61 | A point-to-point module that can be used to create an overlay that forwards 62 | all non-local traffic to a single destination. 63 | 64 | * files - 65 | A plugin that specifies where traffic should be sent based on a mapping 66 | file. 67 | 68 | * svp - 69 | A dynamic plugin that uses a proprietary protocol (portlan) to look up the 70 | destination address for a frame. 71 | 72 | > The `svp` plugin is Joyent Triton specific, does not exist in the OmniOS 73 | > port and **is not part of this initial proposed upstream work**. 74 | 75 | Due to the nature of Joyent SmartOS, with its read-only root and centralised 76 | configuration file, support for overlay persistence was not required and 77 | therefore not implemented as part of the integration. The OmniOS port 78 | included additional work to enable persistence and this will be upstreamed 79 | as part of this work.
80 | 81 | ## Approach 82 | 83 | It is proposed to upstream code from OmniOS in three phases: 84 | 85 | 1. Any standalone pre-requisite changes, whether or not there is a consumer 86 | at this stage. 87 | 88 | 2. A commit implementing the overlay driver and the accompanying 89 | userland components. This will be usable but lacking features such as 90 | persistence across reboots. 91 | 92 | 3. Several follow-up commits from OmniOS. 93 | 94 | ## Commits 95 | 96 | The commits which are proposed for the three phases are as follows. Each 97 | change will also be updated as per current gate standards including removal 98 | of lint targets and cleanups to whitespace etc. 99 | 100 | > NB: Commits without an OS-xxxx ID are from OmniOS rather than Joyent SmartOS 101 | 102 | 1. Pre-requisites 103 | 104 | > Each of these will be reviewed and integrated separately. 105 | 106 | * OS-3894 want librename 107 | * OS-3886 Implement id\_space as a library 108 | * OS-3884 Want libbunyan 109 | * OS-4112 stack overflow from promisc callbacks 110 | * OS-3893 sendfile compat checks shouldn't be done in so\_sendmblk 111 | * OS-3948 refhash could be used outside of mpt\_sas 112 | * OS-3949 want string property ranges for mac 113 | * OS-3080 Need direct callbacks from socket upcalls via ksocket 114 | * OS-3944 snoop should support vxlan 115 | * OS-4245 mac\_rx\_srs\_process stack depth needs to account for harder usage 116 | * OS-4009 Want UDP src port hashing for VXLAN 117 | 118 | 2. Main commit 119 | 120 | > This will be reviewed and integrated as one. 
121 | 122 | * [13500 Want support for "overlay" networks](https://www.illumos.org/issues/13500) 123 | 124 | Including: 125 | 126 | * OS-3000 I for one, welcome my overlay network overlords 127 | * OS-3943 want vxlan support 128 | * OS-3945 want varpd direct plugin 129 | * OS-3946 want varpd files plugin 130 | * OS-3987 property looks better with a 'y' 131 | * OS-3983 overlay\_target\_lookup\_request() doesn't properly populate vlan 132 | info 133 | * OS-3000 I for one, welcome my overlay network overlords (add missing files) 134 | * OS-3960 varpd should drop privs 135 | * OS-3973 overlay\_target\_ioct\_list overdoes its copyout 136 | * OS-3218 libvarpd's fork handler is a time bomb 137 | * OS-3993 overlays sometimes think they're vnics 138 | * OS-4010 Automate assigning rings to overlay based vnics 139 | * OS-4080 launching a second varpd confuses the world 140 | * OS-4077 varpd should live in /usr/lib 141 | * OS-4079 zero out the fma message on restore to help with mdb confusion 142 | * OS-4087 dladm show-overlay -f doesn't properly show degraded state 143 | * OS-4111 dladm show-overlay often has column overflow 144 | * OS-4159 error messages when dladm create-vnic fails are mostly useless 145 | * OS-3958 want documentation for overlay devices 146 | * OS-4182 need dladm create-overlay -t 147 | * OS-4174 long options for dladm \*-overlay 148 | * OS-4179 want search plugin in overlay property list 149 | * OS-4181 Clean up duplicate VXLAN\_MAGIC definition 150 | * OS-4179 want search plugin in overlay property list (fix debug) 151 | * OS-4370 varpd should support getting an include path from SMF 152 | * OS-4203 varpd stayed in carbonite after signal delivery 153 | * OS-4373 varpd plugins should not link against libvarpd 154 | * OS-4397 varpd dumps core due to race on shutdown 155 | * OS-4086 overlay driver can lose track of link status 156 | * OS-3994 varpd loses PRIV\_DL\_CONFIG 157 | * OS-5298 overlay driver degradation shouldn't impact data link status 158 | * 
OS-5299 varpd direct plugin doesn't properly restore its mutex 159 | * OS-6890 .WAIT doesn't work as an actual target in varpd 160 | * OS-6943 varpd not listed as an install\_h target 161 | * OS-6946 varpd structs fail ctfdiff check 162 | * OS-6980 libvarpd leaks varpd\_query\_ts 163 | * OS-6847 vxlan header allocation should think about mblk chains 164 | * OS-7243 libvarpd\_c\_destroy gets away with pointer murder 165 | * OS-7501 overlay(7D) can receive packets with DB\_CKSUMFLAGS() set 166 | * OS-7516 so\_krecv\_unblock() double-mutex-exits 167 | * OS-4498 custr\_cstr() should never return NULL (overlay) 168 | * OS-8027 reinstate mac-loopback hardware emulation on Tx (undo OS-6778) 169 | * OS-6127 "dladm show-overlay " exits zero when varpd doesn't know 170 | about the overlay 171 | * OS-7276 various illumos fixes needed for newer GCC versions (overlay) 172 | * OS-6920 Split the custr functions into their own library (overlay/dladm) 173 | * OS-7141 Overlay device-creation weirdness contributes to varpd boot 174 | failures 175 | * OS-6908 Makefiles missing 'all' target 176 | * OS-4958 Typo in overlay.5 177 | * OS-6175 fix manual pages for newer mdoc lint 178 | * OS-4928 overlay\_files.4 broken with new mandoc 179 | * OS-5377 stack overflow from round trip through mac and overlay 180 | * OS-8245 varpd, -fstack-protector woes 181 | 182 | The following changes from OmniOS will be part of the same commit but 183 | uploaded as a separate patch set to aid review: 184 | 185 | * dladm: remove unused function prototype 186 | * varp no-longer needs -lnsl 187 | * dladm now links with libvarpd (fix rcm) 188 | * varpd: Remove duplicate clobber target 189 | * Fix crash in dladm create-overlay 190 | * Add VXLAN to etc/services 191 | * overlay: Add package manifest 192 | 193 | 3. Follow-ups 194 | 195 | > Each of these will be reviewed and integrated separately. 
196 | 197 | * Add missing overlay class to show-link description 198 | * Add stderr as default varpd bunyan stream 199 | * Show a better error message when a VNIC cannot be brought up on an overlay 200 | network due to the encapsulation plugin being unable to bind a socket 201 | * Overlays should persist across reboots 202 | * OS-7088 cyclics corked on overlay socket with full queue (#335) 203 | 204 | ## Future Work 205 | 206 | This initial work brings the overlay implementation from SmartOS and OmniOS 207 | into gate as-is, in a way that is useful today. Possible future work items 208 | include: 209 | 210 | * Better interoperability with other VXLAN implementations, such as those 211 | that use multicast (inc. FreeBSD - the pluggable nature of varpd means 212 | that it will be possible to add a multicast resolver plugin). 213 | Interoperability with Solaris and VMware's NSX could also be investigated. 214 | 215 | * Overlays currently only support the VTEP (VXLAN tunnel endpoint) being 216 | instantiated in the global zone's netstack. It would be useful to add the 217 | ability to select a zone's netstack to be used for this underlay network. 218 | 219 | * Publicly specify and upstream Triton's SVP (SDC VXLAN Protocol). 220 | 221 | ## References 222 | 223 | * https://man.omnios.org/man5/overlay.5.html 224 | * https://github.com/joyent/illumos-joyent/blob/dev-overlay/README 225 | * http://dtrace.org/blogs/rm/2014/07/25/illumos-overlay-networks-development-preview-01/ 226 | * http://dtrace.org/blogs/rm/2014/09/23/illumos-overlay-networks-development-preview-02/ 227 | * https://tools.ietf.org/html/rfc7348 228 | 229 | -------------------------------------------------------------------------------- /ipd/0019/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Joshua M. 
Clulow 3 | sponsors: Robert Mustacchi , Garrett D'Amore 4 | state: published 5 | --- 6 | 7 | # IPD 19 Sunset SPARC 8 | 9 | ## Goal 10 | 11 | Officially end SPARC support in illumos and remove the SPARC code from the 12 | tree. 13 | 14 | ## Background 15 | 16 | When the illumos project was formed in 2010 as a fork of OpenSolaris, the 17 | operating system contained support for 32-bit and 64-bit x86 machines, and for 18 | various 64-bit SPARC machines from Sun Microsystems. In 2018, we [officially 19 | dropped support for 32-bit x86 systems](https://www.illumos.org/issues/8685), 20 | leaving just 64-bit x86 and SPARC. 21 | 22 | The most recent SPARC machines for which we have relatively direct and complete 23 | support were contemporary at the time of the fork; viz., the UltraSPARC T2 24 | family of servers, such as the T5120 and T5220. The last of these systems 25 | reached their end-of-life between 2011 and 2012. In the decade hence, the size 26 | and quality of the pool of second hand systems available through eBay and other 27 | vendors has dwindled, and prices have risen to match. Desktop systems in 28 | particular are popular for collectors, and are thus now staggeringly expensive 29 | if you can find them at all. As a result, the pool of machines available to 30 | build the software is extremely limited; the project does not currently have 31 | access to a permanent official SPARC build machine. 32 | 33 | Without ready access to build machines, one might consider cross compilation. 34 | Though we have some support for cross-architecture software generation in the 35 | tools, the operating system does not currently support being cross compiled in 36 | full. Work would be needed to complete surgery to Makefiles and arrange for 37 | packaged cross-architecture C compilers, amongst other things. 38 | 39 | In theory one might emulate SPARC systems with QEMU, but reports in the field 40 | suggest that this does not work well enough to run modern illumos. 
Even if it 41 | did, it may take a very long time -- e.g., weeks! -- to build the operating 42 | system under full emulation. 43 | 44 | In addition to the core of illumos, the external software ecosystem has changed 45 | a lot in ten years. Many new projects have emerged that generate program text 46 | at runtime (JIT) or which do not use established code generation systems like 47 | LLVM or GCC that have SPARC support; e.g., Go and Node.js. Some projects could 48 | in theory support illumos on SPARC, like Rust, but it will still require a not 49 | inconsiderable amount of work to get there. There is growing interest for 50 | use of Rust in the development of the core of illumos, and lack of current 51 | support for SPARC inhibits those efforts. 52 | 53 | If a community of users was going to emerge to provide engineering effort and 54 | build resources for SPARC, it likely would have done so by now. It is always 55 | sad to close a chapter in our history, and SPARC systems represent a strong and 56 | positive memory for many of us. Nonetheless, the time has arrived to begin the 57 | process of removing SPARC support from the operating system. 58 | 59 | ## What Would This Enable? 
60 | 61 | A non-exhaustive list of project work that members of the project would like 62 | to undertake, where SPARC support presents a barrier today includes: 63 | 64 | - retiring the now-ancient GCC 4.4.4 shadow compiler that remains chiefly 65 | to support the SPARC platform 66 | - use of newer GCC versions and newer C standards to enable improvements 67 | such as better compile-time assertions (`CTASSERT()`, see 68 | [12994](https://www.illumos.org/issues/12994), etc) 69 | - cleanup of some of the internals of [mac(9E)](https://illumos.org/man/9E/mac) 70 | which have some facilities that exist only for specific SPARC hardware 71 | - reworking of some of the interpreted programs in `usr/src/tools` with faster 72 | and more featureful tools written in Rust 73 | - use of Rust to implement new facilities in the kernel, in libraries and in 74 | commands 75 | 76 | ## Strategy and Timeline 77 | 78 | 1. **Replace GCC 4.4.4 shadow with GCC 10 shadow** *(done, see [bug 79 | 14149](https://www.illumos.org/issues/14149))* 80 | 1. **Update project documentation to make a clear statement about platform support** *(immediate)* 81 | - e.g., https://illumos.org/docs/about/#supported-hardware-platforms 82 | - Only 64-bit x86 systems are supported 83 | 1. **Stop accepting changes to code for SPARC** *(immediate)* 84 | 1. 
**Delete the SPARC code from the tree** *(coming months)* 85 | - Care must be taken not to break anything, but one benefit of dropping the platform is cleaning up a _lot_ of code that is mostly not relevant anymore so we should likely do this deliberately and not just clean up occasional files "as we go" 86 | - Even though there will be just one architecture after the removal, any machinery that exists to support multiple architectures must be kept to enable future porting work (e.g., ARM or RISC-V) 87 | - We should retain support for interpreting SPARC _data_ where it is not in the way; e.g., 88 | - `mdb` can retain support for SPARC core files, ELF notes, etc 89 | - `dis`, `libdisasm`, etc, can continue to disassemble SPARC program text 90 | -------------------------------------------------------------------------------- /ipd/0021/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Robert Mustacchi 3 | sponsors: Dan McDonald 4 | state: published 5 | --- 6 | 7 | # IPD 21 PCI Platform Unification 8 | 9 | ## Goal 10 | 11 | Unify more of the PCI configuration and implementation logic to make it 12 | easier to add support for more features and port the system to 13 | additional platforms. This will serve as a template for other parts of 14 | the system as well. 15 | 16 | ## Background 17 | 18 | Due to the rather disparate nature of the PROM on SPARC systems and the 19 | BIOS on x86, illumos has traditionally ended up with several different 20 | drivers that re-implement major portions of PCI device enumeration and 21 | root complexes. Sometimes, this is a case where an x86 driver was at one 22 | point copied from SPARC and vice-versa. 
In particular, this has led to 23 | several cases where the following are duplicated and different depending 24 | on the platform: 25 | 26 | * PCI device enumeration at boot time 27 | * Entirely separate logic for hotplug enumeration 28 | * Basic parts of nexus drivers 29 | * Pieces like ioctls on devices can easily end up cross-platform 30 | 31 | This has made things more challenging to fix in the system. For example, 32 | consider the following: 33 | 34 | 1. Even on x86, the logic for how base properties are added to device 35 | nodes varies between whether the device was hotplugged or not, leading 36 | to more places to try and find things to catch. 37 | 38 | 2. Hotplug logic can deal with a bridge that doesn't have a valid PCI 39 | bus assigned; however, boot-time code assumes that some firmware entity 40 | (e.g. BIOS, PROM, etc.) has gone through and set up all the devices. 41 | This makes it harder to actually run illumos on other systems and get 42 | them going. 43 | 44 | 3. Adding support for new platforms and architectures means duplicating 45 | a bunch of these and trying to figure out what it should be, which only 46 | makes the problem worse. 47 | 48 | The crux of this is that the platforms today are not well factored. This 49 | will only get worse and makes it harder to add support for new 50 | platforms. We know that there will be more coming with support for 51 | aarch64, RISC-V, and even alternative firmware implementations on x86 52 | which don't rely on ACPI/UEFI. 53 | 54 | There are already several parts of the system that are generally 55 | factored this way, even inside of the existing PCI code. For example: 56 | 57 | * Most of the PCIe initialization and error handling has been 58 | generalized. 59 | 60 | * The PCIe bridge driver is mostly common code, with a bit of platform 61 | specific code (e.g. `pcieb_x86.c` and historically `pcieb_sparc.c`). 
62 | 63 | * PCIe cfgacc bits are mostly common today, with callouts into platform 64 | specific code. 65 | 66 | This is a good step in the right direction; we just need to take this 67 | another step further and continue this across the broader PCIe 68 | implementation. 69 | 70 | Finally, there are a bunch of changes that are coming in the industry 71 | with respect to PCIe hotplug. In particular, there are more and more 72 | platform-specific features being added such as the system firmware 73 | intermediary, different error containment mechanisms, and more. Most of 74 | this is spurred on by the adoption of NVMe. This means that there are going 75 | to be more and more platform-specific pieces over time, which really 76 | emphasizes the importance of having a solid, common foundation. We don't 77 | want to repeat a large chunk of the current per-platform enumeration 78 | history going forward. 79 | 80 | ### Proposal 81 | 82 | The primary thrust of this IPD is that we should take the existing x86 83 | implementations of PCI functionality and over time, refactor it so that 84 | there is a platform specific and general part to it. Concretely, this 85 | means: 86 | 87 | * Isolating things like ACPI. In particular, ACPI can be used on 88 | multiple platforms (e.g. ARM SBSA); however, right now it is intimately 89 | tied to an x86 implementation. Similarly, there are platforms that don't 90 | use it. 91 | 92 | * Either separating out platform-specific knowledge or being OK with 93 | actually sharing that information between platforms. 94 | 95 | More specifically, we'd like to do the following: 96 | 97 | * First, introduce a new series of headers that describe platform 98 | specific functionality that something needs to implement. The goal with 99 | this is to be a general trend that other subsystems can use. These 100 | headers would not be shipped and would reside in a new sys subdirectory: 101 | `uts/common/sys/plat/`.
The goal is to move platform-specific needs into 102 | one location to make it easier to answer the question, 'what is required 103 | to port illumos to a new architecture'. 104 | 105 | * Introduce the first header and split into this which would be 106 | `uts/common/sys/plat/pci_prd.h` which stands for PCI Platform Resource 107 | Discovery. The goal of this would be to abstract the myriad resource 108 | discovery initialization pieces from this. The initial split would 109 | leave the existing `pci_autoconfig` module still specific to x86 and 110 | would transform a large chunk of the existing `pci_resource.c` logic 111 | into an i86pc specific implementation. 112 | - This would require platforms to implement a new `pci_prd` module, 113 | which would become a dependency. 114 | - PCI bus renumbering is a specific feature that exists partially on 115 | x86 today. It theoretically uses the ACPI `_BBN` to renumber unit 116 | addresses; however, this has only ever been enabled for the Sun 117 | X8400. We would remove this logic from `pci_boot.c`. Importantly, 118 | this would only impact a single machine and even then, only if 119 | someone performed a fresh installation. Because every other x86 120 | system in the past decade has never utilized this, and this only 121 | comes into play upon first installation (because unit addresses 122 | are all cached), there is very little impact from removing this. 123 | 124 | Even just implementing this much will make it easier for folks who are 125 | looking to port illumos to new systems by making it easier to see what 126 | is actually platform specific here. 127 | 128 | Once this is done there are several parallel pieces of work that can be 129 | done: 130 | 131 | * Right now there are three to four copies of the memlist code that all 132 | work in slightly different circumstances and expect different things 133 | around allocation. Some of these use both the forward and rear pointers, 134 | while others don't.
This makes it very hard to actually move memlists 135 | around between subsystems. The various files all expect this to be moved 136 | to common code, but none have been done today. This would seek to merge 137 | the different implementations and allow this to be exercised by userland 138 | test suites to aid in testing. We have found several bugs and 139 | assumptions while prototyping various systems at Oxide in this disparate 140 | logic. 141 | 142 | * We could then fold in the PCI hotplug enumeration and the boot 143 | enumeration into one. This would allow us to be able to handle PCI bus 144 | renumbering at boot time which is becoming equally important for hotplug 145 | systems when the platform firmware may not be able to accurately set 146 | things up. This also would be a way to get rid of multiple settings of 147 | the various reg properties. 148 | 149 | * We could make the ACPI PCI platform resource discovery common code, 150 | allowing for use on other architectures where ACPI is becoming more 151 | prevalent for better and for worse. 152 | 153 | * We could finally make the x86 `pci_autoconfig` module actually common 154 | code, which would make it simpler to support a broader set of platforms. 155 | 156 | ### Stability 157 | 158 | An important thing to emphasize is that the PCI PRD, or really any of 159 | the intended `<sys/plat/*.h>` headers are always intended to be private 160 | to illumos. This is important for a few reasons: 161 | 162 | 1. We don't know the right interfaces and what we need will change as we 163 | have more ports land. 164 | 165 | 2. Unlike leaf device drivers which are able to be fairly isolated, this 166 | is not true of this part of the system. If someone has their own 167 | platform, that is going to be hard to maintain as a stable interface and 168 | would overly constrain us.
169 | 170 | Nothing in this IPD should be construed as suggesting that these will be 171 | stable, rather this is just a way to improve the situation for us and 172 | make it easier for us to maintain the system and support additional 173 | platforms. 174 | -------------------------------------------------------------------------------- /ipd/0022/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Jason King 3 | state: draft 4 | --- 5 | 6 | # IPD 22 Unsharing Shared Libraries 7 | 8 | ## Introduction 9 | 10 | For decades, shared libraries have been used for runtime code sharing by applications. 11 | As part of the implementation on illumos (and many other UNIX-like systems), the machine code instructions in the library (aka the 'text') also are shared in RAM. 12 | That is, the virtual memory (VM) system on the illumos kernel will map the same physical pages of RAM that contain the text segments of a shared library into all of the processes using that shared library. 13 | While this is largely beneficial, for libraries that handle sensitive information such as crypto keys, this can be a detriment. 14 | Cache timing attacks may allow malicious processes running on the same host to exploit this sharing of text pages to exfiltrate sensitive data. 15 | This proposal is to introduce a new ELF section flag as well as a new security flag that will allow shared libraries or applications that opt-in to eliminate this sharing of pages of shared libraries in the VM subsystem and thus reduce the exposure to timing attacks. 16 | 17 | ## Interfaces 18 | 19 | Within an ELF object, the machine code is typically contained in the `.text` section with the `SHF_ALLOC` and `SHF_EXECINSTR` flags set for that section. 20 | We propose adding a new dynamic tag (e.g. `DT_NOSHARE`) with a value chosen from the appropriate range (likely an unused value between `DT_SUNW_ENCODING` and `DT_HIOS`).
21 | When this tag is present in the `.dynamic` section of an ELF executable or shared library, the corresponding `Elf{32,64}_Dyn.d_un.d_val` value shall be either `0` or `1`. 22 | All other values are currently undefined. 23 | When `d_val` is `1`, the kernel will not share mappings of the ELF object between processes. 24 | Support for a new linker option (e.g. `-z noshare`) will be added to `ld(1)` to generate ELF objects with this flag set. 25 | It's anticipated libraries such as `libcrypto` or `libssl` (from openssl/libressl/etc) will be among the ones to use this (though it is likely that distributions delivering these libraries will need to add this flag to their build scripts). 26 | Additionally, a new security flag (e.g. `PROC_SEC_NOSHARE`) will be added that will prevent sharing of text pages for all mapped text segments of a process -- regardless of the presence or absence of the `DT_NOSHARE` tag in any libraries that are mapped into the process. 27 | The dynamic tag as well as the security flag act as a logical 'OR' to trigger the non-sharing behavior. 28 | This is to allow programs that deal with large amounts of sensitive data to disable it for all shared objects instead of trying to do so in a piecemeal fashion and risk missing a library. 29 | 30 | ## Implementation 31 | 32 | While an implementation has not been written yet, we can look at the code of the existing VM subsystem to help guide the implementation. 33 | Currently, there is a feature (disabled by default) intended for NUMA systems that will duplicate the pages of a text section into newly allocated anonymous memory implemented by the `segvn_textrepl()` and `segvn_textunrepl()` functions. 34 | The intention for that feature is (based on the comments) to allow the instructions to reside in RAM 'close' to the cores that will be executing it.
35 | The text pages of a shared library are 'faulted' in, and in `segvn_fault()` instead of mapping the vnode's pages, a segment of anonymous memory is allocated and the contents of the vnode's pages are copied to this anonymous memory. 36 | 37 | The above feature as it turns out appears (from an initial look) to be largely what is desired -- except that instead of duplicating the pages based on NUMA topology, it is controlled by flags in the object itself. 38 | This seems like it could (at minimum) be used as a basis for a proof of concept. 39 | Such an approach will increase the amount of swap space that is reserved, but it seems like a reasonable tradeoff for the initial implementation. 40 | As more experience is gained, a determination can be made if it would be worthwhile to make the 'unsharing' smarter or if a different implementation may be more advantageous (while preserving the proposed interfaces for enabling the feature). 41 | For example, it may be worth eventually eliminating the need to reserve swap space for the unshared pages. 42 | Instead, the kernel could merely page-in the original pages from the original object on disk in the event the in-RAM pages had to be discarded due to memory pressure (assuming pages are mapped read-only). 43 | -------------------------------------------------------------------------------- /ipd/0023/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Joshua M. Clulow 3 | state: predraft 4 | --- 5 | 6 | # IPD 23 Xen and the Art of Operating System Maintenance: A Removal of a Platform 7 | 8 | ## Goal 9 | 10 | Reduce maintenance burden and allow clean-up by removing special cases for the 11 | `i86xpv` platform. This platform exists solely to allow illumos to operate as 12 | a Xen paravirtualised guest, which is made effectively obsolete by 13 | hardware-assisted virtualisation features in modern CPUs; "hardware virtual 14 | machine" or HVM in Xen parlance.
15 | 16 | ## Background 17 | 18 | When the [Xen hypervisor](https://en.wikipedia.org/wiki/Xen) was first 19 | released, x86 CPUs did not provide hardware-assisted virtualisation features. 20 | In order to provide both isolation and reasonable performance on these CPUs, 21 | Xen provided a "paravirtualised" platform; i.e., one where the guest operating 22 | system must be modified to work. 23 | 24 | The `i86xpv` platform in illumos is a modification of the mainstream x86 25 | platform (`i86pc`) that operates as a paravirtualised Xen guest. Because of 26 | the kind of modifications required, most of the code is common with `i86pc`, 27 | except for a soup of `#ifdef` and other conditional code. A coarse estimate 28 | suggests many points of modification in both the `i86pc` platform code, 29 | and the `intel` architecture-level code: 30 | 31 | ``` 32 | $ grep -lr '#if.*def.*xpv' uts/intel | wc -l 33 | 25 34 | $ grep -rc '#if.*def.*xpv' uts/intel | awk -F: '{ q += $NF; } END { print q }' 35 | 105 36 | 37 | $ grep -lr '#if.*def.*xpv' uts/i86pc | wc -l 38 | 67 39 | $ grep -rc '#if.*def.*xpv' uts/i86pc | awk -F: '{ q += $NF; } END { print q }' 40 | 496 41 | ``` 42 | 43 | These modifications are often present in parts of the code that are complex, 44 | and already challenging to maintain, such as in the virtual memory subsystem 45 | and early boot code. 46 | 47 | This code is largely unmaintained, because guest modification for 48 | paravirtualisation has not been necessary under Xen for many years. AWS was 49 | probably the most commercially relevant venue for Xen paravirtualisation, and 50 | they have long since replaced it: 51 | 52 | * first with Xen HVM, which uses some of the original Xen drivers but on the 53 | `i86pc` platform; this covers instance types like T2, M4, etc. 54 | * then later with KVM and the Nitro platform; this covers newer instance types 55 | like T3, etc.
56 | 57 | Removing this platform is, in many senses, analogous to removing the 32-bit x86 58 | kernel: it presents a maintenance burden for mainstream 64-bit x86 work, 59 | without seeing any serious deployment in the field. 60 | 61 | ## Proposal 62 | 63 | Care must be taken to preserve drivers that are still useful in Xen HVM guests 64 | such as those available in AWS EC2. These drivers are built under 65 | `uts/i86pc/i86hvm`, and a partial list appears below: 66 | 67 | * `xpv`, a support driver for operating under Xen HVM 68 | * `xpvd`, the "virtual device nexus driver", which enumerates PV devices 69 | * `xdf`, the Xen block device driver 70 | * `xnf`, the Xen ethernet device driver 71 | 72 | The source for these drivers should be relocated from its present split of 73 | locations (`uts/i86xpv` and `uts/i86pc/i86hvm`) into `uts/i86pc/io/xen`. The 74 | module builds for the relevant modules should be moved to either the top level 75 | of `uts/i86pc`, as with other drivers, or possibly even to `uts/intel`, as with 76 | `vioblk` and `vioif`. 77 | 78 | Once these drivers are moved aside, we should remove the rest of the 79 | `uts/i86xpv` tree altogether, and anything else that builds software specific 80 | to the `i86xpv` platform. 81 | 82 | ### Xen HVM `cmdk` stub driver 83 | 84 | An unfortunate historical decision in Xen means that block devices are often 85 | exposed concurrently via two separate storage controller interfaces: an 86 | emulated PCI IDE controller, and the Xen `xdf` device. To prevent confusion, 87 | we must not try to access the disks via IDE, preferring `xdf`. 88 | 89 | PCI IDE devices on illumos involve several drivers: at the top, the `pci-ide` 90 | nexus binds to the `pciclass,0101` alias. That driver then uses `cmdk`, the 91 | ATA disk driver, to attach child nodes for detected disks. 92 | 93 | To prevent `cmdk` from attaching on Xen HVM systems, we have invented the 94 | fiction of an "`i86hvm` semi-platform". 
A stub module that does nothing is 95 | delivered as `/platform/i86hvm/kernel/drv/amd64/cmdk` and it would appear we 96 | prefer modules in `/platform/i86hvm` to `/platform/i86pc` on Xen HVM. 97 | 98 | ~~Another historical wart is that we do not appear to register `pci-ide` 99 | through `/etc/driver_aliases`, but rather through the obscure and outdated 100 | `/boot/solaris/devicedb/master` database. This should likely be corrected 101 | first.~~ _[see issue 14628?]_ Then, we can either modify `pci-ide` to ignore 102 | Xen devices, or provide a HVM-specific stub device that will attach to a 103 | Xen-specific alias. 104 | 105 | In an AWS guest, we can see the PCI device has these aliases: 106 | 107 | * `pci8086,7010.5853.1.0` 108 | * `pci8086,7010.5853.1` 109 | * `pci5853,1,s` 110 | * `pci5853,1` 111 | * `pci8086,7010.0` 112 | * `pci8086,7010,p` 113 | * `pci8086,...7010` 114 | * `pciclass,010180` 115 | * `pciclass,0101` 116 | 117 | It would, as is becoming a theme, regrettably seem that _every_ PCI device Xen 118 | exposes has the same subsystem ID, `0001`. Fortunately, `pci8086,7010.5853.1` 119 | would represent the combination of Xen and (emulated) 82371SB PIIX3 IDE, and 120 | we could bind a stub driver to that. 121 | 122 | Alternatively, `pci-ide` could refuse to enumerate devices when Xen is the 123 | vendor. 124 | 125 | Other than `cmdk` stub shenanigans, the rest of the "semi-platform" can likely 126 | be collapsed into `i86pc` without further issues. 
127 | 128 | ## Related Tickets 129 | 130 | * [14628 ancient devicedb should be removed](https://www.illumos.org/issues/14628) 131 | -------------------------------------------------------------------------------- /ipd/0026/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Garrett D'Amore 3 | sponsor: 4 | state: draft 5 | --- 6 | 7 | # Sunset CardBus and PC Card 8 | 9 | ## Introduction 10 | 11 | The CardBus (PC Card) stack in illumos dates from OpenSolaris, and was 12 | introduced as a contribution from Tadpole, extending the legacy 16-bit PCMCIA 13 | support already present in Solaris. It was used on SPARC laptops. 14 | 15 | CardBus is a 32-bit extension of the PCMCIA standard, and more or less can 16 | be thought of as an extension of PCMCIA to support PCI-style semantics, 17 | including 32-bit transfers and bus mastering. 18 | 19 | CardBus itself is long obsolete, and was replaced by ExpressCard (and really 20 | USB) in the early 2000s. Apparently some special purpose systems were 21 | still produced with CardBus as late as 2012. 22 | 23 | Our kernel no longer has any support for devices likely to be found 24 | on CardBus nodes, with the possible exception of CompactFlash devices 25 | masquerading as IDE on CardBus or (more likely!) PCMCIA. 26 | 27 | The CardBus APIs in our kernel are modeled on APIs specified by JEIDA, 28 | and are very unlike every other nexus interface in the kernel. 29 | It is one of the last things using certain legacy kernel APIs as well. 30 | 31 | The PCI nexus implementation contains certain code that exists only 32 | to support CardBus as well. 33 | 34 | We are unaware of any use of CardBus by anyone using illumos in the 35 | last decade or so. 36 | 37 | ## Proposal 38 | 39 | We propose to simply remove the cardbus stack altogether. 40 | This will also remove the last vestiges of PCMCIA support.
41 | 42 | Kernel APIs related to cardbus -- the `csx_Put8()`, `csx_Get8()`, and similar 43 | functions (generally all starting with `csx_`) would be removed. 44 | These are currently not in a dedicated cardbus module, but part of the 45 | common kernel DDI. 46 | 47 | ## Prior Discussion 48 | 49 | This has been discussed before. Pure 16-bit PCMCIA support was 50 | removed around a decade ago, and back in 2014 a proposal to remove 51 | CardBus itself was floated, along with a review. 52 | This discussion was on the illumos mailing lists, in the following 53 | threads: 54 | 55 | * [obsolete legacy PCMIA](https://illumos.topicbox.com/groups/developer/Te2c90b02ebe5b0aa-M526606b14e4160e4e3231875/obsolete-legacy-pcmcia) 56 | 57 | * [proposed EOF of PCMCIA](https://illumos.topicbox.com/groups/developer/T3be2124e9f17aa04-Maa11cbaf947ea116077801cf/proposal-eof-pcmcia-bits) 58 | 59 | * [webrev for removing cardbus (2014)](https://illumos.topicbox.com/groups/developer/T5edf352487b49a3b-Mcf2cecc58cdd912926f8bd63/webrev-removal-of-cardbus) 60 | 61 | ## Related Issues 62 | 63 | * [680 pm_create_components out to be cleaned up](https://www.illumos.org/issues/680) 64 | * [2398 pcs driver should be removed](https://www.illumos.org/issues/2398) 65 | * [5075 EOF cardbus & pcmcia](https://www.illumos.org/issues/5075) 66 | * [8510 pcmcia: typo in pcmcia_prop_op](https://www.illumos.org/issues/8510) 67 | -------------------------------------------------------------------------------- /ipd/0027/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Toomas Soome 3 | sponsor: Garrett D'Amore 4 | state: published 5 | --- 6 | 7 | # Sunset TNF 8 | 9 | ## Introduction 10 | 11 | From TRACING(3TNF): 12 | TNF is set of programs and API's that can be used to present a high-level view of the performance of an executable, a library, or part of the kernel. 13 | 14 | In short, it is superseded by dtrace and has become digital waste. 
15 | 16 | ## Proposal 17 | 18 | Remove TNF. The implementation consists of userland programs, headers and libraries and kernel API/probes: 19 | 20 | 1. tnf kernel module 21 | 2. tnf feature integration in parts of kernel 22 | 3. tnf feature integration in kernel modules: av1394, hci1394, hermon, ibmf, s1394, tavor 23 | 4. libtnf, libtnfctl and libtnfprobe libraries, manuals 24 | 5. prex, tnfdump and tnfxtract commands 25 | 6. packaging. 26 | 27 | I have prepared the change: 28 | 29 | [link to gerrit review](https://code.illumos.org/c/illumos-gate/+/1707) 30 | 31 | ## Conversion of Pre-Existing Probes 32 | 33 | Some probes, specifically in the scheduler, should probably be converted to static DTrace probes. 34 | Specifically the following probes will be replaced with DTRACE_SCHED1() probes as follows: 35 | 36 | * schedctl_failsafe -- becomes schedctl__failsafe, taking kthread_t 37 | * swapin_lwp -- becomes swapin__lwp, taking kthread_t 38 | * swapout_lwp -- becomes swapout__lwp, taking kthread_t 39 | * swapout_process -- becomes swapout__process, taking proc_t 40 | 41 | There will be no effort to convert the remaining TNF probes. 42 | Most of them are in either the obsolescent IEEE1394 stack or the tavor Infiniband HCA. 43 | Many of them (most!) can easily be handled with just the DTrace FBT provider.
44 | 45 | ## Prior Discussion 46 | 47 | [illumos-developer](https://illumos.topicbox.com/groups/developer/T35ec4a1cf45f3206-Me7b3ac7e1ca6b0c8ac78b971/tnf) 48 | 49 | ## Related Issues 50 | 51 | [14079 remove TNF](https://www.illumos.org/issues/14079) 52 | -------------------------------------------------------------------------------- /ipd/0028/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Garrett D'Amore 3 | sponsor: 4 | state: draft 5 | --- 6 | 7 | # EOF legacy Network Driver interfaces 8 | 9 | ## Introduction 10 | 11 | Solaris networking has long heritage of supporting many different 12 | networking technologies, and over the years numerous changes 13 | to the network stack have occurred, but as each of these changes 14 | has happened, more and more baggage (technical debt) has been 15 | kept in the name of compatibility. 16 | 17 | It's time to clean this up. 18 | 19 | ## Description 20 | 21 | This technical debt adds to the maintenance burden, and winds up 22 | bit rotting as the code paths no longer are executed or even testable. 23 | 24 | Specifically, the following technologies are obsolete, and unlikely 25 | to be used by any current illumos users. (In some cases they have 26 | been obsolete even before illumos was founded.) 27 | 28 | 1. Token Ring 29 | 30 | Token ring, aka IEEE 802.5, is a technology from the 1980s. 31 | In theory peak speeds of 100 Mbps were possible, but such 32 | devices were very rare, and users are more likely to have 33 | experienced 16 Mbps in the early 1990s. No driver for such 34 | hardware exists for illumos (legacy SPARC drivers were closed 35 | source) and we do not believe anyone has ever used token ring with 36 | modern illumos or on Solaris x86, or in the past decade on SPARC 37 | for that matter. 38 | 39 | 2. FDDI 40 | 41 | Fiber distributed data interface, aka IEEE 802.4, was an optical 42 | networking technology. 
It was used in some data centers on SPARC 43 | and Sbus cards. It has a maximum speed of 200 Mbps, and a larger 44 | 4K MTU, which made it interesting in the 1990s. It is unknown if 45 | an x86 driver for any hardware for illumos existed; a PCI form-factor 46 | was known to exist. (Ethernet owes a lot to work done by FDDI.) 47 | 48 | 3. DLPI (specifically driver support) 49 | 50 | Historically, SYSVR4 network drivers (and Solaris drivers by 51 | derivation) implemented to a standard called the Data Link Provider 52 | Interface (and most recently -- in the 1990s, DLPIv2). 53 | 54 | The DLPI is a streams based interface for device drivers. 55 | It was replaced in Solaris 10, as it was found that the lock 56 | contention caused by STREAMS greatly limited scalability. 57 | 58 | Not all device drivers were converted -- specifically some 59 | network device drivers produced exclusively for SPARC were 60 | never converted. (The most famous of these is Cassini.) 61 | 62 | Some enhancements were made (extensions) to help DLPI drivers 63 | perform better. The M_MULTIDATA message type being the primary 64 | one. The Cassini driver is the only known consumer of this, and 65 | that driver has never been made open source, nor is it supported 66 | on x86 illumos systems. 67 | 68 | Note that the DLPI is still provided by the GLDv3 (mac) framework, 69 | for application use. Applications may use the DLPI to access 70 | low-level details or for accessing raw link layer protocols. 71 | This IPD does not propose to change the support by GLDv3 for DLPI 72 | applications. 73 | 74 | 4. GLDv2 75 | 76 | Because the DLPI (and STREAMS) was difficult to write to, the 77 | Solaris team created a generic layer, the GLD (generic LAN driver) 78 | that was intended to make writing typical network drivers easier, 79 | and move much of the common trickier parts of the STREAMS and 80 | DLPI logic into a common driver maintained by the OS team. 
81 | This layer was enhanced at one point to be the GLDv2 we have now. 82 | 83 | The GLDv2 was used by a number of drivers before GLDv3 was 84 | widely available. Note that GLDv3 is a completely different beast, 85 | and there is no code shared between GLDv2 and GLDv3. 86 | 87 | It is generally fairly trivial to convert a GLDv2 driver to GLDv3 -- the 88 | author of this IPD did numerous such conversions, including one that 89 | was done in a half-day on a bet (Stephen Lau, I think you still owe 90 | me something for that, but I don't remember what the stakes were!) 91 | 92 | In the Solaris 10 time frame, some open source developers 93 | (including the author!) wrote GLDv2 drivers, and there were a 94 | few GLDv2 drivers in the core OS as well. 95 | 96 | In the illumos source code, the only remaining direct consumer of 97 | GLDv2 is chxge. (The USB GEM module has code to support GLDv2, but 98 | it is a compile time option, and it uses GLDv3 by default.) 99 | 100 | Note that GLDv2 still sadly has aspects that are linked to STREAMS. 101 | The GLDv3 hides all streams based APIs from the driver author. 102 | 103 | 5. Softmac (partial) 104 | 105 | The Softmac was created mostly as an adaptive layer between the 106 | GLDv3 and DLPI and GLDv2 drivers. Every driver uses it indirectly, 107 | as it plays a role in the vanity network naming logic, and it 108 | collaborates with dlmgmtd for this purpose. However, the vast 109 | majority of the code in it is an attempt to provide compatibility 110 | for legacy networks. With the sunset of SPARC, the last such 111 | legacy network card of any possible interest (Cassini) is no longer 112 | a concern. We can clean all this up. 113 | 114 | 6. M_MULTIDATA 115 | 116 | In order to help legacy Cassini and DLPI hardware perform well, 117 | a special message format was provided to help amortize the cost 118 | of traversing the STREAMS boundaries.
119 | 120 | However, the complexity of this means that numerous STREAMS 121 | functions have to specifically check each message to see if they 122 | are of this form. This is a tax on all network traffic, 123 | as well as a lot of non-network traffic (e.g. serial ports 124 | and ttys are implemented using STREAMS). 125 | 126 | 7. DLPI style 2 nodes. 127 | 128 | DLPI drivers historically could support minor node cloning 129 | by creating a special "UNBOUND" minor instance, e.g. "/dev/hme". 130 | 131 | An open of this special minor number was not associated with 132 | any real instance, but the STREAM would be bound to a specific 133 | interface (PPA) using a DLPI message. This style of access was 134 | common with most legacy SPARC network drivers. It was also 135 | responsible for many race conditions and bugs in the early 2000s. 136 | (It is also partly why we need getinfo(9e).) 137 | 138 | A simpler interface exists, where the PPA is part of the device 139 | minor node. For example, instead of "/dev/hme" we have "/dev/hme0". 140 | This is the style 1 interface, and is supported by GLDv3. 141 | (Historically GLDv2 supported both style 1 and style 2 minor nodes.) 142 | 143 | ## Implementation Steps 144 | 145 | In order to clean this up, there are distinct bodies of work that 146 | can be taken. 147 | Some of these have dependencies with each other, and some don't. 148 | 149 | 150 | 1. M_MULTIDATA support can just be removed. 151 | 152 | This is mostly finding and removing the references to it. 153 | This occurs mainly in the STREAMS utility functions, but also 154 | in the network layer. 155 | 156 | 2. Convert chxgbe to use GLDv3 157 | 158 | This step is necessary before GLDv2 can be eliminated. 159 | Fortunately, the effort to perform such a conversion is generally 160 | not very large, and in so doing, we may expose additional capabilities 161 | to chxge (such as better support for multiple RX and TX rings) 162 | to take advantage of in future work. 
163 | 164 | 3. Remove the #ifdef'd out for GLDv2 from usbgem. 165 | 166 | Not strictly required, but a nice clean up. 167 | This can happen immediately. 168 | 169 | 4. Mark GLDv2 and DLPI *Provider* Sides Obsolete. 170 | 171 | This means updating man pages and such to direct users towards 172 | GLDv3. This can happen immediately upon approval. 173 | 174 | 5. Remove support for non-ethernet transports from GLDv2. 175 | 176 | There are no such consumers. This can happen *now*. 177 | (GLDv2 retains code for FDDI, Token Ring, and Infiniband. 178 | Infiniband is already moved over to GLDv3, and the other two 179 | are already obsolete. This can happen immediately upon approval. 180 | 181 | While here, we should remove support for style 2 nodes from GLDv2. 182 | There are exactly zero GLDv2 providers who need to export style 2 nodes. 183 | 184 | 6. Remove support for Token Ring and FDDI in any other places 185 | 186 | There is at least some special handling for TPR in softmac. 187 | Probably in other places (snoop?) that can be cleaned up. 188 | 189 | 7. Retire GLDv2. 190 | 191 | Once there are no more consumers for it, we can remove it. 192 | This may take some time, and we may need to figure out if there 193 | are other providers in the system. 194 | 195 | 8. Provide a modern TAP driver. 196 | 197 | The current TAP driver used with OpenVPN is based on DLPI. 198 | This could (and should!) be converted to a GLDv3 driver. 199 | The driver masquerades as an Ethernet device. We should 200 | deliver this in-tree as well. 201 | 202 | 9. Remove the legacy DLPI logic in softmac. 203 | 204 | This step can only happen once there are no more GLDv2 or 205 | pure DLPI providers left to worry about. In particular, 206 | both chxgbe and tap will need to be addressed. There may 207 | be others. 208 | 209 | 10. Remove DLPI conversion support in the Softmac. 
210 | 211 | Most of softmac is a compatibility shim to facilitate the 212 | use of DLPI (and also GLDv2) by making them appear as GLDv3. 213 | (There are some compromises made here, however.) This code 214 | can go, once we have no such drivers any more. 215 | 216 | 11. Move the vanity naming from softmac to GLDv3. 217 | 218 | If we only have GLDv3 drivers, then GLDv3 can handle the part 219 | of softmac that exists to support vanity names. This will 220 | simplify the logic, and allow us to remove net_dacf as well. 221 | 222 | 12. Consider eliminating support for style 2 DLPI PPAs. 223 | 224 | Some special providers (legacy tun/tap) behave as style 2 225 | providers. If those are converted to GLDv3, then there won't 226 | be any further need for style 2. Applications such as snoop that 227 | have code to work with style 2 providers can be cleaned up. 228 | 229 | (Historical note: Originally some applications *only* had support 230 | for style 2, because legacy Sun SPARC drivers only supported style 2.) 231 | 232 | ## References 233 | 234 | * [PSARC 2002/276 TCP Multi-Data Transmit](https://illumos.org/opensolaris/ARChive/PSARC/2002/276/) 235 | * [PSARC 2004/594 Multi-Data Transmit Extensions](https://illumos.org/opensolaris/ARChive/PSARC/2004/594/) 236 | -------------------------------------------------------------------------------- /ipd/0029/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Garrett D'Amore 3 | state: Published 4 | --- 5 | 6 | # Sunset Sockets Direct Protocol 7 | 8 | ## Introduction 9 | 10 | Sockets Direct Protocol was created as a high performance 11 | stream transport (SOCK_STREAM) on top of RDMA, and in particular 12 | Infiniband. 13 | 14 | In illumos, a closed source module exists for it (sdpib), which 15 | makes use of various non-public APIs. 
16 | 17 | There are also modules "socksdp" and "sdp", which are not closed source, 18 | but are dependent upon sdpib to provide any meaningful use. 19 | (In theory SDP could run over other transports besides IB, but that has 20 | never been implemented for illumos.) 21 | 22 | Finally there is also an administrative command, sdpadmin.8. 23 | 24 | Sockets Direct Protocol is also now deprecated (for about ten years or so). 25 | 26 | The author is unaware of any use of SDP in illumos. 27 | It's not even clear that IB is getting (or has ever gotten) any use in illumos. 28 | The only IB cards we have driver support for are now also quite obsolescent. 29 | (We are not proposing to remove such drivers in this case, although 30 | it's reasonable that a future IPD might propose such.) 31 | 32 | ## Description 33 | 34 | We propose to simply remove the closed source sdpib strmod module, as well 35 | as the sdp and socksdp modules. Further, we would remove the sdpadmin(8) 36 | command (and associated documentation.) 37 | 38 | This potentially also will make it easier to clean up other 39 | interfaces that module may be using, at some future date. 40 | -------------------------------------------------------------------------------- /ipd/0030/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Garrett D'Amore 3 | sponsor: 4 | state: draft 5 | --- 6 | 7 | # Remove obsolete SCSA functions 8 | 9 | ## Introduction 10 | 11 | SCSA is the API provided for SCSI HBAs and targets. 12 | It has evolved quite a lot over the years, but we still 13 | have a number of interfaces which are completely unused, 14 | and may actually be unsafe. 15 | 16 | It's time to finally clean up this technical debt, which 17 | may ease other work later. 18 | 19 | ## Description 20 | 21 | The following interfaces have been marked Obsolete for a 22 | very, very long time (Solaris 8 at least). 
Furthermore, 23 | they have no known consumers (possible legacy closed source 24 | SPARC HBA drivers notwithstanding). 25 | 26 | * scsi_dmaget 27 | * scsi_dmafree 28 | * scsi_pktalloc 29 | * scsi_resalloc 30 | * scsi_resfree 31 | * scsi_pktfree 32 | * makecom 33 | * makecom_g0 34 | * makecom_g1 35 | * makecom_g5 36 | * scsi_slave 37 | * get_pktiopb 38 | * free_pktiopb 39 | 40 | In a few cases removing these functions will potentially make life better by 41 | removing code paths which actually hurt sustaining efforts. 42 | For example, cleaning up and optimizing the core DMA logic would be easier if 43 | the get_pktiopb interface didn't need to be updated. 44 | 45 | We propose to remove the implementation and all references to these 46 | functions. 47 | -------------------------------------------------------------------------------- /ipd/0031/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Garrett D'Amore 3 | sponsor: Dan McDonald 4 | state: published 5 | --- 6 | 7 | # Kernel Interface Stability Documentation 8 | 9 | ## Introduction 10 | 11 | Hearkening back to the days of early Solaris 2, 12 | we have documentation which discerns the difference 13 | between SVR4 standard DDI and Solaris (now illumos) extensions. 14 | 15 | This was deemed important when there was an attempt to 16 | harmonize the various SVR4 flavors of UNIX, back in the 1990s. 17 | 18 | There is no longer any real meaningful compatibility 19 | between SVR4 systems and Solaris or illumos. 20 | Driver developers must write a device driver that 21 | utilizes illumos-specific functions. 22 | 23 | Additionally, the author is unaware of *anyone* who has 24 | ever used this discriminating information for any useful 25 | purpose for at least two decades. 26 | 27 | It's quite possible that the information was *never* useful, 28 | to anyone, ever. 
Its only conceivable use would have been 29 | to facilitate porting drivers from another SVR4 to Solaris. 30 | This is not something that the author believes anyone has undertaken 31 | this millennium. 32 | 33 | Conversely, Interface Stability as used in other sections 34 | (sections 2 and 3 of the Reference Manual) is very useful, as 35 | it conveys details such as Committed or Obsolete, and may 36 | also convey additional clarifying information. 37 | 38 | Sometimes this information was presented in the INTERFACE LEVEL 39 | chapter, and sometimes it was in the Stability entry for the 40 | ATTRIBUTES table, and sometimes it wasn't presented at all. 41 | Occasionally it was presented in both places. 42 | 43 | ## Description 44 | 45 | We propose to fold the Interface Stability (Committed, Evolving, and Unstable) 46 | and any clarifying text into the INTERFACE STABILITY chapter as has been done 47 | for other sections. 48 | This should utilize content from INTERFACE LEVEL or the ATTRIBUTES table 49 | when present. 50 | 51 | The actual INTERFACE LEVEL chapter should then be removed, as well as 52 | any references to illumos vs. "generic" DDI/DKI. 53 | (All of these interfaces are the illumos DDI.) 54 | 55 | The ATTRIBUTES table should be removed as well. 56 | In some cases an ARCHITECTURE field may be present. 57 | For those cases, that information should be placed into an 58 | ARCHITECTURE chapter as is done for other sections of the manual. 59 | 60 | Note that the INTERFACE STABILITY and ARCHITECTURE chapters are 61 | well documented by mdoc(4). 62 | 63 | ## Implementation 64 | 65 | We should opportunistically fix manual pages. 66 | Alternatively, a single large change to update 67 | the manual all at once can be contemplated. 68 | 69 | Conceivably we could update mandoc so that linting man pages 70 | complains when it finds an INTERFACE LEVEL chapter. 71 | Perhaps this should also be done if an ATTRIBUTES chapter is found. 
72 | -------------------------------------------------------------------------------- /ipd/0032/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Garrett D'Amore 3 | sponsor: 4 | state: draft 5 | --- 6 | 7 | # Introduce scsi_hba_pkt_mapin 8 | 9 | ## Introduction 10 | 11 | There are two interfaces used for initializing SCSI 12 | packets in HBAs: 13 | 14 | tran_setup_pkt(9e): 15 | 16 | int prefix_tran_setup_pkt(struct scsi_pkt *pkt, 17 | int (*callback) (caddr_t), caddr_t arg); 18 | 19 | tran_init_pkt(9e): 20 | 21 | struct scsi_pkt *prefixtran_init_pkt(struct scsi_address *ap, 22 | struct scsi_pkt *pkt, struct buf *bp, int cmdlen, 23 | int statuslen, int tgtlen, intflags, int (*callback, 24 | caddr_t),caddr_t arg); 25 | 26 | Newer drivers should use the *vastly* simpler (to use) 27 | tran_setup_pkt (which also involves the use an explicit 28 | tran_pkt_constructor). By so doing, the HBA framework takes 29 | care of a ton of complexity around DMA windows (partial DMA 30 | mapping for example), setting up DMA binding, and so forth. 31 | 32 | This works very well for most situations. 33 | 34 | However, very occasionally a driver needs to access the address 35 | associated with the bp from kernel space. 36 | 37 | For example, some HBAs require certain commands to be emulated, 38 | or for the results of commands to be massaged. For example the 39 | hv_storvsc driver from Delphix adjusts INQUIRY responses to 40 | work around limitations in older versions of Windows. 41 | 42 | Some RAID cards emulate responses as well, but only for a very 43 | small set of commands. 44 | 45 | The only way to do this right now is to keep the bp passed in 46 | to tran_init_pkt, and call bp_mapin(9F) when this is needed. 47 | 48 | Today, drivers that need to access the data region associated with 49 | the buffer have to pay the entire cost of supporting the legacy 50 | API. 
51 | 52 | It would be nice if they could get a way to do the bp_mapin() and 53 | get the associated addresses so that they could access data fields 54 | directly. 55 | 56 | ## Proposal 57 | 58 | We propose a new API be added. This would only be supported for 59 | use with drivers that use tran_setup_pkt(9e): 60 | 61 | int scsi_hba_pkt_mapin(scsi_pkt_t *pkt, caddr_t *addrp, size_t *lenp); 62 | 63 | This function would only be usable once tran_start(9e) is called, 64 | and before scsi_hba_comp(9F) is called. It should be callable 65 | from user and kernel contexts (like bp_mapin). 66 | 67 | Note that because of this requirement, drivers that need to do 68 | this mapping may need to do so using a helper taskq or similar to get 69 | out of interrupt context as tran_start() may be called in interrupt 70 | context. However, the most frequent use case of this is for commands 71 | like INQUIRY, which generally are not executed from interrupt context. 72 | 73 | On success, this function: 74 | 75 | * Maps in the buffer (bp_mapin()). 76 | * Stores in addrp the kernel address corresponding to the physical address 77 | pkt->pkt_cookies[0].dmac_laddress 78 | * Stores in lenp the sum of the pkt cookie sizes. 79 | * Returns 1 (like other SCSA functions - corresponding to TRUE) 80 | 81 | On failure, this function: 82 | 83 | * Returns false 84 | 85 | ## Failure conditions 86 | 87 | The failure conditions we anticipate are: 88 | 89 | * No cookies (no transfer for the packet), e.g. for TEST UNIT READY 90 | command. 91 | * The packet was not initialized using tran_setup_pkt. 92 | 93 | ## No mapout needed 94 | 95 | The framework for bufs automatically does a bp_mapout when the buf is done. 96 | So there is no need to do so explicitly. We expect that this API will 97 | be only rarely used anyway. 98 | 99 | However, if need be, we could explicitly do a bp_mapout() in the 100 | code for scsi_hba_comp(). 
101 | -------------------------------------------------------------------------------- /ipd/0033/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Garrett D'Amore 3 | sponsor: 4 | state: predraft 5 | --- 6 | 7 | # Obsolete old style SCSI HBA API 8 | 9 | ## Introduction 10 | 11 | There are two interfaces used for initializing SCSI 12 | packets in HBAs: 13 | 14 | tran_setup_pkt(9e): 15 | 16 | int prefix_tran_setup_pkt(struct scsi_pkt *pkt, 17 | int (*callback) (caddr_t), caddr_t arg); 18 | 19 | tran_init_pkt(9e): 20 | 21 | struct scsi_pkt *prefixtran_init_pkt(struct scsi_address *ap, 22 | struct scsi_pkt *pkt, struct buf *bp, int cmdlen, 23 | int statuslen, int tgtlen, intflags, int (*callback, 24 | caddr_t),caddr_t arg); 25 | 26 | Newer drivers should use the *vastly* simpler (to use) 27 | tran_setup_pkt (which also involves the use an explicit 28 | tran_pkt_constructor). By so doing, the HBA framework takes 29 | care of a ton of complexity around DMA windows (partial DMA 30 | mapping for example), setting up DMA binding, and so forth. 31 | 32 | This works very well for most situations. 33 | 34 | Use of the scsi_init_pkt interfaces, which was the old way 35 | to write a driver, is fairly error prone, and many older 36 | drivers had bugs in this area of code. 37 | 38 | The other area where SCSI drivers wind up having complexity 39 | and confusion is around the scsi_hba_attach vs 40 | scsi_hba_attach and handling of SCSI addresses. 41 | 42 | Today, unless one uses iport(9), it is impossible to 43 | write a DDI compliant SCSI HBA unless one wishes to only 44 | support SPI with a maximum of 7 targets per bus. 45 | That's not typical for most situations today. 
46 | 47 | Additionally the following flags are at best confusing: 48 | 49 | * SCSI_HBA_ADDR_CLONE - clones the scsi_address per target (old style) 50 | * SCSI_HBA_ADDR_COMPLEX - modern HBAs should use this 51 | * SCSI_HBA_TRAN_CDB - allocates CDB area, modern HBA should always supply 52 | * SCSI_HBA_TRAN_SCB - allocates SCB area, modern HBA should always supply 53 | * SCSI_HBA_HBA - used to indicate driver is using iport(9) 54 | 55 | It would be better if everyone stopped using the older APIs. 56 | 57 | Unfortunately the documentation makes this somewhat less 58 | than obvious, as it simply refers to the new style APIs as 59 | an "alternative", and the mixed documentation for legacy 60 | APIs makes the task of writing a driver a lot more challenging. 61 | 62 | ## Proposal 63 | 64 | This proposal does not change any *code*, but it does 65 | propose to change the Stability level for some SCSI APIs. 66 | 67 | The following APIs would be marked Obsolete: 68 | 69 | * tran_init_pkt(9e) 70 | * tran_destroy_pkt(9e) 71 | * tran_sync_pkt(9e) 72 | * tran_quiesce(9e) - only for SPI drivers 73 | * tran_unquiesce(9e) - only for SPI drivers 74 | * scsi_hba_attach(9f) 75 | * SCSI_HBA_TRAN_CLONE 76 | 77 | We propose a new flag, which is the combination of several other 78 | flags: 79 | 80 | SCSI_HBA_TRAN_V3 combines: 81 | 82 | * SCSI_HBA_TRAN_CDB 83 | * SCSI_HBA_TRAN_SCB 84 | * SCSI_HBA_ADDR_COMPLEX 85 | * SCSI_HBA_HBA 86 | 87 | This "V3" means that the driver is fully compliant to SCSAv3 88 | and uses no legacy SCSI APIs. 89 | (TODO: IS THIS A GOOD NAME? PERHAPS A BETTER ONE? OR WE COULD 90 | REPLACE scsi_hba_attach_setup() with a function that takes 91 | no flags and simply passes the combination of these flags?) 92 | 93 | Additionally, tran_setup_pkt should be marked "mandatory" for new drivers. 94 | We propose a new API be added. This would only be supported for 95 | use with drivers that use tran_setup_pkt(9e). 
96 | 97 | Legacy entry points should have their details combined into a single 98 | manual page, that it makes it clear that these interfaces are 99 | obsolete and provides clear guidance about newer APIs to use. 100 | 101 | -------------------------------------------------------------------------------- /ipd/0034/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Garrett D'Amore 3 | sponsor: 4 | state: draft 5 | --- 6 | 7 | # Rationalize Kernel Architecture Module Paths 8 | 9 | ## TLDR (Abstract) 10 | 11 | This IPD proposes to end the notion of "special" or "privileged" 12 | kernel architectures, by including the kernel architecture (e.g. 13 | amd64, or in the future arm64 or whatever) in the kernel directory 14 | paths. This matches the existing practice for 64-bit amd64, as 15 | well as for the recently removed sparcv9 architecture. It would 16 | prohibit another architecture (say arm) from following the old 17 | approach of 32-bit architectures not including the architecture 18 | name in the module path. 19 | 20 | ## Background 21 | 22 | In the beginning, the only architecture possible for 23 | Solaris was sun4. A 32-bit architecture, with kernel 24 | modules located directories based solely on their function, 25 | such as /kernel/drv or /usr/kernel/strmod. 26 | 27 | When platforms got added, we added a /platform directory, 28 | so that different platforms ("implementations") 29 | based on the same architecture could deliver different 30 | versions of modules. For example, we used to have 31 | /platform/SUNW,Ultra-1 and /platform/SUNW,Ultra-450, 32 | each of which had subdirectories such as kernel, etc. 33 | 34 | At some point we added i386 as an architecture, with the 35 | single platform "i86pc". In all other respects it followed 36 | the same model as sparc. 37 | 38 | In the Solaris 7 time frame, we grew support for 64-bit kernels, 39 | and a new "architecture", sparcv9 was born. 
However, this 64-bit 40 | architecture could coexist with 32-bit binaries on the same system. 41 | To discriminate between 32-bit and 64-bit kernel modules (and remember 42 | the choice to boot 32- or 64-bit was able to be made at boot time), 43 | the 64-bit kernel architecture was inserted into the module path. 44 | 45 | For example, a SCSI driver might have paths like this: 46 | 47 | /kernel/drv/fas <- 32-bit sparc binary 48 | /kernel/drv/sparcv9/fas <- 64-bit sparcv9 binary 49 | 50 | Or on x86: 51 | 52 | /kernel/drv/mpt <- 32-bit i386 binary 53 | /kernel/drv/amd64/mpt <- 64-bit amd64 binary 54 | 55 | Before illumos was forked from OpenSolaris, the 32-bit SPARC platform 56 | support had been retired. Not too long ago we also retired support 57 | for 32-bit i386 kernels. And even more recently, we retired 58 | support for SPARC altogether. 59 | 60 | That leaves us with 61 | 62 | /kernel/drv/amd64/mpt 63 | 64 | (And similarly for /platform paths or /usr/kernel, and also for 65 | other kinds of modules besides drivers.) 66 | 67 | ## Proposal 68 | 69 | We propose to codify the current practice, and forever prohibit the 70 | old practice of kernel load paths that do not include the kernel 71 | architecture. At present the only kernel architecture supported by 72 | illumos is "amd64", although we might expect "aarch64" (or perhaps 73 | it will be called "arm64") to be added to this list, as well as 74 | perhaps "riscv".) 75 | 76 | Thus there will no longer be an "implied" architecture if none is 77 | specified. 78 | 79 | This approach should simplify packaging and documentation. 80 | 81 | Note that there are no changes needed to code to effect this change 82 | today. The only things that should probably be fixed here would 83 | be clarifications in man pages that list explicit architecture load 84 | paths. (They can change to listing e.g. /kernel/drv/${KARCH}/driver 85 | instead of enumerating them for each architecture.) 
86 | 87 | None of this has any effect on platform names (i86pc, i86hvm, or 88 | possible future platform implementation names). Multiple platforms 89 | can share the same kernel architecture. 90 | 91 | ## Future Directions 92 | 93 | It seems somewhat unlikely that we will ever need to support both 94 | 32- and 64-bit architecture kernels on the same system, or even 95 | to deliver "dual" architecture systems in the future. Likely at 96 | some point even legacy i386 32-bit userland bits will be something we 97 | do not deliver, and even if we do, 32-bit support for i386 may be 98 | something of a special case rather than something we do again as part 99 | of our mainstream design. 100 | 101 | Thus, we may wish to change the packaging code to eliminate the 102 | ARCH64 variable, and just replace it with a KARCH variable for packaging. 103 | Arguably this would eliminate some of the special cases. 104 | -------------------------------------------------------------------------------- /ipd/0035/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Garrett D'Amore 3 | sponsors: 4 | state: draft 5 | --- 6 | 7 | # IPD 35 Sunset VTOC - SPARC 8 | 9 | ## Abstract 10 | 11 | We propose to eliminate support for the legacy VTOC format used 12 | on SPARC systems (with a maximum of 8 slices). The 16 slice 13 | format used on x86 systems is not affected by this IPD. 14 | 15 | ## Background 16 | 17 | Solaris on SPARC had a legacy going back to SunOS 4 (and likely further back 18 | to SunOS 3 or even 2) of partitioning a disk into "slices", and assigning 19 | minor numbers for each device. 20 | 21 | For reasons lost to memory, the decision to support a maximum of 8 slices 22 | per physical disk was made, with some slices (slice 2 in particular) being 23 | special (slice 2 refers to the whole disk.) 
24 | 25 | When Solaris was ported to x86, a different format was chosen -- likely 26 | to accommodate other SYSV UNIX implementations available on the platform 27 | at the time. Instead of 8 slices, a maximum of 16 are supported on 28 | x86 platforms. 29 | 30 | To this day, even for GPT disks, a maximum of 16 slices are available. 31 | 32 | While arguably 7 is more than sufficient (and arguably in the modern era almost 33 | everyone uses the entire disk without slicing using GPT instead of FAT 34 | partitions), the current convention is still up to 16. Additionally 35 | minor numbers have been allocated to refer to the "whole disk", instead of 36 | making slice 2 "magical". 37 | 38 | There are a number of places (scattered throughout the driver stack, FMA, 39 | various user utilities and libraries) that have to cope with this dichotomy. 40 | Generally this is done via #ifdef sparc etc. In some cases, if neither 41 | sparc nor i386 is defined, then a compilation error is triggered. 42 | 43 | IPD 19 approved the sunset of SPARC altogether, and work is underway 44 | to remove it. 45 | 46 | ## Proposal 47 | 48 | We think it would be better, and easier for future platform porters, 49 | to just firmly adopt the conventions used on the x86 platform, and 50 | discard all support for legacy SPARC VTOCs. 51 | 52 | This means that ifdef's can be removed, and we can assume that the i386 53 | implementation for disk labeling is the only one we support. 54 | 55 | (As an aside, we hope new platforms will adopt GPT rather than FAT style 56 | partitioning.) 
57 | -------------------------------------------------------------------------------- /ipd/0036/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Garrett D'Amore 3 | sponsor: 4 | state: draft 5 | --- 6 | 7 | # Rationalize $(MACH64) Command Paths 8 | 9 | ## TLDR (Abstract) 10 | 11 | This IPD provides guidance on the use of architecture specific 12 | paths in runtime search paths such as /usr/bin and library 13 | search paths such as /usr/lib. 14 | 15 | It helps establish a set of guidelines leading to more 16 | comprehensive support for 64-bit architectures. 17 | 18 | It also proposes that no new 32-bit architectures will be 19 | introduced to illumos. 20 | 21 | ## Goals 22 | 23 | The purpose of this IPD is to provide guidance for developers 24 | working on new platform ports, as well as for the eventual migration 25 | of much of our user space to being fully 64-bit compatible. 26 | 27 | A hard requirement is not to break binary compatibility for the 28 | large set of existing 32-bit binaries for i386. 29 | 30 | ## Background 31 | 32 | In the early days of Solaris, for a given system there was only 33 | one architecture supported (either sparc or i386). 34 | 35 | When 64-bit support was introduced, it was introduced incrementally, 36 | such that some things that needed to be 64-bit (to facilitate working 37 | with larger amounts of data in commands like "tar") were added, but 38 | 32-bit equivalents were left behind. 39 | These additions were made in a subdirectory called $(MACH64) -- 40 | sparcv9 for sparc, and amd64 for i386. 41 | 42 | Additionally, at the time, it was possible to select the architecture 43 | (i386 or amd64, and sparc or sparcv9) at boot time. 44 | 45 | For user commands, the decision was to generally prefer 64-bit versions 46 | over the 32-bit ones, when it was possible to do so. 
47 | To facilitate this, those commands that were delivered as dual 48 | architecture commands had a 32-bit version delivered in a subdirectory 49 | called $(MACH) (i386 or sparc), and the parent (e.g. /usr/bin) 50 | directory had the program hardlinked to isaexec, which would choose 51 | the optimal version (from the appropriate subdirectory) and execute it. 52 | 53 | Additionally, to facilitate development of both 32 and 64-bit programs, 54 | as well as binary compatibility, shared libraries were organized along 55 | similar lines, except that the 32-bit versions were left behind in the 56 | parent (/usr/lib, etc.) in order to avoid breaking binary compatibility. 57 | 58 | This organization allowed coexistence of 32 and 64 bit binaries, but 59 | it complicated delivery of software, as well as rules for linking 60 | software, etc. 61 | 62 | Today, illumos supports only 64-bit kernels. 63 | It is unlikely that support for 32-bit mode kernel operation will 64 | ever occur again. At present core illumos only supports amd64, 65 | although work is in progress to support other architectures, most 66 | notably aarch64. 67 | 68 | A number of user space components exist which are not today capable 69 | of running in 64-bit mode. We consider this a defect in those 70 | components. 71 | 72 | ## Proposal 73 | 74 | In recognition of the following: 75 | 76 | * No new 32-bit architectures will be introduced which support 32-bit mode execution. 77 | * No support for bi-architecture is likely to occur, and we should not attempt to facilitate it. 78 | * We should try to make the system easier to understand for administrators, users, and developers. 79 | 80 | we therefore propose: 81 | 82 | * "Deprecation" of /usr/bin/${MACH} and /usr/bin/${MACH64}, as well as /usr/lib/${MACH64} 83 | - this includes other prefixes such as /usr/platform, etc. as well as notionally equivalent 84 | directories like /usr/sbin. 
85 | - new ports should refrain from introducing these directories 86 | * Packaging manifests should have their references to the above directories qualified with i386_ONLY. 87 | * For i386/amd64 only, the *LIBRARY* 64-bit may continue to be used for new deliveries, as existing 88 | search directories already exist. 89 | * For new 64-bit platforms, the following symbolic link should be installed: /usr/lib/64 -> . 90 | - also for /usr/platform/*/lib, /lib, etc. 91 | - this allows Makefiles to still use /64 in link rules 92 | - it moves towards using simpler linker paths elsewhere 93 | * Distributions on amd64 MAY choose to dispense with 32-bit compatibility, and use the same approach 94 | discussed here for other architectures. This will come at an expense to binary compatibility with 95 | other i386 distributions. 96 | * All user-space commands and libraries should be made to function in 64-bit mode, treating any 97 | failure to do so as a bug. 98 | - this is necessary to support some new architectures 99 | - it may facilitate work towards Y2038 compliance (see [IPD 14](../0014/README.md)). 100 | * Commands which currently deliver executables into usr/bin/amd64 should, when the command is 101 | converted to 64-bit by default, leave behind a symbolic link in usr/bin/amd64 to help any 102 | scripts or users that have muscle memory tied to the 64-bit path. 103 | * Use of /usr/bin/i386 should be exceedingly rare. In general we would prefer that nothing 104 | deliver there, although there may be specific exceptional cases for it, such as for tools that 105 | have to be 32-bit to support the 32-bit environment (for example 32-bit mdb is required for 106 | full support when debugging 32-bit binaries). 107 | 108 | ## Future Directions 109 | 110 | It may be desirable in the future to relegate 32-bit libraries to a separate directory for legacy libraries, such as `/usr/lib/i386`. 
111 | This can be done without breaking binary compatibility if the loader is modified to explicitly search 112 | these paths when resolving symbols for a 32-bit binary. 113 | 114 | At that time, it may be possible and desirable to move the contents of /usr/lib/amd64 to /usr/lib (and leave 115 | behind a symbolic link). 116 | 117 | ## Related Cases 118 | 119 | * [IPD 14 illumos and Y2038](../0014/README.md) 120 | * [IPD 19 Sunset SPARC](../0019/README.md) 121 | * [IPD 34 Rationalize Kernel Architecture Module Paths](../0034/README.md) 122 | -------------------------------------------------------------------------------- /ipd/0040/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Richard Lowe 3 | sponsor: 4 | state: draft 5 | --- 6 | 7 | # Cross compilation for illumos 8 | 9 | As part of the [ARM project](../0024/README.md) a cross compilation 10 | environment is necessary. This has also been mooted as a desirable step for 11 | restoring SPARC to life, or for any other future platform support in illumos. 12 | 13 | Here we attempt to describe what has been done as part of the ARM project, and 14 | any necessary future work, in terms separated from the specifics of that 15 | project. 16 | 17 | ## Goals 18 | 19 | We need to be able to build a full, valid, set of illumos packages on an 20 | illumos host of a different architecture than that which we target. 21 | 22 | ## Non-Goals 23 | 24 | Building on non-illumos hosts is a specific non-goal, but a future project is 25 | welcome (and able) to tackle that using this as a starting point. 26 | 27 | ## Theory 28 | 29 | ### Build system 30 | 31 | Within the illumos build system, we already refer to the target machine 32 | symbolically via the `$(MACH)` and `$(MACH64)` macros, which are used 33 | throughout to refer to the host or target machine architecture (`i386` and 34 | `amd64`, and previously `sparc` and `sparcv9` respectively). 
35 | 36 | We separate this into macros used for the target -- `$(MACH)` and `$(MACH64)` 37 | as before -- and those for the host, `$(NATIVE_MACH)` and `$(NATIVE_MACH64)`. 38 | 39 | All software built for the host machine must now use the `NATIVE` prefixed 40 | macros (`NATIVECC` etc.) to do its work, and all native paths must be built in 41 | terms of `$(NATIVE_MACH)` etc where relevant. 42 | 43 | As an example, the tools install binary becomes 44 | `$(ONBLD_TOOLS)/bin/$(NATIVE_MACH)/install` as `$(MACH)` now exclusively 45 | refers to the target machine. 46 | 47 | This leaves the majority of the non-native build system alone, as `$(MACH)`, 48 | settings based on `$(MACH)` all continue to -- correctly -- refer to the 49 | target environment. 50 | 51 | ### `ADJUNCT_PROTO`, or sysroot 52 | 53 | We in illumos have the concept of an adjunct to the proto area where 54 | dependencies for the target system required by illumos but not part of illumos 55 | can be found. 56 | 57 | This unfortunately was done to solve two somewhat related but actually 58 | separate problems. 59 | 60 | 1. A desire to build in a logically cross-compilation environment where other 61 | software for the target system does not match that on the host. 62 | 2. A need, on SmartOS from where this work originated, to not rely on the 63 | contents of /usr which are read-only and derived from the boot media. 64 | 65 | We make the treatment of #1 more thorough, at the expense of making #2 66 | somewhat more problematic. 67 | 68 | The concept of the adjunct proto is extended to make it an actual sysroot, 69 | rather than just one in spirit. 70 | 71 | This means that (for builds targeting the target rather than the build 72 | system), `ADJUNCT_PROTO` is used fully instead of the root file system, rather 73 | than in addition to it. 
This fulfils both our goal of not using any 74 | build-system files for a target build (which would otherwise either fail 75 | mysteriously, or succeed erroneously), and one of the original goals of 76 | `ADJUNCT_PROTO`, that a native build in an adjunct environment is _really_ a 77 | cross compilation to a different system of the same ISA. 78 | 79 | The #2 use of the adjunct proto makes life more complicated. Theoretically, 80 | anything not in the (incomplete, SmartOS) adjunct will be found from the 81 | build proto area. Unfortunately this is not _quite_ true for complex reasons, 82 | and likely SmartOS will need to arrange to have, at least, the C runtime 83 | objects in their proto area. It would be better, both from a correctness and 84 | a maintenance standpoint, for SmartOS to switch to a complete sysroot, but I 85 | understand that their build system does not allow for this in practice. 86 | 87 | We have elected to solve the larger problems thoroughly, and trust in the 88 | SmartOS maintainers to fix the problems specific to their system. 89 | 90 | It is envisioned that in the majority of situations a suitable system root can 91 | be constructed via the operating system packaging facilities installing into 92 | an alternate root directory. 93 | 94 | Using the image packaging system as an example one can `pkg image-create` a 95 | zone image, and install precisely that software into it that is suitable for 96 | the target machine. Archives of these images can be distributed to end-users 97 | or other build machines, either in the form of tarballs, package `.p5p` 98 | archives, etc. 99 | 100 | Other systems could use their native package format to do the equivalent, or 101 | otherwise use the root filesystem image output from an appliance build 102 | process. 
103 | 104 | ### Tools 105 | 106 | We will require all our build tools to be capable of operating in a cross 107 | environment, ideally without further help (such as is the case with `ld(1)`, 108 | etc.), at worst the addition of a `--target` type argument. 109 | 110 | Tools such as `cw(1ONBLD)` have been modified to remove all their 111 | target-specific knowledge, other tools are innately ok, some tools, such as 112 | `dtrace(8)` and `elfwrap(1)` require future work. 113 | 114 | ## Operation 115 | 116 | A new `-T` flag is added to `nightly(1ONBLD)` and `bldenv(1ONBLD)` allowing the 117 | specification of the machine the build is to target, it is envisioned that 118 | environment files will be specified such as to be correct regardless of 119 | machine by overriding the `$(MACH)`-prefixed variables rather than their uses. 120 | For instance, one would set `i386_PRIMARY_CC` and `aarch64_PRIMARY_CC` not 121 | `PRIMARY_CC`. 122 | 123 | `nightly` and `bldenv` are further adjusted to make clear what is being built, 124 | with `bldenv` saying: 125 | 126 | ``` 127 | Build type is aarch64/DEBUG (cross) 128 | VERSION is arm64/pkgdepend-0-g18528b4d131 129 | RELEASE_DATE is April 2023 130 | ``` 131 | 132 | For a cross build from i386 to aarch64, for example, and nightly's output 133 | including the target in its header lines 134 | 135 | ``` 136 | ==== Build errors (aarch64/DEBUG) ==== 137 | ``` 138 | -------------------------------------------------------------------------------- /ipd/0041/README.adoc: -------------------------------------------------------------------------------- 1 | :showtitle: 2 | :toc: left 3 | :numbered: 4 | :icons: font 5 | :state: published 6 | :revremark: State: {state} 7 | :authors: Robert Mustacchi 8 | :sponsor: Rich Lowe 9 | 10 | = IPD 41 Improving PCI devinfo Naming and Future Platforms 11 | {authors} 12 | 13 | [cols="3"] 14 | |=== 15 | |Authors: {author} 16 | |Sponsor: {sponsor} 17 | |State: {state} 18 | |=== 19 | 20 | 
https://github.com/illumos/ipd/blob/master/ipd/0009/README.md[IPD 9 PCI 21 | Alias Disambiguation] laid out the path for what we should do to try to 22 | solve a thorny ID problem. However, there are two additional things that 23 | we need to deal with: 24 | 25 | 1. Type 1 PCI devices do not have subsystems in place as part of their 26 | compatible properties. 27 | 2. What should the set of PCI aliases, node names, and compatible 28 | properties be for future platforms and how do we best harmonize this 29 | with what x86 has done today. 30 | 31 | We have subsystem IDs for everything today other than bridges. While 32 | attempting to put together a fix for issues like 33 | https://www.illumos.org/issues/15587[15587 PCI bridge subsystem IDs are 34 | ignored], the history and nuance made this a little bit trickier and 35 | means that some of the straightforward paths are not options. This 36 | IPD suggests what the path forward on x86 should be and then concludes 37 | with what should be done on new platforms. 38 | 39 | == Summary of Proposed Changes 40 | 41 | * PCI bridges will now always populate the `subsystem-vendor-id` and 42 | `subsystem-id` properties if present. 43 | * x86 PCI Bridges **will not** change their devinfo node name to the 44 | subsystem ID. New platforms will use the subsystem ID if present. 45 | * New platforms (e.g. ARM, RISC-V, etc.) will name nodes either `pciex` 46 | or `pci` depending on whether or not they are actually PCI Express or 47 | traditional PCI devices. 48 | * Bridges will only include the unambiguous subsystem ID PCI alias (e.g. 49 | `pci1234,5678,s`). New platforms will not include the ambiguous form 50 | of primary or subsystem IDs for any devices. 51 | * New Platforms will continue to follow the IPD 9 suggestion of not 52 | including PCI aliases for PCI Express devices.
53 | * Logic to set the compatible aliases, node names, and related will be 54 | shared across all platforms and will no longer be duplicated in both 55 | the boot and hotplug paths. 56 | 57 | == Subsystems for PCI Bridges 58 | 59 | PCI devices have multiple different types of headers that are found in 60 | configuration space. These are called Type-x where x is the value of the 61 | field. There are three such headers defined right now: 62 | 63 | * Type 0 headers: These are used by almost all PCI and PCIe devices 64 | other than bridges. 65 | * Type 1 headers: These are used for PCI-PCI bridges. 66 | * Type 2 headers: These are used by Cardbus (and we will ignore for the 67 | rest of this IPD) 68 | 69 | In the initial 0x40 byte header there are a number of fields that are 70 | the same between the different PCI headers. These include things like 71 | the device ID, vendor ID, class code, interrupt pin, and capabilities 72 | pointer. However, there are many properties that are different. For 73 | example the layout of base address registers is one major case. Most 74 | relevant for this IPD is that of the PCI subsystem IDs. 75 | 76 | While Type 0 and Type 2 headers have defined subsystem IDs in this part 77 | of the space, Type 1 headers do not. Instead, for type 1 headers there 78 | is an optional PCI capability for a subsystem ID. This capability, with 79 | code `0xd`, is found in the traditional PCI configuration space and is 80 | not part of extended configuration space. 81 | 82 | == devinfo tree node names 83 | 84 | PCI and PCI express nodes use two different pieces to try and construct 85 | the node name that is used in the devinfo tree. The node name is fairly 86 | important as this is what we use for the name in the actual `/devices` 87 | file system. 88 | 89 | While it is possible for `/devices` paths to change, there is a general 90 | expectation that some things have some amount of consistency. 
The most 91 | notable piece of this is due to how ZFS encodes information about how to 92 | boot systems. Here, `/devices` paths are used as part of booting in 93 | particular on i86pc based systems. This is most notable through changes 94 | like https://illumos.org/issues/7119[7119 boot should handle change in 95 | physical path to ZFS root devices]. While 7119 did help for some 96 | devices, it does not help for everything. This is worth noting as we 97 | delve further. 98 | 99 | A second example of this is with FMA's retire store and faulty devices. 100 | The retire store is populated based on the /devices path of entries and 101 | that makes it into persistent files. The retire store is more like 102 | `/etc/path_to_inst`, in that changes can be dealt 103 | with, though they have some side effects. While 104 | the retire store unretiring a device is not as bad (but not great), 105 | having `/etc/path_to_inst` change does have an impact on instance naming 106 | which in turn can impact networking configuration. 107 | 108 | The node name is structured in the form `<prefix><ID 0>,<ID 1>`. IDs 0 and 1 are 109 | the subsystem vendor ID and subsystem ID if both are valid, otherwise 110 | they are the primary vendor and device ID. This was the same on both 111 | SPARC and x86. The biggest difference between them has been the prefix. 112 | On x86 the prefix is always `pci`; however, on SPARC the prefix was 113 | either `pciex` or `pci`, depending on whether or not the device was a 114 | PCI Express device. 115 | 116 | The reason that this is problematic for bridges is that neither SPARC nor 117 | x86 ever looked for the subsystem IDs for bridges (both platforms' 118 | `pcicfg_set_childnode_props` in the hotplug path just assume Type 0 119 | headers, though it is possible the PROM did something here).
If we just 120 | properly found the subsystem ID and just used that blindly, then we 121 | would end up changing the `/devices` paths of everything and that would 122 | lead to boot failures for a number of different configurations. This 123 | means that the simple path of just using the new device ID isn't what we 124 | should be doing. 125 | 126 | When faced with the stark challenge of breaking most installs or not, the 127 | answer is quite simple: do not. This means that we cannot change the IDs 128 | used on x86 for bridges even if a subsystem exists. We can still set the 129 | various devinfo properties such as `subsystem-vendor-id` and 130 | `subsystem-id`. 131 | 132 | For non-x86 platforms that are new and therefore not constrained with 133 | this compatibility problem, we will move forward such that the prefix 134 | part of the node name is either `pciex` or `pci` depending on whether or 135 | not the device is a PCI express device or not. In addition, we propose 136 | that they use the subsystem ID when present for bridges, eliminating the 137 | distinction between Type 0 and Type 1 systems. 138 | 139 | === 1275 Generic Names 140 | 141 | SPARC opted to use generic names for different devices based on 142 | the class code. So rather than naming a device `pci8086,10de` for the 143 | Intel 82574L, it would instead name itself `ethernet`. This has 144 | traditionally been controlled by definitions in `sys/isa_defs.h`. x86 145 | has always defined `_DONT_USE_1275_GENERIC_NAMES` which stops this 146 | behavior. With SPARC no longer being supported, this will be removed 147 | entirely and new platforms will not try to use 1275 generic names at 148 | this time. 149 | 150 | The main reason for this is that this list cannot easily be added to. 151 | For example, the above lists never had support for the NVMe device 152 | class. Once support for the device is added, we really should avoid 153 | trying to change its name in `/devices`. 
This is not a hard constraint 154 | to say that node names cannot change, but rather based on some of the 155 | challenges with the structuring of booting with ZFS and others, we 156 | basically view it as not being worth the benefit. `/devices` isn't 157 | really meant as an interface for humans, but rather is for the system 158 | itself. 159 | 160 | == Bridge Compatible IDs 161 | 162 | The `compatible` property for such devices is discussed at length in 163 | https://github.com/illumos/ipd/blob/master/ipd/0009/README.md[IPD 9 PCI 164 | Alias Disambiguation]. The main focus of that effort was the addition of 165 | the suffixed versions of non-fully qualified PCI IDs where we used `,p` 166 | and `,s` to indicate if an ID was the primary or subsystem ID. 167 | 168 | Because x86 has never exposed the ambiguous versions of the bridge 169 | subsystem ID, we propose that we **do not** include it here. While this 170 | is a slight difference from other x86 devices, it ultimately takes us 171 | more down the path that we want to be on and also provides a layer of 172 | safety. Issues like the misprogrammed device in 173 | https://www.illumos.org/issues/11610[11610 PCI ID ambiguity leads to 174 | driver induced mayhem] may be lurking and given the prominence of 175 | PCI-PCI bridges, the use of the preferred form will provide us a degree 176 | of safety. 177 | 178 | Due to the fact that bridges have always included the primary 179 | vendor/device ID alias, we cannot get rid of that for x86 bridges; 180 | however, for new platforms, we should not include the ambiguous device 181 | IDs at all and only use the `,p` and `,s` versions. 182 | 183 | New platforms have a little bit more freedom in this space. IPD 9 184 | already proposed that we do not include PCI aliases for PCIe devices. 185 | However, for PCI devices that we encounter we should ask what subset of 186 | IDs to include.
It is tempting to follow suit and eliminate the 187 | non-fully qualified subsystem IDs entirely, but we currently suggest 188 | that we include the suffixed versions. 189 | 190 | In summary, this means that PCI ID aliases will be ordered as: 191 | 192 | . pci<vendor>,<device>.<subsystem-vendor>.<subsystem-id>.<revision> 193 | . pci<vendor>,<device>.<subsystem-vendor>.<subsystem-id> 194 | . pci<subsystem-vendor>,<subsystem-id>,s 195 | . pci<subsystem-vendor>,<subsystem-id> (x86 only) 196 | . pci<vendor>,<device>.<revision> 197 | . pci<vendor>,<device>,p 198 | . pci<vendor>,<device> (x86 only) 199 | . pciclass,<class><subclass><programming-interface> 200 | . pciclass,<class><subclass> 201 | 202 | == pcieadm enhancements 203 | 204 | Along with this work, we will enhance pcieadm show-devs with the 205 | following top-level fields: 206 | 207 | * `SVID`: Subsystem Vendor ID 208 | * `SSID`: Subsystem ID 209 | * `SUBSYSTEM`: The string form of the subsystem from the PCI IDs 210 | database 211 | 212 | == Centralizing Logic 213 | 214 | One last part of this is that we really should clean up the per-platform 215 | nature of this. Both x86 and SPARC had separate copies of all the logic 216 | to set basic devinfo properties on PCI devices in the boot path. Even 217 | worse, x86 has different copies with slightly different behavior in the 218 | boot and hotplug case. The hotplug case missed the original IPD 9 219 | efforts as a result. 220 | 221 | Rather than continuing to have this copied and pasted around the gate, 222 | we should instead have a single set of logic for setting this up which 223 | can encode these rules for future platforms now. Work on future 224 | platforms is allowed to modify these decisions based on the reality on 225 | the ground as the port is being done, but it is our hope that this 226 | simplifies the effort. 227 | 228 | The initial location of this will be the `pcie` module. This is being 229 | chosen mostly out of practicality. The two current drivers of this logic 230 | are the `pcicfg` module and the `pci_autoconfig` module on x86. Both of 231 | these depend on the `pcie` module. While a bit surprising, the `pci` 232 | module is not used as part of this process unless something else ends up 233 | causing it to be loaded.
The `pci` module is actually a nexus driver 234 | whose PCI Express equivalent is `npe`. 235 | -------------------------------------------------------------------------------- /ipd/0042/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | authors: Peter Tribble 3 | sponsors: 4 | state: draft 5 | --- 6 | 7 | # IPD 42 Sunset native printing 8 | 9 | ## Introduction 10 | 11 | The native print system in illumos has received essentially no attention 12 | throughout the lifetime of the project. Distributions, by and large, 13 | ship CUPS - a modern, supported print system that is also widely used 14 | elsewhere. 15 | 16 | At this time the native printing components are dead weight, and are 17 | potentially blockers for other projects, such as 18 | 19 | * [IPD 14 illumos and Y2038](../0014/README.md) 20 | * [IPD 24 Support for 64-bit ARM](../0024/README.md) 21 | 22 | largely due to the current implementation being purely 32-bit. 23 | 24 | The actively supported printing consumer in illumos is the smb stack, which 25 | uses CUPS. 26 | 27 | A combination of lack of use, lack of maintenance, no 64-bit code, and the 28 | presence of a superior alternative, suggests that removal of our implementation 29 | would be beneficial. 30 | 31 | ## Current implementation and transition 32 | 33 | The current printing implementation foresaw the elimination of the legacy 34 | lp stack and its replacement by CUPS. A `print-service` command is installed 35 | in /usr/sbin, and the user invoked commands are symbolic links to that binary. 36 | The binary then invokes either the legacy command (installed under /usr/lib/lp) 37 | or the CUPS variant (expected to be installed under /usr/lib/cups). 38 | 39 | Invoking the `print-service` command directly allows an administrator to 40 | switch between different implementations of the print service. 
41 | 42 | Ultimately, this implies that removing the legacy print service is a flag 43 | day for any distribution using this mechanism. An investigation of the current 44 | state of distribution printing indicates that: 45 | 46 | * OpenIndiana uses the print-service mechanism and would need to rebuild 47 | CUPS to install directly under /usr after the native lp print system was 48 | removed 49 | * OmniOS ships CUPS separately as part of omnios-extra, but installs it in a 50 | non-conflicting path 51 | * SmartOS does not ship a printing system at all, but CUPS is available 52 | from pkgsrc 53 | * Tribblix does not use the print-service mechanism and ships CUPS in the 54 | regular path; in the next release the native print system will not even be 55 | available as an option 56 | 57 | A possibility would be to ship just the print-service wrapper, but default 58 | it to CUPS. 59 | 60 | ## Packages 61 | 62 | The following packages (under usr/src/pkg/manifests) would be affected. All 63 | content would be removed and the packages marked obsolete. 64 | 65 | * print-lp-compatibility-sunos4.p5m 66 | * library-print-open-printing-ipp.p5m 67 | * library-print-open-printing-lpd.p5m 68 | * library-print-open-printing.p5m 69 | 70 | This is libpapi. There's also usr/src/man/man3/Intro.3 and 71 | usr/src/man/man3lib/libpapi.3lib. 72 | 73 | * print-lp-filter-postscript-lp-filter.p5m 74 | * print-lp-ipp-ipp-listener.p5m 75 | * print-lp-ipp-libipp.p5m 76 | * print-lp-print-client-commands.p5m 77 | * print-lp-print-manager-legacy.p5m 78 | * print-lp.p5m 79 | 80 | There are some packages that contain print-related files: 81 | 82 | * compatibility-ucb.p5m 83 | 84 | This has the man pages for the print utilities shipped in 85 | print-lp-compatibility-sunos4.p5m. 86 | 87 | * consolidation-osnet-osnet-message-files.p5m 88 | * system-trusted.p5m 89 | 90 | There are html files really associated with trusted. 
Given that the 91 | auths are only implemented for our print system, not CUPS, we should 92 | probably remove these and at least the reference to the html help from 93 | auth_attr, if not the actual auths themselves. 94 | 95 | There are also print files associated with putting labels on printed 96 | files. 97 | 98 | ## Source code 99 | 100 | This implies the removal of 101 | 102 | * usr/src/cmd/print 103 | * usr/src/cmd/lp 104 | * usr/src/lib/print 105 | 106 | And man pages 107 | 108 | * cancel.1 109 | * download.1 110 | * dpost.1 111 | * enable.1 112 | * lp.1 113 | * lpstat.1 114 | * postio.1 115 | * postprint.1 116 | * postreverse.1 117 | 118 | * lpc.1b 119 | * lpq.1b 120 | * lpr.1b 121 | * lprm.1b 122 | * lptest.1b 123 | 124 | * printers.5 125 | 126 | * accept.8 127 | * lpadmin.8 128 | * lpfilter.8 129 | * lpforms.8 130 | * lpget.8 131 | * lpmove.8 132 | * lpsched.8 133 | * lpset.8 134 | * lpshut.8 135 | * lpsystem.8 136 | * lpusers.8 137 | 138 | And references exist in 139 | 140 | * nsswitch.conf.5 141 | 142 | ## Existing bugs 143 | 144 | There are a couple of - very old - existing bugs that suggest the removal of 145 | the native printing stack 146 | 147 | * [1229 EOF SVr4 print support](https://www.illumos.org/issues/1229) 148 | * [2837 remove print/lp* from gate and use CUPS from userland](https://www.illumos.org/issues/2837) 149 | 150 | And some preparatory work has already removed the old java printmgr gui 151 | 152 | * [13180 Remove printmgr, as it doesn't work with any current java](https://www.illumos.org/issues/13180) 153 | 154 | ## Open Questions 155 | 156 | Should printer support be removed from nsswitch? 
157 | -------------------------------------------------------------------------------- /ipd/0048/README.adoc: -------------------------------------------------------------------------------- 1 | :showtitle: 2 | :toc: left 3 | :numbered: 4 | :icons: font 5 | :state: predraft 6 | :revremark: State: {state} 7 | :authors: Bill Sommerfeld 8 | :sponsor: 9 | :source-highlighter: pygments 10 | :stem: latexmath 11 | ifdef::env-github[] 12 | :tip-caption: :bulb: 13 | :note-caption: :information_source: 14 | :important-caption: :heavy_exclamation_mark: 15 | :caution-caption: :fire: 16 | :warning-caption: :warning: 17 | endif::[] 18 | 19 | = IPD 48 Improving Illumos on IPv6-primary and IPv6-only networks 20 | {authors} 21 | 22 | == Introduction 23 | 24 | Many network providers are moving to a world where IPv6 is the 25 | preferred network layer protocol - including charging extra for IPv4 26 | addresses. 27 | 28 | Illumos is missing a few functional pieces that complicate its 29 | installation and use on an IPv6-only network. 30 | 31 | NOTE: in the predraft state this is something of a 32 | not-entirely-coherent laundry list of issues I've noticed and which 33 | I've started to work on. Future updates will include specific 34 | proposals. 35 | 36 | == Configuring autoconfiguration preferences 37 | 38 | Some autoconfigured parameters learned from an interface (notably 39 | default route and DNS client configuration) may have system-global 40 | impact; there should be a way for an administrator to control which 41 | interfaces can be used as source for these parameters. 42 | 43 | == DNS resolver autoconfiguration 44 | 45 | === DNS configuration via DHCPv6 46 | 47 | There is currently no way for DNS configuration to make its way from 48 | the DHCP client to /etc/resolv.conf or other resolver configuration. 
49 | 50 | === DNS configuration from Router Advertisement options 51 | 52 | As with DHCPv6, there is currently no way for DNS configuration to 53 | make its way from ndpd client to /etc/resolv.conf or other resolver 54 | configuration. 55 | 56 | == ndpd.conf vs ipadm 57 | 58 | The in.ndpd daemon draws its configuration mainly from 59 | `/etc/inet/ndpd.conf` but can pull some configuration from ipadm via 60 | ipmgmtd; we should try to move more of this -- perhaps all of it -- 61 | into ipadm. 62 | 63 | == DHCPv6 prefix delegation 64 | 65 | == Site-local addresses in documentation 66 | 67 | We have far too many examples using the long-deprecated site-local 68 | addresses in `fec0::/10`; these should generally change to use ULA 69 | (unique local addresses) in `fd00::/8`. 70 | 71 | == ipadm irregularities 72 | 73 | === addrprop disabled for addrconf addresses 74 | 75 | 76 | 77 | === deprecated addrconf addresses get lost 78 | 79 | If a router advertisement is deprecated, the addresses shown in `ipadm 80 | show-addr` output are shown as `intf0/??` instead of with the name of 81 | their associated address object. 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /ipd/0050/README.adoc: -------------------------------------------------------------------------------- 1 | :showtitle: 2 | :toc: left 3 | :numbered: 4 | :icons: font 5 | :state: predraft 6 | :revremark: State: {state} 7 | :authors: Dan McDonald , Josh Clulow 8 | :sponsor: 9 | 10 | = IPD 50 ZFS Maintenance and Consumption of OpenZFS Technology 11 | {authors} 12 | 13 | [cols="3"] 14 | |=== 15 | |Authors: {author} 16 | |Sponsor: {sponsor} 17 | |State: {state} 18 | |=== 19 | 20 | The Zettabyte File System (ZFS) is core technology for illumos, and its 21 | flagship file system. Until 2019, illumos ZFS was the upstream for the 22 | OpenZFS community as well. 
The file system is one of the most important 23 | things an operating system provides as a layer of infrastructure. illumos 24 | maintains ZFS as first-party code, in keeping with all of our other standards 25 | and practices. 26 | 27 | In the past six years, illumos ZFS has incorporated features from the current 28 | OpenZFS project. Not all have been successfully incorporated. The purpose 29 | of this IPD is to restate our commitment to ZFS in illumos being first-party 30 | code, to capture what we learned from past consumption of OpenZFS technology, and to lay 31 | out policies and strategies for incorporating OpenZFS technology into illumos 32 | ZFS going forward. 33 | 34 | == Background 35 | 36 | The history of illumos as a still-open fork of Sun Microsystems' OpenSolaris 37 | is https://illumos.org/docs/about/history/[well-documented]. As part of 38 | illumos, the original ZFS implementation continued to be open-source even 39 | after Oracle closed Solaris in August, 2011. Not long after that, other 40 | operating systems began porting ZFS to their own platforms. FreeBSD took 41 | illumos to be their upstream, and the ZFS on Linux (commonly abbreviated as 42 | ZoL) project forked off illumos ZFS for porting into Linux. 43 | 44 | In 2013, the OpenZFS project was chartered. Its official upstream was illumos 45 | ZFS, and it included all ZFS ports in its community. This continued for a 46 | number of years. During that time, ZoL moved forward at a more aggressive 47 | pace of development. In 2019, ZoL merged into the OpenZFS downstream and 48 | OpenZFS became a completely independent project. 49 | 50 | == Lessons Learned and Ongoing illumos ZFS Policy 51 | 52 | A primary goal of illumos is reliability: both the integrity of data stored 53 | by users, and the availability of that data on running systems. Performance 54 | and new features are a secondary goal.
When illumos takes patches from 55 | working branches or from forks, the patches need to be reviewed locally by 56 | our community, and the illumos core team reserves the right to modify them to 57 | meet local standards. All changes must be tested in their final form; it's 58 | good to highlight testing from other projects in your notes, but it is 59 | generally not sufficient on its own. 60 | 61 | This primary goal has occasionally come into conflict with desired technology 62 | in OpenZFS. Consuming such technology usually resolves these conflicts merely 63 | by further testing, or by changes which illumos will offer back to OpenZFS. A 64 | key to making sure this works is for illumos to ensure that the OpenZFS 65 | technology being consumed is, as noted above, in its final form. That has 66 | sometimes not been the case. 67 | 68 | `XXX KEBE SAYS INSERT ZFS CRYPTO TEXT HERE` 69 | 70 | https://github.com/jasonbking/illumos-gate/blob/zfs-crypto-dnode/usr/src/uts/common/fs/zfs/sys/dmu_objset.h#L70-L86[Possible jbk fix] 71 | 72 | 73 | `XXX KEBE SAYS INSERT ALEX-WILSON-INSPIRED COMPRESSED-ARC SITUATION` 74 | 75 | 76 | == Interoperability 77 | 78 | A possible concern for illumos ZFS is interoperability with OpenZFS and with 79 | Oracle Solaris, especially in the sending and receipt of ZFS send streams, as 80 | well as whole-pool import or export. While not always practical, using send 81 | streams or importing or exporting whole pools could be more efficient than 82 | using file-level primitives for data transfer. 83 | 84 | With respect to Oracle Solaris, the send-stream advice is simply to make sure 85 | the sending dataset was generated from a SPA version of 28 or less, and a 86 | ZFS POSIX Layer (ZPL) version that was equal to or less than 5 on both 87 | illumos and Oracle Solaris. The same versioning restrictions apply to the 88 | import and export of pools.
89 | 90 | OpenZFS introduced the concept of feature-flags, and SPA version 5000 91 | indicates that pool feature-flags are present, and should be examined. 92 | 93 | `XXX KEBE SAYS A LOT MORE NEEDS TO GO HERE... or is it its own IPD?` 94 | 95 | === On illumos Consuming OpenZFS. 96 | 97 | ``` 98 | XXX KEBE SAYS this section was inspired by discussions with Toomas Soome, 99 | and will need further clarification and expansion. 100 | ``` 101 | 102 | IF illumos decides, à la FreeBSD, to completely consume a 103 | https://github.com/openzfs/zfs/releases[named-release] of OpenZFS, it would 104 | have to be a large-project integration, similar to, and larger in scope than, 105 | something like 106 | https://github.com/illumos/ipd/blob/master/ipd/0011/README.md[NFS server in a zone]. 107 | 108 | To that end, ANY experimentation with a pull of OpenZFS should be performed 109 | in a downstream repo from illumos-gate, and maybe if it is sufficiently 110 | stable, an `openzfs-X.Y` which appears to be the unit of tracking granularity 111 | among OpenZFS releases (2.1, 2.2, 2.3) may be the best way to differentiate 112 | the stable base illumos ZFS from any works-in-progress to bring in an OpenZFS 113 | release. IF the illumos main/master branch accepts an OpenZFS release in the 114 | future, a similar process would ensue for any version increase in either of X 115 | or Y. 116 | 117 | == The illumos Policy for Consuming OpenZFS Technology 118 | 119 | -------------------------------------------------------------------------------- /ipd/0051/README.adoc: -------------------------------------------------------------------------------- 1 | :showtitle: 2 | :toc: left 3 | :numbered: 4 | :icons: font 5 | :state: published 6 | :revremark: State: {state} 7 | :authors: Robert Mustacchi 8 | :sponsor: Joshua M.
Clulow 9 | 10 | = IPD 51 Time Zone Information Maintenance 11 | {authors} 12 | 13 | [cols="3"] 14 | |=== 15 | |Authors: {author} 16 | |Sponsor: {sponsor} 17 | |State: {state} 18 | |=== 19 | 20 | This IPD goes into the background of how time zone information is stored 21 | and maintained in illumos and the shared IANA database. This IPD 22 | covers at a high-level: 23 | 24 | * Guidelines for how to perform updates to time zone code 25 | ** The existing time zone data update path is not really changed 26 | * Future directions for improving the state of time zone interfaces and 27 | functionality 28 | * Clarifications to the stability of zic(8) and zdump(8) 29 | 30 | == Background 31 | 32 | Time zone information describes information about the messy reality of 33 | time zones, when time shifts for things such as daylight savings time, 34 | and all the shifts that have happened over the decades. This information 35 | is currently maintained by https://www.iana.org/time-zones[IANA] and has 36 | been called the Olson database and zoneinfo. The collection of 37 | information published by IANA can be broken down into three logical 38 | collections: 39 | 40 | . There is the time zone database information itself. This contains the 41 | information about the different time zones that exist, when changes to 42 | them occur, the various areas and locations they are broken into, etc. 43 | In illumos this information is generally kept in 44 | `usr/src/data/zoneinfo`. This information is generally distributed by 45 | IANA as the tzdata package. 46 | 47 | . There are the tools that are used to parse and dump the time zone 48 | information itself. These are the time zone compiler 49 | https://illumos.org/man/8/zic[zic(8)] and the time zone dumper 50 | https://illumos.org/man/8/zdump[zdump(8)]. This is generally distributed 51 | as part of the tzcode package. In illumos these are found in 52 | `usr/src/cmd/zic` and `usr/src/cmd/zdump`. 53 | 54 | . 
The last bit that IANA distributes is code that represents the common 55 | standard C and POSIX interfaces such as 56 | https://illumos.org/man/3C/localtime[localtime(3C)]. Our code for these 57 | implementations is derived from it, but it is not used verbatim. This is 58 | different from `zic` and `zdump` where we use the upstream 59 | code more or less as is (with some minor reorganization between 60 | `private.h` and `tzfile.h`). These contents are also part of the 61 | tzcode package. 62 | 63 | Separately from this, illumos has libzoneinfo, which provides a set of 64 | private interfaces that are used to get access to the zone information 65 | database and information about the various areas, countries, and time 66 | zones within those. 67 | 68 | === Maintenance in illumos 69 | 70 | To date, illumos has been regularly updating the time zone information 71 | portion of this. This process has been relatively streamlined with 72 | documentation in-tree for how to proceed and how to update the various 73 | packaging. Here's a link to the current (snapshotted at the point of 74 | writing) version of 75 | https://github.com/illumos/illumos-gate/blob/a5b7f75fd1ed126068f42cd7ee368ce34c95973f/usr/src/data/zoneinfo/README.illumos[README.illumos] 76 | which describes how this process has been performed. In general, this 77 | IPD does not propose much in the way of changes to this part of the process. It 78 | has been working relatively well, but starting with 2025a requires 79 | updated tools. 80 | 81 | The tools in particular have not been updated since well before illumos 82 | was forked from OpenSolaris, except for a minor update in the form of 83 | https://www.illumos.org/issues/6869[#6869 Update zdump to better-handle 84 | POSIX timezones]. In general folks have been focused on keeping the zone 85 | information data up to date.
86 | 87 | === Compiled Zone Information Files 88 | 89 | The compiled form of the timezone information database has been 90 | documented in a series of RFCs. The latest as of this writing is 91 | https://www.rfc-editor.org/rfc/rfc9636.html[RFC 9636], which documents 92 | the v4 format. The database files themselves are backwards compatible, 93 | which is an important property. A reader that only knows about v1 can 94 | still read a v2-v4 file and will get a limited amount of information 95 | with some caveats. 96 | 97 | These files all live in `/usr/share/lib/zoneinfo` on illumos. Importantly, 98 | these files are compliant with the RFC. Other 99 | software such as golang will use the system files, but implement their 100 | own readers. This means that we cannot arbitrarily change the format of 101 | these files; however, within illumos itself, the only things that should 102 | ever be parsing them are libc and zdump. 103 | 104 | == Updating tzcode tools 105 | 106 | When updating the tzcode, we focus first on the tools. This requires 107 | updating zic, zdump, and the related pieces. Here are basic properties 108 | for how to perform these updates. Note, we should always update zdump 109 | and zic together. We should not update them separately. 110 | 111 | * There is only one system copy of `private.h`, `tzfile.h`, `tzdir.h`, 112 | and `version.h`. `tzfile.h` is found in `usr/src/head`. The rest of 113 | these are found in `usr/src/cmd/zic`. Both the tools zic build and 114 | zdump point to these copies. There should not be any duplicates in 115 | tree. 116 | 117 | * Update the `TZVERSION` in `usr/src/cmd/zic/version.h` to refer to the 118 | version of the upstream code. Do not change the path to reporting 119 | bugs. 120 | 121 | * The upstream `zic.c` and `zdump.c` files should be able to be used 122 | directly and copied in.
The only difference we may need is an explicit 123 | inclusion of `tzfile.h` in zdump.c due to the changes that we perform 124 | to tzfile.h. 125 | 126 | * In the upstream `private.h` there is a section that begins with the 127 | comment `Handy macros that are independent of tzfile implementation.`. 128 | These macros are used by libc and a few other pieces. These should be 129 | removed from `private.h` and placed in `tzfile.h` for the time being. 130 | Otherwise this file can more or less be taken verbatim. 131 | 132 | * Manually diff what has changed in `tzfile.h` using a tool like 133 | `vimdiff` or similar. 134 | 135 | The remaining portion after this is testing related. See the later 136 | section on testing in this document. Manual pages should generally not 137 | be taken verbatim, but changes should be merged in in a way that follows 138 | the existing consistency of illumos manual pages. 139 | 140 | Future work will evaluate changes required to libc and how those should 141 | be incorporated. In general, we expect those to be less frequent than 142 | updates to these files and recommend that that be on a more ad-hoc 143 | basis. 144 | 145 | Finally, it's worth noting that it generally is going to be worth 146 | updating the tzcode portions separately from the tzdata portions as that 147 | allows easier validation in both directions. 148 | 149 | == Unpackaging `tzfile.h` 150 | 151 | `tzfile.h` is meant to be a private file that describes how to parse a 152 | large degree of the database and related internal pieces that are 153 | shared. The file itself has a note that asks it not to be present in the 154 | system include directory. Today it lives in `usr/src/head` and has been 155 | incorrectly shipped for quite some time! 156 | 157 | While it's useful to have this file be in the proto area for building 158 | purposes, it should not be packaged and shipped out. 
We should remove 159 | this to continue to abide by the request to not copy it, which makes 160 | sense given it's all about internal implementation details. 161 | 162 | Concretely to validate this we will work with the community to perform 163 | larger packaging builds. 164 | 165 | == zic(8) and zdump(8) stability 166 | 167 | Currently zic(8) and zdump(8) are marked as committed interfaces. In 168 | general, we should consider that we are no longer maintaining these 169 | commands directly (nor were we ever) and should likely admit that they 170 | will follow upstream's stability, which generally is a stable interface 171 | except when something is marked as experimental. 172 | 173 | Here we propose that we update the language in the manual to reflect 174 | this. If there are breaking changes, we should carefully consider them 175 | and take care to minimize user impact. 176 | 177 | == Testing Time Zone Changes 178 | 179 | An important thing here is performing testing for changes. When changing 180 | the code, whether the tools or libc portions, we believe it's important 181 | to do the following: 182 | 183 | * Confirm that none of the contents of libzoneinfo have changed before 184 | and after this change. They should only ever change if the data has 185 | changed. 186 | 187 | * Confirm how old and new zdump parse the same data. Specifically this 188 | is comparing for each time zone: 189 | ** How does an old and new zdump print output from data compiled from 190 | the old zic. 191 | ** How does the old zdump against the old zic output compare to the data 192 | compiled from new zic and printed with a new zdump. 193 | 194 | * How does libc's internal reader state change. This will be less useful 195 | when we're updating libc, but in all other cases it should be the same 196 | for all time zones before and after absent a specific bug that is 197 | being fixed.
198 | 199 | * Testing third-party readers such as `golang` to verify that they read 200 | updated data correctly. 201 | 202 | To facilitate this, we propose the addition of a new test suite tz-tests 203 | that installs like the others into `/opt/tz-tests` with a package of 204 | `system/test/tztest`. Unlike other test suites there will be no default 205 | Runfile. These are mostly a series of utilities to help facilitate 206 | running and validating the above. 207 | 208 | For testing changes to the time zone data, the libzoneinfo related 209 | pieces of the above test suite will be useful. Otherwise, the most 210 | useful thing to do is to test several of the time changes that are going 211 | on manually by manipulating the `TZ` environment variable and using 212 | date(1) or other utilities to print times in the future and past around 213 | the DST-like changes that are occurring in that zone. 214 | 215 | == Future Directions 216 | 217 | There is a large degree of future work that this IPD proposes that we 218 | should investigate. This is broken into a few different concrete pieces 219 | that can proceed in parallel. 220 | 221 | === Version 2 TZif and Beyond 222 | 223 | Right now illumos libc only reads the TZif version 1 file format. This 224 | should be updated to cover versions two and beyond. As part of this, we 225 | should be able to at least in an LP64 environment represent and deal 226 | with 64-bit `time_t`. The question of what to do with 32-bit compilation 227 | environments is left to the broader discussion in 228 | https://github.com/illumos/ipd/blob/master/ipd/0014/README.md[IPD 14 229 | illumos and Y2038]. 230 | 231 | As part of this, we also suggest that we go and audit the differences in 232 | the other logic around localtime. There will be a need to increase some 233 | of the macros in `tzfile.h` to match upstream as part of this, that have 234 | been held back for the first update as they are tied to later versions 235 | and libc logic.
236 | 237 | === tzalloc(3C) and localtime_rz(3C) 238 | 239 | While https://illumos.org/man/3C/localtime_r[localtime_r(3C)], 240 | https://illumos.org/man/3C/ctime_r[ctime_r(3C)], and related utilities 241 | allow the information that is used to fill them out to be re-entrant, 242 | they do not allow for an arbitrary way to specify a time zone. To date, 243 | the only way of really changing the time zone for a process is to 244 | perform something like setting the `TZ` environment variable. 245 | 246 | To deal with portions of this NetBSD introduced a version of localtime 247 | and related functions that allows one to pass an opaque timezone 248 | structure. These structures are specifically allocated and freed and 249 | kept separate from the system time zone information and caches. This 250 | provides a useful way forward for most software that needs to make 251 | queries about timezone information and the allocation routine allows one 252 | to specifically specify a time zone in a string, allowing for this to be 253 | parsed much the same way the `TZ` environment variable is. 254 | 255 | The IANA code and NetBSD differ in where `const` is in a few signatures. 256 | The IANA code generally also makes it into glibc and other places, so 257 | that means we'll need to understand the lay of the land when we get to 258 | this and if there's a way to construct things compatibly with both. We 259 | defer any such selection until we get to such an implementation. 260 | Regardless, we believe this is a useful direction for the system to add 261 | features and we should consider adding this same NetBSD inspired set the 262 | same way that the IANA tools have. 263 | 264 | == Summary of Changes 265 | 266 | This summarizes the changes that we're proposing: 267 | 268 | * As part of updating to tzcode 2025a: 269 | ** We will no longer ship `tzfile.h`. 270 | ** We will update the stability statements in zic(8) and zdump(8). 
271 | * We will create a new set of tools to aid testing. 272 | * We will adopt the proposed process for updating tzcode and tzdata. 273 | ** The two should generally be updated in separate commits. 274 | ** The existing tzdata process does not change. 275 | 276 | As future work we will: 277 | 278 | * Update libc to support TZif version 2+. 279 | * Explore support for the tzalloc and localtime_rz family of functions. 280 | -------------------------------------------------------------------------------- /ipd/0053/README.adoc: -------------------------------------------------------------------------------- 1 | :showtitle: 2 | :toc: left 3 | :numbered: 4 | :icons: font 5 | :state: draft 6 | :revremark: State: {state} 7 | :authors: iximeow 8 | :sponsor: 9 | 10 | = IPD 53 Retiring `fipe(4D)` 11 | {authors} 12 | 13 | [cols="3"] 14 | |=== 15 | |Authors: {author} 16 | |Sponsor: {sponsor} 17 | |State: {state} 18 | |=== 19 | 20 | == Introduction 21 | 22 | The `fipe(4D)` driver provides some power-saving functionality on 23 | particular systems with "Fully Buffered DIMM" modules. Specifically, 24 | systems with a North Bridge chipset with vendor ID 8086 and device ID 25 | 1A38 or 360B. In product name terms, these are the Intel 5000 and 7300 26 | series chipsets, used in Intel motherboards supporting Xeon processors 27 | of similar models (5000 series, 7000 series, including the later Core 28 | variants with similar model numbers and L/E/X prefixes). 29 | 30 | While the NetBurst cores predate x86-64 by just a bit, it seems these 31 | Xeon models are all Prescott or later, putting these among the 32 | oldest still-supported x86-64 processors. 33 | 34 | `fipe(4D)` was integrated in commit `eca2601c`, from 35 | https://illumos.org/opensolaris/ARChive/PSARC/2009/289/index.html[PSARC/2009/289] 36 | "FBDIMM Idle Power Enhancement (FIPE) driver".
37 | 38 | On source review, I think wakeups at inopportune times can cause 39 | `fipe(4D)` to idle just as a system has become busy, and it is the sole 40 | motivator for additional complication to i86pc power management for all 41 | x86 systems. 42 | 43 | `fipe(4D)` was the sole outside-the-kernel user of the CPU Idle 44 | Notification framework added with 45 | https://illumos.org/opensolaris/ARChive/PSARC/2009/115/index.html[PSARC/2009/115]. 46 | The notification framework itself is fine - it's also used for lazy TLB 47 | flushing and DTrace probes around CPU idle and wake - but the 48 | `check_func` and its argument are provided and subsequently ignored by 49 | both default callbacks, and only used by `fipe`. So, without `fipe` we 50 | could simplify the interface to CPU idle notifications, making 51 | `usr/src/uts/i86pc/os/cpupm/cpu_idle.c` more obviously correct for all 52 | systems. 53 | 54 | This IPD proposes: 55 | 56 | * Removing `usr/src/uts/i86pc/io/fipe`, as well as its header and 57 | Makefile rules 58 | * Marking the `fipe` package `obsolete` 59 | 60 | And with `fipe(4D)` retired, a followup change to: 61 | * Remove the `check_func` and `check_arg` parameters to 62 | `cpu_idle_enter` 63 | * Remove the `acpi_cpu{_mwait_ipi,_mwait,}_check_wakeup` functions 64 | * Clean up as appropriate around `cpu_idle.c`. 65 | 66 | Behavioral changes will only be seen on Xeon systems with chipsets 67 | `fipe` supported, as named above, and the behavioral change will be 68 | higher idle power consumption and heat. Those systems should otherwise 69 | work as well as before, and notably should see no change under load. 70 | 71 | == Background 72 | 73 | `fipe(4D)` was integrated back when it seemed that FB-DIMM might be the 74 | future of memory architecture. As history would have it, though, the 75 | industry moved towards DDR3 and registered DIMMs, rather than FB-DIMM 76 | and the corresponding "Advanced Memory Buffer" (AMB) modules to 77 | communicate with them.
78 | 79 | I'm fuzzy on many of the details here, but it seems that the AMB itself 80 | was a substantial additional power draw and heat producer. FIPE seems to 81 | be a feature on corresponding chipsets to power off some parts of the 82 | DIMM while memory is unused, reducing idle power and heat some. 83 | 84 | The check functions added with the CPU Idle Notification framework which 85 | `fipe(4D)` builds on, though, are (perhaps surprisingly) stateful: they 86 | may enable and disable interrupts, and may register that a CPU has 87 | exited idle if an interrupt was processed. 88 | 89 | `fipe(4D)` is the only caller of these check functions, and calls them 90 | only after checking that when the current CPU is idled, all CPUs will be 91 | idle. From source review, I believe that if a CPU wakes between this 92 | point and actually idling, for example in handling a NIC interrupt, 93 | `fipe(4D)` will still incorrectly take operations to reduce FBDIMM power 94 | use. This is another point where documentation and effect are hard to 95 | track down: would the system actually idle? Will the idle attempt be a 96 | no-op? Will something else happen? 97 | 98 | In the best case, it would be great to document, or test, or at least 99 | file issues to follow up here. But recognizing that the memory design 100 | was removed from product roadmaps almost 15 years ago, maybe that's more 101 | effort than is appropriate. 102 | 103 | == Implementation 104 | 105 | With much appreciation to the original code, the implementation of this 106 | IPD would mostly be reverting `PSARC/2009/289` in one change, then the 107 | callback-before-entering-CPU-idle changes in `PSARC/2009/115`. 108 | 109 | The commits no longer revert cleanly, and `PSARC/2009/115` in regards 110 | other than `check_func` still seems quite useful! This is a general 111 | statement of direction more than a precise depiction of changes.
112 | 113 | We could, instead, refactor this driver and CPU idle notifications so 114 | that checking if a CPU can enter idle does not, itself, de-idle CPUs. 115 | This could come with additional documentation for how `cpu_idle_enter` and 116 | the idle notification system should be used, or what invariants a 117 | `check_func` must uphold. This feels needlessly risky for the relevant 118 | systems though, since we would want to test changes on those systems at 119 | a minimum. 120 | -------------------------------------------------------------------------------- /ipd/0054/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | author: Bill Sommerfeld 3 | sponsor: 4 | state: predraft 5 | --- 6 | 7 | # Considerations around updating illumos-gate system sqlite to sqlite 3 8 | 9 | ## Background 10 | 11 | Several components of illumos-gate, most notably SMF, rely on the 12 | SQLite database to store configuration information. The version of 13 | SQLite used for this purpose is a patched version of 2.8.15, which was 14 | released in 2004; it uses a database format that is not compatible 15 | with current releases of SQLite 3. 16 | 17 | The idmap facility and the "libsmb" library used by the SMB server 18 | also use sqlite2. 19 | 20 | The developers of SQLite have committed to maintain the database 21 | [through at least the year 2050](https://sqlite.org/lts.html); they 22 | have also made a [long-term commitment to the 3.x file 23 | format](https://www.sqlite.org/formatchng.html). 24 | 25 | ## SQLite in SMF 26 | 27 | Within SMF, svc.configd is the only program which directly reads or 28 | writes the SMF configuration database. 29 | 30 | svc.configd runs very early (its start is special-cased in svc.startd) and 31 | as a result has to cope with a very constrained early environment, 32 | including a potentially read-only filesystem.
33 | 34 | ### Upgrading the SMF database content 35 | 36 | SQLite databases can in theory be upgraded to version 3 via a 37 | dump/restore pass that can be scripted using the old and new versions 38 | of the sqlite CLI. 39 | 40 | As a test, I dumped my workstation's /etc/svc/repository.db into sql 41 | dump format with sqlite 2, then loaded it into a recent sqlite3. 42 | 43 | The process worked without any reported errors. The resulting 44 | database file was about half the size of the older format (3.34MB vs 45 | 6.64M). 46 | 47 | I then as a test dumped it again from sqlite3, compared the two dumps, 48 | and found a few discrepancies. 49 | 50 | In particular, sqlite uses a somewhat fuzzy typing system, and 51 | versions 2 and 3 have slightly different interpretations of this. 52 | 53 | SQLite integers are signed 64 bit values. SMF has been storing unsigned 64-bit values. 54 | 55 | One value (18446744073709551615, perhaps better known as 56 | 0xffffffffffffffff) appeared a few times in the original dump. In the 57 | dump from the sqlite3 database, I found '1.84467440737096e+19' instead 58 | -- it's larger than the largest signed 64-bit integer, so sqlite3 59 | converts it to floating point. 60 | 61 | Ironically, several of these values started off as a signed integer 62 | (-1) in the SMF manifests that defined them; had they been handled by 63 | the rest of SMF consistently as a signed value the conversion to 64 | floating point could have been avoided. 65 | 66 | ### Upgrading sqlite in illumos-gate: 67 | 68 | Need to look at local addons to libsqlite (idmap project added some 69 | utf8 case conversion code!); convert this to the sqlite3 function 70 | plugin interface? 71 | 72 | Likewise, install the CLI into usr/src/cmd/sqlite3, for install into /lib/svc/bin 73 | 74 | 2) Build and package two versions of svc.configd - one built against 75 | libsqlite-sys, and the other against libsqlite3-sys, operating on 76 | different-named databases. 
77 | 78 | 3) Build a conversion script that operates on an inactive mounted BE, 79 | converting its /etc/svc/repository.db file into its sqlite3 counterpart. 80 | 81 | 4) Modify svc.startd to pick one or the other svc.configd based on 82 | which repository database is present. 83 | 84 | ## Other consumers of sqlite 2 in illumos-gate 85 | 86 | ### Upgrading sqlite in idmap: 87 | 88 | idmap stores one table in each of two databases. One of them is a 89 | cache which lives in /var/run which can be converted by recreating it 90 | (and there is code in idmapd which already does this sort of 91 | conversion). 92 | 93 | The other database is found in /var/idmap/idmap.db; conversion could 94 | be handled by having the daemon run a separate conversion program if 95 | the new format database wasn't found. 96 | 97 | ### Upgrading sqlite in smbsrv/libsmb: 98 | 99 | The libsmb library has code which accesses two persistent databases: 100 | 101 | "/var/smb/smbhosts.db" 102 | "/var/smb/smbgroup.db" 103 | 104 | TODO: identify a point early in the service dependency graph that 105 | would allow the insertion of a conversion program/service. 106 | 107 | ## Strawman upgrade sequence: 108 | 109 | 1) Import a version of sqlite3.x.y into usr/src/lib/libsqlite3; 110 | install it as /lib/libsqlite3-sys.so.3.x.y. This library is not 111 | expected to rapidly track upstream sqlite3. 112 | 113 | 2) Convert idmap and smbsrv/libsmb to use sqlite3 first. 114 | 115 | 3) Create dual-version svc.configd (possibly as two binaries) and a 116 | database conversion program. 117 | 118 | ... 119 | -------------------------------------------------------------------------------- /ipd/0054/schema-revs.sql: -------------------------------------------------------------------------------- 1 | -- schema revs: 2 | -- 1) strict mode 3 | -- 2) value_tbl -> blob with constraints on blob length & contents 4 | -- 3) pitch id_tbl and let database pick id values via AUTOINCREMENT?
no, 5 | -- but keep INTEGER PRIMARY KEY where possible; use composite keys 6 | -- otherwise 7 | -- 4) add foreign key constraints to ensure that id-based references to other tables point at valid data 8 | -- 5) update various application-id's in database header 9 | pragma application_id = 10 | pragma user_version = ?? 11 | 12 | -- PRAGMA encoding = 'UTF-8'; 13 | -- PRAGMA journal_mode = ? 14 | -- "To achieve the best long-term query performance without the need to do a detailed engineering analysis of the application schema and SQL, it is recommended that applications run "PRAGMA optimize" (with no arguments) just before closing each database connection. Long-running applications might also benefit from setting a timer to run "PRAGMA optimize" every few hours. " 15 | 16 | --PRAGMA secure_delete = true; ? 17 | 18 | -- PRAGMA shrink_memory after manifest import? 19 | PRAGMA trusted_schema = false; 20 | 21 | 22 | 23 | -- Other ideas: 24 | -- 1) persistent prepared statements for most queries 25 | -- 2) minimize functionality of system libsqlite to reduce footprint w/o 26 | -- sacrificing performance. 27 | -- 28 | -- Open questions: 29 | -- 1) WAL or not? 
Would speed up manifest import but not help most of the 30 | -- time 31 | 32 | -- Read through sqlite.org for the scattered best-practice 33 | -- recommendations (optimize, vacuum) 34 | -- see https://www.sqlite.org/threadsafe.html 35 | 36 | CREATE TABLE instance_tbl ( 37 | instance_id INTEGER PRIMARY KEY, 38 | instance_name CHAR(256) NOT NULL, 39 | instance_svc INTEGER NOT NULL); -> references service_tbl(svc_id) 40 | 41 | CREATE INDEX instance_tbl_name ON instance_tbl (instance_svc, instance_name); 42 | 43 | CREATE TABLE pg_tbl ( 44 | pg_id INTEGER PRIMARY KEY, 45 | pg_parent_id INTEGER NOT NULL, 46 | pg_name CHAR(256) NOT NULL, 47 | pg_type CHAR(256) NOT NULL, 48 | pg_flags INTEGER NOT NULL, 49 | pg_gen_id INTEGER NOT NULL); 50 | 51 | CREATE INDEX pg_tbl_name ON pg_tbl (pg_parent_id, pg_name); 52 | CREATE INDEX pg_tbl_parent ON pg_tbl (pg_parent_id); 53 | CREATE INDEX pg_tbl_type ON pg_tbl (pg_parent_id, pg_type); 54 | 55 | CREATE TABLE prop_lnk_tbl ( 56 | lnk_prop_id INTEGER PRIMARY KEY, 57 | lnk_pg_id INTEGER NOT NULL, references pg_tbl(pg_id?) 
58 | lnk_gen_id INTEGER NOT NULL, 59 | lnk_prop_name CHAR(256) NOT NULL, 60 | lnk_prop_type CHAR(2) NOT NULL, 61 | lnk_val_id INTEGER); references value_tbl(value_id) 62 | 63 | CREATE INDEX prop_lnk_tbl_base ON prop_lnk_tbl (lnk_pg_id, lnk_gen_id); 64 | CREATE INDEX prop_lnk_tbl_val ON prop_lnk_tbl (lnk_val_id); 65 | 66 | CREATE TABLE schema_version ( 67 | schema_version INTEGER 68 | ); 69 | 70 | CREATE TABLE service_tbl ( 71 | svc_id INTEGER PRIMARY KEY, 72 | svc_name CHAR(256) NOT NULL 73 | ); 74 | 75 | CREATE INDEX service_tbl_name ON service_tbl (svc_name); 76 | 77 | CREATE TABLE snaplevel_lnk_tbl ( 78 | snaplvl_level_id INTEGER NOT NULL, 79 | snaplvl_pg_id INTEGER NOT NULL, 80 | snaplvl_pg_name CHAR(256) NOT NULL, 81 | snaplvl_pg_type CHAR(256) NOT NULL, 82 | snaplvl_pg_flags INTEGER NOT NULL, 83 | snaplvl_gen_id INTEGER NOT NULL 84 | ); 85 | 86 | CREATE INDEX snaplevel_lnk_tbl_id ON snaplevel_lnk_tbl (snaplvl_pg_id); 87 | CREATE INDEX snaplevel_lnk_tbl_level ON snaplevel_lnk_tbl (snaplvl_level_id); 88 | 89 | CREATE TABLE snaplevel_tbl ( 90 | snap_id INTEGER NOT NULL, 91 | snap_level_num INTEGER NOT NULL, 92 | snap_level_id INTEGER NOT NULL, 93 | snap_level_service_id INTEGER NOT NULL, 94 | snap_level_service CHAR(256) NOT NULL, 95 | snap_level_instance_id INTEGER NULL, 96 | snap_level_instance CHAR(256) NULL 97 | ); 98 | 99 | CREATE INDEX snaplevel_tbl_id ON snaplevel_tbl (snap_id); 100 | 101 | CREATE TABLE snapshot_lnk_tbl ( 102 | lnk_id INTEGER PRIMARY KEY, 103 | lnk_inst_id INTEGER NOT NULL, 104 | lnk_snap_name CHAR(256) NOT NULL, 105 | lnk_snap_id INTEGER NOT NULL 106 | ); 107 | 108 | CREATE INDEX snapshot_lnk_tbl_name ON snapshot_lnk_tbl (lnk_inst_id, lnk_snap_name); 109 | CREATE INDEX snapshot_lnk_tbl_snapid ON snapshot_lnk_tbl (lnk_snap_id); 110 | 111 | --- Original 112 | 113 | CREATE TABLE value_tbl ( 114 | value_id INTEGER NOT NULL, 115 | value_type CHAR(1) NOT NULL, 116 | value_value VARCHAR NOT NULL, 117 | value_order INTEGER DEFAULT 0); 118 | 119 
| --- New: 120 | 121 | -- primary key is (value_id, value_order); no rowid 122 | 123 | CREATE TABLE value_tbl ( 124 | value_id INTEGER NOT NULL, 125 | value_type CHAR(1) NOT NULL, -- add constraints to length == 1 126 | value_value BLOB NOT NULL, -- constrain length based on value_type 127 | value_order INTEGER DEFAULT 0 128 | ) STRICT; 129 | 130 | -- types are: 131 | -- b boolean -> sqlite integer (0 or 1) 132 | -- c count -> sqlite integer if < 2^63; 8-byte blob (in big-endian order) if >= 2^63 133 | -- i integer -> native SQLite integer 134 | -- o opaque -> varying blob 135 | -- s string -> varying blob containing utf8 text 136 | -- t time 137 | 138 | -- requires more convoluted code for fetching from value table but may be worth it in the on-disk space saving. 139 | 140 | -- sqlite3_prepare_v2/v3 -> sqlite3_bind_* -> sqlite3_step() -> sqlite3_column_type() -> sqlite3_column_*() -> sqlite3_reset() -> sqlite3_finalize() 141 | 142 | -- REP_PROTOCOL_TYPE_INVALID = '\0', 143 | -- REP_PROTOCOL_TYPE_BOOLEAN = 'b', -> 0 (false) or 1 (true) 144 | -- REP_PROTOCOL_TYPE_COUNT = 'c', -> 64-bit BLOB (8 bytes) 145 | -- REP_PROTOCOL_TYPE_INTEGER = 'i', -> INTEGER 146 | -- REP_PROTOCOL_TYPE_TIME = 't', -> ??? 
not observed in practice
147 | -- REP_PROTOCOL_TYPE_STRING = 's', -> TEXT
148 | -- REP_PROTOCOL_TYPE_OPAQUE = 'o', -> BLOB
149 | --
150 | -- Compound types:
151 | -- REP_PROTOCOL_SUBTYPE_USTRING = REP_PROTOCOL_TYPE_STRING|('u' << 8),
152 | -- REP_PROTOCOL_SUBTYPE_URI = REP_PROTOCOL_TYPE_STRING|('U' << 8),
153 | -- REP_PROTOCOL_SUBTYPE_FMRI = REP_PROTOCOL_TYPE_STRING|('f' << 8),
154 | --
155 | -- REP_PROTOCOL_SUBTYPE_HOST = REP_PROTOCOL_TYPE_STRING|('h' << 8),
156 | -- REP_PROTOCOL_SUBTYPE_HOSTNAME = REP_PROTOCOL_TYPE_STRING|('N' << 8),
157 | -- REP_PROTOCOL_SUBTYPE_NETADDR = REP_PROTOCOL_TYPE_STRING|('n' << 8),
158 | -- REP_PROTOCOL_SUBTYPE_NETADDR_V4 = REP_PROTOCOL_TYPE_STRING|('4' << 8),
159 | -- REP_PROTOCOL_SUBTYPE_NETADDR_V6 = REP_PROTOCOL_TYPE_STRING|('6' << 8)
160 |
161 | -- skip this index; instead define primary key as (value_id, value_order), whose leading column already serves lookups by value_id.
162 |
163 | CREATE INDEX value_tbl_id ON value_tbl (value_id);
164 | -----
165 |
166 | -- Trigger fodder for value_tbl:
167 | -- The table must be STRICT for "any" to store values unchanged; otherwise the column gets NUMERIC affinity and text such as '0123' is stored as the integer 123.
168 | create table t (
169 |     id integer not null,
170 |     seq integer not null,
171 |     type text not null,
172 |     value any not null,
173 |     primary key(id, seq)
174 | ) strict, without rowid;
175 |
176 | -- Invariant enforced by both triggers: all rows sharing an id carry the same type.
177 | -- RAISE(ROLLBACK, ...) rolls back the whole enclosing transaction, not only the offending statement.
178 | CREATE TRIGGER typecheck_insert
179 | BEFORE INSERT ON t
180 | FOR EACH ROW
181 | -- A new id has no rows yet, so EXISTS is false and the insert proceeds.
182 | WHEN EXISTS (SELECT 1 FROM t WHERE t.id = NEW.id AND t.type != NEW.type)
183 |
184 | BEGIN
185 |     SELECT RAISE(ROLLBACK, 'Mismatched type (insert)');
186 | END;
187 | -- Fires on id changes too, so a row cannot be moved into a group of another type; the row being updated is excluded from the check.
188 | CREATE TRIGGER typecheck_update
189 | BEFORE UPDATE OF id, type ON t
190 | FOR EACH ROW
191 | WHEN EXISTS (SELECT 1 FROM t WHERE t.id = NEW.id AND t.type != NEW.type
192 |     AND NOT (t.id = OLD.id AND t.seq = OLD.seq))
193 | BEGIN
194 |     SELECT RAISE(ROLLBACK, 'Mismatched type (update)');
195 | END;
196 |
--------------------------------------------------------------------------------
/prototypes/README.adoc:
--------------------------------------------------------------------------------
1 | :showtitle:
2 | :toc: left
3 |
:numbered: 4 | :icons: font 5 | :state: published 6 | :revremark: State: {state} 7 | :authors: Glorfindel , Ecthelion 8 | :sponsor: 9 | 10 | = IPD 11 | {authors} 12 | 13 | [cols="3"] 14 | |=== 15 | |Authors: {author} 16 | |Sponsor: {sponsor} 17 | |State: {state} 18 | |=== 19 | 20 | 21 | --------------------------------------------------------------------------------