├── .gitignore
├── .jcheck
    └── conf
├── LICENSE
├── Makefile
├── README.md
├── eg-drafts
    ├── deconstruction-patterns-records-and-classes.md
    └── reconstruction-records-and-classes.md
└── site
    ├── _index.md
    ├── design-notes
        ├── constables.md
        ├── data-classes-historical-1.html
        ├── data-classes-historical-2.html
        ├── on-ramp.md
        ├── patterns
        │   ├── exhaustiveness.md
        │   ├── extending-switch-for-patterns.md
        │   ├── pattern-match-object-model.md
        │   ├── pattern-match-semantics.md
        │   ├── pattern-match-translation.md
        │   ├── pattern-matching-for-java.md
        │   ├── towards-member-patterns.md
        │   └── type-patterns-in-switch.md
        ├── records-and-sealed-classes.md
        ├── templated-strings.md
        └── towards-better-serialization.md
    └── guides
        ├── lvti-faq.md
        ├── lvti-style-guide.md
        ├── text-blocks-guide.head
        └── text-blocks-guide.md


/.gitignore:
--------------------------------------------------------------------------------
1 | /build/
2 | .DS_Store
3 | /ojweb-generate/
4 | *~
5 | 


--------------------------------------------------------------------------------
/.jcheck/conf:
--------------------------------------------------------------------------------
 1 | ;
 2 | ; Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 3 | ; DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 4 | ;
 5 | ; This code is free software; you can redistribute it and/or modify it
 6 | ; under the terms of the GNU General Public License version 2 only, as
 7 | ; published by the Free Software Foundation.
 8 | ;
 9 | ; This code is distributed in the hope that it will be useful, but WITHOUT
10 | ; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 | ; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 | ; version 2 for more details (a copy is included in the LICENSE file that
13 | ; accompanied this code).
14 | ;
15 | ; You should have received a copy of the GNU General Public License version
16 | ; 2 along with this work; if not, write to the Free Software Foundation,
17 | ; Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 | ;
19 | ; Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 | ; or visit www.oracle.com if you need additional information or have any
21 | ; questions.
22 | ;
23 | 
24 | [general]
25 | project=amber
26 | repository=amber-docs
27 | jbs=jdk
28 | 
29 | [checks]
30 | error=author,committer,whitespace,executable,symlink
31 | 
32 | [census]
33 | version=0
34 | domain=openjdk.org
35 | 
36 | [checks "whitespace"]
37 | files=.*\.java$|.*\.cpp$|.*\.hpp$|.*\.c$|.*\.h$
38 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | The GNU General Public License (GPL)
  2 | 
  3 | Version 2, June 1991
  4 | 
  5 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.
  6 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  7 | 
  8 | Everyone is permitted to copy and distribute verbatim copies of this license
  9 | document, but changing it is not allowed.
 10 | 
 11 | Preamble
 12 | 
 13 | The licenses for most software are designed to take away your freedom to share
 14 | and change it.  By contrast, the GNU General Public License is intended to
 15 | guarantee your freedom to share and change free software--to make sure the
 16 | software is free for all its users.  This General Public License applies to
 17 | most of the Free Software Foundation's software and to any other program whose
 18 | authors commit to using it.  (Some other Free Software Foundation software is
 19 | covered by the GNU Library General Public License instead.) You can apply it to
 20 | your programs, too.
 21 | 
 22 | When we speak of free software, we are referring to freedom, not price.  Our
 23 | General Public Licenses are designed to make sure that you have the freedom to
 24 | distribute copies of free software (and charge for this service if you wish),
 25 | that you receive source code or can get it if you want it, that you can change
 26 | the software or use pieces of it in new free programs; and that you know you
 27 | can do these things.
 28 | 
 29 | To protect your rights, we need to make restrictions that forbid anyone to deny
 30 | you these rights or to ask you to surrender the rights.  These restrictions
 31 | translate to certain responsibilities for you if you distribute copies of the
 32 | software, or if you modify it.
 33 | 
 34 | For example, if you distribute copies of such a program, whether gratis or for
 35 | a fee, you must give the recipients all the rights that you have.  You must
 36 | make sure that they, too, receive or can get the source code.  And you must
 37 | show them these terms so they know their rights.
 38 | 
 39 | We protect your rights with two steps: (1) copyright the software, and (2)
 40 | offer you this license which gives you legal permission to copy, distribute
 41 | and/or modify the software.
 42 | 
 43 | Also, for each author's protection and ours, we want to make certain that
 44 | everyone understands that there is no warranty for this free software.  If the
 45 | software is modified by someone else and passed on, we want its recipients to
 46 | know that what they have is not the original, so that any problems introduced
 47 | by others will not reflect on the original authors' reputations.
 48 | 
 49 | Finally, any free program is threatened constantly by software patents.  We
 50 | wish to avoid the danger that redistributors of a free program will
 51 | individually obtain patent licenses, in effect making the program proprietary.
 52 | To prevent this, we have made it clear that any patent must be licensed for
 53 | everyone's free use or not licensed at all.
 54 | 
 55 | The precise terms and conditions for copying, distribution and modification
 56 | follow.
 57 | 
 58 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 59 | 
 60 | 0. This License applies to any program or other work which contains a notice
 61 | placed by the copyright holder saying it may be distributed under the terms of
 62 | this General Public License.  The "Program", below, refers to any such program
 63 | or work, and a "work based on the Program" means either the Program or any
 64 | derivative work under copyright law: that is to say, a work containing the
 65 | Program or a portion of it, either verbatim or with modifications and/or
 66 | translated into another language.  (Hereinafter, translation is included
 67 | without limitation in the term "modification".) Each licensee is addressed as
 68 | "you".
 69 | 
 70 | Activities other than copying, distribution and modification are not covered by
 71 | this License; they are outside its scope.  The act of running the Program is
 72 | not restricted, and the output from the Program is covered only if its contents
 73 | constitute a work based on the Program (independent of having been made by
 74 | running the Program).  Whether that is true depends on what the Program does.
 75 | 
 76 | 1. You may copy and distribute verbatim copies of the Program's source code as
 77 | you receive it, in any medium, provided that you conspicuously and
 78 | appropriately publish on each copy an appropriate copyright notice and
 79 | disclaimer of warranty; keep intact all the notices that refer to this License
 80 | and to the absence of any warranty; and give any other recipients of the
 81 | Program a copy of this License along with the Program.
 82 | 
 83 | You may charge a fee for the physical act of transferring a copy, and you may
 84 | at your option offer warranty protection in exchange for a fee.
 85 | 
 86 | 2. You may modify your copy or copies of the Program or any portion of it, thus
 87 | forming a work based on the Program, and copy and distribute such modifications
 88 | or work under the terms of Section 1 above, provided that you also meet all of
 89 | these conditions:
 90 | 
 91 |     a) You must cause the modified files to carry prominent notices stating
 92 |     that you changed the files and the date of any change.
 93 | 
 94 |     b) You must cause any work that you distribute or publish, that in whole or
 95 |     in part contains or is derived from the Program or any part thereof, to be
 96 |     licensed as a whole at no charge to all third parties under the terms of
 97 |     this License.
 98 | 
 99 |     c) If the modified program normally reads commands interactively when run,
100 |     you must cause it, when started running for such interactive use in the
101 |     most ordinary way, to print or display an announcement including an
102 |     appropriate copyright notice and a notice that there is no warranty (or
103 |     else, saying that you provide a warranty) and that users may redistribute
104 |     the program under these conditions, and telling the user how to view a copy
105 |     of this License.  (Exception: if the Program itself is interactive but does
106 |     not normally print such an announcement, your work based on the Program is
107 |     not required to print an announcement.)
108 | 
109 | These requirements apply to the modified work as a whole.  If identifiable
110 | sections of that work are not derived from the Program, and can be reasonably
111 | considered independent and separate works in themselves, then this License, and
112 | its terms, do not apply to those sections when you distribute them as separate
113 | works.  But when you distribute the same sections as part of a whole which is a
114 | work based on the Program, the distribution of the whole must be on the terms
115 | of this License, whose permissions for other licensees extend to the entire
116 | whole, and thus to each and every part regardless of who wrote it.
117 | 
118 | Thus, it is not the intent of this section to claim rights or contest your
119 | rights to work written entirely by you; rather, the intent is to exercise the
120 | right to control the distribution of derivative or collective works based on
121 | the Program.
122 | 
123 | In addition, mere aggregation of another work not based on the Program with the
124 | Program (or with a work based on the Program) on a volume of a storage or
125 | distribution medium does not bring the other work under the scope of this
126 | License.
127 | 
128 | 3. You may copy and distribute the Program (or a work based on it, under
129 | Section 2) in object code or executable form under the terms of Sections 1 and
130 | 2 above provided that you also do one of the following:
131 | 
132 |     a) Accompany it with the complete corresponding machine-readable source
133 |     code, which must be distributed under the terms of Sections 1 and 2 above
134 |     on a medium customarily used for software interchange; or,
135 | 
136 |     b) Accompany it with a written offer, valid for at least three years, to
137 |     give any third party, for a charge no more than your cost of physically
138 |     performing source distribution, a complete machine-readable copy of the
139 |     corresponding source code, to be distributed under the terms of Sections 1
140 |     and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     c) Accompany it with the information you received as to the offer to
143 |     distribute corresponding source code.  (This alternative is allowed only
144 |     for noncommercial distribution and only if you received the program in
145 |     object code or executable form with such an offer, in accord with
146 |     Subsection b above.)
147 | 
148 | The source code for a work means the preferred form of the work for making
149 | modifications to it.  For an executable work, complete source code means all
150 | the source code for all modules it contains, plus any associated interface
151 | definition files, plus the scripts used to control compilation and installation
152 | of the executable.  However, as a special exception, the source code
153 | distributed need not include anything that is normally distributed (in either
154 | source or binary form) with the major components (compiler, kernel, and so on)
155 | of the operating system on which the executable runs, unless that component
156 | itself accompanies the executable.
157 | 
158 | If distribution of executable or object code is made by offering access to copy
159 | from a designated place, then offering equivalent access to copy the source
160 | code from the same place counts as distribution of the source code, even though
161 | third parties are not compelled to copy the source along with the object code.
162 | 
163 | 4. You may not copy, modify, sublicense, or distribute the Program except as
164 | expressly provided under this License.  Any attempt otherwise to copy, modify,
165 | sublicense or distribute the Program is void, and will automatically terminate
166 | your rights under this License.  However, parties who have received copies, or
167 | rights, from you under this License will not have their licenses terminated so
168 | long as such parties remain in full compliance.
169 | 
170 | 5. You are not required to accept this License, since you have not signed it.
171 | However, nothing else grants you permission to modify or distribute the Program
172 | or its derivative works.  These actions are prohibited by law if you do not
173 | accept this License.  Therefore, by modifying or distributing the Program (or
174 | any work based on the Program), you indicate your acceptance of this License to
175 | do so, and all its terms and conditions for copying, distributing or modifying
176 | the Program or works based on it.
177 | 
178 | 6. Each time you redistribute the Program (or any work based on the Program),
179 | the recipient automatically receives a license from the original licensor to
180 | copy, distribute or modify the Program subject to these terms and conditions.
181 | You may not impose any further restrictions on the recipients' exercise of the
182 | rights granted herein.  You are not responsible for enforcing compliance by
183 | third parties to this License.
184 | 
185 | 7. If, as a consequence of a court judgment or allegation of patent
186 | infringement or for any other reason (not limited to patent issues), conditions
187 | are imposed on you (whether by court order, agreement or otherwise) that
188 | contradict the conditions of this License, they do not excuse you from the
189 | conditions of this License.  If you cannot distribute so as to satisfy
190 | simultaneously your obligations under this License and any other pertinent
191 | obligations, then as a consequence you may not distribute the Program at all.
192 | For example, if a patent license would not permit royalty-free redistribution
193 | of the Program by all those who receive copies directly or indirectly through
194 | you, then the only way you could satisfy both it and this License would be to
195 | refrain entirely from distribution of the Program.
196 | 
197 | If any portion of this section is held invalid or unenforceable under any
198 | particular circumstance, the balance of the section is intended to apply and
199 | the section as a whole is intended to apply in other circumstances.
200 | 
201 | It is not the purpose of this section to induce you to infringe any patents or
202 | other property right claims or to contest validity of any such claims; this
203 | section has the sole purpose of protecting the integrity of the free software
204 | distribution system, which is implemented by public license practices.  Many
205 | people have made generous contributions to the wide range of software
206 | distributed through that system in reliance on consistent application of that
207 | system; it is up to the author/donor to decide if he or she is willing to
208 | distribute software through any other system and a licensee cannot impose that
209 | choice.
210 | 
211 | This section is intended to make thoroughly clear what is believed to be a
212 | consequence of the rest of this License.
213 | 
214 | 8. If the distribution and/or use of the Program is restricted in certain
215 | countries either by patents or by copyrighted interfaces, the original
216 | copyright holder who places the Program under this License may add an explicit
217 | geographical distribution limitation excluding those countries, so that
218 | distribution is permitted only in or among countries not thus excluded.  In
219 | such case, this License incorporates the limitation as if written in the body
220 | of this License.
221 | 
222 | 9. The Free Software Foundation may publish revised and/or new versions of the
223 | General Public License from time to time.  Such new versions will be similar in
224 | spirit to the present version, but may differ in detail to address new problems
225 | or concerns.
226 | 
227 | Each version is given a distinguishing version number.  If the Program
228 | specifies a version number of this License which applies to it and "any later
229 | version", you have the option of following the terms and conditions either of
230 | that version or of any later version published by the Free Software Foundation.
231 | If the Program does not specify a version number of this License, you may
232 | choose any version ever published by the Free Software Foundation.
233 | 
234 | 10. If you wish to incorporate parts of the Program into other free programs
235 | whose distribution conditions are different, write to the author to ask for
236 | permission.  For software which is copyrighted by the Free Software Foundation,
237 | write to the Free Software Foundation; we sometimes make exceptions for this.
238 | Our decision will be guided by the two goals of preserving the free status of
239 | all derivatives of our free software and of promoting the sharing and reuse of
240 | software generally.
241 | 
242 | NO WARRANTY
243 | 
244 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR
245 | THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN OTHERWISE
246 | STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE
247 | PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
248 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
249 | FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND
250 | PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE,
251 | YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
252 | 
253 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL
254 | ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE
255 | PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
256 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
257 | INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
258 | BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
259 | FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER
260 | OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
261 | 
262 | END OF TERMS AND CONDITIONS
263 | 
264 | How to Apply These Terms to Your New Programs
265 | 
266 | If you develop a new program, and you want it to be of the greatest possible
267 | use to the public, the best way to achieve this is to make it free software
268 | which everyone can redistribute and change under these terms.
269 | 
270 | To do so, attach the following notices to the program.  It is safest to attach
271 | them to the start of each source file to most effectively convey the exclusion
272 | of warranty; and each file should have at least the "copyright" line and a
273 | pointer to where the full notice is found.
274 | 
275 |     One line to give the program's name and a brief idea of what it does.
276 | 
277 |     Copyright (C) <year> <name of author>
278 | 
279 |     This program is free software; you can redistribute it and/or modify it
280 |     under the terms of the GNU General Public License as published by the Free
281 |     Software Foundation; either version 2 of the License, or (at your option)
282 |     any later version.
283 | 
284 |     This program is distributed in the hope that it will be useful, but WITHOUT
285 |     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
286 |     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
287 |     more details.
288 | 
289 |     You should have received a copy of the GNU General Public License along
290 |     with this program; if not, write to the Free Software Foundation, Inc.,
291 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
292 | 
293 | Also add information on how to contact you by electronic and paper mail.
294 | 
295 | If the program is interactive, make it output a short notice like this when it
296 | starts in an interactive mode:
297 | 
298 |     Gnomovision version 69, Copyright (C) year name of author Gnomovision comes
299 |     with ABSOLUTELY NO WARRANTY; for details type 'show w'.  This is free
300 |     software, and you are welcome to redistribute it under certain conditions;
301 |     type 'show c' for details.
302 | 
303 | The hypothetical commands 'show w' and 'show c' should show the appropriate
304 | parts of the General Public License.  Of course, the commands you use may be
305 | called something other than 'show w' and 'show c'; they could even be
306 | mouse-clicks or menu items--whatever suits your program.
307 | 
308 | You should also get your employer (if you work as a programmer) or your school,
309 | if any, to sign a "copyright disclaimer" for the program, if necessary.  Here
310 | is a sample; alter the names:
311 | 
312 |     Yoyodyne, Inc., hereby disclaims all copyright interest in the program
313 |     'Gnomovision' (which makes passes at compilers) written by James Hacker.
314 | 
315 |     signature of Ty Coon, 1 April 1989
316 | 
317 |     Ty Coon, President of Vice
318 | 
319 | This General Public License does not permit incorporating your program into
320 | proprietary programs.  If your program is a subroutine library, you may
321 | consider it more useful to permit linking proprietary applications with the
322 | library.  If this is what you want to do, use the GNU Library General Public
323 | License instead of this License.
324 | 
325 | 
326 | "CLASSPATH" EXCEPTION TO THE GPL
327 | 
328 | Certain source files distributed by Oracle America and/or its affiliates are
329 | subject to the following clarification and special exception to the GPL, but
330 | only where Oracle has expressly included in the particular source file's header
331 | the words "Oracle designates this particular file as subject to the "Classpath"
332 | exception as provided by Oracle in the LICENSE file that accompanied this code."
333 | 
334 |     Linking this library statically or dynamically with other modules is making
335 |     a combined work based on this library.  Thus, the terms and conditions of
336 |     the GNU General Public License cover the whole combination.
337 | 
338 |     As a special exception, the copyright holders of this library give you
339 |     permission to link this library with independent modules to produce an
340 |     executable, regardless of the license terms of these independent modules,
341 |     and to copy and distribute the resulting executable under terms of your
342 |     choice, provided that you also meet, for each linked independent module,
343 |     the terms and conditions of the license of that module.  An independent
344 |     module is a module which is not derived from or based on this library.  If
345 |     you modify this library, you may extend this exception to your version of
346 |     the library, but you are not obligated to do so.  If you do not wish to do
347 |     so, delete this exception statement from your version.
348 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | include ojweb-generate/Makefile
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Welcome to amber-docs!
 3 | ======================
 4 | 
 5 | This repository is for design notes, presentations, guides, FAQs, and
 6 | other collateral surrounding [OpenJDK Project Amber](http://openjdk.java.net/projects/amber).
 7 | 
 8 | Most documents here are written in (pandoc) Markdown, and changes made
 9 | to Markdown sources are automatically formatted and pushed to the
10 | [OpenJDK website](http://openjdk.java.net/projects/amber).
11 | 
12 | See https://openjdk.java.net/ for more information about
13 | the OpenJDK Community and the JDK.
14 | 
15 | 


--------------------------------------------------------------------------------
/eg-drafts/deconstruction-patterns-records-and-classes.md:
--------------------------------------------------------------------------------
  1 | # Deconstruction Patterns for Records and Classes
  2 | #### Brian Goetz {.author}
  3 | #### August 2020 {.date}
  4 | 
  5 | > This document describes a possible approach for a future phase of _pattern
  6 | matching_ -- adding deconstruction patterns for records and classes.  This
  7 | builds on the work of [JEP 375](https://openjdk.java.net/jeps/375), and would follow [type patterns in
  8 | switch](../site/design-notes/patterns/type-patterns-in-switch).  _This is an exploratory document only
  9 | and does not constitute a plan for any specific feature in any specific version
 10 | of the Java Language._
 11 | 
 12 | [Records][records] are transparent carriers for their data.  By _transparent_,
 13 | we mean that they will readily give up their data, in the same form as their
 14 | state description, when asked.  The simplest way that they expose their data is
 15 | through accessor methods; for each record component `x`, there is a
 16 | corresponding accessor method `x()`.  (These methods can be overridden to
 17 | perform defensive copies when needed, but such overrides are constrained by the
 18 | specification of `Record::equals` to conform to the constraint that
 19 | deconstructing a record and creating a new record from the results must be
 20 | `equals` to the original record.)
 21 | 
 22 | Because records give up their state when asked, they already work, to some
 23 | degree, with [pattern matching][patterns0], since we can pattern match on a
 24 | record type and then call the accessors to extract the state:
 25 | 
 26 | ```
 27 | if (shape instanceof Circle c) {
 28 |     double radius = c.radius();
 29 |     double circumference = 2 * Math.PI * c.radius();
 30 |     double area = Math.PI * radius * radius;
 31 |     ...
 32 | }
 33 | ```
 34 | 
 35 | But, we can do better.  Records are _nominal tuples_, and languages with tuples
 36 | generally support deconstruction on tuples.  The analogue of this for records in
 37 | Java is a _deconstruction pattern_:
 38 | 
 39 | ```
 40 | if (shape instanceof Circle(var c, var r)) {
 41 |     double circumference = 2 * Math.PI * r;
 42 |     double area = Math.PI * r * r;
 43 | }
 44 | ```
 45 | 
 46 | Here, we perform the type test, extract the components, and bind the components
 47 | to variables in one go.  A deconstruction pattern for a record is equivalent to
 48 | testing it with a type pattern, and if that succeeds, invoking the accessors for
 49 | each component and binding the results to fresh variables.  Just as we are able
 50 | to infer the behavior for constructors, accessors, and `Object` methods for
 51 | records, we can do the same for deconstruction patterns.
 52 | 
 53 | ### Deconstruction patterns
 54 | 
 55 | [JEP 375][patterns0] gave us one kind of pattern: type patterns.  A type pattern
 56 | is denoted by a type name and a variable identifier: `String s` or `List<String>
 57 | list`.  The semantics of a type pattern is that of an `instanceof` test for the
 58 | type, plus, if the test succeeds, casting to that type.  For a type pattern `T
 59 | t` to be applicable to a target of type `U` (`u instanceof T t`), `U` must be
 60 | cast-convertible to `T` without unchecked warnings.  (This is why we can use
 61 | generics safely in type patterns.)  
 62 | 
 63 | A simple deconstruction pattern takes the form of `D(T t, ...) [d]`, where `D`
 64 | is the name of a type with a deconstruction pattern, `T t` is a  type pattern,
 65 | and `d` is an optional binding variable (of type `D`) to receive the result of
 66 | casting the target to `D`.
 67 | 
 68 | A deconstruction pattern `D(T t)` is applicable to any target type to which the
 69 | type pattern `D d` would be applicable -- any type that is cast-convertible
 70 | to `D` without an unchecked warning.  Further, the nested pattern `T t` must be
 71 | applicable to the type of the corresponding binding variable of the
 72 | deconstruction pattern `D`.  A deconstruction pattern never matches `null`.
 73 | 
 74 | Records automatically acquire a canonical deconstruction pattern whose binding
 75 | variables correspond to the components of the record in the canonical order,
 76 | and whose implementation binds these to the return value of the corresponding
 77 | accessor.
 78 | 
 79 | ### Nested patterns
 80 | 
 81 | The above description -- where we describe what is between the parentheses in a
 82 | deconstruction pattern -- is a simplification.  In reality, what is between the
 83 | parentheses is an ordered list of patterns, which will be recursively matched
 84 | against the extracted components.  If our deconstruction pattern is
 85 | `Circle(Point center, double radius)`, then `Point center` and `double radius`
 86 | are just ordinary patterns.  But we can nest any pattern there, as long as
 87 | it is applicable to the type of the corresponding binding.  
 88 | 
 89 | Matching a nested pattern `target matches P(Q)` is equivalent to the compound
 90 | match
 91 | 
 92 |     target matches P(T alpha) && alpha matches Q
 93 | 
 94 | where `T` is the type of the binding component of `P`.  We say "matches" here
 95 | rather than `instanceof` to highlight that we want to  use the intrinsic
 96 | matching semantics of the pattern, without consideration for the null-handling
 97 | behavior of language constructs such as `instanceof` or `switch`.
 98 | 
 99 | We can nest deconstruction patterns inside deconstruction patterns:
100 | 
101 | ```
102 | if (shape instanceof Circle(Point(var x, var y), double radius)) { ... }
103 | ```
104 | 
105 | We can also use type inference to elide the manifest type name, and the type
106 | will be inferred based on the type of the corresponding binding component:
107 | 
108 | ```
109 | if (shape instanceof Circle(var center, var radius)) { ... }
110 | ```
111 | 
112 | A nested deconstruction pattern `D(P, Q)` is _total_ on a target type `T` if the
113 | type pattern `D d` is total on `T`, and the patterns `P` and `Q` are total on
114 | the types of `D`'s binding components.
115 | 
116 | ### Match statements
117 | 
118 | Some patterns (such as `var x`) are total on their target type (will always
119 | match); others are partial (may fail).  Further, for some patterns, the
120 | applicability criteria can be evaluated statically by the compiler (the compiler
121 | knows that the type pattern `Object o` is total on `String`), whereas for others
122 | (such as `Optional.of(var x)`), whether the pattern matches can only be
123 | determined at runtime.  Total patterns can be given special treatment by the
124 | language, since their applicability can be statically analyzed.
125 | 
126 | If a pattern `P` is total on its target type, we don't have to apply it with a
127 | conditional construct (`instanceof` or `switch`); we can apply it with an
128 | unconditional construct.  We propose to introduce a _deconstruction statement_
129 | which is modeled on local variable declarations with initializers:
130 | 
131 | ```
132 | Foo f = bar.getFoo();
133 | ```
134 | 
135 | Here, `Foo f` is a local variable declaration, and `= bar.getFoo()` is an
136 | initializer of type `Foo`; the compiler type-checks that the initializer has a
137 | compatible type with the declaration, and the variable `f` is in scope for the
138 | remainder of the block immediately containing this declaration.  But, note that
139 | `Foo f` is also a pattern, and we could also interpret the above as a pattern
140 | match: since we know that `Foo f` will always match an expression of type `Foo`,
141 | the above has equivalent semantics to:
142 | 
143 | ```
144 | if (!(bar.getFoo() instanceof Foo f))
145 |     throw new ImpossibleException();
146 | // f is in scope here, due to flow scoping!
147 | ```
148 | 
149 | So we can generalize the declaration of local variables with initializers as
150 | being a match to a total pattern.  Since deconstruction patterns (with total
151 | nested patterns) are total on their corresponding type, we could deconstruct
152 | a `Circle` as:
153 | 
154 | ```
155 | void foo(Circle c) {
156 |     Circle(var center, var radius) = c;
157 | }
158 | ```
159 | 
160 | The deconstruction pattern here is total on circles, so the above statement has
161 | the effect of deconstructing the circle and binding `center` and `radius` to
162 | local variables that are in scope for the remainder of the method body.
163 | 
164 | A match statement with a non-nullable pattern throws `NullPointerException` if
165 | the match target is null.  Deconstruction patterns are not nullable -- because
166 | they intrinsically invoke a member with the target as a receiver -- so the above
167 | would throw `NullPointerException` (as would the equivalent code that accesses
168 | the state of the `Circle` directly.)
169 | 
170 | ## Deconstructors for classes
171 | 
172 | We can surely derive deconstruction patterns for records, but it would be nice
173 | if we could do deconstruction on arbitrary classes as well.  Of course, classes
174 | would have to declare something about how to deconstruct them, since we cannot
175 | derive this automatically the way we do for records.
176 | 
177 | A deconstruction pattern can be thought of as the dual of a constructor; a
178 | constructor takes N state components and aggregates them into an object, and a
179 | deconstruction pattern takes an object and decomposes it into N state
180 | components.  Constructors are instance members that are not inherited, and
181 | their names are constrained to be the name of the class; the same is true for
182 | deconstructors.
183 | 
184 | ```
185 | class Point {
186 |     int x;
187 |     int y;
188 | 
189 |     public Point(int x, int y) {
190 |         this.x = x;
191 |         this.y = y;
192 |     }
193 | 
194 |     public deconstructor Point(int x, int y) {
195 |         x = this.x;
196 |         y = this.y;
197 |     }
198 | }
199 | ```
200 | 
201 | The arguments of a constructor are parameters; the "arguments" of a
202 | deconstructor are declarations of binding variables (which are treated as blank
203 | finals in scope in the body.)  Because a deconstruction pattern is total, it
204 | _must_ assign all of the bindings, so we require that they all be DA in all exit
205 | paths.  The body of a deconstructor is effectively the body of a `void` method,
206 | so may use `return` if needed (like constructors).  The syntax is chosen to
207 | highlight the duality between constructors and deconstructors.
208 | 
209 | ### Overloading
210 | 
211 | Like constructors, deconstructors can be overloaded.  However, overload
212 | resolution is slightly different than for methods and constructors.  (For
213 | example, deconstructor parameters are output parameters rather than inputs, and
214 | so the directions of relations like subtyping are reversed in some of the
215 | applicability rules.)  And at the use site, what is specified is not an
216 | expression, but a pattern, so the way we derive constraints about which
217 | overloads are applicable is different as well.
218 | 
219 | (Details TBD, but given the expected prevalence of `var` patterns at the use
220 | site, we'd expect that in practice, we'll mostly only see overloads on arity.)
221 | 
222 | ### Varargs
223 | 
224 | Varargs patterns seem quite useful.  As a motivating example, suppose we have a
225 | pattern for a regular expression, whose binding is a varargs containing the
226 | matched groups.  We may well want to further match on the elements individually
227 | with separate patterns; suppose a regex extracts an integer and a string, we may
228 | well want to  match the regex against a target, and then have separate nested
229 | patterns to  further refine these two extracted groups.  
230 | 
231 | This is a substantial separate investigation; for now, details TBD, and we will
232 | likely hold off supporting the declaration of varargs patterns until we have a
233 | suitable semantics for matching varargs patterns.
234 | 
235 | ### Composition
236 | 
237 | Constructors compose with other constructors; constructors may delegate to
238 | superclass constructors, and may further invoke constructors to initialize the
239 | fields of the object.  Given the duality between constructors and
240 | deconstructors, not only do we want to support these modes of composition, but
241 | we'd like the composed form of each to look similar (enough); the more that the
242 | denotation of a constructor and deconstructor diverge, the more likely bugs will
243 | creep in.  We can lean on pattern assignment as our analogue of constructor
244 | invocation.
245 | 
246 | As a starting point:
247 | 
248 | ```
249 | class A {
250 |     int a;
251 | 
252 |     public A(int a) { this.a = a; }
253 |     public deconstructor A(int a) { a = this.a; }
254 | }
255 | 
256 | class B extends A { // extension
257 |     int b;
258 | 
259 |     public B(int a, int b) {
260 |         super(a);
261 |         this.b = b;
262 |     }
263 | 
264 |     public deconstructor B(int a, int b) {
265 |         super(var aa) = this;
266 |         a = aa;
267 |         b = this.b;
268 |     }
269 | }
270 | 
271 | class WithB { // composition
272 |     B b;
273 | 
274 |     public WithB(int a, int b) {
275 |         this.b = new B(a, b);
276 |     }
277 | 
278 |     public deconstructor WithB(int a, int b) {
279 |         B(var aa, var bb) = this;
280 |         a = aa;
281 |         b = bb;
282 |     }
283 | }
284 | ```
285 | 
286 | These examples illustrate composition with both extension and delegation.  In
287 | both cases, the constructor and deconstructor can delegate to that of another
288 | class (whether a superclass or not) to do part of its work.  This is good.
289 | 
290 | No one could read the above examples, though, without immediately disliking the
291 | fact that we had to introduce "garbage" variables `aa` and `bb` just to  receive
292 | the downstream bindings, and then immediately assign them to the current
293 | bindings.  (Secondarily, it might be pleasant and less error-prone to be able to
294 | omit the `= this` when invoking the superclass deconstructor.)
295 | 
296 | Without devolving into the bikeshed, what we're missing here to make composition
297 | smooth is some way to use a blank final as a pattern in a pattern assignment.
298 | One obvious choice is to just use the variable name directly:
299 | 
300 | ```
301 | B(a, b) = this;
302 | ```
303 | 
304 | but this is undesirable as (unlike with all other patterns) there is no visual
305 | cue that `a` is not just an expression and this is not just a method call.  (We
306 | had the same concern with constant patterns, and the current thinking is to just
307 | not have them, or if we must, to give them a more obviously-a-pattern syntax.)
308 | So some syntactic decoration of `a` to make it clear that it is the assignment
309 | target, rather than a value source, (e.g., `a=`, `bind a`, etc) may be in order.
310 | 
311 | The other form of composition in constructors is delegating to `this`; this is
312 | analogous to delegating to `super` (and we may want similarly to default to `=
313 | this`).
314 | 
315 | ### Translation
316 | 
317 | A deconstructor cannot be translated in the obvious way, because of its multiple
318 | bindings.  But, we can lean on records (and eventually, inline records) to
319 | smooth this out.  A deconstruction pattern:
320 | 
321 | ```
322 | deconstructor Foo(int x, int y) { x = blah; y = blarg; }
323 | ```
324 | 
325 | can be translated as
326 | 
327 | ```
328 | [inline] record Foo$abc(int x, int y) { }
329 | Foo$abc <deconstruct>() { return new Foo$abc(blah, blarg); }
330 | ```
331 | 
332 | However, we have to exercise care with the name mangling of the record, since
333 | this descriptor will appear in the classfiles of clients -- this name must be
334 | stable with respect to source- and binary-compatible changes to the declaration
335 | of the deconstructor (or the rest of the class.)  
336 | 
337 | Our strategy here is to compute the descriptor for the deconstructor as if it
338 | were a method (yielding a method descriptor containing the erasure of the
339 | binding variables), and then encode that descriptor using the [symbolic
340 | freedom][symfree] encoding -- which was designed for exactly this purpose -- as
341 | our `abc` disambiguator.  Valid overloads will have distinct, stable
342 | disambiguators.
343 | 
344 | The use of inline classes (when we have them) reduces the cost of this
345 | translation mechanism; we can compatibly switch from records to inline records
346 | later as long as we generate a reference bridge for binary compatibility with
347 | old clients.
348 | 
349 | On the client side, we can factor a deconstruction pattern into an
350 | instanceof test and a conditional invocation of the desugared
351 | `<deconstruct>` method, and then invoke the corresponding record
352 | accessor for each binding variable needed by client code.
353 | 
354 | Nested patterns `P(Q)` at the client can be unrolled into `P(var
355 | alpha) && alpha matches Q` at the use site; in switches, the secondary
356 | component can be lowered to a guard.
357 | 
358 | ### Relationship to accessors
359 | 
360 | There is a notable relationship between deconstruction patterns and accessors;
361 | we can think of deconstructors as "multi-accessors" and implement accessors in
362 | terms of them, or we can implement a deconstructor in terms of accessors.  
363 | 
364 | Without diving too deeply into this topic, just as we can derive read accessors
365 | for record components, we would like to be able to, eventually, derive read
366 | accessors from a suitable deconstruction pattern for arbitrary classes.  This
367 | feature is outside the scope of this document.  
368 | 
369 | ### Records
370 | 
371 | Going forward, we can compile records to have a canonical deconstruction pattern
372 | whose implementation merely delegates to the component accessors.  In fact,
373 | because it is important that the accessors and deconstruction pattern agree on
374 | how to extract a given component, this will likely be the _only_ way to
375 | influence the deconstruction of records -- override the accessor.  You can
376 | declare additional (overloaded) deconstructors.
377 | 
378 | ### Switch miscellany
379 | 
380 | With the advent of deconstruction and nested patterns, some additional
381 | limitations of `switch` will be exposed.  We would like it to be a
382 | universally valid refactoring to refactor a set of nested
383 | deconstruction patterns:
384 | 
385 | ```
386 | case P(Q): A
387 | case P(R): B
388 | case P(S): C
389 | ```
390 | 
391 | to
392 | 
393 | ```
394 | case P(var alpha):
395 |     switch (alpha) {
396 |         case Q: A
397 |         case R: B
398 |         case S: C
399 |     }
400 | ```
401 | 
402 | However, there are still several primitive types (`float`, `double`,
403 | and `boolean`) that `switch` does not support as target types; these
404 | would need to be added in order for this to not become a refactoring
405 | impediment.  Adding these to `switch` is easy enough, and most of the
406 | type-specific work can be done in an `indy` bootstrap.
407 | 
408 | 
409 | 
410 | [records]: https://openjdk.java.net/jeps/359
411 | [patterns0]: https://openjdk.java.net/jeps/375
412 | [symfree]: https://blogs.oracle.com/jrose/symbolic-freedom-in-the-vm
413 | 


--------------------------------------------------------------------------------
/eg-drafts/reconstruction-records-and-classes.md:
--------------------------------------------------------------------------------
  1 | # Functional Transformation of Immutable Objects
  2 | #### Brian Goetz {.author}
  3 | #### August 2020 {.date}
  4 | 
  5 | > This document is an _early stage draft_ outlining a possible direction for supporting _functional transformation_ in the Java Language. This is an exploratory document only and does not constitute a plan for any specific feature in any specific version of the Java Language. This document also may reference other features under exploration; this is purely for illustrative purposes, and does not constitute any sort of plan or commitment to deliver any of these features.
  6 | 
  7 | Everyone likes records.  But, adding features like records and deconstruction,
  8 | which solve some old problems, raises new ones.  And, everything we can do
  9 | with records but not with classes increases a gap where users might feel they
 10 | have to make a hard choice; it's time to start charting the path of generalizing
 11 | the record "goodies" so that suitable classes can join in the fun.
 12 | 
 13 | This document focuses primarily on one specific goodie, which we'd like to endow
 14 | to records and then as soon as possible extend to classes: functional
 15 | transformation.  There are other potential new goodies for records (e.g.,
 16 | keyword-based construction and deconstruction) and new goodie-democratization
 17 | opportunities (e.g., accessors for arbitrary classes) that can come later.
 18 | 
 19 | More generally, in the last 10-15 years, Java developers have started to
 20 | appreciate the value of immutability for creating safe, reliable code that is
 21 | easy to reason about, and the language is starting to reflect that appreciation
 22 | -- but at every turn, the language and runtime still make us pay a tax for using
 23 | immutable objects.  Functional transformation allows immutable objects to pay
 24 | less usability tax (and, with inline classes, less performance tax as well.)
 25 | 
 26 | ## Withers
 27 | 
 28 | Records and inline classes are two new forms of shallowly-immutable classes in
 29 | Java.  This makes it even more obvious that "mutating" (applying a functional
 30 | transformation to) an immutable object is currently too painful.  (Obviously
 31 | records cannot be mutated -- but the next best thing is to create a new record
 32 | that has a known delta from an existing record.)  If our `Point` record wants to
 33 | expose a way to "set" the `x` and `y` components, it has to write `withX` and
 34 | `withY` methods:
 35 | 
 36 | ```
 37 | record Point(int x, int y) {
 38 |     Point withX(int newX) { return new Point(newX, y); }
 39 |     Point withY(int newY) { return new Point(x, newY); }
 40 | }
 41 | ```
 42 | 
 43 | This is doable, and has the advantage of working with the language we have
 44 | today, but has two obvious drawbacks.  Developers are clearly thrilled to be
 45 | free of the usual state-related boilerplate, and this would form a new category
 46 | of boilerplate that does not get automated away (two steps forward, one step
 47 | back),  and when records have many state components, writing these "withers"
 48 | gets more tedious and error-prone.  For records, at least, the language has
 49 | enough information to automate this away, so it would be especially a shame to
 50 | ask developers to do it by hand.
 51 | 
 52 | In fact, "withers" would be even worse than getters and setters, since while a
 53 | class might have _O(n)_ getters, it could conceivably have _O(2^n)_ withers.
 54 | Worse, as the number of components grows, the bodies of these "wither" accessors
 55 | get more error-prone.  Let's nip this one in the bud before it becomes a
 56 | "pattern" (or worse, a "best practice".)
 57 | 
 58 | This problem is not unique to records or inline classes; existing value-based
 59 | classes (such as `LocalDateTime`) have had to expose wither methods as well.
 60 | But, if the language is going to encourage us to write immutable classes, it
 61 | should also help us with this problem.
 62 | 
 63 | ### Digression: learning from C\#
 64 | 
 65 | Our friends in the C# world have taken two swings at this problem already.
 66 | Their first solution builds on the fact that they already allow parameters to
 67 | have default values, and later added the ability for default values to refer to
 68 | `this`.  This means that you can write an ordinary library method called `with`:
 69 | 
 70 | ```
 71 | class Point {
 72 |     int x;
 73 |     int y;
 74 | 
 75 |     Point with(int x = this.x, int y = this.y) {
 76 |        return new Point(x, y);
 77 |     }
 78 | }
 79 | ```
 80 | 
 81 | This is an improvement in that it allows you to write _one_ method to handle the
 82 | _2^n_ possible combinations, as a pure API consideration, and the client can
 83 | specify only the parameters they want to change:
 84 | 
 85 | ```
 86 | p = p.with(x : 3);
 87 | ```
 88 | 
 89 | But, clearly that wasn't enough for C# developers, because recently (C# 9), they
 90 | have also introduced a `with` expression into the language:
 91 | 
 92 | ```
 93 | p with { x = 3; }
 94 | ```
 95 | 
 96 | The block on the right is extremely limited; it is a set of property
 97 | assignments.  (C# also recently introduced "init-only" properties (effectively,
 98 | named constructor arguments), so the above causes a new `Point` to be
 99 | instantiated, where the property assignments written in the block override those
100 | of the left operand.)
101 | 
102 | ### With expressions in Java
103 | 
104 | The C# approach was sensible for them because they could build on features they
105 | already had (default parameters, properties), but just copying that approach
106 | would drag with it a lot of baggage.  But we already have, in Java, the building
107 | blocks we need (almost) to do it differently, and possibly more richly:
108 | constructors and deconstructors.
109 | 
110 | A _reconstruction expression_ takes an operand whose static type is `T` and
111 | a block, where the block expresses a functional transformation on the state
112 | of the operand, and yields a new instance of type `T`:
113 | 
114 | ```
115 | Point p;
116 | Point pp = p with { x = 3; }
117 | ```
118 | 
119 | Here, `pp` will have whatever `y` value `p` had, and `x=3`.  The block  can be
120 | an arbitrary sequence of Java statements (assignments, loops, method calls,
121 | etc.), with some restrictions.  Ideally, we can define reconstruction for
122 | arbitrary classes, not just records, but we'll start with records.  A record
123 | always has a canonical constructor and deconstruction pattern.  That means
124 | we can interpret the above `with` expression as:
125 | 
126 |  - Declare a new block, with fresh mutable locals whose names and types are
127 |    those of the record components;
128 |  - Deconstruct the target with the canonical deconstructor, and assign the
129 |    results to the variables described above;
130 |  - Execute the block from the `with` in that scope, that can mutate those locals
131 |    if the right names are used;
132 |  - Read the final value of the locals, and invoke the canonical constructor with
133 |    them;
134 |  - The value of the `with` expression is the resulting instance.
135 | 
136 | So, an expression such as
137 | 
138 | ```
139 | p with { x = 3; }
140 | ```
141 | 
142 | can be interpreted as something like:
143 | 
144 | ```
145 | {                             // new scope
146 |     Point(var x, var y) = p;  // deconstruct the LHS with canonical deconstructor
147 |     { x = 3; }                // execute the RHS in that scope
148 |     yield new Point(x, y);    // reconstruct with new values
149 | }
150 | ```
151 | 
152 | We can think of the block on the RHS of the `with` expression as a functional
153 | transformation on the state of the record.  As such, it is reasonable to impose
154 | some restrictions on it.  For reasons that will become clear later, we will
155 | prohibit writing to any variables other than the locals corresponding to the
156 | extracted components, and locals declared inside the block.  The block is free
157 | to use any feature of the language (such as loops and conditional), not just
158 | assignment -- it is not a "DSL", it's a block of Java code that expresses a
159 | transformation on the state of the record.  
160 | 
161 | Clients can use `with` expressions, but classes may also want to use them in
162 | their implementation as well.  For example:
163 | 
164 | ```
165 | record Complex(double real, double im) {
166 |     Complex conjugate() { return this with { im = -im; } }
167 |     Complex realOnly() { return this with { im = 0; } }
168 |     Complex imOnly() { return this with { real = 0; } }
169 | }
170 | ```
171 | 
172 | Clients can of course perform these same manipulations, but the author may deem
173 | these operations to be important enough in this domain that it makes sense to
174 | expose as methods.
175 | 
176 | Note too that if the canonical constructor checks invariants, then a `with`
177 | expression will check them too.  For example:
178 | 
179 | ```
180 | record Rational(int num, int denom) {
181 |     Rational {
182 |         if (denom == 0)
183 |             throw new IllegalArgumentException("denom must not be zero");
184 |     }
185 | }
186 | ```
187 | 
188 | If we have a rational, and say
189 | 
190 | ```
191 | r with { denom = 0; }
192 | ```
193 | 
194 | we will get the same exception, since what this will do is unpack the numerator
195 | and denominator into mutable locals, mutate the denominator to zero, and then
196 | feed them back to the canonical constructor -- who will throw.  
197 | 
198 | ## Extrapolating from records
199 | 
200 | There is very little we would have to do to start supporting `with` expressions
201 | on records; all the building blocks are in place.  But before we go there, we
202 | want to make sure we have a path to extending this to any class that wants to
203 | participate in this protocol.  
204 | 
205 | What made things work with records is that we had a canonical constructor and
206 | deconstruction pattern, which were known to have stable signatures, names, and
207 | which match each other.  We can interpret this as an _external state
208 | description_ which we know how to map to and from the internal state description
209 | (which in the case of records is the same as the external description, possibly
210 | with some validation.)  Ordinary classes will not have all that, since they may
211 | have a different representation, but it may be possible to add back enough
212 | metadata to derive these behaviors.
213 | 
214 | Functional transformation depends on deconstruction, so we need a way to declare
215 | a deconstructor for an arbitrary class, and this is on the plan.  For purposes
216 | of this document, we will denote a deconstruction pattern as:
217 | 
218 | ```
219 | public deconstructor Point(int x, int y) { ... }
220 | ```
221 | 
222 | But, we're not there yet.  A class may have multiple constructors and
223 | deconstructors; we have to select a matched pair that extracts all the relevant
224 | state, and then is willing to repack the modified state into a new object.  We
225 | need a way of selecting the correct constructor and deconstructor.  
226 | 
227 | Obviously, the traditional way of selecting overloads won't work here, but
228 | there's another way we could get there -- by parameter name.  Let's assume for a
229 | moment that parameter names were significant (though we know they are not.)  
230 | We can interpret
231 | 
232 | ```
233 | p with { x = 3; }
234 | ```
235 | 
236 | as an overload selection problem: "Find me a (maximal) constructor that takes an
237 | `x`.  Then, take all the names and types in the argument list for that
238 | constructor, and find me a (minimal) deconstructor that extracts them all." But
239 | how do we even find the initial set `{ x }`?  This is where our restriction
240 | (which is a reasonable one anyway) comes in; we can look at the assignment
241 | targets in the block that are not assignments to locals declared in the block,
242 | and they must be assignments to "properties" of the object being reconstructed.
243 | 
244 | ### Making names significant
245 | 
246 | Making the parameter and binding names of constructors and deconstructors
247 | significant was needed for this feature (and, as it turns out, for a number of
248 | other desirable features too.)  We surely can't just say "from today on, they
249 | are significant", but we can give people an opt-in to significant names.  For purposes
250 | of exposition within this document, we'll
251 | use the modifier `__byname` on a constructor or deconstructor to indicate that
252 | names are significant.
253 | 
254 | Since we're anticipating using these names at the client site, we also need to
255 | define how `__byname` constructors and deconstructors can be overloaded, and how
256 | to do overload selection.  As a strawman, we'll take the simplest thing that
257 | could work: only one `__byname` constructor or deconstructor.  Later we'll see
258 | how to extend this.   If there is only one `__byname` constructor and
259 | deconstructor, clearly given:
260 | 
261 | ```
262 | p with { y = 3 }
263 | ```
264 | 
265 | we can interpret this the same way as for records:
266 | 
267 |  - Deconstruct the target with the `__byname` deconstructor found in the class
268 |    which is the static type of the target;
269 |  - Bind the resulting components to fresh mutable locals;
270 |  - Execute the RHS block in this context, with the restriction that it may only
271 |    mutate variables whose names appear in the constructor;
272 |  - Invoke the constructor with the final values of the component variables.
273 | 
274 | In this way, any class with a `__byname` constructor and deconstructor can
275 | participate in the reconstruction protocol.
276 | 
277 | ### Refining overloading
278 | 
279 | The "only one constructor and deconstructor" rule is overly restrictive,  so
280 | let's extend this to be more useful.  The existing rules about overloading and
281 | selecting members according to their parameter types and positions do not need
282 | to change, but we would need new rules for overloading `__byname` members (both
283 | against each other, and against positional declarations).
284 | 
285 | Each `__byname` constructor / deconstructor can be invoked positionally, so  it
286 | must follow the existing rules for overloading and overload selection.  A set of
287 | `__byname` members is valid if and only if for each two such members, their name
288 | sets are either disjoint or one is a proper subset of the other and the types
289 | corresponding to the common names are the same.  What this does is organize
290 | `__byname` members into disjoint telescoping chains.
291 | 
292 | For our
293 | 
294 | ```
295 | p with { x = 3; }
296 | ```
297 | 
298 | example, this means we select the _maximal_ (top member of the chain)
299 | constructor that has an `x`, and then find the _minimal_ deconstructor that
300 | contains all the names in that selected constructor.  This reproduces the object
301 | with maximal fidelity and minimal extraction cost.
302 | 
303 | The overloading rules may seem restrictive at first, but they are not as bad as
304 | they might initially seem.  Many current constructor overloads -- especially for
305 | classes that are suited to functional transformation -- are of the "telescoping"
306 | variety, where the overloads form a linear chain of _x-is-shorthand-for-y_ and
307 | the simpler ones exist merely as positional conveniences to provide defaults
308 | (`new HashMap()` delegates to `new HashMap(initCapacity)` which delegates to
309 | `new HashMap(initCapacity, loadFactor)`.)
310 | 
311 | This scheme was selected in part because some classes may have a wholly
312 | different internal representation than their external API, but want to support
313 | functional transformation using the external API for clients and the internal
314 | representation for the implementation.  By having a private constructor /
315 | deconstructor pair for internal use in terms of the internal representation, and
316 | a public pair for client use in terms of the external API, both clients and
317 | implementation can take advantage of functional transformation in terms that
318 | make sense to them.  And to the extent there is overlap between the public and
319 | private pair, names in the private pair can always be alpha-renamed.
320 | 
321 | ### Teasers
322 | 
323 | The `__byname` modifier is part of the underpinnings for many other useful
324 | features, such as keyword-based invocation (`new Foo(x: 3)`), with or without
325 | default parameters, keyword-based deconstruction, and automatic generation of
326 | read accessors for ordinary classes (you can think of a deconstructor as a
327 | multi-getter, and derive getters from that.)  We'll talk about these some other
328 | time.
329 | 
330 | ### Factories
331 | 
332 | Functional transformation depends on having a `__byname` constructor, but many
333 | developers prefer to expose factories rather than constructors.  Can we nominate
334 | a `__byname` factory to take this role instead?
335 | 
336 | The first problem is that factory methods are an API design pattern, not a
337 | language feature.  We can fix that with some simple mostly-sugar; we can
338 | interpret:
339 | 
340 | ```
341 | class Box<T> {
342 |     public factory of(T t) { ... }
343 | }
344 | ```
345 | 
346 | to mean
347 | 
348 | ```
349 | class Box<T> {
350 |     public static<T> Box<T> of(T t) { ... }
351 | }
352 | ```
353 | 
354 | This is a trivial transformation, and as a bonus, gives us permission to include
355 | factory methods in the "Constructors" section of Javadoc.  Then we can allow
356 | `__byname` factories, and extend the overload rules to consider all the
357 | `__byname` constructors and factories as a group.  To support functional
358 | transformation, we can extend the rule to allow selecting either a `__byname`
359 | constructor or factory.
360 | 
361 | #### Bonus round: interfaces?
362 | 
363 | We already allow static factories in interfaces,
364 | and there's no reason why interfaces cannot have deconstructors (if the
365 | interface provides an API that lets the state components be extracted, which
366 | some do).  If we also allowed deconstructors in interfaces we could even do
367 | with'ing on interfaces too:
368 | 
369 | ```
370 | interface Pointy {
371 |     public __byname factory of(int x, int y) { return new PointImpl(x, y); }
372 |     public __byname __deconstructor Pointy(int x, int y) { __match (x(), y()); }
373 |     public int x();
374 |     public int y();
375 | }
376 | ```
377 | 
378 | Then clients would be able to say:
379 | 
380 | ```
381 | Pointy p = Pointy.of(1, 2);
382 | Pointy flipped = p with { x = -x; y = -y; };
383 | ```
384 | 
385 | (Or not.)
386 | 


--------------------------------------------------------------------------------
/site/_index.md:
--------------------------------------------------------------------------------
 1 | # Project Amber
 2 | 
 3 | The goal of Project Amber is to explore and incubate smaller,
 4 | productivity-oriented Java language features that have been accepted
 5 | as candidate JEPs in
 6 | the [OpenJDK JEP Process](https://openjdk.org/jeps/1). This
 7 | Project is sponsored by
 8 | the [Compiler Group](https://openjdk.org/groups/compiler).
 9 | 
10 | Most Project Amber features go through at least two rounds
11 | of [_preview_](https://openjdk.org/jeps/12) before becoming an
12 | official part of the Java Platform.  For a given feature, there are separate
13 | JEPs for each round of preview and for final standardization.
14 | <!--
15 | This page links only to the most recent JEP for a feature. Such JEPs may
16 | have links to earlier JEPs for the feature, as appropriate.
17 | -->
18 | 
19 | ## Status of JEPs
20 | 
21 | Currently in progress:
22 | 
23 | -   [468: Derived Record Creation (Preview)](https://openjdk.org/jeps/468)
24 | 
25 | Currently in preview:
26 | 
27 | -   [495: Simple Source Files and Instance Main Methods (Fourth Preview)](https://openjdk.org/jeps/495) (previous: [445](https://openjdk.org/jeps/445), [463](https://openjdk.org/jeps/463), [477](https://openjdk.org/jeps/477))
28 | -   [494: Module Import Declarations (Second Preview)](https://openjdk.org/jeps/494) (previous: [476](https://openjdk.org/jeps/476))
29 | -   [492: Flexible Constructor Bodies (Third Preview)](https://openjdk.org/jeps/492) (previous: [447](https://openjdk.org/jeps/447), [482](https://openjdk.org/jeps/482))
30 | -   [488: Primitive Types in Patterns, <code>instanceof</code>, and <code>switch</code> (Second Preview)](https://openjdk.org/jeps/488) (previous: [455](https://openjdk.org/jeps/455))
31 | 
32 | Delivered:
33 | 
34 | -   [458: Launch Multi-File Source-Code Programs](https://openjdk.org/jeps/458)
35 | -   [456: Unnamed Variables and Patterns](https://openjdk.org/jeps/456) (preview: [443](https://openjdk.org/jeps/443))
36 | -   [441: Pattern Matching for <code>switch</code>](https://openjdk.org/jeps/441) (previews: [406](https://openjdk.org/jeps/406), [420](https://openjdk.org/jeps/420), [427](https://openjdk.org/jeps/427), [433](https://openjdk.org/jeps/433))
37 | -   [440: Record Patterns](https://openjdk.org/jeps/440) (previews: [405](https://openjdk.org/jeps/405), [432](https://openjdk.org/jeps/432))
38 | -   [409: Sealed Classes](https://openjdk.org/jeps/409) (previews: [360](https://openjdk.org/jeps/360), [397](https://openjdk.org/jeps/397))
39 | -   [395: Records](https://openjdk.org/jeps/395) (previews: [359](https://openjdk.org/jeps/359), [384](https://openjdk.org/jeps/384))
40 | -   [394: Pattern Matching for <code>instanceof</code>](https://openjdk.org/jeps/394) (previews: [305](https://openjdk.org/jeps/305), [375](https://openjdk.org/jeps/375))
41 | -   [378: Text Blocks](https://openjdk.org/jeps/378) (previews: [355](https://openjdk.org/jeps/355), [368](https://openjdk.org/jeps/368))
42 |     -   [Programmer's Guide](guides/text-blocks-guide)
43 | -   [361: Switch Expressions](https://openjdk.org/jeps/361) (previews: [325](https://openjdk.org/jeps/325), [354](https://openjdk.org/jeps/354))
44 | -   [323: Local-Variable Syntax for Lambda Parameters](https://openjdk.org/jeps/323)
45 | -   [286: Local-Variable Type Inference (<code>var</code>)](https://openjdk.org/jeps/286)
46 |     -   [Style Guidelines](guides/lvti-style-guide)
47 |     -   [FAQ](guides/lvti-faq)
48 | 
49 | On hold:
50 | 
51 | -   [301: Enhanced Enums](https://openjdk.org/jeps/301) (see [here](https://mail.openjdk.org/pipermail/amber-spec-experts/2017-May/000041.html) for explanation)
52 | -   [302: Lambda Leftovers](https://openjdk.org/jeps/302)
53 | -   [348: Java Compiler Intrinsics for JDK APIs](https://openjdk.org/jeps/348)
54 | 
55 | Withdrawn:
56 | 
57 | -   [465: String Templates (Third Preview)](https://openjdk.org/jeps/465) (see [here](https://mail.openjdk.org/pipermail/amber-spec-experts/2024-April/004106.html) for explanation)
58 | -   [326: Raw String Literals](https://openjdk.org/jeps/326), dropped in favor of Text Blocks (see [here](https://mail.openjdk.org/pipermail/jdk-dev/2018-December/002402.html) for explanation)
59 | 
60 | ## Documents
61 | 
62 | -   Guides
63 |     -   [Local Variable Type Inference Style Guide](guides/lvti-style-guide) (March 2018)
64 |     -   [Local Variable Type Inference FAQ](guides/lvti-faq) (Oct 2018)
65 |     -   [Programmer's Guide to Text Blocks](guides/text-blocks-guide) (Aug 2019)
66 | 
67 | -   Design notes
68 |     -   [Symbolic References for Constants](design-notes/constables) (March 2018)
69 |     -   [Data Classes and Sealed Types for Java](design-notes/records-and-sealed-classes) (February 2019)
70 |     -   [Towards Better Serialization](design-notes/towards-better-serialization) (June 2019)
71 |     -   Pattern matching
72 |         -   [Pattern Matching for Java](design-notes/patterns/pattern-matching-for-java) (September 2018)
73 |         -   [Pattern Matching in the Java Object Model](design-notes/patterns/pattern-match-object-model) (December 2020)
74 |         -   [Pattern Matching for Java -- Semantics](design-notes/patterns/pattern-match-semantics) (August 2020)
75 |         -   [Pattern Matching for Java -- Runtime and Translation](design-notes/patterns/pattern-match-translation) (June 2017)
76 |         -   [Extending `switch` for Pattern Matching](design-notes/patterns/extending-switch-for-patterns) (April 2017)
77 |         -   [Type Patterns in `switch`](design-notes/patterns/type-patterns-in-switch) (September 2020)
78 |         -   [Patterns: Exhaustiveness, Unconditionality, and Remainder](design-notes/patterns/exhaustiveness) (May 2023)
79 |         -   [Towards Member Patterns](design-notes/patterns/towards-member-patterns) (January 2024)
80 |     -   [String Tapas Redux: Beyond Mere String Interpolation](design-notes/templated-strings) (September 2021)
81 | 
82 | -   Historical notes
83 |     -   [Data Classes for Java](design-notes/data-classes-historical-1) (October 2017)
84 |     -   [Data Classes for Java](design-notes/data-classes-historical-2) (February 2018)
85 | 
86 | ## Community
87 | 
88 | -   [Members](https://openjdk.org/census#amber)
89 | -   Mailing Lists
90 |     -   [amber-dev](https://mail.openjdk.org/mailman/listinfo/amber-dev) --- For technical discussion related to Project Amber
91 |     -   [amber-spec-experts](https://mail.openjdk.org/mailman/listinfo/amber-spec-experts) --- For Expert Group members only
92 |     -   [amber-spec-observers](https://mail.openjdk.org/mailman/listinfo/amber-spec-observers) --- A read-only clone of amber-spec-experts
93 |     -   [amber-spec-comments](https://mail.openjdk.org/mailman/listinfo/amber-spec-comments) --- For submitting comments on the official specs
94 | -   Other Resources
95 |     -   [Inside.java](https://inside.java/tag/amber)
96 |     -   [Dev.java](https://dev.java/learn/)
97 | 


--------------------------------------------------------------------------------
/site/design-notes/on-ramp.md:
--------------------------------------------------------------------------------
  1 | # Paving the on-ramp
  2 | #### Brian Goetz {.author}
  3 | #### September 2022 {.date}
  4 | 
  5 | 
  6 | Java is one of the most widely taught programming languages in the world.  Tens
  7 | of thousands of educators find that the imperative core of the language combined
  8 | with a straightforward standard library is a foundation that students can
  9 | comfortably learn on.  Choosing Java gives educators many degrees of freedom:
 10 | they can situate students in `jshell` or Notepad or a full-fledged IDE; they can
 11 | teach imperative, object-oriented, functional, or hybrid programming styles; and
 12 | they can easily find libraries to interact with external data and services.  
 13 | 
 14 | No language is perfect, and one of the most common complaints about Java is that
 15 | it is "too verbose" or has "too much ceremony."  And unfortunately, Java imposes
 16 | its heaviest ceremony on those first learning the language, who need and
 17 | appreciate it the least.  The declaration of a class and the incantation of
 18 | `public static void main` is pure mystery to a beginning programmer.  While
 19 | these incantations have principled origins and serve a useful organizing purpose
 20 | in larger programs, they have the effect of placing obstacles in the path of
 21 | _becoming_ Java programmers. Educators constantly remind us of the litany of
 22 | complexity that students have to confront on Day 1 of class -- when they really
 23 | just want to write their first program.  
 24 | 
 25 | As an amusing demonstration of this, in her JavaOne keynote appearance in 2019,
 26 | [Aimee Lucido](https://www.youtube.com/watch?v=BkPPFiXUwYk) talked about when
 27 | she learned to program in Java, and how her teacher performed a rap song
 28 | to help students memorize `"public static void main"`.  Our hats are off to
 29 | creative educators everywhere for this kind of dedication, but teachers
 30 | shouldn't have to do this.
 31 | 
 32 | Of course, advanced programmers complain about ceremony too.  We will never be
 33 | able to satisfy programmers' insatiable appetite for typing fewer keystrokes,
 34 | and we shouldn't try, because the goal of programming is to write programs that
 35 | are easy to read and are clearly correct, not programs that were easy to type.
 36 | But we can try to better align the ceremony with the value it
 37 | brings to a program -- and let simple programs be expressed more simply.  
 38 | 
 39 | ## Concept overload
 40 | 
 41 | The classic "Hello World" program looks like this in Java:
 42 | 
 43 | ```
 44 | public class HelloWorld { 
 45 |     public static void main(String[] args) { 
 46 |         System.out.println("Hello World");
 47 |     }
 48 | }
 49 | ```
 50 | 
 51 | It may only be five lines, but those lines are packed with concepts that are
 52 | challenging to absorb without already having some programming experience and
 53 | familiarity with object orientation. Let's break down the concepts a student
 54 | confronts when writing their first Java program:
 55 | 
 56 |   - **public** (on the class).  The `public` accessibility level is relevant
 57 |     only when there is going to be cross-package access; in a simple "Hello
 58 |     World" program, there is only one class, which lives in the unnamed package.
 59 |     They haven't even written a one-line program yet; the notion of access
 60 |     control -- keeping parts of a program from accessing other parts of it -- is
 61 |     still way in their future.
 62 | 
 63 |   - **class**.  Our student hasn't set out to write a _class_, or model a
 64 |     complex system with objects; they want to write a _program_.  In Java, a
 65 |     program is just a `main` method in some class, but at this point our student
 66 |     still has no idea what a class is or why they want one.
 67 | 
 68 |   - **Methods**.  Methods are of course a key concept in Java, but the mechanics
 69 |     of methods -- parameters, return types, and invocation -- are still
 70 |     unfamiliar, and the `main` method is invoked magically from the `java`
 71 |     launcher rather than from explicit code.  
 72 | 
 73 |   - **public** (again).  Like the class, the `main` method has to be public, but
 74 |     again this is only relevant when programs are large enough to require
 75 |     packages to organize them.  
 76 | 
 77 |   - **static**.  The `main` method has to be static, and at this point, students
 78 |     have no context for understanding what a static method is or why they want
 79 |     one.  Worse, the early exposure to `static` methods will turn out to be a
 80 |     bad habit that must be later unlearned.  Worse still, the fact that the
 81 |     `main` method is `static` creates a seam between `main` and other methods;
 82 |     either they must become `static` too, or the `main` method must trampoline
 83 |     to some sort of "instance main" (more ceremony!).  And if we get this wrong,
 84 |     we get the dreaded and mystifying `"cannot be referenced from a static
 85 |     context"` error.
 86 | 
 87 |   - **main**.  The name `main` has special meaning in a Java program, indicating
 88 |     the starting point of a program, but this specialness hides behind being an
 89 |     ordinary method name.  This may contribute to the sense of "so many magic
 90 |     incantations."
 91 | 
 92 |   - **String[]**.  The parameter to `main` is an array of strings, which are the
 93 |     arguments that the `java` launcher collected from the command line.  But our
 94 |     first program -- likely our first dozen -- will not use command-line
 95 |     parameters. Requiring the `String[]` parameter is, at this point, a mistake
 96 |     waiting to happen, and it will be a long time until this parameter makes
 97 |     sense.  Worse, educators may be tempted to explain arrays at this point,
 98 |     which further increases the time-to-first-program.
 99 | 
100 |   - **System.out.println**.  If you look closely at this incantation, each
101 |     element in the chain is a different thing -- `System` is a class (what's a
102 |     class again?), `out` is a static field (what's a field?), and `println` is
103 |     an instance method.  The only part the student cares about right now is
104 |     `println`; the rest of it is an incantation that they do not yet understand
105 |     in order to get at the behavior they want.
106 | 
107 | That's a lot to explain to a student on the first day of class.  There's a good
108 | chance that by now, class is over and we haven't written any programs yet, or
109 | the teacher has said "don't worry what this means, you'll understand it later"
110 | six or eight times.  Not only is this a lot of _syntactic_ things to absorb, but
111 | each of those things appeals to a different concept (class, method, package,
112 | return value, parameter, array, static, public, etc.) that the student doesn't
113 | have a framework for understanding yet.  Each of these will have an important
114 | role to play in larger programs, but so far, they only contribute to "wow,
115 | programming is complicated."  
116 | 
117 | It won't be practical (or even desirable) to get _all_ of these concepts out of
118 | the student's face on day 1, but we can do a lot -- and focus on the ones that
119 | do the most to help beginners understand how programs are constructed.
120 | 
121 | ## Goal: a smooth on-ramp
122 | 
123 | As much as programmers like to rant about ceremony, the real goal here is not
124 | mere ceremony reduction, but providing a graceful _on ramp_ to Java programming.
125 | This on-ramp should be helpful to beginning programmers by requiring only those
126 | concepts that a simple program needs.  
127 | 
128 | Not only should an on-ramp have a gradual slope and offer enough acceleration
129 | distance to get onto the highway at the right speed, but its direction must
130 | align with that of the highway.  When a programmer is ready to learn about more
131 | advanced concepts, they should not have to discard what they've already learned,
132 | but instead easily see how the simple programs they've already written
133 | generalize to more complicated ones, and both the syntactic and conceptual
134 | transformation from "simple" to "full blown" program should be straightforward
135 | and unintrusive.  It is a definite non-goal to create a "simplified dialect of
136 | Java for students".
137 | 
138 | We identify three simplifications that should aid both educators and students in
139 | navigating the on-ramp to Java, as well as being generally useful to simple
140 | programs beyond the classroom:
141 | 
142 |  - A more tolerant launch protocol
143 |  - Unnamed classes
144 |  - Predefined static imports for the most critical methods and fields
145 | 
146 | ## A more tolerant launch protocol
147 | 
148 | The Java Language Specification has relatively little to say about how Java
149 | "programs" get launched, other than saying that there is some way to indicate
150 | which class is the initial class of a program (JLS 12.1.1) and that a public
151 | static method called `main` whose sole argument is of type `String[]` and whose
152 | return is `void` constitutes the entry point of the indicated class.  
153 | 
154 | We can eliminate much of the concept overload simply by relaxing the
155 | interactions between a Java program and the `java` launcher:
156 | 
157 |  - Relax the requirement that the class, and `main` method, be public.  Public
158 |    accessibility is only relevant when access crosses packages; simple programs
159 |    live in the unnamed package, so cannot be accessed from any other package
160 |    anyway.  For a program whose main class is in the unnamed package, we can
161 |    drop the requirement that the class or its `main` method be public,
162 |    effectively treating the `java` launcher as if it too resided in the unnamed
163 |    package.
164 | 
165 |  - Make the "args" parameter to `main` optional, by allowing the `java` launcher to
166 |    first look for a main method with the traditional `main(String[])`
167 |    signature, and then (if not found) for a main method with no arguments.
168 | 
169 |  - Make the `static` modifier on `main` optional, by allowing the `java` launcher to
170 |    invoke an instance `main` method (of either signature) by instantiating an
171 |    instance using an accessible no-arg constructor and then invoking the `main`
172 |    method on it.
173 | 
174 | This small set of changes to the launch protocol strikes out five of the bullet
175 | points in the above list of concepts: public (twice), static, method parameters,
176 | and `String[]`.  
177 | 
178 | At this point, our Hello World program is now:
179 | 
180 | ```
181 | class HelloWorld { 
182 |     void main() { 
183 |         System.out.println("Hello World");
184 |     }
185 | }
186 | ```
187 | 
188 | It's not any shorter by line count, but we've removed a lot of "horizontal
189 | noise" along with a number of concepts.  Students and educators will appreciate
190 | it, but advanced programmers are unlikely to be in any hurry to make these
191 | implicit elements explicit either.  
192 | 
193 | Additionally, the notion of an "instance main" has value well beyond the first
194 | day.  Because excessive use of `static` is considered a code smell, many
195 | educators encourage the pattern of "all the static `main` method does is
196 | instantiate an instance and call an instance `main` method" anyway.  Formalizing
197 | the "instance main" protocol reduces a layer of boilerplate in these cases, and
198 | defers the point at which we have to explain what instance creation is -- and
199 | what `static` is.  (Further, allowing the `main` method to be an instance method
200 | means that it could be inherited from a superclass, which is useful for simple
201 | frameworks such as test runners or service frameworks.)
202 | 
203 | ## Unnamed classes
204 | 
205 | In a simple program, the `class` declaration often doesn't help either, because
206 | other classes (if there are any) are not going to reference it by name, and we
207 | don't extend a superclass or implement any interfaces.  If we say an "unnamed
208 | class" consists of member declarations without a class header, then our Hello
209 | World program becomes:
210 | 
211 | ```
212 | void main() { 
213 |     System.out.println("Hello World");
214 | }
215 | ```
216 | 
217 | Such source files can still have fields, methods, and even nested classes, so
218 | that as a program evolves from a few statements to needing some ancillary state
219 | or helper methods, these can be factored out of the `main` method while still
220 | not yet requiring a full class declaration:
221 | 
222 | ```
223 | String greeting() { return "Hello World"; }
224 | 
225 | void main() {
226 |     System.out.println(greeting());
227 | }
228 | ```
229 | 
230 | This is where treating `main` as an instance method really shines; the user has
231 | just declared two methods, and they can freely call each other.  Students need
232 | not confront the confusing distinction between instance and static methods yet;
233 | indeed, if not forced to confront static members on day 1, it might be a while
234 | before they do have to learn this distinction.  The fact that there is a
235 | receiver lurking in the background will come in handy later, but right now is
236 | not bothering anybody.
237 | 
238 | [JEP 330](https://openjdk.org/jeps/330) allows single-file programs to be
239 | launched directly without compilation; this streamlined launcher pairs well with
240 | unnamed classes. 
241 | 
242 | ## Predefined static imports
243 | 
244 | The most important classes, such as `String` and `Integer`, live in the
245 | `java.lang` package, which is automatically on-demand imported into all
246 | compilation units; this is why we do not have to `import java.lang.String` in
247 | every class.  Static imports were not added until Java 5, but no corresponding
248 | facility for automatic on-demand import of common behavior was added at that
249 | time.  Most programs, however, will want to do console IO, and Java forces us to
250 | do this in a roundabout way -- through the static `System.out` and `System.in`
251 | fields.  Basic console input and output is a reasonable candidate for
252 | auto-static import, as one or both are needed by most simple programs.  While
253 | these are currently instance methods accessed through static fields, we can
254 | easily create static methods for `println` and `readln` which are suitable for
255 | static import, and automatically import them.  At which point our first program
256 | is now down to:
257 | 
258 | ```
259 | void main() {
260 |     println("Hello World");
261 | }
262 | ```
263 | 
264 | ## Putting this all together
265 | 
266 | We've discussed several simplifications:
267 | 
268 |  - Update the launcher protocol to make public, static, and arguments optional
269 |    for main methods, and for main methods to be instance methods (when a
270 |    no-argument constructor is available); 
271 |  - Make the class wrapper for "main classes" optional (unnamed classes);
272 |  - Automatically static import methods like `println`
273 | 
274 | which together whittle our long list of day-1 concepts down considerably.  While
275 | this is still not as minimal as the minimal Python or Ruby program -- statements
276 | must still live in a method -- the goal here is not to win at "code golf".  The
277 | goal is to ensure that concepts not needed by simple programs need not appear in
278 | those programs, while at the same time not encouraging habits that have to be
279 | unlearned as programs scale up. 
280 | 
281 | Each of these simplifications is individually small and unintrusive, and each is
282 | independent of the others.  And each embodies a simple transformation that the
283 | author can easily manually reverse when it makes sense to do so: elided
284 | modifiers and `main` arguments can be added back, the class wrapper can be added
285 | back when the affordances of classes are needed (supertypes, constructors), and
286 | the full qualifier of static-import can be added back.  And these reversals are
287 | independent of one another; they can be done in any combination or any order.
288 | 
289 | This seems to meet the requirements of our on-ramp; we've eliminated most of the
290 | day-1 ceremony elements without introducing new concepts that need to be
291 | unlearned. The remaining concepts -- a method is a container for statements, and
292 | a program is a Java source file with a `main` method -- are easily understood in
293 | relation to their fully specified counterparts.  
294 | 
295 | ## Alternatives
296 | 
297 | Obviously, we've lived with the status quo for 25+ years, so we could continue
298 | to do so.  There were other alternatives explored as well; ultimately, each of
299 | these fell afoul of one of our goals.
300 | 
301 | ### Can't we go further?
302 | 
303 | Fans of "code golf" -- of which there are many -- are surely right now trying to
304 | figure out how to eliminate the last little bit, the `main` method, and allow
305 | statements to exist at the top-level of a program.  We deliberately stopped
306 | short of this because it offers little value beyond the first few minutes, and
307 | even that small value quickly becomes something that needs to be unlearned.  
308 | 
309 | The fundamental problem behind allowing such "loose" statements is that
310 | variables can be declared inside both classes (fields) and methods (local
311 | variables), and they share the same syntactic production but not the same
312 | semantics.  So it is unclear (to both compilers and humans) whether a "loose"
313 | variable would be a local or a field.  If we tried to adopt some sort of simple
314 | heuristic to collapse this ambiguity (e.g., whether it precedes or follows the
315 | first statement), that may satisfy the compiler, but now simple refactorings
316 | might subtly change the meaning of the program, and we'd be replacing the
317 | explicit syntactic overhead of `void main()` with an invisible "line" in the
318 | program that subtly affects semantics, and a new subtle rule about the meaning
319 | of variable declarations that applies only to unnamed classes.  This doesn't
320 | help students, nor is this particularly helpful for all but the most trivial
321 | programs.  It quickly becomes a crutch to be discarded and unlearned, which
322 | falls afoul of our "on ramp" goals.  Of all the concepts on our list, "methods"
323 | and "a program is specified by a main method" seem the ones that are most worth
324 | asking students to learn early.
325 | 
326 | ### Why not "just" use `jshell`?  
327 | 
328 | While JShell is a great interactive tool, leaning too heavily on it as an on-ramp
329 | would fall afoul of our goals.  A JShell session is not a program, but a
330 | sequence of code snippets.  When we type declarations into `jshell`, they are
331 | viewed as implicitly static members of some unspecified class, with
332 | accessibility ignored completely, and statements execute in a context where
333 | all previous declarations are in scope.  This is convenient for experimentation
334 | -- the primary goal of `jshell` -- but not such a great mental model for
335 | learning to write Java programs.  Transforming a batch of working declarations
336 | in `jshell` to a real Java program would not be sufficiently simple or
337 | unintrusive, and would lead to a non-idiomatic style of code, because the
338 | straightforward translation would have us redeclaring each method, class, and
339 | variable declaration as `static`.  Further, this is probably not the direction
340 | we want to go when we scale up from a handful of statements and declarations to
341 | a simple class -- we probably want to start using classes as classes, not just
342 | as containers for static members. JShell is a great tool for exploration and
343 | debugging, and we expect many educators will continue to incorporate it into
344 | their curriculum, but it is not the on-ramp programming model we are looking for.  
345 | 
346 | ### What about "always local"?
347 | 
348 | One of the main tensions that `main` introduces is that most class members are
349 | not `static`, but the `main` method is -- and that forces programmers to
350 | confront the seam between static and non-static members.  JShell answers this
351 | with "make everything static". 
352 | 
353 | Another approach would be to "make everything local" -- treat a simple program
354 | as being the "unwrapped" body of an implicit main method.  We already allow
355 | variables and classes to be declared local to a method.  We could add local
356 | methods (a useful feature in its own right) and relax some of the asymmetries
357 | around nesting (again, an attractive cleanup), and then treat a mix of
358 | declarations and statements without a class wrapper as the body of an invisible
359 | `main` method. This seems an attractive model as well -- at first.
360 | 
361 | While the syntactic overhead of converting back to full-blown classes -- wrap
362 | the whole thing in a `main` method and a `class` declaration -- is far less
363 | intrusive than the transformation inherent in `jshell`, this is still not an
364 | ideal on-ramp.  Local variables interact with local classes (and methods, when
365 | we have them) in a very different way than instance fields do with instance
366 | methods and inner classes: their scopes are different (no forward references),
367 | their initialization rules are different, and captured local variables must be
368 | effectively final.  This is a subtly different programming model that would then
369 | have to be unlearned when scaling up to full classes. Further, the result of
370 | this wrapping -- where everything is local to the main method -- is also not
371 | "idiomatic Java".  So while local methods may be an attractive feature, they are
372 | similarly not the on-ramp we are looking for.
373 | 
374 | 


--------------------------------------------------------------------------------
/site/design-notes/patterns/exhaustiveness.md:
--------------------------------------------------------------------------------
  1 | # Patterns: Exhaustiveness, Unconditionality, and Remainder
  2 | #### Brian Goetz and Gavin Bierman {.author}
  3 | #### 2023-05-23 {.date}
  4 | 
  5 | As the `switch` construct has been made steadily more expressive (first to
  6 | support [`switch` expressions](https://openjdk.org/jeps/361), and later to
  7 | support [patterns in `switch`](https://openjdk.org/jeps/441)), it has become
  8 | important to provide compile-time checking for whether a particular `switch` is
  9 | _exhaustive_ for its selector type.  All `switch` expressions, and any `switch`
 10 | statement that uses a pattern label, must be exhaustive, or a compilation error
 11 | will occur.  For example, given:
 12 | 
 13 | ```
 14 | enum Color { RED, YELLOW, GREEN }
 15 | 
 16 | int numLetters = switch (color) {  // Error - not exhaustive
 17 |     case RED -> 3;
 18 |     case GREEN -> 5;
 19 | }
 20 | ```
 21 | 
 22 | we would like to get a compile-time error that tells us that this switch is not
 23 | exhaustive, because the anticipated input `YELLOW` is not covered.  Which raises
 24 | the question: what does "exhaustive" mean?
 25 | 
 26 | A switch with a _match-all_ label (a `default` label, or a `case null, default`
 27 | label, or a `case` label with a type pattern that matches every value of the
 28 | selector expression) is clearly exhaustive; for every possible value of the
 29 | selector, one of the labels will definitely be selected - the match-all label!
 30 | It is tempting to try to define exhaustiveness for a `switch` without a
 31 | match-all label as meaning "if a match-all label were added, it would never be
 32 | selected."  As it turns out, this definition is too strong, and even if we were to
 33 | adopt this definition, we probably wouldn't enjoy programming in the resulting
 34 | language.
 35 | 
 36 | ## Switching over enums
 37 | 
 38 | Many of the tensions in defining exhaustiveness are visible even in the simple
 39 | case of exhaustive switch expressions over enum types.  If we complete our
 40 | switch over colors by handling all the enum constants, is it exhaustive?
 41 | 
 42 | ```
 43 | int numLetters = switch (color) { // Exhaustive!
 44 |     case RED -> 3;
 45 |     case GREEN -> 5;
 46 |     case YELLOW -> 6;
 47 | }
 48 | ```
 49 | 
 50 | We would surely like for this `switch` to be exhaustive -- for multiple reasons.
 51 | It would definitely be cumbersome to have to write a match-all clause which
 52 | probably just throws an exception, since we have already handled all the cases:
 53 | 
 54 | ```
 55 | int numLetters = switch (color) {
 56 |     case RED -> 3;
 57 |     case GREEN -> 5;
 58 |     case YELLOW -> 6;
 59 |     default -> throw new ArghThisIsIrritatingException(color.toString());
 60 | }
 61 | ```
 62 | 
 63 | Manually writing a `default` clause in this situation is not only irritating but
 64 | actually pernicious, since the compiler can do a better job of checking
 65 | exhaustiveness without one. (The same is true of any other match-all clause such
 66 | as `case null, default`, or an unconditional type pattern.) If we omit the
 67 | `default` clause, then we will discover at compile time if we have forgotten a
 68 | `case` label, rather than finding out at run time — and maybe not even then.
 69 | 
 70 | More importantly, what happens if someone later adds another constant to the
 71 | `Color` enum?  If we have an explicit match-all clause then we will only
 72 | discover the new constant value if it shows up at run time.  But if we code the
 73 | `switch` to cover all the constants known at compile time and omit the match-all
 74 | clause, then we will find out about this change the next time we recompile the
 75 | class containing the `switch` -- and can then choose how to handle it. A
 76 | match-all clause risks sweeping exhaustiveness errors under the rug.
 77 | 
 78 | In conclusion: An exhaustive `switch` without a match-all clause is better than
 79 | an exhaustive `switch` with one, when possible.
 80 | 
 81 | Looking to run time, what happens if a new `Color` constant is added, and the
 82 | class containing the `switch` is not recompiled?  There is a risk that the new
 83 | constant will be exposed to our `switch`.  Because this risk is always present
 84 | with enums, if an exhaustive enum `switch` does not have a match-all clause,
 85 | then the compiler will actually synthesize a `default` clause that throws an
 86 | exception.  This guarantees that the `switch` cannot complete normally without
 87 | selecting one of the clauses.  (Given that the compiler will insert a synthetic
 88 | `default` clause, it is tempting to ask whether we should _outlaw_ an explicit
 89 | match-all clause in an otherwise exhaustive switch.  But, this would be taking
 90 | it too far, as the user may want to provide customized error handling code.)
 91 | 
 92 | <!-- This is not quite true. We outlaw it if we have an unconditional pattern. -->
 93 | 
 94 | ## Unconditionality, exhaustiveness, and remainder
 95 | 
 96 | Even the simple case of switching over enums illustrates that exhaustiveness is
 97 | more subtle than it first appears.  We want to say that having `case` labels for
 98 | `RED`, `YELLOW`, and `GREEN` means that the switch is exhaustive for `Color`,
 99 | but it isn't really, or at least not completely; there are possible run time
100 | values of `Color` that are not matched by any of these labels.  This isn't a bug
101 | in our definition of exhaustiveness; "strengthening" the compile-time checking
102 | (which, in this case, would mean requiring a match-all clause) would be both
103 | inconvenient for users and result in worse type checking (and therefore less
104 | reliable programs).  The reality is that the compile-time notion of "exhaustive
105 | enough" and true run time exhaustiveness are not the same thing. Similarly, we
106 | would not want to weaken the run time checking by omitting the synthetic
107 | `default`, as this could create surprising results.  So we need both concepts,
108 | which we call _unconditionality_ (for the strong run time version) and
109 | _exhaustiveness_ (for the compile-time version.)
110 | 
111 | A pattern is _unconditional_ for a candidate type if we can prove at compile
112 | time that it will always match _all_ possible run time values of that type.  (An
113 | unconditional pattern thus requires no run time checks.)  Unconditionality is a
114 | strong condition; the only patterns currently supported that are unconditional
115 | are inferred type patterns (`var x`), unnamed patterns (`_`, which is sugar for
116 | `var _`), and type patterns where the type of the match candidate is a subtype
117 | of the type named in the pattern (e.g. the type pattern `CharSequence cs` is
118 | unconditional for the type `CharSequence` and also for `String` but not for the
119 | type `Object`.)
120 | 
121 | Exhaustiveness for a type is a property not of a single pattern, but more
122 | properly of a _set of patterns_ (or `case` labels).  A set of enum `case` labels
123 | is exhaustive for the corresponding enum type if the set of `case` labels
124 | contains all of the enum constants of that type.  As we will see, there are
125 | other ways for a set of patterns to be exhaustive as well.
126 | 
127 | If a set of patterns is exhaustive for a type, we call the run time values that
128 | are not matched by any pattern in the set the _remainder_ of the set.  In our
129 | enum switch example, the remainder includes any novel enum constants, as well as
130 | `null`.  (That `null` is part of the remainder for our enum switch is less
131 | obvious, because the innate null-hostility of switches hides this in the simple
132 | cases, but this will become important when we get to nested patterns.)
133 | 
134 | ## Sealed types
135 | 
136 | Switching over a selector whose type names a `sealed` class is similar to
137 | switching over enums, just lifted from the term level to the type level.  If all
138 | of the permitted subtypes of the sealed type are handled by the cases, we would
139 | like to be able to omit a match-all clause just as with enum switches.
140 | 
141 | ```
142 | sealed interface Container<T> permits Box, Bag { }
143 | 
144 | switch (container) {
145 |     case Box<T> box: ...
146 |     case Bag<T> bag: ...
147 | }
148 | ```
149 | 
150 | For the same reasons as with enum switches, we would like for this switch to be
151 | considered exhaustive on `Container<T>`.  And, just as with enum switches, it is
152 | possible for a novel subtype of `Container` to show up at run time, so the
153 | compiler similarly inserts a synthetic `default` clause that throws.  A set of
154 | patterns is exhaustive on a sealed type if it is exhaustive on every permitted
155 | subtype, and for an exhaustive set of patterns on a sealed type, any novel
156 | subtypes are considered part of the remainder.  As with enum switches, for an
157 | exhaustive set of patterns on a sealed type, the remainder contains the `null`
158 | value.
159 | 
160 | ### Record (and deconstruction) patterns
161 | 
162 | Given a record:
163 | 
164 | ```
165 | record IntBox(int i) { }
166 | ```
167 | 
168 | should the following switch be considered exhaustive?
169 | 
170 | ```
171 | IntBox ib = ...
172 | switch (ib) {
173 |     case IntBox(int i): ...
174 | }
175 | ```
176 | 
177 | The pattern `IntBox(int i)` is not unconditional on `IntBox`, because it doesn't
178 | match `null` (which is a valid run time value of `IntBox`).  But the above switch
179 | certainly should be considered exhaustive; requiring a match-all clause, or even
180 | a `case null` label here
181 | would help no one.  Given a record class `R` with an `x` component, the record
182 | pattern `R(p)` matches a value if the value is `instanceof R` and (recursively)
183 | if the record value's `x` component value matches the pattern `p`. The pattern
184 | `R(p)` is considered exhaustive for `R` if `p` is exhaustive for the type of the
185 | `x` component of `R`.
186 | 
187 | The pattern `IntBox(int i)` cannot be considered to match `null`, even though
188 | `null` can be cast to `IntBox`.  This is because, in general, to extract a
189 | record value's component value, after casting the value to the record type, we
190 | invoke the component accessor method on the resulting reference; if the
191 | candidate is `null`, this invocation will fail. Just as with the earlier cases,
192 | the remainder for any record pattern always includes `null`.
193 | 
194 | 
195 | ### Nested patterns
196 | 
197 | Nested patterns are also a source of remainder.  Say we have:
198 | 
199 | ```
200 | record Box<T>(T t) { }
201 | ```
202 | 
203 | and a switch:
204 | 
205 | ```
206 | Box<Box<String>> bbs = ...
207 | switch (bbs) {
208 |     case Box(Box(String s)): ...
209 | }
210 | ```
211 | 
212 | First, is this switch exhaustive?  By the above definition, `Box(Box(String s))`
213 | is exhaustive for the type `Box<Box<String>>` if the nested pattern `Box(String
214 | s)` is exhaustive for `Box<String>`; by the same rule, this is exhaustive if
215 | `String s` is exhaustive for `String`, which it is, by virtue of being
216 | unconditional.  So the switch is exhaustive.
217 | 
218 | So, what is the remainder?  This is where it gets interesting, and why we've
219 | bothered to include `null` in the remainder for the previous examples even when
220 | it might appear to be irrelevant because of the `null` hostility of switch.  The
221 | remainder here clearly includes `null`, but it also includes a `Box` value with
222 | a `null` component value! More generally, for a record pattern `R(p)` where `r`
223 | is in the remainder of `p` for the component type of `R`, the remainder includes
224 | an `R` value with `r` as its component value.
225 | 
226 | ### Nested patterns and sealed types
227 | 
228 | Let's put together nested patterns with sealed types.
229 | 
230 | ```
231 | sealed interface Fruit permits Apple, Orange { }
232 | final class Apple  implements Fruit { }
233 | final class Orange implements Fruit { }
234 | 
235 | Box<Fruit> bf = ...
236 | switch (bf) {
237 |     case Box(Apple a): ...
238 |     case Box(Orange o): ...
239 | }
240 | ```
241 | 
242 | Is this switch exhaustive on `Box<Fruit>`?  Together, the patterns `Apple a` and
243 | `Orange o` are exhaustive on `Fruit`, with remainder containing `null` and any
244 | novel subtype of `Fruit`.  So `Box(Apple a)` and `Box(Orange o)` _are_
245 | exhaustive on `Box<Fruit>`.  The remainder includes a `Box` value with a `null`
246 | component value, and also `Box` values whose component value's type is a novel
247 | subtype of `Fruit`.  (The rules get more complicated when the record has more
248 | than one component.)
249 | 
250 | ## It's complicated, but at its heart it's simple
251 | 
252 | This may all seem complicated but, at its heart, it derives from some
253 | straightforward rules about how patterns are exhaustive for a type:
254 | 
255 | -   A set of patterns containing a type pattern `T t` is exhaustive for `T` and
256 |     any of its subtypes;
257 | -   A set of patterns is exhaustive for a `sealed` type if it is exhaustive for
258 |     its permitted direct subtypes;
259 | -   A set of patterns containing a record pattern `R(P)` is exhaustive for `R`
260 |     if `P` is exhaustive for the component type of `R`; and
261 | -   A set of patterns containing the record patterns `R(P0) .. R(Pn)` is
262 |     exhaustive for `R` if the set of patterns `P0..Pn` is exhaustive for `R`'s
263 |     component type.
264 | 
265 | The remainder of an exhaustive set of patterns is the set of values that do not
266 | match any pattern in the set. These are the 'weird' values that might
267 | appear at run time, but for which it would be unreasonable or even
268 | counter-productive to require that they be explicitly handled every time in
269 | every `switch`.


--------------------------------------------------------------------------------
/site/design-notes/patterns/extending-switch-for-patterns.md:
--------------------------------------------------------------------------------
  1 | # Extending `switch` for Pattern Matching
  2 | 
  3 | #### Gavin Bierman and Brian Goetz {.author}
  4 | #### April 2017 {.date}
  5 | 
  6 | This document explores a possible direction for enhancements to
  7 | `switch` in the Java language, motivated by the desire to
  8 | support [_pattern matching_][pattern-match].  _This is an exploratory
  9 | document only and does not constitute a plan for any specific feature
 10 | in any specific version of the Java Language._
 11 | 
 12 | ### Pattern matching documents
 13 | 
 14 | - [Pattern Matching For Java](pattern-matching-for-java) --- Overview of
 15 |   pattern matching concepts, and how they might be surfaced in Java.
 16 | 
 17 | - [Pattern Matching For Java -- Semantics](pattern-match-semantics) --- More
 18 |   detailed notes on type checking, matching, and scoping of patterns and binding
 19 |   variables.
 20 | 
 21 | - [Extending Switch for Patterns](extending-switch-for-patterns) (this
 22 |   document) ---  An early exploration of the issues surrounding extending pattern
 23 |   matching to the `switch` statement.
 24 | 
 25 | - [Type Patterns in Switch](type-patterns-in-switch) --- A more up-to-date
 26 |   treatment of extending pattern matching to `switch` statements, including
 27 |   treatment of nullity and totality.
 28 | 
 29 | - [Pattern Matching in the Java Object model](pattern-match-object-model)
 30 |   --- Explores how patterns fit into the Java object model, how they fill a hole we
 31 |   may not have realized existed, and how they might affect API design going
 32 |   forward.
 33 | 
 34 | ## Background
 35 | 
 36 | Java inherited its `switch` construct nearly wholesale from C.  It was
 37 | designed as a limited mechanism for limited situations; one can only
 38 | switch on a small set of types, and one can only have case labels that
 39 | exactly match literal constants.  While its range was extended several
 40 | times (switching on enums in Java 5, and strings in Java 7), the basic
 41 | facility is largely unchanged from C.
 42 | 
 43 | As we consider extending `switch` to support a wider variety of types,
 44 | and `case` labels to support patterns, it raises some new questions,
 45 | such as:
 46 | 
 47 |  - What is the scope of binding variables introduced in pattern `case`
 48 |    labels?
 49 | 
 50 |  - Does fallthrough need to be restricted to make sense with pattern
 51 |    `case` labels?
 52 | 
 53 |  - Can `switch` be smoothly extended to an expression, and if so, what
 54 |    changes need to be made?
 55 | 
 56 |  - Do we need additional control flow constructs, like `break` or
 57 |    `continue`?
 58 | 
 59 |  - Do we need "guard" conditions on patterns?
 60 | 
 61 |  - Under what conditions might a `switch` expression without a
 62 |    `default` clause be considered exhaustive?
 63 | 
 64 | It should be noted that there is a duality between `switch` statements
 65 | and a chain of `if-else` statements.  We can use this duality as a
 66 | lens through which to evaluate the regularity of extensions to
 67 | `switch`.
 68 | 
 69 | ## Scoping
 70 | 
 71 | A `switch` statement today is one big scope; the "arms" of a `switch`
 72 | do not constitute individual scopes, unless scoping constructs (such
 73 | as introducing a new block) are explicitly used by the author.
 74 | 
 75 | The situation of having variable declarations arise from expressions
 76 | is new, so it is a reasonable question to ask "What is the scope of
 77 | a binding variable of a pattern match?"  There's also an obvious
 78 | answer -- the scope of the statement that encloses the pattern match;
 79 | we can just hoist variables into the scope which includes the
 80 | statement which includes the match expression:
 81 | 
 82 |     if (x matches String s) { ... }
 83 | 
 84 | becomes
 85 | 
 86 |     String s;
 87 |     if (x matches String s) { ... }
 88 | 
 89 | However, this seems like one of those "obvious but wrong" answers;
 90 | there are going to be places in that scope where the variable is still
 91 | not usable (because it is not definitely assigned), and it is likely
 92 | that users will want to reuse the same variable name for multiple
 93 | bindings in the same scope:
 94 | 
 95 |     if (x matches Integer n) { ... }
 96 |     else if (x matches Float n) { ... }
 97 |     else if (x matches Double n) { ... }
 98 | 
 99 | (or the equivalent in a `switch` statement.)  Having to come up with a
100 | unique name for each binding variable, just because the variable has
101 | been hoisted into a broader scope, will be unpopular (and as it turns
102 | out, unnecessary.)
103 | 
104 | ### Natural scoping for binding variables
105 | 
106 | The following example illustrates that the "natural" scope of a
107 | binding variable is complex and not necessarily contiguous:
108 | 
109 |     if (x matches Foo(var y)) { .. y .. }                 // OK
110 |     if (x matches Foo(var y)) { ... } else { .. y .. }    // not OK
111 |     if (x matches Foo(var y) && .. y ..) { ... }          // OK
112 |     if (x matches Foo(var y) || .. y ..) { ... }          // not OK
113 |     if (!(x matches Foo(var y)) && .. y .. ) { ... }      // not OK
114 |     if (!(x matches Foo(var y)) || .. y .. ) { ... }      // OK
115 |     if (!(x matches Foo(var y))) { ... } else { .. y .. } // OK
116 |     if (!(x matches Foo(var y))) { .. y .. } else { ... } // not OK
117 | 
118 | The above cases are derived from a standard application of _definite
119 | assignment_ rules; we'd like for a binding variable to be in scope
120 | wherever it is definitely assigned, to not be in scope wherever it is
121 | not definitely assigned, and for a binding variable to always be
122 | definitely unassigned at the point of its declaration.
123 | 
124 | We can construct a set of rules for the natural scope of these
125 | variables.  To start with, we say that each expression _e_ gives rise
126 | to two sets of binding variables, `e.T` and `e.F`, along with rules
127 | for when one or the other of these sets is included in the scope of a
128 | statement or expression, over all the expression forms.  If not
129 | otherwise defined, `e.T` = `e.F` = `{}` -- most expressions (including
130 | all current expression forms) make available no new bindings.  We also
131 | define a set of binding variables to additionally be in scope for
132 | certain expressions or statements via the "include in" clauses below.
133 | 
134 | - If _e_ is `x matches P`:
135 | 
136 |       e.T = { binding variables from P }
137 |       e.F = { }
138 | 
139 | - If e is `x && y`:
140 | 
141 |       e.T = union(x.T, y.T)
142 |       e.F = intersection(x.F, y.F)
143 |       include x.T in y
144 | 
145 | - If e is `x || y`:
146 | 
147 |       e.T = intersection(x.T, y.T)
148 |       e.F = union(x.F, y.F)
149 |       include x.F in y
150 | 
151 | - If e is `x ? y : z`:
152 | 
153 |       e.T = union(intersect(y.T, z.T),
154 |                   intersect(x.T, z.T),
155 |                   intersect(x.F, y.T))
156 |       e.F = union(intersect(y.F, z.F),
157 |                   intersect(x.T, z.F),
158 |                   intersect(x.F, y.F))
159 |       include x.T in y
160 |       include x.F in z
161 | 
162 | - If e is `(x)`:
163 | 
164 |       e.T = x.T
165 |       e.F = x.F
166 | 
167 | - If e is `!x`:
168 | 
169 |       e.T = x.F
170 |       e.F = x.T
171 | 
172 | We can do the same for statement forms:
173 | 
174 | - For `if (x) y else z`:
175 | 
176 |       include x.T in y
177 |       include x.F in z
178 | 
179 | - For `if (x) return/throw; z`
180 | 
181 |       include x.T in return/throw
182 |       include x.F in z
183 | 
184 | - For `while (x) y`:
185 | 
186 |       include x.T in y
187 | 
188 | - For `for (a; b; c) d`:
189 | 
190 |       include b.T in c
191 |       include b.T in d
192 | 
193 | - For `switch (x) { ... case P: y; case Q: ... }`
194 | 
195 |       include binding variables from P in y
196 | 
197 | Further, union and intersection should be limited to avoid conflicts.
198 | The `union` function should be a disjoint union: it is an error if any
199 | binding variable is present in both sets -- otherwise, expressions like
200 | `x matches Foo(var x) && y matches Bar(var x)` would include two
201 | different variables called `x` in the same scope.  Similarly, for
202 | `intersect`, it is an error if the same binding variable is present in
203 | both sets but with different types.
204 | 
205 | ## Fallthrough and OR patterns
206 | 
207 | While one could make an argument that fallthrough in `switch` was the
208 | wrong default, the problem fallthrough aims to solve -- treating
209 | multiple items similarly without duplicating the code -- was real,
210 | and is still relevant when our `case` labels get richer.
211 | 
212 | Patterns that generate binding variables require some refinements to the
213 | above scoping rules.  For example:
214 | 
215 |     case Foo(int x):
216 |     case Bar(float x):
217 |         s;
218 | 
219 | would be an error, just as `y matches Foo(int x) || y matches
220 | Bar(float x)` would be.
221 | 
222 | However, there's no reason why we can't make this work, with `x` in
223 | scope in `s`:
224 | 
225 |     case Foo(int x):
226 |     case Bar(int x):
227 |         s;
228 | 
229 | This is analogous to the disjunction `y matches Foo(int x) || y
230 | matches Bar(int x)`.
231 | 
232 | Similarly, in:
233 | 
234 |     case Foo(int x, int y):
235 |     case Bar(int x):
236 |         s;
237 | 
238 | the binding variable `y` would not be available in `s`, because we
239 | can't rely on it having a value on all control paths, but `x` can
240 | still be available in `s`.  These restrictions are a straightforward
241 | refinement of the scoping rules presented earlier.
242 | 
243 | A more limited form of fallthrough is OR patterns:
244 | 
245 |     case P1 || P2:
246 |         s;
247 | 
248 | Which is equivalent to:
249 | 
250 |     case P1:
251 |     case P2:
252 |         s;
253 | 
254 | We might consider prohibiting fallthrough but allowing OR patterns (in
255 | which case we'd probably require that all OR patterns declare exactly
256 | the same set of binding variables.)
257 | 
258 | ### Guards, compound patterns, and continue
259 | 
260 | Nested patterns, such as:
261 | 
262 |     case Point(0, 0):
263 | 
264 | express _compound conditions_; we're testing that the target is a
265 | `Point`, and that both its `x` and `y` components match the constant
266 | pattern `0`.  While nested patterns are powerful, they have their
267 | limits; we can't easily test for whether a point is, say, on the
268 | diagonal.  We could express this with a _guard_:
269 | 
270 |     case Point(var x, var y) && x == y:
271 | 
272 | Alternately, we could express compound conditions by pushing the
273 | subordinate test into the body, and permitting the `continue` control
274 | flow construct in switches, which would indicate we want to break out
275 | of the existing `case` arm, and resume matching at the next `case`
276 | label:
277 | 
278 |     case Point(var x, var y):
279 |         if (x != y)
280 |             continue;
281 | 
282 | (Note that nested patterns desugar to guards, and guards desugar to
283 | `continue`, so we are likely to have to implement all these mechanisms
284 | internally anyway.)
285 | 
286 | ### Dead code
287 | 
288 | In some cases, the compiler may be able to prove that a case is
289 | unreachable, such as:
290 | 
291 |     switch (x) {
292 |         case Comparable c: ... break;
293 |         case Integer i: // can't reach this
294 |     }
295 | 
296 | In these cases, the compiler will issue an error (just as with
297 | unreachable `catch` clauses.)
298 | 
299 | ## Switch expressions
300 | 
301 | The other major direction in which we would like to extend `switch` is
302 | to give it an expression form:
303 | 
304 |     float overtimeFactor = switch (day) {
305 |         case SATURDAY -> 1.5;
306 |         case SUNDAY -> 2;
307 |         default -> 1;
308 |     }
309 | 
310 | While statement switches need not be exhaustive (just as `if`
311 | statements need not have an `else`), expression switches must be (as
312 | the expression must evaluate to something.)  Exhaustiveness can always
313 | be provided via a `default` arm, but sometimes we may want to do
314 | better.  The compiler can use class hierarchy information, as well as
315 | sealing information, to prove exhaustiveness.  (Since the type
316 | hierarchy can change between compile and run time, the compiler will
317 | still want to insert a catch-all throwing `default` even if it deems
318 | the analysis exhaustive.)
319 | 
320 | Unrestricted fallthrough makes less sense in an expression `switch`,
321 | but OR patterns still do:
322 | 
323 |     int days = switch (month) {
324 |         case JANUARY
325 |              || MARCH
326 |              || MAY
327 |              || JULY
328 |              || AUGUST
329 |              || OCTOBER
330 |              || DECEMBER -> 31;
331 |          case FEBRUARY -> 28;
332 |          case APRIL
333 |              || JUNE
334 |              || SEPTEMBER
335 |              || NOVEMBER -> 30;
336 |     };
337 | 
338 | A switch expression is a _poly expression_, and pushes its target type
339 | down into the switch arms (just as we do with conditional
340 | expressions.)
341 | 
342 | ### Mixing statements and expressions
343 | 
344 | While the common case with a switch expression is that the RHS of a
345 | case label is a single expression, occasionally the result may not be
346 | constructible in this way (or construction of the result might require
347 | side-effects, such as debugging output).  Other languages usually
348 | handle this with _block expressions_; we can construct a limited form
349 | of block expression for use in expression `switch` by coopting the
350 | `break` keyword, as in these examples:
351 | 
352 |     case String s -> {
353 |         System.out.println("It's a string!");
354 |         break s.toUpperCase();
355 |     }
356 | 
357 |     case Flooble f -> {
358 |         FloobleDescriptor fd = new FloobleDescriptor();
359 |         fd.setFlooble(f);
360 |         break fd;
361 |     }
362 | 
363 | There is some potential ambiguity between label-break and result-break
364 | here, but working these out is practical.
365 | 
366 | ### Throw expressions
367 | 
368 | It is not uncommon that one or more arms of a switch expression will
369 | result in a transfer-of-control operation, such as:
370 | 
371 |     int size = switch (x) {
372 |         case Collection c -> c.size();
373 |         case String s -> s.length();
374 |         default -> throw new IllegalArgumentException(...);
375 |     }
376 | 
377 | Even though `throw` is a statement, not an expression, the intent here
378 | is clear, so we want to allow `throw` (and possibly other
379 | transfer-of-control operations) in this context.
380 | 
381 | ### Targetless switch
382 | 
383 | In the theme of elevating `switch` as the generalization of the
384 | ternary conditional operator, we may also wish to allow a simplified
385 | form of `switch` where there is no switch target, and all case labels
386 | are boolean expressions:
387 | 
388 |     String fizzbuzz(int n) {
389 |         boolean byThree = n % 3 == 0;
390 |         boolean byFive = n % 5 == 0;
391 |         return switch {
392 |             case byThree && byFive -> "fizzbuzz";
393 |             case byThree -> "fizz";
394 |             case byFive -> "buzz";
395 |             default -> Integer.toString(n);
396 |         }
397 |     }
398 | 
399 | 
400 | 
401 | [pattern-match]: pattern-matching-for-java
402 | 


--------------------------------------------------------------------------------
/site/design-notes/patterns/pattern-match-translation.md:
--------------------------------------------------------------------------------
  1 | # Pattern Matching for Java --- Runtime and Translation
  2 | 
  3 | #### Brian Goetz and John Rose {.author}
  4 | #### June 2017 {.date}
  5 | 
  6 | This document explores compiler translation strategies and runtime
  7 | support for supporting [_pattern matching_][pattern-match] in the Java
  8 | Language.  This is an exploratory document only and does not
  9 | constitute a plan for any specific feature in any specific version of
 10 | the Java Language.  This document also may reference other features
 11 | under exploration; this is purely for illustrative purposes, and does
 12 | not constitute any sort of plan or commitment to deliver any of these
 13 | features.
 14 | 
 15 | ## Background
 16 | 
 17 | We've proposed several kinds of [patterns][pattern-match], such as
 18 | deconstructor patterns, constant patterns, and type test patterns, and
 19 | several linguistic contexts in which pattern matching might be
 20 | supported (`match` predicate, `switch` statement).  An obvious
 21 | question is: what bytecode should the compiler generate for a pattern
 22 | match, or for the implementation of a pattern?  (There is also the
 23 | question of how one might declare a pattern in source code; this is a
 24 | topic for a separate document.)
 25 | 
 26 | ### What is a pattern?
 27 | 
 28 | A _pattern_ is a combination of a _predicate_ that can be applied to a
 29 | target, and a set of _binding variables_ that are produced if that
 30 | predicate applies.  We can model a pattern as a typed tuple _\<T,B\*\>(z,
 31 | b\*)_, where _T_ is the _target type_ of the pattern, _B\*_ are the
 32 | types of the binding variables, _z_ is a function _T->bool_
 33 | representing the predicate, and _b\*_ is a vector of partial functions
 34 | _T->Bi_ that produce the binding variables.
 35 | 
 36 | Some patterns are _total_; they match any target, and so their
 37 | predicate always returns `true`.  If a pattern is known to be total at
 38 | compile time, the compiler can use this knowledge to aid in
 39 | exhaustiveness analysis.
 40 | 
 41 | We've chosen to model patterns as nominal executable class members
 42 | (like methods or constructors).  A pattern is the dual of a method or
 43 | constructor; where methods and constructors take _N_ arguments and
 44 | produce one result, a pattern takes one argument and produces _N_
 45 | results.
 46 | 
 47 | ### Encoding patterns as methods
 48 | 
 49 | It is easy -- though neither performant nor consequence-free -- to
 50 | model patterns as ordinary methods.  Scala models patterns using
 51 | static `unapply` methods, which take a single argument and produce
 52 | either `Boolean` or an `Option` wrapper for one or more values to
 53 | communicate both the success/failure and the resulting binding
 54 | variables in a single invocation.  From a user and compiler
 55 | perspective, this is fine; the pattern
 56 | 
 57 |     def unapply(p : Point) : Option[(int, int)] = Some(p.x, p.y)
 58 | 
 59 | is clear enough (successfully deconstructing a `Point` results in an
 60 | `(int,int)` pair), and the compiler can readily translate pattern
 61 | matches into `unapply` calls.  However, from a cost perspective, this
 62 | is pretty bad; the `Option` is a heap-based box, the `Tuple` is a
 63 | heap-based box, and, in the absence of specialization, each `int` is
 64 | boxed into an `Integer`, for a total of four heap nodes per match.
 65 | 
 66 | This heap-based approach is the obvious one given a VM that lacks the
 67 | mechanisms that would make stack-based approaches possible, such as
 68 | multiple return (leaving multiple values on the stack), out parameters
 69 | (an alternate encoding of multiple return), uplevel references to
 70 | locals (providing down-stack frames the ability to access up-stack
 71 | locals), or value types (unboxed aggregates.)
 72 | 
 73 | However, with method handles, `invokedynamic` (indy), and soon,
 74 | `constantdynamic` (condy), we can encode a pattern so that most
 75 | pattern match operations can proceed without boxing, in a manner
 76 | highly optimizable by the JIT, and, as a bonus, in a way that can be
 77 | used across JVM languages.
 78 | 
 79 | ### Further performance considerations
 80 | 
 81 | The most common case of a pattern is that there is no significant
 82 | shared computation between testing to see if the pattern applies to
 83 | the target and extracting the various components.  For example, if we
 84 | are destructuring a `Point`, our test is an `instanceof` test, and
 85 | component extraction is field access.
 86 | 
 87 | The uncommon case is that there is either significant shared
 88 | computation, or there are atomicity requirements that say that the
 89 | components should be extracted in a single atomic operation.  In both
 90 | cases, it is desirable (in the latter case, necessary) to have an
 91 | intermediate carrier to hold the match state.  (In the common case, we
 92 | can think of the target as acting as its own carrier.)
 93 | 
 94 | We want to identify an encoding for matchers such that the common
 95 | cases are fast and allocation free, but that it is possible to use an
 96 | intermediate result carrier where that is required by semantics (i.e.,
 97 | `synchronized` patterns) or desired for efficiency reasons.  Further,
 98 | it should be a binary compatible change to switch from one mode to the
 99 | other -- client code shouldn't have to distinguish the two at runtime.
100 | And, for matchers that use intermediate result carriers, it should be
101 | a binary compatible change to migrate to using a value type as a
102 | carrier in the future.
103 | 
104 | ## Basic strategy
105 | 
106 | We'll start with the runtime representation and work our way up to
107 | classfile representation (and later, source file representation).  Our
108 | runtime strategy represents a pattern as a constant bundle of method
109 | handles.  Let's cover the simple (common) case and then we'll add in
110 | machinery for the general case.
111 | 
112 | ```{.java}
113 | interface __Pattern {
114 |     int numComponents();
115 |     MethodHandle predicate();      // T -> bool
116 |     MethodHandle component(int i); // T -> Bi
117 | }
118 | ```
119 | 
120 | To emit pattern-matching code, a compiler has to acquire a reference
121 | to the pattern object (likely via indy/condy), ask the pattern for its
122 | predicate method handle, invoke the predicate handle on the target,
123 | and, if the predicate succeeds, ask the pattern for the component
124 | method handles and invoke them on the target.  (We can further use
125 | indy/condy to cache the individual method handles, moving the "ask the
126 | pattern for its handles" code to link time.)  If the pattern is
127 | statically known to be total, then invoking the `predicate` can be
128 | omitted.
129 | 
130 | Illustrating deconstructing a `Point` into its `x` and `y` components
131 | using Java code (though in reality, this would only be called by
132 | compiler-generated code, or via reflection):
133 | 
134 | ```{.java}
135 | __Pattern p = ...;                       // constant
136 | MethodHandle predicate = p.predicate();  // constant
137 | MethodHandle pointX = p.component(0);    // constant
138 | MethodHandle pointY = p.component(1);    // constant
139 | if ((boolean) predicate.invoke(target)) {
140 |     int x = (int) pointX.invoke(target);
141 |     int y = (int) pointY.invoke(target);
142 |     ... use x and y ...
143 | }
144 | ```
145 | 
146 | ### Intermediate carriers
147 | 
148 | As mentioned, there is a minority of cases where an intermediate
149 | carrier is needed to hold the results of preprocessing the target
150 | (which might just precompute the components and store them in a holder
151 | aggregate, or might just compute precursors for computing the
152 | bindings).  We can model a pattern with such a carrier as a tuple
153 | _\<T,C,B\*\>(p,z,b\*)_, where _C_ is the carrier type, _p_ is a
154 | preprocessing function from `T->C`, and the predicate _z_ and binding
155 | functions _b\*_ are extended to take both the target and the carrier
156 | as arguments (and in practice, will likely ignore one or the other of
157 | them.)  So the predicate _z_ is a function `(T,C)->bool`, and similar
158 | for the binding functions.
159 | 
160 | ```{.java}
161 | interface __Pattern {
162 |     int numComponents();
163 |     MethodHandle preprocess();     // T -> C
164 |     MethodHandle predicate();      // (T,C) -> bool
165 |     MethodHandle component(int i); // (T,C) -> Bi
166 | }
167 | ```
168 | 
169 | Patterns that cannot tolerate concurrent interference can extract the
170 | components into a carrier with the appropriate lock held, as can
171 | patterns requiring complex imperative logic -- without the client
172 | having to treat this case separately.  This protocol is designed to
173 | prevent the uncommon carrier-ful case(s) from polluting the common
174 | carrier-free case with heap allocation (and when needed, we can
175 | eventually use value types rather than reference types for carriers.)
176 | 
177 | The choice of carrier type is ideally an implementation choice by the
178 | class declaring the pattern, but must be subject to some migration
179 | compatibility constraints, since existing client code will embed the
180 | carrier type in call sites.
181 | 
182 | ### Combinators
183 | 
184 | This strategy also allows us to move much of the work of implementing
185 | pattern matching into the runtime (without sacrificing efficiency),
186 | rather than burdening the compiler.  Because patterns are constant
187 | bundles of functions, we can compose them in interesting ways.  For
188 | example, suppose we have patterns _P\<T,C1,A\*\>(pp,pz,pb\*)_ and
189 | _Q\<T,C2,B\*\>(qp,qz,qb\*)_.  We can create a combinator for the pattern
190 | _R_ representing _P && Q_ as follows:
191 | 
192 |  - The target types of `P` and `Q` must be the same, and the target
193 |    type of `R` is the same as `P` and `Q`;
194 | 
195 |  - The carrier type of `R` is nominally the tuple `(C1,C2)`, though in
196 |    practice this can be optimized away if either `P` or `Q` (or
197 |    ideally, both) is carrier-free;
198 | 
199 |  - The `preprocess` of `R` applies the `preprocess` of both `P` and
200 |    `Q`, and constructs a carrier that holds both results;
201 | 
202 |  - The `predicate` of `R` is the logical `AND` of the predicates of `P`
203 |    and `Q`;
204 | 
205 |  - The components of `R` are the concatenation of the components of `P`
206 |    and `Q`.
207 | 
208 | Such combinators act only on the (constant) pattern objects, and are
209 | themselves constant pattern objects, so are suitable to construct at
210 | link time with indy/condy.  As a result, compilers can destructure
211 | complex patterns (AND, OR, nested, guarded) into the same pattern
212 | protocol, allowing uniform code generation, and move the complexity of
213 | complex pattern generation to link time via indy/condy.
214 | 
215 | ## Classfile encoding
216 | 
217 | Logically, patterns are members of classes, like methods and
218 | constructors, though we cannot use the exact encoding of these
219 | artifacts for reasons outlined earlier.
220 | 
221 | Just as executable members include constructors, static methods, and
222 | instance methods, each of these cases is potentially sensible for
223 | patterns as well.  We've already enumerated several types of useful
224 | patterns; match-everything patterns, type-test patterns,
225 | deconstruction patterns, etc.  Deconstruction patterns (e.g.,
226 | `Point(var x, var y)`) are analogous to constructors -- in fact, for
227 | well-behaved objects, they are the dual of constructors.  And static
228 | patterns are the dual of static factories.
229 | 
230 | Like methods, patterns have names, and it is reasonable to want to
231 | overload multiple patterns with the same name but different
232 | signatures.  For example, suppose we have overloaded constructors:
233 | 
234 |     File(String s) { ... }
235 |     File(Path p) { ... }
236 |     File(URI u) { ... }
237 | 
238 | We have these constructors for client convenience -- whatever the
239 | client has, we can make a `File` out of that.  So similarly, we want
240 | deconstructing a `File` to be equally convenient, and expose
241 | overloaded patterns:
242 | 
243 |     case File(String name): ...
244 |     -- or --
245 |     case File(Path p): ...
246 |     -- or --
247 |     case File(URI u): ...
248 | 
249 | ### Method naming
250 | 
251 | Even if we could easily represent patterns as methods in a class file,
252 | we can easily fall afoul of overloading constraints.  If we took the
253 | Scala approach and mapped these all to methods returning `Tuple` or
254 | `Option[Tuple]`, erasure would prevent us from overloading in this
255 | way.
256 | 
257 | But, since our runtime representation is a constant bundle of method
258 | handles, we have a different option -- generate methods not to do the
259 | matching and extraction, but instead generate methods to return a
260 | `__Pattern` (which can be invoked at link time via indy/condy).  And
261 | because matchers are not called directly, but instead through pattern
262 | matching (or reflection), it doesn't really matter what we call these
263 | methods, so long as we preserve some reasonable compatibility
264 | requirements.
265 | 
266 | It's worth noting that we can use the `MethodType` type to describe a
267 | pattern -- in reverse.  Where a method has multiple inputs and one
268 | output, a pattern has one input and multiple outputs.  So we can use
269 | `MethodType` to construct a descriptor for a pattern, just inverting
270 | the input/output sense.  So the `String`-consuming `File` constructor
271 | and the `String`-producing `File` pattern could both be described by the
272 | method type `(LString;)LFile;`.  (This duality is not accidental; the
273 | two operations are inverses of each other.)  So let's call `D` the
274 | descriptor for a pattern, and `N` the name for the pattern
275 | (deconstruction patterns are named for the class, just like
276 | constructors.)
277 | 
278 | If we pick an encoding scheme that can stably encode a descriptor and
279 | is resistant to collisions between overload-equivalent strings (such
280 | as the [symbolic freedom encoding][sym-free]), we can construct an
281 | identifier `DD=Enc(N,D)` and generate static factory methods:
282 | 
283 | ```{.java}
284 | static __Pattern DD() { ... }
285 | ```
286 | 
287 | Just as with methods, we need to encode some additional information,
288 | such as generic type signature, which we can do with an attribute,
289 | such as:
290 | 
291 | ```
292 | Deconstructor_attribute {
293 |     u2 name_index;
294 |     u4 length;
295 |     u1 is_total;
296 |     u2 carrier_type;        // UTF8 type descriptor
297 |     u2 generic_signature;   // S (UTF8 signature)
298 | }
299 | ```
300 | 
301 | ### Additional optimizations
302 | 
303 | It may be desirable for a `__Pattern` to convey, at run time, that it
304 | is carrier-free; this enables combinators to optimize away boxes for
305 | tuples of carriers.  For patterns that use value type carriers, it may
306 | also be useful for the `__Pattern` to be willing to dispense a
307 | sentinel value for the chosen carrier, which can also be used to
308 | optimize combinators.
309 | 
310 | This design is extremely `condy`- and JIT-friendly; the compiler can
311 | generate descriptions of constants to describe exactly the patterns,
312 | or sub-parts of patterns, that are needed, and rely on constant pool
313 | caching to provide fast lazy initialization (with all initialization
314 | costs paid at link time.)  The JIT will recognize that all method
315 | handles used for pattern matching are grounded in chains of constants,
316 | and so will routinely inline away all the intermediate data-shuffling
317 | code and carrier management code.
318 | 
319 | ### Data classes
320 | 
321 | Even in the absence of a language syntax for declaring member
322 | patterns, the _data classes_ feature currently under consideration
323 | lends itself cleanly to automatically exposing a pattern which matches
324 | the class signature (and the constructor signature).
325 | 
326 | ### Reflection
327 | 
328 | As patterns are class members, we'll need reflective support for
329 | discovering and invoking patterns.  This is a straightforward
330 | extension of existing reflective support for `Constructor` and
331 | `Method` members, which are wrappers around the `__Pattern` runtime
332 | abstraction.
333 | 
334 | ## Migration compatibility
335 | 
336 | Because compile-time information is used to condition code generation,
337 | we need to be clear about what can change, and can't, in a
338 | binary-compatible way.
339 | 
340 | Compilers can use pattern total-ness to make exhaustiveness decisions,
341 | which is extremely useful.  To be able to rely on this, total-ness
342 | should be an intrinsic property of the pattern that does not change
343 | across maintenance, and changing a total pattern to partial should not
344 | be a binary- or source-compatible change.
345 | 
346 | However, we envision patterns changing their carrier types, either
347 | changing from a box object to a value type when practical, or going
348 | from a carrier-free to a carrier-ful implementation through ordinary
349 | code evolution.  Therefore, existing call sites that embed the carrier
350 | type must continue to link, which means that if we are changing a
351 | carrier from `C` to `D`, then `C` and `D` should be adaptable to each
352 | other via `MethodHandles.asType()`.  Migrating carriers from a
353 | reference type `LFoo` to a value type `QFoo` would be supported by
354 | `asType()`.  (This suggests that carrier-free implementations should
355 | use `LObject`, allowing future implementations to migrate to a subtype
356 | of `LObject`, or to a value type.)
357 | 
358 | ## Strawman API
359 | 
360 | We've built a prototype of this approach.  It has factories for
361 | constant patterns, type test patterns, any patterns, and
362 | deconstruction patterns, and combinators for dropping bindings,
363 | adapting the target to a supertype of the target, ANDing patterns
364 | together, and nesting patterns.
365 | 
366 | ```{.java}
367 | public interface __Pattern<T> {
368 |     /**
369 |      * A method handle used to preprocess a target into an intermediate carrier.
370 |      * The method handle accepts a match target and returns the intermediate
371 |      * carrier.
372 |      *
373 |      * If the isCarrierFree() method returns true, then this method need not be
374 |      * called, and null can be used for the carrier in other method handle
375 |      * invocations.
376 |      */
377 |     MethodHandle preprocess();
378 | 
379 |     /**
380 |      * A method handle used to determine if the match succeeds.  It accepts
381 |      * the match target and the intermediate carrier returned by preprocess(),
382 |      * and returns a boolean indicating whether the match was successful.
383 |      *
384 |      * If the pattern is declared to always match, then this method need not be
385 |      * called.
386 |      */
387 |     MethodHandle predicate();
388 | 
389 |     /**
390 |      * A method handle to return the i'th component of a successful match.  It
391 |      * accepts the match target and the intermediate carrier returned by
392 |      * preprocess(), and returns the component.
393 |      */
394 |     MethodHandle component(int i);
395 | 
396 |     /**
397 |      * Indicates that this pattern does not make use of an intermediate carrier,
398 |      * and that the preprocess() method handle is a no-op.
399 |      * Combinators exploit carrier freedom to reduce unnecessary allocation.
400 |      */
401 |     boolean isCarrierFree();
402 | 
403 |     /**
404 |      * Returns the pattern descriptor, which is a MethodType whose return type
405 |      * is the match target, and whose parameter types are the components of the
406 |      * match.
407 |      */
408 |     MethodType descriptor();
409 | 
410 |     /**
411 |      * Returns the match target type
412 |      */
413 |     default Class<?> targetType() {
414 |         return descriptor().returnType();
415 |     }
416 | 
417 |     /**
418 |      * Return the intermediate carrier type
419 |      */
420 |     default Class<?> carrierType() {
421 |         return preprocess().type().returnType();
422 |     }
423 | 
424 |     // -- Combinators --
425 | 
426 |     /**
427 |      * Return a __Pattern that is identical to this one, but with fewer
428 |      * binding components
429 |      * @param positions indices of the binding components to drop
430 |      */
431 |     default __Pattern<T> dropBindings(int... positions) {
432 |         return __Patterns.dropBindings(this, positions);
433 |     }
434 | 
435 |     // -- Factories --
436 | 
437 |     /**
438 |      * Return a pattern handle that matches a constant
439 |      */
440 |     static <T> __Pattern<T> ofConstant(Class<T> targetType, T constant) {
441 |         return __Patterns.ofConstant(targetType, constant);
442 |     }
443 | 
444 |     /**
445 |      * Return a pattern handle that matches null
446 |      */
447 |     static <T> __Pattern<Object> ofNull() {
448 |         return __Patterns.ofNull();
449 |     }
450 | 
451 |     /**
452 |      * Return a pattern handle that matches a non-null reference
453 |      */
454 |     static<T> __Pattern<T> ofNonNull(Class<T> targetType) {
455 |         return __Patterns.ofNonNull(targetType);
456 |     }
457 | 
458 |     /**
459 |      * Return a pattern handle that matches targets of the type testType, and
460 |      * produces its target as a binding component.  If
461 |      * testType == targetType, this pattern will always succeed.
462 |      */
463 |     static <T, U extends T> __Pattern<T> ofType(Class<T> targetType, Class<U> testType) {
464 |         return __Patterns.ofType(targetType, testType);
465 |     }
466 | 
467 |     /**
468 |      * Return a pattern handle for a given target type that always succeeds, and
469 |      * produces its target as a binding component.
470 |      *
471 |      * To accept a broader range of target types, use {@link __Pattern::ofType(Class,Class)}
472 |      * or {@link __Pattern::adaptTarget}
473 |      */
474 |     static <T> __Pattern<T> ofType(Class<T> targetType) {
475 |         return __Patterns.ofType(targetType, targetType);
476 |     }
477 | 
478 |     /**
479 |      * Return a pattern handle that always succeeds
480 |      */
481 |     static <T> __Pattern<T> ofAny(Class<T> targetType) {
482 |         return __Patterns.ofAny(targetType);
483 |     }
484 | 
485 |     /**
486 |      * Return a pattern handle that tests its target against testType, and, if
487 |      * successful, extracts components from it according to the {@code components}
488 |      * array of method handles, which each accept a single argument of {@code testType}
489 |      */
490 |     static <T, U extends T> __Pattern<T> ofComponents(Class<T> targetType,
491 |                                                           Class<U> testType,
492 |                                                           MethodHandle... components) {
493 |         return __Patterns.ofComponents(targetType, testType, components);
494 |     }
495 | 
496 |     /**
497 |      * Adapt a pattern handle to a broader target type.
498 |      */
499 |     static<T, U extends T> __Pattern<T> adaptTarget(Class<T> targetType,
500 |                                                         __Pattern<U> pattern) {
501 |         return __Patterns.adaptTarget(pattern, targetType);
502 |     }
503 | 
504 |     /**
505 |      * Return a pattern handle that is the AND of the two patterns provided.
506 |      * The binding components will be the union of the binding components
507 |      * of the two patterns.
508 |      */
509 |     static<T> __Pattern<T> and(Class<T> targetType,
510 |                                    __Pattern<? extends T> left,
511 |                                    __Pattern<? extends T> right) {
512 |         return __Patterns.and(targetType, left, right);
513 |     }
514 | 
515 |     /**
516 |      * Returns a pattern handle that matches the outer pattern and then
517 |      * matches its binding components to the nested patterns.  The binding
518 |      * components are the binding components of the outer pattern, followed
519 |      * by the binding components of each nested pattern.
520 |      */
521 |     static<T> __Pattern<T> nested(__Pattern<T> outer, __Pattern... nested) {
522 |         return __Patterns.nest(outer, nested);
523 |     }
524 | ```
525 | 
526 | ## Future work
527 | 
528 | This document focuses mostly on the compilation target, and on
529 | analysis of compatibility requirements.  Deliberately left out
530 | topics include:
531 | 
532 |  - Language syntax.  While this is a very interesting topic, we prefer
533 |    to lay a solid groundwork that captures runtime description,
534 |    translation strategy, and migration compatibility before moving on
535 |    to the far more subjective subject of defining a language syntax
536 |    for describing patterns.
537 | 
538 |  - Switch classifiers.  While some languages translate pattern
539 |    switches into chains of if-else, it is often possible to do better,
540 |    by performing a pre-computation (e.g., hashing) on the target, and
541 |    strength-reducing to a traditional switch.  (This generally must be
542 |    tolerant of changes induced by recompilation, such as changed class
543 |    hierarchies, so `indy` is likely called for here.)  We anticipate
544 |    being able to use class hierarchy information to optimize switches
545 |    over subtypes of sealed hierarchies, and avoid redundant tests
546 |    based on known type relationships.  This problem will be addressed
547 |    in a separate document.
548 | 
549 | 
550 | [pattern-match]: pattern-matching-for-java
551 | [sym-free]: https://blogs.oracle.com/jrose/entry/symbolic_freedom_in_the_vm
552 | 
553 | 


--------------------------------------------------------------------------------
/site/design-notes/patterns/towards-member-patterns.md:
--------------------------------------------------------------------------------
  1 | # Towards member patterns
  2 | #### Brian Goetz {.author}
  3 | #### January 2024 {.date}
  4 | 
  5 | Time to check in on where things are in the bigger picture of patterns as class
  6 | members.  Note: while this document may have illustrative examples, you should
  7 | not take that as a definitive statement of syntax.
  8 | 
  9 | We've already dipped our toes in the water with _record patterns_.  A record
 10 | pattern looks like:
 11 | 
 12 |     case R(p1, p2, ... pn):
 13 | 
 14 | where `R` is a record type and `p1..pn` are nested patterns that are matched to
 15 | its components.  Because records are defined by their state description, we can
 16 | automatically derive record patterns "for free", just as we derive record
 17 | constructors, accessors, etc.
 18 | 
 19 | There are many other classes that would benefit from being deconstructible with
 20 | patterns.  To that end, we will generalize record patterns to _deconstruction
 21 | patterns_, where any class can declare an explicit deconstruction pattern and
 22 | participate in pattern matching like records do.
 23 | 
 24 | Deconstruction patterns are not the end of the user-declared pattern story. Just
 25 | as some classes prefer to expose static factories rather than constructors, they
 26 | will be able to expose corresponding static patterns.  And there is also a role
 27 | for "instance patterns" and "pattern objects" as well.
 28 | 
 29 | Looking only at record and deconstruction patterns, it might be tempting to
 30 | think that patterns are "just" methods with multiple return.   But this would be
 31 | extrapolating from a special case.  Pattern matching is intrinsically
 32 | _conditional_; the extraction of values from a target is conditioned on whether
 33 | the target _matches_ the pattern.  For the patterns we've seen so far -- type
 34 | patterns and record patterns -- matching can be determined entirely by types.
 35 | But more sophisticated patterns can also depend on other aspects of object
 36 | state.  For example, a pattern corresponding to the static factory
 37 | `Optional::of` requires not only that the match candidate be of type `Optional`,
 38 | but that the match candidate is an `Optional` that actually holds a value.
 39 | Similarly, a pattern corresponding to a regular expression requires the match
 40 | candidate to not only be a `String`, but to match the regular expression.
 41 | 
 42 | ## The key intuition around patterns
 43 | 
 44 | A key capability of objects is _aggregation_; the combination of component
 45 | values into a higher-level composite that incorporates those components.  Java
 46 | facilitates a variety of idioms for aggregation, including constructors,
 47 | factories, builders, etc.  The dual of aggregation is _destructuring_ or
 48 | _decomposition_, which takes an aggregate and attempts to recover its
 49 | "ingredients".  However, Java's support for destructuring has historically been
 50 | far more ad-hoc, largely limited to "write some getters".  Pattern matching
 51 | seeks to put destructuring on the same firm foundation as aggregation.
 52 | 
 53 | Deconstruction patterns (such as record patterns) are the dual of construction.
 54 | If we construct an object:
 55 | 
 56 |     Object o = new Point(x, y);
 57 | 
 58 | we can deconstruct it with a deconstruction pattern:
 59 | 
 60 |     if (o instanceof Point(var x, var y)) { ... }
 61 | 
 62 | > Intuitively, this pattern match asks "could this object have come from
 63 | > invoking the constructor `new Point(x, y)` for some `x` and `y`, and if so,
 64 | > tell me what they are."
 65 | 
 66 | While not all patterns exist in direct correspondence to another constructor or
 67 | method, this intuition that a pattern reconstructs the ingredients to an
 68 | aggregation operation is central to the design; we'll explore the limitations of
 69 | this intuition in greater detail later.
 70 | 
 71 | ## Use cases for declared patterns
 72 | 
 73 | Before turning to how patterns fit into the object model, let's look at some of
 74 | the potential use cases for patterns in APIs.
 75 | 
 76 | ### Recovering construction arguments
 77 | 
 78 | Deconstruction patterns are the dual of constructors; where a constructor takes
 79 | N arguments and aggregates them into an object, a deconstruction pattern takes
 80 | an aggregate and decomposes it into its components.  Constructors are unusual in
 81 | that they are instance behavior (they have an implicit `this` argument), but are
 82 | not inherited; deconstruction patterns are the same.  For deconstruction
 83 | patterns (but not for all instance patterns), the match candidate is always the
 84 | receiver.  Tentatively, we've decided that deconstruction patterns are always
 85 | unconditional; that a deconstruction pattern for class `Foo` should match any
 86 | instance of `Foo`.  At the use site, deconstruction patterns use the same syntax
 87 | as record patterns:
 88 | 
 89 |     case Point(int x, int y):
 90 | 
 91 | Just as constructors can be overloaded, so can deconstruction patterns. However,
 92 | the reasons we might overload deconstruction patterns are slightly different
 93 | than for constructors, and so it may well be the case that we end up with fewer
 94 | overloads of deconstruction patterns than we do of constructors. Constructors
 95 | often form _telescoping sets_, both for reasons of syntactic convenience at the
 96 | use site (fewer arguments to specify) and to avoid brittleness (clients can let
 97 | the class implementation pick the defaults rather than hard-coding them.)  This
 98 | motivation is less pronounced for deconstruction patterns (unwanted bindings can
 99 | be ignored with `_`), so it is quite possible that authors will choose to have
100 | one deconstruction pattern overload per telescoping constructor _set_, rather
101 | than one per constructor.
102 | 
103 | There is no requirement for deconstruction patterns to expose the exact same API
104 | as constructors, but we expect this will be common, at least for classes for
105 | which the construction process is effectively an aggregation operation on the
106 | constructor arguments.
107 | 
108 | ### Recovering static factory arguments
109 | 
110 | Not all classes want to expose their constructors; sometimes classes prefer to
111 | expose static factories instead.  In this case, the class should be able to
112 | expose corresponding static patterns as well.
113 | 
114 | For a class like `Optional`, which exposes factories `Optional::of` and
115 | `Optional::empty`, the object state incorporates not only the factory arguments,
116 | but which factory was chosen.  Accordingly, it makes sense to deconstruct the
117 | object in the same way:
118 | 
119 |     switch (optional) {
120 |         case Optional.of(var payload): ...
121 |         case Optional.empty(): ...
122 |     }
123 | 
124 | Such patterns are necessarily conditional, asking the Pattern Question: "could
125 | this `Optional` have come from the `Optional::of` factory, and if so, with what
126 | argument?"  Static patterns, like static methods, lack a receiver, so `this` is
127 | not defined in the body of a static pattern.  However, we will need a way to
128 | denote the match candidate, so its state can be examined by the pattern body.
129 | 
130 | Another feature of static methods is that they can be used to put a factory for
131 | a class `C` in _another_ class, whether one in the same maintenance domain (such
132 | as the `Collections`) or in some other package.  This feature is shared by
133 | static patterns.
134 | 
135 | ### Conversions and queries
136 | 
137 | Another application for static patterns is the dual of static methods for
138 | conversions.  For a static method like `Integer::toString`, which converts an
139 | `int` to its `String` representation, a corresponding static pattern
140 | `Integer::toString` can ask the Pattern Question: "could this `String` have come
141 | from converting an integer to `String`, and if so, what integer".
142 | 
143 | Some groups of query methods in existing APIs are patterns in disguise.  The
144 | class `java.lang.Class` has a pair of instance methods, `Class::isArray` and
145 | `Class::getComponentType`, that work together to determine if the `Class`
146 | describes an array type, and if so, provide its component type. This question
147 | is much better framed as a single pattern:
148 | 
149 |     case Class.arrayClass(var componentType):
150 | 
151 | The two existing methods are made more complicated by their relationship to each
152 | other; `Class::getComponentType` has a precondition (the `Class` must describe
153 | an array type) and therefore has to specify and implement what to do if the
154 | precondition fails, and the relationship between the methods is captured only in
155 | documentation.  By combining them into a single pattern, it become impossible to
156 | misuse (because of the inherent conditionality of patterns) and easier to
157 | understand (because it can all be documented in one place.)
158 | 
159 | This hypothetical `Class::arrayClass` pattern also has a sensible dual as a
160 | factory method:
161 | 
162 |     static<T> Class<T[]> arrayClass(Class<T> componentType)
163 | 
164 | which produces the array `Class` for the array type whose component type is
165 | provided. An API need not provide both directions of a conversion, but if it
166 | does, the two generally strengthen each other.  This method/pattern pair could
167 | be either static or instance members, depending on API design choice.
168 | 
169 | Another form of "conversion" method / pattern pair, even though both types are
170 | the same, is "power of two".  A `powerOfTwo` method takes an exponent and
171 | returns the resulting power of two; a `powerOfTwo` pattern asks if its match
172 | candidate is a power of two, and if so, binds the base-two logarithm.
173 | 
174 | ### Numeric conversions
175 | 
176 | As Project Valhalla gives us the ability to declare new numeric types, we will
177 | want to be able to convert these new types to other numeric types.  For
178 | unconditional conversions (such as widening half-float to float), an ordinary
179 | method will suffice:
180 | 
181 |     float widen(HalfFloat f);
182 | 
183 | But the reverse is unlikely to be unconditional; narrowing conversions can fail
184 | if the value cannot be represented in the narrower type. This is better
185 | represented as a pattern which asks the Pattern Question: "could this `float`
186 | have come from widening a `HalfFloat`, and if so, tell me what `HalfFloat` that
187 | is."  A widening conversion (or boxing conversion) is best represented by a
188 | _pair_ of members, an ordinary method for the unconditional direction, and a
189 | pattern for the conditional direction.
190 | 
191 | ### Conditional extraction
192 | 
193 | Some operations, such as matching a string to a regular expression with capture
194 | groups, are pattern matches in disguise.  We should be able to take a regular
195 | expression R and match against it with `instanceof` or `switch`, binding capture
196 | groups (using varargs patterns) if it matches.
197 | 
198 | ## Member patterns in the object model
199 | 
200 | We currently have three kinds of executable class members: constructors, static
201 | methods, and instance methods.  (Actually constructors are not members, but we
202 | will leave this pedantic detail aside for now.)  As the above examples show,
203 | each of these can be amenable to a dual member which asks the Pattern Question
204 | about it.
205 | 
206 | Patterns are dual to constructors and methods in two ways: structurally and
207 | semantically.  Structurally, patterns invert the relationship between inputs and
208 | outputs: a method takes N arguments as input and produces a single result, and
209 | the corresponding pattern takes a candidate result (the "match candidate") and
210 | conditionally produces N bindings.  Semantically, patterns ask the Pattern
211 | Question: could this result have originated by some invocation of the dual
212 | operation.
213 | 
214 | ### Patterns as inverse methods and constructors
215 | 
216 | One way to frame patterns in the object model is as _inverse constructors_ and
217 | _inverse methods_.  For purposes of this document, I will use an illustrative
218 | syntax that directly evokes this duality (but remember, we're not discussing
219 | syntax now):
220 | 
221 | ```
222 | class Point {
223 |     final int x, y;
224 | 
225 |     // Constructor
226 |     Point(int x, int y) { ... }
227 | 
228 |     // Deconstruction pattern
229 |     inverse Point(int x, int y) { ... }
230 | }
231 | 
232 | class Optional<T> {
233 |     // Static factories
234 |     static<T> Optional<T> of(T t) { ... }
235 |     static<T> Optional<T> empty() { ... }
236 | 
237 |     // Static patterns
238 |     static<T> inverse Optional<T> of(T t) { ... }
239 |     static<T> inverse Optional<T> empty() { ... }
240 | }
241 | ```
242 | 
243 | `Point` has a constructor and an inverse constructor (deconstruction pattern) for
244 | the external representation `(int x, int y)`; in an inverse constructor, the
245 | binding list appears where the parameter list does in the constructor.
246 | `Optional<T>` has static factories and corresponding patterns for `empty` and
247 | `of`.  As with inverse constructors, the binding list of a pattern appears in
248 | the position that the parameters appear in a method declaration; additionally,
249 | the _match candidate type_ appears in the position that the return value appears
250 | in a method declaration.  In both cases, the declaration site and use site of
251 | the pattern uses the same syntax.
252 | 
253 | In the body of an inverse constructor or method, we need to be able to talk
254 | about the match candidate.  In this model, the match candidate has a type
255 | determined by the declaration (for an inverse constructor, the class; for an
256 | inverse method, the type specified in the "return position" of the inverse
257 | method declaration), and there is a predefined context variable (e.g., `that`)
258 | that refers to the match candidate.  For inverse constructors, the receiver
259 | (`this`) is aliased to the match candidate (`that`), but not necessarily so for
260 | inverse methods.
261 | 
262 | ### Do all methods potentially have inverses?
263 | 
264 | We've seen examples of constructors, static methods, and instance methods that
265 | have sensible inverses, but not all methods do.  For example, methods that
266 | operate primarily by side effects (such as mutative methods like setters or
267 | `List::add`) are not suitable candidates for inverses.  Similarly, pure
268 | functions that "co-mingle" their arguments (such as arithmetic operators) are
269 | also not suitable candidates for inverses, because the ingredients to the
270 | operation typically can't be recovered from the result (i.e., `4` could be the
271 | result of `plus(2, 2)` or `plus(1, 3)`).
272 | 
273 | Intuitively, the methods that are invertible are the ones that are
274 | _aggregative_.  The constructor of a (well-behaved) record is aggregative, since
275 | all the information passed to the constructor is preserved in the result.
276 | Factories like `Optional::of` are similarly aggregative, as are non-lossy
277 | conversions such as widening or boxing conversions.
278 | 
279 | Ideally, an aggregation operation and its corresponding inverse form an
280 | _embedding-projection pair_ between the aggregate and a component space.
281 | Intuitively, an embedding-projection pair is an algebraic structure defined by a
282 | pair of functions between two sets such that composing in one direction
283 | (embed-then-project) is an identity, and composing in the other direction
284 | (project-then-embed) is a well-behaved approximation.
285 | 
286 | ### Conversions
287 | 
288 | Conversion methods are a frequent candidate for inversion.  We already have
289 | 
290 |     // Integer.java
291 |     static String toString(int i) { ... }
292 | 
293 | to which the obvious inverse is
294 | 
295 |     static inverse String toString(int i) { ... }
296 | 
297 | and we can inspect a string to see if it is the string representation of an integer with
298 | 
299 |     if (s instanceof Integer.toString(int i)) { ... }
300 | 
301 | This composes nicely with deconstruction patterns; if we have a `Box<String>`
302 | and want to ask whether the contained string is really the string representation
303 | of an integer, we can ask:
304 | 
305 |     case Box(Integer.toString(int i)):
306 | 
307 | which conveniently looks just like the composition of constructors or factories
308 | used to create such an instance (`new Box(Integer.toString(3))`).
309 | 
310 | When it comes to user-definable numeric conversions, the most likely strategy
311 | involves combining related operators in a single _witness_ object.  For example,
312 | numeric conversion might be modeled as:
313 | 
314 | ```
315 | interface NumericConversion<FROM, TO> {
316 |     TO convert(FROM from);
317 |     inverse TO convert(FROM from);
318 | }
319 | ```
320 | 
321 | which reflects the fact that conversion is total in one direction (widening,
322 | boxing) and conditional in the other (narrowing, unboxing.)
323 | 
324 | ### Regular expression matching
325 | 
326 | Regular expressions are a form of ad-hoc pattern; a given string might match a
327 | given regex, or not, and if it does, it might produce multiple bindings (the
328 | capture groups.)  It would be nice to be able to express regular expression
329 | matches as ordinary pattern matches.
330 | 
331 | Conveniently, we already have an object representation of regular expressions --
332 | `java.util.regex.Pattern`, which is an ideal place to put an instance pattern:
333 | 
334 | ```
335 | // varargs pattern
336 | public inverse String match(String... groups) {
337 |     Matcher m = matcher(that);    // *that* is the match candidate
338 |     if (m.matches())              // receiver for matcher() is the Pattern
339 |         __yield IntStream.range(1, m.groupCount())
340 |                             .map(m::group)
341 |                             .toArray(String[]::new);
342 | }
343 | ```
344 | 
345 | And now, we want to express "does string s match any of these regular
346 | expressions":
347 | 
348 | ```
349 | static final Pattern As = Pattern.compile("([aA]*)");
350 | static final Pattern Bs = Pattern.compile("([bB]*)");
351 | static final Pattern Cs = Pattern.compile("([cC]*)");
352 | 
353 | ...
354 | 
355 | switch (aString) {
356 |     case As.match(String as) -> ...
357 |     case Bs.match(String bs) -> ...
358 |     case Cs.match(String cs) -> ...
359 |     ...
360 | }
361 | ```
362 | 
363 | Essentially, `j.u.r.Pattern` becomes a _pattern object_, where the state of the
364 | object is used to determine whether or not it matches any given input.  (There
365 | is nothing stopping a class from having multiple patterns, just as it can have
366 | multiple methods.)
367 | 
368 | ## Pattern resolution
369 | 
370 | When we invoke a method, sometimes we are able to refer to the method with an
371 | _unqualified_ name (e.g., `m(3)`), and sometimes the method must be _qualified_
372 | with a type name, package name, or a receiver object.  The same is true for
373 | declared patterns.
374 | 
375 | Constructors for classes that are in the same package, or have been imported,
376 | can be referred to with an unqualified name; constructors can also be qualified
377 | with a package name.  The same is true for deconstruction patterns:
378 | 
379 | ```
380 | case Foo(int x, int y):         // unqualified
381 | case com.foo.Bar(int x, int y): // qualified by package
382 | ```
383 | 
384 | Static methods that are declared in the current class or an enclosing class, or
385 | are statically imported, can be referred to with an unqualified name; static
386 | methods can also be qualified with a type name.  The same is true for static
387 | patterns:
388 | 
389 | ```
390 | case powerOfTwo(int exp):  // unqualified
391 | case Optional.of(var e):   // qualified by class
392 | ```
393 | 
394 | Instance methods invoked on the current object can be referred to with an
395 | unqualified name; instance methods can also be qualified by a receiver object.
396 | The same is true for instance patterns:
397 | 
398 | ```
399 | case match(String s):    // unqualified
400 | case As.match(String s): // qualified by receiver
401 | ```
402 | 
403 | In a qualified pattern `x.y`, `x` might be a package name, a class name, or a
404 | (effectively final) receiver variable; we use the same rules for choosing how to
405 | interpret a qualifier for patterns as we do for method invocations.
406 | 
407 | ## Benefits of explicit duality
408 | 
409 | Declaring method-pattern pairs whose structure and name are the same yields many
410 | benefits.  It means that we take things apart using the same abstractions used to put them together, which makes code more readable and less error-prone.
411 | 
412 | Referring to an _inverse pair_ of operations by a single name is simpler than
413 | having separate names for each direction; not only don't we need to come up with
414 | a name for the other direction, we also don't need to teach clients that "these
415 | two names are inverses", because the inverses have the same name already. What
416 | we know about the method `Integer::toString` immediately carries over to its
417 | inverse.
418 | 
419 | Further, thinking about a method-pattern pair provides a normalizing force to
420 | actually ensuring the two are inverses; if we just had two related methods
421 | `xToY` and `yToX`, they might diverge subtly because the connection between the
422 | two members is not very strong.
423 | 
424 | Finally, this gives the language permission to treat the _pair_ of members as a
425 | thing in some cases, such as the use of ctor-dtor pairs in "withers" or
426 | serialization.
427 | 
428 | The explicit duality takes a little time to get used to.   We have many years of
429 | experience of naming a method for its directionality, so people's first reaction
430 | is often "the pattern should be called `Integer.fromString`, not
431 | `Integer.toString`". So people will initially bristle at giving both directions
432 | the same name, especially when one implies a directionality such as `toString`.
433 | (In these cases, we can fall back on a convention that says that we should name
434 | it for the total direction.)
435 | 
436 | ## Pattern lambdas, pattern objects, pattern references
437 | 
438 | Interfaces with a single abstract method (SAM) are called _functional
439 | interfaces_ and we support a conversion (historically called SAM conversion)
440 | from lambdas to functional interfaces.  Interfaces with a single abstract
441 | pattern can benefit from a similar conversion (call this "SAP" conversion.)
442 | 
443 | In the early days of Streams, people complained about processing a stream using
444 | instanceof and cast:
445 | 
446 | ```
447 | Stream<Object> objects = ...
448 | Stream<String> strings = objects.filter(x -> x instanceof String)
449 |                                 .map(x -> (String) x);
450 | ```
451 | 
452 | This locution is disappointing both for its verbosity (saying the same thing in
453 | two different ways) and its efficiency (doing the same work basically twice.)  Later, it became possible to slightly simplify this using `mapMulti`:
454 | 
455 | ```
456 | objects.mapMulti((x, sink) -> { if (x instanceof String s) sink.accept(s); })
457 | ```
458 | 
459 | But, ultimately this stream pipeline is a pattern match; we want to match the
460 | elements to the pattern `String s`, and get a stream of the matched string
461 | bindings.  We are now in a position to expose this more directly. Suppose we
462 | had the following SAP interface:
463 | 
464 | ```
465 | interface Match<T, U> {
466 |     inverse U match(T t);
467 | }
468 | ```
469 | 
470 | then `Stream` could expose a `match` method:
471 | 
472 | ```
473 | <U> Stream<T> match(Match<T, U> pattern);
474 | ```
475 | 
476 | We can SAP-convert a lambda whose yielded bindings are compatible with the sole
477 | abstract pattern in the SAP interface:
478 | 
479 | ```
480 | Match<Object, String> m = o -> { if (o instanceof String s) __yield(s); };
481 | ... stream.match(m) ...
482 | ```
483 | 
484 | And we can do the same with _pattern references_ to existing patterns that are
485 | compatible with the sole pattern in a SAP interface.   As a special case, we can
486 | also support a conversion from type patterns to a compatible SAP type with an
487 | `instanceof` pattern reference (analogous to a `new` method reference):
488 | 
489 | ```
490 | objects.match(String::instanceof)
491 | ```
492 | 
493 | where `String::instanceof` means the same as the previous lambda example.  This
494 | means that APIs like `Stream` can abstract over conditional behavior as well as
495 | unconditional.
496 | 


--------------------------------------------------------------------------------
/site/design-notes/templated-strings.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # String Tapas Redux
  3 | ## Beyond Mere String Interpolation {.subtitle}
  4 | #### Jim Laskey and Brian Goetz {.author}
  5 | #### September 2021 {.date}
  6 | 
  7 | Some time ago, we talked about all the things we might want to do with strings:
  8 | multi-line strings, raw strings, interpolated strings.    At the time, we sated
  9 | our appetite with the first course -- text blocks -- and now are ready to talk
 10 | about what we can do next.
 11 | 
 12 | It is one of the most commonly requested features to support some sort of
 13 | _string interpolation_, which is useful for formatting log messages and snippets
 14 | of HTML, JSON, XML, or SQL.  While Java already has many ways to combine
 15 | constant strings with non-constant values (concatenation, `String::format`,
 16 | `MessageFormat`), developers would prefer something more direct, for several
 17 | reasons:
 18 | 
 19 |  - _Ceremony_ --- Writing string interpolation expressions, rather than calls to
 20 |    template-formatting libraries, is less work.
 21 | 
 22 |  - _Readability_ --- In many cases (though not all) a string interpolation
 23 |    expression like `"My name is ${name}, I am ${age} years old"` is more
 24 |    readable than its equivalent with `String::format`, because the labels ("My
 25 |    name is") and the corresponding parameters are right next to each other.
 26 | 
 27 |  - _Safety_ --- A long format string or a long list of interpolants invites
 28 |    mistakes, such as the arity of parameters not matching that of format
 29 |    specifiers, or the types of the parameters not matching the corresponding
 30 |    format specifiers.
 31 | 
 32 | However, there are reasons we've been hesitant to do such a feature, including:
 33 | 
 34 |   - _Injection attacks_ --- Constructing SQL queries or JSON expressions with string
 35 |     interpolation is convenient, but is at risk for [injection
 36 |     attacks](https://xkcd.com/327/).  Improving mechanisms for constructing
 37 |     composite strings without similarly improving or enabling safer mechanisms
 38 |     for constructing queries would surely widen the attack surface.  This is
 39 |     asking users to choose between convenience and security.
 40 | 
 41 |   - _Localization_ --- Java has a strong commitment to internationalization;
 42 |     introducing a more convenient but less localizable mechanism for
 43 |     constructing messages will result in fewer applications being localized.
 44 |     This is asking users to choose between convenience and flexibility.
 45 | 
 46 |   - _Formatting_ --- A naive interpretation of string interpolation deprives us of
 47 |     the ability to format with format specifiers such as field widths,
 48 |     locale-sensitive numeric formatting, etc.  This is asking users to choose
 49 |     between convenience and expressiveness.
 50 | 
 51 | The versions of this feature implemented by many popular languages offer the
 52 | desired convenience in the simple cases, but fall afoul of many of these
 53 | downsides.  We may want the convenience of string interpolation, but we also
 54 | want safety and flexibility across a range of domains.
 55 | 
 56 | | Language&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; | Example |
 57 | |:---|:---|
 58 | | C#            | `$"{x} plus {y} equals {x + y}"`      |
 59 | | Groovy        | `"$x plus $y equals ${x + y}"`        |
 60 | | Haskell       | `[i\|#{x} plus #{y} equals #{x + y}\|]` |
 61 | | JavaScript    | &#96;`${x} plus ${y} equals ${x + y}`&#96; |
 62 | | Kotlin        | `"$x plus $y equals ${x + y}"`        |
 63 | | Scala         | `f"$x%d plus $y%d equals ${x + y}%d"` |
 64 | | Python        | `f"{x} plus {y} equals {x + y}"`      |
 65 | | Ruby          | `"#{x} plus #{y} equals #{x + y}"`    |
 66 | | Swift         | `"\(x) plus \(y) equals \(x + y)"`    |
 67 | | VisualBasic   | `$"{x} plus {y} equals {x + y}"`      |
 68 | 
 69 | We’re not interested in merely doing “string interpolation” as it has been
 70 | interpreted by other languages. We would like to do better.
 71 | 
 72 | ## What's wrong with string interpolation?
 73 | 
 74 | The only case handled by most other languages that support string interpolation
 75 | is the simplest one -- uninterpreted concatenation. Picking an example at
 76 | random:
 77 | 
 78 | ```
 79 | // Scala
 80 | val greeting = s"Hello, $name, I am $age years old"
 81 | ```
 82 | 
 83 | The feature illustrated here is constrained in many ways: the format string is
 84 | not validated, the parameters are not validated or transformed in any way, the
 85 | parts are combined by a very constrained mechanism (the result must be exactly
 86 | the segments of the format string concatenated with the string value of the
 87 | parameters), and finally, the result must be a `String`.  While these might be
 88 | convenient defaults, not being able to customize any of these behaviors is a
 89 | severe limitation.
 90 | 
 91 | In addition, the surfacing of the feature in the language is confusingly ad-hoc;
 92 | it requires a different delimiter from "regular" strings, as well as a different
 93 | set of rules for separating verbatim content from embedded expressions.  In the
 94 | first course (text blocks), an important goal was that string literals and text
 95 | blocks be different stackings of the same basic feature, rather than wholly
 96 | separate features (this is one reason "raw string literals" was withdrawn).  We
 97 | would like to follow the same discipline here; embedded parameters should be
 98 | part of the overall string expression feature, not a separate thing.
 99 | 
100 | ## Another level of indirection
101 | 
102 | We can meet our diverse goals by separating _mechanism_ from _policy_.
103 | How we introduce parameters into string expressions is mechanism; how we combine
104 | the parameters and the string into a final result (e.g., concatenation) is
105 | policy.  The language may need to have an opinion about how a templatized
106 | expression is expressed, but the semantics of how parameters are validated,
107 | transformed, and combined should remain in the hands of ordinary library code.
108 | Users should be able to select the templating policy they want, and be able to
109 | capture templating policies in libraries for reuse.
110 | 
111 | A templating policy might be described by an interface like:
112 | 
113 | ```
114 | interface TemplatePolicy<T> {
115 |     T apply(String templateString, List<Object> parameters);
116 | }
117 | ```
118 | 
119 | An implementation of a template policy is an ordinary object that implements
120 | some parameterization of `TemplatePolicy`.  The simplest template policy is what
121 | every other language does -- concatenation -- and can be exposed by the standard
122 | libraries.
123 | 
124 | We can express template processing as _instance behavior_ on a policy object:
125 | 
126 | ```
127 | String s = STR."Hello \{name}, I am \{age} years old.";
128 | ```
129 | 
130 | where `STR` is a static instance of `TemplatePolicy` which captures the obvious
131 | policy.
132 | 
133 | The escape sequence `\{` is currently unused (and therefore currently illegal in
134 | string literals and text blocks), so this choice of parameter carrier is
135 | compatible with the existing string literal and text block features.  (Swift
136 | uses `\(...)`, which would also be a valid choice.)  This means we do not need
137 | to invent a new form (or two) of "string template expression" with a different
138 | delimiter or prefix character.
139 | 
140 | The policy object has the flexibility to validate the format string and
141 | parameters, interpret the format string and parameters as it sees fit, combine
142 | them as it sees fit (not just sequential concatenation), and produce a result
143 | that is not even a `String`.  The compiler shreds a parameterized string
144 | expression into the constant and non-constant parts, and arranges for the
145 | combination method on the policy object to be invoked.
146 | 
147 | ## Examples
148 | 
149 | Delegating control to a policy object dramatically expands the expressiveness
150 | and safety of the feature.
151 | 
152 | #### String formatting
153 | 
154 | Formatting libraries like `String::format` offer more
155 | than just interpolation; they offer rich formatting options such as field-width
156 | management, leading-zero fills, hex conversion, locale-specific presentation,
157 | etc.  Making straight interpolation easier without improving formatting
158 | libraries leaves users with an unpleasant choice of convenience or rich
159 | formatting.  If we wanted to format the number `age` using the various modifiers
160 | supported by the `%d` format specifier, we wouldn't want to abandon the convenience
161 | of the straightforward expression.
162 | 
163 | On the other hand, it would be folly to bake the `String::format` descriptor
164 | language into the Java language; representation and interpretation of the format
165 | specifiers should be under the control of the template policy.  But we can
166 | encapsulate this in a library that implements this set of format specifiers, and
167 | exposes a constant policy object.  Here, `FMT` is a policy object that
168 | interprets a set of format specifiers that are similar to `printf` /
169 | `String::format`, using the convention that the format specifier goes right
170 | before the "hole":
171 | 
172 | ```
173 | String s = FMT."Hello %s\{name}, I am %10d\{age} years old.";
174 | ```
175 | 
176 | When the format string is shredded into constant and variable parts, the end of
177 | each constant part should contain a format descriptor which is used to condition
178 | the formatting of the following parameter (and the policy object can validate
179 | this).  The Java language knows nothing of the format descriptor language; this
180 | is interpreted solely by the formatter library.
181 | 
182 | Even ignoring the choice of format descriptor language, library methods like
183 | `String::format` often embody difficult choices, such as whether or not to use
184 | the currently selected `Locale` to format numeric quantities.  Some users like
185 | the flexibility they get from such automatic localization; others resent the
186 | performance overhead of `Locale` processing.  By exposing a mechanism by which
187 | users and libraries can implement their own formatters, users are not
188 | constrained by these choices made by libraries on their behalf -- there could be
189 | both locale-sensitive and locale-insensitive formatters for the same domain, and
190 | the user can choose the one they want.
191 | 
192 | #### Validation and normalization
193 | 
194 | SQL statements are often parameterized by
195 | some dynamic data value.  Unfortunately, the data being interpolated is often
196 | tainted by user input.  The JDBC framework includes builders for _prepared
197 | statements_, which sanitize inputs and compose the query in a SQL-aware manner:
198 | 
199 | ```
200 | PreparedStatement ps
201 |     = connection.prepareStatement("SELECT * FROM Person p where p.last_name = ?");
202 | ps.setString(1, name);
203 | ```
204 | 
205 | This will escape any `'` characters in `name` and surround it with `'`
206 | characters before performing the interpolation.  If `name` is `"Bobby"`, the
207 | resulting query will be `SELECT * FROM Person p where p.last_name = 'Bobby'`.
208 | 
209 | With a convenient string interpolation feature, it is sorely tempting to
210 | construct SQL queries with:
211 | 
212 | ```
213 | String query = "SELECT * FROM Person p where p.last_name = '$name'";
214 | ResultSet rs = connection.createStatement().executeQuery(query);
215 | ```
216 | 
217 | Unfortunately, this now exposes the application to potentially disastrous SQL
218 | injection attacks unless `name` has been previously sanitized.  Trading
219 | security for convenience is not a good trade.
220 | 
221 | We can get the best of both worlds with a SQL-specific policy object that
222 | performs the sanitization that `PreparedStatement` does, and more:
223 | 
224 |  - Enforce that any quotes in the format string itself are balanced.
225 |  - Enforce that interpolation points do not occur in "quoted" parts of the
226 |    format string.
227 |  - Wrap parameters with quotes.
228 |  - Escape any quote characters in parameters.
229 | 
230 | SQL databases generally follow a common set of rules around single-quotes, but
231 | some databases also have other supported forms of quotes.  To the extent that a
232 | given database has its own nonstandard quoting rules, we would like to defend
233 | against attacks that exploit those as well.  This means that we don't just need
234 | a SQL-specific policy object; we need a `Connection`-specific policy
235 | object, because the `Connection` comes from the JDBC driver for the specific
236 | database we're talking to.
237 | 
238 | While there are many API choices that JDBC might select, one might be to make
239 | `Connection` also be a policy object; then we could ask the connection
240 | to format the query directly:
241 | 
242 | ```
243 | var query = connection."SELECT * FROM \{table}";
244 | ```
245 | 
246 | #### Non-string results
247 | 
248 | One could easily imagine a JSON or XML library
249 | providing a similar level of quote discipline and injection protection in those
250 | domains (they are vulnerable to injection attacks too):
251 | 
252 | ```
253 | String s = JSON."""
254 |                 {
255 |                    "a": \{a},
256 |                    "b": \{b}
257 |                 }
258 |                 """;
259 | ```
260 | 
261 | The policy referred to by `JSON` would perform the proper validation of the
262 | format string, and quoting and escaping of the parameters `a` and `b` before
263 | composing the final string.
264 | 
265 | But, do we even want to produce a string at all?  Many JSON libraries allow us
266 | to represent JSON documents through a `Json` type; it might be more efficient
267 | for the JSON policy object to go directly to that representation rather than
268 | first constructing a (potentially large) string and then parsing the resulting
269 | string.  While some policy objects will surely want to produce strings,
270 | there's no reason all of them must.  Our policy interface can be parameterized
271 | by the type it returns, as `TemplatePolicy` illustrated.  So this JSON example
272 | could be:
273 | 
274 | ```
275 | Json j = JSON."""
276 |               {
277 |                  "a": \{a},
278 |                  "b": \{b}
279 |               }
280 |               """;
281 | ```
282 | 
283 | which is more direct and potentially more efficient.
284 | 
285 | Another use for non-string results is when formatting messages for logging.
286 | Many logging calls are for debug information, and often debug logging is turned
287 | off.  Many frameworks allow you to provide a `Supplier<String>` for log messages
288 | that is only invoked if the message is actually going to be logged, to avoid the
289 | overhead of formatting a string that is going to be thrown away.  A lazy
290 | policy object could produce `Supplier<String>` rather than `String` itself.
291 | 
292 | #### Localization
293 | 
294 | The examples so far have been about interpolation enhanced
295 | with validation and transformation, but this can be taken further.  The JDK
296 | contains APIs such as `ResourceBundle` to support localization of messages. A
297 | resource bundle is a mapping from key names to localizable template strings.
298 | (These template strings use a different format than `String::format`, in part
299 | because they must support changing the order of parameters as part of the
300 | localization process; the interpolation "hole" in the localized template
301 | contains the index of the corresponding parameter.)
302 | 
303 | If resource bundles had a `TemplatePolicy`, then they could use the format
304 | string as a key to look up the localized string, and then perform the
305 | interpolation, all in one go:
306 | 
307 | ```
308 | String message = resourceBundle."error: file \{filename} not found";
309 | ```
310 | 
311 | which would have the effect of using the string `"error: file \{} not found"` as
312 | the key, mapping it to an appropriate localized error message for the current
313 | locale, reordering the parameters according to the `{nn}` holes in the
314 | localized messages, and formatting the result using the `MessageFormat` rules.
315 | 
316 | ## Templated Strings
317 | 
318 | A reasonable question is what should a templated string expression _without_ a
319 | policy evaluate to?   For those who "just" want string interpolation, the
320 | "obvious" answer is to use the concatenation policy, but there is a better
321 | choice: evaluate to an "unprocessed" string template, which can be passed to a
322 | library for later processing.  We can model an unprocessed template as:
323 | 
324 | ```
325 | interface TemplatedString {
326 |     String formatString();
327 |     List<Object> parameters();
328 |     // more
329 | }
330 | ```
331 | 
332 | and say that the following:
333 | 
334 | ```
335 | var s = "Hello, \{name}, I am \{age} years old";
336 | ```
337 | 
338 | evaluates to a `TemplatedString`.  Libraries like `String::format` can provide
339 | overloads that accept templated strings, so templated strings can be passed
340 | directly to libraries:
341 | 
342 | ```
343 | String format(String formatString, Object... parameters);
344 | String format(TemplatedString ts);
345 | ```
346 | 
347 | We can now recast our policy interface to take a templated string:
348 | 
349 | ```
350 | interface TemplatePolicy<T, E extends Exception> {
351 |     T apply(TemplatedString ts) throws E;
352 | }
353 | ```
354 | 
355 | (We've also snuck in another parameter, that allows policies to declare that
356 | they throw checked exceptions that callers would have to deal with, such as
357 | `SQLException`, though most will likely instantiate `E` with
358 | `RuntimeException`.)
359 | 
360 | ## Restrictions
361 | 
362 | We may wish to place some syntactic restrictions on the parameters to limit
363 | readability and safety hazards (at the expense of expressiveness).  At one
364 | extreme of the spectrum, we could restrict to only allowing identifiers, as
365 | `bash` does, but this is surely too restrictive.  At the other extreme, we could
366 | allow arbitrary expressions.  But, Java's expressions cover a broad range,
367 | including string literals (which could create confusion over what is part of the
368 | format string and what is parameter), switch expressions (which can contain
369 | statements), and auto-incrementing expressions (which have side-effects); we may
370 | want to prune this back to eliminate puzzlers-in-waiting.
371 | 
372 | A possible middle ground is the subset of expressions generated from numeric
373 | literals, variables, field selection, arithmetic operators, and array
374 | dereference.  This is rich enough to describe parameters like `a.b[i-1]` or
375 | `fooCount+barCount`, but is guaranteed side-effect-free and does not contain
376 | embedded string literals.
377 | 
378 | ## Translation
379 | 
380 | The policy APIs shown here have the drawback of primitive and array boxing;
381 | further, for formatting such as that performed by `String::format`, much of the
382 | work is in scanning the format string, which is usually a constant at a given
383 | invocation site.  There are opportunities for more efficient translation with
384 | `invokedynamic` that avoids these pitfalls.  We wish to achieve a balance
385 | between making it easy for libraries to implement templating policies, and
386 | allowing policies (such as the `String::format` equivalent) to support a more
387 | efficient translation; the details of this will be covered separately.
388 | 


--------------------------------------------------------------------------------
/site/guides/lvti-faq.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Local Variable Type Inference
  3 | ## Frequently Asked Questions {.subtitle}
  4 | #### Brian Goetz and Stuart Marks {.author}
  5 | #### August 2019 {.date}
  6 | 
  7 | ## Q1. Why have `var` in Java? {#Q1}
  8 | 
  9 | Local variables are the workhorse of Java. They allow methods to
 10 | compute significant results by cheaply storing intermediate
 11 | values. Unlike a field, a local variable is declared, initialized, and
 12 | used in the same block. The name and initializer of a local variable
 13 | are often more important for a reader's understanding than the
 14 | type. Commonly, the name and initializer carry just as much
 15 | information as the type: `Person person = new Person();`
 16 | 
 17 | The role of `var` in a local variable declaration is to stand in for
 18 | the type, so that the name and initializer stand out: `var person =
 19 | new Person();` The compiler infers the type of the local variable from
 20 | the initializer. This is especially worthwhile if the type is
 21 | parameterized with wildcards, or if the type is mentioned in the
 22 | initializer. Using `var` can make code more concise without
 23 | sacrificing readability, and in some cases it can improve readability
 24 | by removing redundancy.
 25 | 
 26 | ## Q2. Does this make Java dynamically typed? Is this like `var` in JavaScript? {#Q2}
 27 | 
 28 | No and no. Java is still a statically typed language, and the addition
 29 | of `var` doesn't change this. `var` can be used in a local
 30 | variable declaration instead of the variable's type. With `var`, the
 31 | Java compiler _infers_ the type of the variable at compile time, using
 32 | type information obtained from the variable's initializer. The
 33 | inferred type is then used as the static type of the variable.
 34 | Typically, this is the same as the type you would have written
 35 | explicitly, so a variable declared with `var` behaves exactly as if
 36 | you had written the type explicitly.
 37 | 
 38 | Java compilers have performed type inference for many years. For
 39 | example, in Java 8, the parameters of a lambda expression do not need
 40 | explicit types because the compiler infers their types from how the
 41 | lambda expression is used:
 42 | 
 43 |     List<Person> list = ...
 44 |     list.stream().filter(p -> p.getAge() > 18) ...
 45 | 
 46 | In the code snippet above, the lambda parameter `p` is inferred to
 47 | have the static type `Person`. If the `Person` class is changed so that it
 48 | no longer has a `getAge` method, or if the list is changed to be a
 49 | list of a type other than `Person`, type inference will fail with a
 50 | compile-time error.
 51 | 
 52 | ## Q3. Is a `var` variable final? {#Q3}
 53 | 
 54 | No. Local variables declared with `var` are non-final by
 55 | default. However, the `final` modifier can be added to `var`
 56 | declarations:
 57 | 
 58 |     final var person = new Person();
 59 | 
 60 | There is no shorthand for `final var` in Java. Languages such as Scala
 61 | use `val` to declare immutable (final) variables. This works well in
 62 | Scala because all variables - locals and fields alike -
 63 | are declared using a syntax of the form
 64 | 
 65 |     val name : type
 66 | 
 67 | or
 68 | 
 69 |     var name : type
 70 | 
 71 | You can include or omit the `": type"` part of the declaration depending
 72 | on whether or not you want type inference. In Scala, the choice
 73 | between mutability and immutability is orthogonal to type inference.
 74 | 
 75 | In Java, `var` can be used only where type inference is desired; it
 76 | cannot be used where a type is declared explicitly. If `val` were added,
 77 | it too could be used only where type inference is used. Neither `var`
 78 | nor `val` could be used to control immutability in Java if the type
 79 | were declared explicitly.
 80 | 
 81 | In addition, Java allows the use of `var` only for local variables,
 82 | not for fields. Immutability is much more significant for fields,
 83 | whereas immutable local variables are comparatively rarely used.
 84 | 
 85 | Using `var`/`val` keywords to control immutability is a feature that
 86 | seems like it ought to carry over cleanly from Scala to Java. In Java,
 87 | however, it would be much less useful than it is in Scala.
 88 | 
 89 | ## Q4. Won't bad developers misuse this feature to write terrible code? {#Q4}
 90 | 
 91 | Yes, bad developers will write terrible code no matter what we
 92 | do. Withholding a feature won't prevent them from doing so. But, when
 93 | used properly, using type inference allows developers to also write
 94 | better code.
 95 | 
 96 | One way that `var` may encourage developers to write better code is that
 97 | it lowers the overhead of declaring a new variable.  If the overhead
 98 | of declaring a variable is high, developers will often avoid doing so,
 99 | and create complex nested or chained expressions that impair
100 | readability solely in order to avoid declaring more variables.  With
101 | `var`, the overhead of pulling a subexpression into a named variable is
102 | lower, so developers are more likely to do so, resulting in more
103 | cleanly factored code.
104 | 
105 | When a feature is introduced, it is common that at first, programmers
106 | will use, overuse, and maybe even abuse that feature, and it takes
107 | some time for the community to converge on a reasonable set of
108 | guidelines for what uses are reasonable and what uses are not. It's
109 | probably reasonable to use `var` fairly frequently though not for the
110 | majority of local variable declarations.
111 | 
112 | Starting with Local Variable Type Inference (LVTI), we're publishing
113 | material about its intent and recommended usage (such as this FAQ, and
114 | the [LVTI Style Guidelines][1]) around the same time the feature is
115 | delivered. We hope that this will accelerate the community's
116 | convergence on what constitutes reasonable usage, and that it will
117 | help avoid most cases of abuse.
118 | 
119 | ## Q5. Where can `var` be used? {#Q5}
120 | 
121 | `var` can be used for declaring local variables, including index
122 | variables of for-loops and resource variables of the
123 | try-with-resources statement.
124 | 
125 | `var` cannot be used for fields, method parameters, or method return
126 | types. The reason is that types in these locations appear explicitly
127 | in class files and in Javadoc specifications. With type inference,
128 | it's quite easy for a change to an initializer to cause the variable's
129 | inferred type to change. For local variables, this is not a problem,
130 | because local variables are limited in scope, and their types are not
131 | recorded directly into class files. However, type inference could
132 | easily cause a problem if types for fields, method parameters, and
133 | method return types were inferred.
134 | 
135 | For example, suppose that the return type of a method were inferred
136 | from the expression in the method's `return` statement. A change to the
137 | method's implementation might end up changing the type of the
138 | expression in the `return` statement. This in turn might change the
139 | method's return type. This could result in a source or binary
140 | incompatibility. Such incompatible changes should not arise from
141 | harmless-looking changes to the implementation.
142 | 
143 | Suppose a field's type were inferred. A change to the field's
144 | initializer could change the field's type, which might unexpectedly
145 | break reflective code.
146 | 
147 | Type inference is ok within the implementation, but not in APIs. API
148 | contracts should be declared explicitly.
149 | 
150 | What about private fields and methods, which are not part of APIs? In
151 | theory, we could have chosen to support `var` for private fields and for
152 | the return type of private methods, without worry that this would
153 | cause incompatibilities due to separate compilation and dynamic
154 | linkage.  We chose to limit the scope of type inference in this way
155 | for simplicity. Trying to push the boundary to include some fields and
156 | some method returns makes the feature considerably more complex and
157 | harder to reason about, but only marginally more useful.
158 | 
159 | ## Q6. Why is an initializer required on the right-hand side of `var`? {#Q6}
160 | 
161 | The type of the variable is inferred from the type of the initializer.
162 | This means, of course, that `var` can only be used when there is an
163 | initializer.  We could have chosen to infer the type from the
164 | assignments to the variable, but that would have made the feature
165 | considerably more complex, and it could potentially lead to misleading
166 | or hard-to-diagnose errors. In order to keep things simple, we've
167 | defined `var` so that only local information is used for type
168 | inference.
169 | 
170 | Suppose that we allowed type inference based on assignment in multiple
171 | locations, separate from the variable declaration. Consider this
172 | example:
173 | 
174 |     var order;
175 |     ...
176 |     order = "first";
177 |     ...
178 |     order = 2;
179 | 
180 | If a type were chosen based on (say) the first assignment, it might
181 | cause an error at another statement that's quite distant from the
182 | cause of the error. (This is sometimes referred to as the
183 | "action-at-a-distance" problem.)
184 | 
185 | Alternatively, a type could be chosen that's compatible with all
186 | assignments. In this case one might expect that the inferred type
187 | would be `Object`, because that's the common superclass of `String` and
188 | `Integer`. Unfortunately, the situation is more complicated than
189 | that. Since both `String` and `Integer` are `Serializable` and `Comparable`,
190 | the common supertype would be an odd intersection type that's
191 | something like
192 | 
193 |     Serializable & Comparable<? extends Serializable & Comparable<...>>
194 | 
195 | (Note that it isn't possible to declare a variable of this type
196 | explicitly.) Also note that this results in a boxing conversion when
197 | 2 is assigned to `order`, which might be unexpected and undesirable.
198 | 
199 | To avoid these problems, it seems preferable to require that the type
200 | be inferred using an explicit initializer.
201 | 
202 | ## Q7. Why can't you use `var` with `null`? {#Q7}
203 | 
204 | Consider this declaration (which is illegal):
205 | 
206 |     var person = null; // ERROR
207 | 
208 | The `null` literal denotes a value of a special _null type_ ([JLS
209 | 4.1][2]) that is the subtype of all reference types in Java. The only
210 | value of the _null type_ is `null` itself; therefore, the only value
211 | that could ever be assigned to a variable of the _null type_ is
212 | `null`. This isn't very useful.
213 | 
214 | A special rule could be made so that a `var` declaration initialized
215 | to `null` is inferred to have type `Object`. This could be done, but
216 | it raises the question of what the programmer intended. Presumably the
217 | variable is initialized to `null` so that it can be assigned to some
218 | other value later. In that case it seems unlikely that inferring the
219 | variable's type as `Object` is the correct choice.
220 | 
221 | Instead of creating some special rules to handle this case, we've
222 | disallowed it. If you want a variable of type `Object`, declare it
223 | explicitly.
224 | 
225 | ## Q8. Can you use `var` with a diamond on the right-hand side? {#Q8}
226 | 
227 | Yes, it works, but it's probably not what you want. Consider:
228 | 
229 |     var list = new ArrayList<>();
230 | 
231 | This will infer the type of list to be `ArrayList<Object>`. In general,
232 | it's preferable to use an explicit type on the left with diamond on the
233 | right, or use `var` on the left with an explicit type on the
234 | right. See the [LVTI Style Guidelines][1], [guideline G6][1G6], for
235 | further information.
236 | 
237 | [1]: lvti-style-guide
238 | [1G6]: lvti-style-guide#G6
239 | [2]: https://docs.oracle.com/javase/specs/jls/se11/html/jls-4.html#jls-4.1
240 | 


--------------------------------------------------------------------------------
/site/guides/lvti-style-guide.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Local Variable Type Inference
  3 | ## Style Guidelines {.subtitle}
  4 | #### Stuart W. Marks {.author}
  5 | #### March 2018 {.date}
  6 | 
  7 | ## Introduction
  8 | 
  9 | Java SE 10 introduced [type inference for local variables][jep286]. Previously,
 10 | all local variable declarations required an explicit (manifest) type
 11 | on the left-hand side. With type inference, the explicit type can be
 12 | replaced by the reserved type name `var` for local variable
 13 | declarations that have initializers. The type of the variable is
 14 | inferred from the type of the initializer.
 15 | 
 16 | There is a certain amount of controversy over this feature. Some
 17 | welcome the concision it enables; others fear that it deprives readers
 18 | of important type information, impairing readability.  And both groups
 19 | are right. It can make code more readable by eliminating redundant
 20 | information, and it can also make code less readable by eliding useful
 21 | information.  Another group worries that it will be overused,
 22 | resulting in more bad Java code being written.  This is also true, but
 23 | it's also likely to result in more *good* Java code being written.
 24 | Like all features, it must be used with judgment.  There's no blanket
 25 | rule for when it should and shouldn't be used.
 26 | 
 27 | Local variable declarations do not exist in isolation; the surrounding
 28 | code can affect or even overwhelm the effects of using `var`. The goal
 29 | of this document is to examine the impact that surrounding code has on
 30 | `var` declarations, to explain some of the tradeoffs, and to provide
 31 | guidelines for the effective use of `var`.
 32 | 
 33 | ## Principles
 34 | 
 35 | ### P1. Reading code is more important than writing code. {#P1}
 36 | 
 37 | Code is read much more often than it is written. Further, when writing
 38 | code, we usually have the whole context in our head, and take our
 39 | time; when reading code, we are often context-switching, and may be in
 40 | more of a hurry.  Whether and how particular language features are
 41 | used ought to be determined by their impact on *future readers* of the
 42 | program, not its original author. Shorter programs can be preferable
 43 | to longer ones, but shortening a program too much can omit information
 44 | that's useful for understanding the program. The central issue here is
 45 | to find the right size for the program such that understandability is
 46 | maximized.
 47 | 
 48 | We are specifically unconcerned here with the amount of keyboarding
 49 | that's necessary to input or to edit a program. While concision may be
 50 | a nice bonus for the author, focusing on it misses the main goal,
 51 | which is to improve the understandability of the resulting program.
 52 | 
 53 | ### P2. Code should be clear from local reasoning. {#P2}
 54 | 
 55 | The reader should be able to look at a `var` declaration, along with
 56 | uses of the declared variable, and understand almost immediately
 57 | what's going on. Ideally, the code should be readily understandable
 58 | using only the context from a snippet or a patch. If understanding a
 59 | `var` declaration requires the reader to look at several locations
 60 | around the code, it might not be a good situation in which to use
 61 | `var`. Then again, it might indicate a problem with the code itself.
 62 | 
 63 | ### P3. Code readability shouldn't depend on IDEs. {#P3}
 64 | 
 65 | Code is often written and read within an IDE, so it's tempting to rely
 66 | heavily on code analysis features of IDEs. For type declarations, why
 67 | not just use `var` everywhere, since one can always point at a
 68 | variable to determine its type?
 69 | 
 70 | There are two reasons. The first is that code is often read outside an
 71 | IDE. Code appears in many places where IDE facilities aren't
 72 | available, such as snippets within a document, browsing a repository
 73 | on the internet, or in a patch file. It is counterproductive to have
 74 | to import code into an IDE simply to understand what the code does.
 75 | 
 76 | The second reason is that even when one is reading code within an IDE,
 77 | explicit actions are often necessary to query the IDE for further
 78 | information about a variable. For instance, to query the type of a
 79 | variable declared using `var`, one might have to hover the pointer
 80 | over the variable and wait for a popup. This might take only a moment,
 81 | but it disrupts the flow of reading.
 82 | 
 83 | Code should be self-revealing. It should be understandable on its
 84 | face, without the need for assistance from tools.
 85 | 
 86 | ### P4. Explicit types are a tradeoff. {#P4}
 87 | 
 88 | Java has historically required local variable declarations to include
 89 | the type explicitly. While explicit types can be very helpful, they
 90 | are sometimes not very important, and are sometimes just in the
 91 | way. Requiring an explicit type can add clutter that crowds out useful
 92 | information.
 93 | 
 94 | Omitting an explicit type can reduce clutter, but only if its omission
 95 | doesn't impair understandability. The type isn't the only way to
 96 | convey information to the reader. Other means include the variable's
 97 | name and the initializer expression. We should take all the available
 98 | channels into account when determining whether it's OK to mute one of
 99 | these channels.
100 | 
101 | ## Guidelines
102 | 
103 | ### G1. Choose variable names that provide useful information. {#G1}
104 | 
105 | This is good practice in general, but it's much more
106 | important in the context of `var`. In a `var` declaration, information
107 | about the meaning and use of the variable can be conveyed using the
108 | variable's name. Replacing an explicit type with `var` should often be
109 | accompanied by improving the variable name.  For example:
110 | 
111 |     // ORIGINAL
112 |     List<Customer> x = dbconn.executeQuery(query);
113 | 
114 |     // GOOD
115 |     var custList = dbconn.executeQuery(query);
116 | 
117 | In this case, a useless variable name has been replaced with a name
118 | that is evocative of the type of the variable, which is now implicit
119 | in the `var` declaration.
120 | 
121 | Encoding the variable's type in its name, taken to its logical
122 | conclusion, results in "[Hungarian Notation]". Just as with explicit
123 | types, this is sometimes helpful, and sometimes just clutter.  In this
124 | example the name `custList` implies that a `List` is being
125 | returned. That might not be significant. Instead of the exact type,
126 | it's sometimes better for a variable's name to express the role or the
127 | nature of the variable, such as "customers":
128 | 
129 |     // ORIGINAL
130 |     try (Stream<Customer> result = dbconn.executeQuery(query)) {
131 |         return result.map(...)
132 |                      .filter(...)
133 |                      .findAny();
134 |     }
135 | 
136 |     // GOOD
137 |     try (var customers = dbconn.executeQuery(query)) {
138 |         return customers.map(...)
139 |                         .filter(...)
140 |                         .findAny();
141 |     }
142 | 
143 | ### G2. Minimize the scope of local variables. {#G2}
144 | 
145 | Limiting the scope of local variables is good practice in
146 | general. This practice is described in [*Effective Java (3rd Edition)*][Bloch],
147 | Item 57. It applies with extra force if `var` is in use.
148 | 
149 | In the following example, the `add` method clearly adds the special
150 | item as the last list element, so it's processed last, as expected.
151 | 
152 |     var items = new ArrayList<Item>(...);
153 |     items.add(MUST_BE_PROCESSED_LAST);
154 |     for (var item : items) ...
155 | 
156 | Now suppose that in order to remove duplicate items, a programmer were
157 | to modify this code to use a `HashSet` instead of an `ArrayList`:
158 | 
159 |     var items = new HashSet<Item>(...);
160 |     items.add(MUST_BE_PROCESSED_LAST);
161 |     for (var item : items) ...
162 | 
163 | This code now has a bug, since sets don't have a defined iteration
164 | order. However, the programmer is likely to fix this bug immediately,
165 | as the uses of the `items` variable are adjacent to its declaration.
166 | 
167 | Now suppose that this code is part of a large method, with a
168 | correspondingly large scope for the `items` variable:
169 | 
170 |     var items = new HashSet<Item>(...);
171 | 
172 |     // ... 100 lines of code ...
173 | 
174 |     items.add(MUST_BE_PROCESSED_LAST);
175 |     for (var item : items) ...
176 | 
177 | The impact of changing from an `ArrayList` to a `HashSet` is no longer
178 | readily apparent, since `items` is used so far away from its
179 | declaration. It seems likely that this bug could survive for much
180 | longer.
181 | 
182 | If `items` had been declared explicitly as `List<Item>`, changing
183 | the initializer would also require changing the type to
184 | `Set<Item>`. This might prompt the programmer to inspect the rest of
185 | the method for code that would be impacted by such a change. (Then
186 | again, it might not.)  Use of `var` would remove this prompting,
187 | possibly increasing the risk of a bug being introduced in code like
188 | this.
189 | 
190 | This might seem like an argument against using `var`, but it really
191 | isn't. The initial example that uses `var` is perfectly fine. The
192 | problem only occurs when the variable's scope is large. Instead of
193 | simply avoiding `var` in these cases, one should change the code to
194 | reduce the scope of the local variables, and only then declare them
195 | with `var`.
196 | 
197 | ### G3. Consider `var` when the initializer provides sufficient information to the reader. {#G3}
198 | 
199 | Local variables are often initialized with constructors. The name of
200 | the class being constructed is often repeated as the explicit type on
201 | the left-hand side. If the type name is long, use of `var` provides
202 | concision without loss of information:
203 | 
204 |     // ORIGINAL
205 |     ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
206 | 
207 |     // GOOD
208 |     var outputStream = new ByteArrayOutputStream();
209 | 
210 | It's also reasonable to use `var` in cases where the initializer is a
211 | method call, such as a static factory method, instead of a
212 | constructor, and when its name contains enough type information:
213 | 
214 |     // ORIGINAL
215 |     BufferedReader reader = Files.newBufferedReader(...);
216 |     List<String> stringList = List.of("a", "b", "c");
217 | 
218 |     // GOOD
219 |     var reader = Files.newBufferedReader(...);
220 |     var stringList = List.of("a", "b", "c");
221 | 
222 | In these cases, the methods' names strongly imply a particular return type, which is then used for inferring the type of the variable.
223 | 
224 | ### G4. Use `var` to break up chained or nested expressions with local variables. {#G4}
225 | 
226 | Consider code that takes a collection of strings and finds the string
227 | that occurs most often. This might look like the following:
228 | 
229 |     return strings.stream()
230 |                   .collect(groupingBy(s -> s, counting()))
231 |                   .entrySet()
232 |                   .stream()
233 |                   .max(Map.Entry.comparingByValue())
234 |                   .map(Map.Entry::getKey);
235 | 
236 | This code is correct, but it's potentially confusing, as it looks like
237 | a single stream pipeline. In fact, it's a short stream, followed by a
238 | second stream over the result of the first stream, followed by a
239 | mapping of the `Optional` result of the second stream. The most readable
240 | way to express this code would have been as two or three statements;
241 | first group entries into a map, then reduce over that map, then
242 | extract the key from the result (if present), as shown below:
243 | 
244 |     Map<String, Long> freqMap = strings.stream()
245 |                                        .collect(groupingBy(s -> s, counting()));
246 |     Optional<Map.Entry<String, Long>> maxEntryOpt = freqMap.entrySet()
247 |                                                            .stream()
248 |                                                            .max(Map.Entry.comparingByValue());
249 |     return maxEntryOpt.map(Map.Entry::getKey);
250 | 
251 | But the author probably resisted doing that because writing the types
252 | of the intermediate variables seemed too burdensome, so instead they
253 | distorted the control flow.  Using `var` allows us to express the
254 | code more naturally without paying the high price of explicitly
255 | declaring the types of the intermediate variables:
256 | 
257 |     var freqMap = strings.stream()
258 |                          .collect(groupingBy(s -> s, counting()));
259 |     var maxEntryOpt = freqMap.entrySet()
260 |                              .stream()
261 |                              .max(Map.Entry.comparingByValue());
262 |     return maxEntryOpt.map(Map.Entry::getKey);
263 | 
264 | One might legitimately prefer the first snippet with its single long
265 | chain of method calls. However, in some cases it's better to break up
266 | long method chains. Using `var` for these cases is a viable approach,
267 | whereas using full declarations of the intermediate variables in the
268 | second snippet makes it an unpalatable alternative.  As with many
269 | other situations, the correct use of `var` might involve both taking
270 | something out (explicit types) and adding something back (better
271 | variable names, better structuring of code).
272 | 
273 | ### G5. Don't worry too much about "programming to the interface" with local variables. {#G5}
274 | 
275 | A common idiom in Java programming is to construct an instance of a
276 | concrete type but to assign it to a variable of an interface
277 | type. This binds the code to the abstraction instead of the
278 | implementation, which preserves flexibility during future maintenance
279 | of the code. For example:
280 | 
281 |     // ORIGINAL
282 |     List<String> list = new ArrayList<>();
283 | 
284 | If `var` is used, however, the concrete type is inferred instead of the interface:
285 | 
286 |     // Inferred type of list is ArrayList<String>
287 |     var list = new ArrayList<String>();
288 | 
289 | It must be reiterated here that `var` can *only* be used for local
290 | variables. It cannot be used to infer field types, method parameter
291 | types, or method return types. The principle of "programming to the
292 | interface" is still as important as ever in those contexts.
293 | 
294 | The main issue is that code that uses the variable can form
295 | dependencies on the concrete implementation. If the variable's
296 | initializer were to change in the future, this might cause its
297 | inferred type to change, causing errors or bugs to occur in subsequent
298 | code that uses the variable.
299 | 
300 | If, as recommended in guideline G2, the scope of the local variable is
301 | small, the risks from "leakage" of the concrete implementation that
302 | can impact the subsequent code are limited. If the variable is used
303 | only in code that's a few lines away, it should be easy to avoid
304 | problems or to mitigate them if they arise.
305 | 
306 | In this particular case, `ArrayList` only contains a couple of methods
307 | that aren't on `List`, namely `ensureCapacity` and `trimToSize`. These
308 | methods don't affect the contents of the list, so calls to them don't
309 | affect the correctness of the program. This further reduces the impact
310 | of the inferred type being a concrete implementation rather than an
311 | interface.
312 | 
313 | ### G6. Take care when using `var` with diamond or generic methods. {#G6}
314 | 
315 | Both `var` and the "diamond" feature allow you to omit explicit type
316 | information when it can be derived from information already
317 | present. Can you use both in the same declaration?
318 | 
319 | Consider the following:
320 | 
321 |     PriorityQueue<Item> itemQueue = new PriorityQueue<Item>();
322 | 
323 | This can be rewritten using either diamond or `var`, without losing
324 | type information:
325 | 
326 |     // OK: both declare variables of type PriorityQueue<Item>
327 |     PriorityQueue<Item> itemQueue = new PriorityQueue<>();
328 |     var itemQueue = new PriorityQueue<Item>();
329 | 
330 | It is legal to use both `var` and diamond, but the inferred type will
331 | change:
332 | 
333 |     // DANGEROUS: infers as PriorityQueue<Object>
334 |     var itemQueue = new PriorityQueue<>();
335 | 
336 | For its inference, diamond can use the target type (typically, the
337 | left-hand side of a declaration) or the types of constructor
338 | arguments. If neither is present, it falls back to the broadest
339 | applicable type, which is often `Object`. This is usually not what was
340 | intended.
341 | 
342 | Generic methods have employed type inference so successfully that it's
343 | quite rare for programmers to provide explicit type arguments.
344 | Inference for generic methods relies on the target type if there are
345 | no actual method arguments that provide sufficient type
346 | information. In a `var` declaration, there is no target type, so a
347 | similar issue can occur as with diamond. For example,
348 | 
349 |     // DANGEROUS: infers as List<Object>
350 |     var list = List.of();
351 | 
352 | With both diamond and generic methods, additional type information can
353 | be provided by actual arguments to the constructor or method, allowing
354 | the intended type to be inferred. Thus,
355 | 
356 |     // OK: itemQueue infers as PriorityQueue<String>
357 |     Comparator<String> comp = ... ;
358 |     var itemQueue = new PriorityQueue<>(comp);
359 | 
360 |     // OK: infers as List<BigInteger>
361 |     var list = List.of(BigInteger.ZERO);
362 | 
363 | If you decide to use `var` with diamond or a generic method, you
364 | should ensure that method or constructor arguments provide enough type
365 | information so that the inferred type matches your intent. Otherwise,
366 | avoid combining `var` with diamond or a generic method in the same
367 | declaration.
368 | 
369 | ### G7. Take care when using `var` with literals. {#G7}
370 | 
371 | Primitive literals can be used as initializers for `var`
372 | declarations. It's unlikely that using `var` in these cases will
373 | provide much advantage, as the type names are generally short.
374 | However, `var` is sometimes useful, for example, to align variable
375 | names.
376 | 
377 | There is no issue with boolean, character, `long`, and string
378 | literals. The type inferred from these literals is precise, and so the
379 | meaning of `var` is unambiguous:
380 | 
381 |     // ORIGINAL
382 |     boolean ready = true;
383 |     char ch = '\ufffd';
384 |     long sum = 0L;
385 |     String label = "wombat";
386 | 
387 |     // GOOD
388 |     var ready = true;
389 |     var ch    = '\ufffd';
390 |     var sum   = 0L;
391 |     var label = "wombat";
392 | 
393 | Particular care should be taken when the initializer is a numeric value,
394 | especially an integer literal. With an explicit type on the left-hand
395 | side, the numeric value may be silently widened or narrowed to types
396 | other than `int`. With `var`, the value will be inferred as an
397 | `int`, which may be unintended.
398 | 
399 |     // ORIGINAL
400 |     byte flags = 0;
401 |     short mask = 0x7fff;
402 |     long base = 17;
403 | 
404 |     // DANGEROUS: all infer as int
405 |     var flags = 0;
406 |     var mask = 0x7fff;
407 |     var base = 17;
408 | 
409 | Floating point literals are mostly unambiguous:
410 | 
411 |     // ORIGINAL
412 |     float f = 1.0f;
413 |     double d = 2.0;
414 | 
415 |     // GOOD
416 |     var f = 1.0f;
417 |     var d = 2.0;
418 | 
419 | Note that `float` literals can be widened silently to `double`. It is
420 | somewhat obtuse to initialize a `double` variable using an explicit
421 | `float` literal such as `3.0f`; however, cases may arise where a
422 | `double` variable is initialized from a `float` field. Caution with
423 | `var` is advised here:
424 | 
425 |     // ORIGINAL
426 |     static final float INITIAL = 3.0f;
427 |     ...
428 |     double temp = INITIAL;
429 | 
430 |     // DANGEROUS: now infers as float
431 |     var temp = INITIAL;
432 | 
433 | (Indeed, this example violates guideline G3, because there
434 | isn't enough information in the initializer for a reader to see
435 | the inferred type.)
436 | 
437 | ## Examples
438 | 
439 | This section contains some examples of where `var` can be used to
440 | greatest benefit.
441 | 
442 | The following code removes at most `max` matching entries from a
443 | Map. Wildcarded type bounds are used for improving the flexibility of
444 | the method, resulting in considerable verbosity. Unfortunately, this
445 | requires the type of the Iterator to be a nested wildcard, making its
446 | declaration more verbose. This declaration is so long that the header
447 | of the for-loop no longer fits on a single line, making the code even
448 | harder to read.
449 | 
450 |     // ORIGINAL
451 |     void removeMatches(Map<? extends String, ? extends Number> map, int max) {
452 |         for (Iterator<? extends Map.Entry<? extends String, ? extends Number>> iterator =
453 |                  map.entrySet().iterator(); iterator.hasNext();) {
454 |             Map.Entry<? extends String, ? extends Number> entry = iterator.next();
455 |             if (max > 0 && matches(entry)) {
456 |                 iterator.remove();
457 |                 max--;
458 |             }
459 |         }
460 |     }
461 | 
462 | Use of `var` here removes the noisy type declarations for the local
463 | variables. Having explicit types for the Iterator and Map.Entry locals
464 | in this kind of loop is largely unnecessary. This also allows the
465 | for-loop control to fit on a single line, further improving
466 | readability.
467 | 
468 |     // GOOD
469 |     void removeMatches(Map<? extends String, ? extends Number> map, int max) {
470 |         for (var iterator = map.entrySet().iterator(); iterator.hasNext();) {
471 |             var entry = iterator.next();
472 |             if (max > 0 && matches(entry)) {
473 |                 iterator.remove();
474 |                 max--;
475 |             }
476 |         }
477 |     }
478 | 
479 | Consider code that reads a single line of text from a socket using
480 | the try-with-resources statement. The networking and I/O APIs use an
481 | object wrapping idiom. Each intermediate object must be declared as a
482 | resource variable so that it will be closed properly if an error
483 | occurs while opening a subsequent wrapper. The conventional code for
484 | this requires the class name to be repeated on the left and right
485 | sides of the variable declaration, resulting in a lot of clutter:
486 | 
487 |     // ORIGINAL
488 |     try (InputStream is = socket.getInputStream();
489 |          InputStreamReader isr = new InputStreamReader(is, charsetName);
490 |          BufferedReader buf = new BufferedReader(isr)) {
491 |         return buf.readLine();
492 |     }
493 | 
494 | Using `var` reduces the noise considerably:
495 | 
496 |     // GOOD
497 |     try (var inputStream = socket.getInputStream();
498 |          var reader = new InputStreamReader(inputStream, charsetName);
499 |          var bufReader = new BufferedReader(reader)) {
500 |         return bufReader.readLine();
501 |     }
502 | 
503 | ## Conclusion
504 | 
505 | Using `var` for declarations can improve code by reducing clutter,
506 | thereby letting more important information stand out. On the other
507 | hand, applying `var` indiscriminately can make things worse. Used
508 | properly, `var` can help improve good code, making it shorter and
509 | clearer without compromising understandability.
510 | 
511 | 
512 | [jep286]: https://openjdk.java.net/jeps/286
513 | [Hungarian Notation]: https://en.wikipedia.org/wiki/Hungarian_notation
514 | [Bloch]: https://www.pearson.com/us/higher-education/program/Bloch-Effective-Java-3rd-Edition/PGM1763855.html
515 | 


--------------------------------------------------------------------------------
/site/guides/text-blocks-guide.head:
--------------------------------------------------------------------------------
 1 | <head>
 2 |   <style>
 3 |     .control { color: #e76f00; font-weight: bold; }
 4 |   </style>
 5 |   <script>
 6 |     function highlightControls() {
 7 |       const controls = /[\u21a6\u2190\u2193\u00B7]+/g;
 8 |       document.querySelectorAll("code").forEach((code) => {
 9 |         code.innerHTML = code.textContent
10 |               .replace(controls, (match) => `<span class="control">${match}</span>`);
11 |       });
12 |     }
13 |     document.addEventListener("DOMContentLoaded", (event) => highlightControls());
14 |   </script>
15 | </head>
16 | 


--------------------------------------------------------------------------------