├── LICENSE
├── README.md
├── did_imputation.ado
├── did_imputation.sthlp
├── event_plot.ado
├── event_plot.sthlp
├── five_estimators_example.do
└── five_estimators_example.png

/LICENSE:
--------------------------------------------------------------------------------

                    GNU GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>

Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

                            Preamble

The GNU General Public License is a free, copyleft license for software and other kinds of works.

The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too.

When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.

To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.

Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it.

For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions.

Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users.

Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary.
To prevent this, the GPL assures that patents cannot be used to render the program non-free.

The precise terms and conditions for copying, distribution and modification follow.

                       TERMS AND CONDITIONS

0. Definitions.

"This License" refers to version 3 of the GNU General Public License.

"Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks.

"The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations.

To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work.

A "covered work" means either the unmodified Program or a work based on the Program.

To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well.

To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion.

1. Source Code.

The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work.

A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language.

The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work.

The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source.

The Corresponding Source for a work in source code form is that same work.

2. Basic Permissions.

All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law.

You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force.
You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you.

Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary.

3. Protecting Users' Legal Rights From Anti-Circumvention Law.

No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures.

When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures.

4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program.

You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee.

5. Conveying Modified Source Versions.

You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices".

    c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it.
    d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so.

A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate.

6. Conveying Non-Source Forms.

You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways:

    a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange.
    b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b.

    d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements.
    e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d.

A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work.

A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product.

"Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made.
If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM).

The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network.

Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying.

7. Additional Terms.

"Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law.
If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions.

When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission.

Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or authors of the material; or

    e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors.

All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying.

If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms.

Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way.

8. Termination.

You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11).

However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice.

Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10.

9. Acceptance Not Required for Having Copies.

You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so.

10. Automatic Licensing of Downstream Recipients.

Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License.

An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations.
If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts.

You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it.

11. Patents.

A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version".

A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version.

In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party.

If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it.

A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007.

Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law.

12. No Surrender of Others' Freedom.

If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License.
If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program.

13. Use with the GNU Affero General Public License.

Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such.

14. Revised Versions of this License.

The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.

Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation.
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # did_imputation 2 | Event studies: robust and efficient estimation, testing, and plotting 3 | 4 | This is a Stata package for Borusyak, Jaravel, and Spiess (2023), "Revisiting Event Study Designs: Robust and Efficient Estimation" 5 | 6 | The package includes: 7 | 1) *did_imputation* command: for estimating causal effects & testing for pre-trends with the imputation method of Borusyak et al. 
8 | 2) *event_plot* command: for plotting event study graphs after did_imputation, after other robust estimators 9 | (by de Chaisemartin and D'Haultfoeuille, Callaway and Sant'Anna, and Sun and Abraham), and after conventional event study OLS 10 | 3) an example of using all five estimators in a simulated dataset and plotting the coefficients & confidence intervals for all of them at once. 11 | 12 | Please contact Kirill Borusyak at k.borusyak@berkeley.edu with any questions. 13 | -------------------------------------------------------------------------------- /did_imputation.ado: -------------------------------------------------------------------------------- 1 | *! did_imputation: Treatment effect estimation and pre-trend testing in staggered adoption diff-in-diff designs with the imputation approach of Borusyak, Jaravel, and Spiess (2023) 2 | *! Version: November 22, 2023 3 | *! Author: Kirill Borusyak 4 | *! Recent updates: project() option can no longer be combined with autosample, a combination that could lead to errors 5 | *!
Citation: Borusyak, Jaravel, and Spiess, "Revisiting Event Study Designs: Robust and Efficient Estimation" (2023) 6 | program define did_imputation, eclass sortpreserve 7 | version 13.0 8 | syntax varlist(min=4 max=4) [if] [in] [aw iw] [, wtr(varlist) sum Horizons(numlist >=0) ALLHorizons HBALance HETby(varname) PROject(varlist numeric) /// 9 | minn(integer 30) shift(integer 0) /// 10 | AUTOSample SAVEestimates(name) SAVEWeights LOADWeights(varlist) SAVEResid(name) /// 11 | AVGEFFectsby(varlist) fe(string) Controls(varlist) UNITControls(varlist) TIMEControls(varlist) /// 12 | CLUSter(varname) leaveout tol(real 0.000001) maxit(integer 100) verbose nose PREtrends(integer 0) delta(integer 0) alpha(real 0.05)] 13 | qui { 14 | if ("`verbose'"!="") noi di "Starting" 15 | ms_get_version reghdfe, min_version("5.7.3") 16 | ms_get_version ftools, min_version("2.37.0") 17 | // Part 1: Initialize 18 | marksample touse, novarlist 19 | if ("`controls'"!="") markout `touse' `controls' 20 | if ("`unitcontrols'"!="") markout `touse' `unitcontrols' 21 | if ("`timecontrols'"!="") markout `touse' `timecontrols' 22 | // if ("`timeinteractions'"!="") markout `touse' `timeinteractions' 23 | if ("`cluster'"!="") markout `touse' `cluster', strok 24 | if ("`saveestimates'"!="") confirm new variable `saveestimates' 25 | if ("`saveweights'"!="") confirm new variable `saveweights' 26 | if ("`verbose'"!="") noi di "#00" 27 | tempvar wei 28 | if ("`weight'"=="") { 29 | gen `wei' = 1 30 | local weiexp "" 31 | } 32 | else { 33 | gen `wei' `exp' 34 | replace `wei' = . 
if `wei'==0 35 | markout `touse' `wei' 36 | 37 | if ("`sum'"=="") { // unless a weighted sum is requested, normalize the weights to a reasonable scale for better numerical convergence 38 | sum `wei' if `touse' 39 | replace `wei' = `wei' * r(N)/r(sum) 40 | if ("`verbose'"!="") noi di "Normalizing weights by " %12.8f r(N)/r(sum) 41 | } 42 | local weiexp "[`weight'=`wei']" 43 | } 44 | local debugging = ("`verbose'"=="verbose") 45 | 46 | tokenize `varlist' 47 | local Y `1' 48 | local i `2' 49 | local t `3' 50 | local ei `4' 51 | markout `touse' `Y' `t' // missing `ei' is fine, indicates the never-treated group 52 | markout `touse' `i', strok 53 | 54 | tempvar D K 55 | 56 | // Process FE 57 | if ("`fe'"=="") local fe `i' `t' 58 | if ("`fe'"==".") { 59 | tempvar constant 60 | gen `constant' = 1 61 | local fe `constant' 62 | } 63 | local fecount = 0 64 | foreach fecurrent of local fe { 65 | if (("`fecurrent'"!="`i'" | "`unitcontrols'"=="") & ("`fecurrent'"!="`t'" | "`timecontrols'"=="")) { // skip i and t if there are corresponding interacted controls 66 | local ++fecount 67 | local fecopy `fecopy' `fecurrent' 68 | local fe`fecount' = subinstr("`fecurrent'","#"," ",.) 69 | markout `touse' `fe`fecount'', strok 70 | } 71 | } 72 | local fe `fecopy' 73 | 74 | // Figure out the delta 75 | if (`delta'==0) { 76 | cap tsset, noquery 77 | if (_rc==0) { 78 | if (r(timevar)=="`t'") { 79 | local delta = r(tdelta) 80 | if (`delta'!=1) noi di "Note: setting delta = `delta'" 81 | } 82 | } 83 | else local delta = 1 84 | } 85 | if (`delta'<=0 | mi(`delta')) { 86 | di as error "A problem has occurred while determining delta. Please specify it explicitly." 87 | error 198 88 | } 89 | 90 | if (`debugging') noi di "#1" 91 | gen `K' = (`t'-`ei'+`shift')/`delta' if `touse' 92 | cap assert mi(`K') | mod(`K',1)==0 93 | if (_rc!=0) { 94 | di as error "There are non-integer values of the number of periods since treatment. Please check the time dimension of your data."
95 | error 198 96 | } 97 | 98 | gen `D' = (`K'>=0 & !mi(`K')) if `touse' 99 | 100 | if ("`avgeffectsby'"=="") local avgeffectsby = "`ei' `t'" 101 | if ("`cluster'"=="") local cluster = "`i'" 102 | 103 | if ("`autosample'"!="" & "`sum'"!="") { 104 | di as error "Autosample cannot be combined with sum. Please specify the sample explicitly" 105 | error 184 106 | } 107 | if ("`autosample'"!="" & "`hbalance'"!="") { 108 | di as error "Autosample cannot be combined with hbalance. Please specify the sample explicitly" 109 | error 184 110 | } 111 | if ("`autosample'"!="" & "`project'"!="") { 112 | di as error "Autosample cannot be combined with project. Please specify the sample explicitly" 113 | error 184 114 | } 115 | if ("`project'"!="" & "`hetby'"!="") { 116 | di as error "Options project and hetby cannot be combined." 117 | error 184 118 | } 119 | if ("`project'"!="" & "`sum'"!="") { 120 | di as error "Options project and sum cannot be combined." // hetby and sum are fine: just add them up separately 121 | error 184 122 | } 123 | if ("`se'"=="nose" & "`saveweights'"!="") { 124 | di as error "Option saveweights is not available if nose is specified." 125 | error 184 126 | } 127 | if ("`se'"=="nose" & "`loadweights'"!="") { 128 | di as error "Option loadweights is not available if nose is specified." 129 | error 184 130 | } 131 | if ("`se'"=="nose" & "`saveresid'"!="") { 132 | di as error "Option saveresid is not available if nose is specified." 133 | error 184 134 | } 135 | if (`debugging') noi di "#2 `fe'" 136 | 137 | // Part 2: Prepare the variables with weights on the treated units (e.g. 
by horizon) 138 | local wtr_count : word count `wtr' 139 | local wtr_count_init = `wtr_count' 140 | if (`wtr_count'==0) { // if no wtr, use the simple average 141 | tempvar wtr 142 | gen `wtr' = 1 if (`touse') & (`D'==1) 143 | local wtrnames tau 144 | local wtr_count = 1 145 | } 146 | else { // create copies of the specified variables so that I can modify them later (adjust for weights, normalize) 147 | if (`wtr_count'==1) local wtrnames tau 148 | else local wtrnames "" // will fill it in the loop 149 | 150 | local wtr_new_list 151 | foreach v of local wtr { 152 | tempvar `v'_new 153 | gen ``v'_new' = `v' if `touse' 154 | local wtr_new_list `wtr_new_list' ``v'_new' 155 | if (`wtr_count'>1) local wtrnames `wtrnames' tau_`v' 156 | } 157 | local wtr `wtr_new_list' 158 | } 159 | 160 | * Horizons 161 | if (("`horizons'"!="" | "`allhorizons'"!="") & `wtr_count'>1) { 162 | di as error "Options horizons and allhorizons cannot be combined with multiple wtr variables" 163 | error 184 164 | } 165 | 166 | if ("`allhorizons'"!="") { 167 | if ("`horizons'"!="") { 168 | di as error "Options horizons and allhorizons cannot be combined" 169 | error 184 170 | } 171 | if ("`hbalance'"!="") di as error "Warning: combining hbalance with allhorizons may lead to very restricted samples. Consider specifying a smaller subset of horizons." 
172 | 173 | levelsof `K' if `touse' & `D'==1 & `wtr'!=0 & !mi(`wtr'), local(horizons) 174 | } 175 | 176 | if ("`horizons'"!="") { // Create a weights var for each horizon 177 | if ("`hbalance'"=="hbalance") { 178 | // Put zero weight on units for which we don't have all horizons 179 | tempvar in_horizons num_horizons_by_i min_weight_by_i max_weight_by_i 180 | local n_horizons = 0 181 | gen `in_horizons'=0 if `touse' 182 | foreach h of numlist `horizons' { 183 | replace `in_horizons'=1 if (`K'==`h') & `touse' 184 | local ++n_horizons 185 | } 186 | egen `num_horizons_by_i' = sum(`in_horizons') if `in_horizons'==1, by(`i') 187 | replace `wtr' = 0 if `touse' & (`in_horizons'==0 | (`num_horizons_by_i'<`n_horizons')) 188 | 189 | // Now check whether wtr and wei weights are identical across periods 190 | egen `min_weight_by_i' = min(`wtr'*`wei') if `touse' & `in_horizons'==1 & (`num_horizons_by_i'==`n_horizons'), by(`i') 191 | egen `max_weight_by_i' = max(`wtr'*`wei') if `touse' & `in_horizons'==1 & (`num_horizons_by_i'==`n_horizons'), by(`i') 192 | cap assert `max_weight_by_i'<=1.000001*`min_weight_by_i' if `touse' & `in_horizons'==1 & (`num_horizons_by_i'==`n_horizons') 193 | if (_rc>0) { 194 | di as error "Weights must be identical across periods for units in the balanced sample" 195 | error 498 196 | } 197 | drop `in_horizons' `num_horizons_by_i' `min_weight_by_i' `max_weight_by_i' 198 | } 199 | foreach h of numlist `horizons' { 200 | tempvar wtr`h' 201 | gen `wtr`h'' = `wtr' * (`K'==`h') 202 | local horlist `horlist' `wtr`h'' 203 | local hornameslist `hornameslist' tau`h' 204 | } 205 | local wtr `horlist' 206 | local wtrnames `hornameslist' 207 | } 208 | 209 | if ("`hetby'"!="") { // Split each wtr by values of hetby 210 | local hetby_type : type `hetby' 211 | local hetby_string = substr("`hetby_type'",1,3)=="str" 212 | if (`hetby_string'==0) { 213 | sum `hetby' if `touse' & (`D'==1) 214 | if r(min)<0 { 215 | di as error "The hetby variable cannot take negative 
values." 216 | error 411 217 | } 218 | cap assert `hetby'==round(`hetby') if `touse' & (`D'==1) 219 | if (_rc>0) { 220 | di as error "The hetby variable cannot take non-integer values." 221 | error 452 222 | } 223 | } 224 | levelsof `hetby' if `touse' & (`D'==1), local(hetby_values) 225 | if (`debugging') noi di `"Hetby_values: `hetby_values'"' 226 | if (r(r)>30) { 227 | di as error "The hetby variable takes too many (over 30) values" 228 | error 149 229 | } 230 | if (r(r)==0) { 231 | di as error "The hetby variable is always missing." 232 | error 148 233 | } 234 | local wtr_split 235 | local wtrnames_split 236 | local index = 1 237 | foreach v of local wtr { 238 | local wtrname : word `index' of `wtrnames' 239 | foreach g of local hetby_values { 240 | if (`hetby_string') gen `v'_`g' = `v' if `hetby'==`"`g'"' 241 | else gen `v'_`g' = `v' if `hetby'==`g' 242 | local wtr_split `wtr_split' `v'_`g' 243 | local wtrnames_split `wtrnames_split' `wtrname'_`g' 244 | } 245 | local ++index 246 | drop `v' 247 | } 248 | local wtr `wtr_split' 249 | local wtrnames `wtrnames_split' 250 | } 251 | 252 | if ("`sum'"=="" & "`project'"=="") { // If computing the mean (and not projecting), normalize each wtr variable such that sum(wei*wtr*(D==1))==1 253 | foreach v of local wtr { 254 | cap assert `v'>=0 if (`touse') & (`D'==1) 255 | if (_rc!=0) { 256 | di as error "Negative wtr weights are only allowed if the sum option is specified" 257 | error 9 258 | } 259 | sum `v' `weiexp' if (`touse') & (`D'==1) 260 | replace `v' = `v'/r(sum) // r(sum)=sum(`v'*`wei') 261 | } 262 | } 263 | 264 | if ("`project'"!="") { // So far assume all wtr have to be 0/1, e.g. as coming from horizons 265 | if (`wtr_count_init'>0) { 266 | di as error "The option project can be combined with horizons/allhorizons but not with wtr." 
// To rethink if they could be combined 267 | error 184 268 | } 269 | local wtr_project 270 | local wtrnames_project 271 | local index = 1 272 | tempvar one wtrsq 273 | gen `one' = 1 274 | gen `wtrsq' = . 275 | foreach v of local wtr { 276 | local wtrname : word `index' of `wtrnames' 277 | 278 | * Process the constant via FWL 279 | reg `one' `project' `weiexp' if `touse' & (`D'==1) & !mi(`v') & (`v'>0), nocon 280 | tempvar wtr_curr 281 | predict `wtr_curr' if `touse' & (`D'==1) & !mi(`v') & (`v'>0), resid 282 | replace `wtrsq' = `wei'*`wtr_curr'^2 283 | sum `wtrsq' 284 | if (r(sum)<1e-6) noi di "WARNING: Dropping `wtrname'_cons because of collinearity" 285 | else { 286 | replace `wtr_curr' = `wtr_curr'/r(sum) 287 | local wtr_project `wtr_project' `wtr_curr' 288 | local wtrnames_project `wtrnames_project' `wtrname'_cons 289 | } 290 | 291 | * Process other vars via FWL 292 | foreach r of local project { 293 | local otherproject : list project - r 294 | reg `r' `otherproject' `weiexp' if `touse' & (`D'==1) & !mi(`v') & (`v'>0) 295 | tempvar wtr_curr 296 | predict `wtr_curr' if `touse' & (`D'==1) & !mi(`v') & (`v'>0), resid 297 | replace `wtrsq' = `wei' * (`wtr_curr'^2) 298 | sum `wtrsq' 299 | if (r(sum)<1e-6) noi di "WARNING: Dropping `wtrname'_`r' because of collinearity" 300 | else { 301 | replace `wtr_curr' = `wtr_curr'/r(sum) 302 | local wtr_project `wtr_project' `wtr_curr' 303 | local wtrnames_project `wtrnames_project' `wtrname'_`r' 304 | } 305 | } 306 | local ++index 307 | } 308 | local wtr `wtr_project' 309 | local wtrnames `wtrnames_project' 310 | if ("`wtr'"=="") { 311 | di as error "Projection is not possible, most likely because of collinearity." 
312 | error 498 313 | } 314 | } 315 | 316 | if (`debugging') noi di "List: `wtr'" 317 | if (`debugging') noi di "Namelist: `wtrnames'" 318 | 319 | // Part 2A: initialize the matrices [used to be just before Part 5] 320 | local tau_num : word count `wtr' 321 | local ctrl_num : word count `controls' 322 | if (`debugging') noi di `tau_num' 323 | if (`debugging') noi di `"`wtr' | `wtrnames' | `controls'"' 324 | tempname b Nt 325 | matrix `b' = J(1,`tau_num'+`pretrends'+`ctrl_num',.) 326 | matrix `Nt' = J(1,`tau_num',.) 327 | if (`debugging') noi di "#4.0" 328 | 329 | // Part 3: Run the imputation regression and impute the controls for treated obs 330 | if ("`unitcontrols'"!="") local fe_i `i'##c.(`unitcontrols') 331 | if ("`timecontrols'"!="") local fe_t `t'##c.(`timecontrols') 332 | 333 | count if (`D'==0) & (`touse') 334 | if (r(N)==0) { 335 | if (`shift'==0) noi di as error "There are no untreated observations, i.e. those with `t'<`ei' or mi(`ei')." 336 | else noi di as error "There are no untreated observations, i.e. those with `t'<`ei'-`shift' or mi(`ei')." 337 | noi di as error "Please double-check the period & event time variables." 338 | noi di 339 | error 459 340 | } 341 | 342 | tempvar imput_resid 343 | if (`debugging') noi di "#4: reghdfe `Y' `controls' if (`D'==0) & (`touse') `weiexp', a(`fe_i' `fe_t' `fe', savefe) nocon keepsing resid(`imput_resid') cluster(`cluster')" 344 | if (`debugging') noi reghdfe `Y' `controls' if (`D'==0) & (`touse') `weiexp', a(`fe_i' `fe_t' `fe', savefe) nocon keepsing resid(`imput_resid') cluster(`cluster') 345 | else reghdfe `Y' `controls' if (`D'==0) & (`touse') `weiexp', a(`fe_i' `fe_t' `fe', savefe) nocon keepsing resid(`imput_resid') cluster(`cluster') verbose(-1) 346 | // nocon makes the constant recorded in the first FE 347 | // keepsing is important for when there are units available in only one period (e.g.
treated in period 2) which are fine 348 | // verbose(-1) suppresses singleton warnings 349 | local dof_adj = (e(N)-1)/(e(N)-e(df_m)-e(df_a)) * (e(N_clust)/(e(N_clust)-1)) // that's how reghdfe does dof adjustment with clusters, see reghdfe_common.mata line 634 350 | 351 | * Extrapolate the controls to the treatment group and construct Y0 (do it right away before the next reghdfe kills __hdfe*) 352 | if (`debugging') noi di "#5" 353 | tempvar Y0 354 | gen `Y0' = 0 if `touse' 355 | 356 | local feset = 1 // indexing as in reghdfe 357 | if ("`unitcontrols'"!="") { 358 | recover __hdfe`feset'__*, from(`i') 359 | replace `Y0' = `Y0' + __hdfe`feset'__ if `touse' 360 | local j=1 361 | foreach v of local unitcontrols { 362 | replace `Y0' = `Y0'+__hdfe`feset'__Slope`j'*`v' if `touse' 363 | local ++j 364 | } 365 | local ++feset 366 | } 367 | if ("`timecontrols'"!="") { 368 | recover __hdfe`feset'__*, from(`t') 369 | replace `Y0' = `Y0' + __hdfe`feset'__ if `touse' 370 | local j=1 371 | foreach v of local timecontrols { 372 | replace `Y0' = `Y0'+__hdfe`feset'__Slope`j'*`v' if `touse' 373 | local ++j 374 | } 375 | local ++feset 376 | } 377 | forvalues feindex = 1/`fecount' { // indexing as in the fe option 378 | recover __hdfe`feset'__, from(`fe`feindex'') 379 | replace `Y0' = `Y0' + __hdfe`feset'__ if `touse' 380 | local ++feset 381 | } 382 | foreach v of local controls { 383 | replace `Y0' = `Y0'+_b[`v']*`v' if `touse' 384 | } 385 | if (`debugging') noi di "#7" 386 | 387 | if ("`saveestimates'"=="") tempvar effect 388 | else { 389 | local effect `saveestimates' 390 | cap confirm var `effect', exact 391 | if (_rc==0) drop `effect' 392 | } 393 | gen `effect' = `Y' - `Y0' if (`D'==1) & (`touse') 394 | 395 | drop __hdfe* 396 | if (`debugging') noi di "#8" 397 | 398 | * Save control coefs and prepare weights corresponding to the controls to report them later 399 | if (`ctrl_num'>0) { 400 | forvalues h = 1/`ctrl_num' { 401 | local ctrl_current : word `h' of `controls' 402 | matrix
`b'[1,`tau_num'+`pretrends'+`h'] = _b[`ctrl_current'] 403 | local ctrlb`h' = _b[`ctrl_current'] 404 | local ctrlse`h' = _se[`ctrl_current'] 405 | } 406 | local ctrl_df = e(df_r) 407 | if (`debugging') noi di "#4B" 408 | local list_ctrl_weps 409 | if ("`se'"!="nose") { // Construct weights behind control estimates. [Could speed up by residualizing all relevant vars on FE first?] 410 | if (`debugging') noi di "#4C3" 411 | local ctrlvars "" // drop omitted vars from controls (so that residualization works correctly when computing SE?) 412 | forvalues h = 1/`ctrl_num' { 413 | local ctrl_current : word `h' of `controls' 414 | if (`ctrlb`h''!=0 | `ctrlse`h''!=0) local ctrlvars `ctrlvars' `ctrl_current' 415 | } 416 | if (`debugging') noi di "#4C4 `ctrlvars'" 417 | 418 | tempvar ctrlweight ctrlweight_product // ctrlweight_product=ctrlweight * ctrl_current 419 | forvalues h = 1/`ctrl_num' { 420 | if (`debugging') noi di "#4D `h'" 421 | tempvar ctrleps_w`h' 422 | if (`ctrlb`h''==0 & `ctrlse`h''==0) gen `ctrleps_w`h'' = 0 // omitted 423 | else { 424 | local ctrl_current : word `h' of `controls' 425 | //local rhsvars = subinstr(" `ctrlvars' "," `ctrl_current' "," ",.) 
426 | local rhsvars : list ctrlvars - ctrl_current 427 | reghdfe `ctrl_current' `rhsvars' `weiexp' if `touse' & `D'==0, a(`fe_i' `fe_t' `fe') cluster(`cluster') resid(`ctrlweight') 428 | replace `ctrlweight' = `ctrlweight' * `wei' 429 | gen `ctrlweight_product' = `ctrlweight' * `ctrl_current' 430 | sum `ctrlweight_product' if `touse' & `D'==0 431 | replace `ctrlweight' = `ctrlweight'/r(sum) 432 | egen `ctrleps_w`h'' = total(`ctrlweight' * `imput_resid') if `touse', by(`cluster') 433 | replace `ctrleps_w`h'' = `ctrleps_w`h'' * sqrt(`dof_adj') 434 | drop `ctrlweight' `ctrlweight_product' 435 | } 436 | local list_ctrl_weps `list_ctrl_weps' `ctrleps_w`h'' 437 | } 438 | } 439 | if (`debugging') noi di "#4.75 `list_ctrl_weps'" 440 | } 441 | 442 | // Check if imputation was successful, and apply autosample 443 | * For FE can just check they have been imputed everywhere 444 | tempvar need_imputation 445 | gen byte `need_imputation' = 0 446 | foreach v of local wtr { 447 | replace `need_imputation'=1 if `touse' & `D'==1 & `v'!=0 & !mi(`v') 448 | } 449 | replace `touse' = (`touse') & (`D'==0 | `need_imputation') // View as e(sample) all controls + relevant treatments only 450 | 451 | count if mi(`effect') & `need_imputation' 452 | if r(N)>0 { 453 | if (`debugging') noi di "#8b `wtr'" 454 | cap drop cannot_impute 455 | gen byte cannot_impute = mi(`effect') & `need_imputation' 456 | count if cannot_impute==1 457 | if ("`autosample'"=="") { 458 | noi di as error "Could not impute FE for " r(N) " observations. Those are saved in the cannot_impute variable. Use the autosample option if you would like those observations to be dropped from the sample automatically." 
459 | error 198 460 | } 461 | else { // drop the subsample where it didn't work and renormalize all wtr variables 462 | assert "`sum'"=="" 463 | local j = 1 464 | qui foreach v of local wtr { 465 | if (`debugging') noi di "#8d sum `v' `weiexp' if `touse' & `D'==1" 466 | local outputname : word `j' of `wtrnames' 467 | sum `v' `weiexp' if `touse' & `D'==1 // just a test that it added up to one first 468 | if (`debugging') noi di "#8dd " r(sum) 469 | assert abs(r(sum)-1)<10^-5 | abs(r(sum))<10^-5 // if this variable is always zero/missing, then the sum would be zero 470 | 471 | count if `touse' & `D'==1 & cannot_impute==1 & `v'!=0 & !mi(`v') 472 | local n_cannot_impute = r(N) // count the dropped units 473 | if (`n_cannot_impute'>0) { 474 | sum `v' `weiexp' if `touse' & `D'==1 & cannot_impute!=1 & `v'!=0 & !mi(`v') // those still remaining 475 | if (r(N)==0) { 476 | replace `v' = 0 if `touse' & `D'==1 // totally drop the wtr 477 | local autosample_drop `autosample_drop' `outputname' 478 | } 479 | else { 480 | replace `v' = `v'/r(sum) if `touse' & `D'==1 & cannot_impute!=1 481 | replace `v' = 0 if cannot_impute==1 482 | local autosample_trim `autosample_trim' `outputname' 483 | } 484 | } 485 | local ++j 486 | } 487 | if (`debugging') noi di "#8e" 488 | replace `touse' = `touse' & cannot_impute!=1 489 | if ("`autosample_drop'"!="") noi di "Warning: suppressing the following coefficients because FE could not be imputed for any units: `autosample_drop'." 490 | if ("`autosample_trim'"!="") noi di "Warning: part of the sample was dropped for the following coefficients because FE could not be imputed: `autosample_trim'." 
491 | } 492 | } 493 | * Compare model degrees of freedom [does not work correctly for timecontrols and unitcontrols, need to recompute] 494 | if (`debugging') noi di "#8c" 495 | tempvar tnorm 496 | gen `tnorm' = rnormal() if (`touse') & (`D'==0 | `need_imputation') 497 | reghdfe `tnorm' `controls' if (`D'==0) & (`touse'), a(`fe_i' `fe_t' `fe') nocon keepsing verbose(-1) 498 | local df_m_control = e(df_m) // model DoF corresponding to explicitly specified controls 499 | local df_a_control = e(df_a) // DoF for FE 500 | reghdfe `tnorm' `controls' , a(`fe_i' `fe_t' `fe') nocon keepsing verbose(-1) 501 | local df_m_full = e(df_m) 502 | local df_a_full = e(df_a) 503 | if (`debugging') noi di "#9 `df_m_control' `df_m_full' `df_a_control' `df_a_full'" 504 | if (`df_m_control'<`df_m_full') { 505 | di as error "Could not run imputation for some observations because some controls are collinear in the D==0 subsample but not in the full sample" 506 | if ("`autosample'"!="") di as error "Please note that autosample does not know how to deal with this. Please correct the sample manually" 507 | error 481 508 | } 509 | if (`df_a_control'<`df_a_full') { 510 | di as error "Could not run imputation for some observations because some absorbed variables/FEs are collinear in the D==0 subsample but not in the full sample" 511 | if ("`autosample'"!="") di as error "Please note that autosample does not know how to deal with this. Please correct the sample manually" 512 | error 481 513 | } 514 | 515 | 516 | // Part 4: Suppress wtr which have an effective sample size (for absolute weights of treated obs) that is too small 517 | local droplist 518 | tempvar abswei 519 | gen `abswei' = . 520 | local j = 1 521 | foreach v of local wtr { 522 | local outputname : word `j' of `wtrnames' 523 | replace `abswei' = abs(`v') if (`touse') & (`D'==1) 524 | sum `abswei' `weiexp' 525 | if (r(sum)!=0) { // o/w dropped earlier 526 | replace `abswei' = (`v'*`wei'/r(sum))^2 if (`touse') & (`D'==1) // !! 
Probably doesn't work with fw, not sure about pw; probably ok for aw 527 | sum `abswei' 528 | if (r(sum)>1/`minn') { // HHI is large => effective sample size is too small 529 | local droplist `droplist' `outputname' 530 | replace `v' = 0 if `touse' 531 | } 532 | } 533 | else local droplist `droplist' `outputname' // not ideal: should report those with no data at all separately (maybe together with autosample_drop?) 534 | local ++j 535 | } 536 | if ("`droplist'"!="") noi di "WARNING: suppressing the following coefficients from estimation because of insufficient effective sample size: `droplist'. To report them nevertheless, set the minn option to a smaller number or 0, but keep in mind that the estimates may be unreliable and their SE may be downward biased." 537 | 538 | if (`debugging') noi di "#9.5" 539 | 540 | // Part 5: pre-tests 541 | if (`pretrends'>0) { 542 | tempname pretrendvar 543 | tempvar preresid 544 | forvalues h = 1/`pretrends' { 545 | gen `pretrendvar'`h' = (`K'==-`h') if `touse' 546 | local pretrendvars `pretrendvars' `pretrendvar'`h' 547 | local prenames `prenames' pre`h' 548 | } 549 | if (`debugging') noi di "#9A reghdfe `Y' `controls' `pretrendvars' `weiexp' if `touse' & `D'==0, a(`fe_i' `fe_t' `fe') cluster(`cluster') resid(`preresid')" 550 | reghdfe `Y' `controls' `pretrendvars' `weiexp' if `touse' & `D'==0, a(`fe_i' `fe_t' `fe') cluster(`cluster') resid(`preresid') 551 | forvalues h = 1/`pretrends' { 552 | matrix `b'[1,`tau_num'+`h'] = _b[`pretrendvar'`h'] 553 | local preb`h' = _b[`pretrendvar'`h'] 554 | local prese`h' = _se[`pretrendvar'`h'] 555 | } 556 | local pre_df = e(df_r) 557 | if (`debugging') noi di "#9B" 558 | local list_pre_weps 559 | if ("`se'"!="nose") { // Construct weights behind pre-trend estimators.
Could speed up by residualizing all relevant vars on FE first 560 | matrix pre_b = e(b) 561 | if (`debugging') noi di "#9C1" 562 | matrix pre_V = e(V) 563 | if (`debugging') noi di "#9C2" 564 | local dof_adj = (e(N)-1)/(e(N)-e(df_m)-e(df_a)) * (e(N_clust)/(e(N_clust)-1)) // that's how reghdfe does dof adjustment with clusters, see reghdfe_common.mata line 634 565 | if (`debugging') noi di "#9C3" 566 | local pretrendvars "" // drop omitted vars from pretrendvars (so that residualization works correctly when computing SE) 567 | forvalues h = 1/`pretrends' { 568 | if (`preb`h''!=0 | `prese`h''!=0) local pretrendvars `pretrendvars' `pretrendvar'`h' 569 | } 570 | if (`debugging') noi di "#9C4 `pretrendvars'" 571 | 572 | tempvar preweight 573 | forvalues h = 1/`pretrends' { 574 | if (`debugging') noi di "#9D `h'" 575 | tempvar preeps_w`h' 576 | if (`preb`h''==0 & `prese`h''==0) gen `preeps_w`h'' = 0 // omitted 577 | else { 578 | local rhsvars = subinstr(" `pretrendvars' "," `pretrendvar'`h' "," ",.) 579 | reghdfe `pretrendvar'`h' `controls' `rhsvars' `weiexp' if `touse' & `D'==0, a(`fe_i' `fe_t' `fe') cluster(`cluster') resid(`preweight') 580 | replace `preweight' = `preweight' * `wei' 581 | sum `preweight' if `touse' & `D'==0 & `pretrendvar'`h'==1 582 | replace `preweight' = `preweight'/r(sum) 583 | egen `preeps_w`h'' = total(`preweight' * `preresid') if `touse', by(`cluster') 584 | replace `preeps_w`h'' = `preeps_w`h'' * sqrt(`dof_adj') 585 | drop `preweight' 586 | } 587 | local list_pre_weps `list_pre_weps' `preeps_w`h'' 588 | } 589 | } 590 | if (`debugging') noi di "#9.75" 591 | } 592 | 593 | // Part 6: Compute the effects 594 | count if `D'==0 & `touse' 595 | local Nc = r(N) 596 | 597 | count if `touse' 598 | local Nall = r(N) 599 | 600 | tempvar effectsum 601 | gen `effectsum' = . 
602 | local j = 1 603 | foreach v of local wtr { 604 | local outputname : word `j' of `wtrnames' 605 | if (`debugging') noi di "Reporting `j' `v' `outputname'" 606 | 607 | replace `effectsum' = `effect'*`v'*`wei' if (`D'==1) & (`touse') 608 | sum `effectsum' 609 | //ereturn scalar `outputname' = r(sum) 610 | matrix `b'[1,`j'] = r(sum) 611 | 612 | count if `D'==1 & `touse' & `v'!=0 & !mi(`v') 613 | matrix `Nt'[1,`j'] = r(N) 614 | 615 | local ++j 616 | } 617 | 618 | if (`debugging') noi di "#10" 619 | 620 | // Part 7: Report SE [can add a check that there are no conflicts in the residuals] 621 | if ("`se'"!="nose") { 622 | cap drop __w_* 623 | tempvar tag_clus resid0 624 | egen `tag_clus' = tag(`cluster') if `touse' 625 | gen `resid0' = `Y' - `Y0' if (`touse') & (`D'==0) 626 | if ("`loadweights'"=="") { 627 | local weightvars = "" 628 | foreach vn of local wtrnames { 629 | local weightvars `weightvars' __w_`vn' 630 | } 631 | if (`debugging') noi di "#11a imputation_weights `i' `t' `D' , touse(`touse') wtr(`wtr') saveweights(`weightvars') wei(`wei') fe(`fe') controls(`controls') unitcontrols(`unitcontrols') timecontrols(`timecontrols') tol(`tol') maxit(`maxit')" 632 | noi imputation_weights `i' `t' `D', touse(`touse') wtr(`wtr') saveweights(`weightvars') wei(`wei') /// 633 | fe(`fe') controls(`controls') unitcontrols(`unitcontrols') timecontrols(`timecontrols') /// 634 | tol(`tol') maxit(`maxit') `verbose' 635 | local Niter = r(iter) 636 | } 637 | else { 638 | local weightvars `loadweights' 639 | // Here can verify the supplied weights 640 | } 641 | 642 | local list_weps = "" 643 | local j = 1 644 | foreach v of local wtr { // to do: speed up by sorting for all wtr together 645 | if (`debugging') noi di "#11b `v'" 646 | local weightvar : word `j' of `weightvars' 647 | local wtrname : word `j' of `wtrnames' 648 | tempvar clusterweight smartweight smartdenom avgtau eps_w`j' // Need to regenerate every time in case the weights on treated are in conflict 649 | egen 
`clusterweight' = total(`wei'*`v') if `touse' & (`D'==1), by(`cluster' `avgeffectsby') 650 | egen `smartdenom' = total(`clusterweight' * `wei' * `v') if `touse' & (`D'==1), by(`avgeffectsby') 651 | gen `smartweight' = `clusterweight' * `wei' * `v' / `smartdenom' if `touse' & (`D'==1) 652 | replace `smartweight' = 0 if mi(`smartweight') & `touse' & (`D'==1) // if the denominator is zero, this avgtau won't matter 653 | egen `avgtau' = sum(`effect'*`smartweight') if (`touse') & (`D'==1), by(`avgeffectsby') 654 | 655 | if ("`saveresid'"=="") tempvar resid 656 | else local resid `saveresid'_`wtrname' 657 | 658 | gen `resid' = `resid0' 659 | replace `resid' = `effect'-`avgtau' if (`touse') & (`D'==1) 660 | if ("`leaveout'"!="") { 661 | if (`debugging') noi di "#11LO" 662 | count if `smartdenom'>0 & ((`clusterweight'^2)/`smartdenom'>0.99999) & (`touse') & (`D'==1) 663 | if (r(N)>0) { 664 | local outputname : word `j' of `wtrnames' // is this the correct variable name when some coefs have been dropped? 665 | di as error `"Cannot compute leave-out standard errors because of "' r(N) `" observations for coefficient "`outputname'""' 666 | di as error "This most likely happened because there are cohorts with only one unit or cluster (and the default value for avgeffectsby is used)." 667 | di as error "Consider using the avgeffectsby option with broader observation groups. Do not address this problem by using non-leave-out standard errors, as they may be downward biased for the same reason." 
668 | error 498 669 | } 670 | replace `resid' = `resid' * `smartdenom' / (`smartdenom'-(`clusterweight'^2)) if (`touse') & (`D'==1) 671 | } 672 | egen `eps_w`j'' = sum(`wei'*`weightvar'*`resid') if `touse', by(`cluster') 673 | 674 | local list_weps `list_weps' `eps_w`j'' 675 | drop `clusterweight' `smartweight' `smartdenom' `avgtau' 676 | if ("`saveresid'"=="") drop `resid' 677 | local ++j 678 | } 679 | if (`debugging') noi di "11c" 680 | tempname V 681 | if (`debugging') noi di "11d `list_weps' | `list_pre_weps' | `list_ctrl_weps'" 682 | matrix accum `V' = `list_weps' `list_pre_weps' `list_ctrl_weps' if `tag_clus', nocon 683 | if (`debugging') noi di "11e `wtrnames' | `prenames' | `controls'" 684 | matrix rownames `V' = `wtrnames' `prenames' `controls' 685 | matrix colnames `V' = `wtrnames' `prenames' `controls' 686 | if ("`saveweights'"=="" & "`loadweights'"=="") drop __w_* 687 | } 688 | 689 | // Part 8: report everything 690 | if (`debugging') noi di "#12" 691 | matrix colnames `b' = `wtrnames' `prenames' `controls' 692 | matrix colnames `Nt' = `wtrnames' 693 | ereturn post `b' `V', esample(`touse') depname(`Y') obs(`Nall') 694 | ereturn matrix Nt = `Nt' 695 | ereturn scalar Nc = `Nc' 696 | ereturn local depvar `Y' 697 | ereturn local cmd did_imputation 698 | ereturn local droplist `droplist' 699 | ereturn local autosample_drop `autosample_drop' 700 | ereturn local autosample_trim `autosample_trim' 701 | if ("`Niter'"!="") ereturn scalar Niter = `Niter' 702 | if (`pretrends'>0 & "`se'"!="nose") { 703 | test `prenames', df(`pre_df') 704 | ereturn scalar pre_F = r(F) 705 | ereturn scalar pre_p = r(p) 706 | ereturn scalar pre_df = `pre_df' 707 | } 708 | } 709 | 710 | local level = 100*(1-`alpha') 711 | _coef_table_header 712 | ereturn display, level(`level') 713 | 714 | end 715 | 716 | // Additional program that computes the weights corresponding to the imputation estimator and saves them in a variable 717 | cap program drop imputation_weights 718 | program define 
imputation_weights, rclass sortpreserve 719 | syntax varlist(min=3 max=3), touse(varname) wtr(varlist) SAVEWeights(namelist) wei(varname) /// 720 | [tol(real 0.000001) maxit(integer 1000) fe(string) Controls(varlist) UNITControls(varlist) TIMEControls(varlist) verbose] 721 | // Weights of the imputation procedure given wtr for controls = - X0 * (X0'X0)^-1 * X1' * wtr but we get them via iterative procedure 722 | // k<0 | k==. is control 723 | // Observation weights are in wei; wtr should be specified BEFORE applying the wei, and the output is before applying them too, i.e. estimator = sum(wei*saveweights*Y) 724 | qui { 725 | // Part 1: Initialize 726 | local debugging = ("`verbose'"=="verbose") 727 | if (`debugging') noi di "#IW1" 728 | tokenize `varlist' 729 | local i `1' 730 | local t `2' 731 | local D `3' 732 | 733 | local wcount : word count `wtr' 734 | local savecount : word count `saveweights' 735 | assert `wcount'==`savecount' 736 | forvalues j = 1/`wcount' { 737 | local wtr_j : word `j' of `wtr' 738 | local saveweights_j : word `j' of `saveweights' 739 | gen `saveweights_j' = `wtr_j' 740 | replace `saveweights_j' = 0 if mi(`saveweights_j') & `touse' 741 | tempvar copy`saveweights_j' 742 | gen `copy`saveweights_j'' = `saveweights_j' 743 | } 744 | 745 | local fecount = 0 746 | foreach fecurrent of local fe { 747 | local ++fecount 748 | local fe`fecount' = subinstr("`fecurrent'","#"," ",.) 
749 | } 750 | 751 | if (`debugging') noi di "#IW2" 752 | 753 | // Part 2: Demean & construct denom for weight updating 754 | if ("`unitcontrols'"!="") { 755 | tempvar N0i 756 | egen `N0i' = sum(`wei') if (`touse') & `D'==0, by(`i') 757 | } 758 | if ("`timecontrols'"!="") { 759 | tempvar N0t 760 | egen `N0t' = sum(`wei') if (`touse') & `D'==0, by(`t') 761 | } 762 | forvalues feindex = 1/`fecount' { 763 | tempvar N0fe`feindex' 764 | egen `N0fe`feindex'' = sum(`wei') if (`touse') & `D'==0, by(`fe`feindex'') 765 | } 766 | 767 | foreach v of local controls { 768 | tempvar dm_`v' c`v' 769 | sum `v' [aw=`wei'] if `D'==0 & `touse' // demean such that the mean is zero in the control sample 770 | gen `dm_`v'' = `v'-r(mean) if `touse' 771 | egen `c`v'' = sum(`wei' * `dm_`v''^2) if `D'==0 & `touse' 772 | } 773 | 774 | foreach v of local unitcontrols { 775 | tempvar u`v' dm_u`v' s_u`v' 776 | egen `s_u`v'' = pc(`wei') if `D'==0 & `touse', by(`i') prop 777 | egen `dm_u`v'' = sum(`s_u`v'' * `v') if `touse', by(`i') // this automatically includes it in `D'==1 as well 778 | replace `dm_u`v'' = `v' - `dm_u`v'' if `touse' 779 | egen `u`v'' = sum(`wei' * `dm_u`v''^2) if `D'==0 & `touse', by(`i') 780 | drop `s_u`v'' 781 | } 782 | foreach v of local timecontrols { 783 | tempvar t`v' dm_t`v' s_t`v' 784 | egen `s_t`v'' = pc(`wei') if `D'==0 & `touse', by(`t') prop 785 | egen `dm_t`v'' = sum(`s_t`v'' * `v') if `touse', by(`t') // this automatically includes it in `D'==1 as well 786 | replace `dm_t`v'' = `v' - `dm_t`v'' if `touse' 787 | egen `t`v'' = sum(`wei' * `dm_t`v''^2) if `D'==0 & `touse', by(`t') 788 | drop `s_t`v'' 789 | } 790 | if (`debugging') noi di "#IW3" 791 | 792 | // Part 3: Iterate 793 | local it = 0 794 | local keepiterating `saveweights' 795 | tempvar delta 796 | gen `delta' = 0 797 | while (`it'<`maxit' & "`keepiterating'"!="") { 798 | if (`debugging') noi di "#IW it `it': `keepiterating'" 799 | // Simple controls 800 | foreach v of local controls { 801 | update_weights 
`dm_`v'' , w(`keepiterating') wei(`wei') d(`D') touse(`touse') denom(`c`v'')
802 | }
803 | 
804 | // Unit-interacted continuous controls
805 | foreach v of local unitcontrols {
806 | update_weights `dm_u`v'' , w(`keepiterating') wei(`wei') d(`D') touse(`touse') denom(`u`v'') by(`i')
807 | }
808 | if ("`unitcontrols'"!="") update_weights , w(`keepiterating') wei(`wei') d(`D') touse(`touse') denom(`N0i') by(`i') // could speed up a bit by skipping this if we have i#something later
809 | 
810 | // Time-interacted continuous controls
811 | foreach v of local timecontrols {
812 | update_weights `dm_t`v'' , w(`keepiterating') wei(`wei') d(`D') touse(`touse') denom(`t`v'') by(`t')
813 | }
814 | if ("`timecontrols'"!="") update_weights , w(`keepiterating') wei(`wei') d(`D') touse(`touse') denom(`N0t') by(`t') // could speed up a bit by skipping this if we have t#something later
815 | 
816 | // FEs
817 | forvalues feindex = 1/`fecount' {
818 | update_weights , w(`keepiterating') wei(`wei') d(`D') touse(`touse') denom(`N0fe`feindex'') by(`fe`feindex'')
819 | }
820 | 
821 | // Check for which coefs the weights have changed, keep iterating for them
822 | local newkeepit
823 | foreach w of local keepiterating {
824 | replace `delta' = abs(`w'-`copy`w'')
825 | sum `delta' if `D'==0 & `touse'
826 | if (`debugging') noi di "#IW it `it' `w' " r(sum)
827 | if (r(sum)>`tol') local newkeepit `newkeepit' `w'
828 | replace `copy`w'' = `w'
829 | }
830 | local keepiterating `newkeepit'
831 | local ++it
832 | }
833 | if ("`keepiterating'"!="") {
834 | noi di as error "Convergence of standard errors is not achieved for coefs: `keepiterating'."
835 | noi di as error "Try increasing the tolerance or the number of iterations, or use the nose option to obtain point estimates without SE."
836 | error 430
837 | }
838 | return scalar iter = `it'
839 | }
840 | end
841 | 
842 | cap program drop update_weights // warning: intentionally destroys sorting
843 | program define update_weights, rclass
844 | syntax [varname(default=none)] , w(varlist) wei(varname) d(varname) touse(varname) denom(varname) [by(varlist)]
845 | // varlist = variable on which to residualize (if empty, a constant is assumed, as for any FE) [currently at most one variable is supported]
846 | // w = variable storing the weights to be updated
847 | // wei = observation weights
848 | // touse = variable defining sample
849 | // denom = variable storing sum(`wei'*`varlist'^2) if `d'==0, by(`by')
850 | qui {
851 | tempvar sumw
852 | tokenize `varlist'
853 | if ("`1'"=="") local 1 = "1"
854 | if ("`by'"!="") sort `by'
855 | foreach w_j of local w {
856 | // noi di "#UW 5 `w_j': `1' by(`by') " // debug output, disabled
857 | egen `sumw' = total(`wei' * `w_j' * `1') if `touse', by(`by')
858 | replace `w_j' = `w_j'-`sumw'*`1'/`denom' if `d'==0 & `denom'!=0 & `touse'
859 | assert !mi(`w_j') if `touse'
860 | drop `sumw'
861 | }
862 | }
863 | end
864 | 
865 | // When there is a variable that only varies by `from' but is missing for some observations, fill in its missing values wherever possible
866 | cap program drop recover
867 | program define recover, sortpreserve
868 | syntax varlist, from(varlist)
869 | foreach var of local varlist {
870 | gsort `from' -`var'
871 | by `from' : replace `var' = `var'[1] if mi(`var')
872 | }
873 | end
874 | 
875 | 
--------------------------------------------------------------------------------
/did_imputation.sthlp:
--------------------------------------------------------------------------------
1 | {smcl}
2 | {* *! 
version 3.1 2023-11-22}{...} 3 | {vieweralsosee "reghdfe" "help reghdfe"}{...} 4 | {vieweralsosee "event_plot" "help event_plot"}{...} 5 | {viewerjumpto "Syntax" "did_imputation##syntax"}{...} 6 | {viewerjumpto "Description" "did_imputation##description"}{...} 7 | {viewerjumpto "What if imputation is not possible" "did_imputation##impfails"}{...} 8 | {viewerjumpto "Options" "did_imputation##options"}{...} 9 | {viewerjumpto "Weights" "did_imputation##weights"}{...} 10 | {viewerjumpto "Stored results" "did_imputation##results"}{...} 11 | {viewerjumpto "Usage examples" "did_imputation##usage"}{...} 12 | {title:Title} 13 | 14 | {pstd} 15 | {bf:did_imputation} - Treatment effect estimation and pre-trend testing in event studies: difference-in-differences designs with staggered adoption of treatment, using the imputation approach of Borusyak, Jaravel, and Spiess (April 2023) 16 | 17 | {marker syntax}{...} 18 | {title:Syntax} 19 | 20 | {phang} 21 | {cmd: did_imputation} {it:Y i t Ei} [if] [in] [{help did_imputation##weights:estimation weights}] [{cmd:,} {help did_imputation##options:options}] 22 | {p_end} 23 | 24 | {synoptset 8 tabbed}{...} 25 | {synopt : {it:Y}}outcome variable {p_end} 26 | {synopt : {it:i}}variable for unique unit id{p_end} 27 | {synopt : {it:t}}variable for calendar period {p_end} 28 | {synopt : {it:Ei}}variable for unit-specific date of treatment (missing = never-treated) {p_end} 29 | 30 | {phang} {it: Note:} These main parameters imply: {p_end} 31 | {phang3}- the treatment indicator: {it:D=1[t>=Ei]}; {p_end} 32 | {phang3}- "relative time", i.e. the number of periods since treatment: {it:K=(t-Ei)} (possibly adjusted by the {opt shift} and {opt delta} options described below). {p_end} 33 | 34 | {phang} 35 | {it: Note}: {cmd:did_imputation} requires a recent version of {help reghdfe}. If you get error messages (e.g. {it:r(123)} or {it:"verbose must be between 0 and 5"}), please (re)install {cmd:reghdfe} to make sure you have the most recent version. 
36 | {p_end} 37 | 38 | {phang} 39 | {it: Note}: Before emailing the authors about errors, please read the {help did_imputation##bugs:Bug reporting} section of this helpfile. 40 | {p_end} 41 | 42 | {marker description}{...} 43 | {title:Description} 44 | 45 | {pstd} 46 | {bf:did_imputation} estimates the effects of a binary treatment with staggered rollout allowing for arbitrary heterogeneity and dynamics of causal effects, using the imputation estimator of Borusyak et al. (2023). 47 | {p_end} 48 | 49 | {pstd} 50 | The benchmark case is with panel data, in which each unit {it:i} that gets treated as of period {it:Ei} stays treated forever; 51 | some units may never be treated. Other types of data (e.g. repeated cross-sections) and other designs (e.g. triple-diffs) are also allowed; 52 | see {help did_imputation##usage:Usage examples}. 53 | {p_end} 54 | 55 | {pstd}Estimation proceeds in three steps:{p_end} 56 | 57 | {p2col 5 8 8 0 : 1.}{ul:Estimate} a model for non-treated potential outcomes using the non-treated (i.e. never-treated or not-yet-treated) 58 | observations only. The benchmark model for diff-in-diff designs is a two-way fixed effect (FE) model: Y_it = a_i + b_t + eps_it, 59 | but other FEs, controls, etc., are also allowed.{p_end} 60 | {p2col 5 8 8 0 : 2.}{ul:Extrapolate} the model from Step 1 to treated observations, {ul:imputing} non-treated potential outcomes Y_it(0), 61 | and obtain an estimate of the treatment effect {it: tau_it = Y_it - Y_it(0)} for each treated observation. (See {help did_imputation##impfails:What if imputation is not possible}){p_end} 62 | {p2col 5 8 8 0 : 3.}{ul:Take averages} of estimated treatment effects corresponding to the estimand of interest.{p_end} 63 | 64 | {pstd} 65 | A pre-trend test (for the assumptions of parallel trends and no anticipation) is a separate exercise 66 | (see the {opt pretrends} option). 
Regardless of whether the pre-trend test is performed, the reference group
67 | for estimation is always all pre-treatment (or never-treated) observations.
68 | {p_end}
69 | 
70 | {pstd}
71 | To make "event study" plots, please use the accompanying command {help event_plot}.
72 | {p_end}
73 | 
74 | {marker impfails}{...}
75 | {title:What if imputation is not possible}
76 | 
77 | {phang}The imputation step (Step 2) is not always possible for all treated observations:{p_end}
78 | {phang2}- With unit FEs, imputation is not possible for units treated in all periods in the sample;{p_end}
79 | {phang2}- With period FEs, it is impossible to isolate the period FE from the variation in treatment effects
80 | in a period when all units have already been treated (and if there are never-treated units);{p_end}
81 | {phang2}- If you include group#period FEs, imputation is further impossible once all units {it:in the group} have been treated;{p_end}
82 | {phang2}- Similar issues arise with other covariates in the model of Y(0).{p_end}
83 | 
84 | {phang}This is a fundamental issue: the model you specified does not allow one to obtain unbiased estimates of treatment effects
85 | for those observations (without restrictions on treatment effects; see Borusyak et al. 2023).{p_end}
86 | 
87 | {phang}
88 | If this problem arises (i.e. there is at least one treated observation that enters one of the estimands of interest with a non-zero weight
89 | and for which the treatment effect cannot be imputed),
90 | the command will throw an error and generate a dummy variable {it:cannot_impute} which equals one for those observations.{p_end}
91 | 
92 | {phang}You have two ways to proceed:{p_end}
93 | {phang}- Modify the estimand, excluding those observations manually: via the {opt if} clause or by setting the weights on them to zero
94 | (via the {opt wtr} option);{p_end}
95 | {phang}- Specify the {opt autosample} option that will do this automatically in most cases.
But we recommend that you still review {it:cannot_impute}
96 | to understand what estimand you will be getting.{p_end}
97 | 
98 | {marker options}{...}
99 | {title:Options}
100 | 
101 | {dlgtab:Model of Y(0)}
102 | 
103 | {phang}{opt fe(list of FE)}: which FE to include in the model of Y(0). Default is {opt fe(i t)} for the diff-in-diff (two-way FE) model.
104 | But you can include fewer FEs, e.g. just period FE {opt fe(t)} or, for repeated cross-sections at the individual level, {opt fe(state t)}.
105 | Or you can have more FEs: e.g. {bf:fe(}{it:i t{cmd:#}state}{bf:)} (for state-by-year FE with county-level data)
106 | or {bf:fe(}{it:i{cmd:#}dow t}{bf:)} (for unit by day-of-week FE). Each member of the list has to look like
107 | {it:v1{cmd:#}v2{cmd:#}...{cmd:#}vk}. If you want no FE at all, specify {opt fe(.)}.
108 | {p_end}
109 | 
110 | {phang}{opt c:ontrols(varlist)}: list of continuous time-varying controls. (For dummy-variable controls, e.g. gender, please use the
111 | {opt fe} option for better convergence.){p_end}
112 | 
113 | {phang}{opt unitc:ontrols(varlist)}: list of continuous controls (often unit-invariant) to be included {it:interacted} with unit dummies.
114 | E.g. with {opt unitcontrols(year)} the regression includes unit-specific trends.
115 | (For binary controls interacted with unit dummies, use the {opt fe} option.) {p_end}
116 | 
117 | {pmore}{it:Use with caution}: the command may not recognize that imputation is not possible for some treated observations.
118 | For example, a unit-specific trend cannot be estimated if only one pre-treatment observation is available for the unit, but it is not
119 | guaranteed that the command will throw an error.
120 | {p_end}
121 | 
122 | {phang}{opt timec:ontrols(varlist)}: list of continuous controls (often time-invariant) to be included {it:interacted} with period dummies.
123 | E.g. with {opt timecontrols(population)} the regression includes {it:i.year#c.population}.
124 | (For binary controls interacted with period dummies, use the {opt fe} option.){p_end}
125 | 
126 | {pmore}{it:Use with caution}: the command may not recognize that imputation is not possible for some treated observations.
127 | {p_end}
128 | 
129 | {dlgtab:Estimands and Pre-trends}
130 | 
131 | {phang}{opt wtr(varlist)}: A list of variables, manually defining estimands of interest by storing the weights on the treated observations.{p_end}
132 | {phang2}- If {help did_imputation##weights:estimation weights} ({bf:aw/iw/fw}) are used, {opt wtr} weights will be applied {it:in addition}
133 | to those weights in defining the estimand.{p_end}
134 | {phang2}- If nothing is specified, the default is the simple ATT across all treated observations (or, with {opt horizons} or {opt allhorizons}, by horizon). So {opt wtr}=1/number of the relevant observations. {p_end}
135 | {phang2}- Values of {opt wtr} for untreated observations are ignored (except as initial values in the iterative procedure for computing SE).{p_end}
136 | {phang2}- Values below 0 are only allowed if {opt sum} is also specified (i.e. for weighted sums and not weighted averages).{p_end}
137 | {phang2}- Using multiple {opt wtr} variables is faster than running {cmd:did_imputation} for each of them separately, and produces a joint variance-covariance matrix.{p_end}
138 | 
139 | {phang}{opt sum}: if specified, the weighted {it:sum}, rather than average, of treatment effects is computed (overall or by horizons).
140 | With {opt sum} specified, it's OK for some {opt wtr} values to be negative or even to add up to zero;
141 | this is useful, for example, to estimate the difference between two weighted averages of treatment effects
142 | (e.g. across horizons or between men and women).{p_end}
143 | 
144 | {phang}{opt h:orizons(numlist)}: if specified, weighted averages/sums of treatment effects will be reported for each of these horizons separately
145 | (i.e.
tau0 for the treatment period, tau1 for one period after treatment, etc.). Horizons which are not specified will be ignored.
146 | Each horizon must be a non-negative integer.{p_end}
147 | 
148 | {phang}{opt allh:orizons}: picks all non-negative horizons available in the sample.{p_end}
149 | 
150 | {phang}{opt hbal:ance}: if specified together with a list of horizons, estimands for each of the horizons will be based
151 | only on the subset of units for which observations for all chosen horizons are available
152 | (note that by construction this means that the estimands will be based on different periods).
153 | If {opt wtr} or estimation weights are specified, the researcher needs to make sure that the weights are constant over time
154 | for the relevant units---otherwise proper balancing is impossible and an error will be thrown.
155 | Note that excluded units will still be used in Step 1 (e.g. to recover the period FEs) and for pre-trend tests.{p_end}
156 | 
157 | {phang}{opt het:by(varname)}: reports estimands separately by subgroups defined by the discrete (non-negative integer or string)
158 | variable provided. This is the preferred option for treatment effect heterogeneity analyses (but see also {opt project}).{p_end}
159 | 
160 | {phang}{opt pro:ject(varlist)}: projects (i.e., regresses) treatment effect estimates on a set of numeric variables
161 | and reports the constant and slope coefficients. The variables should not be collinear. (To analyse effect heterogeneity
162 | by subgroup, option {opt hetby} is preferred. Note that standard errors may not agree exactly between {opt hetby} and
163 | {opt project}.){p_end}
164 | 
165 | {phang}{opt minn(#)}: the minimum effective number (i.e. inverse Herfindahl index) of treated observations,
166 | below which a coefficient is suppressed and a warning is issued.
167 | Inference on coefficients based on a small number of observations is unreliable. The default is {opt minn(30)}.
168 | Set to {opt minn(0)} to report all coefficients nevertheless.{p_end}
169 | 
170 | {phang}{opt autos:ample}: if specified, the observations for which FE cannot be imputed will be automatically dropped from the sample,
171 | with a warning issued. Otherwise an error will be thrown if any such observations are found.
172 | {opt autosample} cannot be combined with {opt sum} or {opt hbalance};
173 | please specify the sample explicitly if using one of those options and you get an error that imputation has failed (see {help did_imputation##impfails:What if imputation is not possible}).{p_end}
174 | 
175 | {phang}{opt shift(integer)}: specify to allow for anticipation effects.
176 | The command will pretend that treatment happened {opt shift} periods earlier for each treated unit.{p_end}
177 | {phang2}- Do NOT use this option for pre-trend testing;
178 | use it if anticipation effects are expected in your setting.
179 | (This option {it:can} be used for a placebo test but we recommend a pretrend test instead; see Section 4.4 of Borusyak et al. 2023.){p_end}
180 | {phang2}- The command's output will be labeled relative to the shifted treatment date {it:Ei}-shift.
181 | For example, with {opt horizons(0/10)} {opt shift(3)} you will get coefficients {it:_b[tau0]}...{it:_b[tau10]} where tau{it:h}
182 | is the effect {it:h} periods after the shifted treatment. That is, {it:tau1} corresponds to the average anticipation effect 2 periods before
183 | the actual treatment, while {it:tau8} to the average effect 5 periods after the actual treatment.{p_end}
184 | 
185 | {phang}{opt pre:trends(integer)}: if some value {it:k}>0 is specified, the command will perform a test for parallel trends,
186 | by a {bf:separate} regression on nontreated observations only: of the outcome on the dummies for 1,...,{it:k} periods before treatment,
187 | in addition to all the FE and controls. The coefficients are reported as {bf:pre}{it:1},...,{bf:pre}{it:k}.
188 | The F-statistic (from the cluster-robust Wald test), the corresponding pvalue, and the degrees of freedom are reported in {res:e(pre_F)},
189 | {res:e(pre_p)}, and {res:e(pre_df)} resp.{p_end}
190 | {phang2}- Use a reasonable number of pre-trends; do not use all of the available ones unless you have a really large never-treated group. With too many pre-trend coefficients, the power of the joint test will be lower.{p_end}
191 | {phang2}- The entire sample of nontreated observations is always used for pre-trend tests, regardless of {opt hbalance} and other options that restrict the sample for post-treatment effect estimation.{p_end}
192 | {phang2}- The number of pretrend coefficients does not affect the post-treatment effect estimates, which are always computed under the assumption of parallel trends and no anticipation.{p_end}
193 | {phang2}- The reference group for the pretrend test is all periods more than {it:k} periods prior to the event date (and all never-treated
194 | observations, if available).{p_end}
195 | {phang2}- Because of this reference group, it is expected that the SE are the largest for pre1 (opposite from some conventional tests).{p_end}
196 | {phang2}- This is only one of many tests for the parallel trends and no anticipation assumptions. Others are easy to implement manually; please see
197 | the paper for the discussion.{p_end}
198 | 
199 | {dlgtab:Standard errors}
200 | 
201 | {phang}{opt clus:ter(varname)}: cluster SE within groups defined by this variable. Default is {it:i}. {p_end}
202 | 
203 | {phang}{opt avgeff:ectsby(varlist)}: Use this option (and/or {opt leaveout}) if you have small cohorts of treated observations, and after reviewing
204 | Section 4.3 of Borusyak et al. (2023). In brief, SE computation requires averaging the treatment effects by groups of treated observations.{p_end}
205 | {phang2}- These groups should be large enough, so that there is no downward bias from overfitting.
{p_end}
206 | {phang2}- But the larger they are, the more conservative SE will be, unless treatment effects are homogeneous within these groups. {p_end}
207 | {phang2}- The varlist in {opt avgeffectsby} defines these groups.{p_end}
208 | {phang2}- The default is cohort-years {opt avgeffectsby(Ei t)}, which is appropriate for large cohorts.{p_end}
209 | {phang2}- With small cohorts, specify coarser groupings: e.g. {opt avgeffectsby(K)} (to pool across cohorts) or
210 | {opt avgeffectsby(D)} (to pool across cohorts and periods when computing the overall ATT). {p_end}
211 | {phang2}- The averages are computed using the "smart" formula from Section 4.3, adjusted for any clustering and any choice of {opt avgeffectsby}. {p_end}
212 | 
213 | {phang}{opt leaveout}: {it:Recommended option}. In particular, use it (and/or {opt avgeffectsby}) if you have small cohorts of treated observations.
214 | The averages of treatment effects will be computed excluding the own unit (or, more generally, cluster).
215 | See Section 4.3 of Borusyak et al. (2023) for details.{p_end}
216 | 
217 | {phang}{opt alpha(real)}: confidence intervals will be displayed corresponding to that significance level. Default is 0.05.{p_end}
218 | 
219 | {phang}{opt nose}: do not produce standard errors (much faster).{p_end}
220 | 
221 | {dlgtab:Miscellaneous}
222 | 
223 | {phang}{opt save:estimates(newvarname)}: if specified, a new variable will be created, storing the estimate of the treatment effect for each observation.
224 | The researcher can then construct weighted averages of interest manually (but without SE). {p_end}
225 | {pmore}{it:Note}: Individual estimates are of course not consistent. But weighted sums of many of them typically are, which is what {cmd:did_imputation} generally reports. {p_end}
226 | 
227 | {phang}{opt savew:eights}: if specified, new variables {it:__w_*} are generated, storing the weights corresponding to (each) coefficient.
228 | Recall that the imputation estimator is a linear estimator that can be represented as a weighted sum of the outcomes.{p_end}
229 | {phang2}- These weights are applied on top of any {help did_imputation##weights:estimation weights}.{p_end}
230 | {phang2}- For treated observations these weights equal the corresponding {opt wtr} - that's why the estimator is unbiased
231 | under arbitrary treatment effect heterogeneity.{p_end}
232 | {phang2}- If a weighted average is estimated (i.e. {opt sum} is not specified) and there are no estimation weights,
233 | the weights add up to one across all treated observations.{p_end}
234 | {phang2}- With unit and period FEs, weights add up to zero for every unit and time period (when weighted by estimation weights).{p_end}
235 | 
236 | {phang}{opt loadw:eights(varlist)}: use this to speed up the analysis of different outcome variables with an identical specification
237 | on an identical sample. To do so, provide the set of the weight variables (__w*, but can be renamed),
238 | saved using the {opt saveweights} option when running the analysis for the first outcome.
239 | [Warning: the validity of the weights is assumed and not double-checked.] [Currently works only if the variables have been renamed so that their names do not match __w*.]{p_end}
240 | 
241 | {phang}{opt saver:esid(name)}: if specified, a new variable {it:name}_* is generated for each estimand (e.g. {it:name}_tau0) to store
242 | model residuals used in the computation of standard errors. For untreated observations, they are residuals from the estimation step. For
243 | treated observations, they are the epsilon-tildes from the BJS Theorem 3 (based on the {opt avgeffectsby} option) or the leave-out
244 | versions from Appendix A.6. The residuals may be heterogeneous across estimands in general --- see equation (8) in the paper
245 | which depends on the estimator weights, and thus on the estimand.
This option can be helpful for reproducing the standard errors manually.{p_end}
246 | 
247 | {phang}{opt delta(integer)}: indicates that one period should correspond to {opt delta} steps of {it:t} and {it:Ei}.
248 | Default is 1, except when the time dimension of the data is set (via {help tsset} or {help xtset});
249 | in that case the default is the corresponding delta.{p_end}
250 | 
251 | {phang}{opt tol(real)}, {opt maxit(integer)}: tolerance and the maximum number of iterations.
252 | This affects the iterative procedure used to search for the weights underlying the estimator (to produce SE).
253 | Defaults are 10^-6 and 100, resp. If convergence is not achieved otherwise, try increasing them.{p_end}
254 | 
255 | {phang}{opt verbose}: specify for the debugging mode.{p_end}
256 | 
257 | 
258 | {marker weights}{...}
259 | {title:Estimation weights}
260 | 
261 | {phang} Estimation weights (only {opt aw} or {opt iw} are allowed) play two roles:
262 | 
263 | {p2col 5 8 8 0 : 1.}Step 1 estimation is done by a weighted regression. This is most efficient when the variance
264 | of the error terms is inversely proportional to the weights.
265 | 
266 | {p2col 5 8 8 0 : 2.}In Step 3, the average that defines the estimand is weighted by these weights.
267 | If {opt wtr} is specified explicitly, estimation weights are applied on top of them.{p_end}
268 | {phang2}- If {opt sum} is not specified, the estimand is the average of treatment effects weighted by {opt wtr}*{opt aw} {p_end}
269 | {phang2}- If {opt sum} is specified, the estimand is the sum of treatment effects multiplied by {opt wtr}*{opt aw} {p_end}
270 | 
271 | {phang} Do NOT specify estimation weights if you have only the second motivation for using them,
272 | e.g. if you want to measure the ATT weighted by county size but you have no reason to think that the outcomes of larger counties have less noise.
273 | Instead specify the estimand of your interest via {opt wtr}.{p_end} 274 | 275 | {phang} All weight types ({opt aw/iw/fw}) produce identical results. Weights should always be non-negative. {p_end} 276 | 277 | {marker results}{...} 278 | {title:Stored results} 279 | 280 | {pstd} 281 | {cmd:did_imputation} stores the following in {cmd:e()}: 282 | 283 | {synoptset 10 tabbed}{...} 284 | {p2col 5 15 15 2: Matrices}{p_end} 285 | {synopt:{cmd:e(b)}}A row-vector of (i) the estimates, (ii) pre-trend coefficients, and (iii) coefficients on controls from Step 1:{p_end} 286 | {pmore3}- If {opt horizons} is specified, the program returns {bf:tau}{it:h} for each {it:h} in the list of horizons.{p_end} 287 | {pmore3}- If multiple {opt wtr} are specified, the program returns {bf:tau_}{it:v} for each {it:v} in the list of {opt wtr} variables. {p_end} 288 | {pmore3}- Otherwise the single returned coefficient is called {bf:tau}. {p_end} 289 | {pmore3}- If {opt hetby} is specified, the coefficient names are appended with underscore and the values of the grouping variable. For instance, with {opt hetby(female)} where {it:female} takes values 0 and 1, the command will return {bf:tau}{it:h}_0 and {bf:tau}{it:h}_1 for each horizon, corresponding to average treatment effects by horizon and sex.{p_end} 290 | {pmore3}- If {opt project} is specified, the coefficient names are appended with underscore and the constant plus the list of slope coefficients. For instance, with {opt project(female income)}, the command will return {bf:tau}{it:h}{bf:_cons}, {bf:tau}{it:h}{bf:_female}, and {bf:tau}{it:h}{bf:_income}.{p_end} 291 | {pmore3}- In addition, if {cmd:pretrends} is specified, the command returns {bf:pre}{it:h} for each pre-trend coefficient {it:h}=1..{opt pretrends}. {p_end} 292 | {pmore3}- And if {cmd:controls} is specified, the command returns the coefficients on those controls. 
(Estimated fixed effects, {cmd:unitcontrols}, and {cmd:timecontrols} are not reported in the {cmd:e(b)}.){p_end} 293 | {synopt:{cmd:e(V)}}Corresponding variance-covariance matrix {p_end} 294 | {synopt:{cmd:e(Nt)}}A row-vector of the number of treated observations used to compute each estimator {p_end} 295 | 296 | {p2col 5 15 15 2: Scalars}{p_end} 297 | {synopt:{cmd:e(Nc)}} the number of control observations used in imputation (scalar) {p_end} 298 | {synopt:{cmd:e(pre_F), e(pre_p), e(pre_df)}} if {opt pretrends} is specified, the F-statistic, pvalue, and dof for the joint test for no pre-trends {p_end} 299 | {synopt:{cmd:e(Niter)}} the # of iterations to compute SE {p_end} 300 | 301 | {p2col 5 15 15 2: Macros}{p_end} 302 | {synopt:{cmd:e(cmd)}} {cmd:did_imputation} {p_end} 303 | {synopt:{cmd:e(droplist)}} the set of coefficients suppressed to zero because of insufficient effective sample size (see the {opt minn} option){p_end} 304 | {synopt:{cmd:e(autosample_drop)}} the set of coefficients suppressed to zero because treatment effects could not be imputed for any observation (if {opt autosample} is specified) {p_end} 305 | {synopt:{cmd:e(autosample_trim)}} the set of coefficients where the sample was partially reduced because treatment effects could not be imputed for some observations (if {opt autosample} is specified) {p_end} 306 | 307 | {p2col 5 15 15 2: Functions}{p_end} 308 | {synopt:{cmd:e(sample)}} Marks the estimation sample: all treated observations for which imputation was successful and the weights are non-zero for at least one coefficient + all non-treated observations used in Step 1 {p_end} 309 | 310 | {marker usage}{...} 311 | {title:Usage Examples} 312 | 313 | {phang}{ul:Conventional panels}{p_end} 314 | 315 | 1) Estimate the single average treatment-on-the-treated (ATT) across all treated observations, assuming that FE can be imputed for all treated observations (which is rarely the case) 316 | {cmd:. 
did_imputation Y i t Ei} 317 | 318 | 2) Same but dropping the observations for which the FE cannot be imputed. (After running, verify that the resulting sample is what you expect!) 319 | {cmd:. did_imputation Y i t Ei, autosample} 320 | 321 | 3) Estimate the ATT by horizon 322 | {cmd:. did_imputation Y i t Ei, allhorizons autosample} 323 | 324 | 4) Estimate the ATT at horizons 0..+6 only 325 | {cmd:. did_imputation Y i t Ei, horizons(0/6)} 326 | 327 | 5) Estimate the ATT at horizons 0..+6 for the subset of units available for all of these horizons (such that the dynamics are not driven by compositional effects) 328 | {cmd:. did_imputation Y i t Ei, horizons(0/6) hbalance} 329 | 330 | 6) Include time-varying controls: 331 | {cmd:. did_imputation Y i t Ei, controls(w_first w_other*)} 332 | 333 | 7) Include state-by-year FE 334 | {cmd:. did_imputation Y county year Ei, fe(county state#year)} 335 | 336 | 8) Drop unit FE 337 | {cmd:. did_imputation Y i t Ei, fe(t)} 338 | 339 | 9) Additionally report pre-trend coefficients for leads 1,...,5. The estimates for post-treatment effects will NOT change. 340 | {cmd:. did_imputation Y i t Ei, horizons(0/6) pretrends(5)} 341 | 342 | 10) Estimate the difference between the ATT at horizons +2 vs +1 [this can equivalently be done via {help lincom} after estimating ATT by horizon] 343 | {cmd:. count if K==1} 344 | {cmd:. gen wtr1 = (K==1)/r(N)} 345 | {cmd:. count if K==2} 346 | {cmd:. gen wtr2 = (K==2)/r(N)} 347 | {cmd:. gen wtr_diff = wtr2-wtr1} 348 | {cmd:. did_imputation Y i t Ei, wtr(wtr_diff) sum} 349 | 350 | 11) Reduce estimation time by using {opt loadweights} when analyzing several outcomes with identical specifications on identical samples: 351 | {cmd:. did_imputation Y1 i t Ei, horizons(0/10) saveweights} 352 | {cmd:. rename __* myweights* // optional} 353 | {cmd:. 
did_imputation Y2 i t Ei, horizons(0/10) loadweights(myweights*)} 354 | 355 | {phang}12) {ul:Treatment effect heterogeneity}: {p_end} 356 | {pmore}To estimate heterogeneity by individuals' sex {it:female}, you could obtain individual treatment effect estimates 357 | (via {opt saveestimates}) and simply run a second-step regression of the estimates on {it:female}: {p_end} 358 | {pmore}{cmd:. did_imputation Y i t Ei, saveestimates(tau)}{p_end} 359 | {pmore}{cmd:. reg tau female} (DON'T DO THIS!){p_end} 360 | 361 | {pmore}HOWEVER, standard errors will be incorrect. Instead, use {opt hetby} or {opt project}:{p_end} 362 | {pmore}{cmd:. did_imputation Y i t Ei, hetby(female)}{p_end} 363 | {pmore}will produce the ATT for males (tau_0) and females (tau_1). You can further use{p_end} 364 | {pmore}{cmd:. lincom tau_1-tau_0}{p_end} 365 | {pmore}to compute the difference between them with a SE. Alternatively,{p_end} 366 | {pmore}{cmd:. did_imputation Y i t Ei, project(female)}{p_end} 367 | {pmore}will produce the ATT for males (tau_cons) and the difference in ATTs between females and males (tau_female).{p_end} 368 | 369 | {pmore}Both options can be combined with {opt horizons} to do heterogeneity analysis within each horizon.{p_end} 370 | 371 | {pmore}If you also want to allow different period effects for the two groups, you can use the {opt fe()} option as usual, e.g.:{p_end} 372 | {pmore}{cmd:. did_imputation Y i t Ei, hetby(female) fe(i t#female)}{p_end} 373 | 374 | {phang}21) {ul:Repeated cross-sections}: {p_end} 375 | {pmore}When in each period you have a different sample of individuals {it:i} in the same groups (e.g. regions), 376 | replace individual FEs with group FEs and consider clustering at the regional level:{p_end} 377 | {phang2}{cmd:. 
did_imputation Y i t Ei, fe(region t) cluster(region) ...}{p_end} 378 | 379 | {pmore}Note that the main parameters still include {it:i}, and not {it:region}, as the unit identifier.{p_end} 380 | 381 | {phang}22) {ul:Triple-diffs}: {p_end} 382 | {pmore}When observations are defined by {it:i,g,t} where, say, {it:i} are counties and {it:g} are age groups, 383 | specify a variable {it:ig} identifying the {it:(i,g)} pairs as the unit identifier, add appropriate FEs, and choose your clustering level, e.g.:{p_end} 384 | {phang2}{cmd:. did_imputation Y ig t Eig, fe(ig i#t g#t) cluster(i) ...}{p_end} 385 | 386 | {pmore}Note that the event time {it:Eig} should be specific to the {it:(i,g)} pairs, not to the {it:i}. For instance, {it:Eig} is missing for a never-treated age group in a county where other groups are treated at some point.{p_end} 387 | 388 | {title:Missing Features} 389 | 390 | {phang}- Save imputed Y(0) in addition to treatment effects {p_end} 391 | {phang}- Making {opt hbalance} work with {opt autosample} {p_end} 392 | {phang}- Throw an error if imputation is not possible with complicated controls {p_end} 393 | {phang}- Allow using treatment effect averages for SE computation which rely on observations even 394 | outside the estimation sample for the current {opt wtr} {p_end} 395 | {phang}- Estimation when treatment switches on and off {p_end} 396 | {phang}- More general interactions between FEs and continuous controls than with {opt timecontrols} and {opt unitcontrols}{p_end} 397 | {phang}- Frequency weights{p_end} 398 | {phang}- Verify that the unit ID variable is numeric{p_end} 399 | {phang}- In Stata 13 there may be a problem with the {opt df} option of {cmd:test} {p_end} 400 | {phang}- {opt loadweights} doesn't work when the weights are saved with the default names {it:__w*}{p_end} 401 | {phang}- Allow for designs in which treatment is not binary{p_end} 402 | {phang}- Add a check that ranges of {opt t} and {opt Ei} match{p_end} 403 | 404 | {pstd} 405 | If you are 
interested in discussing these or others, please {help did_imputation##author:contact me}. 406 | 407 | {marker bugs}{...} 408 | {title:Bug reporting} 409 | 410 | {phang}If you get an error message, please:{p_end} 411 | {phang}- Reinstall {cmd:reghdfe} and {cmd:ftools} to have the most recent version{p_end} 412 | {phang}- Double check the syntax: e.g. make sure that {it:Ei} is the event date and not the treatment dummy, and that 413 | the treatment dummy can be obtained as {it:t>=Ei}{p_end} 414 | {phang}- If it's a message with an explanation, read the message carefully.{p_end} 415 | 416 | {phang}If this doesn't help:{p_end} 417 | {phang}- Rerun your command adding the {opt verbose} option and save the log-file{p_end} 418 | {phang}- If possible, create a version of the dataset in which you can replicate the error (e.g. with a fake outcome variable).{p_end} 419 | {phang}- If you can't share a fake dataset, summarize all the relevant variables in the log-file before calling {cmd:did_imputation}.{p_end} 420 | {phang}- Report this on {browse "https://github.com/borusyak/did_imputation/issues":github} or 421 | {help did_imputation##author:email me} with all of this.{p_end} 422 | 423 | {title:References} 424 | 425 | {phang} 426 | Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper. 427 | {p_end} 428 | 429 | {title:Acknowledgements} 430 | 431 | {pstd} 432 | We thank Kyle Butts for the help in preparing this helpfile. 433 | 434 | {marker author}{...} 435 | {title:Author} 436 | 437 | {pstd} 438 | Kirill Borusyak (UC Berkeley), k.borusyak@berkeley.edu 439 | 440 | -------------------------------------------------------------------------------- /event_plot.ado: -------------------------------------------------------------------------------- 1 | *! event_plot: Plot coefficients from a staggered adoption event study analysis 2 | *! Version: June 1, 2021 3 | *! Author: Kirill Borusyak 4 | *! 
Please check the latest version at https://github.com/borusyak/did_imputation/ 5 | *! Citation: Borusyak, Jaravel, and Spiess, "Revisiting Event Study Designs: Robust and Efficient Estimation" (2021) 6 | program define event_plot 7 | version 13.0 8 | syntax [anything(name=eqlist)] [, trimlag(numlist integer) trimlead(numlist integer) default_look stub_lag(string) stub_lead(string) plottype(string) ciplottype(string) together /// 9 | graph_opt(string asis) noautolegend legend_opt(string) perturb(numlist) shift(numlist integer) /// 10 | lag_opt(string) lag_ci_opt(string) lead_opt(string) lead_ci_opt(string) /// 11 | lag_opt1(string) lag_ci_opt1(string) lead_opt1(string) lead_ci_opt1(string) /// 12 | lag_opt2(string) lag_ci_opt2(string) lead_opt2(string) lead_ci_opt2(string) /// 13 | lag_opt3(string) lag_ci_opt3(string) lead_opt3(string) lead_ci_opt3(string) /// 14 | lag_opt4(string) lag_ci_opt4(string) lead_opt4(string) lead_ci_opt4(string) /// 15 | lag_opt5(string) lag_ci_opt5(string) lead_opt5(string) lead_ci_opt5(string) /// 16 | lag_opt6(string) lag_ci_opt6(string) lead_opt6(string) lead_ci_opt6(string) /// 17 | lag_opt7(string) lag_ci_opt7(string) lead_opt7(string) lead_ci_opt7(string) /// 18 | lag_opt8(string) lag_ci_opt8(string) lead_opt8(string) lead_ci_opt8(string) /// 19 | savecoef reportcommand noplot verbose alpha(real 0.05)] 20 | qui { 21 | // to-do: read dcdh or K_95; compatibility with the code from Goodman-Bacon, eventdd(?), did_multiplegt; use eventstudy_siegloch on options for many graphs; Burtch: ib4.rel_period_pos 22 | // Part 1: Initialize 23 | local verbose = ("`verbose'"=="verbose") 24 | if ("`plottype'"=="") local plottype connected 25 | if ("`ciplottype'"=="" & ("`plottype'"=="connected" | "`plottype'"=="line")) local ciplottype rarea 26 | if ("`ciplottype'"=="" & "`plottype'"=="scatter") local ciplottype rcap 27 | if (`verbose') noi di "#1" 28 | if ("`eqlist'"=="") local eqlist . 
29 | if ("`shift'"=="") local shift 0 30 | if ("`savecoef'"=="savecoef") cap drop __event* 31 | 32 | tempname dot bmat Vmat bmat_current Vmat_current 33 | cap estimates store `dot' // cap in case there are no current estimate (but plotting is done based on previously saved ones) 34 | local rc_current = _rc 35 | local eq_n : word count `eqlist' 36 | if (`eq_n'>8) { 37 | di as error "Combining at most 8 graphs are currently supported" 38 | error 198 39 | } 40 | 41 | if ("`perturb'"=="") { 42 | local perturb 0 43 | if (`eq_n'>1) forvalues eq=1/`eq_n' { 44 | local perturb `perturb' `=0.2*`eq'/`eq_n'' 45 | } 46 | } 47 | 48 | tokenize `eqlist' 49 | forvalues eq = 1/`eq_n' { 50 | local hashpos = strpos("``eq''","#") 51 | if (`hashpos'==0) { // e() syntax 52 | if ("``eq''"==".") { 53 | if (`rc_current'==0) estimates restore `dot' 54 | else error 301 55 | } 56 | else estimates restore ``eq'' 57 | 58 | matrix `bmat' = e(b) 59 | cap matrix `Vmat' = e(V) 60 | if (_rc==0) local vregime = "matrix" 61 | else local vregime = "none" 62 | } 63 | else { // bmat#Vmat syntax 64 | matrix `bmat' = `=substr("``eq''",1,`hashpos'-1)' 65 | if (colsof(`bmat')==1) matrix `bmat' = `bmat'' 66 | 67 | cap matrix `Vmat' = `=substr("``eq''",`hashpos'+1,.)' 68 | if (_rc==0) { 69 | if (rowsof(`Vmat')==1) local vregime = "row" 70 | else if (colsof(`Vmat')==1) { 71 | matrix `Vmat' = `Vmat'' 72 | local vregime = "row" 73 | } 74 | else if (rowsof(`Vmat')==colsof(`Vmat')) local vregime = "matrix" 75 | else { 76 | di as error "The variance matrix " substr("``eq''",`hashpos'+1,.) 
" does not have an expected format in model `eq'" 77 | error 198 78 | } 79 | } 80 | else local vregime = "none" 81 | } 82 | 83 | * extract prefix and suffix 84 | foreach o in lag lead { 85 | local currstub_`o' : word `eq' of `stub_`o'' 86 | if ("`currstub_`o''"=="") local currstub_`o' : word 1 of `stub_`o'' 87 | if ("`currstub_`o''"=="" & e(cmd)=="did_imputation" & "`o'"=="lag") local currstub_`o' tau# 88 | if ("`currstub_`o''"=="" & e(cmd)=="did_imputation" & "`o'"=="lead") local currstub_`o' pre# 89 | 90 | if ("`currstub_`o''"!="") { 91 | local hashpos = strpos("`currstub_`o''","#") 92 | if (`hashpos'==0) { 93 | di as error "stub_`o' is incorrectly specified for model `eq'" 94 | error 198 95 | } 96 | local prefix_`o' = substr("`currstub_`o''",1,`hashpos'-1) 97 | local postfix_`o' = substr("`currstub_`o''",`hashpos'+1,.) 98 | local lprefix_`o' = length("`prefix_`o''") 99 | local lpostfix_`o' = length("`postfix_`o''") 100 | local have_`o' = 1 101 | } 102 | else local have_`o' = 0 103 | } 104 | if (`have_lag'==0 & `have_lead'==0) { 105 | di as error "At least one of stub_lag and stub_lead has to be specified for model `eq'" 106 | error 198 107 | } 108 | if ("`currstub_lag'"=="`currstub_lead'") { 109 | di as error "stub_lag and stub_lead have to be different for model `eq'" 110 | error 198 111 | } 112 | 113 | // Part 2: Compute the number of available lags&leads 114 | local maxlag = -1 115 | local maxlead = 0 // zero leads = nothing since they start from 1, while lags start from 0 116 | local allvars : colnames `bmat' 117 | foreach v of local allvars { 118 | if (substr("`v'",1,2)=="o.") local v = substr("`v'",3,.) 
119 | if (`have_lag') { 120 | if (substr("`v'",1,`lprefix_lag')=="`prefix_lag'" & substr("`v'",-`lpostfix_lag',.)=="`postfix_lag'") { 121 | if !mi(real(substr("`v'",`lprefix_lag'+1,length("`v'")-`lprefix_lag'-`lpostfix_lag'))) { 122 | local maxlag = max(`maxlag',real(substr("`v'",`lprefix_lag'+1,length("`v'")-`lprefix_lag'-`lpostfix_lag'))) 123 | } 124 | } 125 | } 126 | if (`have_lead') { 127 | if (substr("`v'",1,`lprefix_lead')=="`prefix_lead'" & substr("`v'",-`lpostfix_lead',.)=="`postfix_lead'") { 128 | if !mi(real(substr("`v'",`lprefix_lead'+1,length("`v'")-`lprefix_lead'-`lpostfix_lead'))) { 129 | local maxlead = max(`maxlead',real(substr("`v'",`lprefix_lead'+1,length("`v'")-`lprefix_lead'-`lpostfix_lead'))) 130 | } 131 | } 132 | } 133 | } 134 | 135 | local curr_trimlag : word `eq' of `trimlag' 136 | if mi("`curr_trimlag'") local curr_trimlag : word 1 of `trimlag' 137 | if mi("`curr_trimlag'") local curr_trimlag = -2 138 | local curr_trimlead : word `eq' of `trimlead' 139 | if mi("`curr_trimlead'") local curr_trimlead : word 1 of `trimlead' 140 | if mi("`curr_trimlead'") local curr_trimlead = -1 141 | 142 | local maxlag = cond(`curr_trimlag'>=-1, min(`maxlag',`curr_trimlag'), `maxlag') 143 | local maxlead = cond(`curr_trimlead'>=0, min(`maxlead',`curr_trimlead'), `maxlead') 144 | if (_N<`maxlag'+`maxlead'+1) { 145 | di as err "Not enough observations to store `=`maxlag'+`maxlead'+1' coefficient estimates for model `eq'" 146 | error 198 147 | } 148 | if (`verbose') noi di "#2 Model `eq': `maxlag' lags, `maxlead' leads" 149 | 150 | // Part 3: Fill in coefs & CIs 151 | if ("`savecoef'"=="") tempvar H`eq' pos`eq' coef`eq' hi`eq' lo`eq' 152 | else { 153 | local H`eq' __event_H`eq' 154 | local pos`eq' __event_pos`eq' 155 | local coef`eq' __event_coef`eq' 156 | local hi`eq' __event_hi`eq' 157 | local lo`eq' __event_lo`eq' 158 | } 159 | 160 | local shift`eq' : word `eq' of `shift' 161 | if ("`shift`eq''"=="") local shift`eq' 0 162 | 163 | gen `H`eq'' = _n-1-`maxlead' 
if _n<=`maxlag'+`maxlead'+1 164 | gen `coef`eq'' = . 165 | gen `hi`eq'' = . 166 | gen `lo`eq'' = . 167 | label var `H`eq'' "Periods since treatment" 168 | if (`maxlag'>=0) forvalues h=0/`maxlag' { 169 | matrix `bmat_current' = J(1,1,.) 170 | cap matrix `bmat_current' = `bmat'[1,"`prefix_lag'`h'`postfix_lag'"] 171 | cap replace `coef`eq'' = `bmat_current'[1,1] if `H`eq''==`h' // because `bmat'[1,"`prefix_lag'`h'`postfix_lag'"] is only a matrix expression on macs 172 | 173 | if ("`ciplottype'"!="none" & "`vregime'"!="none") { 174 | matrix `Vmat_current' = J(1,1,.) 175 | if ("`vregime'"=="matrix") cap matrix `Vmat_current' = `Vmat'["`prefix_lag'`h'`postfix_lag'","`prefix_lag'`h'`postfix_lag'"] 176 | else cap matrix `Vmat_current' = `Vmat'[1,"`prefix_lag'`h'`postfix_lag'"] 177 | local se = `Vmat_current'[1,1]^0.5 178 | cap replace `hi`eq'' = `bmat_current'[1,1]+invnorm(1-`alpha'/2)*`se' if `H`eq''==`h' 179 | cap replace `lo`eq'' = `bmat_current'[1,1]-invnorm(1-`alpha'/2)*`se' if `H`eq''==`h' 180 | } 181 | } 182 | if (`maxlead'>0) forvalues h=1/`maxlead' { 183 | matrix `bmat_current' = J(1,1,.) 184 | cap matrix `bmat_current' = `bmat'[1,"`prefix_lead'`h'`postfix_lead'"] 185 | cap replace `coef`eq'' = `bmat_current'[1,1] if `H`eq''==-`h' 186 | 187 | if ("`ciplottype'"!="none" & "`vregime'"!="none") { 188 | matrix `Vmat_current' = J(1,1,.) 189 | if ("`vregime'"=="matrix") cap matrix `Vmat_current' = `Vmat'["`prefix_lead'`h'`postfix_lead'","`prefix_lead'`h'`postfix_lead'"] 190 | else cap matrix `Vmat_current' = `Vmat'[1,"`prefix_lead'`h'`postfix_lead'"] 191 | local se = `Vmat_current'[1,1]^0.5 192 | cap replace `hi`eq'' = `bmat_current'[1,1]+invnorm(1-`alpha'/2)*`se' if `H`eq''==-`h' 193 | cap replace `lo`eq'' = `bmat_current'[1,1]-invnorm(1-`alpha'/2)*`se' if `H`eq''==-`h' 194 | } 195 | } 196 | count if !mi(`coef`eq'') 197 | if (r(N)==0) { 198 | if (`eq_n'==1) noi di as error `"No estimates found. 
Make sure you have specified stub_lag and stub_lead correctly."' 199 | else noi di as error `"No estimates found for the model "``eq''". Make sure you have specified stub_lag and stub_lead correctly."' 200 | error 498 201 | } 202 | if (`verbose') noi di "#3 `perturb'" 203 | 204 | local perturb_now : word `eq' of `perturb' 205 | if ("`perturb_now'"=="") local perturb_now = 0 206 | if (`verbose') noi di "#3A gen `pos`eq''=`H`eq''+`perturb_now'-`shift`eq''" 207 | gen `pos`eq''=`H`eq''+`perturb_now'-`shift`eq'' 208 | if (`verbose') noi di "#3B" 209 | 210 | } 211 | cap estimates restore `dot' 212 | cap estimates drop `dot' 213 | 214 | // Part 4: Prepare graphs 215 | if ("`default_look'"!="") { 216 | local graph_opt xline(0, lcolor(gs8) lpattern(dash)) yline(0, lcolor(gs8)) graphregion(color(white)) bgcolor(white) ylabel(, angle(horizontal)) `graph_opt' 217 | if (`eq_n'==1) { 218 | local lag_opt color(navy) `lag_opt' 219 | local lead_opt color(maroon) msymbol(S) `lead_opt' 220 | local lag_ci_opt color(navy%45 navy%45) `lag_ci_opt' // color repeated twice only for connected/scatter, o/w doesn't matter 221 | local lead_ci_opt color(maroon%45 maroon%45) `lead_ci_opt' 222 | } 223 | else { 224 | local lag_opt1 color(navy) `lag_opt1' 225 | local lag_opt2 color(maroon) `lag_opt2' 226 | local lag_opt3 color(forest_green) `lag_opt3' 227 | local lag_opt4 color(dkorange) `lag_opt4' 228 | local lag_opt5 color(teal) `lag_opt5' 229 | local lag_opt6 color(cranberry) `lag_opt6' 230 | local lag_opt7 color(lavender) `lag_opt7' 231 | local lag_opt8 color(khaki) `lag_opt8' 232 | local lead_opt1 color(navy) `lead_opt1' 233 | local lead_opt2 color(maroon) `lead_opt2' 234 | local lead_opt3 color(forest_green) `lead_opt3' 235 | local lead_opt4 color(dkorange) `lead_opt4' 236 | local lead_opt5 color(teal) `lead_opt5' 237 | local lead_opt6 color(cranberry) `lead_opt6' 238 | local lead_opt7 color(lavender) `lead_opt7' 239 | local lead_opt8 color(khaki) `lead_opt8' 240 | local lag_ci_opt1 
color(navy%45 navy%45) `lag_ci_opt1' 241 | local lag_ci_opt2 color(maroon%45 maroon%45) `lag_ci_opt2' 242 | local lag_ci_opt3 color(forest_green%45 forest_green%45) `lag_ci_opt3' 243 | local lag_ci_opt4 color(dkorange%45 dkorange%45) `lag_ci_opt4' 244 | local lag_ci_opt5 color(teal%45 teal%45) `lag_ci_opt5' 245 | local lag_ci_opt6 color(cranberry%45 cranberry%45) `lag_ci_opt6' 246 | local lag_ci_opt7 color(lavender%45 lavender%45) `lag_ci_opt7' 247 | local lag_ci_opt8 color(khaki%45 khaki%45) `lag_ci_opt8' 248 | local lead_ci_opt1 color(navy%45 navy%45) `lead_ci_opt1' 249 | local lead_ci_opt2 color(maroon%45 maroon%45) `lead_ci_opt2' 250 | local lead_ci_opt3 color(forest_green%45 forest_green%45) `lead_ci_opt3' 251 | local lead_ci_opt4 color(dkorange%45 dkorange%45) `lead_ci_opt4' 252 | local lead_ci_opt5 color(teal%45 teal%45) `lead_ci_opt5' 253 | local lead_ci_opt6 color(cranberry%45 cranberry%45) `lead_ci_opt6' 254 | local lead_ci_opt7 color(lavender%45 lavender%45) `lead_ci_opt7' 255 | local lead_ci_opt8 color(khaki%45 khaki%45) `lead_ci_opt8' 256 | } 257 | local legend_opt region(lstyle(none)) `legend_opt' 258 | } 259 | 260 | local plotindex = 0 261 | local legend_order 262 | 263 | forvalues eq = 1/`eq_n' { 264 | local lead_cmd 265 | local leadci_cmd 266 | local lag_cmd 267 | local lagci_cmd 268 | 269 | if ("`together'"=="") { // lead graph commands only when they are separate from lags 270 | count if !mi(`coef`eq'') & `H`eq''<0 271 | if (r(N)>0) { 272 | local ++plotindex 273 | local lead_cmd (`plottype' `coef`eq'' `pos`eq'' if !mi(`coef`eq'') & `H`eq''<0, `lead_opt' `lead_opt`eq'') 274 | local legend_order = `"`legend_order' `plotindex' "Pre-trend coefficients""' 275 | } 276 | 277 | count if !mi(`hi`eq'') & `H`eq''<0 278 | if (r(N)>0) { 279 | local ++plotindex 280 | local leadci_cmd (`ciplottype' `hi`eq'' `lo`eq'' `pos`eq'' if !mi(`hi`eq'') & `H`eq''<0, `lead_ci_opt' `lead_ci_opt`eq'') 281 | } 282 | } 283 | 284 | local lag_filter = cond("`together'"=="", 
"`H`eq''>=0", "1") 285 | count if !mi(`coef') & `lag_filter' 286 | if (r(N)>0) { 287 | local ++plotindex 288 | local lag_cmd (`plottype' `coef`eq'' `pos`eq'' if !mi(`coef`eq'') & `lag_filter', `lag_opt' `lag_opt`eq'') 289 | if ("`together'"=="") local legend_order = `"`legend_order' `plotindex' "Treatment effects""' 290 | } 291 | 292 | count if !mi(`hi`eq'') & `lag_filter' 293 | if (r(N)>0) { 294 | local ++plotindex 295 | local lagci_cmd (`ciplottype' `hi`eq'' `lo`eq'' `pos`eq'' if !mi(`hi`eq'') & `lag_filter', `lag_ci_opt' `lag_ci_opt`eq'') 296 | } 297 | if ("`autolegend'"=="noautolegend") local legend = "" 298 | else if ("`together'"=="together") local legend = "legend(off)" // show auto legend only for separate, o/w just one item 299 | else local legend legend(order(`legend_order') `legend_opt') 300 | local maincmd `maincmd' `lead_cmd' `leadci_cmd' `lag_cmd' `lagci_cmd' 301 | if (`verbose') noi di `"#4a ``eq'': `lead_cmd' `leadci_cmd' `lag_cmd' `lagci_cmd'"' 302 | } 303 | if (`verbose' | "`reportcommand'"!="") noi di `"twoway `maincmd' , `legend' `graph_opt'"' 304 | if ("`plot'"!="noplot") twoway `maincmd', `legend' `graph_opt' 305 | } 306 | end 307 | -------------------------------------------------------------------------------- /event_plot.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! 
version 1 2021-05-26}{...} 3 | {vieweralsosee "did_imputation" "help did_imputation"}{...} 4 | {vieweralsosee "csdid" "help csdid"}{...} 5 | {vieweralsosee "did_multiplegt" "help did_multiplegt"}{...} 6 | {vieweralsosee "eventstudyinteract" "help eventstudyinteract"}{...} 8 | {vieweralsosee "estimates store" "help estimates store"}{...} 9 | {viewerjumpto "Syntax" "event_plot##syntax"}{...} 10 | {viewerjumpto "The list of models" "event_plot##listmodels"}{...} 11 | {viewerjumpto "Options" "event_plot##options"}{...} 12 | {viewerjumpto "Combining plots" "event_plot##combine"}{...} 13 | {viewerjumpto "Usage examples" "event_plot##usage"}{...} 14 | {title:Description} 15 | 16 | {pstd} 17 | {bf:event_plot} - Plot the staggered-adoption diff-in-diff ("event study") estimates: coefficients post treatment ("lags") and, if available, pre-trend coefficients ("leads") along with confidence intervals (CIs). 18 | 19 | {pstd} 20 | This command is used once estimates have been produced by the imputation estimator of Borusyak et al. 2021 ({help did_imputation}), 21 | other methods robust to treatment effect heterogeneity ({help did_multiplegt}, {help csdid}, {help eventstudyinteract}), and conventional event-study OLS. 
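{pstd}
As a minimal illustration (the variable names {it:Y i t Ei} and the chosen options are hypothetical), one can run the imputation estimator and immediately plot the current {cmd:e()} results:

{phang2}{cmd:. did_imputation Y i t Ei, allhorizons pretrends(5)}{p_end}
{phang2}{cmd:. event_plot, default_look graph_opt(xtitle("Periods since the event"))}{p_end}

{pstd}
After {cmd:did_imputation}, the templates {opt stub_lag(tau#)} and {opt stub_lead(pre#)} are assumed automatically, so they need not be specified.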
22 | 23 | 24 | {marker syntax}{...} 25 | {title:Syntax} 26 | 27 | {phang} 28 | {cmd: event_plot} [{help event_plot##listmodels:list of models}] [, {help event_plot##options:options}] 29 | 30 | 31 | {marker listmodels}{...} 32 | {title:The List of Models} 33 | 34 | {phang} 35 | Each term in the list of models specifies where to read the coefficient estimates (and variances) from.{p_end} 36 | 37 | {phang}1) Leave empty or specify a dot ({bf:.}) to plot the current estimates, stored in the {cmd:e()} output;{p_end} 38 | {phang}2) To show previously constructed estimates which were saved by {help estimates store}, provide their name;{p_end} 39 | {phang}3) To read the estimates from an arbitrary row-vector, specify {it:bmat}{bf:#}{it:vmat} where:{p_end} 40 | {pmore}- {it:bmat} is the name of the coefficient matrix or an expression to access it, e.g. r(myestimates) (with no internal spaces). 41 | This should be a row-vector;{p_end} 42 | {pmore}- {it:vmat} is the name of the variance matrix or an expression to access it. 43 | This can be a square matrix or a row-vector of individual coefficient variances, and it is optional 44 | (i.e. {it:bmat}{bf:#} would plot the coefs without CIs).{p_end} 45 | 46 | {phang}By including several terms like this, you can combine several sets of estimates on one plot; see {help event_plot##combine:Combining plots}. 47 | 48 | 49 | {marker options}{...} 50 | {title:Options} 51 | 52 | {pstd} 53 | These options are designed for showing a single plot. Please see {help event_plot##combine:Combining plots} for adjustments and additional options when plots are combined. 54 | 55 | {dlgtab:Which Coefficients to Show} 56 | 57 | {phang}{opt stub_lag(prefix#postfix)}: a template for how the relevant coefficients are called in the estimation output. 58 | No lag coefficients will be shown if {opt stub_lag} is not specified, except after {cmd:did_imputation} (in which case {opt stub_lag(tau#)} is assumed). 
59 | The template must include the symbol {it:#} indicating where the number is located (running from 0).{p_end} 60 | 61 | {pmore}{it:Examples:}{p_end} 62 | {phang2}{opt stub_lag(tau#)} means that the relevant coefficients are called tau0, tau1, ..., as with {cmd:did_imputation} (note that the postfix is empty in this example);{p_end} 63 | {phang2}{opt stub_lag(L#xyz)} means they are called L0xyz, L1xyz, ... (note that just specifying {opt stub_lag(L#)} will not be enough in this case). 64 | 65 | {phang}{opt stub_lead(prefix#postfix)}: same for the leads. Here the number runs from 1. {it:Examples:} {opt stub_lead(pre#)} or {opt stub_lead(F#xyz)}. 66 | 67 | {phang}{opt trimlag(integer)}: lags 0..{bf:trimlag} will be shown, while others will be suppressed. To show none (i.e. pre-trends only), specify {opt trimlag(-1)}. The default is to show all available lags. 68 | 69 | {phang}{opt trimlead(integer)}: leads 1..{bf:trimlead} will be shown, while others will be suppressed. To show none (i.e. no pre-trends), specify {opt trimlead(0)}. The default is to show all available leads. 70 | 71 | {dlgtab:How to Show The Coefficients} 72 | 73 | {phang}{opt plottype(string)}: the {help twoway} plot type used to show coefficient estimates. Supported options: {help twoway connected:connected} (by default), {help line}, {help scatter}.{p_end} 74 | 75 | {phang}{opt ciplottype(string)}: the {help twoway} plot type used to show CI estimates. Supported options:{p_end} 76 | {phang2}- {help rarea} (default for {opt plottype(connected)} and {opt plottype(line)});{p_end} 77 | {phang2}- {help rcap} (default for {opt plottype(scatter)});{p_end} 78 | {phang2}- {help twoway connected:connected};{p_end} 79 | {phang2}- {help scatter};{p_end} 80 | {phang2}- {bf:none} (i.e. don't show CIs at all; default if SE are not available).{p_end} 81 | 82 | {phang}{opt together}: by default the leads and lags are shown as two separate lines (as recommended by Borusyak, Jaravel, and Spiess 2021). 
83 | If {opt together} is specified, they are shown as one line, and the options for the lags are used for this line 84 | (while the options for the leads are ignored). {p_end} 85 | 86 | {phang}{opt shift(integer)}: Shift all coefficients to the left (when {opt shift}>0) or right (when {opt shift}<0). Specify if lag 0 actually corresponds to period -{opt shift} relative to the event time, as in the case of anticipation effects. This is similar to the {opt shift} option in {help did_imputation}. The default is zero. {p_end} 87 | 88 | {dlgtab:Graph options} 89 | 90 | {phang}{opt default_look}: sets default graph parameters. Additional graph options can still be specified and will be combined with these, but options cannot be repeated. See details in the {help event_plot##defaultlook:Default Look} section below. {p_end} 91 | 92 | {phang}{opt graph_opt(string)}: additional {help twoway options} for the graph overall (e.g. {opt title}, {opt xlabel}).{p_end} 93 | 94 | {phang}{opt lag_opt(string)}: additional options for the lag coefficient graph (e.g. {opt msymbol}, {opt lpattern}, {opt color}).{p_end} 95 | 96 | {phang}{opt lag_ci_opt(string)}: additional options for the lag CI graph (e.g. {opt color}) {p_end} 97 | 98 | {phang}{opt lead_opt(string)}, {opt lead_ci_opt(string)}: same for lead coefficients and CIs. Ignored if {opt together} is specified.{p_end} 99 | 100 | {dlgtab:Legend options} 101 | 102 | {pstd}A legend is shown by default, unless {opt together} is specified. You can either adjust the automatic legend by using {opt legend_opt()} 103 | , or suppress or replace it by specifying {opt noautolegend} and modifying {opt graph_opt()}.{p_end} 104 | {pmore}{it:Notes:}{p_end} 105 | {phang2}- the order of graphs for the legend: lead coefs, lead CIs, lag coefs, lag CIs, excluding those not applicable 106 | (e.g. 
CIs with {opt ciplottype(none)} or leads with {opt together}).{p_end} 107 | {phang2}- with {opt ciplottype(connected)} or {opt ciplottype(scatter)}, each CI is two lines instead of one.{p_end} 108 | {phang2}- if {opt together} is specified, the legend is automatically off. Use {opt noautolegend} to add a manual legend.{p_end} 109 | 110 | {phang}{opt legend_opt(string)}: additional options for the automatic legend.{p_end} 111 | 112 | {phang}{opt noautolegend}: suppresses the automatic legend. A manual legend (or the {opt legend(off)} option) should be added to {opt graph_opt()}.{p_end} 113 | 114 | {dlgtab:Miscellaneous} 115 | 116 | {phang}{opt savecoef}: save the data underlying the plot in the current dataset, e.g. to later use it in more elaborate manual plots. 117 | Variables {it:__event_H#}, {it:__event_pos#}, {it:__event_coef#}, {it:__event_lo#}, and {it:__event_hi#} will be created for each model {it:#}=1,..., where:{p_end} 118 | {phang2}- {it:H} is the number of periods relative to treatment;{p_end} 119 | {phang2}- {it:pos} is the x-coordinate (equal to {it:H} by default but modified by {opt perturb} and {opt shift});{p_end} 120 | {phang2}- {it:coef} is the point estimate;{p_end} 121 | {phang2}- [{it:lo},{it:hi}] is the CI.{p_end} 122 | 123 | {phang}{opt reportcommand}: report the command for the plot. Use it together with {opt savecoef} to then create more elaborate manual plots.{p_end} 124 | 125 | {phang}{opt noplot}: do not show the plot (useful together with {opt savecoef}).{p_end} 126 | 127 | {phang}{opt alpha(real)}: CIs will be shown at the (1-{it:alpha}) confidence level. The default is 0.05, i.e. 95% CIs.{p_end} 128 | 129 | {phang}{opt verbose}: debugging mode.{p_end} 130 | 131 | 132 | {marker combine}{...} 133 | {title:Combining plots} 134 | 135 | {phang}Up to 8 models can be combined, e.g. to show how the estimates differ between {cmd:did_imputation} and OLS, or between males and females.
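{pmore}For instance, a minimal combined call might look like this (a sketch, assuming two sets of estimates have been stored via {cmd:estimates store} as {cmd:bjs}, from {cmd:did_imputation}, and {cmd:ols}, from a conventional event-study regression with stubs as in the Usage examples below):{p_end} {phang2}{cmd:event_plot bjs ols, stub_lag(tau# L#event) stub_lead(pre# F#event) together plottype(scatter) default_look}{p_end}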
136 | 137 | {phang}With several models, additional options are available, while the syntax and meaning of others are modified: {p_end} 138 | 139 | {phang2}{opt perturb(numlist)}: shifts the plots horizontally relative to each other, so that the estimates are easier to read. The numlist is the list of x-shifts, and the default is an equally spaced sequence from 0 to 0.2 (but negative numbers are allowed). To prevent the shifts, specify {opt perturb(0)}.{p_end} 140 | 141 | {phang2}{opt lag_opt#(string)}, {opt lag_ci_opt#(string)}, {opt lead_opt#(string)}, {opt lead_ci_opt#(string)} for #=1,...,5: extra parameters 142 | for individual models (e.g. colors). Similar options without an index, e.g. {opt lag_opt()}, are passed to all relevant graphs.{p_end} 143 | 144 | {phang2}{opt stub_lag}, {opt stub_lead}, {opt trimlag}, {opt trimlead}, {opt shift} can be specified either as a list of values (one per plot), or as just one value to be used for all plots.{p_end} 145 | 146 | {phang2}{opt plottype} and {opt together} are currently required to be the same for all graphs.{p_end} 147 | 148 | 149 | {marker defaultlook}{...} 150 | {title:Default Look} 151 | 152 | {phang} With one model, specifying {opt default_look} is equivalent to including these options:{p_end} 153 | 154 | {phang2}{opt graph_opt(xline(0, lcolor(gs8) lpattern(dash)) yline(0, lcolor(gs8)) graphregion(color(white)) bgcolor(white) ylabel(, angle(horizontal)))} 155 | {opt lag_opt(color(navy))} {opt lead_opt(color(maroon) msymbol(S))} 156 | {opt lag_ci_opt(color(navy%45 navy%45))} {opt lead_ci_opt(color(maroon%45 maroon%45))} 157 | {opt legend_opt(region(lstyle(none)))} 158 | 159 | {phang}With multiple models, the only difference is in colors.
Both lags and leads use the same color: navy for the first plot, maroon for the second, etc.{p_end} 160 | 161 | {marker usage}{...} 162 | {title:Usage examples} 163 | 164 | 1) Estimation + plotting via {help did_imputation}: 165 | 166 | {cmd:did_imputation Y i t Ei, autosample hor(0/20) pretrend(14)} 167 | {cmd:estimates store bjs} {it:// you only need to store the coefs to combine the plots, see Example 3} 168 | {cmd:event_plot, default_look graph_opt(xtitle("Days since the event") ytitle("Coefficients") xlabel(-14(7)14 20))} 169 | 170 | 2) Estimation + plotting via conventional OLS-based event study estimation: 171 | 172 | {it:// creating dummies for the lags 0..19, based on K = number of periods since treatment (or missing if there is a never-treated group)} 173 | {cmd:forvalues l = 0/19} { 174 | {cmd:gen L`l'event = K==`l'} 175 | } 176 | {cmd:gen L20event = K>=20} {it:// binning K=20 and above} 177 | 178 | {it:// creating dummies for the leads 1..14} 179 | {cmd:forvalues l = 1/13} { 180 | {cmd:gen F`l'event = K==-`l'} 181 | } 182 | {cmd:gen F14event = K<=-14} {it:// binning K=-14 and below} 183 | 184 | {it:// running the event study regression. Drop leads 1 and 2 to avoid underidentification} 185 | {it:// if there is no never-treated group (could instead drop any others); see Borusyak et al.
2021} 186 | {cmd:reghdfe outcome o.F1event o.F2event F3event-F14event L*event, a(i t) cluster(i)} 187 | 188 | {it:// plotting the coefficients} 189 | {cmd:event_plot, default_look stub_lag(L#event) stub_lead(F#event) together plottype(scatter)} /// 190 | {cmd:graph_opt(xtitle("Days since the event") ytitle("OLS coefficients") xlabel(-14(7)14 20))} 191 | 192 | 3) Combining estimates from {help did_imputation} and OLS: 193 | 194 | {cmd:event_plot bjs ., stub_lag(tau# L#event) stub_lead(pre# F#event) together plottype(scatter) default_look} /// 195 | {cmd:graph_opt(xtitle("Days since the event") ytitle("OLS coefficients") xlabel(-14(7)14 20))} 196 | 197 | 4) For estimation + plotting with {help csdid}, {help did_multiplegt}, and {help eventstudyinteract}, as well as {help did_imputation} 198 | and traditional OLS, see our example on GitHub: five_estimators_example.do at {browse "https://github.com/borusyak/did_imputation"} 199 | 200 | 201 | {title:Missing Features} 202 | 203 | {phang}- More flexibility for {opt stub_lag} and {opt stub_lead} for reading the coefficients of conventional event studies{p_end} 204 | {phang}- Automatic support of alternative robust estimators: {cmd:did_multiplegt}, {cmd:csdid}, and {cmd:eventstudyinteract}{p_end} 205 | {phang}- Allow {opt plottype} and {opt together} to vary across the combined plots{p_end} 206 | {phang}- Make the command consistent with {cmd:did_multiplegt} with the {opt longdiff_placebo} option{p_end} 207 | {phang}- Throw an error when neither default_look nor graphical options are specified{p_end} 208 | {phang}- In old Stata versions, avoid using transparent colors{p_end} 209 | {phang}- After {cmd:eventstudyinteract}, allow displaying omitted categories{p_end} 210 | {phang}- Add the addzero() option to accommodate the omitted category in, e.g., {cmd:eventstudyinteract}{p_end} 211 | 212 | {pstd} 213 | If you are interested in discussing these or other features, please {help event_plot##author:contact me}.
214 | 215 | {title:References} 216 | 217 | {phang}{it:If using this command, please cite:} 218 | 219 | {phang} 220 | Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2021). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper. 221 | {p_end} 222 | 223 | {title:Acknowledgements} 224 | 225 | {pstd} 226 | We thank Kyle Butts for his help in preparing this helpfile. 227 | 228 | {marker author}{...} 229 | {title:Author} 230 | 231 | {pstd} 232 | Kirill Borusyak (UCL Economics), k.borusyak@ucl.ac.uk 233 | 234 | -------------------------------------------------------------------------------- /five_estimators_example.do: -------------------------------------------------------------------------------- 1 | /* 2 | This simulated example illustrates how to estimate causal effects with event studies using a range of methods 3 | and plot the coefficients & confidence intervals using the event_plot command. 4 | 5 | Date: 28/05/2021 6 | Author: Kirill Borusyak (UCL), k.borusyak@ucl.ac.uk 7 | 8 | You'll need the following commands: 9 | - did_imputation (Borusyak et al. 2021): available on SSC 10 | - did_multiplegt (de Chaisemartin and D'Haultfoeuille 2020): available on SSC 11 | - eventstudyinteract (Sun and Abraham 2020): available on SSC 12 | - csdid (Callaway and Sant'Anna 2020): available on SSC 13 | 14 | */ 15 | 16 | // Generate a complete panel of 300 units observed in 15 periods 17 | clear all 18 | timer clear 19 | set seed 10 20 | global T = 15 21 | global I = 300 22 | 23 | set obs `=$I*$T' 24 | gen i = int((_n-1)/$T )+1 // unit id 25 | gen t = mod((_n-1),$T )+1 // calendar period 26 | tsset i t 27 | 28 | // Randomly generate treatment rollout years uniformly across Ei=10..16 (note that periods t>=16 would not be useful since all units are treated by then) 29 | gen Ei = ceil(runiform()*7)+$T -6 if t==1 // year when unit is first treated 30 | bys i (t): replace Ei = Ei[1] 31 | gen K = t-Ei // "relative time", i.e.
the number of periods since treatment (could be missing if never-treated) 32 | gen D = K>=0 & Ei!=. // treatment indicator 33 | 34 | // Generate the outcome with parallel trends and heterogeneous treatment effects 35 | gen tau = cond(D==1, (t-12.5), 0) // heterogeneous treatment effects (in this case varying over calendar periods) 36 | gen eps = rnormal() // error term 37 | gen Y = i + 3*t + tau*D + eps // the outcome (FEs play no role since all methods control for them) 38 | //save five_estimators_data, replace 39 | 40 | // Estimation with did_imputation of Borusyak et al. (2021) 41 | did_imputation Y i t Ei, allhorizons pretrend(5) 42 | event_plot, default_look graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") /// 43 | title("Borusyak et al. (2021) imputation estimator") xlabel(-5(1)5)) 44 | 45 | estimates store bjs // storing the estimates for later 46 | 47 | // Estimation with did_multiplegt of de Chaisemartin and D'Haultfoeuille (2020) 48 | did_multiplegt Y i t D, robust_dynamic dynamic(5) placebo(5) breps(100) cluster(i) 49 | event_plot e(estimates)#e(variances), default_look graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") /// 50 | title("de Chaisemartin and D'Haultfoeuille (2020)") xlabel(-5(1)5)) stub_lag(Effect_#) stub_lead(Placebo_#) together 51 | 52 | matrix dcdh_b = e(estimates) // storing the estimates for later 53 | matrix dcdh_v = e(variances) 54 | 55 | // Estimation with csdid of Callaway and Sant'Anna (2020) 56 | gen gvar = cond(Ei==., 0, Ei) // group variable as required for the csdid command 57 | csdid Y, ivar(i) time(t) gvar(gvar) notyet 58 | estat event, estore(cs) // this produces and stores the estimates at the same time 59 | event_plot cs, default_look graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") xlabel(-14(1)5) /// 60 | title("Callaway and Sant'Anna (2020)")) stub_lag(Tp#) stub_lead(Tm#) together 61 | 62 | // Estimation with eventstudyinteract of Sun and Abraham
(2020) 63 | sum Ei 64 | gen lastcohort = Ei==r(max) // dummy for the latest- or never-treated cohort 65 | forvalues l = 0/5 { 66 | gen L`l'event = K==`l' 67 | } 68 | forvalues l = 1/14 { 69 | gen F`l'event = K==-`l' 70 | } 71 | drop F1event // normalize K=-1 (and also K=-15) to zero 72 | eventstudyinteract Y L*event F*event, vce(cluster i) absorb(i t) cohort(Ei) control_cohort(lastcohort) 73 | event_plot e(b_iw)#e(V_iw), default_look graph_opt(xtitle("Periods since the event") ytitle("Average causal effect") xlabel(-14(1)5) /// 74 | title("Sun and Abraham (2020)")) stub_lag(L#event) stub_lead(F#event) together 75 | 76 | matrix sa_b = e(b_iw) // storing the estimates for later 77 | matrix sa_v = e(V_iw) 78 | 79 | // TWFE OLS estimation (which is biased here because of treatment effect heterogeneity). Some groups could be binned. 80 | reghdfe Y F*event L*event, a(i t) cluster(i) 81 | event_plot, default_look stub_lag(L#event) stub_lead(F#event) together graph_opt(xtitle("Periods since the event") ytitle("OLS coefficients") xlabel(-14(1)5) /// 82 | title("OLS")) 83 | 84 | estimates store ols // saving the estimates for later 85 | 86 | // Construct the vector of true average treatment effects by the number of periods since treatment 87 | matrix btrue = J(1,6,.)
88 | matrix colnames btrue = tau0 tau1 tau2 tau3 tau4 tau5 89 | qui forvalues h = 0/5 { 90 | sum tau if K==`h' 91 | matrix btrue[1,`h'+1]=r(mean) 92 | } 93 | 94 | // Combine all plots using the stored estimates 95 | event_plot btrue# bjs dcdh_b#dcdh_v cs sa_b#sa_v ols, /// 96 | stub_lag(tau# tau# Effect_# Tp# L#event L#event) stub_lead(pre# pre# Placebo_# Tm# F#event F#event) plottype(scatter) ciplottype(rcap) /// 97 | together perturb(-0.325(0.13)0.325) trimlead(5) noautolegend /// 98 | graph_opt(title("Event study estimators in a simulated panel (300 units, 15 periods)", size(medlarge)) /// 99 | xtitle("Periods since the event") ytitle("Average causal effect") xlabel(-5(1)5) ylabel(0(1)3) /// 100 | legend(order(1 "True value" 2 "Borusyak et al." 4 "de Chaisemartin-D'Haultfoeuille" /// 101 | 6 "Callaway-Sant'Anna" 8 "Sun-Abraham" 10 "OLS") rows(3) region(style(none))) /// 102 | /// the following lines replace default_look with something more elaborate 103 | xline(-0.5, lcolor(gs8) lpattern(dash)) yline(0, lcolor(gs8)) graphregion(color(white)) bgcolor(white) ylabel(, angle(horizontal)) /// 104 | ) /// 105 | lag_opt1(msymbol(+) color(cranberry)) lag_ci_opt1(color(cranberry)) /// 106 | lag_opt2(msymbol(O) color(cranberry)) lag_ci_opt2(color(cranberry)) /// 107 | lag_opt3(msymbol(Dh) color(navy)) lag_ci_opt3(color(navy)) /// 108 | lag_opt4(msymbol(Th) color(forest_green)) lag_ci_opt4(color(forest_green)) /// 109 | lag_opt5(msymbol(Sh) color(dkorange)) lag_ci_opt5(color(dkorange)) /// 110 | lag_opt6(msymbol(Oh) color(purple)) lag_ci_opt6(color(purple)) 111 | graph export "five_estimators_example.png", replace 112 | -------------------------------------------------------------------------------- /five_estimators_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/borusyak/did_imputation/767c8d6670a751170910d419bbafd323df92ef08/five_estimators_example.png 
--------------------------------------------------------------------------------