├── .github
    └── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   ├── config.yml
    │   └── feature_request.md
├── .gitignore
├── CNAME
├── Gemfile
├── LICENSE
├── README.md
├── _announcements
    ├── week-0.md
    └── week-1.md
├── _config.yml
├── _includes
    └── minutes.liquid
├── _layouts
    ├── announcement.html
    ├── assignment.html
    ├── module.html
    ├── schedule.html
    └── staffer.html
├── _modules
    ├── calendar.md
    └── calendar_revision.md
├── _sass
    └── custom
    │   ├── announcement.scss
    │   ├── card.scss
    │   ├── custom.scss
    │   ├── module.scss
    │   ├── schedule.scss
    │   └── staffer.scss
├── _schedules
    └── weekly.md
├── _staffers
    ├── aidan.md
    ├── amanda.md
    ├── chenyan.md
    ├── clement.md
    ├── daphne.md
    ├── emmy.md
    ├── hari.md
    ├── mrigank.md
    ├── roochi.md
    ├── srinivas.md
    └── yiming.md
├── about.md
├── announcements.md
├── assets
    └── images
    │   ├── Roochi.png
    │   ├── abertsch.jpg
    │   ├── aidan.jpg
    │   ├── chenyan.jpg
    │   ├── clement.jpg
    │   ├── daphne.png
    │   ├── emmy.png
    │   ├── hari.png
    │   ├── mrigank.jpeg
    │   ├── placeholder.png
    │   ├── srinivas.jpg
    │   └── yiming.jpg
├── calendar.md
├── homework1.md
├── homework2.md
├── homework3.md
├── homework_materials
    ├── generate.py
    ├── hw1_starter_code.ipynb
    ├── hw1_template.tex
    ├── hw2.pdf
    ├── hw2_latex_template.zip
    ├── hw2_starter_code.zip
    ├── hw3.pdf
    ├── hw3_latex_template.zip
    ├── hw3_starter_code.zip
    ├── project_midpoint_template.zip
    └── project_proposal_template.zip
├── project.md
├── project_peer_feedback.md
├── quiz.md
├── schedule.md
└── staff.md


/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Browser [e.g. chrome, safari]
29 |  - Version [e.g. 22]
30 | 
31 | **Smartphone (please complete the following information):**
32 |  - Device: [e.g. iPhone6]
33 |  - OS: [e.g. iOS8.1]
34 |  - Browser [e.g. stock browser, safari]
35 |  - Version [e.g. 22]
36 | 
37 | **Additional context**
38 | Add any other context about the problem here.
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 |   - name: Ask a question
4 |     url: https://github.com/kevinlin1/just-the-class/discussions
5 |     about: Ask questions and discuss with other community members
6 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | .bundle/
 3 | .jekyll-cache/
 4 | .jekyll-metadata
 5 | .sass-cache/
 6 | Gemfile.lock
 7 | _site/
 8 | node_modules/
 9 | vendor/
10 | 


--------------------------------------------------------------------------------
/CNAME:
--------------------------------------------------------------------------------
1 | 2023.cmu-llms.org


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | gem 'github-pages', group: :jekyll_plugins
3 | 
4 | gem "webrick", "~> 1.8"
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Kevin Lin
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Github Repo for 11-667 at CMU
 2 | 
 3 | This is the website repository for the Fall 2023 iteration of 11-667: Large Language Models Methods and Applications.
 4 | 
 5 | # Acknowledgements
 6 | 
 7 | This repo is based off the [Just the Class](https://github.com/kevinlin1/just-the-class) Jekyll template, creared by Kevin Lin.
 8 | 
 9 | Just the Class extends the popular [Just the Docs](https://github.com/just-the-docs/just-the-docs) theme, which provides a robust and thoroughly-tested foundation for simple websites. 
10 | 
11 | # Getting Started
12 | 
13 | Getting started with Just the Class is simple.
14 | 
15 | 1. Create a [new repository based on Just the Class](https://github.com/kevinlin1/just-the-class/generate).
16 | 1. Update `_config.yml` and `README.md` with your course information. [Be sure to update the url and baseurl](https://mademistakes.com/mastering-jekyll/site-url-baseurl/).
17 | 1. Configure a [publishing source for GitHub Pages](https://help.github.com/en/articles/configuring-a-publishing-source-for-github-pages). Your course website is now live!
18 | 1. Edit and create `.md` [Markdown files](https://guides.github.com/features/mastering-markdown/) to add more content pages.
19 | 
20 | Just the Class has been used by instructors at Stanford University ([CS 161](https://stanford-cs161.github.io/winter2021/)), UC Berkeley ([Data 100](https://ds100.org/fa21/)), UC Santa Barbara ([CSW8](https://ucsb-csw8.github.io/s22/)), Northeastern University ([CS4530/5500](https://neu-se.github.io/CS4530-CS5500-Spring-2021/)), and Carnegie Mellon University ([17-450/17-950](https://cmu-crafting-software.github.io/)). Share your course website and find more examples in the [show and tell discussion](https://github.com/kevinlin1/just-the-class/discussions/categories/show-and-tell)!
21 | 
22 | ### Local development environment
23 | 
24 | Just the Class requires no special Jekyll plugins and can run on GitHub Pages' standard Jekyll compiler. To setup a local development environment, clone your template repository and follow the GitHub Docs on [Testing your GitHub Pages site locally with Jekyll](https://docs.github.com/en/pages/setting-up-a-github-pages-site-with-jekyll/testing-your-github-pages-site-locally-with-jekyll).
25 | 


--------------------------------------------------------------------------------
/_announcements/week-0.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Week 0 Announcement
3 | week: 0
4 | date: 2019-04-01
5 | ---
6 | 
7 | Hello world!
8 | {: .fs-5 }
9 | 


--------------------------------------------------------------------------------
/_announcements/week-1.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: Week 1 Announcement
 3 | week: 1
 4 | date: 2019-04-08
 5 | ---
 6 | 
 7 | 1. Create a [new repository based on Just the Class](https://github.com/kevinlin1/just-the-class/generate).
 8 | 1. Configure a [publishing source for GitHub Pages](https://help.github.com/en/articles/configuring-a-publishing-source-for-github-pages). Your course website is now live!
 9 | 1. Update `_config.yml` with your course information.
10 | 1. Edit and create `.md` [Markdown files](https://guides.github.com/features/mastering-markdown/) to add your content.
11 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
 1 | # Welcome to Jekyll!
 2 | #
 3 | # This config file is meant for settings that affect your whole site, values
 4 | # which you are expected to set up once and rarely edit after that. If you find
 5 | # yourself editing these this file very often, consider using Jekyll's data files
 6 | # feature for the data you need to update frequently.
 7 | #
 8 | # For technical reasons, this file is *NOT* reloaded automatically when you use
 9 | # 'jekyll serve'. If you change this file, please restart the server process.
10 | 
11 | # Site settings
12 | # These are used to personalize your new site. If you look in the HTML files,
13 | # you will see them accessed via {{ site.title }}, {{ site.github_repo }}, and so on.
14 | # You can create any custom variable you would like, and they will be accessible
15 | # in the templates via {{ site.myvariable }}.
16 | title: 11-667
17 | tagline: Large Language Models Methods and Appllications
18 | description: Language Technologies Institute, Fall 2023
19 | author: Daphne Ippolito and Chenyan Xiong
20 | baseurl: '' # the subpath of your site, e.g. /blog
21 | url: 'https://cmu-llms.org' # the base hostname & protocol for your site, e.g. http://example.com
22 | exclude: ["Gemfile", "Gemfile.lock", "LICENSE"]
23 | 
24 | # Theme settings
25 | remote_theme: just-the-docs/just-the-docs@v0.5.2
26 | color_scheme: light
27 | search_enabled: true
28 | heading_anchors: true
29 | permalink: pretty
30 | aux_links:
31 |   Canvas:
32 |     - 'https://canvas.cmu.edu/courses/36424/'
33 |   Piazza:
34 |     - 'https://piazza.com/class/llxwdqbqn7z24b/'
35 | footer_content:
36 | 
37 | # Collections for website data
38 | collections:
39 |   staffers:
40 |   modules:
41 |   schedules:
42 |   announcements:
43 | # Default layouts for each collection type
44 | defaults:
45 |   - scope:
46 |       path: ''
47 |       type: staffers
48 |     values:
49 |       layout: staffer
50 |       height: 300
51 |       subpath: '/assets/images/'
52 |       width: 300
53 |   - scope:
54 |       path: ''
55 |       type: modules
56 |     values:
57 |       layout: module
58 |   - scope:
59 |       path: ''
60 |       type: schedules
61 |     values:
62 |       layout: schedule
63 |   - scope:
64 |       path: ''
65 |       type: announcements
66 |     values:
67 |       layout: announcement
68 | 
69 | compress_html:
70 |   clippings: all
71 |   comments: all
72 |   endings: all
73 |   startings: []
74 |   blanklines: false
75 |   profile: false
76 | 


--------------------------------------------------------------------------------
/_includes/minutes.liquid:
--------------------------------------------------------------------------------
 1 | {% capture _minutes_workspace %}
 2 |   {% comment %}
 3 |     Return the number of minutes between midnight and the given time string (e.g. '9:30 AM').
 4 | 
 5 |     Parameters:
 6 |       `time` (string): the time to convert.
 7 |   {% endcomment %}
 8 | 
 9 |   {% assign _time = include.time %}
10 |   {% assign _hhmm = _time | split: ' ' | first | split: ':' %}
11 |   {% assign _hours = _hhmm | first | to_integer %}
12 |   {% assign _minutes = _hhmm | last | to_integer %}
13 |   {% assign _ampm = _time | split: ' ' | last | upcase %}
14 | 
15 |   {% if _ampm == 'AM' and _hours == 12 %}
16 |     {% assign _hours = _hours | minus: 12 %}
17 |   {% elsif _ampm == 'PM' and _hours != 12 %}
18 |     {% assign _hours = _hours | plus: 12 %}
19 |   {% endif %}
20 | {% endcapture %}{% assign _minutes_workspace = '' %}{{ _hours | times: 60 | plus: _minutes }}
21 | 


--------------------------------------------------------------------------------
/_layouts/announcement.html:
--------------------------------------------------------------------------------
 1 | <div class="announcement">
 2 |   <h2>{{ page.title }}</h2>
 3 |   <span class="announcement-meta">
 4 |     {% if page.date %}
 5 |     {{ page.date | date: '%b %e' }}
 6 |     &middot;
 7 |     {% endif %}
 8 |     {% assign minutes = content | strip_html | number_of_words | divided_by: 180.0 | round %}
 9 |     {{ minutes }} min read
10 |   </span>
11 |   <div>
12 |     {{ content }}
13 |   </div>
14 | </div>
15 | 


--------------------------------------------------------------------------------
/_layouts/assignment.html:
--------------------------------------------------------------------------------
1 | <h2 class="fs-4" id="{{ page.title | slugify }}" >{{ page.title }}</h2>
2 | <div class="module">
3 |   {{ content }}
4 | </div>
5 | 


--------------------------------------------------------------------------------
/_layouts/module.html:
--------------------------------------------------------------------------------
1 | <h2 class="fs-4" id="{{ page.title | slugify }}" >{{ page.title }}</h2>
2 | <div class="module">
3 |   {{ content }}
4 | </div>
5 | 


--------------------------------------------------------------------------------
/_layouts/schedule.html:
--------------------------------------------------------------------------------
 1 | {% assign start_time = page.timeline | first %}
 2 | {% capture offset %}{% include minutes.liquid time=start_time %}{% endcapture %}
 3 | <div class="schedule">
 4 |   <ul class="schedule-timeline" style="min-width: {{ page.schedule | size | times: 120 }}px">
 5 |     {% for time in page.timeline %}
 6 |     <li class="schedule-time">{{ time }} </li>
 7 |     {% endfor %}
 8 |   </ul>
 9 |   <ul class="schedule-group">
10 |     {% for day in page.schedule %}
11 |     <li class="schedule-day">
12 |       <h2 class="schedule-header">{{ day.name }}</h2>
13 |       {% if day.events %}
14 |       <ul class="schedule-events" style="height: {{ page.timeline | size | times: 40 }}px">
15 |       {% for event in day.events %}
16 |         {% capture start %}{% include minutes.liquid time=event.start %}{% endcapture %}
17 |         {% capture end %}{% include minutes.liquid time=event.end %}{% endcapture %}
18 |         {% assign top = start | minus: offset | times: 40 | divided_by: 30 %}
19 |         {% assign height = end | minus: start | times: 40 | divided_by: 30 %}
20 |         <li class="schedule-event {% if event.class %}{{ event.class }}{% else %}{{ event.name | slugify }}{% endif %}"
21 |             style="top: {{ top }}px; height: {{ height }}px;">
22 |           <div class="name">{{ event.name }}</div>
23 |           <div class="time">{{ event.start }}–{{ event.end }}</div>
24 |           {% if event.location %}
25 |           <div class="location">{{ event.location }}</div>
26 |           {% endif %}
27 |         </li>
28 |       {% endfor %}
29 |       </ul>
30 |       {% endif %}
31 |     </li>
32 |     {% endfor %}
33 |   </ul>
34 | </div>
35 | 


--------------------------------------------------------------------------------
/_layouts/staffer.html:
--------------------------------------------------------------------------------
 1 | <div class="staffer">
 2 |   {%- if page.photo -%}
 3 |   <img class="staffer-image" src="{{ site.baseurl }}{{ page.subpath }}{{ page.photo }}" alt="" width="{{ page.width }}" height="{{ page.height }}">
 4 |   {%- endif -%}
 5 |   <div>
 6 |     <h3 class="staffer-name" id="{{ page.name | slugify }}">
 7 |       {%- if page.website -%}
 8 |       <a href="{{ page.website }}">{{ page.name }}</a>
 9 |       {%- else -%}
10 |       {{ page.name }}
11 |       {%- endif -%}
12 |       {%- if page.pronouns -%}
13 |       <span class="staffer-pronouns">{{ page.pronouns }}</span>
14 |       {%- endif -%}
15 |     </h3>
16 |     {%- if page.email -%}
17 |     <p><a href="mailto:{{ page.email }}">{{ page.email }}</a></p>
18 |     {%- endif -%}
19 |     {%- if page.section -%}
20 |     <p class="staffer-meta">Quiz Section: {{ page.section | markdownify | strip_html }}</p>
21 |     {%- endif -%}
22 |     {%- if page.role == "Instructor" -%}
23 |     <p class="staffer-meta">Office Hours: {{ page.office-hours | markdownify | strip_html }}</p>
24 |     {%- endif -%}
25 |     {{ content }}
26 |   </div>
27 | </div>
28 | 


--------------------------------------------------------------------------------
/_modules/calendar.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Lecture Schedule
  3 | ---
  4 | 
  5 | <!-- Sep 28
  6 | : [Java & Git](#)
  7 |   : [1.1](#)
  8 | 
  9 | Sep 29
 10 | : **Section**{: .label .label-purple }[Intro to Java](#)
 11 |   : [Solution](#)
 12 | 
 13 | Sep 30
 14 | : [Variables & Objects](#)
 15 |   : [1.2](#), [2.1](#)
 16 | 
 17 | Oct 1
 18 | : **Lab**{: .label .label-purple } [Intro to Java](#)
 19 | 
 20 | Oct 2
 21 | : [Tracing, IntLists, & Recursion](#)
 22 |   : [2.1](#)
 23 | : **HW 1 due**{: .label .label-red } -->
 24 | 
 25 | 
 26 | <!-- Oct 5
 27 | : [Linked Lists & Encapsulation](#)
 28 |   : [3.1](#), [2.2](#), [2.3](#)
 29 | 
 30 | Oct 6
 31 | : **Section**{: .label .label-purple }[Linked Lists](#)
 32 |   : [Solution](#)
 33 | 
 34 | Oct 7
 35 | : [Resizing Arrays](#)
 36 |   : [2.4](#), [2.5](#)
 37 | 
 38 | Oct 8
 39 | : **Lab**{: .label .label-purple } [Resizing Arrays](#)
 40 | 
 41 | Oct 9
 42 | : [Runtime Analysis](#)
 43 |   : [8.1](#), [8.2](#), [8.3](#), [8.4](#)
 44 | : **HW 2 due**{: .label .label-red } -->
 45 | 
 46 | 
 47 | 
 48 | Tuesday, Aug 29
 49 | : Building Blocks of Modern LLMs
 50 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W1L1_LM_fundamentals_and_transformer_architecture.pdf)
 51 | : Readings: [Jurasky and Martin, Chapter 10](https://web.stanford.edu/~jurafsky/slp3/10.pdf), [Illustrated Transformer](http://jalammar.github.io/illustrated-transformer/)
 52 | 
 53 | Thursday, Aug 31
 54 | : Transformer Architecture, LLM Pretraining Methods
 55 |   : [Slides 1](https://www.andrew.cmu.edu/course/11-667/lectures/W1L2_transformer_continued.pdf)
 56 |   [Slides 2](https://www.andrew.cmu.edu/course/11-667/lectures/W1L2_Pretraining_Task.pdf)
 57 | : Readings: [Dai and Le](https://arxiv.org/abs/1511.01432), [Radford et al.](https://www.mikecaptain.com/resources/pdf/GPT-1.pdf), [Lewis et al.](https://arxiv.org/abs/1910.13461) 
 58 | 
 59 | Tuesday, Sep 5
 60 | : LLM Pretraining Data **HW1 OUT**{: .label .label-purple } 
 61 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W2L1_pretraining_data.pdf)
 62 | : Readings: [ClueWeb22](https://arxiv.org/pdf/2211.15848.pdf), [Documenting C4](https://arxiv.org/pdf/2104.08758.pdf)
 63 | 
 64 | Thursday, Sep 7
 65 | : In-Context Learning, Prompt Engineering, and Alignment
 66 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W2L2_prompt_engineering_alignment.pdf)
 67 | : Readings: [GPT-2 paper](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), [Prompt Engineering](https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/)
 68 | 
 69 | Tuesday, Sep 12
 70 | : **PROJECT OUT**{: .label .label-blue } Interpretability
 71 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W3L1%20Interpretation.pdf)
 72 | : Readings: [Edge Probing](https://arxiv.org/abs/1905.06316), [Loss Landscape](https://arxiv.org/abs/1712.09913)
 73 | 
 74 | Thursday, Sep 14
 75 | : Generation-based Automatic Evaluation Methods
 76 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W3L2_Automatic%20evaluation%20of%20LLMs.pdf)
 77 | : Readings: [GEM Benchmark](https://arxiv.org/pdf/2102.01672.pdf), [Evaluation Challenges](http://aclanthology.lst.uni-saarland.de/D17-1238.pdf)
 78 | 
 79 | Tuesday, Sep 19
 80 | : Human Evaluation Methods
 81 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W4L1_human_evaluation.pdf)
 82 | : Readings: [Evaluation of Text Generation: A Survey](https://arxiv.org/abs/2006.14799)
 83 | 
 84 | Thursday, Sep 21
 85 | : Parameter-Efficient Tuning Methods
 86 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W4L2_PETM.pptx.pdf)
 87 | : Readings: [Prompt Tuning](https://arxiv.org/abs/2104.08691), [PETM blog post](https://www.leewayhertz.com/parameter-efficient-fine-tuning/)
 88 | 
 89 | Tuesday, Sep 26
 90 | : Generation before LLMs, Discuss HW1 **HW1 DUE**{: .label .label-purple } **PROJECT PROPOSALS DUE**{: .label .label-blue } 
 91 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W5L1_homework_recap_pre_llms.pptx.pdf)
 92 | : Readings: [Building NLG Systems, chapter 3](https://www-cambridge-org.cmu.idm.oclc.org/core/services/aop-cambridge-core/content/view/048E2C49A80D37B3B8BA69DD5FA823F9/9780511519857c3_p41-78_CBO.pdf/architecture_of_a_natural_language_generation_system.pdf), [Two Decades of Statistical Language Modeleing](https://www.cs.cmu.edu/~roni/papers/survey-slm-IEEE-PROC-0004.pdf)
 93 | 
 94 | Thursday, Sep 28
 95 | : In-class Project Proposals  **HW2 OUT**{: .label .label-red }
 96 |   : [Proposals](https://www.andrew.cmu.edu/course/11-667/lectures/W5L2_student_project_proposals.pdf)
 97 | 
 98 | Tuesday, Oct 3
 99 | : Industry Talk: Together.ai; Chatbots and AI Agents
100 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W6L1_AI_agents.pdf)
101 | : Readings: [Diplomacy agents](https://www-science-org.cmu.idm.oclc.org/doi/pdf/10.1126/science.ade9097), [Generative Agents: Interactive Simulacra of Human Behavior](https://arxiv.org/abs/2304.03442v1)
102 | 
103 | Thursday, Oct 5
104 | : LLM for Search Engines
105 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W6L2%20LLM%20for%20Search.pptx.pdf)
106 | : Readings: [ANCE](https://arxiv.org/pdf/2007.00808.pdf?utm_source=findwork.dev&ref=findwork.dev&utm_medium=jobposting), [SentenceT5](https://arxiv.org/abs/2108.08877)
107 | 
108 | Tuesday, Oct 10
109 | : MIDTERM
110 |   : 
111 | : Readings: None
112 | 
113 | Thursday, Oct 12
114 | : LLMs for Music Generation (Guest Lecture from Chris Donahue) 
115 |   : 
116 | : Readings: [AudioLM](https://arxiv.org/pdf/2209.03143.pdf), [Oore+ 18](https://arxiv.org/pdf/1808.03715.pdf)
117 | 
118 | Tuesday, Oct 17
119 | : Fall break (no class)
120 |   : 
121 | : Readings: Enjoy Fall Break 
122 | 
123 | Thursday, Oct 19
124 | : Fall break (no class)
125 |   : 
126 | : Readings: Enjoy Fall Break 
127 | 
128 | Tuesday, Oct 24
129 | : Anonymous Feedback for the Class. LLM for Search Engines Continued **HW2 DUE**{: .label .label-red } 
130 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W8L1%20LLM%20for%20Search%202.pdf)
131 | : Readings: [Scaling Laws](https://arxiv.org/abs/2001.08361), [Chinchilla](https://arxiv.org/abs/2203.15556)
132 | 
133 | Thursday, Oct 26
134 | : Paradigmns of Visual Representation Learning (Guest Lecture from [Xinlei Chen](https://xinleic.xyz/)) 
135 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/xinlei.pdf)
136 | : Readings: [SimCLR](https://arxiv.org/abs/2002.05709), [MAE](https://arxiv.org/abs/2111.06377)
137 | 
138 | Tuesday, Oct 31
139 | : Midpoint Project Presentations **PROJECT MIDPOINT DUE**{: .label .label-blue }  
140 |   : [Project Midpoint Slides](https://www.andrew.cmu.edu/course/11-667/lectures/midpoint_presenation_slides.pdf)
141 | 
142 | Thursday, Nov 2
143 | : Scaling Up: Scaling Laws
144 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W9L2%20Scaling%20Up%20Scaling%20Laws.pdf)
145 | : Readings: [Adam](https://arxiv.org/abs/1412.6980), [8-bit Adam](https://arxiv.org/abs/2110.02861)
146 | 
147 | Tuesday, Nov 7
148 | : Democracy day (no class)
149 |   : 
150 |   <!-- : [Slides](#) -->
151 | : Readings: None
152 | 
153 | Thursday, Nov 9
154 | : **Industry Talk: Anthropic**; Scaling Up: Optimizer Basis
155 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W10L2%20Scaling%20Up%20Optimizer%20Basis.pdf)
156 | : Readings: [GPipe](https://arxiv.org/abs/1811.06965), [ZeRO](https://arxiv.org/abs/1910.02054)
157 | 
158 | Tuesday, Nov 14
159 | : **HW3 OUT**{: .label .label-yellow } **Industry Talk: LLaMA2**; Scaling Up: Parallel Optimization
160 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W11L1%20Scaling%20Up%20Parallel%20Training.pdf)
161 | : Readings: [ZeRO Offload](https://arxiv.org/pdf/2101.06840.pdf), [LLaMA2](https://arxiv.org/abs/2307.09288)
162 | 
163 | Thursday, Nov 16
164 | : Retrieval Augmentation
165 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W11L2%20Efficient%20Scaling%20Retrieval%20Augmentation.pdf)
166 | : Readings: [RAG](https://arxiv.org/abs/2005.11401), [REALM](https://arxiv.org/abs/2002.08909)
167 | 
168 | Tuesday, Nov 21
169 | : Efficient Serving (Guest Lecture from [Beidi Chen](https://www.andrew.cmu.edu/user/beidic/))
170 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/guest_lecture_beidi.pdf)
171 | : Readings: [H2O](https://arxiv.org/abs/2306.14048), [StreamLB](https://arxiv.org/abs/2309.17453)
172 | 
173 | Thursday, Nov 23
174 | : Happy Thanksgiving (no class)
175 |   : 
176 |   <!-- : [Slides](#) -->
177 | : Readings: [Enjoy break](#)
178 | 
179 | Tuesday, Nov 28
180 | : LLMs for Code and Tool Use (Guest Lecture from [Daniel Fried](https://dpfried.github.io/))
181 |   <!-- : [Slides](#) -->
182 | : Readings: [TODO](#)
183 | 
184 | Thursday, Nov 30
185 | : **HW3 DUE**{: .label .label-yellow }; **Industry Talk: Cohere**; Legal Considerations (Guest Lecture from [A Feder Cooper](https://afedercooper.info/))
186 |   <!-- : [Slides](#) -->
187 | : Readings: [AI Supply Chain](https://arxiv.org/abs/2309.08133)
188 | 
189 | Tuesday, Dec 5
190 | : Ethical Considerations and Model Bias (Guest Lecture from [Maarten Sap](https://maartensap.com/))
191 |   <!-- : [Slides](#) -->
192 | : Readings: [TODO](#)
193 | 
194 | Thursday, Dec 7
195 | : Final Project Presentations
196 |   <!-- : [Slides](#) -->
197 | : Readings: [TODO](#)
198 | 


--------------------------------------------------------------------------------
/_modules/calendar_revision.md:
--------------------------------------------------------------------------------
  1 | 
  2 | Tuesday, Aug 29
  3 | : Building Blocks of Modern LLMs
  4 |   : [Slides](https://www.andrew.cmu.edu/course/11-667/lectures/W1L1_LM_fundamentals_and_transformer_architecture.pdf)
  5 | : Readings: [Jurasky and Martin, Chapter 10](https://web.stanford.edu/~jurafsky/slp3/10.pdf), [Illustrated Transformer](http://jalammar.github.io/illustrated-transformer/)
  6 | 
  7 | Thursday, Aug 31
  8 | : Transformer Architecture, LLM Pretraining Methods
  9 |   : [Slides 1](https://www.andrew.cmu.edu/course/11-667/lectures/W1L2_transformer_continued.pdf)
 10 |   [Slides 2](https://www.andrew.cmu.edu/course/11-667/lectures/W1L2_Pretraining_Task.pdf)
 11 | : Readings: [Dai and Le](https://arxiv.org/abs/1511.01432), [Radford et al.](https://www.mikecaptain.com/resources/pdf/GPT-1.pdf), [Lewis et al.](https://arxiv.org/abs/1910.13461) 
 12 | 
 13 | Tuesday, Sep 5
 14 | : LLM Pretraining Data **HW1 OUT**{: .label .label-purple } 
 15 |   <!-- : [Slides](#) -->
 16 | : Readings: [ClueWeb22](https://arxiv.org/pdf/2211.15848.pdf), [Documenting C4](https://arxiv.org/pdf/2104.08758.pdf)
 17 | 
 18 | Thursday, Sep 7
 19 | : In-Context Learning, Prompt Engineering, and Alignment
 20 |   <!-- : [Slides](#) -->
 21 | : Readings: [GPT-2 paper](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), [Prompt Engineering](https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/)
 22 | 
 23 | Tuesday, Sep 12
 24 | : **PROJECT OUT**{: .label .label-blue } Interpretability
 25 |   <!-- : [Slides](#) -->
 26 | : Readings: [Edge Prbing](https://arxiv.org/abs/1905.06316), [Loss Landscape](https://arxiv.org/abs/1712.09913)
 27 | 
 28 | Thursday, Sep 14
 29 | : Generation-based Automatic Evaluation Methods
 30 |   <!-- : [Slides](#) -->
 31 | : Readings: [TODO](#)
 32 | 
 33 | Tuesday, Sep 19
 34 | :  Representation-based Automatic Evaluation Methods
 35 |   <!-- : [Slides](#) -->
 36 | : Readings: [TODO](#)
 37 | 
 38 | Thursday, Sep 21
 39 | : **HW1 DUE**{: .label .label-purple } **HW2 OUT**{: .label .label-red } Human Evaluation Methods
 40 |   <!-- : [Slides](#) -->
 41 | : Readings: [TODO](#)
 42 | 
 43 | Tuesday, Sep 26
 44 | : Generation before LLMs; Parameter-Efficient Tuning Methods **PROJECT PROPOSALS DUE**{: .label .label-blue }
 45 |   <!-- : [Slides](#) -->
 46 | : Readings: [TODO](#)
 47 | 
 48 | Thursday, Sep 28
 49 | : In-class Project Proposals 
 50 |   <!-- : [Slides](#) -->
 51 | : Readings: [TODO](#)
 52 | 
 53 | Tuesday, Oct 3
 54 | : **Industry Talk: Together.ai**; Chatbots, AIGC, Writing Assistance
 55 |   <!-- : [Slides](#) -->
 56 | : Readings: [TODO](#)
 57 | 
 58 | Thursday, Oct 5
 59 | : LLM for Search Engines
 60 |   <!-- : [Slides](#) -->
 61 | : Readings: [TODO](#)
 62 | 
 63 | Tuesday, Oct 10
 64 | : MIDTERM
 65 |   <!-- : [Slides](#) -->
 66 | : Readings: [TODO](#)
 67 | 
 68 | Thursday, Oct 12
 69 | : LLMs Beyond Text: Music (Guest Lecture) **HW2 DUE**{: .label .label-red } 
 70 |   <!-- : [Slides](#) -->
 71 | : Readings: [TODO](#)
 72 | 
 73 | Tuesday, Oct 17
 74 | : Fall break (no class)
 75 |   <!-- : [Slides](#) -->
 76 | : Readings: [TODO](#)
 77 | 
 78 | Thursday, Oct 19
 79 | : Fall break (no class)
 80 |   <!-- : [Slides](#) -->
 81 | : Readings: [TODO](#)
 82 | 
 83 | Tuesday, Oct 24
 84 | : Scaling Up: Scaling Laws
 85 |   <!-- : [Slides](#) -->
 86 | : Readings: [TODO](#)
 87 | 
 88 | Thursday, Oct 26
 89 | : Multi-Modal Models and Real-World Interactions (Guest Lecture) **PROJECT MIDPOINT DUE**{: .label .label-blue }  
 90 |   <!-- : [Slides](#) -->
 91 | : Readings: [TODO](#)
 92 | 
 93 | Tuesday, Oct 31
 94 | : Midpoint Project Presentations
 95 |   <!-- : [Slides](#) -->
 96 | : Readings: [TODO](#)
 97 | 
 98 | Thursday, Nov 2
 99 | : Scaling Up: Optimization Basics
100 |   <!-- : [Slides](#) -->
101 | : Readings: [TODO](#)
102 | 
103 | Tuesday, Nov 7
104 | : Democracy day (no class)
105 |   <!-- : [Slides](#) -->
106 | : Readings: [TODO](#)
107 | 
108 | Thursday, Nov 9
109 | : **Industry Talk: Anthropic**; Scaling Up: Parallel Training Methods
110 |   <!-- : [Slides](#) -->
111 | : Readings: [TODO](#)
112 | 
113 | Tuesday, Nov 14
114 | : Efficient Scaling: Training and Model Augmentation
115 |   <!-- : [Slides](#) -->
116 | : Readings: [TODO](#)
117 | 
118 | Thursday, Nov 16
119 | : **HW3 OUT**{: .label .label-yellow } **Industry Talk: Cohere**; Efficient Scaling: Architecture, MoE, Sparity, Modularity
120 |   <!-- : [Slides](#) -->
121 | : Readings: [TODO](#)
122 | 
123 | Tuesday, Nov 21
124 | : Efficient Serving (Guest Lecture)
125 |   <!-- : [Slides](#) -->
126 | : Readings: [TODO](#)
127 | 
128 | Thursday, Nov 23
129 | : Happy Thanksgiving (no class)
130 |   <!-- : [Slides](#) -->
131 | : Readings: [TODO](#)
132 | 
133 | Tuesday, Nov 28
134 | : LLMs for Code and Tool Use (Guest Lecture)
135 |   <!-- : [Slides](#) -->
136 | : Readings: [TODO](#)
137 | 
138 | Thursday, Nov 30
139 | : **HW3 DUE**{: .label .label-yellow } Privacy, Security, and Legal Considerations
140 |   <!-- : [Slides](#) -->
141 | : Readings: [TODO](#)
142 | 
143 | Tuesday, Dec 5
144 | : Ethical Considerations and Model Bias (Guest Lecture)
145 |   <!-- : [Slides](#) -->
146 | : Readings: [TODO](#)
147 | 
148 | Thursday, Dec 7
149 | : Final Project Presentations
150 |   <!-- : [Slides](#) -->
151 | : Readings: [TODO](#)
152 | 
153 | 
154 | 


--------------------------------------------------------------------------------
/_sass/custom/announcement.scss:
--------------------------------------------------------------------------------
 1 | .announcement {
 2 |   @extend %card;
 3 | 
 4 |   h1, h2 {
 5 |     @extend .text-gamma;
 6 |   }
 7 | 
 8 |   .announcement-meta {
 9 |     @extend .text-epsilon;
10 |   }
11 | }
12 | 


--------------------------------------------------------------------------------
/_sass/custom/card.scss:
--------------------------------------------------------------------------------
 1 | @mixin abstract-card() {
 2 |   box-shadow: 0 1px 3px rgba(0, 0, 0, 0.07), 0 4px 14px rgba(0, 0, 0, 0.05);
 3 |   margin: $sp-4 (-$gutter-spacing-sm);
 4 | 
 5 |   @include mq(md) {
 6 |     border-radius: $border-radius;
 7 |     margin: $sp-4 0;
 8 |   }
 9 | }
10 | 
11 | %card {
12 |   @include abstract-card();
13 |   display: flex;
14 |   flex-direction: column;
15 |   min-width: 0;
16 |   padding: 0 $sp-4;
17 |   position: relative;
18 |   word-wrap: break-word;
19 | 
20 |   >:first-child {
21 |     border-top: none !important;
22 |   }
23 | 
24 |   >:last-child {
25 |     border-bottom: none !important;
26 |   }
27 | 
28 |   .label {
29 |     border-radius: $border-radius;
30 |     margin-left: 0;
31 |     user-select: none;
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/_sass/custom/custom.scss:
--------------------------------------------------------------------------------
 1 | // Just the Class dependencies
 2 | @import 'card';
 3 | 
 4 | // Just the Class styles
 5 | @import 'announcement';
 6 | @import 'module';
 7 | @import 'schedule';
 8 | @import 'staffer';
 9 | 
10 | // Overrides
11 | code {
12 |   font-size: 14px;
13 |   padding: 0.2em 0.4em;
14 |   border: none;
15 | }
16 | 
17 | iframe {
18 |   max-width: 100%;
19 | }
20 | 
21 | details {
22 |   @extend .mb-4;
23 | }
24 | 
25 | summary {
26 |   @extend .btn, .btn-outline;
27 | 
28 |   width: 100%;
29 | }
30 | 
31 | .main-content-wrap {
32 |   max-width: $content-width;
33 |   margin: auto;
34 | }
35 | 
36 | .main-content {
37 |   dl {
38 |     display: block;
39 |     grid-template-columns: none;
40 |   }
41 | 
42 |   dt {
43 |     font-weight: 700;
44 |     text-align: start;
45 | 
46 |     &::after {
47 |       content: normal;
48 |     }
49 |   }
50 | 
51 |   dd {
52 |     font-weight: normal;
53 | 
54 |     + dt {
55 |       margin-top: 1em;
56 |     }
57 |   }
58 | }
59 | 


--------------------------------------------------------------------------------
/_sass/custom/module.scss:
--------------------------------------------------------------------------------
 1 | .main-content .module,
 2 | .module {
 3 |   @extend %card;
 4 | 
 5 |   h1,
 6 |   h2,
 7 |   h3,
 8 |   h4,
 9 |   h5,
10 |   h6 {
11 |     &:first-child {
12 |       margin-top: $sp-4;
13 |     }
14 |   }
15 | 
16 |   >dl {
17 |     border-bottom: $border $border-color;
18 |     border-top: $border $border-color;
19 |     display: grid;
20 |     grid-template-columns: max-content 1fr;
21 |     margin: $sp-2 (-$sp-4);
22 | 
23 |     &:first-child {
24 |       margin-top: 0;
25 |     }
26 | 
27 |     &:last-child {
28 |       margin-bottom: 0;
29 |     }
30 | 
31 |     @include mq(lg) {
32 |       grid-template-columns: 1fr 7fr;
33 |     }
34 | 
35 |     %module-item {
36 |       margin: 0;
37 |       padding: $sp-2;
38 | 
39 |       @include mq(sm) {
40 |         padding: $sp-2 $sp-4;
41 |       }
42 |     }
43 | 
44 |     >dt {
45 |       @extend %module-item;
46 |       border-top: $border $border-color;
47 |       font-weight: normal;
48 |       text-align: right;
49 | 
50 |       +dd {
51 |         border-top: $border $border-color;
52 |       }
53 | 
54 |       &:first-child {
55 |         border-top: none;
56 | 
57 |         +dd {
58 |           border-top: none;
59 |         }
60 |       }
61 | 
62 |       &::after {
63 |         content: ":";
64 |       }
65 |     }
66 | 
67 |     >dd {
68 |       @extend %module-item;
69 | 
70 |       +dd {
71 |         padding-top: 0;
72 |       }
73 | 
74 |       ol, ul, dl {
75 |         margin: 0;
76 |       }
77 | 
78 |       dl {
79 |         display: flex;
80 |         flex-direction: column;
81 | 
82 |         @include mq(sm) {
83 |           flex-direction: row;
84 |         }
85 | 
86 |         dt {
87 |           flex: 0 0 62.5%;
88 |           margin: 0;
89 |         }
90 | 
91 |         dd {
92 |           margin: 0;
93 |         }
94 |       }
95 |     }
96 |   }
97 | }
98 | 


--------------------------------------------------------------------------------
/_sass/custom/schedule.scss:
--------------------------------------------------------------------------------
  1 | .schedule {
  2 |   @include abstract-card();
  3 |   overflow-x: scroll;
  4 |   position: relative;
  5 | 
  6 |   li::before {
  7 |     display: none;
  8 |   }
  9 | 
 10 |   ul.schedule-timeline,
 11 |   ul.schedule-group,
 12 |   ul.schedule-events {
 13 |     margin-top: 0;
 14 |     padding-left: 0;
 15 |   }
 16 | 
 17 |   ul.schedule-timeline {
 18 |     margin: 40px auto 0;
 19 |     position: absolute;
 20 |     width: 100%;
 21 |   }
 22 | 
 23 |   .schedule-time {
 24 |     @extend .fs-2;
 25 |     color: $grey-dk-000;
 26 |     height: 40px;
 27 |     margin: 0;
 28 |     padding: $sp-2;
 29 |     position: relative;
 30 | 
 31 |     &::after {
 32 |       background-color: $border-color;
 33 |       content: '';
 34 |       height: 1px;
 35 |       left: 0;
 36 |       position: absolute;
 37 |       top: 0;
 38 |       width: 100%;
 39 |     }
 40 |   }
 41 | 
 42 |   .schedule-group {
 43 |     display: flex;
 44 |     margin-bottom: 0;
 45 |     position: relative;
 46 |   }
 47 | 
 48 |   .schedule-day {
 49 |     border-left: $border $border-color;
 50 |     flex: 1 0 0;
 51 |     margin: 0;
 52 |     min-width: 120px;
 53 | 
 54 |     &:first-of-type {
 55 |       border-left: 0;
 56 |     }
 57 |   }
 58 | 
 59 |   h2.schedule-header {
 60 |     align-items: center;
 61 |     display: flex;
 62 |     font-size: 18px !important;
 63 |     height: 40px;
 64 |     justify-content: center;
 65 |     margin: 0;
 66 |   }
 67 | 
 68 |   .schedule-events {
 69 |     display: flex;
 70 |     padding: 0;
 71 |     position: relative;
 72 |   }
 73 | 
 74 |   .schedule-event {
 75 |     background-color: $grey-dk-000;
 76 |     border-radius: $border-radius;
 77 |     box-shadow: 0 10px 20px rgba(0, 0, 0, .1), inset 0 -3px 0 rgba(0, 0, 0, .2);
 78 |     color: $white;
 79 |     float: left;
 80 |     height: 100%;
 81 |     margin: 0;
 82 |     padding: $sp-1 $sp-2;
 83 |     position: absolute;
 84 |     width: 100%;
 85 | 
 86 |     .name {
 87 |       @extend .fs-3, .fw-700;
 88 |     }
 89 | 
 90 |     .time,
 91 |     .location {
 92 |       @extend .fs-2;
 93 |     }
 94 | 
 95 |     &.lecture {
 96 |       background-color: $grey-dk-000;
 97 |     }
 98 | 
 99 |     &.office-hours-instructor {
100 |       background-color: $purple-000;
101 |     }
102 | 
103 |     &.office-hours-ta {
104 |       background-color: $blue-000;
105 |     }
106 |   }
107 | }
108 | 


--------------------------------------------------------------------------------
/_sass/custom/staffer.scss:
--------------------------------------------------------------------------------
 1 | .staffer {
 2 |   display: flex;
 3 |   margin: $sp-4 0;
 4 | 
 5 |   .staffer-image {
 6 |     border-radius: 50%;
 7 |     height: 100px;
 8 |     margin-right: $sp-4;
 9 |     width: auto;
10 |   }
11 | 
12 |   p,
13 |   .staffer-name {
14 |     margin: $sp-1 !important;
15 |   }
16 | 
17 |   .staffer-pronouns {
18 |     @extend .label, .text-grey-dk-100, .bg-grey-lt-200;
19 | 
20 |     user-select: none;
21 |   }
22 | 
23 |   .staffer-meta {
24 |     @extend .text-grey-dk-000;
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/_schedules/weekly.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | timeline:
 3 |   - '1:00 PM'
 4 |   - '1:30 PM'
 5 |   - '2:00 PM'
 6 |   - '2:30 PM'
 7 |   - '3:00 PM'
 8 |   - '3:30 PM'
 9 |   - '4:00 PM'
10 |   - '4:30 PM'
11 |   - '5:00 PM'
12 |   - '5:30 PM'
13 |   - '6:00 PM'
14 | schedule:
15 |   - name: Monday
16 |     events:
17 |       - name: Office Hours (TAs)
18 |         class: office-hours-ta
19 |         start: 5:00 PM
20 |         end: 6:00 PM
21 |         location: GHC 5417
22 |   - name: Tuesday
23 |     events:
24 |       - name: Lecture
25 |         class: lecture
26 |         start: 2:00 PM
27 |         end: 3:20 PM
28 |         location: Baker Hall A51
29 |       - name: Office Hours (Daphne)
30 |         class: office-hours-instructor
31 |         start: 3:30 PM
32 |         end: 4:30 PM
33 |         location: GHC 6407
34 |       - name: Office Hours (TAs)
35 |         class: office-hours-ta
36 |         start: 5:00 PM
37 |         end: 6:00 PM
38 |         location: GHC 5208
39 |   - name: Wednesday
40 |   - name: Thursday
41 |     events:
42 |       - name: Lecture
43 |         class: lecture
44 |         start: 2:00 PM
45 |         end: 3:20 PM
46 |         location: Baker Hall A51
47 |       - name: Office Hours (Chenyan)
48 |         class: office-hours-instructor
49 |         start: 3:30 PM
50 |         end: 4:30 PM
51 |         location: GHC 6409
52 | ---
53 | 


--------------------------------------------------------------------------------
/_staffers/aidan.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Aidan Yang
 3 | role: Teaching Assistant
 4 | website: https://aidanby.github.io/
 5 | photo: aidan.jpg
 6 | office-hours: TODO
 7 | ---
 8 | 
 9 | <!-- [Schedule an appointment](#){: .btn .btn-outline } -->
10 | 


--------------------------------------------------------------------------------
/_staffers/amanda.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Amanda Bertsch
3 | role: Teaching Assistant
4 | website: https://cs.cmu.edu/~abertsch/
5 | photo: abertsch.jpg
6 | ---
7 | 


--------------------------------------------------------------------------------
/_staffers/chenyan.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Chenyan Xiong
3 | role: Instructor
4 | website: http://www.cs.cmu.edu/~cx/
5 | photo: chenyan.jpg
6 | office-hours: Thursdays 3:30-4:30 GHC 6409
7 | ---
8 | 


--------------------------------------------------------------------------------
/_staffers/clement.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Clement Fung
 3 | role: Teaching Assistant
 4 | website: http://clementfung.me
 5 | photo: clement.jpg
 6 | office-hours: TODO
 7 | ---
 8 | 
 9 | <!-- [Schedule an appointment](#){: .btn .btn-outline } -->
10 | 


--------------------------------------------------------------------------------
/_staffers/daphne.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Daphne Ippolito
 3 | role: Instructor
 4 | website: http://daphnei.com
 5 | photo: daphne.png
 6 | office-hours: Tuesdays 3:30-4:30 PM, GHC 6407
 7 | ---
 8 | 
 9 | <!-- [Schedule an appointment](#){: .btn .btn-outline } -->
10 | 


--------------------------------------------------------------------------------
/_staffers/emmy.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Emmy Liu
 3 | role: Teaching Assistant
 4 | website: http://nightingal3.github.io
 5 | photo: emmy.png
 6 | office-hours: TODO
 7 | ---
 8 | 
 9 | <!-- [Schedule an appointment](#){: .btn .btn-outline } -->
10 | 


--------------------------------------------------------------------------------
/_staffers/hari.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Hari Manikandan
3 | role: Teaching Assistant
4 | website: http://hariharan98m.github.io
5 | photo: hari.png
6 | office-hours: TODO
7 | ---


--------------------------------------------------------------------------------
/_staffers/mrigank.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Mrigank Raman
3 | role: Teaching Assistant
4 | website: http://mrigankraman.github.io
5 | photo: mrigank.jpeg
6 | ---
7 | 
8 | <!-- [Schedule an appointment](#){: .btn .btn-outline } -->
9 | 


--------------------------------------------------------------------------------
/_staffers/roochi.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Roochi Shah
3 | role: Teaching Assistant
4 | photo: Roochi.png
5 | ---
6 | 


--------------------------------------------------------------------------------
/_staffers/srinivas.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Srinivas Gowriraj
3 | role: Teaching Assistant
4 | website: https://srinivas-gowriraj.github.io/
5 | photo: srinivas.jpg
6 | ---


--------------------------------------------------------------------------------
/_staffers/yiming.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Yiming Zhang
3 | role: Teaching Assistant
4 | website: https://y0mingzhang.github.io/
5 | photo: yiming.jpg
6 | ---
7 | 


--------------------------------------------------------------------------------
/about.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: home
  3 | nav_exclude: true
  4 | permalink: /:path/
  5 | title: Syllabus
  6 | seo:
  7 |   type: Course
  8 |   name: Just the Class
  9 | description: >-
 10 |     Course policies and information.
 11 | ---
 12 | 
 13 | #  Large Language Models Methods and Applications
 14 | 
 15 | {:.no_toc}
 16 | 
 17 | ## Table of contents
 18 | {: .no_toc .text-delta }
 19 | 
 20 | 1. TOC
 21 | {:toc}
 22 | 
 23 | ---
 24 | 
 25 | 
 26 | ## Important Details
 27 | 
 28 | * **Location**: [Baker Hall](https://www.cmu.edu/finance/property-space/floorplan-room/acad-admin/BPH/index.html) A51
 29 | * **Time**: Tuesdays and Thursdays 2 PM - 3:20 PM
 30 | * **Instructor email**: llms-11-667 @ andrew.cmu.edu
 31 | 
 32 | ## Course Description
 33 | 
 34 | Large Language Models Methods and Applications (11-667) is a graduate-level course that aims to provide a holistic view of the current state of large language models.
 35 | The first half of this course starts with the basic of language models, including network architectures, training, inference, and evaluation.
 36 | Then it discusses the interpretation (or attempts of), alignments, and emergent capabilities of large language models, followed by its popular applications in language tasks and new utilizations beyond texts.
 37 | In the second half, this course first presents the techniques of scaling up language model pretraining and recent approaches in making the pretraining of large models and their deployment more efficient.
 38 | It then discusses various concerns surrounding the deployment of large language models and wraps up with the challenges and frontiers of LLM developments.
 39 | 
 40 | This course is designed to give graduate-level students an overview of the techniques behind LLMs and a thorough grounding on the fundamentals and cutting-edge developments of LLMs, to prepare them for further research or applied endeavors in this new AI era.
 41 | 
 42 | 
 43 | ## Learning Goals
 44 | 
 45 | Students who successfully complete this course will be able to:
 46 | 
 47 | * Compare and contrast different models in the LLM ecosystem in order to determine the best model for any given task.
 48 | * Implement and train a neural language model from scratch in Pytorch.
 49 | * Utilize open-source libraries to finetune and do inference with popular pre-trained language models.
 50 | * Understand how to apply LLMs to a variety of downstream applications, and how decisions made during pre-training affect suitability for these tasks.
 51 | * Read and comprehend recent, academic papers on LLMs and have knowledge of the common terms used in them (alignment, scaling laws, RLHF, prompt engineering, instruction tuning, etc.).
 52 | * Design new methodologies to leverage existing large scale language models in novel ways.
 53 | 
 54 | 
 55 | ## Prerequisites
 56 | 
 57 | Students should have a basic understanding of machine learning, equivalent to the material covered by [10-301/10-601](http://www.cs.cmu.edu/~mgormley/courses/10601/schedule.html), and be familiar with concepts in natural language processing, equivalent to those covered by [11-411/11-611](http://demo.clab.cs.cmu.edu/NLP/).
 58 | 
 59 | Students are expected to be fluent in Python. Familiarity with deep learning frameworks such as PyTorch will also be helpful.
 60 | 
 61 | 
 62 | ## Class Format
 63 | 
 64 | Classes will be in person, every Tuesday and Thursday 2:00PM-3:20PM at Baker Hall A51. 
 65 | 
 66 | **Readings**: There will be reading materials for each lecture, which students are required to read through  _before_ the class.
 67 | 
 68 | **Quizes**: Each class will start with an in-person quiz about the reading materials for the lecture or the material from previous lectures.
 69 | 
 70 | **Interactive Activities**:
 71 | There will be ungraded, interactive activities interspersed through the lectures.
 72 | These will be things like discussing a topic from the class with those sitting near you or answering questions via polling software.
 73 | 
 74 | **Homework**: There will be three homework assignments, to be completed individually.
 75 | 
 76 | **Project**: There will be a group project with several checkpoints along the semester. The project will be completed in groups of 3-5 people. 
 77 | 
 78 | **Exams:** There will be a midterm exam on October 10. There will not be a final exam.
 79 | 
 80 | 
 81 | ## Grading
 82 | 
 83 | * 30%: Homeworks
 84 |     * Each homework is worth 10% of your grade.
 85 | * 45%: Course Project
 86 |     * Proposal: 10%
 87 |     * Midpoint report: 10%
 88 |     * Final report and presentation: 20%
 89 |     * Peer feedback: 5%
 90 | * 5%: In-Class Quizzes
 91 |     * These will be given in the last 10 minutes of class.
 92 |     * Only your top 20 quiz grades will be considered. (This means you can miss 5 quizzes and still get full marks.)
 93 | * 20%: Midterm
 94 | 
 95 | ## Late Policy
 96 | 
 97 | Each student has five free late dats to use on the three homeworks.
 98 | If you are out of late days, then you will not be able to get credit for subsequent late homeworks. 
 99 | One “day” is defined as anytime between 1 second and 24 hours after the homework deadline.
100 | The intent of the late day policy it to allow you to take extra time due to unforseen circumstances like illnesses.
101 | To use your late days on a homework, you **MUST** fill out [this form](https://forms.gle/pScBLUTirr3K7vJc7).
102 | 
103 | 
104 | In the event of a medical emergency, please make your personal health, physical and mental, your first priority.
105 | Seek help from medical and care providers such as University Health Services.
106 | Students can request medical extensions afterwards with proof/note from providers.
107 | These will not count toward your 5 days.
108 | For other emmergencies and absences, students can request extensions with corresponding documentation in a case-by-case basis with instructors.
109 | 
110 | There will be no makeups for the quizzes.
111 | There will be no late days on the project milestones.
112 | If your team cannot complete a project milestone, we expect you to submit to us your incomplete work by the due date, and we will consider accepting subsequent revisions on a case-by-base basis.
113 | 
114 | ## Accomodations
115 | If you have a disability and require
116 | accommodations, please contact Catherine Getchell, Director of Disability Resources, 412-268-6121,
117 | getchell@cmu.edu.
118 | If you have an accommodations letter from the Disability Resources office, we encourage you to discuss your accommodations and needs with us as early in the semester as possible.
119 | We will work with you to ensure that accommodations are provided as appropriate.
120 | 
121 | ## Policy on Missing Class
122 | 
123 | There will be no makeups for the quizzes if you miss class.
124 | The graded quizes are intended to encourage class attendance and participation, as there will be a variety of ungraded interactive activites in class which are important to your learning experience.
125 | In keeping with the principles of universal design for learning, we intend to be flexible with the quizzes; you can miss up to 5 quizzess for any reason (religious observances, visa issues, illness, etc.) and still get full marks, as we will only grade your top 20.
126 | 
127 | In extenuating circumstances where students have no option but to miss class (such as visa issues or a medical emergency), we may provide video recordings to individual students.
128 | If you need to request this, please email us llms-11-667 @ andrew.cmu.edu.
129 | We make no guarantees about the production quality or learning experience from these videos.
130 | 
131 | ## Academic Integrity
132 | Please take some time to read through CMU's [Academic Integrity Policy](https://www.cmu.edu/policies/student-and-student-life/academic-integrity.html).
133 | Students who violate this policy will be subject to the disciplinary actions described in the [Student Handbook](https://www.cmu.edu/student-affairs/theword/academic-discipline/index.html).
134 | 
135 | ### Collaboration on Homeworks
136 | The three homeworks should be completed individually.
137 | However, we encourage you to ask questions on Piazza and in office hours.
138 | While you may discuss strategies amongst yourselves, all experiments and analyses should be your own.
139 | 
140 | ### Use of Language Models
141 | Using a language model to generate any part of a homework answer without attribution will be considered a violation of academic integrity.
142 | This means that if you use ChatGPT or CoPilot to assist you on a homework, you **must** state so explicitly within your response.
143 | On each homework, you will be asked to attest to whether you used AI systems to assist on the homework, and if so, in what manner.
144 | If you have used AI systems to generate any part of your homework, you must submit the prompts/instructions/inputs you used to obtain the generated output.
145 | Your grading will be based on both the correctness of your homework response and the quality of your prompts/instructions.
146 | Errors in the generated outputs that appear in your homework response , and non-interesting prompts, e.g., merely putting in the homework questions to the language model, are not intellectual efforts and are unlikely to receive a good grade.
147 | 
148 | 


--------------------------------------------------------------------------------
/announcements.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Announcements
 4 | nav_exclude: true
 5 | description: A feed containing all of the class announcements.
 6 | ---
 7 | 
 8 | # Announcements
 9 | 
10 | Announcements are stored in the `_announcements` directory and rendered according to the layout file, `_layouts/announcement.html`.
11 | 
12 | {% assign announcements = site.announcements | reverse %}
13 | {% for announcement in announcements %}
14 | {{ announcement }}
15 | {% endfor %}
16 | 


--------------------------------------------------------------------------------
/assets/images/Roochi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/Roochi.png


--------------------------------------------------------------------------------
/assets/images/abertsch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/abertsch.jpg


--------------------------------------------------------------------------------
/assets/images/aidan.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/aidan.jpg


--------------------------------------------------------------------------------
/assets/images/chenyan.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/chenyan.jpg


--------------------------------------------------------------------------------
/assets/images/clement.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/clement.jpg


--------------------------------------------------------------------------------
/assets/images/daphne.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/daphne.png


--------------------------------------------------------------------------------
/assets/images/emmy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/emmy.png


--------------------------------------------------------------------------------
/assets/images/hari.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/hari.png


--------------------------------------------------------------------------------
/assets/images/mrigank.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/mrigank.jpeg


--------------------------------------------------------------------------------
/assets/images/placeholder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/placeholder.png


--------------------------------------------------------------------------------
/assets/images/srinivas.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/srinivas.jpg


--------------------------------------------------------------------------------
/assets/images/yiming.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/assets/images/yiming.jpg


--------------------------------------------------------------------------------
/calendar.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Lectures
 4 | description: Listing of course modules and topics.
 5 | ---
 6 | 
 7 | # Calendar
 8 | 
 9 | {% for module in site.modules %}
10 | {{ module }}
11 | {% endfor %}
12 | 


--------------------------------------------------------------------------------
/homework1.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: page
  3 | title: Homework 1
  4 | description: >-
  5 |     Instructions for Homework 1
  6 | ---
  7 | 
  8 | # Homework 1
  9 | {:.no_toc}
 10 | 
 11 | ## Table of contents
 12 | {: .no_toc .text-delta }
 13 | 
 14 | 1. TOC
 15 | {:toc}
 16 | 
 17 | ---
 18 | 
 19 | **This homework is due on Tuesday, September 26 at 2 PM.**
 20 | 
 21 | [Submission Tex Template](https://github.com/daphnei/cmu-llm-class/blob/main/homework_materials/hw1_template.tex)
 22 | You are not required to follow this template for submission, but it is here for your convenience.
 23 | 
 24 | _(instructions last updated September 10)_
 25 | 
 26 | # September 10 Update
 27 | The starter code has been updated to fix a bug in Question 3.1 as noted in the Piazza.
 28 | 
 29 | Anywhere in the homework which instructs you to use ``davinci``, you may use `davinci-002` instead, which costs 1/10 as much.
 30 | You can see the pricing of the different models at https://openai.com/pricing.
 31 | Just make a note in your homework of which model you are using.
 32 | 
 33 | 
 34 | # Intro
 35 | 
 36 | In this homework, you will explore several ways OpenAI's GPT-3 models can be used to perform NLP and NLG (natural language generation) tasks.
 37 | You will also build an understanding of some of the limitations of large language models.
 38 | 
 39 | You do not turn in any code for this assignment.
 40 | Instead you will write a report discussing your observations for each question.
 41 | Your report should have clear section headers for each question and subquestion.
 42 | When appropriate, you should use figures and tables to support your answers.
 43 | You should submit your report as a PDF using the Canvas homework submission form.
 44 | 
 45 | ## Getting Started with OpenAI's API
 46 | 
 47 | We have provided you an [IPython notebook](https://github.com/daphnei/cmu-llm-class/blob/main/homework_materials/hw1_starter_code.ipynb) with starter code which you may use to solve the problems in the homework.
 48 | The [IPython notebook](https://github.com/daphnei/cmu-llm-class/blob/main/homework_materials/hw1_starter_code.ipynb) contains a simple interface for interacting with OpenAI's models.
 49 | Unless otherwise specified, you should use the model `davinci` in your experimentation.
 50 | This is the last model OpenAI released which didn't have any finetuning for alignment.
 51 | OpenAI has three other smaller model sizes you will be asked to compare against; in order from smallest to biggest these are
 52 | ``ada``, ``babbage``, and ``curie``.
 53 | For example, to swap to ada, you would recreate the inference engine using ``engine = OpenAIEngine('ada')``.
 54 | For the final problem in the homework, you will be asked to compare `davinci` with `text-davinci-003`, a variant of GPT-3 which was finetuned for instruction following.
 55 | 
 56 | Note that we are choosing to use slightly older versions of GPT-3's models in this homework because there tends to be more information about how the older models were trained than there is for OpenAI's newer ones.
 57 | 
 58 | If you feel uncomfortable creating an account with OpenAI in order to complete the homework, please email the professors at llms-11-667 @ andrew.cmu.edu, and we will help come up with an alternative arrangement for model inference. This will not affect your homework grade.
 59 | 
 60 | ## Disclaimer
 61 | As you all know by know, 11-667 is a completely new course, and you are guinea-pigging a completely new homework!
 62 | We have done our best to make sure all homework problems are solveable and that the LLMs will behave in a relatively predictable way.
 63 | That being said, we are also giving you freedom to experiment and get creative with the LLM prompts you use to solve the homework problems.
 64 | If you are struggling to get the LLM to behave in a way that allows you to answer any of the homework problems, please post on Piazza.
 65 | Also, we would love any feedback on how to make this homework better for future students taking the class.
 66 | 
 67 | # 1. Observing the Impact of Decoding Strategy
 68 | 
 69 | ## 1.1 Rolling a Twenty-Sided Die (10 points)
 70 | 
 71 | In this question, you will investigate the impact of the choice of decoding strategy by examining prompts which ought to yield a relatively uniform distribution over a set of possible outcomes.
 72 | 
 73 | Consider the task of rolling a 20-sided die.
 74 | In the real world, you should expect that after rolling a fair 20-sided die 100 times, you will observe each possible outcome about 5 times.
 75 | Your goal for this homework question is to understand how the choice of decoding strategy can influence how far an LLM's generations deviate from real-world assumptions.
 76 | 
 77 | Take a look at Section 1 in the [IPython notebook](https://github.com/daphnei/cmu-llm-class/blob/main/homework_materials/hw1_starter_code.ipynb).
 78 | It contains a prompt which rolls a D20 (aka a twenty-sided die) using a model finetuned for dialog.
 79 | You may use the prompt provided there, or try to construct your own.
 80 | Run the language model with the D20-rolling prompt 128 times using full random sampling (`top_p = 1.0`).
 81 | This should give you a distribution over outcomes the LLM is capable of generating.
 82 | 
 83 | **Analysis Questions**
 84 | 1. When using full random sampling (`top_p=1.0`), is the LLM equally likely to generate all outcomes? If not, what is your hypothesis for why this could be the case?
 85 | 2. Repeat the experiment with at least three values of `top_p` and discuss how changing the `top_p` hyperparameter effects the distribution of outcomes that get generated.
 86 | 
 87 | You should use plots or other visualizations of the output distributions to support your answer.
 88 | You will be graded on the correctness and thoroughness of your responses.
 89 | 
 90 | 
 91 | ## 1.2 Longform Generation (5 points)
 92 | 
 93 |   
 94 | Using a prompt of your choice, instruct the language model to generate a 256-token story.
 95 | Repeat this process three times, employing three different `top_p` values: 0.0, 0.5, and 1. Feel free to get as creative as you want with your prompt.
 96 |   
 97 | Next, prompt the language model with the opening sentence from a renowned book or speech and request it to generate 256 tokens. You can select from [this list of famous opening paragraphs](https://www.shortlist.com/lists/literatures-greatest-opening-paragraphs) or [this list of famous speeches](https://www.historyplace.com/speeches) or find your own. Do this three times with three different `top_p` values of 0.0, 0.5, and 1.
 98 | 
 99 | You should now in total have 6 generations.
100 | 
101 | **Analysis Questions**
102 | 1. For two sets of generations, discuss the impact choice of decoding strategy has on diversity in the generated stories. Compute a lexical diversity metric such as type-token ratio (the total number of unique words divided by the total number of words) to support your answer.
103 | 2. Regarding your second prompt, did the language model generate the correct continuation of the book/speech? Provide reasoning as to why it may or may not have done so.
104 | 3. When `top_p`=0, does adjusting the `frequency_penalty` increase the lexical diversity of the stories? Explain why or why not this is the case.
105 | 
106 | # 2. Measuring Perplexity (10 points)
107 | Perplexity is a key metric to evaluate the quality of an LLM.
108 | Intuitively, to be "perplexed" means to be surprised.
109 | We use perplexity to measure how much the model is surprised by seeing new data.
110 | If an evaluation dataset has low perplexity, the model is confident  that this dataset is similar to things it has seen before.
111 | The higher the perplexity, the less confident the model is that the data is like what it has seen before.
112 | 
113 | In this question, you will evaluate perplexity of several variations of a poem of your choice.
114 | Go to [this list of famous poems](https://www.poetrysoup.com/famous/poems/top_100_famous_poems.aspx) and pick your favorite.
115 | Copy and paste this poem into the starter code in the [IPython notebook](https://github.com/daphnei/cmu-llm-class/blob/main/homework_materials/hw1_starter_code.ipynb) and compute the poem's perplexity.
116 | 
117 | **Analysis Questions**
118 | 1. Add a typo to each line of the poem. Does the perplexity go up or down? Give a reason for why this might have happened.
119 | 2. Shuffle the order of the stanzas in the poem. Does the perplexity go up or down? Give a reason for why this might have happened.
120 | 3. Taking inspiration from the concept of [mimic poems](https://penandthepad.com/directions-writing-mimic-poem-3668.html), swap several of the words in your poem to alternative words which still make sense in context. Does the perplexity go up or down?  ([Here](https://www.teenink.com/poetry/all/article/13900/Oh-Homework-Oh-Homework) is an example of a mimic poem of Walt Whitman's ["O Captain! My Captain!"](https://www.poetrysoup.com/famous/poem/o_captain!_my_captain!_198), though you don't need to do anything this complex or fancy.) Give a reason for why this might have happened.
121 | 3. Famous poems like the one you are using are very likely to be in GPT-3's training set. How might this affect the poem's perplexity compared to a very new poem 
122 | which is not yet in any training set? 
123 | 
124 | # 3. Experimenting with Few-Shot Prompting
125 | 
126 | Few-shot learning with language models, sometimes called in-context learning, involves "teaching" a language model how to do a task by providing an instruction along with several examples of the task as a textual prompt.
127 | For example, to get a model to translate the word "squirrel" into Chinese, you might pass the LLM the prompt:
128 | 
129 | ```
130 | Translate English to Chinese.
131 | 
132 | dog -> 狗
133 | apple -> 苹果
134 | coffee -> 咖啡
135 | supermarket -> 超市
136 | squirrel ->
137 | ```
138 | 
139 | In this section, you will use this technique for two tasks: (1) to evaluate the model's common-sense reason abilities and (2) to build a pun explainer.
140 | 
141 | ## 3.1 Few-Shot Learning for the Choice of Plausible Alternatives Task (10 points)
142 | Many probe tasks have been proposed to evaluate the commonsense reasoning capabilities of
143 | LLMs.
144 | We will use the [Choice of Plausible Alternatives](https://aclanthology.org/S12-1052/) (COPA) probe task from the [SuperGLUE](https://super.gluebenchmark.com/)
145 | suite of LLM benchmarks to evaluate the fewshot performance of the LLM.
146 | 
147 | Here is an example from COPA (the correct choice is (b)):
148 | ```
149 | Premise: The man broke his toe.
150 | Question: What was the CAUSE of this?
151 | (a) He got a hole in his sock.
152 | (b) He dropped a hammer on his foot.
153 | ```
154 | 
155 | The [IPython notebook](https://github.com/daphnei/cmu-llm-class/blob/main/homework_materials/hw1_starter_code.ipynb) loads in three small subsets of COPA:
156 | 1. `train`: You may use this as a source of examples for your few-shot prompts.
157 | 2. `dev`: You should use this to compare different prompts to come up with good ones.
158 | 3. `test`: Once you are happy with your prompts, you should evaluate on `test` just once. You should include the accuracies on `test` in your final report.
159 | 
160 | Take a look at the starter code in the [IPython notebook](https://github.com/daphnei/cmu-llm-class/blob/main/homework_materials/hw1_starter_code.ipynb).
161 | The prompt in the notebook provides an instruction, but no examples.
162 | This is known as a "zero-shot" setting since no examples are provided.
163 | 
164 | Since COPA is a classification task, where the goal is to classify which of the choices is more correct, we do not actually need to do any generation.
165 | Rather, during evaluation, we construct two strings, one with Option (a) and one with Option (b), and them form a prediction based on which one the model says is more likely.
166 | 
167 | If you run the starter code, you will see that this prompt achieves ~55% accuracy on the validation set.
168 | You will use this prompt as a baseline.
169 | 
170 | Your job is to develop few-shot prompt which contain both an instruction and several examples of the task.
171 | You may either write your own examples from scratch, or take examples from the train set.
172 | - the baseline prompt
173 | - a prompt containing 1 example
174 | - a prompt containing 3 examples
175 | - a prompt containing 5 examples
176 | - two other prompt configurations of your choice. For example, you could try:
177 |   - your 5 example prompt but with the examples shuffled
178 |   - a prompt where all the examples are intentionally mislabeled
179 |   - the same examples as your other prompts but with an alternative template
180 |   - modify the instruction
181 | 
182 | **Analysis Questions**
183 | 1. Create a table showing final test set accuracies for each prompt.
184 | 2. What prompting formats did you experiment with? What worked well and what didn’t work?
185 | 3. What factors do you think most affect the model's performance?
186 | 4. For your best prompt, perform an error analysis of the test set examples it gets wrong. Qualitivaly, do you notice any patterns in the examples the model fails to classify correctly? 
187 | 5. Try our your best few-shot prompt from Question with the three smaller model sizes: ``curie``, ``babbage`` and ``ada``. How much does test set accuracy degrade on the smaller models? What is the smallest model size you can get away with and have good accuracy?
188 | 
189 | ## 3.2 Few-shot Learning for Generation Tasks (5 points)
190 | Few-shot learning techniques can also be used fo tasks that require generation.
191 | Choose one of the following sentence manipulation tasks, and try to write a few-shot prompt to get the model to do the task.
192 | 
193 | a. Convert a sentence to [pig latin](https://www.wikihow.com/Speak-Pig-Latin).
194 | b. Add a space between each character in the sentence.
195 | c. Apply a specified Caesar cipher[https://en.wikipedia.org/wiki/Caesar_cipher]
196 | 
197 | **Analysis Questions**
198 | 1. What few-shot prompt did the pick and how did you decide on it?
199 | 2. The three tasks listed above all require character-level manipulations. Why might such tasks be challenging for many modern large language models?
200 | 
201 | 
202 | # 4. Investigating Knowledge Across Different Model Sizes (10 points)
203 | 
204 | Pick a Wikipedia article on a person or place of your choice, and write a prompt containing the start of the first sentence in the article.
205 | You may choose to omit the pronunciation and other parenthetical details).
206 | For example, if you choose [Andrew Carnegie](https://en.wikipedia.org/wiki/Andrew_Carnegie), you should prompt with either `Andrew Carnegie (Scots: [kɑrˈnɛːɡi], English: /kɑːrˈnɛɡi/ kar-NEG-ee;[2][3][note 1] November 25, 1835 – August 11, 1919) was` or `Andrew Carnegie was an` or similar.
207 | Use this prompt with both `davinci` (the largest version of GPT-3) and `ada` (the smallest version of GPT-3), each time generating 300 tokens.
208 | 
209 | **Analysis Questions**
210 | 
211 | 1. Paste each generation into your report, and use the Wikipedia article as well as any knowledge you may have of the person/place to fact-check both generations.
212 | Highlight any parts of the generation that are incorrect or contradictory.
213 | For example, if the model generates `Andrew Carnegie was an American composer whose music has been described as America’s great signature.` then you should highlight this sentence.
214 | 2. What are some trends you notice about the kinds of errors found in the two model sizes?
215 | 3. Experiment with modifying your prompt to improve model accuracy. What extra information can you put in the prompt to make the model do better?
216 | 4. Create a sentence-long Wikipedia-sounding prompt that is completely wrong or else about a fictional person or place, for example `Bruce Lee was the 44th president of the United States from 2009 to 2017`. Prompt both model sizes with the sentence. Do both models write continuations in the same style?
217 | 5. Discuss the challenges in building an LLM that can simultaneously respond well to factual prompts as well as fantastical ones.  What kind of training data do you think, if initially given to the model at training time, would have made it better at supporting both use cases? 
218 | 
219 | # 5. Comparing Pre-Trained and Fine-tuned Models (5 points)
220 | 
221 | The model ``text-davinci-003`` is a variant of GPT-3 which was finetuned for instruction following, using the methods described in the paper ["Training language models to follow instructions with human feedback"](https://arxiv.org/abs/2203.02155). Experiment with writing prompts for the following tasks using both models.
222 | 
223 | - Writing a recipe for a food of your choice.
224 | - Explaining the rules for a sport or game of your choice.
225 | - Continuing a Wikipedia article of your choice, conditioned on the first sentence in the article. (You may also choose to re-use your fantastical prompt from Question 4.)
226 | 
227 | **Analysis Questions**
228 | 1. Provide the prompts you decided on and their generations.
229 | 2. What do you notice about the differences in the behaviour between the two models? Summarize the advantages and disadvantages of using a model finetuned for instruction following.
230 | 
231 | # 6. Acknowledgment of AI Tools
232 | 
233 | If you used ChatGPT or another AI to write any portion of your answers, please use this section to describe the prompts you employed and your methodology for developing them. You do not need to write anything here if you only used LLMs to run experiments as specified in the homework problem instructions.
234 | 
235 | # 7. Optional: Give us Feedback
236 | Was this homework enjoyable? Was it too easy or too hard? Do you have any suggestions for making the homework run more smoothly? Giving us feedback is compeltely optional and will not factor into your grade.
237 | 


--------------------------------------------------------------------------------
/homework2.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Homework 2
 4 | description: >-
 5 |     Instructions for Homework 2
 6 | ---
 7 | 
 8 | # Homework 2
 9 | {:.no_toc}
10 | 
11 | ## Table of contents
12 | {: .no_toc .text-delta }
13 | 
14 | 1. TOC
15 | {:toc}
16 | 
17 | ---
18 | 
19 | This homework is due on Tuesday, October 24th at 2 PM.
20 | 
21 | # Starter Code and Assignment File
22 | 
23 | The starter code for this homework is available [here](homework_materials/hw2_starter_code.zip).  
24 | The assignment file is available [here](homework_materials/hw2.pdf).  
25 | The latex template for this assignment is available [here](homework_materials/hw2_latex_template.zip).
26 | 
27 | # Preparation: Setting up AWS
28 | 
29 | To complete this homework, you will need access to GPU resources for training models.
30 | 
31 | The TAs have compiled step-by-step setup instructions with screenshots here: [AWS setup guide](https://docs.google.com/presentation/d/1Tw_klO84R9G7CZ3cINAKgy4BfdNm-8dlnRXSBIVD_3A/edit?usp=sharing)
32 | 
33 | **Note: Question 1, Question 2.1-4 and Question 3.1 can be done without GPU access. We suggest that you start the assignment early while waiting for AWS credits.**
34 | 
35 | # Submission
36 | 
37 | For homework 2, we will be using **Gradescope**. Please use the link from the Canvas coursepage.  
38 | Two assignments have been created on Gradescope: `Homework 2 - PDF` and `Homework 2 - Code and Checkpoints`.  
39 | 
40 | `Homework 2 - PDF` will be manually graded by the instructors.  
41 | Please submit your a pdf file `[andrew-id].pdf` with answers to the written questions, and annotate the locations of each question's solution in your PDF.
42 | 
43 | `Homework 2 - Code and Checkpoints` contains an autograder that will grade your code with the same unit tests provided in the homework.  
44 | We will also be downloading your model checkpoint `model.pt` and configuration `config.yaml` for perplexity testing.  
45 | The results of these tests will not be made visible until grades are released.  
46 | Please submit the following files, with these **exact filenames**:
47 | - `model.py`
48 | - `train.py`
49 | - `generate.py`
50 | - `classify.py`
51 | - `utils.py`
52 | - `model.pt` (weights from your trained model in Question 2.6.)
53 | - `config.yaml` (configuration file for your trained model in Question 2.6.)
54 | 
55 | Please use [this script](https://gist.github.com/Y0mingZhang/e65783e6c92d448ac94062a7f8951a50) to check that your submission format is correct. 
56 | 
57 | Note: Since many students are having trouble with submitting their model checkpoints on Gradescope with the 100MB size limit, there is a Canvas assignment for this: `Homework 2 - Model Checkpoint`.  
58 | If your model checkpoint exceeds 100MB, please submit your `model.pt` and `config.yaml` file on Canvas.  
59 | If your model checkpoint is below 100MB, you can still submit `model.pt` and `config.yaml` on Gradescope.  
60 | 


--------------------------------------------------------------------------------
/homework3.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Homework 3
 4 | description: >-
 5 |     Instructions for Homework 3
 6 | ---
 7 | 
 8 | # Homework 3
 9 | {:.no_toc}
10 | 
11 | ## Table of contents
12 | {: .no_toc .text-delta }
13 | 
14 | 1. TOC
15 | {:toc}
16 | 
17 | ---
18 | 
19 | This homework is due on Thursday Nov 30 2023 at 2pm.
20 | 
21 | # November 27 Update
22 | 
23 | The starter code, assignment file and latex template have been updated.
24 | Here is a summary of the main changes:
25 |    - Added `generate.py` to the starter code. **You will need to complete this file for Question 5.0 in the assignment.**
26 |    - **The expected submission format has changed** (see assigment file for details).
27 |    - Updated latex template to match the latest assignment file.
28 | 
29 | 
30 | # Starter Code and Assignment File
31 | 
32 | The starter code for this homework is available [here](homework_materials/hw3_starter_code.zip).  
33 | The assignment file is available [here](homework_materials/hw3.pdf).  
34 | The latex template for this assignment is available [here](homework_materials/hw3_latex_template.zip).
35 | 
36 | # Preparation: Setting up AWS
37 | 
38 | To complete this homework, you will need access to GPU resources for training models.
39 | 
40 | The TAs have compiled step-by-step setup instructions with screenshots here: [AWS setup guide](https://docs.google.com/presentation/d/1Tw_klO84R9G7CZ3cINAKgy4BfdNm-8dlnRXSBIVD_3A/edit?usp=sharing)
41 | 
42 | # Submission
43 | 
44 | For homework 3, we will be using **Gradescope** for code and written answers and **Canvas** for model checkpoints and data. Please use the link from the Canvas coursepage.  
45 | Two assignments have been created on Gradescope: `Homework 3 - PDF` and `Homework 3 - Code`.  
46 | 
47 | `Homework 3 - PDF` will be manually graded by the instructors.  
48 | Please submit your a pdf file `[andrew-id].pdf` with answers to the written questions, and annotate the locations of each question's solution in your PDF.
49 | 
50 | `Homework 3 - Code` contains an autograder that will grade your code with the same unit tests provided in the homework.  
51 | We will be evaluating code in Section 4 of the assignment through the autograder in Gradescope, but you should submit all the code that you used to complete the assignment.
52 | 
53 | For the autograder unit tests, please submit the following files on Gradescope with these **exact filenames**. Do not place your files in a directory, as the tests will not run properly.
54 | - `cleaning.py`
55 | - `tokenization.py`
56 | 
57 | We will also be manually grading and inspecting other parts of your code. To make manual grading possible, please submit the following (also on Gradescope):
58 | - `model.py` (if using your HW2 code, include the implementation of your model)
59 | - `config.yaml` (if using your HW2 code, include the config file to load your model)
60 | - `generate.py`
61 | - `test_other.py` (your custom test cases for Q3.3.4)
62 | - `other_scripts.zip` (optional, but if other scripts are used, please zip and include a README indicating which scripts correspond to each question)
63 | 
64 | Since Gradescope has a 100MB size limit, we will submit model checkpoints and code through Canvas: `Homework 3 - Model Checkpoints and Data`.  Please submit these files to Canvas, with these exact filenames:
65 | - `cleaned_data.arrow.zip`
66 | - `model-original.pt` (weights from your model trained on uncleaned data)
67 | - `model-clean.pt` (weights from your model trained on cleaned data)
68 | 


--------------------------------------------------------------------------------
/homework_materials/generate.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import json
 3 | import os
 4 | import torch 
 5 | # YOUR CODE HERE
 6 | # You may add additional imports or helper funcitons here.
 7 | 
 8 | 
 9 | def main():
10 |     torch.backends.cuda.matmul.allow_tf32 = True
11 |     torch.backends.cudnn.allow_tf32 = True
12 | 
13 |     parser = argparse.ArgumentParser()
14 |     parser.add_argument(
15 |         "--prefixes",
16 |         type=str,
17 |         required=True,
18 |         help="a jsonl file with a list of strings as prefixes for generation",
19 |     )
20 |     parser.add_argument(
21 |         "--model_version",
22 |         type=str,
23 |         default="original",
24 |         help="Either the string 'original' or the string 'cleaned'",
25 |     )
26 |     parser.add_argument(
27 |         "--output_dir",
28 |         type=str,
29 |         default=os.getcwd(),
30 |         help="Directory where to save outputs.",
31 |     )
32 |     parser.add_argument(
33 |         "--max_new_tokens",
34 |         type=int,
35 |         default=32,
36 |         help="number of new tokens to generate",
37 |     )
38 |     parser.add_argument(
39 |         "--temperature", type=float, default=0.0, help="temperature in sampling"
40 |     )
41 | 
42 |     args = parser.parse_args()
43 |     with open(args.prefixes) as f:
44 |         prefixes = [json.loads(line)["prefix"] for line in f]
45 | 
46 |     max_new_tokens = args.max_new_tokens
47 |     temperature = args.temperature
48 |     model_version = args.model_version
49 |     output_dir = args.output_dir
50 | 
51 |     if model_version == "original":
52 |         # YOUR CODE HERE
53 |         # Implement inference using your model trained on the original data here.
54 |         # You may copy and paste from HW2.
55 |         generations = []
56 |     elif model_version == "cleaned":   
57 |         # YOUR CODE HERE
58 |         # Implement inference using your model trained on the cleaned data here.
59 |         # You may copy and paste from HW2.
60 |         generations = []
61 |     else:
62 |         raise ValueError("Invalid model version.")
63 | 
64 |     generation_path = os.path.join(output_dir, "generation.jsonl")
65 |     print(f"writing generations to {generation_path}")
66 |     with open(generation_path, "w") as f:
67 |         for prefix, generation in zip(prefixes, generations):
68 |             json.dump({"prefix": prefix, "generation": generation}, f)
69 |             f.write("\n")
70 | 
71 |     print("done!")
72 | 
73 | 
74 | if __name__ == "__main__":
75 |     main()
76 | 


--------------------------------------------------------------------------------
/homework_materials/hw1_starter_code.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |   "cells": [
   3 |     {
   4 |       "cell_type": "code",
   5 |       "execution_count": 1,
   6 |       "metadata": {
   7 |         "id": "6w4VWiJ9XUsY"
   8 |       },
   9 |       "outputs": [],
  10 |       "source": [
  11 |         "#@title Imports and Initialization\n",
  12 |         "%pip install datasets\n",
  13 |         "%pip install textwrap\n",
  14 |         "%pip install openai\n",
  15 |         "\n",
  16 |         "import collections\n",
  17 |         "from abc import ABC\n",
  18 |         "import datasets\n",
  19 |         "import json\n",
  20 |         "import openai\n",
  21 |         "import numpy as np\n",
  22 |         "from scipy.special import softmax\n",
  23 |         "import textwrap\n",
  24 |         "import matplotlib.pyplot as plt\n",
  25 |         "from IPython.display import clear_output\n",
  26 |         "\n",
  27 |         "OPENAI_SECRET_KEY = None\n",
  28 |         "\n",
  29 |         "clear_output()"
  30 |       ]
  31 |     },
  32 |     {
  33 |       "cell_type": "markdown",
  34 |       "metadata": {
  35 |         "id": "0Osx3WttDwKN"
  36 |       },
  37 |       "source": [
  38 |         "# Setup the OpenAI API\n",
  39 |         "\n",
  40 |         "1. Go to www.openai.com and log into your account.\n",
  41 |         "2. Go to https://platform.openai.com/account/api-keys and click the \"Create new secret key\" button. It doesn't matter what you name it.\n",
  42 |         "3. Copy your API key, and then run the code block below. It will ask you to enter your secret key into a text box."
  43 |       ]
  44 |     },
  45 |     {
  46 |       "cell_type": "code",
  47 |       "execution_count": null,
  48 |       "metadata": {
  49 |         "id": "WUzM-DZsD9ZR"
  50 |       },
  51 |       "outputs": [],
  52 |       "source": [
  53 |         "if OPENAI_SECRET_KEY is None:\n",
  54 |         "  print(\"Please paste your OpenAI API key here:\")\n",
  55 |         "  OPENAI_SECRET_KEY = input().strip()\n",
  56 |         "openai.api_key = OPENAI_SECRET_KEY\n",
  57 |         "clear_output()\n",
  58 |         "\n",
  59 |         "class OpenAIEngine():\n",
  60 |         "  def __init__(self, model_name):\n",
  61 |         "    self.model_name = model_name\n",
  62 |         "\n",
  63 |         "  def score(self, text):\n",
  64 |         "    \"\"\"Tokenizes and scores a piece of text.\n",
  65 |         "\n",
  66 |         "    This only works for the OpenAI models which support the legacy `Completion`\n",
  67 |         "    API.\n",
  68 |         "\n",
  69 |         "    The score is log-likelihood. A higher score means a token was more\n",
  70 |         "    likely according to the model.\n",
  71 |         "\n",
  72 |         "    Returns a list of tokens and a list of scores.\n",
  73 |         "    \"\"\"\n",
  74 |         "    response = openai.Completion.create(\n",
  75 |         "        engine=self.model_name,\n",
  76 |         "        prompt=text,\n",
  77 |         "        max_tokens=0,\n",
  78 |         "        logprobs=1,\n",
  79 |         "        echo=True)\n",
  80 |         "\n",
  81 |         "    tokens = response[\"choices\"][0][\"logprobs\"][\"tokens\"]\n",
  82 |         "    logprobs = response[\"choices\"][0][\"logprobs\"][\"token_logprobs\"]\n",
  83 |         "    if logprobs and logprobs[0] is None:\n",
  84 |         "      # GPT-3 API does not return logprob of the first token\n",
  85 |         "      logprobs[0] = 0.0\n",
  86 |         "    return tokens, logprobs\n",
  87 |         "\n",
  88 |         "  def perplexity(self, text):\n",
  89 |         "    \"\"\"Compute the perplexity of the provided text.\"\"\"\n",
  90 |         "    completion = openai.Completion.create(\n",
  91 |         "        model=self.model_name,\n",
  92 |         "        prompt=text,\n",
  93 |         "        logprobs=0,\n",
  94 |         "        max_tokens=0,\n",
  95 |         "        temperature=1.0,\n",
  96 |         "        echo=True)\n",
  97 |         "    token_logprobs = completion['choices'][0]['logprobs']['token_logprobs']\n",
  98 |         "    nll = np.mean([i for i in token_logprobs if i is not None])\n",
  99 |         "    ppl = np.exp(-nll)\n",
 100 |         "    return ppl\n",
 101 |         "\n",
 102 |         "  def generate(self,\n",
 103 |         "               prompt,\n",
 104 |         "               top_p=1.0,\n",
 105 |         "               num_tokens=32,\n",
 106 |         "               num_samples=1,\n",
 107 |         "               frequency_penalty=0.0,\n",
 108 |         "              presence_penalty=0.0):\n",
 109 |         "    \"\"\"Generates text given the provided prompt text.\n",
 110 |         "\n",
 111 |         "    This only works for the OpenAI models which support the legacy `Completion`\n",
 112 |         "    API.\n",
 113 |         "\n",
 114 |         "    If num_samples is 1, a single generated string is returned.\n",
 115 |         "    If num_samples > 1, a list of num_samples generated strings is returned.\n",
 116 |         "    \"\"\"\n",
 117 |         "    response = openai.Completion.create(\n",
 118 |         "      engine=self.model_name,\n",
 119 |         "      prompt=prompt,\n",
 120 |         "      temperature=1.0,\n",
 121 |         "      max_tokens=num_tokens,\n",
 122 |         "      top_p=top_p,\n",
 123 |         "      n=num_samples,\n",
 124 |         "      frequency_penalty=frequency_penalty,\n",
 125 |         "      presence_penalty=presence_penalty,\n",
 126 |         "      logprobs=1,\n",
 127 |         "    )\n",
 128 |         "    outputs = [r[\"text\"] for r in response[\"choices\"]]\n",
 129 |         "    return outputs[0] if num_samples == 1 else outputs\n",
 130 |         "\n",
 131 |         "\n",
 132 |         "  def chat_generate(self,\n",
 133 |         "                    previous_messages,\n",
 134 |         "                    top_p=1.0,\n",
 135 |         "                    num_tokens=32,\n",
 136 |         "                    num_samples=1,\n",
 137 |         "                    frequency_penalty=0.0,\n",
 138 |         "                    presence_penalty=0.0):\n",
 139 |         "    response = openai.ChatCompletion.create(\n",
 140 |         "      model=self.model_name,\n",
 141 |         "      messages=previous_messages,\n",
 142 |         "      temperature=1.0,\n",
 143 |         "      max_tokens=num_tokens,\n",
 144 |         "      top_p=top_p,\n",
 145 |         "      frequency_penalty=frequency_penalty,\n",
 146 |         "      presence_penalty=presence_penalty,\n",
 147 |         "      n=num_samples,\n",
 148 |         "    )\n",
 149 |         "    return response"
 150 |       ]
 151 |     },
 152 |     {
 153 |       "cell_type": "markdown",
 154 |       "metadata": {
 155 |         "id": "igSK0NUbwhi0"
 156 |       },
 157 |       "source": [
 158 |         "# Question 1: Observing the Impact of Decoding Strategy"
 159 |       ]
 160 |     },
 161 |     {
 162 |       "cell_type": "markdown",
 163 |       "metadata": {
 164 |         "id": "jaacqKL7fMBF"
 165 |       },
 166 |       "source": [
 167 |         "## 1.1: Rolling a Twenty-Sided Die"
 168 |       ]
 169 |     },
 170 |     {
 171 |       "cell_type": "code",
 172 |       "execution_count": null,
 173 |       "metadata": {
 174 |         "id": "-xdzzlM3gtxr"
 175 |       },
 176 |       "outputs": [],
 177 |       "source": [
 178 |         "MODEL_NAME = \"davinci\"\n",
 179 |         "engine = OpenAIEngine(MODEL_NAME)"
 180 |       ]
 181 |     },
 182 |     {
 183 |       "cell_type": "code",
 184 |       "execution_count": null,
 185 |       "metadata": {
 186 |         "id": "pOshmMOzh74a"
 187 |       },
 188 |       "outputs": [],
 189 |       "source": [
 190 |         "prompt = \"Let's roll a D20. The die shows the number\"\n",
 191 |         "rolls = engine.generate(prompt, num_tokens=1, num_samples=128, top_p=1.0)\n",
 192 |         "expected_number_of_outcomes = 20"
 193 |       ]
 194 |     },
 195 |     {
 196 |       "cell_type": "code",
 197 |       "execution_count": null,
 198 |       "metadata": {
 199 |         "colab": {
 200 |           "base_uri": "https://localhost:8080/"
 201 |         },
 202 |         "id": "0xIvyZesgwlT",
 203 |         "outputId": "c92d1ad3-bfbe-4169-db8b-3abf8569ce05"
 204 |       },
 205 |       "outputs": [
 206 |         {
 207 |           "name": "stdout",
 208 |           "output_type": "stream",
 209 |           "text": [
 210 |             "Counter({-1: 21, 12: 13, 10: 9, 18: 9, 15: 9, 7: 8, 4: 8, 8: 7, 2: 7, 9: 5, 6: 5, 19: 4, 16: 4, 5: 4, 1: 4, 14: 4, 3: 3, 13: 2, 17: 1, 20: 1})\n",
 211 |             "Percentage of valid outcomes generated: 0.95\n"
 212 |           ]
 213 |         }
 214 |       ],
 215 |       "source": [
 216 |         "rolls_counter = collections.Counter()\n",
 217 |         "for roll in rolls:\n",
 218 |         "  try:\n",
 219 |         "    roll_num = int(roll)\n",
 220 |         "    # Let's label invalid numbers as -1\n",
 221 |         "    roll_num = roll_num if 1 <= roll_num <= 20 else -1\n",
 222 |         "  except ValueError:\n",
 223 |         "    # Let's just label invalid generation as a roll of -1.\n",
 224 |         "    roll_num = -1\n",
 225 |         "  rolls_counter[roll_num] += 1\n",
 226 |         "\n",
 227 |         "print(rolls_counter)\n",
 228 |         "print(\"Percentage of valid outcomes generated:\",\n",
 229 |         "      (len(rolls_counter)-1)/expected_number_of_outcomes)"
 230 |       ]
 231 |     },
 232 |     {
 233 |       "cell_type": "markdown",
 234 |       "metadata": {
 235 |         "id": "zWXWJ7hHfO39"
 236 |       },
 237 |       "source": [
 238 |         "## 1.2: Longform Generation"
 239 |       ]
 240 |     },
 241 |     {
 242 |       "cell_type": "code",
 243 |       "execution_count": null,
 244 |       "metadata": {
 245 |         "colab": {
 246 |           "base_uri": "https://localhost:8080/",
 247 |           "height": 87
 248 |         },
 249 |         "id": "EnGViJT0ha4g",
 250 |         "outputId": "171e71cb-f608-4249-b197-1f692f6233d4"
 251 |       },
 252 |       "outputs": [
 253 |         {
 254 |           "data": {
 255 |             "application/vnd.google.colaboratory.intrinsic+json": {
 256 |               "type": "string"
 257 |             },
 258 |             "text/plain": [
 259 |               "'” The Hippopotamus Who Swallowed a Watermelon was my favorite picture book as a child. It was such a silly story, but it still had the silliness and the adventure that I loved. Plus, it’s a great one to learn a few new words, as there are plenty in it.\\n\\nThe illustrations are adorable and full of life, making the book feel like it’s bursting with energy. It was a fun one to read to my kids and the tale will stick with them for a long time.\\n\\n3. Nighttime Nap – Babar\\n\\nThis book was one of the very first books my daughter learned to read by herself. She was around 3 or 4 at the time and still loved the beautiful illustrations in the Babar series. It’s also fun to read the few words that my daughter could read at the time, and then ask her to predict what the word might be. I still read this one to my kids before they go to bed.\\n\\nI’ve always been a fan of Babar and this series of books is no different. They are a fun and easy way to get kids interested in books. They are simple, but they are beautiful too.\\n\\n4'"
 260 |             ]
 261 |           },
 262 |           "execution_count": 36,
 263 |           "metadata": {},
 264 |           "output_type": "execute_result"
 265 |         }
 266 |       ],
 267 |       "source": [
 268 |         "prompt = \"Let me tell you the story about the morning when a hippopotamus ate my homework.\"\n",
 269 |         "engine.generate(prompt, num_tokens=256, num_samples=1, top_p=0.8, frequency_penalty=0.0)"
 270 |       ]
 271 |     },
 272 |     {
 273 |       "cell_type": "code",
 274 |       "execution_count": null,
 275 |       "metadata": {
 276 |         "colab": {
 277 |           "base_uri": "https://localhost:8080/",
 278 |           "height": 87
 279 |         },
 280 |         "id": "QgIyI8YWgjXV",
 281 |         "outputId": "06e1dda7-2f63-44a6-982a-dc3369fe6a1e"
 282 |       },
 283 |       "outputs": [
 284 |         {
 285 |           "data": {
 286 |             "application/vnd.google.colaboratory.intrinsic+json": {
 287 |               "type": "string"
 288 |             },
 289 |             "text/plain": [
 290 |               "' it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way – in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only.” ~ Charles Dickens, A Tale of Two Cities, 1859\\n\\nA few of my earliest memories are of lying awake and waiting for the monsters to come. I was absolutely convinced that my parents were asleep and unaware of what was happening to me in the dark, in my bed. I was certain that I was all alone.\\n\\nThat was true, but only because I didn’t understand what my mother was doing in the living room. I was three and a half years old.\\n\\nAs I grew older, and the monsters of childhood still lived in my mind, I began to understand. I’d overheard a conversation between my parents about my older brother,'"
 291 |             ]
 292 |           },
 293 |           "execution_count": 37,
 294 |           "metadata": {},
 295 |           "output_type": "execute_result"
 296 |         }
 297 |       ],
 298 |       "source": [
 299 |         "prompt = \"It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness,\"\n",
 300 |         "engine.generate(prompt, num_tokens=256, num_samples=1, top_p=0.8, frequency_penalty=0.0)"
 301 |       ]
 302 |     },
 303 |     {
 304 |       "cell_type": "markdown",
 305 |       "metadata": {
 306 |         "id": "03kqQumrsjnN"
 307 |       },
 308 |       "source": [
 309 |         "# Question 2: Measuring Perplexity"
 310 |       ]
 311 |     },
 312 |     {
 313 |       "cell_type": "code",
 314 |       "execution_count": null,
 315 |       "metadata": {
 316 |         "id": "8Qtno5zpYKsY"
 317 |       },
 318 |       "outputs": [],
 319 |       "source": [
 320 |         "MODEL_NAME = \"davinci\"\n",
 321 |         "engine = OpenAIEngine(MODEL_NAME)"
 322 |       ]
 323 |     },
 324 |     {
 325 |       "cell_type": "code",
 326 |       "execution_count": null,
 327 |       "metadata": {
 328 |         "colab": {
 329 |           "base_uri": "https://localhost:8080/"
 330 |         },
 331 |         "id": "Dybtqy-4MoRt",
 332 |         "outputId": "5ea8c024-c585-43eb-d385-43855abaac41"
 333 |       },
 334 |       "outputs": [
 335 |         {
 336 |           "data": {
 337 |             "text/plain": [
 338 |               "1.339475969608505"
 339 |             ]
 340 |           },
 341 |           "execution_count": 39,
 342 |           "metadata": {},
 343 |           "output_type": "execute_result"
 344 |         }
 345 |       ],
 346 |       "source": [
 347 |         "poem = \"\"\"\n",
 348 |         "’Twas brillig, and the slithy toves\n",
 349 |         "      Did gyre and gimble in the wabe:\n",
 350 |         "All mimsy were the borogoves,\n",
 351 |         "      And the mome raths outgrabe.\n",
 352 |         "\n",
 353 |         "“Beware the Jabberwock, my son!\n",
 354 |         "      The jaws that bite, the claws that catch!\n",
 355 |         "Beware the Jubjub bird, and shun\n",
 356 |         "      The frumious Bandersnatch!”\n",
 357 |         "\n",
 358 |         "He took his vorpal sword in hand;\n",
 359 |         "      Long time the manxome foe he sought—\n",
 360 |         "So rested he by the Tumtum tree\n",
 361 |         "      And stood awhile in thought.\n",
 362 |         "\n",
 363 |         "And, as in uffish thought he stood,\n",
 364 |         "      The Jabberwock, with eyes of flame,\n",
 365 |         "Came whiffling through the tulgey wood,\n",
 366 |         "      And burbled as it came!\n",
 367 |         "\n",
 368 |         "One, two! One, two! And through and through\n",
 369 |         "      The vorpal blade went snicker-snack!\n",
 370 |         "He left it dead, and with its head\n",
 371 |         "      He went galumphing back.\n",
 372 |         "\n",
 373 |         "“And hast thou slain the Jabberwock?\n",
 374 |         "      Come to my arms, my beamish boy!\n",
 375 |         "O frabjous day! Callooh! Callay!”\n",
 376 |         "      He chortled in his joy.\n",
 377 |         "\n",
 378 |         "’Twas brillig, and the slithy toves\n",
 379 |         "      Did gyre and gimble in the wabe:\n",
 380 |         "All mimsy were the borogoves,\n",
 381 |         "      And the mome raths outgrabe.\n",
 382 |         "\"\"\"\n",
 383 |         "\n",
 384 |         "engine.perplexity(poem)"
 385 |       ]
 386 |     },
 387 |     {
 388 |       "cell_type": "markdown",
 389 |       "metadata": {
 390 |         "id": "hBeWxVDCMcAa"
 391 |       },
 392 |       "source": [
 393 |         "# Question 3: Experimenting with Few-Shot Prompting"
 394 |       ]
 395 |     },
 396 |     {
 397 |       "cell_type": "markdown",
 398 |       "metadata": {
 399 |         "id": "3yy-su4dMkEd"
 400 |       },
 401 |       "source": [
 402 |         "## 3.1: Few-Shot Learning for the Choice of Plausible Alternatives Task"
 403 |       ]
 404 |     },
 405 |     {
 406 |       "cell_type": "code",
 407 |       "execution_count": 4,
 408 |       "metadata": {
 409 |         "colab": {
 410 |           "base_uri": "https://localhost:8080/"
 411 |         },
 412 |         "id": "H5UoOULgBD3Y",
 413 |         "outputId": "0d83f58d-753a-4113-8258-0fd6061f9391"
 414 |       },
 415 |       "outputs": [
 416 |         {
 417 |           "name": "stdout",
 418 |           "output_type": "stream",
 419 |           "text": [
 420 |             "Some examples from the train set:\n",
 421 |             "{\n",
 422 |             "  \"premise\": \"The woman spotted her friend from across the room.\",\n",
 423 |             "  \"choice1\": \"The woman waved.\",\n",
 424 |             "  \"choice2\": \"The woman escaped.\",\n",
 425 |             "  \"question\": \"cause\",\n",
 426 |             "  \"idx\": 379,\n",
 427 |             "  \"label\": 0\n",
 428 |             "}\n",
 429 |             "{\n",
 430 |             "  \"premise\": \"The girl made a wish.\",\n",
 431 |             "  \"choice1\": \"She saw a black cat.\",\n",
 432 |             "  \"choice2\": \"She saw a shooting star.\",\n",
 433 |             "  \"question\": \"cause\",\n",
 434 |             "  \"idx\": 45,\n",
 435 |             "  \"label\": 1\n",
 436 |             "}\n",
 437 |             "{\n",
 438 |             "  \"premise\": \"The woman hired a lawyer.\",\n",
 439 |             "  \"choice1\": \"She decided to sue her employer.\",\n",
 440 |             "  \"choice2\": \"She decided to run for office.\",\n",
 441 |             "  \"question\": \"cause\",\n",
 442 |             "  \"idx\": 39,\n",
 443 |             "  \"label\": 0\n",
 444 |             "}\n"
 445 |           ]
 446 |         }
 447 |       ],
 448 |       "source": [
 449 |         "copa_dataset = datasets.load_dataset(\"super_glue\", \"copa\")\n",
 450 |         "\n",
 451 |         "# You may draw on these examples to produce few-shot prompts.\n",
 452 |         "train_data = copa_dataset[\"train\"].shuffle(seed=1).select(range(50))\n",
 453 |         "\n",
 454 |         "# Use this development set to try out different few-shot prompts to see\n",
 455 |         "# what works best.\n",
 456 |         "dev_data = copa_dataset[\"train\"].shuffle(seed=1).select(range(50, 150))\n",
 457 |         "\n",
 458 |         "# You should only use this at the end during final evaluation to generate\n",
 459 |         "# accuracies to put in your report.\n",
 460 |         "test_data = copa_dataset[\"validation\"].shuffle(seed=1).select(range(100))\n",
 461 |         "\n",
 462 |         "print(\"Some examples from the train set:\")\n",
 463 |         "for i in range(3):\n",
 464 |         "  print(json.dumps(train_data[i], indent=2))"
 465 |       ]
 466 |     },
 467 |     {
 468 |       "cell_type": "code",
 469 |       "execution_count": null,
 470 |       "metadata": {
 471 |         "id": "Mi29l6BPwoQy"
 472 |       },
 473 |       "outputs": [],
 474 |       "source": [
 475 |         "prompt = \"Given the following premise and cause, label whether the cause seems correct\\n\\n\"\n",
 476 |         "eval_template = \"Review: {review}\\nSentiment: {sentiment}\""
 477 |       ]
 478 |     },
 479 |     {
 480 |       "cell_type": "code",
 481 |       "execution_count": null,
 482 |       "metadata": {
 483 |         "colab": {
 484 |           "base_uri": "https://localhost:8080/"
 485 |         },
 486 |         "id": "Biymx4eRyWR2",
 487 |         "outputId": "3187ca5b-9475-4ed1-8431-8ed788ccb5f3"
 488 |       },
 489 |       "outputs": [
 490 |         {
 491 |           "name": "stdout",
 492 |           "output_type": "stream",
 493 |           "text": [
 494 |             "======== 1 / 100 ========\n",
 495 |             "PREMISE: My car was towed.\n",
 496 |             "CHOICE 1 ✅: I parked illegally.\n",
 497 |             "CHOICE 2 ❌: I jumped the battery.\n",
 498 |             "PREDICTED: choice 2\n",
 499 |             "======== 2 / 100 ========\n",
 500 |             "PREMISE: I rubbed the soap between my hands.\n",
 501 |             "CHOICE 1 ✅: The soap foamed.\n",
 502 |             "CHOICE 2 ❌: My hands went numb.\n",
 503 |             "PREDICTED: choice 2\n",
 504 |             "======== 3 / 100 ========\n",
 505 |             "PREMISE: The stain came out of the shirt.\n",
 506 |             "CHOICE 1 ❌: I patched the shirt.\n",
 507 |             "CHOICE 2 ✅: I bleached the shirt.\n",
 508 |             "PREDICTED: choice 2\n",
 509 |             "======== 4 / 100 ========\n",
 510 |             "PREMISE: The police closed the investigation.\n",
 511 |             "CHOICE 1 ✅: They apprehended the suspect.\n",
 512 |             "CHOICE 2 ❌: The victim recovered.\n",
 513 |             "PREDICTED: choice 2\n",
 514 |             "======== 5 / 100 ========\n",
 515 |             "PREMISE: My foot went numb.\n",
 516 |             "CHOICE 1 ❌: I put my shoes on.\n",
 517 |             "CHOICE 2 ✅: I shook my foot.\n",
 518 |             "PREDICTED: choice 2\n",
 519 |             "======== 6 / 100 ========\n",
 520 |             "PREMISE: The hospital sent the patient home.\n",
 521 |             "CHOICE 1 ✅: The patient's symptoms cleared up.\n",
 522 |             "CHOICE 2 ❌: The patient's family visited him.\n",
 523 |             "PREDICTED: choice 2\n",
 524 |             "======== 7 / 100 ========\n",
 525 |             "PREMISE: The parents recognized their daughter's creativity.\n",
 526 |             "CHOICE 1 ❌: They taught her how to ride a bike.\n",
 527 |             "CHOICE 2 ✅: They encouraged her to become an artist.\n",
 528 |             "PREDICTED: choice 2\n",
 529 |             "======== 8 / 100 ========\n",
 530 |             "PREMISE: The girl went down the hill on her bike.\n",
 531 |             "CHOICE 1 ❌: Her bike swerved.\n",
 532 |             "CHOICE 2 ✅: Her bike sped up.\n",
 533 |             "PREDICTED: choice 2\n",
 534 |             "======== 9 / 100 ========\n",
 535 |             "PREMISE: I finished a page of the book.\n",
 536 |             "CHOICE 1 ❌: I ripped out the next page.\n",
 537 |             "CHOICE 2 ✅: I turned to the next page.\n",
 538 |             "PREDICTED: choice 2\n",
 539 |             "======== 10 / 100 ========\n",
 540 |             "PREMISE: The bananas ripened.\n",
 541 |             "CHOICE 1 ❌: We squeezed them.\n",
 542 |             "CHOICE 2 ✅: We ate them.\n",
 543 |             "PREDICTED: choice 2\n",
 544 |             "======== 11 / 100 ========\n",
 545 |             "PREMISE: The engine of the airplane was faulty.\n",
 546 |             "CHOICE 1 ✅: The airplane crashed.\n",
 547 |             "CHOICE 2 ❌: The pilot made an error.\n",
 548 |             "PREDICTED: choice 2\n",
 549 |             "======== 12 / 100 ========\n",
 550 |             "PREMISE: The farmland needed irrigation.\n",
 551 |             "CHOICE 1 ✅: A canal was constructed.\n",
 552 |             "CHOICE 2 ❌: A flood occurred.\n",
 553 |             "PREDICTED: choice 2\n",
 554 |             "======== 13 / 100 ========\n",
 555 |             "PREMISE: I moved to a new city.\n",
 556 |             "CHOICE 1 ✅: I found a better-paying job.\n",
 557 |             "CHOICE 2 ❌: My son was born.\n",
 558 |             "PREDICTED: choice 2\n",
 559 |             "======== 14 / 100 ========\n",
 560 |             "PREMISE: The boy's mouth stung.\n",
 561 |             "CHOICE 1 ✅: He ate a pepper.\n",
 562 |             "CHOICE 2 ❌: He blew out the candle.\n",
 563 |             "PREDICTED: choice 2\n",
 564 |             "======== 15 / 100 ========\n",
 565 |             "PREMISE: The children ran through the sprinkler.\n",
 566 |             "CHOICE 1 ❌: They ate popsicles.\n",
 567 |             "CHOICE 2 ✅: They were hot.\n",
 568 |             "PREDICTED: choice 2\n",
 569 |             "======== 16 / 100 ========\n",
 570 |             "PREMISE: The toddler started crying.\n",
 571 |             "CHOICE 1 ❌: Her parents took away her toy.\n",
 572 |             "CHOICE 2 ✅: Her parents gave her a toy.\n",
 573 |             "PREDICTED: choice 2\n",
 574 |             "======== 17 / 100 ========\n",
 575 |             "PREMISE: I tossed the ball upwards.\n",
 576 |             "CHOICE 1 ❌: The ball rolled across the ground.\n",
 577 |             "CHOICE 2 ✅: The ball hit the ceiling.\n",
 578 |             "PREDICTED: choice 2\n",
 579 |             "======== 18 / 100 ========\n",
 580 |             "PREMISE: The boy found the television show boring.\n",
 581 |             "CHOICE 1 ❌: He recorded it.\n",
 582 |             "CHOICE 2 ✅: He shut it off.\n",
 583 |             "PREDICTED: choice 2\n",
 584 |             "======== 19 / 100 ========\n",
 585 |             "PREMISE: The man went away for the weekend.\n",
 586 |             "CHOICE 1 ✅: He wanted to relax.\n",
 587 |             "CHOICE 2 ❌: He felt content.\n",
 588 |             "PREDICTED: choice 2\n",
 589 |             "======== 20 / 100 ========\n",
 590 |             "PREMISE: I hung up the phone.\n",
 591 |             "CHOICE 1 ✅: The caller said goodbye to me.\n",
 592 |             "CHOICE 2 ❌: The caller identified himself to me.\n",
 593 |             "PREDICTED: choice 2\n",
 594 |             "======== 21 / 100 ========\n",
 595 |             "PREMISE: I pushed the pendulum.\n",
 596 |             "CHOICE 1 ❌: It slowed to a stop.\n",
 597 |             "CHOICE 2 ✅: It swung back and forth.\n",
 598 |             "PREDICTED: choice 2\n",
 599 |             "======== 22 / 100 ========\n",
 600 |             "PREMISE: The man contemplated the painting.\n",
 601 |             "CHOICE 1 ✅: He felt in awe.\n",
 602 |             "CHOICE 2 ❌: He collapsed.\n",
 603 |             "PREDICTED: choice 2\n",
 604 |             "======== 23 / 100 ========\n",
 605 |             "PREMISE: I applied pressure to the cut on my arm.\n",
 606 |             "CHOICE 1 ❌: It healed.\n",
 607 |             "CHOICE 2 ✅: It stopped bleeding.\n",
 608 |             "PREDICTED: choice 2\n",
 609 |             "======== 24 / 100 ========\n",
 610 |             "PREMISE: The man took a shower.\n",
 611 |             "CHOICE 1 ✅: He was sweaty from working out.\n",
 612 |             "CHOICE 2 ❌: He opened a new bar of soap.\n",
 613 |             "PREDICTED: choice 2\n",
 614 |             "======== 25 / 100 ========\n",
 615 |             "PREMISE: The fans in the stadium cheered.\n",
 616 |             "CHOICE 1 ❌: The game ended in a tie.\n",
 617 |             "CHOICE 2 ✅: The player scored.\n",
 618 |             "PREDICTED: choice 2\n",
 619 |             "======== 26 / 100 ========\n",
 620 |             "PREMISE: The man took notice of the woman.\n",
 621 |             "CHOICE 1 ✅: He caught a whiff of her perfume.\n",
 622 |             "CHOICE 2 ❌: He had his back turned to her.\n",
 623 |             "PREDICTED: choice 2\n",
 624 |             "======== 27 / 100 ========\n",
 625 |             "PREMISE: The politician lost the election.\n",
 626 |             "CHOICE 1 ❌: He ran negative campaign ads.\n",
 627 |             "CHOICE 2 ✅: No one voted for him.\n",
 628 |             "PREDICTED: choice 2\n",
 629 |             "======== 28 / 100 ========\n",
 630 |             "PREMISE: I wrote a thank-you note to my grandmother.\n",
 631 |             "CHOICE 1 ❌: She became forgetful.\n",
 632 |             "CHOICE 2 ✅: She sent me a gift.\n",
 633 |             "PREDICTED: choice 2\n",
 634 |             "======== 29 / 100 ========\n",
 635 |             "PREMISE: The police aimed their weapons at the fugitive.\n",
 636 |             "CHOICE 1 ❌: The fugitive fell to the ground.\n",
 637 |             "CHOICE 2 ✅: The fugitive dropped his gun.\n",
 638 |             "PREDICTED: choice 2\n",
 639 |             "======== 30 / 100 ========\n",
 640 |             "PREMISE: My grandmother passed away.\n",
 641 |             "CHOICE 1 ✅: My family held a funeral.\n",
 642 |             "CHOICE 2 ❌: My family held a reunion.\n",
 643 |             "PREDICTED: choice 2\n",
 644 |             "======== 31 / 100 ========\n",
 645 |             "PREMISE: The man revealed personal information to the therapist.\n",
 646 |             "CHOICE 1 ✅: He trusted the therapist.\n",
 647 |             "CHOICE 2 ❌: He disagreed with the therapist.\n",
 648 |             "PREDICTED: choice 2\n",
 649 |             "======== 32 / 100 ========\n",
 650 |             "PREMISE: I lingered in bed upon awakening.\n",
 651 |             "CHOICE 1 ❌: I was hungry.\n",
 652 |             "CHOICE 2 ✅: It was Saturday.\n",
 653 |             "PREDICTED: choice 2\n",
 654 |             "======== 33 / 100 ========\n",
 655 |             "PREMISE: The customer thought the souvenir was overpriced.\n",
 656 |             "CHOICE 1 ❌: The vender sold it to him.\n",
 657 |             "CHOICE 2 ✅: He bargained with the vendor.\n",
 658 |             "PREDICTED: choice 2\n",
 659 |             "======== 34 / 100 ========\n",
 660 |             "PREMISE: I stood on one foot.\n",
 661 |             "CHOICE 1 ✅: My balance wavered.\n",
 662 |             "CHOICE 2 ❌: I kneeled down.\n",
 663 |             "PREDICTED: choice 2\n",
 664 |             "======== 35 / 100 ========\n",
 665 |             "PREMISE: My friend paid for my ticket.\n",
 666 |             "CHOICE 1 ❌: I sought a refund.\n",
 667 |             "CHOICE 2 ✅: I paid her back.\n",
 668 |             "PREDICTED: choice 2\n",
 669 |             "======== 36 / 100 ========\n",
 670 |             "PREMISE: I dabbed the floor with a paper towel.\n",
 671 |             "CHOICE 1 ✅: I spilled juice on the floor.\n",
 672 |             "CHOICE 2 ❌: The floor was permanently stained.\n",
 673 |             "PREDICTED: choice 2\n",
 674 |             "======== 37 / 100 ========\n",
 675 |             "PREMISE: The woman spoke with a foreign accent.\n",
 676 |             "CHOICE 1 ❌: She came from a wealthy family.\n",
 677 |             "CHOICE 2 ✅: She migrated from another country.\n",
 678 |             "PREDICTED: choice 2\n",
 679 |             "======== 38 / 100 ========\n",
 680 |             "PREMISE: The man made an error in his calculations.\n",
 681 |             "CHOICE 1 ❌: He checked his work.\n",
 682 |             "CHOICE 2 ✅: His work was hasty.\n",
 683 |             "PREDICTED: choice 2\n",
 684 |             "======== 39 / 100 ========\n",
 685 |             "PREMISE: The shirt shrunk.\n",
 686 |             "CHOICE 1 ❌: I poured bleach on it.\n",
 687 |             "CHOICE 2 ✅: I put it in the dryer.\n",
 688 |             "PREDICTED: choice 2\n",
 689 |             "======== 40 / 100 ========\n",
 690 |             "PREMISE: The man hated his new haircut.\n",
 691 |             "CHOICE 1 ✅: He wore a hat.\n",
 692 |             "CHOICE 2 ❌: He grew a beard.\n",
 693 |             "PREDICTED: choice 2\n",
 694 |             "======== 41 / 100 ========\n",
 695 |             "PREMISE: The hunter ran out of ammunition.\n",
 696 |             "CHOICE 1 ✅: He reloaded the gun.\n",
 697 |             "CHOICE 2 ❌: He aimed at the deer.\n",
 698 |             "PREDICTED: choice 2\n",
 699 |             "======== 42 / 100 ========\n",
 700 |             "PREMISE: The surfer caught the wave.\n",
 701 |             "CHOICE 1 ✅: The wave carried her to the shore.\n",
 702 |             "CHOICE 2 ❌: She paddled her board into the ocean.\n",
 703 |             "PREDICTED: choice 2\n",
 704 |             "======== 43 / 100 ========\n",
 705 |             "PREMISE: The student's answer on the exam was incorrect.\n",
 706 |             "CHOICE 1 ❌: The teacher added bonus points to the student's grade.\n",
 707 |             "CHOICE 2 ✅: The teacher subtracted points off the student's grade.\n",
 708 |             "PREDICTED: choice 2\n",
 709 |             "======== 44 / 100 ========\n",
 710 |             "PREMISE: The bureau drawers were cluttered.\n",
 711 |             "CHOICE 1 ✅: I threw away unnecessary contents.\n",
 712 |             "CHOICE 2 ❌: I locked the drawers shut.\n",
 713 |             "PREDICTED: choice 2\n",
 714 |             "======== 45 / 100 ========\n",
 715 |             "PREMISE: The chandelier shattered on the floor.\n",
 716 |             "CHOICE 1 ✅: The chandelier dropped from the ceiling.\n",
 717 |             "CHOICE 2 ❌: The chandelier's lights flickered on and off.\n",
 718 |             "PREDICTED: choice 2\n",
 719 |             "======== 46 / 100 ========\n",
 720 |             "PREMISE: The teenager grew taller than his father.\n",
 721 |             "CHOICE 1 ✅: He went through a growth spurt.\n",
 722 |             "CHOICE 2 ❌: He joined the basketball team.\n",
 723 |             "PREDICTED: choice 2\n",
 724 |             "======== 47 / 100 ========\n",
 725 |             "PREMISE: The skier slipped on the slope.\n",
 726 |             "CHOICE 1 ❌: She dropped her ski poles.\n",
 727 |             "CHOICE 2 ✅: She hit a patch of ice.\n",
 728 |             "PREDICTED: choice 2\n",
 729 |             "======== 48 / 100 ========\n",
 730 |             "PREMISE: The navy bombed the ship.\n",
 731 |             "CHOICE 1 ❌: The ship crashed into the pier.\n",
 732 |             "CHOICE 2 ✅: The ship's debris sunk in the sea.\n",
 733 |             "PREDICTED: choice 2\n",
 734 |             "======== 49 / 100 ========\n",
 735 |             "PREMISE: The man wanted to save money.\n",
 736 |             "CHOICE 1 ❌: He withdrew money from his savings account.\n",
 737 |             "CHOICE 2 ✅: He cut back on making frivolous purchases.\n",
 738 |             "PREDICTED: choice 2\n",
 739 |             "======== 50 / 100 ========\n",
 740 |             "PREMISE: The man became disabled.\n",
 741 |             "CHOICE 1 ✅: His family offered him financial support.\n",
 742 |             "CHOICE 2 ❌: His family cut off contact with him.\n",
 743 |             "PREDICTED: choice 2\n",
 744 |             "======== 51 / 100 ========\n",
 745 |             "PREMISE: The child disobeyed her parents.\n",
 746 |             "CHOICE 1 ❌: Her parents hugged her.\n",
 747 |             "CHOICE 2 ✅: Her parents punished her.\n",
 748 |             "PREDICTED: choice 2\n",
 749 |             "======== 52 / 100 ========\n",
 750 |             "PREMISE: The crowd gave the band a standing ovation.\n",
 751 |             "CHOICE 1 ❌: The band signed autographs.\n",
 752 |             "CHOICE 2 ✅: The band reappeared on the stage.\n",
 753 |             "PREDICTED: choice 2\n",
 754 |             "======== 53 / 100 ========\n",
 755 |             "PREMISE: I flipped the light switch up and down.\n",
 756 |             "CHOICE 1 ❌: The light faded.\n",
 757 |             "CHOICE 2 ✅: The light flickered.\n",
 758 |             "PREDICTED: choice 2\n",
 759 |             "======== 54 / 100 ========\n",
 760 |             "PREMISE: The man read the cartoon in the newspaper.\n",
 761 |             "CHOICE 1 ❌: He sipped coffee.\n",
 762 |             "CHOICE 2 ✅: He chuckled.\n",
 763 |             "PREDICTED: choice 2\n",
 764 |             "======== 55 / 100 ========\n",
 765 |             "PREMISE: The offender violated parole.\n",
 766 |             "CHOICE 1 ✅: She was sent back to jail.\n",
 767 |             "CHOICE 2 ❌: She stole money from a church.\n",
 768 |             "PREDICTED: choice 2\n",
 769 |             "======== 56 / 100 ========\n",
 770 |             "PREMISE: The man was lost.\n",
 771 |             "CHOICE 1 ✅: He asked for directions.\n",
 772 |             "CHOICE 2 ❌: He drew a map.\n",
 773 |             "PREDICTED: choice 2\n",
 774 |             "======== 57 / 100 ========\n",
 775 |             "PREMISE: The woman's eyeglasses fogged up.\n",
 776 |             "CHOICE 1 ❌: She reclined by the pool.\n",
 777 |             "CHOICE 2 ✅: She entered the sauna.\n",
 778 |             "PREDICTED: choice 2\n",
 779 |             "======== 58 / 100 ========\n",
 780 |             "PREMISE: The boy tuned the radio.\n",
 781 |             "CHOICE 1 ❌: The station was playing rock music.\n",
 782 |             "CHOICE 2 ✅: The station was coming in with static.\n",
 783 |             "PREDICTED: choice 2\n",
 784 |             "======== 59 / 100 ========\n",
 785 |             "PREMISE: I ran the ice cube under warm water.\n",
 786 |             "CHOICE 1 ❌: The ice cube stuck to my fingers.\n",
 787 |             "CHOICE 2 ✅: The ice cube vanished.\n",
 788 |             "PREDICTED: choice 2\n",
 789 |             "======== 60 / 100 ========\n",
 790 |             "PREMISE: The woman felt ashamed of scar on her face.\n",
 791 |             "CHOICE 1 ❌: She explained the scar to strangers.\n",
 792 |             "CHOICE 2 ✅: She hid the scar with makeup.\n",
 793 |             "PREDICTED: choice 2\n",
 794 |             "======== 61 / 100 ========\n",
 795 |             "PREMISE: I planted the seeds in the soil.\n",
 796 |             "CHOICE 1 ❌: I watered the soil.\n",
 797 |             "CHOICE 2 ✅: The seeds sprouted.\n",
 798 |             "PREDICTED: choice 2\n",
 799 |             "======== 62 / 100 ========\n",
 800 |             "PREMISE: The boy filled the bucket with sand.\n",
 801 |             "CHOICE 1 ❌: He was collecting seashells in the sand.\n",
 802 |             "CHOICE 2 ✅: He was building a sand castle.\n",
 803 |             "PREDICTED: choice 2\n",
 804 |             "======== 63 / 100 ========\n",
 805 |             "PREMISE: The mother scolded her daughter.\n",
 806 |             "CHOICE 1 ✅: Her daughter burped at the kitchen table.\n",
 807 |             "CHOICE 2 ❌: Her daughter set the kitchen table.\n",
 808 |             "PREDICTED: choice 2\n",
 809 |             "======== 64 / 100 ========\n",
 810 |             "PREMISE: The host of the trivia show called on the contestant for an answer.\n",
 811 |             "CHOICE 1 ❌: The contestant was in the lead.\n",
 812 |             "CHOICE 2 ✅: The contestant rang the buzzer.\n",
 813 |             "PREDICTED: choice 2\n",
 814 |             "======== 65 / 100 ========\n",
 815 |             "PREMISE: The computer crashed.\n",
 816 |             "CHOICE 1 ❌: I backed up my files.\n",
 817 |             "CHOICE 2 ✅: I downloaded a virus.\n",
 818 |             "PREDICTED: choice 2\n",
 819 |             "======== 66 / 100 ========\n",
 820 |             "PREMISE: The host cancelled the party.\n",
 821 |             "CHOICE 1 ✅: She was certain she had the flu.\n",
 822 |             "CHOICE 2 ❌: She worried she would catch the flu.\n",
 823 |             "PREDICTED: choice 2\n",
 824 |             "======== 67 / 100 ========\n",
 825 |             "PREMISE: The parents left their children with a babysitter.\n",
 826 |             "CHOICE 1 ❌: Their youngest child started attending preschool.\n",
 827 |             "CHOICE 2 ✅: They made plans to celebrate their anniversary.\n",
 828 |             "PREDICTED: choice 2\n",
 829 |             "======== 68 / 100 ========\n",
 830 |             "PREMISE: The boy mimicked his older brother.\n",
 831 |             "CHOICE 1 ✅: The boy looked up to his older brother.\n",
 832 |             "CHOICE 2 ❌: The boy wrestled with his older brother.\n",
 833 |             "PREDICTED: choice 2\n",
 834 |             "======== 69 / 100 ========\n",
 835 |             "PREMISE: The man went into denial about the tragedy.\n",
 836 |             "CHOICE 1 ❌: He got over it easily.\n",
 837 |             "CHOICE 2 ✅: He refused to talk about it.\n",
 838 |             "PREDICTED: choice 2\n",
 839 |             "======== 70 / 100 ========\n",
 840 |             "PREMISE: The inside of the train went dark.\n",
 841 |             "CHOICE 1 ✅: The train travelled through a tunnel.\n",
 842 |             "CHOICE 2 ❌: The train ran off the tracks.\n",
 843 |             "PREDICTED: choice 2\n",
 844 |             "======== 71 / 100 ========\n",
 845 |             "PREMISE: The mirror in the bathroom fogged up.\n",
 846 |             "CHOICE 1 ✅: The girl turned on the fan.\n",
 847 |             "CHOICE 2 ❌: The girl applied her makeup.\n",
 848 |             "PREDICTED: choice 2\n",
 849 |             "======== 72 / 100 ========\n",
 850 |             "PREMISE: A man cut in front of me in the long line.\n",
 851 |             "CHOICE 1 ✅: I confronted him.\n",
 852 |             "CHOICE 2 ❌: I smiled at him.\n",
 853 |             "PREDICTED: choice 2\n",
 854 |             "======== 73 / 100 ========\n",
 855 |             "PREMISE: The man broke his toe.\n",
 856 |             "CHOICE 1 ❌: He got a hole in his sock.\n",
 857 |             "CHOICE 2 ✅: He dropped a hammer on his foot.\n",
 858 |             "PREDICTED: choice 2\n",
 859 |             "======== 74 / 100 ========\n",
 860 |             "PREMISE: The man had lipstick on his cheek.\n",
 861 |             "CHOICE 1 ✅: The woman kissed him.\n",
 862 |             "CHOICE 2 ❌: The woman made him blush.\n",
 863 |             "PREDICTED: choice 2\n",
 864 |             "======== 75 / 100 ========\n",
 865 |             "PREMISE: It got dark outside.\n",
 866 |             "CHOICE 1 ❌: Snowflakes began to fall from the sky.\n",
 867 |             "CHOICE 2 ✅: The moon became visible in the sky.\n",
 868 |             "PREDICTED: choice 2\n",
 869 |             "======== 76 / 100 ========\n",
 870 |             "PREMISE: The woman fanned herself with her hand.\n",
 871 |             "CHOICE 1 ❌: She installed the air conditioner in the room.\n",
 872 |             "CHOICE 2 ✅: The air conditioner in the room broke.\n",
 873 |             "PREDICTED: choice 2\n",
 874 |             "======== 77 / 100 ========\n",
 875 |             "PREMISE: The book was deemed inappropriate for children.\n",
 876 |             "CHOICE 1 ✅: Schools banned it from its libraries.\n",
 877 |             "CHOICE 2 ❌: Teachers required students to read it.\n",
 878 |             "PREDICTED: choice 2\n",
 879 |             "======== 78 / 100 ========\n",
 880 |             "PREMISE: The scientist conducted an experiment.\n",
 881 |             "CHOICE 1 ✅: She validated her theory.\n",
 882 |             "CHOICE 2 ❌: She fabricated her data.\n",
 883 |             "PREDICTED: choice 2\n",
 884 |             "======== 79 / 100 ========\n",
 885 |             "PREMISE: My jaw dropped open.\n",
 886 |             "CHOICE 1 ❌: I was unhappy.\n",
 887 |             "CHOICE 2 ✅: I was shocked.\n",
 888 |             "PREDICTED: choice 2\n",
 889 |             "======== 80 / 100 ========\n",
 890 |             "PREMISE: The woman's date wanted to look like a gentleman.\n",
 891 |             "CHOICE 1 ✅: He opened the door for her.\n",
 892 |             "CHOICE 2 ❌: He asked her if she liked sushi.\n",
 893 |             "PREDICTED: choice 2\n",
 894 |             "======== 81 / 100 ========\n",
 895 |             "PREMISE: The service at the restaurant was slow.\n",
 896 |             "CHOICE 1 ❌: There were many empty tables.\n",
 897 |             "CHOICE 2 ✅: The restaurant was crowded.\n",
 898 |             "PREDICTED: choice 2\n",
 899 |             "======== 82 / 100 ========\n",
 900 |             "PREMISE: The competition ended.\n",
 901 |             "CHOICE 1 ❌: The teams got pumped up.\n",
 902 |             "CHOICE 2 ✅: The teams shook hands.\n",
 903 |             "PREDICTED: choice 2\n",
 904 |             "======== 83 / 100 ========\n",
 905 |             "PREMISE: The girl applied the scissors to the paper.\n",
 906 |             "CHOICE 1 ✅: The paper sliced apart.\n",
 907 |             "CHOICE 2 ❌: The paper crinkled.\n",
 908 |             "PREDICTED: choice 2\n",
 909 |             "======== 84 / 100 ========\n",
 910 |             "PREMISE: My daughter suffered an allergy attack.\n",
 911 |             "CHOICE 1 ❌: I encouraged her to take deep breaths.\n",
 912 |             "CHOICE 2 ✅: I took her to the emergency room.\n",
 913 |             "PREDICTED: choice 2\n",
 914 |             "======== 85 / 100 ========\n",
 915 |             "PREMISE: The man closed the book.\n",
 916 |             "CHOICE 1 ✅: He finished reading it.\n",
 917 |             "CHOICE 2 ❌: It provoked him to think.\n",
 918 |             "PREDICTED: choice 2\n",
 919 |             "======== 86 / 100 ========\n",
 920 |             "PREMISE: The clouds looked dark.\n",
 921 |             "CHOICE 1 ❌: I brought my laptop to work.\n",
 922 |             "CHOICE 2 ✅: I brought my umbrella to work.\n",
 923 |             "PREDICTED: choice 2\n",
 924 |             "======== 87 / 100 ========\n",
 925 |             "PREMISE: Our group's conversation gradually lulled to silence.\n",
 926 |             "CHOICE 1 ✅: I felt awkward.\n",
 927 |             "CHOICE 2 ❌: I became enraged.\n",
 928 |             "PREDICTED: choice 2\n",
 929 |             "======== 88 / 100 ========\n",
 930 |             "PREMISE: The man felt thankful to be alive.\n",
 931 |             "CHOICE 1 ✅: He was cured of cancer.\n",
 932 |             "CHOICE 2 ❌: His wife was diagnosed with cancer.\n",
 933 |             "PREDICTED: choice 2\n",
 934 |             "======== 89 / 100 ========\n",
 935 |             "PREMISE: The bully stuck his foot out in front of his classmate.\n",
 936 |             "CHOICE 1 ❌: The bully picked up his classmate.\n",
 937 |             "CHOICE 2 ✅: The bully's classmate tripped.\n",
 938 |             "PREDICTED: choice 2\n",
 939 |             "======== 90 / 100 ========\n",
 940 |             "PREMISE: The teenager ran away from home.\n",
 941 |             "CHOICE 1 ❌: He relied on his parents.\n",
 942 |             "CHOICE 2 ✅: His parents abused him.\n",
 943 |             "PREDICTED: choice 2\n",
 944 |             "======== 91 / 100 ========\n",
 945 |             "PREMISE: The waistband on my pants was loose.\n",
 946 |             "CHOICE 1 ✅: I put on a belt.\n",
 947 |             "CHOICE 2 ❌: I put on a hat.\n",
 948 |             "PREDICTED: choice 2\n",
 949 |             "======== 92 / 100 ========\n",
 950 |             "PREMISE: The woman wanted to be a doctor.\n",
 951 |             "CHOICE 1 ❌: She visited the hospital.\n",
 952 |             "CHOICE 2 ✅: She went to medical school.\n",
 953 |             "PREDICTED: choice 2\n",
 954 |             "======== 93 / 100 ========\n",
 955 |             "PREMISE: The man's eye became infected.\n",
 956 |             "CHOICE 1 ✅: He went blind.\n",
 957 |             "CHOICE 2 ❌: He put on glasses.\n",
 958 |             "PREDICTED: choice 2\n",
 959 |             "======== 94 / 100 ========\n",
 960 |             "PREMISE: The climbers reached the peak of the mountain.\n",
 961 |             "CHOICE 1 ❌: They encountered an avalanche.\n",
 962 |             "CHOICE 2 ✅: They congratulated each other.\n",
 963 |             "PREDICTED: choice 2\n",
 964 |             "======== 95 / 100 ========\n",
 965 |             "PREMISE: The woman had a religious awakening.\n",
 966 |             "CHOICE 1 ✅: She began going to church.\n",
 967 |             "CHOICE 2 ❌: She began travelling abroad.\n",
 968 |             "PREDICTED: choice 2\n",
 969 |             "======== 96 / 100 ========\n",
 970 |             "PREMISE: I approached the man.\n",
 971 |             "CHOICE 1 ❌: He looked busy.\n",
 972 |             "CHOICE 2 ✅: He looked familiar.\n",
 973 |             "PREDICTED: choice 2\n",
 974 |             "======== 97 / 100 ========\n",
 975 |             "PREMISE: The print on the brochure was tiny.\n",
 976 |             "CHOICE 1 ✅: The man put his glasses on.\n",
 977 |             "CHOICE 2 ❌: The man retrieved a pen from his pocket.\n",
 978 |             "PREDICTED: choice 2\n",
 979 |             "======== 98 / 100 ========\n",
 980 |             "PREMISE: The bartender refused to serve the patron.\n",
 981 |             "CHOICE 1 ❌: The patron was alone.\n",
 982 |             "CHOICE 2 ✅: The patron was drunk.\n",
 983 |             "PREDICTED: choice 2\n",
 984 |             "======== 99 / 100 ========\n",
 985 |             "PREMISE: The man got a discount on his groceries.\n",
 986 |             "CHOICE 1 ❌: He greeted the cashier.\n",
 987 |             "CHOICE 2 ✅: He used a coupon.\n",
 988 |             "PREDICTED: choice 2\n",
 989 |             "======== 100 / 100 ========\n",
 990 |             "PREMISE: The guests of the party hid behind the couch.\n",
 991 |             "CHOICE 1 ✅: It was a surprise party.\n",
 992 |             "CHOICE 2 ❌: It was a birthday party.\n",
 993 |             "PREDICTED: choice 2\n",
 994 |             "Accuracy of your prompt on 100 test examples: 55%\n"
 995 |           ]
 996 |         }
 997 |       ],
 998 |       "source": [
 999 |         "def classify_baseline(premise: str, choice1: str, choice2:str) -> str:\n",
1000 |         "  \"\"\" Given a review, returns a sentiment prediction, 0 for negative, 1 for positive.\"\"\"\n",
1001 |         "\n",
1002 |         "  eval_template = \"\"\"Which of the following makes more sense?\n",
1003 |         "\n",
1004 |         "  Choice 1: {premise} This happened because: {choice1}\n",
1005 |         "  Choice 2: {premise} This happened because: {choice2}\n",
1006 |         "\n",
1007 |         "  {label} makes more sense.\n",
1008 |         "  \"\"\"\n",
1009 |         "  label_map = {0: \"Choice 1\", 1: \"Choice 2\"}\n",
1010 |         "\n",
1011 |         "  label_to_score = {}\n",
1012 |         "  for label, label_str in label_map.items():\n",
1013 |         "    label_prompt = prompt + eval_template.format(\n",
1014 |         "        premise=premise, choice1=choice1, choice2=choice2, label=label_str)\n",
1015 |         "    _, score = engine.score(label_prompt)\n",
1016 |         "    llm_score_for_label = np.mean(score)\n",
1017 |         "\n",
1018 |         "    label_to_score[label] = llm_score_for_label\n",
1019 |         "\n",
1020 |         "  return max(label_to_score, key=label_to_score.get)\n",
1021 |         "\n",
1022 |         "\n",
1023 |         "def evaluate(dataset, verbose: bool=False) -> float:\n",
1024 |         "  \"\"\" Evaluate your prompt on the test set \"\"\"\n",
1025 |         "  correct = []\n",
1026 |         "  for i, instance in enumerate(dataset):\n",
1027 |         "    label = instance[\"label\"]\n",
1028 |         "    predicted = classify_baseline(\n",
1029 |         "        instance[\"premise\"], instance[\"choice1\"], instance[\"choice2\"])\n",
1030 |         "    correct.append(1 if label == predicted else 0)\n",
1031 |         "\n",
1032 |         "    if verbose:\n",
1033 |         "      print(f\"======== {i+1} / {len(dataset)} ========\")\n",
1034 |         "      print(f\"PREMISE: {instance['premise']}\")\n",
1035 |         "      print(f\"CHOICE 1 {'✅' if not label else '❌'}: {instance['choice1']}\")\n",
1036 |         "      print(f\"CHOICE 2 {'✅' if label else '❌'}: {instance['choice2']}\")\n",
1037 |         "      print(f\"PREDICTED: {'choice 2' if predicted else 'choice 1'}\")\n",
1038 |         "\n",
1039 |         "  acc = sum(correct) / len(correct)\n",
1040 |         "  return acc\n",
1041 |         "\n",
1042 |         "#  Once you have chosen your prompts, for final evaluation, replace dev_data\n",
1043 |         "# with test_data.\n",
1044 |         "acc = evaluate(dev_data, verbose=True)\n",
1045 |         "print(f\"Accuracy of your prompt on {len(test_data)} test examples: {acc:.0%}\")"
1046 |       ]
1047 |     }
1048 |   ],
1049 |   "metadata": {
1050 |     "colab": {
1051 |       "provenance": [],
1052 |       "toc_visible": true
1053 |     },
1054 |     "kernelspec": {
1055 |       "display_name": "Python 3",
1056 |       "name": "python3"
1057 |     },
1058 |     "language_info": {
1059 |       "name": "python"
1060 |     }
1061 |   },
1062 |   "nbformat": 4,
1063 |   "nbformat_minor": 0
1064 | }
1065 | 


--------------------------------------------------------------------------------
/homework_materials/hw1_template.tex:
--------------------------------------------------------------------------------
 1 | %--------------------
 2 | % Packages
 3 | % -------------------
 4 | \documentclass[11pt,a4paper]{article}
 5 | \usepackage[utf8x]{inputenc}
 6 | \usepackage[T1]{fontenc}
 7 | %\usepackage{gentium}
 8 | \usepackage{mathptmx} % Use Times Font
 9 | 
10 | \usepackage[pdftex]{graphicx} % Required for including pictures
11 | \usepackage[swedish]{babel} % Swedish translations
12 | \usepackage[pdftex,linkcolor=black,pdfborder={0 0 0}]{hyperref} % Format links for pdf
13 | \usepackage{calc} % To reset the counter in the document after title page
14 | \usepackage{enumitem} % Includes lists
15 | \frenchspacing % No double spacing between sentences
16 | \linespread{1.2} % Set linespace
17 | \usepackage[a4paper, lmargin=0.1666\paperwidth, rmargin=0.1666\paperwidth, tmargin=0.1111\paperheight, bmargin=0.1111\paperheight]{geometry} %margins
18 | %\usepackage{parskip}
19 | 
20 | \usepackage[all]{nowidow} % Tries to remove widows
21 | \usepackage[protrusion=true,expansion=true]{microtype} % Improves typography, load after fontpackage is selected
22 | \usepackage[mmddyyyy]{datetime}
23 | \usepackage{tcolorbox}
24 | 
25 | 
26 | %-----------------------
27 | % Set pdf information and add title, fill in the fields
28 | %-----------------------
29 | \hypersetup{ 	
30 | pdfsubject = {},
31 | pdftitle = {11-667 Homework 1},
32 | pdfauthor = {}
33 | }
34 | 
35 | %-----------------------
36 | % Begin document
37 | %-----------------------
38 | \begin{document} 
39 | 
40 | \title{11-667 Homework 1}
41 | \author{Student Name, Andrew ID}
42 | \date{Due date: September 21, 2023 at 2 PM}
43 | \maketitle
44 | 
45 | \section{Observing the Impact of Decoding Strategy}
46 | \subsection{Rolling a Twenty-Sided Die}
47 | \subsection{Longform Generation}
48 | 
49 | Here is a suggestion for how you can add generations into your latex.
50 | \begin{tcolorbox}[width=\textwidth,title={Prompt=Lorem ipsum, top-p=0},]
51 | dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
52 | \end{tcolorbox}
53 | 
54 | \section{Measuring Perplexity}
55 | 
56 | \section{Experimenting with Few-Shot Prompting}
57 | 
58 | \section{Investigating Knowledge Across Different Model Sizes}
59 | \subsection{Few-Shot Learning for the Choice of Plausible Alternatives Task}
60 | \subsection{Few-shot Learning for Generation Tasks}
61 | 
62 | \section{Comparing Pre-Trained and Fine-tuned Models}
63 | 
64 | \section{Acknowledgment of AI Tools}
65 | 
66 | \section{Optional Question: Give us Feedback}
67 | 
68 | 
69 | 
70 | \end{document}
71 | 


--------------------------------------------------------------------------------
/homework_materials/hw2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/homework_materials/hw2.pdf


--------------------------------------------------------------------------------
/homework_materials/hw2_latex_template.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/homework_materials/hw2_latex_template.zip


--------------------------------------------------------------------------------
/homework_materials/hw2_starter_code.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/homework_materials/hw2_starter_code.zip


--------------------------------------------------------------------------------
/homework_materials/hw3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/homework_materials/hw3.pdf


--------------------------------------------------------------------------------
/homework_materials/hw3_latex_template.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/homework_materials/hw3_latex_template.zip


--------------------------------------------------------------------------------
/homework_materials/hw3_starter_code.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/homework_materials/hw3_starter_code.zip


--------------------------------------------------------------------------------
/homework_materials/project_midpoint_template.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/homework_materials/project_midpoint_template.zip


--------------------------------------------------------------------------------
/homework_materials/project_proposal_template.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-llms-class/cmu-llm-class-website-2023/154bf5d681cdaefab070bcc5b1afd859c5c090ff/homework_materials/project_proposal_template.zip


--------------------------------------------------------------------------------
/project.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: page
  3 | title: Project
  4 | description: >-
  5 |     Instructions Course Project
  6 | ---
  7 | 
  8 | # 11-667 Projects
  9 | {:.no_toc}
 10 | 
 11 | ## Team Assignments 
 12 | Each group has been assigned to a project TA. Please check Piazza for the TA assignments. The role of your TA is to guide and advise your projects, so please reach out to your assigned project TA first if you have questions about your approach, or if you have any issues with the project in general.
 13 | 
 14 | You can meet with your TA as much or as little as you want, but there are two **mandatory** meetings with your TA to make sure that your group is on track:
 15 | 
 16 | - Week of Oct 2: TA matching announced. Your TA will reach out to you in order to schedule an introductory meeting before Fri Oct 13.
 17 | - Oct 31: Midterm project meetings. During class, TAs and instructors will talk to each group one-on-one to discuss the project. Please note that there will be no formal in-class presentation.
 18 | 
 19 | ## Logistics:
 20 | Team: Form a team of 3-5 people. Four is recommended. 
 21 | 
 22 | ## Final Deliverables Instructions
 23 | You have three deliverables in the final weeks of class.
 24 | 
 25 | * **Due December 7, 2-3:30 PM**: In class poster session in GHC 7107.
 26 |   * The library has some [good resources](https://www.cmu.edu/student-success/other-resources/resource-descriptions/science-research-posters.html) on designing an effective poster.
 27 |   * **VERY IMPORTANT**: You can print your poster for free following [this link](https://computing.cs.cmu.edu/desktop/printing-posters), but you should expect posters to take 2-3 days to print, so make sure to submit your poster early enough that it is ready! We will not accept excuses for posters not ready in time.
 28 |   * During the poster session, the instructors and TAs will visit your poster and ask you to go through it. They will also ask questions about your work.
 29 | * **Due December 8, at 8 PM**: Deadline for submitting final report.
 30 |   * You should write your project report following the conventions of an 8-page ACL paper. Experimental details which do not fit within 8 pages may be included in an Appendix.
 31 |   * Grading: Your report will be graded by TAs instructors OTHER than your assigned mentor. They will grade following the [ARR reviewer rubric](https://aclrollingreview.org/reviewform). Like an ACL paper, your report should be self-contained and comprehensible to someone unfamiliar with your project.
 32 | * **Final peer feedback**
 33 |   * **Due December 11 at 8 PM**: At the poster session, choose five posters to fill out the [following form](https://forms.gle/tqWiBjJ7A7NBVW1S7) over.
 34 | 
 35 | If you have been given permission to not attend the final poster sessions (for example, if you will be presenting at EMNLP), please schedule a meeting with Daphne and Chenyan sometime December 4-14 to present your poster to us. This will be an individual meeting. Your team will still be expected to show up to the in-person poster session on December 7.
 36 | 
 37 | ### A note on negative results
 38 | 
 39 | Sometimes research doesn’t work out. It is fine to have negative results in your report so long as you discuss *why* your hypothesis ended up being false, using experiments to back your explanation.
 40 | 
 41 | 
 42 | ## Midpoint Instructions
 43 | Your report should be in the style of an ACL submission. You may (but are not required to use the template [linked here](https://github.com/daphnei/cmu-llm-class/raw/main/homework_materials/project_midpoint_template.zip). Where relevant, you may copy text from your proposal. We will evalute youe midpoint report on:
 44 | 
 45 | 1. Have you made progress toward the stated goals in your proposal? You should have run at least one set of experiments by now.
 46 | 2. Do you have a plan for in place for completing your project by the end of the semester?
 47 | 3. Is your midpoint report well-written in the style of an ACL paper?
 48 | 
 49 | You should also prepare slides for a 3-minute in-class presentation. While your report is due on October 31 at 2 PM, we need your slides by October 30 at 8 PM so we have time to assemble them into a single slide deck.
 50 | 
 51 | ## Proposal Instructions
 52 | Please use the template [linked here](https://github.com/daphnei/cmu-llm-class/raw/main/homework_materials/project_proposal_template.zip) for the project proposal. Your proposal should have all the sections shown on this template. It should be no more than 2 pages. You should also plan a 2.5 minute presenation of the idea (3 slides maximum) to present in class.
 53 | 
 54 | If you would like feedback on your proposed idea, submit the [request for feedback form](https://forms.gle/182Trs8zddrjhv6B7) by end of day September 21st, and the TAs will be in touch.
 55 | 
 56 | ## Checkpoints: 
 57 | - Sep 12: Project is out. Start forming teams.
 58 | - Sep 21: Last day to request a quick check of your project ideas with TAs, with one passage maximum to describe your idea. Finalize teams (if you need help with team making, also contact TAs).
 59 | - Sep 26: Two-page project proposal write up due, in ACL format. Spotlight slide deck due (3 slide maximum).
 60 | - Sep 28: In-class project proposals. 2.5 min spotlight style presentations.
 61 |   - Peer scoring: each of you rates other project presentations in 1-5 likert scale (5 being you love the presentation). The scoring is based on the presentation not the project proposal content.
 62 |   - Peer feedback: each of you gives short suggestions to at least three other teams.
 63 | - Sep 30: Projects are matched with Instructor/TA mentors.
 64 | - Oct 30: Submit your slides by 8 PM on this date. **No late slide submissions allowed.**
 65 | - Oct 31: Midpoint progress report due.
 66 |   - Four pages of write up in ACL format.
 67 |   - Three minute presentation in class highlighting your progress.
 68 | - Oct 30-Nov 4: Schedule a midpoint check-in meeting with your assigned team mentor.
 69 | - Dec 7: Final Project Presentation poster session.
 70 | - Dec 8: Final project due. Eight pages of write up in ACL format (besides unlimited references and appendix). Appendices are allowed but may not be considered in grading. An additional page after the appendix stating the specific contribution of each team member, with acknowledgement of all team members, is mandatory.
 71 | 
 72 | ## Project Directions:
 73 | The final project is expected to be a novel contribution, either to fundamental research on language models  or to downstream applications of LLM technology. For a top grade, you must demonstrate a significant amount of technical sophistication in your project. If you have an idea and you aren’t sure if it is of sufficient scope, you are welcome to come to office hours to pitch it to us ahead of the project proposal deadline.
 74 | 
 75 | ## Grading Rubric: 
 76 | - The effort on the project of each individual team member [20%, different to each member]
 77 | - The completeness of the project [20%]
 78 | - Project proposal report [5%]
 79 | - Project midterm report [10%]
 80 | - Project final report write-up [30%]
 81 | - Project spotlight, Midterm presentation and final presentation [5% each for a total of 15%]
 82 | - Bonus: the potential impact of the project idea when fully flushed out. [+10 extra %]
 83 | 
 84 | ## Grades
 85 | A+: Exceptional or surprising. Could be submitted largely as-is as a paper to an academic conference. A new application or study or improvements, etc., of LLMs that expands very significantly on existing literature and is of benefit to the broader community.
 86 | 
 87 | A: A respectable research contribution that is novel and effective, and could be submitted largely as-is as a paper to an academic conference. Or an application with multiple well-thought out features which aren’t present in existing applications. Or a study of existing LLMs that revealed novel insights/concerns/understandings that were previously unknown to the community.
 88 | 
 89 | A-: A respectable research contribution that has some small incomplete parts, but is largely complete and promising. Or an application which contains promising, well-thought out features but has usability issues or insufficient technical scope.
 90 | B+: An idea that is novel, but the results may not be there yet, or the analysis is short.
 91 | 
 92 | B or B-: Results, analysis, or novelty are lacking.
 93 | 
 94 | C+ or below: Clear lack of effort or incompleteness.
 95 | 
 96 | Negative Results: Sometimes experiments don’t work as planned. If you try hard to get positive results but are not successful, you may still get a good grade by clearly describing why you thought your methods would work, and then performing an analysis of why your initial assumptions were incorrect, leading to results that did not match your initial expectations. The bar for paper writing, experimentation, and analysis will be a bit higher in these cases, as we want to make sure that you really made a serious effort.
 97 | 
 98 | ### Example project ideas:
 99 | We will gradually share example project ideas on this doc: 
100 | [project ideas](https://docs.google.com/document/d/1X-omaxkP9VAFtLRu7QoZEBait2ZDc827FyqwMZC_6R4/edit?usp=sharing). 
101 | 
102 | These ideas are for reference. 
103 | You can choose one of these example ideas (we do not guarantee any of these ideas will work out, same with all research ideas) or build something of your own.
104 | 


--------------------------------------------------------------------------------
/project_peer_feedback.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Project Peer Feedback
 4 | description: Instructions for how to submit peer feedback.
 5 | ---
 6 | 
 7 | ## Please use the folowing form to submit peer review.
 8 | You can see the teams you have been assigned to review at the spreadsheet linked in the form.
 9 | 
10 | <iframe src="https://docs.google.com/forms/d/e/1FAIpQLSf_mExmjG5Q8NlA_hBmvCm3tyLooU4arj6Fs0YedtpDffDfkQ/viewform?embedded=true" width="640" height="1710" frameborder="0" marginheight="0" marginwidth="0">Loading…</iframe>
11 | 
12 | 


--------------------------------------------------------------------------------
/quiz.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Quiz Instructions
 4 | description: Instructions for how to complete the quiz.
 5 | ---
 6 | 
 7 | ## If you are enrolled in the class, please complete the quiz on [Canvas](https://canvas.cmu.edu/courses/36424/quizzes/).
 8 | 
 9 | ## If you are on the waitlist, use the following form.
10 | 
11 | <iframe src="https://docs.google.com/forms/d/e/1FAIpQLScQ7W0nKcfG-vycQqYtYOEuzFjQH7JpF011A1pEvcsIlPPbHw/viewform?embedded=true" width="640" height="881" frameborder="0" marginheight="0" marginwidth="0">Loading…</iframe>


--------------------------------------------------------------------------------
/schedule.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Weekly Schedule
 4 | description: The weekly event schedule.
 5 | ---
 6 | 
 7 | # Weekly Schedule
 8 | 
 9 | {% for schedule in site.schedules %}
10 | {{ schedule }}
11 | {% endfor %}
12 | 


--------------------------------------------------------------------------------
/staff.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: page
 3 | title: Staff
 4 | description: A listing of all the course staff members.
 5 | ---
 6 | 
 7 | # Staff
 8 | 
 9 | 
10 | For questions about course material or logistics, please use [Piazza]().
11 | For questions specific to you (for example, inquiring about a homework grade), please email the alias [llms-11-667@andrew.cmu.edu](mailto:llms-11-667@andrew.cmu.edu).
12 | 
13 | ## Instructors
14 | 
15 | {% assign instructors = site.staffers | where: 'role', 'Instructor' %}
16 | {% for staffer in instructors %}
17 | {{ staffer }}
18 | {% endfor %}
19 | 
20 | {% assign teaching_assistants = site.staffers | where: 'role', 'Teaching Assistant' %}
21 | {% assign num_teaching_assistants = teaching_assistants | size %}
22 | {% if num_teaching_assistants != 0 %}
23 | ## Teaching Assistants
24 | 
25 | {% for staffer in teaching_assistants %}
26 | {{ staffer }}
27 | {% endfor %}
28 | {% endif %}
29 | 


--------------------------------------------------------------------------------