├── .gitignore ├── LangChain Cookbook Part 1 - Fundamentals.ipynb ├── LangChain Cookbook Part 2 - Use Cases.ipynb ├── README.md ├── SUMMARY.md ├── agents ├── Agents + ZapierToolkit.ipynb └── Agents.ipynb ├── bots └── Twitter_Reply_Bot │ └── Twitter Reply Bot Notebook.ipynb ├── chains └── Chain Types.ipynb ├── chatapi └── ChatAPI + LangChain Basics.ipynb ├── data ├── ExplainThisBobScreenshot2.png ├── IntoThinAirBook.pdf ├── LinkedInIndustries.csv ├── LinkedInSubIndustries.csv ├── PaulGrahamEssayMedium │ ├── .DS_Store │ ├── fr.txt │ ├── guidetoinvestors.txt │ ├── mit.txt │ ├── notnot.txt │ ├── popular.txt │ ├── re.txt │ ├── road.txt │ ├── start.txt │ ├── startupfunding.txt │ ├── startupideas.txt │ ├── wealth.txt │ └── worked.txt ├── PaulGrahamEssaySmall │ ├── .DS_Store │ ├── cred.txt │ ├── disc.txt │ ├── fix.txt │ ├── fp.txt │ ├── getideas.txt │ ├── lwba.txt │ ├── nft.txt │ ├── noob.txt │ ├── nov.txt │ ├── pow.txt │ ├── prop62.txt │ ├── rootsoflisp.txt │ ├── rss.txt │ ├── todo.txt │ └── twitter.txt ├── PaulGrahamEssays │ ├── .DS_Store │ ├── 13sentences.txt │ ├── 5founders.txt │ ├── 6631327.txt │ ├── 95.txt │ ├── ace.txt │ ├── addiction.txt │ ├── airbnb.txt │ ├── airbnbs.txt │ ├── alien.txt │ ├── altair.txt │ ├── ambitious.txt │ ├── america.txt │ ├── angelinvesting.txt │ ├── aord.txt │ ├── apple.txt │ ├── artistsship.txt │ ├── avg.txt │ ├── badeconomy.txt │ ├── before.txt │ ├── better.txt │ ├── bias.txt │ ├── boss.txt │ ├── bronze.txt │ ├── bubble.txt │ ├── charisma.txt │ ├── cities.txt │ ├── college.txt │ ├── colleges.txt │ ├── conformism.txt │ ├── control.txt │ ├── convergence.txt │ ├── convince.txt │ ├── copy.txt │ ├── corpdev.txt │ ├── cred.txt │ ├── credentials.txt │ ├── desres.txt │ ├── determination.txt │ ├── die.txt │ ├── diff.txt │ ├── disagree.txt │ ├── disc.txt │ ├── discover.txt │ ├── distraction.txt │ ├── divergence.txt │ ├── donate.txt │ ├── ds.txt │ ├── early.txt │ ├── earnest.txt │ ├── ecw.txt │ ├── equity.txt │ ├── essay.txt │ ├── ffb.txt │ ├── fh.txt 
│ ├── fix.txt │ ├── fn.txt │ ├── founders.txt │ ├── foundersatwork.txt │ ├── foundervisa.txt │ ├── fp.txt │ ├── fr.txt │ ├── fundraising.txt │ ├── future.txt │ ├── gap.txt │ ├── gba.txt │ ├── genius.txt │ ├── getideas.txt │ ├── gh.txt │ ├── good.txt │ ├── goodart.txt │ ├── goodtaste.txt │ ├── googles.txt │ ├── growth.txt │ ├── guidetoinvestors.txt │ ├── hackernews.txt │ ├── head.txt │ ├── herd.txt │ ├── heresy.txt │ ├── heroes.txt │ ├── highres.txt │ ├── hiresfund.txt │ ├── hiring.txt │ ├── hp.txt │ ├── hs.txt │ ├── hubs.txt │ ├── hundred.txt │ ├── hw.txt │ ├── hwh.txt │ ├── icad.txt │ ├── ideas.txt │ ├── identity.txt │ ├── iflisp.txt │ ├── ineq.txt │ ├── inequality.txt │ ├── investors.txt │ ├── invtrend.txt │ ├── island.txt │ ├── javacover.txt │ ├── jessica.txt │ ├── judgement.txt │ ├── kate.txt │ ├── kids.txt │ ├── know.txt │ ├── ladder.txt │ ├── langdes.txt │ ├── laundry.txt │ ├── lesson.txt │ ├── lies.txt │ ├── love.txt │ ├── lwba.txt │ ├── mac.txt │ ├── makersschedule.txt │ ├── marginal.txt │ ├── maybe.txt │ ├── mean.txt │ ├── microsoft.txt │ ├── mit.txt │ ├── mod.txt │ ├── name.txt │ ├── nerds.txt │ ├── newideas.txt │ ├── newthings.txt │ ├── nft.txt │ ├── noob.txt │ ├── noop.txt │ ├── notnot.txt │ ├── nov.txt │ ├── nthings.txt │ ├── opensource.txt │ ├── organic.txt │ ├── orth.txt │ ├── own.txt │ ├── patentpledge.txt │ ├── pgh.txt │ ├── philosophy.txt │ ├── pinch.txt │ ├── polls.txt │ ├── popular.txt │ ├── pow.txt │ ├── power.txt │ ├── prcmc.txt │ ├── procrastination.txt │ ├── progbot.txt │ ├── prop62.txt │ ├── property.txt │ ├── publishing.txt │ ├── pypar.txt │ ├── ramenprofitable.txt │ ├── randomness.txt │ ├── re.txt │ ├── read.txt │ ├── real.txt │ ├── really.txt │ ├── relres.txt │ ├── revolution.txt │ ├── richnow.txt │ ├── road.txt │ ├── ronco.txt │ ├── rootsoflisp.txt │ ├── rss.txt │ ├── safe.txt │ ├── say.txt │ ├── schlep.txt │ ├── seesv.txt │ ├── segway.txt │ ├── selfindulgence.txt │ ├── sfp.txt │ ├── siliconvalley.txt │ ├── simply.txt │ ├── smart.txt │ 
├── softwarepatents.txt │ ├── spam.txt │ ├── speak.txt │ ├── start.txt │ ├── startupfunding.txt │ ├── startuphubs.txt │ ├── startupideas.txt │ ├── startuplessons.txt │ ├── startupmistakes.txt │ ├── stuff.txt │ ├── submarine.txt │ ├── sun.txt │ ├── superangels.txt │ ├── swan.txt │ ├── tablets.txt │ ├── talk.txt │ ├── taste.txt │ ├── think.txt │ ├── todo.txt │ ├── top.txt │ ├── trolls.txt │ ├── twitter.txt │ ├── unions.txt │ ├── usa.txt │ ├── useful.txt │ ├── users.txt │ ├── vb.txt │ ├── vcsqueeze.txt │ ├── venturecapital.txt │ ├── vw.txt │ ├── want.txt │ ├── wealth.txt │ ├── web20.txt │ ├── webstartups.txt │ ├── weird.txt │ ├── whyyc.txt │ ├── wisdom.txt │ ├── word.txt │ ├── words.txt │ ├── work.txt │ ├── worked.txt │ ├── writing44.txt │ ├── wtax.txt │ ├── yahoo.txt │ ├── ycombinator.txt │ └── ycstart.txt ├── PaulGrahamEssaysLarge │ ├── addiction.txt │ ├── aord.txt │ ├── apple.txt │ ├── avg.txt │ ├── before.txt │ ├── bias.txt │ ├── boss.txt │ ├── copy.txt │ ├── corpdev.txt │ ├── desres.txt │ ├── diff.txt │ ├── ecw.txt │ ├── founders.txt │ ├── foundervisa.txt │ ├── gap.txt │ ├── gba.txt │ ├── gh.txt │ ├── goodtaste.txt │ ├── hubs.txt │ ├── iflisp.txt │ ├── island.txt │ ├── know.txt │ ├── langdes.txt │ ├── laundry.txt │ ├── love.txt │ ├── mod.txt │ ├── newideas.txt │ ├── nft.txt │ ├── philosophy.txt │ ├── popular.txt │ ├── pow.txt │ ├── rootsoflisp.txt │ ├── rss.txt │ ├── siliconvalley.txt │ ├── startuplessons.txt │ ├── submarine.txt │ ├── sun.txt │ ├── superangels.txt │ ├── todo.txt │ ├── unions.txt │ ├── useful.txt │ ├── vb.txt │ ├── vcsqueeze.txt │ ├── vw.txt │ ├── want.txt │ ├── web20.txt │ ├── weird.txt │ ├── wisdom.txt │ └── worked.txt ├── San_Francisco_Trees.csv ├── San_Francisco_Trees.db ├── Transcripts │ ├── MFMPod │ │ ├── mfm_pod_alex.txt │ │ ├── mfm_pod_rob.txt │ │ └── mfm_pod_steph.txt │ └── acme_co_v2.txt ├── WildBelle1.png ├── field-guide-to-data-science.pdf ├── gpt4_cost_2023_4_11.png ├── images │ └── TooEasy.png ├── matching_tone_samples.json ├── 
muir_lake_tahoe_in_winter.txt ├── question_a_book_audio.mp3 ├── state_of_the_union.txt └── thefuzz │ ├── .editorconfig │ ├── .github │ └── workflows │ │ └── ci.yml │ ├── .gitignore │ ├── .travis.yml │ ├── CHANGES.rst │ ├── LICENSE.txt │ ├── MANIFEST.in │ ├── README.rst │ ├── benchmarks.py │ ├── data │ └── titledata.csv │ ├── release │ ├── setup.py │ ├── test_thefuzz.py │ ├── test_thefuzz_hypothesis.py │ ├── test_thefuzz_pytest.py │ ├── thefuzz │ ├── StringMatcher.py │ ├── StringMatcher.pyi │ ├── __init__.py │ ├── fuzz.py │ ├── fuzz.pyi │ ├── process.py │ ├── process.pyi │ ├── string_processing.py │ ├── string_processing.pyi │ ├── utils.py │ └── utils.pyi │ └── tox.ini ├── data_generation ├── 5 Levels Of Summarization - Novice To Expert.ipynb ├── Advanced Retrieval With LangChain.ipynb ├── Ask A Book Questions.ipynb ├── Clean and Standardize Data.ipynb ├── Custom Files Question & Answer.ipynb ├── Expert Structured Output (Using Function Calling).ipynb ├── Expert Structured Output (Using Kor).ipynb ├── Exploring ChatGPT Function Calling.ipynb ├── Instructing LLMs To Match Tone.ipynb ├── Personalized Email Generation.ipynb ├── Retrieval_With_MMR.ipynb ├── Topic Modeling With Language Models.ipynb ├── Using LLMs To Summarize Personal Research.ipynb └── Working With Call or Video Transcripts.ipynb ├── getting_started └── Quickstart Guide.ipynb ├── loaders ├── Google Drive Loader.ipynb └── YouTube Loader.ipynb ├── requirements.txt └── tutorials ├── Google Drive Loader.ipynb ├── Twitter_Reply_Bot └── Twitter Reply Bot Notebook.ipynb └── YouTube Loader.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | 
share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | **/.DS_STORE -------------------------------------------------------------------------------- /SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Table of contents 2 | 3 | * [Learn LangChain](README.md) 4 | -------------------------------------------------------------------------------- /data/ExplainThisBobScreenshot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/ExplainThisBobScreenshot2.png -------------------------------------------------------------------------------- /data/IntoThinAirBook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/IntoThinAirBook.pdf -------------------------------------------------------------------------------- /data/LinkedInIndustries.csv: -------------------------------------------------------------------------------- 1 | Industry 2 | Corporate Services 3 | Recreation & Travel 4 | Legal 5 | Wellness & Fitness 6 | Entertainment 7 | Consumer Goods 8 | Design 9 | Arts 10 | Manufacturing 11 | Finance 12 | Health Care 13 | Construction 14 | Nonprofit 15 | Real Estate 16 | Software & IT Services 17 | Hardware & Networking 18 | Agriculture 19 | Education 20 | Public Administration 21 | Transportation & Logistics 22 | Public Safety 23 | Media & Communications 24 | Energy & Mining 25 | Retail -------------------------------------------------------------------------------- /data/PaulGrahamEssayMedium/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/PaulGrahamEssayMedium/.DS_Store 
-------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/PaulGrahamEssaySmall/.DS_Store -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/cred.txt: -------------------------------------------------------------------------------- 1 | April 2020I recently saw a 2 | video 3 | of TV journalists and politicians confidently 4 | saying that the coronavirus would be no worse than the flu. What 5 | struck me about it was not just how mistaken they seemed, but how 6 | daring. How could they feel safe saying such things?The answer, I realized, is that they didn't think they could get 7 | caught. They didn't realize there was any danger in making false 8 | predictions. These people constantly make false predictions, and 9 | get away with it, because the things they make predictions about 10 | either have mushy enough outcomes that they can bluster their way 11 | out of trouble, or happen so far in the future that few remember 12 | what they said.An epidemic is different. It falsifies your predictions rapidly and 13 | unequivocally.But epidemics are rare enough that these people clearly 14 | didn't realize this was even a possibility. Instead they just 15 | continued to use their ordinary m.o., which, as the epidemic has 16 | made clear, is to talk confidently about things they don't 17 | understand.An event like this is thus a uniquely powerful way of taking people's 18 | measure. As Warren Buffett said, "It's only when the tide goes out 19 | that you learn who's been swimming naked." 
And the tide has just 20 | gone out like never before. Now that we've seen the results, let's remember what we saw, because 21 | this is the most accurate test of credibility we're ever likely to have. I hope. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/disc.txt: -------------------------------------------------------------------------------- 1 | January 2017 Because biographies of famous scientists tend to 2 | edit out their mistakes, we underestimate the 3 | degree of risk they were willing to take. 4 | And because anything a famous scientist did that 5 | wasn't a mistake has probably now become the 6 | conventional wisdom, those choices don't 7 | seem risky either. Biographies of Newton, for example, understandably focus 8 | more on physics than alchemy or theology. 9 | The impression we get is that his unerring judgment 10 | led him straight to truths no one else had noticed. 11 | How to explain all the time he spent on alchemy 12 | and theology? Well, smart people are often kind of 13 | crazy. But maybe there is a simpler explanation. Maybe 14 | the smartness and the craziness were not as separate 15 | as we think. Physics seems to us a promising thing 16 | to work on, and alchemy and theology obvious wastes 17 | of time. But that's because we know how things 18 | turned out. In Newton's day the three problems 19 | seemed roughly equally promising. No one knew yet 20 | what the payoff would be for inventing what we 21 | now call physics; if they had, more people would 22 | have been working on it. And alchemy and theology 23 | were still then in the category Marc Andreessen would 24 | describe as "huge, if true." Newton made three bets. One of them worked. But 25 | they were all risky. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/fix.txt: -------------------------------------------------------------------------------- 1 | 2 | Kevin Kelleher suggested an interesting way to compare programming 3 | languages: to describe each in terms of the problem it 4 | fixes. The surprising thing is how many, and how well, languages can be 5 | described this way. 6 | 7 | Algol: Assembly language is too low-level. Pascal: Algol doesn't have enough data types. Modula: Pascal is too wimpy for systems programming. 8 | Simula: Algol isn't good enough at simulations. Smalltalk: Not everything in Simula is an object. Fortran: Assembly language is too low-level. Cobol: Fortran is scary. PL/1: Fortran doesn't have enough data types. Ada: Every existing language is missing something. Basic: Fortran is scary. APL: Fortran isn't good enough at manipulating arrays. J: APL requires its own character set. C: Assembly language is too low-level. C++: C is too low-level. Java: C++ is a kludge. And Microsoft is going to crush us. C#: Java is controlled by Sun. 9 | Lisp: Turing Machines are an awkward way to describe computation. Scheme: MacLisp is a kludge. T: Scheme has no libraries. Common Lisp: There are too many dialects of Lisp. Dylan: Scheme has no libraries, and Lisp syntax is scary. 10 | Perl: Shell scripts/awk/sed are not enough like programming languages. Python: Perl is a kludge. Ruby: Perl is a kludge, and Lisp syntax is scary. Prolog: Programming is not enough like logic. 
11 | -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/fp.txt: -------------------------------------------------------------------------------- 1 | December 2019I've seen the same pattern in many different fields: even though 2 | lots of people have worked hard in the field, only a small fraction 3 | of the space of possibilities has been explored, because they've 4 | all worked on similar things.Even the smartest, most imaginative people are surprisingly 5 | conservative when deciding what to work on. People who would never 6 | dream of being fashionable in any other way get sucked into working 7 | on fashionable problems.If you want to try working on unfashionable problems, one of the 8 | best places to look is in fields that people think have already been 9 | fully explored: essays, Lisp, venture funding — you may notice a 10 | pattern here. If you can find a new approach into a big but apparently 11 | played out field, the value of whatever you discover will be 12 | multiplied by its enormous surface area.The best protection against getting drawn into working on the same 13 | things as everyone else may be to genuinely 14 | love what you're doing. 15 | Then you'll continue to work on it even if you make the same mistake 16 | as other people and think that it's too marginal to matter. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/getideas.txt: -------------------------------------------------------------------------------- 1 | January 2023(Someone fed my essays into GPT to make something that could answer 2 | questions based on them, then asked it where good ideas come from. The 3 | answer was ok, but not what I would have said. This is what I would have said.)The way to get new ideas is to notice anomalies: what seems strange, 4 | or missing, or broken? 
You can see anomalies in everyday life (much 5 | of standup comedy is based on this), but the best place to look for 6 | them is at the frontiers of knowledge.Knowledge grows fractally. 7 | From a distance its edges look smooth, but when you learn enough 8 | to get close to one, you'll notice it's full of gaps. These gaps 9 | will seem obvious; it will seem inexplicable that no one has tried 10 | x or wondered about y. In the best case, exploring such gaps yields 11 | whole new fractal buds. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/lwba.txt: -------------------------------------------------------------------------------- 1 | 2 | After a link to 3 | Beating the Averages was posted on slashdot, 4 | some readers wanted to hear in more detail 5 | about the specific technical advantages we got from using 6 | Lisp in Viaweb. For those who are interested, 7 | here are some excerpts from a talk I gave in April 2001 at 8 | BBN Labs in Cambridge, MA. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/nft.txt: -------------------------------------------------------------------------------- 1 | May 2021Noora Health, a nonprofit I've 2 | supported for years, just launched 3 | a new NFT. It has a dramatic name, Save Thousands of Lives, 4 | because that's what the proceeds will do.Noora has been saving lives for 7 years. They run programs in 5 | hospitals in South Asia to teach new mothers how to take care of 6 | their babies once they get home. They're in 165 hospitals now. And 7 | because they know the numbers before and after they start at a new 8 | hospital, they can measure the impact they have. It is massive. 
9 | For every 1000 live births, they save 9 babies.This number comes from a study 10 | of 133,733 families at 28 different 11 | hospitals that Noora conducted in collaboration with the Better 12 | Birth team at Ariadne Labs, a joint center for health systems 13 | innovation at Brigham and Women’s Hospital and Harvard T.H. Chan 14 | School of Public Health.Noora is so effective that even if you measure their costs in the 15 | most conservative way, by dividing their entire budget by the number 16 | of lives saved, the cost of saving a life is the lowest I've seen. 17 | $1,235.For this NFT, they're going to issue a public report tracking how 18 | this specific tranche of money is spent, and estimating the number 19 | of lives saved as a result.NFTs are a new territory, and this way of using them is especially 20 | new, but I'm excited about its potential. And I'm excited to see 21 | what happens with this particular auction, because unlike an NFT 22 | representing something that has already happened, 23 | this NFT gets better as the price gets higher.The reserve price was about $2.5 million, because that's what it 24 | takes for the name to be accurate: that's what it costs to save 25 | 2000 lives. But the higher the price of this NFT goes, the more 26 | lives will be saved. What a sentence to be able to write. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/noob.txt: -------------------------------------------------------------------------------- 1 | January 2020When I was young, I thought old people had everything figured out. 2 | Now that I'm old, I know this isn't true.I constantly feel like a noob. It seems like I'm always talking to 3 | some startup working in a new field I know nothing about, or reading 4 | a book about a topic I don't understand well enough, or visiting some new 5 | country where I don't know how things work.It's not pleasant to feel like a noob. 
And the word "noob" is 6 | certainly not a compliment. And yet today I realized something 7 | encouraging about being a noob: the more of a noob you are locally, 8 | the less of a noob you are globally. For example, if you stay in your home country, you'll feel less 9 | of a noob than if you move to Farawavia, where everything works 10 | differently. And yet you'll know more if you move. 11 | So the feeling of being a noob is inversely correlated with actual 12 | ignorance. But if the feeling of being a noob is good for us, why do we dislike 13 | it? What evolutionary purpose could such an aversion serve? I think the answer is that there are two sources of feeling like a 14 | noob: being stupid, and doing something novel. Our dislike of feeling 15 | like a noob is our brain telling us "Come on, come on, figure this 16 | out." Which was the right thing to be thinking for most of human 17 | history. The life of hunter-gatherers was complex, but it didn't 18 | change as much as life does now. They didn't suddenly have to figure 19 | out what to do about cryptocurrency. So it made sense to be biased 20 | toward competence at existing problems over the discovery of new 21 | ones. It made sense for humans to dislike the feeling of being a 22 | noob, just as, in a world where food was scarce, it made sense for 23 | them to dislike the feeling of being hungry. Now that too much food is more of a problem than too little, our 24 | dislike of feeling hungry leads us astray. And I think our dislike 25 | of feeling like a noob does too. Though it feels unpleasant, and people will sometimes ridicule you 26 | for it, the more you feel like a noob, the better. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/nov.txt: -------------------------------------------------------------------------------- 1 | November 2019 If you discover something new, there's a significant chance you'll be 2 | accused of some form of heresy. To discover new things, you have 3 | to work on ideas that are good but non-obvious; if an idea is 4 | obviously good, other people are probably already working on it. 5 | One common way for a good idea to be non-obvious is for it to be hidden in the 6 | shadow of some mistaken assumption that people are very attached to. 7 | But anything you discover from working on such an idea will tend to 8 | contradict the mistaken assumption that was concealing it. And you 9 | will thus get a lot of heat from people attached to the mistaken 10 | assumption. Galileo and Darwin are famous examples of this phenomenon, 11 | but it's probably always an ingredient in the resistance to new 12 | ideas. So it's particularly dangerous for an organization or society to 13 | have a culture of pouncing on heresy. When you suppress heresies, 14 | you don't just prevent people from contradicting the mistaken 15 | assumption you're trying to protect. You also suppress any idea 16 | that implies indirectly that it's false. 17 | Every cherished mistaken assumption has 18 | a dead zone of unexplored ideas around it. And the more preposterous 19 | the assumption, the bigger the dead zone it creates. There is a positive side to this phenomenon though. If you're 20 | looking for new ideas, one way to find them is by looking for 21 | heresies. When you look at the question this way, the depressingly 22 | large dead zones around mistaken assumptions become excitingly large 23 | mines of new ideas. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/pow.txt: -------------------------------------------------------------------------------- 1 | January 2017 People who are powerful but uncharismatic will tend to be disliked. 2 | Their power makes them a target for criticism that they don't have 3 | the charisma to disarm. That was Hillary Clinton's problem. It also 4 | tends to be a problem for any CEO who is more of a builder than a 5 | schmoozer. And yet the builder-type CEO is (like Hillary) probably 6 | the best person for the job. I don't think there is any solution to this problem. It's human 7 | nature. The best we can do is to recognize that it's happening, and 8 | to understand that being a magnet for criticism is sometimes a sign 9 | not that someone is the wrong person for a job, but that they're 10 | the right one. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/prop62.txt: -------------------------------------------------------------------------------- 1 | November 2016 If you're a California voter, there is an important proposition 2 | on your ballot this year: Proposition 62, which bans the death 3 | penalty. When I was younger I used to think the debate about the death 4 | penalty was about when it's ok to take a human life. Is it ok 5 | to kill a killer? But that is not the issue here. The real world does not work like the version I was shown on TV growing up. The police 6 | often arrest the wrong person. 7 | Defendants' lawyers are often incompetent. And prosecutors 8 | are often motivated more by publicity than justice. In the real world, 9 | about 4% of people sentenced to death 10 | are innocent. 11 | So this is not about whether it's ok to kill killers. 
This 12 | is about whether it's ok to kill innocent people.A child could answer that one for you.This year, in California, you have a chance to end this, by 13 | voting yes on Proposition 62. But beware, because there is another 14 | proposition, Proposition 66, whose goal is to make it 15 | easier to execute people. So yes on 62, no on 66.It's time. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/rootsoflisp.txt: -------------------------------------------------------------------------------- 1 | May 2001 2 | 3 | (I wrote this article to help myself understand exactly 4 | what McCarthy discovered. You don't need to know this stuff 5 | to program in Lisp, but it should be helpful to 6 | anyone who wants to 7 | understand the essence of Lisp — both in the sense of its 8 | origins and its semantic core. The fact that it has such a core 9 | is one of Lisp's distinguishing features, and the reason why, 10 | unlike other languages, Lisp has dialects.)In 1960, John 11 | McCarthy published a remarkable paper in 12 | which he did for programming something like what Euclid did for 13 | geometry. He showed how, given a handful of simple 14 | operators and a notation for functions, you can 15 | build a whole programming language. 16 | He called this language Lisp, for "List Processing," 17 | because one of his key ideas was to use a simple 18 | data structure called a list for both 19 | code and data.It's worth understanding what McCarthy discovered, not 20 | just as a landmark in the history of computers, but as 21 | a model for what programming is tending to become in 22 | our own time. It seems to me that there have been 23 | two really clean, consistent models of programming so 24 | far: the C model and the Lisp model. 25 | These two seem points of high ground, with swampy lowlands 26 | between them. 
As computers have grown more powerful, 27 | the new languages being developed have been moving 28 | steadily toward the Lisp model. A popular recipe 29 | for new programming languages in the past 20 years 30 | has been to take the C model of computing and add to 31 | it, piecemeal, parts taken from the Lisp model, 32 | like runtime typing and garbage collection.In this article I'm going to try to explain in the 33 | simplest possible terms what McCarthy discovered. 34 | The point is not just to learn about an interesting 35 | theoretical result someone figured out forty years ago, 36 | but to show where languages are heading. 37 | The unusual thing about Lisp — in fact, the defining 38 | quality of Lisp — is that it can be written in 39 | itself. To understand what McCarthy meant by this, 40 | we're going to retrace his steps, with his mathematical 41 | notation translated into running Common Lisp code. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/rss.txt: -------------------------------------------------------------------------------- 1 | Aaron Swartz created a scraped 2 | feed 3 | of the essays page. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/todo.txt: -------------------------------------------------------------------------------- 1 | April 2012A palliative care nurse called Bronnie Ware made a list of the 2 | biggest regrets 3 | of the dying. Her list seems plausible. I could see 4 | myself — can see myself — making at least 4 of these 5 | 5 mistakes.If you had to compress them into a single piece of advice, it might 6 | be: don't be a cog. The 5 regrets paint a portrait of post-industrial 7 | man, who shrinks himself into a shape that fits his circumstances, 8 | then turns dutifully till he stops.The alarming thing is, the mistakes that produce these regrets are 9 | all errors of omission. 
You forget your dreams, ignore your family, 10 | suppress your feelings, neglect your friends, and forget to be 11 | happy. Errors of omission are a particularly dangerous type of 12 | mistake, because you make them by default.I would like to avoid making these mistakes. But how do you avoid 13 | mistakes you make by default? Ideally you transform your life so 14 | it has other defaults. But it may not be possible to do that 15 | completely. As long as these mistakes happen by default, you probably 16 | have to be reminded not to make them. So I inverted the 5 regrets, 17 | yielding a list of 5 commands 18 | 19 | Don't ignore your dreams; don't work too much; say what you 20 | think; cultivate friendships; be happy. 21 | 22 | which I then put at the top of the file I use as a todo list. -------------------------------------------------------------------------------- /data/PaulGrahamEssaySmall/twitter.txt: -------------------------------------------------------------------------------- 1 | April 2009Om Malik is the most recent of many people 2 | to ask why Twitter is such a big deal.The reason is that it's a new messaging 3 | protocol, where you don't specify the recipients. 4 | New protocols are rare. Or more precisely, new 5 | protocols that take off are. 6 | There are only a handful of commonly used ones: TCP/IP 7 | (the Internet), SMTP (email), HTTP (the web), and so on. So any 8 | new protocol is a big deal. But Twitter is a protocol owned 9 | by a private company. That's even rarer.Curiously, the fact that the founders of Twitter 10 | have been slow to monetize it may in the long run 11 | prove to be an advantage. Because they haven't tried 12 | to control it too much, Twitter feels to everyone like 13 | previous protocols. One forgets it's owned by a 14 | private company. That must have made it easier for 15 | Twitter to spread. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssays/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/PaulGrahamEssays/.DS_Store -------------------------------------------------------------------------------- /data/PaulGrahamEssays/5founders.txt: -------------------------------------------------------------------------------- 1 | April 2009Inc recently asked me who I thought were the 5 most 2 | interesting startup founders of the last 30 years. How do 3 | you decide who's the most interesting? The best test seemed 4 | to be influence: who are the 5 5 | who've influenced me most? Who do I use as examples when I'm 6 | talking to companies we fund? Who do I find myself quoting?1. Steve JobsI'd guess Steve is the most influential founder not just for me but 7 | for most people you could ask. A lot of startup culture is Apple 8 | culture. He was the original young founder. And while the concept 9 | of "insanely great" already existed in the arts, it was a novel 10 | idea to introduce into a company in the 1980s.More remarkable still, he's stayed interesting for 30 years. People 11 | await new Apple products the way they'd await new books by a popular 12 | novelist. Steve may not literally design them, but they wouldn't 13 | happen if he weren't CEO.Steve is clever and driven, but so are a lot of people in the Valley. 14 | What makes him unique is his 15 | sense of 16 | design. Before him, most 17 | companies treated design as a frivolous extra. Apple's competitors 18 | now know better.2. TJ RodgersTJ Rodgers isn't as famous as Steve Jobs, but he may be the best 19 | writer among Silicon Valley CEOs. I've probably learned more from 20 | him about the startup way of thinking than from anyone else. 
Not 21 | so much from specific things he's written as by reconstructing the 22 | mind that produced them: brutally candid; aggressively garbage-collecting 23 | outdated ideas; and yet driven by pragmatism rather than ideology.The first essay of his that I read was so electrifying that I 24 | remember exactly where I was at the time. It was 25 | High 26 | Technology Innovation: Free Markets or Government Subsidies? and 27 | I was downstairs in the Harvard Square T Station. It felt as if 28 | someone had flipped on a light switch inside my head.3. Larry & SergeyI'm sorry to treat Larry and Sergey as one person. I've always 29 | thought that was unfair to them. But it does seem as if Google was a 30 | collaboration.Before Google, companies in Silicon Valley already knew it was 31 | important to have the best hackers. So they claimed, at least. 32 | But Google pushed this idea further than anyone had before. Their 33 | hypothesis seems to have been that, in the initial stages at least, 34 | all you need is good hackers: if you hire all the smartest people 35 | and put them to work on a problem where their success can be measured, 36 | you win. All the other stuff—which includes all the stuff that 37 | business schools think business consists of—you can figure out 38 | along the way. The results won't be perfect, but they'll be optimal. 39 | If this was their hypothesis, it's now been verified experimentally.4. Paul BuchheitFew know this, but one person, Paul Buchheit, is responsible for 40 | three of the best things Google has done. He was the original 41 | author of GMail, which is the most impressive thing Google has after 42 | search. He also wrote the first prototype of AdSense, and was the 43 | author of Google's mantra "Don't be evil."PB made a point in a talk once that I now mention to every startup 44 | we fund: that it's better, initially, to make a small number of 45 | users really love you than a large number kind of like you. 
If I 46 | could tell startups only 47 | ten sentences, 48 | this would be one of them.Now he's cofounder of a startup called Friendfeed. It's only a 49 | year old, but already everyone in the Valley is watching them. 50 | Someone responsible for three of the biggest ideas at Google is 51 | going to come up with more.5. Sam AltmanI was told I shouldn't mention founders of YC-funded companies in 52 | this list. But Sam Altman can't be stopped by such flimsy rules. 53 | If he wants to be on this list, he's going to be.Honestly, Sam is, along with Steve Jobs, the founder I refer to 54 | most when I'm advising startups. On questions of design, I ask 55 | "What would Steve do?" but on questions of strategy or ambition I 56 | ask "What would Sama do?"What I learned from meeting Sama is that the doctrine of the elect 57 | applies to startups. It applies way less than most people think: 58 | startup investing does not consist of trying to pick winners the 59 | way you might in a horse race. But there are a few people with 60 | such force of will that they're going to get whatever they want. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/6631327.txt: -------------------------------------------------------------------------------- 1 | March 2006, rev August 2009A couple days ago I found to my surprise that I'd been granted a 2 | patent. 3 | It issued in 2003, but no one told me. I wouldn't know about it 4 | now except that a few months ago, while visiting Yahoo, I happened 5 | to run into a Big Cheese I knew from working there in the late 6 | nineties. He brought up something called Revenue Loop, which Viaweb 7 | had been working on when they bought us.The idea is basically that you sort search results not in order of 8 | textual "relevance" (as search engines did then) nor in order of 9 | how much advertisers bid (as Overture did) but in order of the bid 10 | times the number of transactions. 
Ordinarily you'd do this for 11 | shopping searches, though in fact one of the features of our scheme 12 | is that it automatically detects which searches are shopping searches.If you just order the results in order of bids, you can make the 13 | search results useless, because the first results could be dominated 14 | by lame sites that had bid the most. But if you order results by 15 | bid multiplied by transactions, far from selling out, you're getting 16 | a better measure of relevance. What could be a better sign that 17 | someone was satisfied with a search result than going to the site 18 | and buying something?And, of course, this algorithm automatically maximizes the revenue 19 | of the search engine.Everyone is focused on this type of approach now, but few were in 20 | 1998. In 1998 it was all about selling banner ads. We didn't know 21 | that, so we were pretty excited when we figured out what seemed to 22 | us the optimal way of doing shopping searches.When Yahoo was thinking of buying us, we had a meeting with Jerry 23 | Yang in New York. For him, I now realize, this was supposed to be 24 | one of those meetings when you check out a company you've pretty 25 | much decided to buy, just to make sure they're ok guys. We weren't 26 | expected to do more than chat and seem smart and reasonable. He 27 | must have been dismayed when I jumped up to the whiteboard and 28 | launched into a presentation of our exciting new technology.I was just as dismayed when he didn't seem to care at all about it. 29 | At the time I thought, "boy, is this guy poker-faced. We present 30 | to him what has to be the optimal way of sorting product search 31 | results, and he's not even curious." I didn't realize till much later 32 | why he didn't care. In 1998, advertisers were overpaying enormously 33 | for ads on web sites. 34 | In 1998, if advertisers paid the maximum that traffic was worth to 35 | them, Yahoo's revenues would have decreased.Things are different now, of course. 
Now this sort of thing is all 36 | the rage. So when I ran into the Yahoo exec I knew from the old 37 | days in the Yahoo cafeteria a few months ago, the first thing he 38 | remembered was not (fortunately) all the fights I had with him, but 39 | Revenue Loop."Well," I said, "I think we actually applied for a patent on it. 40 | I'm not sure what happened to the application after I left.""Really? That would be an important patent."So someone investigated, and sure enough, that patent application 41 | had continued in the pipeline for several years after, and finally 42 | issued in 2003.The main thing that struck me on reading it, actually, is that 43 | lawyers at some point messed up my nice clear writing. Some clever 44 | person with a spell checker reduced one section to Zen-like incomprehensibility: 45 | 46 | Also, common spelling errors will tend to get fixed. For example, 47 | if users searching for "compact disc player" end up spending 48 | considerable money at sites offering compact disc players, then 49 | those pages will have a higher relevance for that search phrase, 50 | even though the phrase "compact disc player" is not present on 51 | those pages. 52 | 53 | (That "compat disc player" wasn't a typo, guys.)For the fine prose of the original, see the provisional application 54 | of February 1998, back when we were still Viaweb and couldn't afford 55 | to pay lawyers to turn every "a lot of" into "considerable." -------------------------------------------------------------------------------- /data/PaulGrahamEssays/alien.txt: -------------------------------------------------------------------------------- 1 | October 2022If there were intelligent beings elsewhere in the universe, they'd 2 | share certain truths in common with us. The truths of mathematics 3 | would be the same, because they're true by definition. Ditto for 4 | the truths of physics; the mass of a carbon atom would be the same 5 | on their planet. 
But I think we'd share other truths with aliens 6 | besides the truths of math and physics, and that it would be 7 | worthwhile to think about what these might be.For example, I think we'd share the principle that a controlled 8 | experiment testing some hypothesis entitles us to have proportionally 9 | increased belief in it. It seems fairly likely, too, that it would 10 | be true for aliens that one can get better at something by practicing. 11 | We'd probably share Occam's razor. There doesn't seem anything 12 | specifically human about any of these ideas.We can only guess, of course. We can't say for sure what forms 13 | intelligent life might take. Nor is it my goal here to explore that 14 | question, interesting though it is. The point of the idea of alien 15 | truth is not that it gives us a way to speculate about what forms 16 | intelligent life might take, but that it gives us a threshold, or 17 | more precisely a target, for truth. If you're trying to find the 18 | most general truths short of those of math or physics, then presumably 19 | they'll be those we'd share in common with other forms of intelligent 20 | life.Alien truth will work best as a heuristic if we err on the side of 21 | generosity. If an idea might plausibly be relevant to aliens, that's 22 | enough. Justice, for example. I wouldn't want to bet that all 23 | intelligent beings would understand the concept of justice, but I 24 | wouldn't want to bet against it either.The idea of alien truth is related to Erdos's idea of God's book. 25 | He used to describe a particularly good proof as being in God's 26 | book, the implication being (a) that a sufficiently good proof was 27 | more discovered than invented, and (b) that its goodness would be 28 | universally recognized. If there's such a thing as alien truth, 29 | then there's more in God's book than math.What should we call the search for alien truth? The obvious choice 30 | is "philosophy." 
Whatever else philosophy includes, it should 31 | probably include this. I'm fairly sure Aristotle would have thought 32 | so. One could even make the case that the search for alien truth 33 | is, if not an accurate description of philosophy, a good 34 | definition for it. I.e. that it's what people who call 35 | themselves philosophers should be doing, whether or not they currently 36 | are. But I'm not wedded to that; doing it is what matters, not what 37 | we call it.We may one day have something like alien life among us in the form 38 | of AIs. And that may in turn allow us to be precise about what 39 | truths an intelligent being would have to share with us. We might 40 | find, for example, that it's impossible to create something we'd 41 | consider intelligent that doesn't use Occam's razor. We might one 42 | day even be able to prove that. But though this sort of research 43 | would be very interesting, it's not necessary for our purposes, or 44 | even the same field; the goal of philosophy, if we're going to call it that, would be 45 | to see what ideas we come up with using alien truth as a target, 46 | not to say precisely where the threshold of it is. Those two questions might one 47 | day converge, but they'll converge from quite different directions, 48 | and till they do, it would be too constraining to restrict ourselves 49 | to thinking only about things we're certain would be alien truths. 50 | Especially since this will probably be one of those areas where the 51 | best guesses turn out to be surprisingly close to optimal. (Let's 52 | see if that one does.)Whatever we call it, the attempt to discover alien truths would be 53 | a worthwhile undertaking. And curiously enough, that is itself 54 | probably an alien truth.Thanks to Trevor Blackwell, Greg Brockman, 55 | Patrick Collison, Robert Morris, and Michael Nielsen for reading drafts of this. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssays/altair.txt: -------------------------------------------------------------------------------- 1 | February 2015One of the most valuable exercises you can try if you want to 2 | understand startups is to look at the most successful companies and 3 | explain why they were not as lame as they seemed when they first 4 | launched. Because they practically all seemed lame at first. Not 5 | just small, lame. Not just the first step up a big mountain. More 6 | like the first step into a swamp.A Basic interpreter for the Altair? How could that ever grow into 7 | a giant company? People sleeping on airbeds in strangers' apartments? 8 | A web site for college students to stalk one another? A wimpy 9 | little single-board computer for hobbyists that used a TV as a 10 | monitor? A new search engine, when there were already about 10, 11 | and they were all trying to de-emphasize search? These ideas didn't 12 | just seem small. They seemed wrong. They were the kind of ideas 13 | you could not merely ignore, but ridicule.Often the founders themselves didn't know why their ideas were 14 | promising. They were attracted to these ideas by instinct, because 15 | they were living in the future and 16 | they sensed that something was missing. But they could not have 17 | put into words exactly how their ugly ducklings were going to grow 18 | into big, beautiful swans.Most people's first impulse when they hear about a lame-sounding 19 | new startup idea is to make fun of it. Even a lot of people who 20 | should know better.When I encounter a startup with a lame-sounding idea, I ask "What 21 | Microsoft is this the Altair Basic of?" Now it's a puzzle, and the 22 | burden is on me to solve it. Sometimes I can't think of an answer, 23 | especially when the idea is a made-up one. But it's remarkable how 24 | often there does turn out to be an answer. 
Often it's one the 25 | founders themselves hadn't seen yet.Intriguingly, there are sometimes multiple answers. I talked to a 26 | startup a few days ago that could grow into 3 distinct Microsofts. 27 | They'd probably vary in size by orders of magnitude. But you can 28 | never predict how big a Microsoft is going to be, so in cases like 29 | that I encourage founders to follow whichever path is most immediately 30 | exciting to them. Their instincts got them this far. Why stop now? -------------------------------------------------------------------------------- /data/PaulGrahamEssays/bias.txt: -------------------------------------------------------------------------------- 1 | October 2015This will come as a surprise to a lot of people, but in some cases 2 | it's possible to detect bias in a selection process without knowing 3 | anything about the applicant pool. Which is exciting because among 4 | other things it means third parties can use this technique to detect 5 | bias whether those doing the selecting want them to or not.You can use this technique whenever (a) you have at least 6 | a random sample of the applicants that were selected, (b) their 7 | subsequent performance is measured, and (c) the groups of 8 | applicants you're comparing have roughly equal distribution of ability.How does it work? Think about what it means to be biased. What 9 | it means for a selection process to be biased against applicants 10 | of type x is that it's harder for them to make it through. Which 11 | means applicants of type x have to be better to get selected than 12 | applicants not of type x. 13 | [1] 14 | Which means applicants of type x 15 | who do make it through the selection process will outperform other 16 | successful applicants. And if the performance of all the successful 17 | applicants is measured, you'll know if they do.Of course, the test you use to measure performance must be a valid 18 | one. 
And in particular it must not be invalidated by the bias you're 19 | trying to measure. 20 | But there are some domains where performance can be measured, and 21 | in those detecting bias is straightforward. Want to know if the 22 | selection process was biased against some type of applicant? Check 23 | whether they outperform the others. This is not just a heuristic 24 | for detecting bias. It's what bias means.For example, many suspect that venture capital firms are biased 25 | against female founders. This would be easy to detect: among their 26 | portfolio companies, do startups with female founders outperform 27 | those without? A couple months ago, one VC firm (almost certainly 28 | unintentionally) published a study showing bias of this type. First 29 | Round Capital found that among its portfolio companies, startups 30 | with female founders outperformed 31 | those without by 63%. 32 | [2]The reason I began by saying that this technique would come as a 33 | surprise to many people is that we so rarely see analyses of this 34 | type. I'm sure it will come as a surprise to First Round that they 35 | performed one. I doubt anyone there realized that by limiting their 36 | sample to their own portfolio, they were producing a study not of 37 | startup trends but of their own biases when selecting companies.I predict we'll see this technique used more in the future. The 38 | information needed to conduct such studies is increasingly available. 39 | Data about who applies for things is usually closely guarded by the 40 | organizations selecting them, but nowadays data about who gets 41 | selected is often publicly available to anyone who takes the trouble 42 | to aggregate it. 
43 | Notes[1] 44 | This technique wouldn't work if the selection process looked 45 | for different things from different types of applicants—for 46 | example, if an employer hired men based on their ability but women 47 | based on their appearance.[2] 48 | As Paul Buchheit points out, First Round excluded their most 49 | successful investment, Uber, from the study. And while it 50 | makes sense to exclude outliers from some types of studies, 51 | studies of returns from startup investing, which is all about 52 | hitting outliers, are not one of them. 53 | Thanks to Sam Altman, Jessica Livingston, and Geoff Ralston for reading 54 | drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/cred.txt: -------------------------------------------------------------------------------- 1 | April 2020I recently saw a 2 | video 3 | of TV journalists and politicians confidently 4 | saying that the coronavirus would be no worse than the flu. What 5 | struck me about it was not just how mistaken they seemed, but how 6 | daring. How could they feel safe saying such things?The answer, I realized, is that they didn't think they could get 7 | caught. They didn't realize there was any danger in making false 8 | predictions. These people constantly make false predictions, and 9 | get away with it, because the things they make predictions about 10 | either have mushy enough outcomes that they can bluster their way 11 | out of trouble, or happen so far in the future that few remember 12 | what they said.An epidemic is different. It falsifies your predictions rapidly and 13 | unequivocally.But epidemics are rare enough that these people clearly 14 | didn't realize this was even a possibility. 
Instead they just 15 | continued to use their ordinary m.o., which, as the epidemic has 16 | made clear, is to talk confidently about things they don't 17 | understand. An event like this is thus a uniquely powerful way of taking people's 18 | measure. As Warren Buffett said, "It's only when the tide goes out 19 | that you learn who's been swimming naked." And the tide has just 20 | gone out like never before. Now that we've seen the results, let's remember what we saw, because 21 | this is the most accurate test of credibility we're ever likely to have. I hope. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/diff.txt: -------------------------------------------------------------------------------- 1 | December 2001 (rev. May 2002) 2 | 3 | (This article came about in response to some questions on 4 | the LL1 mailing list. It is now 5 | incorporated in Revenge of the Nerds.) When McCarthy designed Lisp in the late 1950s, it was 6 | a radical departure from existing languages, 7 | the most important of which was Fortran. Lisp embodied nine new ideas: 8 | 1. Conditionals. A conditional is an if-then-else 9 | construct. We take these for granted now. They were 10 | invented 11 | by McCarthy in the course of developing Lisp. 12 | (Fortran at that time only had a conditional 13 | goto, closely based on the branch instruction in the 14 | underlying hardware.) McCarthy, who was on the Algol committee, got 15 | conditionals into Algol, whence they spread to most other 16 | languages. 2. A function type. In Lisp, functions are first class 17 | objects-- they're a data type just like integers, strings, 18 | etc, and have a literal representation, can be stored in variables, 19 | can be passed as arguments, and so on. 3. Recursion. Recursion existed as a mathematical concept 20 | before Lisp of course, but Lisp was the first programming language to support 21 | it. 
(It's arguably implicit in making functions first class 22 | objects.) 4. A new concept of variables. In Lisp, all variables 23 | are effectively pointers. Values are what 24 | have types, not variables, and assigning or binding 25 | variables means copying pointers, not what they point to. 5. Garbage-collection. 6. Programs composed of expressions. Lisp programs are 26 | trees of expressions, each of which returns a value. 27 | (In some Lisps expressions 28 | can return multiple values.) This is in contrast to Fortran 29 | and most succeeding languages, which distinguish between 30 | expressions and statements. It was natural to have this 31 | distinction in Fortran because (not surprisingly in a language 32 | where the input format was punched cards) the language was 33 | line-oriented. You could not nest statements. And 34 | so while you needed expressions for math to work, there was 35 | no point in making anything else return a value, because 36 | there could not be anything waiting for it. This limitation 37 | went away with the arrival of block-structured languages, 38 | but by then it was too late. The distinction between 39 | expressions and statements was entrenched. It spread from 40 | Fortran into Algol and thence to both their descendants. When a language is made entirely of expressions, you can 41 | compose expressions however you want. You can say either 42 | (using Arc syntax) (if foo (= x 1) (= x 2)) or (= x (if foo 1 2)) 7. A symbol type. Symbols differ from strings in that 43 | you can test equality by comparing a pointer. 8. A notation for code using trees of symbols. 9. The whole language always available. 44 | There is 45 | no real distinction between read-time, compile-time, and runtime. 
46 | You can compile or run code while reading, read or run code 47 | while compiling, and read or compile code at runtime. Running code at read-time lets users reprogram Lisp's syntax; 48 | running code at compile-time is the basis of macros; compiling 49 | at runtime is the basis of Lisp's use as an extension 50 | language in programs like Emacs; and reading at runtime 51 | enables programs to communicate using s-expressions, an 52 | idea recently reinvented as XML. 53 | When Lisp was first invented, all these ideas were far 54 | removed from ordinary programming practice, which was 55 | dictated largely by the hardware available in the late 1950s. Over time, the default language, embodied 56 | in a succession of popular languages, has 57 | gradually evolved toward Lisp. 1-5 are now widespread. 58 | 6 is starting to appear in the mainstream. 59 | Python has a form of 7, though there doesn't seem to be 60 | any syntax for it. 61 | 8, which (with 9) is what makes Lisp macros 62 | possible, is so far still unique to Lisp, 63 | perhaps because (a) it requires those parens, or something 64 | just as bad, and (b) if you add that final increment of power, 65 | you can no 66 | longer claim to have invented a new language, but only 67 | to have designed a new dialect of Lisp ; -) Though useful to present-day programmers, it's 68 | strange to describe Lisp in terms of its 69 | variation from the random expedients other languages 70 | adopted. That was not, probably, how McCarthy 71 | thought of it. Lisp wasn't designed to fix the mistakes 72 | in Fortran; it came about more as the byproduct of an 73 | attempt to axiomatize computation. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssays/disc.txt: -------------------------------------------------------------------------------- 1 | January 2017Because biographies of famous scientists tend to 2 | edit out their mistakes, we underestimate the 3 | degree of risk they were willing to take. 4 | And because anything a famous scientist did that 5 | wasn't a mistake has probably now become the 6 | conventional wisdom, those choices don't 7 | seem risky either.Biographies of Newton, for example, understandably focus 8 | more on physics than alchemy or theology. 9 | The impression we get is that his unerring judgment 10 | led him straight to truths no one else had noticed. 11 | How to explain all the time he spent on alchemy 12 | and theology? Well, smart people are often kind of 13 | crazy.But maybe there is a simpler explanation. Maybe 14 | the smartness and the craziness were not as separate 15 | as we think. Physics seems to us a promising thing 16 | to work on, and alchemy and theology obvious wastes 17 | of time. But that's because we know how things 18 | turned out. In Newton's day the three problems 19 | seemed roughly equally promising. No one knew yet 20 | what the payoff would be for inventing what we 21 | now call physics; if they had, more people would 22 | have been working on it. And alchemy and theology 23 | were still then in the category Marc Andreessen would 24 | describe as "huge, if true."Newton made three bets. One of them worked. But 25 | they were all risky. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/donate.txt: -------------------------------------------------------------------------------- 1 | March 2021The secret curse of the nonprofit world is restricted donations. 2 | If you haven't been involved with nonprofits, you may never have 3 | heard this phrase before. 
But if you have been, it probably made 4 | you wince.Restricted donations mean donations where the donor limits what can 5 | be done with the money. This is common with big donations, perhaps 6 | the default. And yet it's usually a bad idea. Usually the way the 7 | donor wants the money spent is not the way the nonprofit would have 8 | chosen. Otherwise there would have been no need to restrict the 9 | donation. But who has a better understanding of where money needs 10 | to be spent, the nonprofit or the donor?If a nonprofit doesn't understand better than its donors where money 11 | needs to be spent, then it's incompetent and you shouldn't be 12 | donating to it at all.Which means a restricted donation is inherently suboptimal. It's 13 | either a donation to a bad nonprofit, or a donation for the wrong 14 | things.There are a couple exceptions to this principle. One is when the 15 | nonprofit is an umbrella organization. It's reasonable to make a 16 | restricted donation to a university, for example, because a university 17 | is only nominally a single nonprofit. Another exception is when the 18 | donor actually does know as much as the nonprofit about where money 19 | needs to be spent. The Gates Foundation, for example, has specific 20 | goals and often makes restricted donations to individual nonprofits 21 | to accomplish them. But unless you're a domain expert yourself or 22 | donating to an umbrella organization, your donation would do more 23 | good if it were unrestricted.If restricted donations do less good than unrestricted ones, why 24 | do donors so often make them? Partly because doing good isn't donors' 25 | only motive. They often have other motives as well — to make a mark, 26 | or to generate good publicity 27 | [1], 28 | or to comply with regulations 29 | or corporate policies. Many donors may simply never have considered 30 | the distinction between restricted and unrestricted donations. 
They 31 | may believe that donating money for some specific purpose is just 32 | how donation works. And to be fair, nonprofits don't try very hard 33 | to discourage such illusions. They can't afford to. People running 34 | nonprofits are almost always anxious about money. They can't afford 35 | to talk back to big donors.You can't expect candor in a relationship so asymmetric. So I'll 36 | tell you what nonprofits wish they could tell you. If you want to 37 | donate to a nonprofit, donate unrestricted. If you trust them to 38 | spend your money, trust them to decide how. 39 | Note[1] 40 | Unfortunately restricted donations tend to generate more 41 | publicity than unrestricted ones. "X donates money to build a school 42 | in Africa" is not only more interesting than "X donates money to Y 43 | nonprofit to spend as Y chooses," but also focuses more attention 44 | on X. 45 | Thanks to Chase Adam, Ingrid Bassett, Trevor Blackwell, and Edith 46 | Elliot for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/fix.txt: -------------------------------------------------------------------------------- 1 | 2 | Kevin Kelleher suggested an interesting way to compare programming 3 | languages: to describe each in terms of the problem it 4 | fixes. The surprising thing is how many, and how well, languages can be 5 | described this way. 6 | 7 | Algol: Assembly language is too low-level.Pascal: Algol doesn't have enough data types.Modula: Pascal is too wimpy for systems programming. 
8 | Simula: Algol isn't good enough at simulations.Smalltalk: Not everything in Simula is an object.Fortran: Assembly language is too low-level.Cobol: Fortran is scary.PL/1: Fortran doesn't have enough data types.Ada: Every existing language is missing something.Basic: Fortran is scary.APL: Fortran isn't good enough at manipulating arrays.J: APL requires its own character set.C: Assembly language is too low-level.C++: C is too low-level.Java: C++ is a kludge. And Microsoft is going to crush us.C#: Java is controlled by Sun. 9 | Lisp: Turing Machines are an awkward way to describe computation.Scheme: MacLisp is a kludge.T: Scheme has no libraries.Common Lisp: There are too many dialects of Lisp.Dylan: Scheme has no libraries, and Lisp syntax is scary. 10 | Perl: Shell scripts/awk/sed are not enough like programming languages.Python: Perl is a kludge.Ruby: Perl is a kludge, and Lisp syntax is scary.Prolog: Programming is not enough like logic. 11 | -------------------------------------------------------------------------------- /data/PaulGrahamEssays/foundervisa.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | April 2009I usually avoid politics, but since we now seem to have an administration that's open to suggestions, I'm going to risk making one. The single biggest thing the government could do to increase the number of startups in this country is a policy that would cost nothing: establish a new class of visa for startup founders.The biggest constraint on the number of new startups that get created in the US is not tax policy or employment law or even Sarbanes-Oxley. It's that we won't let the people who want to start them into the country.Letting just 10,000 startup founders into the country each year could have a visible effect on the economy. If we assume 4 people per startup, which is probably an overestimate, that's 2500 new companies. Each year. 
They wouldn't all grow as big as Google, but out of 2500 some would come close.By definition these 10,000 founders wouldn't be taking jobs from Americans: it could be part of the terms of the visa that they couldn't work for existing companies, only new ones they'd founded. In fact they'd cause there to be 4 | more jobs for Americans, because the companies they started would hire more employees as they grew.The tricky part might seem to be how one defined a startup. But that could be solved quite easily: let the market decide. Startup investors work hard to find the best startups. The government could not do better than to piggyback on their expertise, and use investment by recognized startup investors as the test of whether a company was a real startup.How would the government decide who's a startup investor? The same way they decide what counts as a university for student visas. We'll establish our own accreditation procedure. We know who one another are.10,000 people is a drop in the bucket by immigration standards, but would represent a huge increase in the pool of startup founders. I think this would have such a visible effect on the economy that it would make the legislator who introduced the bill famous. The only way to know for sure would be to try it, and that would cost practically nothing. 
 5 | Thanks to Trevor Blackwell, Paul Buchheit, Jeff Clavier, David Hornik, Jessica Livingston, Greg McAdoo, Aydin Senkut, and Fred Wilson for reading drafts of this. Related: -------------------------------------------------------------------------------- /data/PaulGrahamEssays/fp.txt: -------------------------------------------------------------------------------- 1 | December 2019 I've seen the same pattern in many different fields: even though 2 | lots of people have worked hard in the field, only a small fraction 3 | of the space of possibilities has been explored, because they've 4 | all worked on similar things. Even the smartest, most imaginative people are surprisingly 5 | conservative when deciding what to work on. People who would never 6 | dream of being fashionable in any other way get sucked into working 7 | on fashionable problems. If you want to try working on unfashionable problems, one of the 8 | best places to look is in fields that people think have already been 9 | fully explored: essays, Lisp, venture funding — you may notice a 10 | pattern here. If you can find a new approach into a big but apparently 11 | played out field, the value of whatever you discover will be 12 | multiplied by its enormous surface area. The best protection against getting drawn into working on the same 13 | things as everyone else may be to genuinely 14 | love what you're doing. 15 | Then you'll continue to work on it even if you make the same mistake 16 | as other people and think that it's too marginal to matter. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/getideas.txt: -------------------------------------------------------------------------------- 1 | January 2023 (Someone fed my essays into GPT to make something that could answer 2 | questions based on them, then asked it where good ideas come from. The 3 | answer was ok, but not what I would have said. 
This is what I would have said.)The way to get new ideas is to notice anomalies: what seems strange, 4 | or missing, or broken? You can see anomalies in everyday life (much 5 | of standup comedy is based on this), but the best place to look for 6 | them is at the frontiers of knowledge.Knowledge grows fractally. 7 | From a distance its edges look smooth, but when you learn enough 8 | to get close to one, you'll notice it's full of gaps. These gaps 9 | will seem obvious; it will seem inexplicable that no one has tried 10 | x or wondered about y. In the best case, exploring such gaps yields 11 | whole new fractal buds. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/hiresfund.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Want to start a startup? Get funded by 4 | Y Combinator. 5 | 6 | 7 | 8 | 9 | September 2010The reason startups have been using 10 | more convertible notes in angel 11 | rounds is that they make deals close faster. By making it easier 12 | for startups to give different prices to different investors, they 13 | help them break the sort of deadlock that happens when investors 14 | all wait to see who else is going to invest.By far the biggest influence on investors' opinions of a startup 15 | is the opinion of other investors. There are very, very few who 16 | simply decide for themselves. Any startup founder can tell you the 17 | most common question they hear from investors is not about the 18 | founders or the product, but "who else is investing?"That tends to produce deadlocks. 
Raising an old-fashioned 19 | fixed-size equity round can take weeks, because all the angels sit around 20 | waiting for the others to commit, like competitors in a bicycle 21 | sprint who deliberately ride slowly at the start so they can follow 22 | whoever breaks first.Convertible notes let startups beat such deadlocks by rewarding 23 | investors willing to move first with lower (effective) valuations. 24 | Which they deserve because they're taking more risk. It's much 25 | safer to invest in a startup Ron Conway has already invested in; 26 | someone who comes after him should pay a higher price.The reason convertible notes allow more flexibility in price is 27 | that valuation caps aren't actual valuations, and notes are cheap 28 | and easy to do. So you can do high-resolution fundraising: if you 29 | wanted you could have a separate note with a different cap for each 30 | investor.That cap need not simply rise monotonically. A startup could 31 | also give better deals to investors they expected to help 32 | them most. The point is simply that different investors, 33 | whether because of the help they offer or their willingness to 34 | commit, have different values for 35 | startups, and their terms should reflect that.Different terms for different investors is 36 | clearly the way of the future. Markets always evolve toward higher 37 | resolution. You may not need to use convertible notes to do it. 38 | With sufficiently lightweight standardized equity terms (and some 39 | changes in investors' and lawyers' expectations about equity rounds) 40 | you might be able to do the same thing with equity instead of debt. 41 | Either would be fine with startups, so long as they can easily 42 | change their valuation.Deadlocks weren't the only problem with fixed-size equity rounds. 43 | Another was that startups had to decide in advance how much to 44 | raise. I think it's a mistake for a startup to fix upon a specific 45 | number. 
If investors are easily convinced, the startup should raise more 46 | now, and if investors are skeptical, the startup should take a 47 | smaller amount and use that to get the company to the point where 48 | it's more convincing.It's just not reasonable to expect startups to pick an optimal round 49 | size in advance, because that depends on the reactions of investors, 50 | and those are impossible to predict.Fixed-size, multi-investor angel rounds are such a bad idea for 51 | startups that one wonders why things were ever done that way. One 52 | possibility is that this custom reflects the way investors like to 53 | collude when they can get away with it. But I think the actual 54 | explanation is less sinister. I think angels (and their lawyers) 55 | organized rounds this way in unthinking imitation of VC series A 56 | rounds. In a series A, a fixed-size equity round with a lead makes 57 | sense, because there is usually just one big investor, who is 58 | unequivocally the lead. Fixed-size series A rounds already are 59 | high res. But the more investors you have in a round, the less 60 | sense it makes for everyone to get the same price.The most interesting question here may be what high res fundraising 61 | will do to the world of investors. Bolder investors will now get 62 | rewarded with lower prices. But more important, in a 63 | hits-driven business, is that they'll be able to get into the deals 64 | they want. Whereas the "who else is investing?" type of investors 65 | will not only pay higher prices, but may not be able to get into 66 | the best deals at all.Thanks to Immad Akhund, Sam Altman, John Bautista, Pete Koomen, 67 | Jessica Livingston, Dan Siroker, Harj Taggar, and 68 | Fred Wilson for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/hw.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Want to start a startup? 
Get funded by 4 | Y Combinator. 5 | 6 | 7 | 8 | 9 | October 2012One advantage of Y Combinator's early, broad focus is that we 10 | see trends before most other people. And one of the most conspicuous 11 | trends in the last batch was the large number of hardware startups. 12 | Out of 84 companies, 7 were making hardware. On the whole 13 | they've done better than the companies that weren't.They've faced resistance from investors of course. Investors have 14 | a deep-seated bias against hardware. But investors' opinions are 15 | a trailing indicator. The best founders are better at seeing the 16 | future than the best investors, because the best founders are making 17 | it.There is no one single force driving this trend. Hardware does 18 | well on crowdfunding sites. The spread of tablets makes it 19 | possible to build new things controlled 20 | by and even incorporating 21 | them. Electric motors 22 | have improved. 23 | Wireless connectivity of various types can now be taken for granted. 24 | It's getting more straightforward to get things manufactured. 25 | Arduinos, 3D printing, laser cutters, and more accessible CNC milling are making hardware easier to prototype. 26 | Retailers are less of a bottleneck as customers increasingly buy 27 | online.One question I can answer is why hardware is suddenly cool. 28 | It always was cool. 29 | Physical things are great. They just haven't 30 | been as great a way to start a rapidly growing business 31 | as software. But that rule may not be permanent. It's not even 32 | that old; it only dates from about 1990. Maybe the advantage 33 | of software will turn out to have been temporary. Hackers love to 34 | build hardware, and customers love to buy it. So if the ease of 35 | shipping hardware even approached the ease of shipping software, 36 | we'd see a lot more hardware startups.It wouldn't be the first time something was a bad idea till it 37 | wasn't. 
And it wouldn't be the first time investors learned that 38 | lesson from founders. So if you want to work on hardware, don't be deterred from doing 39 | it because you worry investors will discriminate against you. And 40 | in particular, don't be deterred from applying to Y Combinator 41 | with a hardware idea, because we're especially interested in hardware 42 | startups. We know there's room for the next Steve Jobs. 43 | But there's almost certainly also room for the first 44 | <Your Name Here>. 45 | Thanks to Sam Altman, Trevor Blackwell, David Cann, Sanjay Dastoor, 46 | Paul Gerhardt, Cameron Robertson, Harj Taggar, and Garry Tan for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/iflisp.txt: -------------------------------------------------------------------------------- 1 | May 2003 If Lisp is so great, why don't more people use it? I was 2 | asked this question by a student in the audience at a 3 | talk I gave recently. Not for the first time, either. In languages, as in so many things, there's not much 4 | correlation between popularity and quality. Why does 5 | John Grisham (King of Torts sales rank, 44) outsell 6 | Jane Austen (Pride and Prejudice sales rank, 6191)? 7 | Would even Grisham claim that it's because he's a better 8 | writer? Here's the first sentence of Pride and Prejudice: 9 | 10 | It is a truth universally acknowledged, that a single man 11 | in possession of a good fortune must be in want of a 12 | wife. 13 | 14 | "It is a truth universally acknowledged?" Long words for 15 | the first sentence of a love story. Like Jane Austen, Lisp looks hard. Its syntax, or lack 16 | of syntax, makes it look completely unlike 17 | the languages 18 | most people are used to. Before I learned Lisp, I was afraid 19 | of it too. I recently came across a notebook from 1983 20 | in which I'd written: 21 | 22 | I suppose I should learn Lisp, but it seems so foreign. 
23 | 24 | Fortunately, I was 19 at the time and not too resistant to learning 25 | new things. I was so ignorant that learning 26 | almost anything meant learning new things.People frightened by Lisp make up other reasons for not 27 | using it. The standard 28 | excuse, back when C was the default language, was that Lisp 29 | was too slow. Now that Lisp dialects are among 30 | the faster 31 | languages available, that excuse has gone away. 32 | Now the standard excuse is openly circular: that other languages 33 | are more popular.(Beware of such reasoning. It gets you Windows.)Popularity is always self-perpetuating, but it's especially 34 | so in programming languages. More libraries 35 | get written for popular languages, which makes them still 36 | more popular. Programs often have to work with existing programs, 37 | and this is easier if they're written in the same language, 38 | so languages spread from program to program like a virus. 39 | And managers prefer popular languages, because they give them 40 | more leverage over developers, who can more easily be replaced.Indeed, if programming languages were all more or less equivalent, 41 | there would be little justification for using any but the most 42 | popular. But they aren't all equivalent, not by a long 43 | shot. And that's why less popular languages, like Jane Austen's 44 | novels, continue to survive at all. When everyone else is reading 45 | the latest John Grisham novel, there will always be a few people 46 | reading Jane Austen instead. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/island.txt: -------------------------------------------------------------------------------- 1 | July 2006I've discovered a handy test for figuring out what you're addicted 2 | to. Imagine you were going to spend the weekend at a friend's house 3 | on a little island off the coast of Maine. 
There are no shops on 4 | the island and you won't be able to leave while you're there. Also, 5 | you've never been to this house before, so you can't assume it will 6 | have more than any house might.What, besides clothes and toiletries, do you make a point of packing? 7 | That's what you're addicted to. For example, if you find yourself 8 | packing a bottle of vodka (just in case), you may want to stop and 9 | think about that.For me the list is four things: books, earplugs, a notebook, and a 10 | pen.There are other things I might bring if I thought of it, like music, 11 | or tea, but I can live without them. I'm not so addicted to caffeine 12 | that I wouldn't risk the house not having any tea, just for a 13 | weekend.Quiet is another matter. I realize it seems a bit eccentric to 14 | take earplugs on a trip to an island off the coast of Maine. If 15 | anywhere should be quiet, that should. But what if the person in 16 | the next room snored? What if there was a kid playing basketball? 17 | (Thump, thump, thump... thump.) Why risk it? Earplugs are small.Sometimes I can think with noise. If I already have momentum on 18 | some project, I can work in noisy places. I can edit an essay or 19 | debug code in an airport. But airports are not so bad: most of the 20 | noise is whitish. I couldn't work with the sound of a sitcom coming 21 | through the wall, or a car in the street playing thump-thump music.And of course there's another kind of thinking, when you're starting 22 | something new, that requires complete quiet. You never 23 | know when this will strike. It's just as well to carry plugs.The notebook and pen are professional equipment, as it were. Though 24 | actually there is something druglike about them, in the sense that 25 | their main purpose is to make me feel better. I hardly ever go 26 | back and read stuff I write down in notebooks. 
It's just that if 27 | I can't write things down, worrying about remembering one idea gets 28 | in the way of having the next. Pen and paper wick ideas.The best notebooks I've found are made by a company called Miquelrius. 29 | I use their smallest size, which is about 2.5 x 4 in. 30 | The secret to writing on such 31 | narrow pages is to break words only when you run out of space, like 32 | a Latin inscription. I use the cheapest plastic Bic ballpoints, 33 | partly because their gluey ink doesn't seep through pages, and 34 | partly so I don't worry about losing them.I only started carrying a notebook about three years ago. Before 35 | that I used whatever scraps of paper I could find. But the problem 36 | with scraps of paper is that they're not ordered. In a notebook 37 | you can guess what a scribble means by looking at the pages 38 | around it. In the scrap era I was constantly finding notes I'd 39 | written years before that might say something I needed to remember, 40 | if I could only figure out what.As for books, I know the house would probably have something to 41 | read. On the average trip I bring four books and only read one of 42 | them, because I find new books to read en route. Really bringing 43 | books is insurance.I realize this dependence on books is not entirely good—that what 44 | I need them for is distraction. The books I bring on trips are 45 | often quite virtuous, the sort of stuff that might be assigned 46 | reading in a college class. But I know my motives aren't virtuous. 47 | I bring books because if the world gets boring I need to be able 48 | to slip into another distilled by some writer. It's like eating 49 | jam when you know you should be eating fruit.There is a point where I'll do without books. I was walking in 50 | some steep mountains once, and decided I'd rather just think, if I 51 | was bored, rather than carry a single unnecessary ounce. It wasn't 52 | so bad. 
I found I could entertain myself by having ideas instead 53 | of reading other people's. If you stop eating jam, fruit starts 54 | to taste better.So maybe I'll try not bringing books on some future trip. They're 55 | going to have to pry the plugs out of my cold, dead ears, however. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/know.txt: -------------------------------------------------------------------------------- 1 | December 2014I've read Villehardouin's chronicle of the Fourth Crusade at least 2 | two times, maybe three. And yet if I had to write down everything 3 | I remember from it, I doubt it would amount to much more than a 4 | page. Multiply this times several hundred, and I get an uneasy 5 | feeling when I look at my bookshelves. What use is it to read all 6 | these books if I remember so little from them?A few months ago, as I was reading Constance Reid's excellent 7 | biography of Hilbert, I figured out if not the answer to this 8 | question, at least something that made me feel better about it. 9 | She writes: 10 | 11 | Hilbert had no patience with mathematical lectures which filled 12 | the students with facts but did not teach them how to frame a 13 | problem and solve it. He often used to tell them that "a perfect 14 | formulation of a problem is already half its solution." 15 | 16 | That has always seemed to me an important point, and I was even 17 | more convinced of it after hearing it confirmed by Hilbert.But how had I come to believe in this idea in the first place? A 18 | combination of my own experience and other things I'd read. None 19 | of which I could at that moment remember! And eventually I'd forget 20 | that Hilbert had confirmed it too. But my increased belief in the 21 | importance of this idea would remain something I'd learned from 22 | this book, even after I'd forgotten I'd learned it.Reading and experience train your model of the world. 
And even if 23 | you forget the experience or what you read, its effect on your model 24 | of the world persists. Your mind is like a compiled program you've 25 | lost the source of. It works, but you don't know why.The place to look for what I learned from Villehardouin's chronicle 26 | is not what I remember from it, but my mental models of the crusades, 27 | Venice, medieval culture, siege warfare, and so on. Which doesn't 28 | mean I couldn't have read more attentively, but at least the harvest 29 | of reading is not so miserably small as it might seem.This is one of those things that seem obvious in retrospect. But 30 | it was a surprise to me and presumably would be to anyone else who 31 | felt uneasy about (apparently) forgetting so much they'd read.Realizing it does more than make you feel a little better about 32 | forgetting, though. There are specific implications.For example, reading and experience are usually "compiled" at the 33 | time they happen, using the state of your brain at that time. The 34 | same book would get compiled differently at different points in 35 | your life. Which means it is very much worth reading important 36 | books multiple times. I always used to feel some misgivings about 37 | rereading books. I unconsciously lumped reading together with work 38 | like carpentry, where having to do something again is a sign you 39 | did it wrong the first time. Whereas now the phrase "already read" 40 | seems almost ill-formed.Intriguingly, this implication isn't limited to books. Technology 41 | will increasingly make it possible to relive our experiences. When 42 | people do that today it's usually to enjoy them again (e.g. when 43 | looking at pictures of a trip) or to find the origin of some bug in 44 | their compiled code (e.g. when Stephen Fry succeeded in remembering 45 | the childhood trauma that prevented him from singing). 
But as 46 | technologies for recording and playing back your life improve, it 47 | may become common for people to relive experiences without any goal 48 | in mind, simply to learn from them again as one might when rereading 49 | a book.Eventually we may be able not just to play back experiences but 50 | also to index and even edit them. So although not knowing how you 51 | know things may seem part of being human, it may not be. 52 | Thanks to Sam Altman, Jessica Livingston, and Robert Morris for reading 53 | drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/ladder.txt: -------------------------------------------------------------------------------- 1 | August 2005Thirty years ago, one was supposed to work one's way up the corporate 2 | ladder. That's less the rule now. Our generation wants to get 3 | paid up front. Instead of developing a product for some big company 4 | in the expectation of getting job security in return, we develop 5 | the product ourselves, in a startup, and sell it to the big company. 6 | At the very least we want options.Among other things, this shift has created the appearance of a rapid 7 | increase in economic inequality. But really the two cases are not 8 | as different as they look in economic statistics.Economic statistics are misleading because they ignore the value 9 | of safe jobs. An easy job from which one can't be fired is worth 10 | money; exchanging the two is one of the commonest forms of 11 | corruption. A sinecure is, in effect, an annuity. Except sinecures 12 | don't appear in economic statistics. 
If they did, it would be clear 13 | that in practice socialist countries have nontrivial disparities 14 | of wealth, because they usually have a class of powerful bureaucrats 15 | who are paid mostly by seniority and can never be fired. While not a sinecure, a position on the corporate ladder was genuinely 16 | valuable, because big companies tried not to fire people, and 17 | promoted from within based largely on seniority. A position on the 18 | corporate ladder had a value analogous to the "goodwill" that is a 19 | very real element in the valuation of companies. It meant one could 20 | expect future high paying jobs. One of the main causes of the decay of the corporate ladder is the trend 21 | for takeovers that began in the 1980s. Why waste your time climbing 22 | a ladder that might disappear before you reach the top? And, by no coincidence, the corporate ladder was one of the reasons 23 | the early corporate raiders were so successful. It's not only 24 | economic statistics that ignore the value of safe jobs. Corporate 25 | balance sheets do too. One reason it was profitable to carve up 1980s 26 | companies and sell them for parts was that they hadn't formally 27 | acknowledged their implicit debt to employees who had done good 28 | work and expected to be rewarded with high-paying executive jobs 29 | when their time came. In the movie Wall Street, Gordon Gekko 30 | ridicules a company overloaded with vice presidents. But the company 31 | may not be as corrupt as it seems; those VPs' cushy jobs were 32 | probably payment for work done earlier. I like the new model better. For one thing, it seems a bad plan 33 | to treat jobs as rewards. Plenty of good engineers got made into 34 | bad managers that way. And the old system meant people had to deal 35 | with a lot more corporate politics, in order to protect the work 36 | they'd invested in a position on the ladder. The big disadvantage of the new system is that it involves more risk. 
If you develop ideas in a startup instead 37 | of within a big company, any number of random factors could sink 38 | you before you can finish. But maybe the older generation would 39 | laugh at me for saying that the way we do things is riskier. After 40 | all, projects within big companies were always getting cancelled 41 | as a result of arbitrary decisions from higher up. My father's 42 | entire industry (breeder reactors) disappeared that way.For better or worse, the idea of the corporate ladder is probably 43 | gone for good. The new model seems more liquid, and more efficient. 44 | But it is less of a change, financially, than one might think. Our 45 | fathers weren't that stupid. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/lwba.txt: -------------------------------------------------------------------------------- 1 | 2 | After a link to 3 | Beating the Averages was posted on slashdot, 4 | some readers wanted to hear in more detail 5 | about the specific technical advantages we got from using 6 | Lisp in Viaweb. For those who are interested, 7 | here are some excerpts from a talk I gave in April 2001 at 8 | BBN Labs in Cambridge, MA. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/mod.txt: -------------------------------------------------------------------------------- 1 | December 2019There are two distinct ways to be politically moderate: on purpose 2 | and by accident. Intentional moderates are trimmers, deliberately 3 | choosing a position mid-way between the extremes of right and left. 4 | Accidental moderates end up in the middle, on average, because they 5 | make up their own minds about each question, and the far right and 6 | far left are roughly equally wrong.You can distinguish intentional from accidental moderates by the 7 | distribution of their opinions. 
If the far left opinion on some 8 | matter is 0 and the far right opinion 100, an intentional moderate's 9 | opinion on every question will be near 50. Whereas an accidental 10 | moderate's opinions will be scattered over a broad range, but will, 11 | like those of the intentional moderate, average to about 50.Intentional moderates are similar to those on the far left and the 12 | far right in that their opinions are, in a sense, not their own. 13 | The defining quality of an ideologue, whether on the left or the 14 | right, is to acquire one's opinions in bulk. You don't get to pick 15 | and choose. Your opinions about taxation can be predicted from your 16 | opinions about sex. And although intentional moderates 17 | might seem to be the opposite of ideologues, their beliefs (though 18 | in their case the word "positions" might be more accurate) are also 19 | acquired in bulk. If the median opinion shifts to the right or left, 20 | the intentional moderate must shift with it. Otherwise they stop 21 | being moderate.Accidental moderates, on the other hand, not only choose their own 22 | answers, but choose their own questions. They may not care at all 23 | about questions that the left and right both think are terribly 24 | important. So you can only even measure the politics of an accidental 25 | moderate from the intersection of the questions they care about and 26 | those the left and right care about, and this can 27 | sometimes be vanishingly small.It is not merely a manipulative rhetorical trick to say "if you're 28 | not with us, you're against us," but often simply false.Moderates are sometimes derided as cowards, particularly by 29 | the extreme left. 
But while it may be accurate to call intentional 30 | moderates cowards, openly being an accidental moderate requires the 31 | most courage of all, because you get attacked from both right and 32 | left, and you don't have the comfort of being an orthodox member 33 | of a large group to sustain you.Nearly all the most impressive people I know are accidental moderates. 34 | If I knew a lot of professional athletes, or people in the entertainment 35 | business, that might be different. Being on the far left or far 36 | right doesn't affect how fast you run or how well you sing. But 37 | someone who works with ideas has to be independent-minded to do it 38 | well.Or more precisely, you have to be independent-minded about the ideas 39 | you work with. You could be mindlessly doctrinaire in your politics 40 | and still be a good mathematician. In the 20th century, a lot of 41 | very smart people were Marxists — just no one who was smart about 42 | the subjects Marxism involves. But if the ideas you use in your 43 | work intersect with the politics of your time, you have two choices: 44 | be an accidental moderate, or be mediocre.Notes[1] It's possible in theory for one side to be entirely right and 45 | the other to be entirely wrong. Indeed, ideologues must always 46 | believe this is the case. But historically it rarely has been.[2] For some reason the far right tend to ignore moderates rather 47 | than despise them as backsliders. I'm not sure why. Perhaps it 48 | means that the far right is less ideological than the far left. Or 49 | perhaps that they are more confident, or more resigned, or simply 50 | more disorganized. I just don't know.[3] Having heretical opinions doesn't mean you have to express 51 | them openly. It may be 52 | easier to have them if you don't. 53 | Thanks to Austen Allred, Trevor Blackwell, Patrick Collison, Jessica Livingston, 54 | Amjad Masad, Ryan Petersen, and Harj Taggar for reading drafts of this. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssays/name.txt: -------------------------------------------------------------------------------- 1 | August 2015If you have a US startup called X and you don't have x.com, you 2 | should probably change your name.The reason is not just that people can't find you. For companies 3 | with mobile apps, especially, having the right domain name is not 4 | as critical as it used to be for getting users. The problem with 5 | not having the .com of your name is that it signals weakness. Unless 6 | you're so big that your reputation precedes you, a marginal domain 7 | suggests you're a marginal company. Whereas 8 | (as Stripe shows) 9 | having x.com signals strength even if it has no relation to what you 10 | do.Even good founders can be in denial about this. Their denial derives 11 | from two very powerful forces: identity, and lack of imagination.X is what we are, founders think. There's no other name as good. 12 | Both of which are false.You can fix the first by stepping back from the problem. Imagine 13 | you'd called your company something else. If you had, surely you'd 14 | be just as attached to that name as you are to your current one. 15 | The idea of switching to your current name would seem repellent. 16 | [1]There's nothing intrinsically great about your current name. Nearly 17 | all your attachment to it comes from it being attached to you. 18 | [2]The way to neutralize the second source of denial, your inability 19 | to think of other potential names, is to acknowledge that you're 20 | bad at naming. Naming is a completely separate skill from those 21 | you need to be a good founder. You can be a great startup founder 22 | but hopeless at thinking of names for your company.Once you acknowledge that, you stop believing there is nothing else 23 | you could be called. 
There are lots of other potential names that 24 | are as good or better; you just can't think of them.How do you find them? One answer is the default way to solve 25 | problems you're bad at: find someone else who can think of names. 26 | But with company names there is another possible 27 | approach. It turns out almost any word or word pair that is not 28 | an obviously bad name is a sufficiently good one, and the number 29 | of such domains is so large that you can find plenty that are cheap 30 | or even untaken. So make a list and try to buy some. That's what 31 | Stripe 32 | did. (Their search also turned up parse.com, which their 33 | friends at Parse took.)The reason I know that naming companies is a distinct skill orthogonal 34 | to the others you need in a startup is that I happen to have it. 35 | Back when I was running YC and did more office hours with startups, 36 | I would often help them find new names. 80% of the time we could 37 | find at least one good name in a 20 minute office hour slot.Now when I do office hours I have to focus on more important 38 | questions, like what the company is doing. I tell them when they 39 | need to change their name. But I know the power of the forces that 40 | have them in their grip, so I know most won't listen. 41 | [3]There are of course examples of startups that have succeeded without 42 | having the .com of their name. There are startups that have succeeded despite any 43 | number of different mistakes. But this mistake is less excusable 44 | than most. It's something that can be fixed in a couple days if 45 | you have sufficient discipline to acknowledge the problem.100% of the top 20 YC companies by valuation have the .com of their 46 | name. 94% of the top 50 do. But only 66% of companies in the current 47 | batch have the .com of their name. Which suggests there are lessons 48 | ahead for most of the rest, one way or another. 
49 | Notes[1] 50 | Incidentally, this thought experiment works for 51 | nationality and religion too.[2] 52 | The liking you have for a name that has become part of your 53 | identity manifests itself not directly, which would be easy to 54 | discount, but as a collection of specious beliefs about its intrinsic 55 | qualities. (This too is true of nationality and religion as well.)[3] 56 | Sometimes founders know it's a problem that they don't have 57 | the .com of their name, but delusion strikes a step later in the belief that they'll 58 | be able to buy it despite having no evidence it's for sale. Don't 59 | believe a domain is for sale unless the owner has already told you 60 | an asking price. 61 | Thanks to Sam Altman, Jessica Livingston, and Geoff Ralston 62 | for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/nft.txt: -------------------------------------------------------------------------------- 1 | May 2021Noora Health, a nonprofit I've 2 | supported for years, just launched 3 | a new NFT. It has a dramatic name, Save Thousands of Lives, 4 | because that's what the proceeds will do.Noora has been saving lives for 7 years. They run programs in 5 | hospitals in South Asia to teach new mothers how to take care of 6 | their babies once they get home. They're in 165 hospitals now. And 7 | because they know the numbers before and after they start at a new 8 | hospital, they can measure the impact they have. It is massive. 9 | For every 1000 live births, they save 9 babies.This number comes from a study 10 | of 133,733 families at 28 different 11 | hospitals that Noora conducted in collaboration with the Better 12 | Birth team at Ariadne Labs, a joint center for health systems 13 | innovation at Brigham and Women’s Hospital and Harvard T.H. 
Chan 14 | School of Public Health.Noora is so effective that even if you measure their costs in the 15 | most conservative way, by dividing their entire budget by the number 16 | of lives saved, the cost of saving a life is the lowest I've seen. 17 | $1,235.For this NFT, they're going to issue a public report tracking how 18 | this specific tranche of money is spent, and estimating the number 19 | of lives saved as a result.NFTs are a new territory, and this way of using them is especially 20 | new, but I'm excited about its potential. And I'm excited to see 21 | what happens with this particular auction, because unlike an NFT 22 | representing something that has already happened, 23 | this NFT gets better as the price gets higher.The reserve price was about $2.5 million, because that's what it 24 | takes for the name to be accurate: that's what it costs to save 25 | 2000 lives. But the higher the price of this NFT goes, the more 26 | lives will be saved. What a sentence to be able to write. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/noob.txt: -------------------------------------------------------------------------------- 1 | January 2020When I was young, I thought old people had everything figured out. 2 | Now that I'm old, I know this isn't true.I constantly feel like a noob. It seems like I'm always talking to 3 | some startup working in a new field I know nothing about, or reading 4 | a book about a topic I don't understand well enough, or visiting some new 5 | country where I don't know how things work.It's not pleasant to feel like a noob. And the word "noob" is 6 | certainly not a compliment. And yet today I realized something 7 | encouraging about being a noob: the more of a noob you are locally, 8 | the less of a noob you are globally.For example, if you stay in your home country, you'll feel less 9 | of a noob than if you move to Farawavia, where everything works 10 | differently. 
And yet you'll know more if you move. 11 | So the feeling of being a noob is inversely correlated with actual 12 | ignorance.But if the feeling of being a noob is good for us, why do we dislike 13 | it? What evolutionary purpose could such an aversion serve?I think the answer is that there are two sources of feeling like a 14 | noob: being stupid, and doing something novel. Our dislike of feeling 15 | like a noob is our brain telling us "Come on, come on, figure this 16 | out." Which was the right thing to be thinking for most of human 17 | history. The life of hunter-gatherers was complex, but it didn't 18 | change as much as life does now. They didn't suddenly have to figure 19 | out what to do about cryptocurrency. So it made sense to be biased 20 | toward competence at existing problems over the discovery of new 21 | ones. It made sense for humans to dislike the feeling of being a 22 | noob, just as, in a world where food was scarce, it made sense for 23 | them to dislike the feeling of being hungry.Now that too much food is more of a problem than too little, our 24 | dislike of feeling hungry leads us astray. And I think our dislike 25 | of feeling like a noob does too.Though it feels unpleasant, and people will sometimes ridicule you 26 | for it, the more you feel like a noob, the better. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/noop.txt: -------------------------------------------------------------------------------- 1 | 2 | There is a kind of mania for object-oriented programming at the moment, but 3 | 4 | some of the smartest programmers I know are some of the least excited about it.My own feeling is that object-oriented 5 | programming is a useful technique in some 6 | cases, but it isn't something that has to pervade every program you 7 | write. 
You should be able to define new types, 8 | but you shouldn't have to express every program as the 9 | definition of new types.I think there are five reasons people like object-oriented 10 | programming, and three and a half of them are bad: 11 | Object-oriented programming is exciting 12 | if you have a statically-typed language without 13 | lexical closures or macros. To some degree, it offers a way around these 14 | limitations. (See Greenspun's Tenth Rule.) Object-oriented programming is popular in big companies, 15 | because it suits the way they write software. At big companies, 16 | software tends to be written by large (and frequently changing) 17 | teams of 18 | mediocre programmers. Object-oriented programming imposes a 19 | discipline on these programmers that prevents any one of them 20 | from doing too much damage. The price is that the resulting 21 | code is bloated with protocols and full of duplication. 22 | This is not too high a price for big companies, because their 23 | software is probably going to be bloated and full of 24 | duplication anyway. Object-oriented 25 | programming generates a lot of what looks like work. 26 | Back in the days of fanfold, there was a type of programmer who 27 | would only put five or ten lines of code on a page, preceded 28 | by twenty lines of elaborately formatted comments. 29 | Object-oriented programming is like crack for these people: it lets 30 | you incorporate all this scaffolding right into your source 31 | code. Something that a Lisp hacker might handle by pushing 32 | a symbol onto a list becomes a whole file of classes and 33 | methods. So it is a good tool if you want to convince yourself, 34 | or someone else, that you are doing a lot of work. If a language is itself an object-oriented program, it can 35 | be extended by users. Well, maybe. Or maybe you can do 36 | even better by offering the sub-concepts 37 | of object-oriented programming a la carte. 
Overloading, 38 | for example, is not intrinsically tied to classes. We'll see. Object-oriented abstractions map neatly onto the domains 39 | of certain specific kinds of programs, like simulations and CAD 40 | systems. 41 | 42 | 43 | I personally have never needed object-oriented abstractions. 44 | Common Lisp has an enormously powerful object system and I've 45 | never used it once. I've done a lot of things (e.g. making 46 | hash tables full of closures) that would have required 47 | object-oriented techniques to do in wimpier languages, but 48 | I have never had to use CLOS.Maybe I'm just stupid, or have worked on some limited subset 49 | of applications. There is a danger in designing a language 50 | based on one's own experience of programming. But it seems 51 | more dangerous to put stuff in that you've never needed 52 | because it's thought to be a good idea. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/nov.txt: -------------------------------------------------------------------------------- 1 | November 2019If you discover something new, there's a significant chance you'll be 2 | accused of some form of heresy.To discover new things, you have 3 | to work on ideas that are good but non-obvious; if an idea is 4 | obviously good, other people are probably already working on it. 5 | One common way for a good idea to be non-obvious is for it to be hidden in the 6 | shadow of some mistaken assumption that people are very attached to. 7 | But anything you discover from working on such an idea will tend to 8 | contradict the mistaken assumption that was concealing it. And you 9 | will thus get a lot of heat from people attached to the mistaken 10 | assumption. Galileo and Darwin are famous examples of this phenomenon, 11 | but it's probably always an ingredient in the resistance to new 12 | ideas.So it's particularly dangerous for an organization or society to 13 | have a culture of pouncing on heresy. 
When you suppress heresies, 14 | you don't just prevent people from contradicting the mistaken 15 | assumption you're trying to protect. You also suppress any idea 16 | that implies indirectly that it's false. 17 | Every cherished mistaken assumption has 18 | a dead zone of unexplored ideas around it. And the more preposterous 19 | the assumption, the bigger the dead zone it creates.There is a positive side to this phenomenon though. If you're 20 | looking for new ideas, one way to find them is by looking for 21 | heresies. When you look at the question this way, the depressingly 22 | large dead zones around mistaken assumptions become excitingly large 23 | mines of new ideas. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/patentpledge.txt: -------------------------------------------------------------------------------- 1 | August 2011I realized recently that we may be able to solve part of the patent 2 | problem without waiting for the government.I've never been 100% sure whether patents help or hinder technological 3 | progress. When I was a kid I thought they helped. I thought they 4 | protected inventors from having their ideas stolen by big companies. 5 | Maybe that was truer in the past, when more things were physical. 6 | But regardless of whether patents are in general a good thing, there 7 | do seem to be bad ways of using them. And since bad uses of patents 8 | seem to be increasing, there is an increasing call for patent reform.The problem with patent reform is that it has to go through the 9 | government. That tends to be slow. But recently I realized we can 10 | also attack the problem downstream. 
As well as pinching off the 11 | stream of patents at the point where they're issued, we may in some 12 | cases be able to pinch it off at the point where they're used.One way of using patents that clearly does not encourage innovation 13 | is when established companies with bad products use patents to 14 | suppress small competitors with good products. This is the type 15 | of abuse we may be able to decrease without having to go through 16 | the government.The way to do it is to get the companies that are above pulling 17 | this sort of trick to pledge publicly not to. Then the ones that 18 | won't make such a pledge will be very conspicuous. Potential 19 | employees won't want to work for them. And investors, too, will 20 | be able to see that they're the sort of company that competes by 21 | litigation rather than by making good products.Here's the pledge: 22 | 23 | No first use of software patents against companies with less 24 | than 25 people. 25 | 26 | I've deliberately traded precision for brevity. The patent pledge 27 | is not legally binding. It's like Google's "Don't be evil." They 28 | don't define what evil is, but by publicly saying that, they're 29 | saying they're willing to be held to a standard that, say, Altria 30 | is not. And though constraining, "Don't be evil" has been good for 31 | Google. Technology companies win by attracting the most productive 32 | people, and the most productive people are attracted to employers 33 | who hold themselves to a higher standard than the law requires. 34 | [1]The patent pledge is in effect a narrower but open source "Don't 35 | be evil." I encourage every technology company to adopt it. If 36 | you want to help fix patents, encourage your employer to.Already most technology companies wouldn't sink to using patents 37 | on startups. You don't see Google or Facebook suing startups for 38 | patent infringement. They don't need to. 
So for the better technology 39 | companies, the patent pledge requires no change in behavior. They're 40 | just promising to do what they'd do anyway. And when all the 41 | companies that won't use patents on startups have said so, the 42 | holdouts will be very conspicuous. The patent pledge doesn't fix every problem with patents. It won't 43 | stop patent trolls, for example; they're already pariahs. But the 44 | problem the patent pledge does fix may be more serious than the 45 | problem of patent trolls. Patent trolls are just parasites. A 46 | clumsy parasite may occasionally kill the host, but that's not its 47 | goal. Whereas companies that sue startups for patent infringement 48 | generally do it with the explicit goal of keeping their product off the 49 | market. Companies that use patents on startups are attacking innovation at 50 | the root. Now there's something any individual can do about this 51 | problem, without waiting for the government: ask companies where 52 | they stand. 53 | Patent Pledge Site 54 | Notes: [1] 55 | Because the pledge is deliberately vague, we're going to need 56 | common sense when interpreting it. And even more vice versa: the 57 | pledge is vague in order to make people use common sense when 58 | interpreting it. So for example I've deliberately avoided saying whether the 25 59 | people have to be employees, or whether contractors count too. If 60 | a company has to split hairs that fine about whether a suit would 61 | violate the patent pledge, it's probably still a dick move. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/polls.txt: -------------------------------------------------------------------------------- 1 | November 2004 2 | A lot of people are writing now about 3 | why Kerry lost. Here I want to 4 | examine a more specific question: why were the exit polls so 5 | wrong? In Ohio, which Kerry ultimately 6 | lost 49-51, exit polls gave him a 52-48 victory. 
And this wasn't just 7 | random error. In every swing state they overestimated the Kerry vote. 8 | In Florida, which Bush ultimately won 52-47, exit polls predicted 9 | a dead heat.(These are not early numbers. They're from about midnight eastern time, 10 | long after polls closed in Ohio and Florida. And yet by the 11 | next afternoon the exit poll numbers online corresponded to the returns. 12 | The only way I can imagine this happening is if those in 13 | charge of the exit polls cooked the books after seeing the actual 14 | returns. But that's another issue.)What happened? The source of the problem may be a variant of 15 | the Bradley Effect. 16 | This term 17 | was invented after Tom Bradley, the black mayor of Los Angeles, 18 | lost an election for governor of California despite a comfortable 19 | lead in the polls. Apparently voters were afraid to say 20 | they planned to vote against him, lest their motives be 21 | (perhaps correctly) suspected.It seems likely that something similar happened in exit polls this year. 22 | In theory, exit polls ought to be very accurate. 23 | You're not asking people what they would do. You're 24 | asking what they just did.How can you get errors asking that? Because some people don't 25 | respond. To get a truly random sample, pollsters ask, say, every 26 | 20th person leaving the polling place who they voted for. But not 27 | everyone wants to answer. And the pollsters can't simply ignore 28 | those who won't, or their sample isn't random anymore. So what 29 | they do, apparently, is note down the age and race and sex of the 30 | person, and guess from that who they voted for.This works so long as there is no correlation between who people 31 | vote for and whether they're willing to talk about it. But this 32 | year there may have been. It may be that a significant number of 33 | those who voted for 34 | Bush didn't want to say so.Why not? Because people in the US are more conservative than they're 35 | willing to admit. 
The values of the elite in this country, at least 36 | at the moment, are NPR values. The average person, as I think both 37 | Republicans and Democrats would agree, is more socially conservative. 38 | But while some openly flaunt the fact that they don't share the 39 | opinions of the elite, others feel a little nervous about it, as 40 | if they had bad table manners.For example, according to current NPR values, you 41 | can't say anything that might be 42 | perceived as disparaging towards homosexuals. To do 43 | so is "homophobic." And yet a large number of Americans are deeply 44 | religious, and the Bible is quite explicit on the subject of 45 | homosexuality. What are they to do? I think what many do is keep 46 | their opinions, but keep them to themselves.They know what they believe, but they also know what they're supposed 47 | to believe. 48 | And so when a stranger (for example, a pollster) asks 49 | them their opinion about something like gay marriage, they will not 50 | always say what they really think.When the values of the elite are liberal, polls will tend to 51 | underestimate the conservativeness of ordinary voters. This seems 52 | to me the leading theory to explain why the exit polls were so 53 | far off this year. NPR values 54 | said one ought to vote for Kerry. So all the people who voted for 55 | Kerry felt virtuous for doing so, and were eager to tell pollsters 56 | they had. No one who voted for Kerry did it as an act of quiet 57 | defiance. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/pow.txt: -------------------------------------------------------------------------------- 1 | January 2017People who are powerful but uncharismatic will tend to be disliked. 2 | Their power makes them a target for criticism that they don't have 3 | the charisma to disarm. That was Hillary Clinton's problem. It also 4 | tends to be a problem for any CEO who is more of a builder than a 5 | schmoozer. 
And yet the builder-type CEO is (like Hillary) probably 6 | the best person for the job. I don't think there is any solution to this problem. It's human 7 | nature. The best we can do is to recognize that it's happening, and 8 | to understand that being a magnet for criticism is sometimes a sign 9 | not that someone is the wrong person for a job, but that they're 10 | the right one. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/prop62.txt: -------------------------------------------------------------------------------- 1 | November 2016 If you're a California voter, there is an important proposition 2 | on your ballot this year: Proposition 62, which bans the death 3 | penalty. When I was younger I used to think the debate about the death 4 | penalty was about when it's ok to take a human life. Is it ok 5 | to kill a killer? But that is not the issue here. The real world does not work like the version I was shown on TV growing up. The police 6 | often arrest the wrong person. 7 | Defendants' lawyers are often incompetent. And prosecutors 8 | are often motivated more by publicity than justice. In the real world, 9 | about 4% of people sentenced to death 10 | are innocent. 11 | So this is not about whether it's ok to kill killers. This 12 | is about whether it's ok to kill innocent people. A child could answer that one for you. This year, in California, you have a chance to end this, by 13 | voting yes on Proposition 62. But beware, because there is another 14 | proposition, Proposition 66, whose goal is to make it 15 | easier to execute people. So yes on 62, no on 66. It's time. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssays/pypar.txt: -------------------------------------------------------------------------------- 1 | August 2004In a recent talk I said something that upset a lot of 2 | people: that you could get smarter programmers to work on 3 | a Python project than you could to work on a Java project.I didn't mean by this that Java programmers are dumb. I 4 | meant that Python programmers are smart. It's a lot of 5 | work to learn a new programming language. And people don't 6 | learn Python because it will get them a job; they learn it 7 | because they genuinely like to program and aren't satisfied with the languages they 8 | already know.Which makes them exactly the kind of programmers 9 | companies should want to hire. Hence what, for lack of a better 10 | name, I'll call the Python paradox: if a company chooses to write 11 | its software in a comparatively esoteric language, they'll be able 12 | to hire better programmers, because they'll attract only those 13 | who cared enough to learn it. And for 14 | programmers the paradox is even more pronounced: the language 15 | to learn, if you want to get a good job, is a language that 16 | people don't learn merely to get a job.Only a few companies have been smart enough to realize this 17 | so far. But there is a kind of selection going on here too: they're 18 | exactly the companies programmers would 19 | most like to work for. Google, for example. When they 20 | advertise Java programming jobs, they also want Python experience.A friend of mine who knows nearly all the widely used languages 21 | uses Python for most of his projects. He says the main reason 22 | is that he likes the way source code looks. That may seem 23 | a frivolous reason to choose one language over another. 24 | But it is not so frivolous as it sounds: when you program, 25 | you spend more time reading code than writing it. 
26 | You push blobs of source code around the way a sculptor does 27 | blobs of clay. So a language that makes source code ugly is 28 | maddening to an exacting programmer, as clay full of lumps 29 | would be to a sculptor.At the mention of ugly source code, people will of course think 30 | of Perl. But the superficial ugliness of Perl is not the sort 31 | I mean. Real ugliness is not harsh-looking 32 | syntax, but having to build programs out of the wrong 33 | concepts. Perl may look like a cartoon character swearing, 34 | but there are 35 | cases where it surpasses Python conceptually.So far, anyway. Both languages are of course 36 | moving targets. But they 37 | share, along with Ruby (and Icon, and Joy, and J, and Lisp, 38 | and Smalltalk) the fact that 39 | they're created by, and used by, people who really care about 40 | programming. And those tend to be the ones who do it well. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/randomness.txt: -------------------------------------------------------------------------------- 1 | April 2006, rev August 2009Plato quotes Socrates as saying "the unexamined life is not worth 2 | living." Part of what he meant was that the proper role of humans is to 3 | think, just as the proper role of anteaters is to poke their noses 4 | into anthills.A lot of ancient philosophy had the quality — and I 5 | don't mean this in an insulting way — of the kind of conversations 6 | freshmen have late at night in common rooms: 7 | 8 | What is our purpose? Well, we humans are 9 | as conspicuously different from other animals as the anteater. 10 | In our case the distinguishing feature is the ability to reason. 11 | So obviously that is what we should be doing, and a human who 12 | doesn't is doing a bad job of being human — is no better than an 13 | animal. 14 | 15 | Now we'd give a different answer. At least, someone Socrates's age 16 | would. 
We'd ask why we even suppose we have a "purpose" in life. 17 | We may be better adapted for some things than others; we 18 | may be happier doing things we're adapted for; but why assume 19 | purpose?The history of ideas 20 | is a history of gradually discarding the assumption that it's all 21 | about us. No, it turns out, the earth is not the center of the 22 | universe — not even the center of the solar system. No, it turns 23 | out, humans are not created by God in his own image; they're just 24 | one species among many, descended not merely from apes, but from 25 | microorganisms. Even the concept of "me" turns out to be fuzzy 26 | around the edges if you examine it closely.The idea that we're the center of things is difficult to discard. 27 | So difficult that there's probably room to discard more. Richard 28 | Dawkins made another step in that direction only in the last several 29 | decades, with the idea of the 30 | selfish gene. 31 | No, it turns 32 | out, we're not even the protagonists: we're just the latest model 33 | vehicle our genes have constructed to travel around in. And having 34 | kids is our genes heading for the lifeboats. Reading 35 | that book snapped my brain out of its previous way of thinking the 36 | way Darwin's must have when it first appeared.(Few people can experience now what Darwin's contemporaries did 37 | when The Origin of Species was first published, because everyone 38 | now is raised either to take evolution for granted, or to regard 39 | it as a heresy. No one encounters the idea of natural selection for 40 | the first time as an adult.)So if you want to discover things that have been overlooked till 41 | now, one really good place to look is in our blind spot: in our 42 | natural, naive belief that it's all about us. And expect to encounter 43 | ferocious opposition if you do.Conversely, if you have to choose between two theories, prefer the 44 | one that doesn't center on you.This principle isn't only for big ideas. 
It works in everyday life, 45 | too. For example, suppose you're saving a piece of cake in the fridge, and you 46 | come home one day to find your housemate has eaten 47 | it. Two possible theories: 48 | 49 | a) Your housemate did it deliberately to upset you. He knew 50 | you were saving that piece of cake.b) Your housemate was hungry. 51 | 52 | I say pick b. No one knows who said "never attribute to malice what 53 | can be explained by incompetence," but it is a powerful idea. 54 | Its more general version is our answer to the Greeks: 55 | Don't see purpose where there isn't. 56 | Or better still, the positive version: 57 | See randomness. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/read.txt: -------------------------------------------------------------------------------- 1 | November 2022In the science fiction books I read as a kid, reading had often 2 | been replaced by some more efficient way of acquiring knowledge. 3 | Mysterious "tapes" would load it into one's brain like a program 4 | being loaded into a computer.That sort of thing is unlikely to happen anytime soon. Not just 5 | because it would be hard to build a replacement for reading, but 6 | because even if one existed, it would be insufficient. Reading about 7 | x doesn't just teach you about x; it also teaches you how to write. 8 | [1]Would that matter? If we replaced reading, would anyone need to be 9 | good at writing?The reason it would matter is that writing is not just a way to 10 | convey ideas, but also a way to have them.A good writer doesn't just think, and then write down what he 11 | thought, as a sort of transcript. A good writer will almost always 12 | discover new things in the process of writing. And there is, as far 13 | as I know, no substitute for this kind of discovery. Talking about 14 | your ideas with other people is a good way to develop them. 
But 15 | even after doing this, you'll find you still discover new things 16 | when you sit down to write. There is a kind of thinking that can 17 | only be done by writing.There are of course kinds of thinking that can be done without 18 | writing. If you don't need to go too deeply into a problem, you can 19 | solve it without writing. If you're thinking about how two pieces 20 | of machinery should fit together, writing about it probably won't 21 | help much. And when a problem can be described formally, you can 22 | sometimes solve it in your head. But if you need to solve a 23 | complicated, ill-defined problem, it will almost always help to 24 | write about it. Which in turn means that someone who's not good at 25 | writing will almost always be at a disadvantage in solving such 26 | problems.You can't think well without writing well, and you can't write well 27 | without reading well. And I mean that last "well" in both senses. 28 | You have to be good at reading, and read good things. 29 | [2]People who just want information may find other ways to get it. 30 | But people who want to have ideas can't afford to.Notes[1] 31 | Audiobooks can give you examples of good writing, but having 32 | them read to you doesn't teach you as much about writing as reading 33 | them yourself.[2] 34 | By "good at reading" I don't mean good at the mechanics of 35 | reading. You don't have to be good at extracting words from the 36 | page so much as extracting meaning from the words. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/ronco.txt: -------------------------------------------------------------------------------- 1 | January 2015No one, VC or angel, has invested in more of the top startups than 2 | Ron Conway. He knows what happened in every deal in the Valley, 3 | half the time because he arranged it.And yet he's a super nice guy. In fact, nice is not the word. 4 | Ronco is good. 
I know of zero instances in which he has behaved 5 | badly. It's hard even to imagine.When I first came to Silicon Valley I thought "How lucky that someone 6 | so powerful is so benevolent." But gradually I realized it wasn't 7 | luck. It was by being benevolent that Ronco became so powerful. 8 | All the deals he gets to invest in come to him through referrals. 9 | Google did. Facebook did. Twitter was a referral from Evan Williams 10 | himself. And the reason so many people refer deals to him is that 11 | he's proven himself to be a good guy.Good does not mean being a pushover. I would not want to face an 12 | angry Ronco. But if Ron's angry at you, it's because you did 13 | something wrong. Ron is so old school he's Old Testament. He will 14 | smite you in his just wrath, but there's no malice in it.In almost every domain there are advantages to seeming good. It 15 | makes people trust you. But actually being good is an expensive 16 | way to seem good. To an amoral person it might seem to be overkill.In some fields it might be, but apparently not in the startup world. 17 | Though plenty of investors are jerks, there is a clear trend among 18 | them: the most successful investors are also the most upstanding. 19 | [1]It was not always this way. I would not feel confident saying that 20 | about investors twenty years ago.What changed? The startup world became more transparent and more 21 | unpredictable. Both make it harder to seem good without actually 22 | being good.It's obvious why transparency has that effect. When an investor 23 | maltreats a founder now, it gets out. Maybe not all the way to the 24 | press, but other founders hear about it, and that investor 25 | starts to lose deals. 26 | [2]The effect of unpredictability is more subtle. It increases the 27 | work of being inconsistent. If you're going to be two-faced, you 28 | have to know who you should be nice to and who you can get away 29 | with being nasty to. 
In the startup world, things change so rapidly 30 | that you can't tell. The random college kid you talk to today might 31 | in a couple years be the CEO of the hottest startup in the Valley. 32 | If you can't tell who to be nice to, you have to be nice to everyone. 33 | And probably the only people who can manage that are the people who 34 | are genuinely good.In a sufficiently connected and unpredictable world, you can't seem 35 | good without being good.As often happens, Ron discovered how to be the investor of the 36 | future by accident. He didn't foresee the future of startup 37 | investing, realize it would pay to be upstanding, and force himself 38 | to behave that way. It would feel unnatural to him to behave any 39 | other way. He was already 40 | living in the future.Fortunately that future is not limited to the startup world. The 41 | startup world is more transparent and unpredictable than most, but 42 | almost everywhere the trend is in that direction.Notes[1] 43 | I'm not saying that if you sort investors by benevolence 44 | you've also sorted them by returns, but rather that if you do a 45 | scatterplot with benevolence on the x axis and returns on the y, 46 | you'd see a clear upward trend.[2] 47 | Y Combinator in particular, because it aggregates data 48 | from so many startups, has a pretty comprehensive view of 49 | investor behavior. 50 | Thanks to Sam Altman and Jessica Livingston for reading drafts of 51 | this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/rootsoflisp.txt: -------------------------------------------------------------------------------- 1 | May 2001 2 | 3 | (I wrote this article to help myself understand exactly 4 | what McCarthy discovered. You don't need to know this stuff 5 | to program in Lisp, but it should be helpful to 6 | anyone who wants to 7 | understand the essence of Lisp — both in the sense of its 8 | origins and its semantic core. 
The fact that it has such a core 9 | is one of Lisp's distinguishing features, and the reason why, 10 | unlike other languages, Lisp has dialects.)In 1960, John 11 | McCarthy published a remarkable paper in 12 | which he did for programming something like what Euclid did for 13 | geometry. He showed how, given a handful of simple 14 | operators and a notation for functions, you can 15 | build a whole programming language. 16 | He called this language Lisp, for "List Processing," 17 | because one of his key ideas was to use a simple 18 | data structure called a list for both 19 | code and data.It's worth understanding what McCarthy discovered, not 20 | just as a landmark in the history of computers, but as 21 | a model for what programming is tending to become in 22 | our own time. It seems to me that there have been 23 | two really clean, consistent models of programming so 24 | far: the C model and the Lisp model. 25 | These two seem points of high ground, with swampy lowlands 26 | between them. As computers have grown more powerful, 27 | the new languages being developed have been moving 28 | steadily toward the Lisp model. A popular recipe 29 | for new programming languages in the past 20 years 30 | has been to take the C model of computing and add to 31 | it, piecemeal, parts taken from the Lisp model, 32 | like runtime typing and garbage collection.In this article I'm going to try to explain in the 33 | simplest possible terms what McCarthy discovered. 34 | The point is not just to learn about an interesting 35 | theoretical result someone figured out forty years ago, 36 | but to show where languages are heading. 37 | The unusual thing about Lisp — in fact, the defining 38 | quality of Lisp — is that it can be written in 39 | itself. To understand what McCarthy meant by this, 40 | we're going to retrace his steps, with his mathematical 41 | notation translated into running Common Lisp code. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssays/rss.txt: -------------------------------------------------------------------------------- 1 | Aaron Swartz created a scraped 2 | feed 3 | of the essays page. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/segway.txt: -------------------------------------------------------------------------------- 1 | July 2009 The Segway hasn't delivered on its initial promise, to put it mildly. 2 | There are several reasons why, but one is that people don't want 3 | to be seen riding them. Someone riding a Segway looks like a dork. My friend Trevor Blackwell built 4 | his own Segway, 5 | which we called 6 | the Segwell. He also built a one-wheeled version, 7 | the Eunicycle, 8 | which looks exactly like a regular unicycle till you realize the 9 | rider isn't pedaling. He has ridden them both to downtown Mountain 10 | View to get coffee. When he rides the Eunicycle, people smile at 11 | him. But when he rides the Segwell, they shout abuse from their 12 | cars: "Too lazy to walk, ya fuckin homo?" Why do Segways provoke this reaction? The reason you look like a 13 | dork riding a Segway is that you look smug. You don't seem to 14 | be working hard enough. Someone riding a motorcycle isn't working any harder. But because 15 | he's sitting astride it, he seems to be making an effort. When 16 | you're riding a Segway you're just standing there. And someone who's 17 | being whisked along while seeming to do no work — someone in a sedan 18 | chair, for example — can't help but look smug. Try this thought experiment and it becomes clear: imagine something 19 | that worked like the Segway, but that you rode with one foot in 20 | front of the other, like a skateboard.
That wouldn't seem nearly 21 | as uncool.So there may be a way to capture more of the market Segway hoped 22 | to reach: make a version that doesn't look so easy for the rider. 23 | It would also be helpful if the styling was in the tradition of 24 | skateboards or bicycles rather than medical devices.Curiously enough, what got Segway into this problem was that the 25 | company was itself a kind of Segway. It was too easy for them; 26 | they were too successful raising money. If they'd had to grow the 27 | company gradually, by iterating through several versions they sold 28 | to real users, they'd have learned pretty quickly that people looked 29 | stupid riding them. Instead they had enough to work in secret. They 30 | had focus groups aplenty, I'm sure, but they didn't have the people 31 | yelling insults out of cars. So they never realized they were 32 | zooming confidently down a blind alley. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/selfindulgence.txt: -------------------------------------------------------------------------------- 1 | July 2010When we sold our startup in 1998 I suddenly got a lot of money. I 2 | now had to think about something I hadn't had to think about before: 3 | how not to lose it. I knew it was possible to go from rich to 4 | poor, just as it was possible to go from poor to rich. But while 5 | I'd spent a lot of the past several years studying the paths from 6 | poor to rich, 7 | I knew practically nothing about the paths from rich 8 | to poor. Now, in order to avoid them, I had to learn where they 9 | were.So I started to pay attention to how fortunes are lost. If you'd 10 | asked me as a kid how rich people became poor, I'd have said by 11 | spending all their money. That's how it happens in books and movies, 12 | because that's the colorful way to do it. 
But in fact the way most 13 | fortunes are lost is not through excessive expenditure, but through 14 | bad investments.It's hard to spend a fortune without noticing. Someone with ordinary 15 | tastes would find it hard to blow through more than a few tens of 16 | thousands of dollars without thinking "wow, I'm spending a lot of 17 | money." Whereas if you start trading derivatives, you can lose a 18 | million dollars (as much as you want, really) in the blink of an 19 | eye.In most people's minds, spending money on luxuries sets off alarms 20 | that making investments doesn't. Luxuries seem self-indulgent. 21 | And unless you got the money by inheriting it or winning a lottery, 22 | you've already been thoroughly trained that self-indulgence leads 23 | to trouble. Investing bypasses those alarms. You're not spending 24 | the money; you're just moving it from one asset to another. Which 25 | is why people trying to sell you expensive things say "it's an 26 | investment."The solution is to develop new alarms. This can be a tricky business, 27 | because while the alarms that prevent you from overspending are so 28 | basic that they may even be in our DNA, the ones that prevent you 29 | from making bad investments have to be learned, and are sometimes 30 | fairly counterintuitive.A few days ago I realized something surprising: the situation with 31 | time is much the same as with money. The most dangerous way to 32 | lose time is not to spend it having fun, but to spend it doing fake 33 | work. When you spend time having fun, you know you're being 34 | self-indulgent. Alarms start to go off fairly quickly. If I woke 35 | up one morning and sat down on the sofa and watched TV all day, I'd 36 | feel like something was terribly wrong. Just thinking about it 37 | makes me wince. 
I'd start to feel uncomfortable after sitting on 38 | a sofa watching TV for 2 hours, let alone a whole day.And yet I've definitely had days when I might as well have sat in 39 | front of a TV all day — days at the end of which, if I asked myself 40 | what I got done that day, the answer would have been: basically, 41 | nothing. I feel bad after these days too, but nothing like as bad 42 | as I'd feel if I spent the whole day on the sofa watching TV. If 43 | I spent a whole day watching TV I'd feel like I was descending into 44 | perdition. But the same alarms don't go off on the days when I get 45 | nothing done, because I'm doing stuff that seems, superficially, 46 | like real work. Dealing with email, for example. You do it sitting 47 | at a desk. It's not fun. So it must be work.With time, as with money, avoiding pleasure is no longer enough to 48 | protect you. It probably was enough to protect hunter-gatherers, 49 | and perhaps all pre-industrial societies. So nature and nurture 50 | combine to make us avoid self-indulgence. But the world has gotten 51 | more complicated: the most dangerous traps now are new behaviors 52 | that bypass our alarms about self-indulgence by mimicking more 53 | virtuous types. And the worst thing is, they're not even fun. 54 | Thanks to Sam Altman, Trevor Blackwell, Patrick Collison, Jessica 55 | Livingston, and Robert Morris for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/simply.txt: -------------------------------------------------------------------------------- 1 | March 2021I try to write using ordinary words and simple sentences.That kind of writing is easier to read, and the easier something 2 | is to read, the more deeply readers will engage with it. The less 3 | energy they expend on your prose, the more they'll have left for 4 | your ideas.And the further they'll read. 
Most readers' energy tends to flag 5 | part way through an article or essay. If the friction of reading 6 | is low enough, more keep going till the end.There's an Italian dish called saltimbocca, which means "leap 7 | into the mouth." My goal when writing might be called saltintesta: 8 | the ideas leap into your head and you barely notice the words that 9 | got them there.It's too much to hope that writing could ever be pure ideas. You 10 | might not even want it to be. But for most writers, most of the 11 | time, that's the goal to aim for. The gap between most writing and 12 | pure ideas is not filled with poetry.Plus it's more considerate to write simply. When you write in a 13 | fancy way to impress people, you're making them do extra work just 14 | so you can seem cool. It's like trailing a long train behind you 15 | that readers have to carry.And remember, if you're writing in English, that a lot of your 16 | readers won't be native English speakers. Their understanding of 17 | ideas may be way ahead of their understanding of English. So you 18 | can't assume that writing about a difficult topic means you can 19 | use difficult words.Of course, fancy writing doesn't just conceal ideas. It can also 20 | conceal the lack of them. That's why some people write that way, 21 | to conceal the fact that they have 22 | nothing to say. Whereas writing 23 | simply keeps you honest. If you say nothing simply, it will be 24 | obvious to everyone, including you.Simple writing also lasts better. People reading your stuff in the 25 | future will be in much the same position as people from other 26 | countries reading it today. The culture and the language will have 27 | changed. It's not vain to care about that, any more than it's vain 28 | for a woodworker to build a chair to last.Indeed, lasting is not merely an accidental quality of chairs, or 29 | writing. 
It's a sign you did a good job.But although these are all real advantages of writing simply, none 30 | of them are why I do it. The main reason I write simply is that it 31 | offends me not to. When I write a sentence that seems too complicated, 32 | or that uses unnecessarily intellectual words, it doesn't seem fancy 33 | to me. It seems clumsy.There are of course times when you want to use a complicated sentence 34 | or fancy word for effect. But you should never do it by accident.The other reason my writing ends up being simple is the way I do 35 | it. I write the first draft fast, then spend days editing it, trying 36 | to get everything just right. Much of this editing is cutting, and 37 | that makes simple writing even simpler. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/sun.txt: -------------------------------------------------------------------------------- 1 | September 2017The most valuable insights are both general and surprising. 2 | F = ma for example. But general and surprising is a hard 3 | combination to achieve. That territory tends to be picked 4 | clean, precisely because those insights are so valuable.Ordinarily, the best that people can do is one without the 5 | other: either surprising without being general (e.g. 6 | gossip), or general without being surprising (e.g. 7 | platitudes).Where things get interesting is the moderately valuable 8 | insights. You get those from small additions of whichever 9 | quality was missing. The more common case is a small 10 | addition of generality: a piece of gossip that's more than 11 | just gossip, because it teaches something interesting about 12 | the world. But another less common approach is to focus on 13 | the most general ideas and see if you can find something new 14 | to say about them. 
Because these start out so general, you 15 | only need a small delta of novelty to produce a useful 16 | insight.A small delta of novelty is all you'll be able to get most 17 | of the time. Which means if you take this route, your ideas 18 | will seem a lot like ones that already exist. Sometimes 19 | you'll find you've merely rediscovered an idea that did 20 | already exist. But don't be discouraged. Remember the huge 21 | multiplier that kicks in when you do manage to think of 22 | something even a little new.Corollary: the more general the ideas you're talking about, 23 | the less you should worry about repeating yourself. If you 24 | write enough, it's inevitable you will. Your brain is much 25 | the same from year to year and so are the stimuli that hit 26 | it. I feel slightly bad when I find I've said something 27 | close to what I've said before, as if I were plagiarizing 28 | myself. But rationally one shouldn't. You won't say 29 | something exactly the same way the second time, and that 30 | variation increases the chance you'll get that tiny but 31 | critical delta of novelty.And of course, ideas beget ideas. (That sounds 32 | familiar.) 33 | An idea with a small amount of novelty could lead to one 34 | with more. But only if you keep going. So it's doubly 35 | important not to let yourself be discouraged by people who 36 | say there's not much new about something you've discovered. 37 | "Not much new" is a real achievement when you're talking 38 | about the most general ideas. It's not true that there's nothing new under the sun. There 39 | are some domains where there's almost nothing new. But 40 | there's a big difference between nothing and almost nothing, 41 | when it's multiplied by the area under the sun. 42 | Thanks to Sam Altman, Patrick Collison, and Jessica 43 | Livingston for reading drafts of this. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssays/tablets.txt: -------------------------------------------------------------------------------- 1 | December 2010I was thinking recently how inconvenient it was not to have a general 2 | term for iPhones, iPads, and the corresponding things running 3 | Android. The closest to a general term seems to be "mobile devices," 4 | but that (a) applies to any mobile phone, and (b) doesn't really 5 | capture what's distinctive about the iPad.After a few seconds it struck me that what we'll end up calling 6 | these things is tablets. The only reason we even consider calling 7 | them "mobile devices" is that the iPhone preceded the iPad. If the 8 | iPad had come first, we wouldn't think of the iPhone as a phone; 9 | we'd think of it as a tablet small enough to hold up to your ear.The iPhone isn't so much a phone as a replacement for a phone. 10 | That's an important distinction, because it's an early instance of 11 | what will become a common pattern. Many if not most of the 12 | special-purpose objects around us are going to be replaced by apps 13 | running on tablets.This is already clear in cases like GPSes, music players, and 14 | cameras. But I think it will surprise people how many things are 15 | going to get replaced. We funded one startup that's 16 | replacing keys. 17 | The fact that you can change font sizes easily means the iPad 18 | effectively replaces reading glasses. I wouldn't be surprised if 19 | by playing some clever tricks with the accelerometer you could even 20 | replace the bathroom scale.The advantages of doing things in software on a single device are 21 | so great that everything that can get turned into software will. 
22 | So for the next couple years, a good recipe for startups 23 | will be to look around you for things that people haven't realized 24 | yet can be made unnecessary by a tablet app.In 1938 Buckminster Fuller coined the term ephemeralization to 25 | describe the increasing tendency of physical machinery to be replaced 26 | by what we would now call software. The reason tablets are going 27 | to take over the world is not (just) that Steve Jobs and Co are 28 | industrial design wizards, but because they have this force behind 29 | them. The iPhone and the iPad have effectively drilled a hole that 30 | will allow ephemeralization to flow into a lot of new areas. No one 31 | who has studied the history of technology would want to underestimate 32 | the power of that force.I worry about the power Apple could have with this force behind 33 | them. I don't want to see another era of client monoculture like 34 | the Microsoft one in the 80s and 90s. But if ephemeralization is 35 | one of the main forces driving the spread of tablets, that suggests 36 | a way to compete with Apple: be a better platform for it.It has turned out to be a great thing that Apple tablets have 37 | accelerometers in them. Developers have used the accelerometer in 38 | ways Apple could never have imagined. That's the nature of platforms. 39 | The more versatile the tool, the less you can predict how people 40 | will use it. So tablet makers should be thinking: what else can 41 | we put in there? Not merely hardware, but software too. What else 42 | can we give developers access to? Give hackers an inch and they'll 43 | take you a mile. 44 | Thanks to Sam Altman, Paul Buchheit, Jessica Livingston, and 45 | Robert Morris for reading drafts of this. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssays/talk.txt: -------------------------------------------------------------------------------- 1 | October 2015Here's a simple trick for getting more people to read what you 2 | write: write in spoken language.Something comes over most people when they start writing. They write 3 | in a different language than they'd use if they were talking to a 4 | friend. The sentence structure and even the words are different. 5 | No one uses "pen" as a verb in spoken English. You'd feel like an 6 | idiot using "pen" instead of "write" in a conversation with a friend.The last straw for me was a sentence I read a couple days ago: 7 | 8 | The mercurial Spaniard himself declared: "After Altamira, all is 9 | decadence." 10 | 11 | It's from Neil Oliver's A History of Ancient Britain. I feel bad 12 | making an example of this book, because it's no worse than lots of 13 | others. But just imagine calling Picasso "the mercurial Spaniard" when 14 | talking to a friend. Even one 15 | sentence of this would raise eyebrows in conversation. And yet 16 | people write whole books of it.Ok, so written and spoken language are different. Does that make 17 | written language worse?If you want people to read and understand what you write, yes. 18 | Written language is more complex, which makes it more work to read. 19 | It's also more formal and distant, which gives the reader's attention 20 | permission to drift. But perhaps worst of all, the complex sentences 21 | and fancy words give you, the writer, the false impression that 22 | you're saying more than you actually are.You don't need complex sentences to express complex ideas. When 23 | specialists in some abstruse topic talk to one another about ideas 24 | in their field, they don't use sentences any more complex than they 25 | do when talking about what to have for lunch. They use different 26 | words, certainly. 
But even those they use no more than necessary. 27 | And in my experience, the harder the subject, the more informally 28 | experts speak. Partly, I think, because they have less to prove, 29 | and partly because the harder the ideas you're talking about, the 30 | less you can afford to let language get in the way.Informal language is the athletic clothing of ideas.I'm not saying spoken language always works best. Poetry is as much 31 | music as text, so you can say things you wouldn't say in conversation. 32 | And there are a handful of writers who can get away with using fancy 33 | language in prose. And then of course there are cases where writers 34 | don't want to make it easy to understand what they're saying—in 35 | corporate announcements of bad news, for example, or at the more 36 | bogus end of the humanities. But for nearly everyone else, spoken 37 | language is better.It seems to be hard for most people to write in spoken language. 38 | So perhaps the best solution is to write your first draft the way 39 | you usually would, then afterward look at each sentence and ask "Is 40 | this the way I'd say this if I were talking to a friend?" If it 41 | isn't, imagine what you would say, and use that instead. After a 42 | while this filter will start to operate as you write. When you write 43 | something you wouldn't say, you'll hear the clank as it hits the 44 | page.Before I publish a new essay, I read it out loud and fix everything 45 | that doesn't sound like conversation. I even fix bits that are 46 | phonetically awkward; I don't know if that's necessary, but it 47 | doesn't cost much.This trick may not always be enough. I've seen writing so far 48 | removed from spoken language that it couldn't be fixed sentence by 49 | sentence. For cases like that there's a more drastic solution. 50 | After writing the first draft, try explaining to a friend what you 51 | just wrote. 
Then replace the draft with what you said to your friend.People often tell me how much my essays sound like me talking. 52 | The fact that this seems worthy of comment shows how rarely people 53 | manage to write in spoken language. Otherwise everyone's writing 54 | would sound like them talking.If you simply manage to write in spoken language, you'll be ahead 55 | of 95% of writers. And it's so easy to do: just don't let a sentence 56 | through unless it's the way you'd say it to a friend.Thanks to Patrick Collison and Jessica Livingston for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/todo.txt: -------------------------------------------------------------------------------- 1 | April 2012A palliative care nurse called Bronnie Ware made a list of the 2 | biggest regrets 3 | of the dying. Her list seems plausible. I could see 4 | myself — can see myself — making at least 4 of these 5 | 5 mistakes.If you had to compress them into a single piece of advice, it might 6 | be: don't be a cog. The 5 regrets paint a portrait of post-industrial 7 | man, who shrinks himself into a shape that fits his circumstances, 8 | then turns dutifully till he stops.The alarming thing is, the mistakes that produce these regrets are 9 | all errors of omission. You forget your dreams, ignore your family, 10 | suppress your feelings, neglect your friends, and forget to be 11 | happy. Errors of omission are a particularly dangerous type of 12 | mistake, because you make them by default.I would like to avoid making these mistakes. But how do you avoid 13 | mistakes you make by default? Ideally you transform your life so 14 | it has other defaults. But it may not be possible to do that 15 | completely. As long as these mistakes happen by default, you probably 16 | have to be reminded not to make them. 
So I inverted the 5 regrets, 17 | yielding a list of 5 commands: 18 | 19 | Don't ignore your dreams; don't work too much; say what you 20 | think; cultivate friendships; be happy. 21 | 22 | which I then put at the top of the file I use as a todo list. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/twitter.txt: -------------------------------------------------------------------------------- 1 | April 2009 Om Malik is the most recent of many people 2 | to ask why Twitter is such a big deal. The reason is that it's a new messaging 3 | protocol, where you don't specify the recipients. 4 | New protocols are rare. Or more precisely, new 5 | protocols that take off are. 6 | There are only a handful of commonly used ones: TCP/IP 7 | (the Internet), SMTP (email), HTTP (the web), and so on. So any 8 | new protocol is a big deal. But Twitter is a protocol owned 9 | by a private company. That's even rarer. Curiously, the fact that the founders of Twitter 10 | have been slow to monetize it may in the long run 11 | prove to be an advantage. Because they haven't tried 12 | to control it too much, Twitter feels to everyone like 13 | previous protocols. One forgets it's owned by a 14 | private company. That must have made it easier for 15 | Twitter to spread. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/unions.txt: -------------------------------------------------------------------------------- 1 | May 2007 People who worry about the increasing gap between rich and poor 2 | generally look back on the mid twentieth century as a golden age. 3 | In those days we had a large number of high-paying union manufacturing 4 | jobs that boosted the median income.
I wouldn't quite call the 5 | high-paying union job a myth, but I think people who dwell on it 6 | are reading too much into it.Oddly enough, it was working with startups that made me realize 7 | where the high-paying union job came from. In a rapidly growing 8 | market, you don't worry too much about efficiency. It's more 9 | important to grow fast. If there's some mundane problem getting 10 | in your way, and there's a simple solution that's somewhat expensive, 11 | just take it and get on with more important things. EBay didn't 12 | win by paying less for servers than their competitors.Difficult though it may be to imagine now, manufacturing was a 13 | growth industry in the mid twentieth century. This was an era when 14 | small firms making everything from cars to candy were getting 15 | consolidated into a new kind of corporation with national reach and 16 | huge economies of scale. You had to grow fast or die. Workers 17 | were for these companies what servers are for an Internet startup. 18 | A reliable supply was more important than low cost.If you looked in the head of a 1950s auto executive, the attitude 19 | must have been: sure, give 'em whatever they ask for, so long as 20 | the new model isn't delayed.In other words, those workers were not paid what their work was 21 | worth. Circumstances being what they were, companies would have 22 | been stupid to insist on paying them so little.If you want a less controversial example of this phenomenon, ask 23 | anyone who worked as a consultant building web sites during the 24 | Internet Bubble. In the late nineties you could get paid huge sums 25 | of money for building the most trivial things. And yet does anyone 26 | who was there have any expectation those days will ever return? I 27 | doubt it. 
Surely everyone realizes that was just a temporary 28 | aberration.The era of labor unions seems to have been the same kind of aberration, 29 | just spread 30 | over a longer period, and mixed together with a lot of ideology 31 | that prevents people from viewing it with as cold an eye as they 32 | would something like consulting during the Bubble.Basically, unions were just Razorfish.People who think the labor movement was the creation of heroic union 33 | organizers have a problem to explain: why are unions shrinking now? 34 | The best they can do is fall back on the default explanation of 35 | people living in fallen civilizations. Our ancestors were giants. 36 | The workers of the early twentieth century must have had a moral 37 | courage that's lacking today.In fact there's a simpler explanation. The early twentieth century 38 | was just a fast-growing startup overpaying for infrastructure. And 39 | we in the present are not a fallen people, who have abandoned 40 | whatever mysterious high-minded principles produced the high-paying 41 | union job. We simply live in a time when the fast-growing companies 42 | overspend on different things. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/want.txt: -------------------------------------------------------------------------------- 1 | November 2022Since I was about 9 I've been puzzled by the apparent contradiction 2 | between being made of matter that behaves in a predictable way, and 3 | the feeling that I could choose to do whatever I wanted. At the 4 | time I had a self-interested motive for exploring the question. At 5 | that age (like most succeeding ages) I was always in trouble with 6 | the authorities, and it seemed to me that there might possibly be 7 | some way to get out of trouble by arguing that I wasn't responsible 8 | for my actions. 
I gradually lost hope of that, but the puzzle 9 | remained: How do you reconcile being a machine made of matter with 10 | the feeling that you're free to choose what you do? 11 | [1]The best way to explain the answer may be to start with a slightly 12 | wrong version, and then fix it. The wrong version is: You can do 13 | what you want, but you can't want what you want. Yes, you can control 14 | what you do, but you'll do what you want, and you can't control 15 | that.The reason this is mistaken is that people do sometimes change what 16 | they want. People who don't want to want something — drug addicts, 17 | for example — can sometimes make themselves stop wanting it. And 18 | people who want to want something — who want to like classical 19 | music, or broccoli — sometimes succeed.So we modify our initial statement: You can do what you want, but 20 | you can't want to want what you want.That's still not quite true. It's possible to change what you want 21 | to want. I can imagine someone saying "I decided to stop wanting 22 | to like classical music." But we're getting closer to the truth. 23 | It's rare for people to change what they want to want, and the more 24 | "want to"s we add, the rarer it gets.We can get arbitrarily close to a true statement by adding more "want 25 | to"s in much the same way we can get arbitrarily close to 1 by adding 26 | more 9s to a string of 9s following a decimal point. In practice 27 | three or four "want to"s must surely be enough. It's hard even to 28 | envision what it would mean to change what you want to want to want 29 | to want, let alone actually do it.So one way to express the correct answer is to use a regular 30 | expression. You can do what you want, but there's some statement 31 | of the form "you can't (want to)* want what you want" that's true. 32 | Ultimately you get back to a want that you don't control. 
33 | [2] 34 | Notes [1] 35 | I didn't know when I was 9 that matter might behave randomly, 36 | but I don't think it affects the problem much. Randomness destroys 37 | the ghost in the machine as effectively as determinism. [2] 38 | If you don't like using an expression, you can make the same 39 | point using higher-order desires: There is some n such that you 40 | don't control your nth-order desires. 41 | Thanks to Trevor Blackwell, 42 | Jessica Livingston, Robert Morris, and 43 | Michael Nielsen for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/weird.txt: -------------------------------------------------------------------------------- 1 | August 2021 When people say that in their experience all programming languages 2 | are basically equivalent, they're making a statement not about 3 | languages but about the kind of programming they've done. 99.5% of programming consists of gluing together calls to library 4 | functions. All popular languages are equally good at this. So one 5 | can easily spend one's whole career operating in the intersection 6 | of popular programming languages. But the other .5% of programming is disproportionately interesting. 7 | If you want to learn what it consists of, the weirdness of weird 8 | languages is a good clue to follow. Weird languages aren't weird by accident. Not the good ones, at 9 | least. The weirdness of the good ones usually implies the existence 10 | of some form of programming that's not just the usual gluing together 11 | of library calls. A concrete example: Lisp macros. Lisp macros seem weird even to 12 | many Lisp programmers. They're not only not in the intersection of 13 | popular languages, but by their nature would be hard to implement 14 | properly in a language without turning it into a dialect of 15 | Lisp. And macros are definitely evidence of techniques that go 16 | beyond glue programming.
For example, solving problems by first 17 | writing a language for problems of that type, and then writing 18 | your specific application in it. Nor is this all you can do with 19 | macros; it's just one region in a space of program-manipulating 20 | techniques that even now is far from fully explored.So if you want to expand your concept of what programming can be, 21 | one way to do it is by learning weird languages. Pick a language 22 | that most programmers consider weird but whose median user is smart, 23 | and then focus on the differences between this language and the 24 | intersection of popular languages. What can you say in this language 25 | that would be impossibly inconvenient to say in others? In the 26 | process of learning how to say things you couldn't previously say, 27 | you'll probably be learning how to think things you couldn't 28 | previously think. 29 | Thanks to Trevor Blackwell, Patrick Collison, Daniel Gackle, Amjad 30 | Masad, and Robert Morris for reading drafts of this. 31 | -------------------------------------------------------------------------------- /data/PaulGrahamEssays/whyyc.txt: -------------------------------------------------------------------------------- 1 | March 2006, rev August 2009Yesterday one of the founders we funded asked me why we started 2 | Y 3 | Combinator. Or more precisely, he asked if we'd started YC mainly 4 | for fun.Kind of, but not quite. It is enormously fun to be able to work 5 | with Rtm and Trevor again. I missed that after we sold Viaweb, and 6 | for all the years after I always had a background process running, 7 | looking for something we could do together. There is definitely 8 | an aspect of a band reunion to Y Combinator. Every couple days I 9 | slip and call it "Viaweb."Viaweb we started very explicitly to make money. 
I was sick of 10 | living from one freelance project to the next, and decided to just 11 | work as hard as I could till I'd made enough to solve the problem 12 | once and for all. Viaweb was sometimes fun, but it wasn't designed 13 | for fun, and mostly it wasn't. I'd be surprised if any startup is. 14 | All startups are mostly schleps.The real reason we started Y Combinator is neither selfish nor 15 | virtuous. We didn't start it mainly to make money; we have no idea 16 | what our average returns might be, and won't know for years. Nor 17 | did we start YC mainly to help out young would-be founders, though 18 | we do like the idea, and comfort ourselves occasionally with the 19 | thought that if all our investments tank, we will thus have been 20 | doing something unselfish. (It's oddly nondeterministic.)The real reason we started Y Combinator is one probably only a 21 | hacker would understand. We did it because it seems such a great 22 | hack. There are thousands of smart people who could start companies 23 | and don't, and with a relatively small amount of force applied at 24 | just the right place, we can spring on the world a stream of new 25 | startups that might otherwise not have existed.In a way this is virtuous, because I think startups are a good 26 | thing. But really what motivates us is the completely amoral desire 27 | that would motivate any hacker who looked at some complex device 28 | and realized that with a tiny tweak he could make it run more 29 | efficiently. In this case, the device is the world's economy, which 30 | fortunately happens to be open source. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/work.txt: -------------------------------------------------------------------------------- 1 | January 2015My father is a mathematician. 
For most of my childhood he worked 2 | for Westinghouse, modelling nuclear reactors.He was one of those lucky people who know early on what they want to 3 | do. When you talk to him about his childhood, there's a clear 4 | watershed at about age 12, when he "got interested in maths."He 5 | grew up in the small Welsh seacoast town of Pwllheli. As we retraced 6 | his walk to school on Google Street View, he said that it had been 7 | nice growing up in the country."Didn't it get boring when you got to be about 15?" I asked."No," he said, "by then I was interested in maths."In another conversation he told me that what he really liked was 8 | solving problems. To me the exercises at the end of each chapter 9 | in a math textbook represent work, or at best a way to reinforce 10 | what you learned in that chapter. To him the problems were the 11 | reward. The text of each chapter was just some advice about solving 12 | them. He said that as soon as he got a new textbook he'd immediately 13 | work out all the problems — to the slight annoyance of his teacher, 14 | since the class was supposed to work through the book gradually.Few people know so early or so certainly what they want to work on. 15 | But talking to my father reminded me of a heuristic the rest of us 16 | can use. If something that seems like work to other people doesn't 17 | seem like work to you, that's something you're well suited for. 18 | For example, a lot of programmers I know, including me, actually 19 | like debugging. It's not something people tend to volunteer; one 20 | likes it the way one likes popping zits. But you may have to like 21 | debugging to like programming, considering the degree to which 22 | programming consists of it.The stranger your tastes seem to other people, the stronger evidence 23 | they probably are of what you should do. When I was in college I 24 | used to write papers for my friends. It was quite interesting to 25 | write a paper for a class I wasn't taking. 
Plus they were always 26 | so relieved.It seemed curious that the same task could be painful to one person 27 | and pleasant to another, but I didn't realize at the time what this 28 | imbalance implied, because I wasn't looking for it. I didn't realize 29 | how hard it can be to decide what you should work on, and that you 30 | sometimes have to figure it out from subtle clues, like a detective 31 | solving a case in a mystery novel. So I bet it would help a lot 32 | of people to ask themselves about this explicitly. What seems like 33 | work to other people that doesn't seem like work to you? 34 | Thanks to Sam Altman, Trevor Blackwell, Jessica Livingston, 35 | Robert Morris, and my father for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/writing44.txt: -------------------------------------------------------------------------------- 1 | March 2005 2 | (In the process 3 | of answering an email, I accidentally wrote a tiny essay about writing. 4 | I usually spend weeks on an essay. This one took 67 minutes—23 5 | of writing, and 44 of rewriting.)I think it's far more important to write well than most people 6 | realize. Writing doesn't just communicate ideas; it generates them. 
7 | If you're bad at writing and don't like to do it, you'll miss out 8 | on most of the ideas writing would have generated.As for how to write well, here's the short version: 9 | Write a bad version 10 | 1 as fast as you can; rewrite it over and over; cut out everything 11 | unnecessary; write in a conversational tone; develop a nose for 12 | bad writing, so you can see and fix it in yours; imitate writers 13 | you like; if you can't get started, tell someone what you plan to 14 | write about, then write down what you said; expect 15 | 80% of the ideas in an essay to happen after you start writing it, 16 | and 50% of those you start with to be wrong; be confident enough 17 | to cut; have friends you trust read your stuff and tell you which 18 | bits are confusing or drag; don't (always) make detailed outlines; 19 | mull ideas over for a few days before 20 | writing; carry a small notebook or scrap paper with you; start writing 21 | when you think of the first 22 | sentence; if a deadline 23 | forces you to start before that, just say the most important sentence 24 | first; write about stuff you like; don't try to sound impressive; don't hesitate to change the topic on the fly; 25 | use footnotes to contain digressions; use anaphora to knit 26 | sentences together; read your essays out loud to see (a) where you stumble 27 | over awkward phrases and (b) which bits are boring (the 28 | paragraphs you dread reading); try to tell the 29 | reader something new and useful; work in fairly big quanta of time; 30 | when you restart, begin by rereading what you have so far; when you 31 | finish, leave yourself something easy to start with; accumulate 32 | notes for topics you plan to cover at the bottom of the file; don't 33 | feel obliged to cover any of them; write for a reader who won't 34 | read the essay as carefully as you do, just as pop songs are 35 | designed to sound ok on crappy car radios; 36 | if you say anything mistaken, fix it immediately; 37 | ask friends 
which sentence you'll regret most; go back and tone 38 | down harsh remarks; publish stuff online, because 39 | an audience makes you write more, and thus generate more 40 | ideas; print out drafts instead of just looking at them 41 | on the screen; use simple, germanic words; learn to distinguish 42 | surprises from digressions; learn to recognize the approach of an 43 | ending, and when one appears, grab it. -------------------------------------------------------------------------------- /data/PaulGrahamEssays/wtax.txt: -------------------------------------------------------------------------------- 1 | August 2020 Some politicians are proposing to introduce wealth taxes in addition 2 | to income and capital gains taxes. Let's try modeling the effects of various levels 3 | of wealth tax to see what they would mean in practice for a startup 4 | founder. Suppose you start a successful startup in your twenties, and then 5 | live for another 60 years. How much of your stock will a wealth tax 6 | consume? If the wealth tax applies to all your assets, it's easy to 7 | calculate its effect. A wealth tax of 1% means you get to keep 8 | 99% of your stock each year. After 60 years the proportion 9 | of stock you'll have left will be .99^60, or .547. So a 10 | straight 1% wealth tax means the government will over the 11 | course of your life take 45% of your stock. (Losing shares does not, obviously, mean becoming net 12 | poorer unless the value per share is increasing by less than the 13 | wealth tax rate.) Here's how much stock the government would take over 60 14 | years at various levels of wealth tax: 15 | wealth tax → government takes 16 | 0.1% → 6%, 0.5% → 26% 17 | 1.0% → 45% 18 | 2.0% → 70% 19 | 3.0% → 84% 20 | 4.0% → 91%, 5.0% → 95% 21 | 22 | 23 | 24 | A wealth tax will usually have a threshold at which it starts. 25 | How much difference would a high threshold make?
To model that, 26 | we need to make some assumptions about the initial value of 27 | your stock and the growth rate. Suppose your stock is initially 28 | worth $2 million, and the company's trajectory is as follows: 29 | the value of your stock grows 3x for 2 years, then 2x for 2 years, 30 | then 50% for 2 years, after 31 | which you just get a typical public company growth rate, 32 | which we'll call 8%. 33 | [1] 34 | Suppose the wealth tax threshold is 35 | $50 million. How much stock does the government take now? 36 | 37 | wealth tax → government takes 38 | 0.1% → 5%, 0.5% → 23% 39 | 1.0% → 41% 40 | 2.0% → 65% 41 | 3.0% → 79% 42 | 4.0% → 88%, 5.0% → 93% 43 | 44 | 45 | 46 | It may at first seem surprising that such apparently small tax rates 47 | produce such dramatic effects. A 2% wealth tax with a $50 million 48 | threshold takes about two thirds of a successful founder's stock. The reason wealth taxes have such dramatic effects is that they're 49 | applied over and over to the same money. Income tax 50 | happens every year, but only to that year's income. Whereas if you 51 | live for 60 years after acquiring some asset, a wealth tax will tax 52 | that same asset 60 times. A wealth tax compounds. Note [1] 53 | In practice, eventually some of this 8% would come in the form of 54 | dividends, which are taxed as income at issue, so this model actually 55 | represents the most optimistic case for the founder. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/bias.txt: -------------------------------------------------------------------------------- 1 | October 2015 This will come as a surprise to a lot of people, but in some cases 2 | it's possible to detect bias in a selection process without knowing 3 | anything about the applicant pool.
Which is exciting because among 4 | other things it means third parties can use this technique to detect 5 | bias whether those doing the selecting want them to or not.You can use this technique whenever (a) you have at least 6 | a random sample of the applicants that were selected, (b) their 7 | subsequent performance is measured, and (c) the groups of 8 | applicants you're comparing have roughly equal distribution of ability.How does it work? Think about what it means to be biased. What 9 | it means for a selection process to be biased against applicants 10 | of type x is that it's harder for them to make it through. Which 11 | means applicants of type x have to be better to get selected than 12 | applicants not of type x. 13 | [1] 14 | Which means applicants of type x 15 | who do make it through the selection process will outperform other 16 | successful applicants. And if the performance of all the successful 17 | applicants is measured, you'll know if they do.Of course, the test you use to measure performance must be a valid 18 | one. And in particular it must not be invalidated by the bias you're 19 | trying to measure. 20 | But there are some domains where performance can be measured, and 21 | in those detecting bias is straightforward. Want to know if the 22 | selection process was biased against some type of applicant? Check 23 | whether they outperform the others. This is not just a heuristic 24 | for detecting bias. It's what bias means.For example, many suspect that venture capital firms are biased 25 | against female founders. This would be easy to detect: among their 26 | portfolio companies, do startups with female founders outperform 27 | those without? A couple months ago, one VC firm (almost certainly 28 | unintentionally) published a study showing bias of this type. First 29 | Round Capital found that among its portfolio companies, startups 30 | with female founders outperformed 31 | those without by 63%. 
32 | [2]The reason I began by saying that this technique would come as a 33 | surprise to many people is that we so rarely see analyses of this 34 | type. I'm sure it will come as a surprise to First Round that they 35 | performed one. I doubt anyone there realized that by limiting their 36 | sample to their own portfolio, they were producing a study not of 37 | startup trends but of their own biases when selecting companies.I predict we'll see this technique used more in the future. The 38 | information needed to conduct such studies is increasingly available. 39 | Data about who applies for things is usually closely guarded by the 40 | organizations selecting them, but nowadays data about who gets 41 | selected is often publicly available to anyone who takes the trouble 42 | to aggregate it. 43 | Notes[1] 44 | This technique wouldn't work if the selection process looked 45 | for different things from different types of applicants—for 46 | example, if an employer hired men based on their ability but women 47 | based on their appearance.[2] 48 | As Paul Buchheit points out, First Round excluded their most 49 | successful investment, Uber, from the study. And while it 50 | makes sense to exclude outliers from some types of studies, 51 | studies of returns from startup investing, which is all about 52 | hitting outliers, are not one of them. 53 | Thanks to Sam Altman, Jessica Livingston, and Geoff Ralston for reading 54 | drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/foundervisa.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | April 2009I usually avoid politics, but since we now seem to have an administration that's open to suggestions, I'm going to risk making one. 
The single biggest thing the government could do to increase the number of startups in this country is a policy that would cost nothing: establish a new class of visa for startup founders.The biggest constraint on the number of new startups that get created in the US is not tax policy or employment law or even Sarbanes-Oxley. It's that we won't let the people who want to start them into the country.Letting just 10,000 startup founders into the country each year could have a visible effect on the economy. If we assume 4 people per startup, which is probably an overestimate, that's 2500 new companies. Each year. They wouldn't all grow as big as Google, but out of 2500 some would come close.By definition these 10,000 founders wouldn't be taking jobs from Americans: it could be part of the terms of the visa that they couldn't work for existing companies, only new ones they'd founded. In fact they'd cause there to be 4 | more jobs for Americans, because the companies they started would hire more employees as they grew.The tricky part might seem to be how one defined a startup. But that could be solved quite easily: let the market decide. Startup investors work hard to find the best startups. The government could not do better than to piggyback on their expertise, and use investment by recognized startup investors as the test of whether a company was a real startup.How would the government decide who's a startup investor? The same way they decide what counts as a university for student visas. We'll establish our own accreditation procedure. We know who one another are.10,000 people is a drop in the bucket by immigration standards, but would represent a huge increase in the pool of startup founders. I think this would have such a visible effect on the economy that it would make the legislator who introduced the bill famous. The only way to know for sure would be to try it, and that would cost practically nothing. 
5 | Thanks to Trevor Blackwell, Paul Buchheit, Jeff Clavier, David Hornik, Jessica Livingston, Greg McAdoo, Aydin Senkut, and Fred Wilson for reading drafts of this. Related: -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/iflisp.txt: -------------------------------------------------------------------------------- 1 | May 2003 If Lisp is so great, why don't more people use it? I was 2 | asked this question by a student in the audience at a 3 | talk I gave recently. Not for the first time, either. In languages, as in so many things, there's not much 4 | correlation between popularity and quality. Why does 5 | John Grisham (King of Torts sales rank, 44) outsell 6 | Jane Austen (Pride and Prejudice sales rank, 6191)? 7 | Would even Grisham claim that it's because he's a better 8 | writer? Here's the first sentence of Pride and Prejudice: 9 | 10 | It is a truth universally acknowledged, that a single man 11 | in possession of a good fortune must be in want of a 12 | wife. 13 | 14 | "It is a truth universally acknowledged?" Long words for 15 | the first sentence of a love story. Like Jane Austen, Lisp looks hard. Its syntax, or lack 16 | of syntax, makes it look completely unlike 17 | the languages 18 | most people are used to. Before I learned Lisp, I was afraid 19 | of it too. I recently came across a notebook from 1983 20 | in which I'd written: 21 | 22 | I suppose I should learn Lisp, but it seems so foreign. 23 | 24 | Fortunately, I was 19 at the time and not too resistant to learning 25 | new things. I was so ignorant that learning 26 | almost anything meant learning new things. People frightened by Lisp make up other reasons for not 27 | using it. The standard 28 | excuse, back when C was the default language, was that Lisp 29 | was too slow. Now that Lisp dialects are among 30 | the faster 31 | languages available, that excuse has gone away.
32 | Now the standard excuse is openly circular: that other languages 33 | are more popular.(Beware of such reasoning. It gets you Windows.)Popularity is always self-perpetuating, but it's especially 34 | so in programming languages. More libraries 35 | get written for popular languages, which makes them still 36 | more popular. Programs often have to work with existing programs, 37 | and this is easier if they're written in the same language, 38 | so languages spread from program to program like a virus. 39 | And managers prefer popular languages, because they give them 40 | more leverage over developers, who can more easily be replaced.Indeed, if programming languages were all more or less equivalent, 41 | there would be little justification for using any but the most 42 | popular. But they aren't all equivalent, not by a long 43 | shot. And that's why less popular languages, like Jane Austen's 44 | novels, continue to survive at all. When everyone else is reading 45 | the latest John Grisham novel, there will always be a few people 46 | reading Jane Austen instead. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/island.txt: -------------------------------------------------------------------------------- 1 | July 2006I've discovered a handy test for figuring out what you're addicted 2 | to. Imagine you were going to spend the weekend at a friend's house 3 | on a little island off the coast of Maine. There are no shops on 4 | the island and you won't be able to leave while you're there. Also, 5 | you've never been to this house before, so you can't assume it will 6 | have more than any house might.What, besides clothes and toiletries, do you make a point of packing? 7 | That's what you're addicted to. 
For example, if you find yourself 8 | packing a bottle of vodka (just in case), you may want to stop and 9 | think about that.For me the list is four things: books, earplugs, a notebook, and a 10 | pen.There are other things I might bring if I thought of it, like music, 11 | or tea, but I can live without them. I'm not so addicted to caffeine 12 | that I wouldn't risk the house not having any tea, just for a 13 | weekend.Quiet is another matter. I realize it seems a bit eccentric to 14 | take earplugs on a trip to an island off the coast of Maine. If 15 | anywhere should be quiet, that should. But what if the person in 16 | the next room snored? What if there was a kid playing basketball? 17 | (Thump, thump, thump... thump.) Why risk it? Earplugs are small.Sometimes I can think with noise. If I already have momentum on 18 | some project, I can work in noisy places. I can edit an essay or 19 | debug code in an airport. But airports are not so bad: most of the 20 | noise is whitish. I couldn't work with the sound of a sitcom coming 21 | through the wall, or a car in the street playing thump-thump music.And of course there's another kind of thinking, when you're starting 22 | something new, that requires complete quiet. You never 23 | know when this will strike. It's just as well to carry plugs.The notebook and pen are professional equipment, as it were. Though 24 | actually there is something druglike about them, in the sense that 25 | their main purpose is to make me feel better. I hardly ever go 26 | back and read stuff I write down in notebooks. It's just that if 27 | I can't write things down, worrying about remembering one idea gets 28 | in the way of having the next. Pen and paper wick ideas.The best notebooks I've found are made by a company called Miquelrius. 29 | I use their smallest size, which is about 2.5 x 4 in. 30 | The secret to writing on such 31 | narrow pages is to break words only when you run out of space, like 32 | a Latin inscription. 
I use the cheapest plastic Bic ballpoints, 33 | partly because their gluey ink doesn't seep through pages, and 34 | partly so I don't worry about losing them.I only started carrying a notebook about three years ago. Before 35 | that I used whatever scraps of paper I could find. But the problem 36 | with scraps of paper is that they're not ordered. In a notebook 37 | you can guess what a scribble means by looking at the pages 38 | around it. In the scrap era I was constantly finding notes I'd 39 | written years before that might say something I needed to remember, 40 | if I could only figure out what.As for books, I know the house would probably have something to 41 | read. On the average trip I bring four books and only read one of 42 | them, because I find new books to read en route. Really bringing 43 | books is insurance.I realize this dependence on books is not entirely good—that what 44 | I need them for is distraction. The books I bring on trips are 45 | often quite virtuous, the sort of stuff that might be assigned 46 | reading in a college class. But I know my motives aren't virtuous. 47 | I bring books because if the world gets boring I need to be able 48 | to slip into another distilled by some writer. It's like eating 49 | jam when you know you should be eating fruit.There is a point where I'll do without books. I was walking in 50 | some steep mountains once, and decided I'd rather just think, if I 51 | was bored, rather than carry a single unnecessary ounce. It wasn't 52 | so bad. I found I could entertain myself by having ideas instead 53 | of reading other people's. If you stop eating jam, fruit starts 54 | to taste better.So maybe I'll try not bringing books on some future trip. They're 55 | going to have to pry the plugs out of my cold, dead ears, however. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/know.txt: -------------------------------------------------------------------------------- 1 | December 2014I've read Villehardouin's chronicle of the Fourth Crusade at least 2 | two times, maybe three. And yet if I had to write down everything 3 | I remember from it, I doubt it would amount to much more than a 4 | page. Multiply this times several hundred, and I get an uneasy 5 | feeling when I look at my bookshelves. What use is it to read all 6 | these books if I remember so little from them?A few months ago, as I was reading Constance Reid's excellent 7 | biography of Hilbert, I figured out if not the answer to this 8 | question, at least something that made me feel better about it. 9 | She writes: 10 | 11 | Hilbert had no patience with mathematical lectures which filled 12 | the students with facts but did not teach them how to frame a 13 | problem and solve it. He often used to tell them that "a perfect 14 | formulation of a problem is already half its solution." 15 | 16 | That has always seemed to me an important point, and I was even 17 | more convinced of it after hearing it confirmed by Hilbert.But how had I come to believe in this idea in the first place? A 18 | combination of my own experience and other things I'd read. None 19 | of which I could at that moment remember! And eventually I'd forget 20 | that Hilbert had confirmed it too. But my increased belief in the 21 | importance of this idea would remain something I'd learned from 22 | this book, even after I'd forgotten I'd learned it.Reading and experience train your model of the world. And even if 23 | you forget the experience or what you read, its effect on your model 24 | of the world persists. Your mind is like a compiled program you've 25 | lost the source of. 
It works, but you don't know why.The place to look for what I learned from Villehardouin's chronicle 26 | is not what I remember from it, but my mental models of the crusades, 27 | Venice, medieval culture, siege warfare, and so on. Which doesn't 28 | mean I couldn't have read more attentively, but at least the harvest 29 | of reading is not so miserably small as it might seem.This is one of those things that seem obvious in retrospect. But 30 | it was a surprise to me and presumably would be to anyone else who 31 | felt uneasy about (apparently) forgetting so much they'd read.Realizing it does more than make you feel a little better about 32 | forgetting, though. There are specific implications.For example, reading and experience are usually "compiled" at the 33 | time they happen, using the state of your brain at that time. The 34 | same book would get compiled differently at different points in 35 | your life. Which means it is very much worth reading important 36 | books multiple times. I always used to feel some misgivings about 37 | rereading books. I unconsciously lumped reading together with work 38 | like carpentry, where having to do something again is a sign you 39 | did it wrong the first time. Whereas now the phrase "already read" 40 | seems almost ill-formed.Intriguingly, this implication isn't limited to books. Technology 41 | will increasingly make it possible to relive our experiences. When 42 | people do that today it's usually to enjoy them again (e.g. when 43 | looking at pictures of a trip) or to find the origin of some bug in 44 | their compiled code (e.g. when Stephen Fry succeeded in remembering 45 | the childhood trauma that prevented him from singing). 
But as 46 | technologies for recording and playing back your life improve, it 47 | may become common for people to relive experiences without any goal 48 | in mind, simply to learn from them again as one might when rereading 49 | a book.Eventually we may be able not just to play back experiences but 50 | also to index and even edit them. So although not knowing how you 51 | know things may seem part of being human, it may not be. 52 | Thanks to Sam Altman, Jessica Livingston, and Robert Morris for reading 53 | drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/mod.txt: -------------------------------------------------------------------------------- 1 | December 2019There are two distinct ways to be politically moderate: on purpose 2 | and by accident. Intentional moderates are trimmers, deliberately 3 | choosing a position mid-way between the extremes of right and left. 4 | Accidental moderates end up in the middle, on average, because they 5 | make up their own minds about each question, and the far right and 6 | far left are roughly equally wrong.You can distinguish intentional from accidental moderates by the 7 | distribution of their opinions. If the far left opinion on some 8 | matter is 0 and the far right opinion 100, an intentional moderate's 9 | opinion on every question will be near 50. Whereas an accidental 10 | moderate's opinions will be scattered over a broad range, but will, 11 | like those of the intentional moderate, average to about 50.Intentional moderates are similar to those on the far left and the 12 | far right in that their opinions are, in a sense, not their own. 13 | The defining quality of an ideologue, whether on the left or the 14 | right, is to acquire one's opinions in bulk. You don't get to pick 15 | and choose. Your opinions about taxation can be predicted from your 16 | opinions about sex. 
And although intentional moderates 17 | might seem to be the opposite of ideologues, their beliefs (though 18 | in their case the word "positions" might be more accurate) are also 19 | acquired in bulk. If the median opinion shifts to the right or left, 20 | the intentional moderate must shift with it. Otherwise they stop 21 | being moderate.Accidental moderates, on the other hand, not only choose their own 22 | answers, but choose their own questions. They may not care at all 23 | about questions that the left and right both think are terribly 24 | important. So you can only even measure the politics of an accidental 25 | moderate from the intersection of the questions they care about and 26 | those the left and right care about, and this can 27 | sometimes be vanishingly small.It is not merely a manipulative rhetorical trick to say "if you're 28 | not with us, you're against us," but often simply false.Moderates are sometimes derided as cowards, particularly by 29 | the extreme left. But while it may be accurate to call intentional 30 | moderates cowards, openly being an accidental moderate requires the 31 | most courage of all, because you get attacked from both right and 32 | left, and you don't have the comfort of being an orthodox member 33 | of a large group to sustain you.Nearly all the most impressive people I know are accidental moderates. 34 | If I knew a lot of professional athletes, or people in the entertainment 35 | business, that might be different. Being on the far left or far 36 | right doesn't affect how fast you run or how well you sing. But 37 | someone who works with ideas has to be independent-minded to do it 38 | well.Or more precisely, you have to be independent-minded about the ideas 39 | you work with. You could be mindlessly doctrinaire in your politics 40 | and still be a good mathematician. In the 20th century, a lot of 41 | very smart people were Marxists — just no one who was smart about 42 | the subjects Marxism involves. 
But if the ideas you use in your 43 | work intersect with the politics of your time, you have two choices: 44 | be an accidental moderate, or be mediocre.Notes[1] It's possible in theory for one side to be entirely right and 45 | the other to be entirely wrong. Indeed, ideologues must always 46 | believe this is the case. But historically it rarely has been.[2] For some reason the far right tend to ignore moderates rather 47 | than despise them as backsliders. I'm not sure why. Perhaps it 48 | means that the far right is less ideological than the far left. Or 49 | perhaps that they are more confident, or more resigned, or simply 50 | more disorganized. I just don't know.[3] Having heretical opinions doesn't mean you have to express 51 | them openly. It may be 52 | easier to have them if you don't. 53 | Thanks to Austen Allred, Trevor Blackwell, Patrick Collison, Jessica Livingston, 54 | Amjad Masad, Ryan Petersen, and Harj Taggar for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/nft.txt: -------------------------------------------------------------------------------- 1 | May 2021Noora Health, a nonprofit I've 2 | supported for years, just launched 3 | a new NFT. It has a dramatic name, Save Thousands of Lives, 4 | because that's what the proceeds will do.Noora has been saving lives for 7 years. They run programs in 5 | hospitals in South Asia to teach new mothers how to take care of 6 | their babies once they get home. They're in 165 hospitals now. And 7 | because they know the numbers before and after they start at a new 8 | hospital, they can measure the impact they have. It is massive. 
9 | For every 1000 live births, they save 9 babies.This number comes from a study 10 | of 133,733 families at 28 different 11 | hospitals that Noora conducted in collaboration with the Better 12 | Birth team at Ariadne Labs, a joint center for health systems 13 | innovation at Brigham and Women’s Hospital and Harvard T.H. Chan 14 | School of Public Health.Noora is so effective that even if you measure their costs in the 15 | most conservative way, by dividing their entire budget by the number 16 | of lives saved, the cost of saving a life is the lowest I've seen. 17 | $1,235.For this NFT, they're going to issue a public report tracking how 18 | this specific tranche of money is spent, and estimating the number 19 | of lives saved as a result.NFTs are a new territory, and this way of using them is especially 20 | new, but I'm excited about its potential. And I'm excited to see 21 | what happens with this particular auction, because unlike an NFT 22 | representing something that has already happened, 23 | this NFT gets better as the price gets higher.The reserve price was about $2.5 million, because that's what it 24 | takes for the name to be accurate: that's what it costs to save 25 | 2000 lives. But the higher the price of this NFT goes, the more 26 | lives will be saved. What a sentence to be able to write. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/pow.txt: -------------------------------------------------------------------------------- 1 | January 2017People who are powerful but uncharismatic will tend to be disliked. 2 | Their power makes them a target for criticism that they don't have 3 | the charisma to disarm. That was Hillary Clinton's problem. It also 4 | tends to be a problem for any CEO who is more of a builder than a 5 | schmoozer. And yet the builder-type CEO is (like Hillary) probably 6 | the best person for the job.I don't think there is any solution to this problem. 
It's human 7 | nature. The best we can do is to recognize that it's happening, and 8 | to understand that being a magnet for criticism is sometimes a sign 9 | not that someone is the wrong person for a job, but that they're 10 | the right one. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/rootsoflisp.txt: -------------------------------------------------------------------------------- 1 | May 2001 2 | 3 | (I wrote this article to help myself understand exactly 4 | what McCarthy discovered. You don't need to know this stuff 5 | to program in Lisp, but it should be helpful to 6 | anyone who wants to 7 | understand the essence of Lisp — both in the sense of its 8 | origins and its semantic core. The fact that it has such a core 9 | is one of Lisp's distinguishing features, and the reason why, 10 | unlike other languages, Lisp has dialects.) In 1960, John 11 | McCarthy published a remarkable paper in 12 | which he did for programming something like what Euclid did for 13 | geometry. He showed how, given a handful of simple 14 | operators and a notation for functions, you can 15 | build a whole programming language. 16 | He called this language Lisp, for "List Processing," 17 | because one of his key ideas was to use a simple 18 | data structure called a list for both 19 | code and data. It's worth understanding what McCarthy discovered, not 20 | just as a landmark in the history of computers, but as 21 | a model for what programming is tending to become in 22 | our own time. It seems to me that there have been 23 | two really clean, consistent models of programming so 24 | far: the C model and the Lisp model. 25 | These two seem points of high ground, with swampy lowlands 26 | between them. As computers have grown more powerful, 27 | the new languages being developed have been moving 28 | steadily toward the Lisp model. 
A popular recipe 29 | for new programming languages in the past 20 years 30 | has been to take the C model of computing and add to 31 | it, piecemeal, parts taken from the Lisp model, 32 | like runtime typing and garbage collection.In this article I'm going to try to explain in the 33 | simplest possible terms what McCarthy discovered. 34 | The point is not just to learn about an interesting 35 | theoretical result someone figured out forty years ago, 36 | but to show where languages are heading. 37 | The unusual thing about Lisp — in fact, the defining 38 | quality of Lisp — is that it can be written in 39 | itself. To understand what McCarthy meant by this, 40 | we're going to retrace his steps, with his mathematical 41 | notation translated into running Common Lisp code. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/rss.txt: -------------------------------------------------------------------------------- 1 | Aaron Swartz created a scraped 2 | feed 3 | of the essays page. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/sun.txt: -------------------------------------------------------------------------------- 1 | September 2017The most valuable insights are both general and surprising. 2 | F = ma for example. But general and surprising is a hard 3 | combination to achieve. That territory tends to be picked 4 | clean, precisely because those insights are so valuable.Ordinarily, the best that people can do is one without the 5 | other: either surprising without being general (e.g. 6 | gossip), or general without being surprising (e.g. 7 | platitudes).Where things get interesting is the moderately valuable 8 | insights. You get those from small additions of whichever 9 | quality was missing. 
The more common case is a small 10 | addition of generality: a piece of gossip that's more than 11 | just gossip, because it teaches something interesting about 12 | the world. But another less common approach is to focus on 13 | the most general ideas and see if you can find something new 14 | to say about them. Because these start out so general, you 15 | only need a small delta of novelty to produce a useful 16 | insight.A small delta of novelty is all you'll be able to get most 17 | of the time. Which means if you take this route, your ideas 18 | will seem a lot like ones that already exist. Sometimes 19 | you'll find you've merely rediscovered an idea that did 20 | already exist. But don't be discouraged. Remember the huge 21 | multiplier that kicks in when you do manage to think of 22 | something even a little new.Corollary: the more general the ideas you're talking about, 23 | the less you should worry about repeating yourself. If you 24 | write enough, it's inevitable you will. Your brain is much 25 | the same from year to year and so are the stimuli that hit 26 | it. I feel slightly bad when I find I've said something 27 | close to what I've said before, as if I were plagiarizing 28 | myself. But rationally one shouldn't. You won't say 29 | something exactly the same way the second time, and that 30 | variation increases the chance you'll get that tiny but 31 | critical delta of novelty.And of course, ideas beget ideas. (That sounds 32 | familiar.) 33 | An idea with a small amount of novelty could lead to one 34 | with more. But only if you keep going. So it's doubly 35 | important not to let yourself be discouraged by people who 36 | say there's not much new about something you've discovered. 37 | "Not much new" is a real achievement when you're talking 38 | about the most general ideas. It's not true that there's nothing new under the sun. There 39 | are some domains where there's almost nothing new. 
But 40 | there's a big difference between nothing and almost nothing, 41 | when it's multiplied by the area under the sun. 42 | Thanks to Sam Altman, Patrick Collison, and Jessica 43 | Livingston for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/todo.txt: -------------------------------------------------------------------------------- 1 | April 2012 A palliative care nurse called Bronnie Ware made a list of the 2 | biggest regrets 3 | of the dying. Her list seems plausible. I could see 4 | myself — can see myself — making at least 4 of these 5 | 5 mistakes. If you had to compress them into a single piece of advice, it might 6 | be: don't be a cog. The 5 regrets paint a portrait of post-industrial 7 | man, who shrinks himself into a shape that fits his circumstances, 8 | then turns dutifully till he stops. The alarming thing is, the mistakes that produce these regrets are 9 | all errors of omission. You forget your dreams, ignore your family, 10 | suppress your feelings, neglect your friends, and forget to be 11 | happy. Errors of omission are a particularly dangerous type of 12 | mistake, because you make them by default. I would like to avoid making these mistakes. But how do you avoid 13 | mistakes you make by default? Ideally you transform your life so 14 | it has other defaults. But it may not be possible to do that 15 | completely. As long as these mistakes happen by default, you probably 16 | have to be reminded not to make them. So I inverted the 5 regrets, 17 | yielding a list of 5 commands 18 | 19 | Don't ignore your dreams; don't work too much; say what you 20 | think; cultivate friendships; be happy. 21 | 22 | which I then put at the top of the file I use as a todo list. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/unions.txt: -------------------------------------------------------------------------------- 1 | May 2007People who worry about the increasing gap between rich and poor 2 | generally look back on the mid twentieth century as a golden age. 3 | In those days we had a large number of high-paying union manufacturing 4 | jobs that boosted the median income. I wouldn't quite call the 5 | high-paying union job a myth, but I think people who dwell on it 6 | are reading too much into it.Oddly enough, it was working with startups that made me realize 7 | where the high-paying union job came from. In a rapidly growing 8 | market, you don't worry too much about efficiency. It's more 9 | important to grow fast. If there's some mundane problem getting 10 | in your way, and there's a simple solution that's somewhat expensive, 11 | just take it and get on with more important things. EBay didn't 12 | win by paying less for servers than their competitors.Difficult though it may be to imagine now, manufacturing was a 13 | growth industry in the mid twentieth century. This was an era when 14 | small firms making everything from cars to candy were getting 15 | consolidated into a new kind of corporation with national reach and 16 | huge economies of scale. You had to grow fast or die. Workers 17 | were for these companies what servers are for an Internet startup. 18 | A reliable supply was more important than low cost.If you looked in the head of a 1950s auto executive, the attitude 19 | must have been: sure, give 'em whatever they ask for, so long as 20 | the new model isn't delayed.In other words, those workers were not paid what their work was 21 | worth. 
Circumstances being what they were, companies would have 22 | been stupid to insist on paying them so little.If you want a less controversial example of this phenomenon, ask 23 | anyone who worked as a consultant building web sites during the 24 | Internet Bubble. In the late nineties you could get paid huge sums 25 | of money for building the most trivial things. And yet does anyone 26 | who was there have any expectation those days will ever return? I 27 | doubt it. Surely everyone realizes that was just a temporary 28 | aberration.The era of labor unions seems to have been the same kind of aberration, 29 | just spread 30 | over a longer period, and mixed together with a lot of ideology 31 | that prevents people from viewing it with as cold an eye as they 32 | would something like consulting during the Bubble.Basically, unions were just Razorfish.People who think the labor movement was the creation of heroic union 33 | organizers have a problem to explain: why are unions shrinking now? 34 | The best they can do is fall back on the default explanation of 35 | people living in fallen civilizations. Our ancestors were giants. 36 | The workers of the early twentieth century must have had a moral 37 | courage that's lacking today.In fact there's a simpler explanation. The early twentieth century 38 | was just a fast-growing startup overpaying for infrastructure. And 39 | we in the present are not a fallen people, who have abandoned 40 | whatever mysterious high-minded principles produced the high-paying 41 | union job. We simply live in a time when the fast-growing companies 42 | overspend on different things. 
-------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/want.txt: -------------------------------------------------------------------------------- 1 | November 2022Since I was about 9 I've been puzzled by the apparent contradiction 2 | between being made of matter that behaves in a predictable way, and 3 | the feeling that I could choose to do whatever I wanted. At the 4 | time I had a self-interested motive for exploring the question. At 5 | that age (like most succeeding ages) I was always in trouble with 6 | the authorities, and it seemed to me that there might possibly be 7 | some way to get out of trouble by arguing that I wasn't responsible 8 | for my actions. I gradually lost hope of that, but the puzzle 9 | remained: How do you reconcile being a machine made of matter with 10 | the feeling that you're free to choose what you do? 11 | [1]The best way to explain the answer may be to start with a slightly 12 | wrong version, and then fix it. The wrong version is: You can do 13 | what you want, but you can't want what you want. Yes, you can control 14 | what you do, but you'll do what you want, and you can't control 15 | that.The reason this is mistaken is that people do sometimes change what 16 | they want. People who don't want to want something — drug addicts, 17 | for example — can sometimes make themselves stop wanting it. And 18 | people who want to want something — who want to like classical 19 | music, or broccoli — sometimes succeed.So we modify our initial statement: You can do what you want, but 20 | you can't want to want what you want.That's still not quite true. It's possible to change what you want 21 | to want. I can imagine someone saying "I decided to stop wanting 22 | to like classical music." But we're getting closer to the truth. 
23 | It's rare for people to change what they want to want, and the more 24 | "want to"s we add, the rarer it gets.We can get arbitrarily close to a true statement by adding more "want 25 | to"s in much the same way we can get arbitrarily close to 1 by adding 26 | more 9s to a string of 9s following a decimal point. In practice 27 | three or four "want to"s must surely be enough. It's hard even to 28 | envision what it would mean to change what you want to want to want 29 | to want, let alone actually do it.So one way to express the correct answer is to use a regular 30 | expression. You can do what you want, but there's some statement 31 | of the form "you can't (want to)* want what you want" that's true. 32 | Ultimately you get back to a want that you don't control. 33 | [2] 34 | Notes[1] 35 | I didn't know when I was 9 that matter might behave randomly, 36 | but I don't think it affects the problem much. Randomness destroys 37 | the ghost in the machine as effectively as determinism.[2] 38 | If you don't like using an expression, you can make the same 39 | point using higher-order desires: There is some n such that you 40 | don't control your nth-order desires. 41 | Thanks to Trevor Blackwell, 42 | Jessica Livingston, Robert Morris, and 43 | Michael Nielsen for reading drafts of this. -------------------------------------------------------------------------------- /data/PaulGrahamEssaysLarge/weird.txt: -------------------------------------------------------------------------------- 1 | August 2021When people say that in their experience all programming languages 2 | are basically equivalent, they're making a statement not about 3 | languages but about the kind of programming they've done.99.5% of programming consists of gluing together calls to library 4 | functions. All popular languages are equally good at this. 
So one 5 | can easily spend one's whole career operating in the intersection 6 | of popular programming languages.But the other .5% of programming is disproportionately interesting. 7 | If you want to learn what it consists of, the weirdness of weird 8 | languages is a good clue to follow.Weird languages aren't weird by accident. Not the good ones, at 9 | least. The weirdness of the good ones usually implies the existence 10 | of some form of programming that's not just the usual gluing together 11 | of library calls.A concrete example: Lisp macros. Lisp macros seem weird even to 12 | many Lisp programmers. They're not only not in the intersection of 13 | popular languages, but by their nature would be hard to implement 14 | properly in a language without turning it into a dialect of 15 | Lisp. And macros are definitely evidence of techniques that go 16 | beyond glue programming. For example, solving problems by first 17 | writing a language for problems of that type, and then writing 18 | your specific application in it. Nor is this all you can do with 19 | macros; it's just one region in a space of program-manipulating 20 | techniques that even now is far from fully explored.So if you want to expand your concept of what programming can be, 21 | one way to do it is by learning weird languages. Pick a language 22 | that most programmers consider weird but whose median user is smart, 23 | and then focus on the differences between this language and the 24 | intersection of popular languages. What can you say in this language 25 | that would be impossibly inconvenient to say in others? In the 26 | process of learning how to say things you couldn't previously say, 27 | you'll probably be learning how to think things you couldn't 28 | previously think. 29 | Thanks to Trevor Blackwell, Patrick Collison, Daniel Gackle, Amjad 30 | Masad, and Robert Morris for reading drafts of this. 
31 | -------------------------------------------------------------------------------- /data/San_Francisco_Trees.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/San_Francisco_Trees.db -------------------------------------------------------------------------------- /data/WildBelle1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/WildBelle1.png -------------------------------------------------------------------------------- /data/field-guide-to-data-science.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/field-guide-to-data-science.pdf -------------------------------------------------------------------------------- /data/gpt4_cost_2023_4_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/gpt4_cost_2023_4_11.png -------------------------------------------------------------------------------- /data/images/TooEasy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/images/TooEasy.png -------------------------------------------------------------------------------- /data/question_a_book_audio.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkamradt/langchain-tutorials/697c4de4f6c655ea3aa16ea0de324da157398557/data/question_a_book_audio.mp3 
-------------------------------------------------------------------------------- /data/thefuzz/.editorconfig: -------------------------------------------------------------------------------- 1 | # .editorconfig 2 | # http://editorconfig.org/ 3 | root = true 4 | 5 | [*] 6 | charset = utf-8 7 | end_of_line = lf 8 | indent_size = 2 9 | indent_style = space 10 | insert_final_newline = true 11 | trim_trailing_whitespace = true 12 | 13 | [*.bat] 14 | end_of_line = crlf 15 | 16 | [*.go] 17 | indent_size = 4 18 | indent_style = tab 19 | 20 | [*.html] 21 | indent_size = 4 22 | 23 | [*Makefile] 24 | indent_size = 4 25 | indent_style = tab 26 | 27 | [*.php] 28 | indent_size = 4 29 | 30 | [*.py] 31 | indent_size = 4 32 | 33 | [*.xml] 34 | indent_size = 4 35 | -------------------------------------------------------------------------------- /data/thefuzz/.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: The Fuzz 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] 12 | test-cmd: [pytest] 13 | include: 14 | #- python-version: pypy-3.8 15 | # test-cmd: pytest test_thefuzz.py test_thefuzz_pytest.py 16 | - python-version: "3.7" 17 | test-cmd: python setup.py check --restructuredtext --strict --metadata 18 | - python-version: "3.10" 19 | test-cmd: python setup.py check --restructuredtext --strict --metadata 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip setuptools wheel 29 | pip install pytest pycodestyle docutils Pygments hypothesis python-Levenshtein 30 | - name: Test with pytest 31 | run: | 32 | ${{ matrix.test-cmd }} 33 | 
-------------------------------------------------------------------------------- /data/thefuzz/.gitignore: -------------------------------------------------------------------------------- 1 | *.py[oc] 2 | 3 | # Temp files 4 | *~ 5 | ~* 6 | .*~ 7 | \#* 8 | .#* 9 | *# 10 | 11 | # Build files 12 | build 13 | dist 14 | pkg 15 | *.egg 16 | *.egg-info 17 | 18 | # Debian Files 19 | debian/files 20 | debian/python-beaver* 21 | 22 | # Sphinx build 23 | doc/_build 24 | 25 | # Generated man page 26 | doc/aws_hostname.1 27 | 28 | # tox 29 | .tox 30 | 31 | # Hypothesis - keep the examples database 32 | .hypothesis/tmp 33 | .hypothesis/unicodedata 34 | .hypothesis 35 | 36 | # pytest 37 | .cache/ 38 | .pytest_cache 39 | __pycache__ 40 | 41 | # Pycharm 42 | .idea/ 43 | 44 | # vscode 45 | .vscode/ 46 | -------------------------------------------------------------------------------- /data/thefuzz/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - python: "3.7" 5 | env: TEST_SUITE=pytest 6 | - python: "3.8" 7 | env: TEST_SUITE=pytest 8 | - python: "3.9" 9 | env: TEST_SUITE=pytest 10 | - python: "3.10" 11 | env: TEST_SUITE=pytest 12 | - python: "3.11-dev" 13 | env: TEST_SUITE=pytest 14 | - python: "pypy3.7-7.3.5" 15 | env: TEST_SUITE="pytest test_thefuzz.py test_thefuzz_pytest.py" 16 | - python: "3.10" 17 | env: TEST_SUITE="python setup.py check --restructuredtext --strict --metadata" 18 | install: 19 | - pip install -U pip setuptools wheel 20 | - pip install pytest pycodestyle docutils Pygments hypothesis 21 | script: 22 | - $TEST_SUITE 23 | notifications: 24 | on_success: always 25 | cache: pip 26 | -------------------------------------------------------------------------------- /data/thefuzz/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include *.rst 3 | include test_thefuzz.py 4 | 
-------------------------------------------------------------------------------- /data/thefuzz/README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://github.com/seatgeek/thefuzz/actions/workflows/ci.yml/badge.svg 2 | :target: https://github.com/seatgeek/thefuzz 3 | 4 | TheFuzz 5 | ======= 6 | 7 | Fuzzy string matching like a boss. It uses `Levenshtein Distance <https://en.wikipedia.org/wiki/Levenshtein_distance>`_ to calculate the differences between sequences in a simple-to-use package. 8 | 9 | Requirements 10 | ============ 11 | 12 | - Python 3.7 or higher 13 | - difflib 14 | - `python-Levenshtein <https://github.com/ztane/python-Levenshtein/>`_ (optional, provides a 4-10x speedup in String 15 | Matching, though may result in `differing results for certain cases <https://github.com/seatgeek/fuzzywuzzy/issues/128>`_) 16 | 17 | For testing 18 | ~~~~~~~~~~~ 19 | - pycodestyle 20 | - hypothesis 21 | - pytest 22 | 23 | Installation 24 | ============ 25 | 26 | Using PIP via PyPI 27 | 28 | .. code:: bash 29 | 30 | pip install thefuzz 31 | 32 | or the following to install `python-Levenshtein` too 33 | 34 | .. code:: bash 35 | 36 | pip install thefuzz[speedup] 37 | 38 | 39 | Using PIP via Github 40 | 41 | .. code:: bash 42 | 43 | pip install git+git://github.com/seatgeek/thefuzz.git@0.19.0#egg=thefuzz 44 | 45 | Adding to your ``requirements.txt`` file (run ``pip install -r requirements.txt`` afterwards) 46 | 47 | .. code:: bash 48 | 49 | git+ssh://git@github.com/seatgeek/thefuzz.git@0.19.0#egg=thefuzz 50 | 51 | Manually via GIT 52 | 53 | .. code:: bash 54 | 55 | git clone git://github.com/seatgeek/thefuzz.git thefuzz 56 | cd thefuzz 57 | python setup.py install 58 | 59 | 60 | Usage 61 | ===== 62 | 63 | .. code:: python 64 | 65 | >>> from thefuzz import fuzz 66 | >>> from thefuzz import process 67 | 68 | Simple Ratio 69 | ~~~~~~~~~~~~ 70 | 71 | .. code:: python 72 | 73 | >>> fuzz.ratio("this is a test", "this is a test!") 74 | 97 75 | 76 | Partial Ratio 77 | ~~~~~~~~~~~~~ 78 | 79 | .. 
code:: python 80 | 81 | >>> fuzz.partial_ratio("this is a test", "this is a test!") 82 | 100 83 | 84 | Token Sort Ratio 85 | ~~~~~~~~~~~~~~~~ 86 | 87 | .. code:: python 88 | 89 | >>> fuzz.ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear") 90 | 91 91 | >>> fuzz.token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear") 92 | 100 93 | 94 | Token Set Ratio 95 | ~~~~~~~~~~~~~~~ 96 | 97 | .. code:: python 98 | 99 | >>> fuzz.token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear") 100 | 84 101 | >>> fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear") 102 | 100 103 | 104 | Partial Token Sort Ratio 105 | ~~~~~~~~~~~~~~~~~~~~~~~~ 106 | 107 | .. code:: python 108 | 109 | >>> fuzz.token_sort_ratio("fuzzy was a bear", "wuzzy fuzzy was a bear") 110 | 84 111 | >>> fuzz.partial_token_sort_ratio("fuzzy was a bear", "wuzzy fuzzy was a bear") 112 | 100 113 | 114 | Process 115 | ~~~~~~~ 116 | 117 | .. code:: python 118 | 119 | >>> choices = ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"] 120 | >>> process.extract("new york jets", choices, limit=2) 121 | [('New York Jets', 100), ('New York Giants', 78)] 122 | >>> process.extractOne("cowboys", choices) 123 | ("Dallas Cowboys", 90) 124 | 125 | You can also pass additional parameters to ``extractOne`` method to make it use a specific scorer. A typical use case is to match file paths: 126 | 127 | .. code:: python 128 | 129 | >>> process.extractOne("System of a down - Hypnotize - Heroin", songs) 130 | ('/music/library/good/System of a Down/2005 - Hypnotize/01 - Attack.mp3', 86) 131 | >>> process.extractOne("System of a down - Hypnotize - Heroin", songs, scorer=fuzz.token_sort_ratio) 132 | ("/music/library/good/System of a Down/2005 - Hypnotize/10 - She's Like Heroin.mp3", 61) 133 | 134 | .. 
#!/usr/bin/env python

# Copyright (c) 2014 SeatGeek

# This file is part of thefuzz.

from thefuzz import __version__
import os

try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup


def open_file(fname):
    """Open *fname*, resolved relative to this script's directory.

    The file is opened in text mode as UTF-8 so the result does not depend
    on the platform's default locale encoding.

    :param fname: file name relative to the directory holding setup.py
    :return: an open text file object; the caller is responsible for
        closing it (it can be used as a context manager)
    """
    return open(os.path.join(os.path.dirname(__file__), fname),
                encoding='utf-8')


# Read the README up front inside a ``with`` block so the handle is closed
# deterministically instead of being left to the garbage collector.
with open_file('README.rst') as readme_file:
    long_description = readme_file.read()


setup(
    name='thefuzz',
    version=__version__,
    author='Adam Cohen',
    author_email='adam@seatgeek.com',
    packages=['thefuzz'],
    extras_require={'speedup': ['python-levenshtein>=0.12']},
    url='https://github.com/seatgeek/thefuzz',
    license="GPLv2",
    classifiers=[
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU General Public License v2 (GPLv2)',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3 :: Only',
    ],
    description='Fuzzy string matching in python',
    long_description=long_description,
    zip_safe=True,
)
class StringMatcher:
    """A SequenceMatcher-like class built on the top of Levenshtein.

    Every result computed from the current pair of sequences is cached on
    the instance; changing either sequence clears the cache.  ``None`` is
    the "not computed yet" marker, so legitimate falsy results (a ratio of
    ``0.0``, a distance of ``0``, an empty opcode list) stay cached.
    """

    def _reset_cache(self):
        """Drop every cached result so it is recomputed on next access."""
        self._ratio = self._distance = None
        self._opcodes = self._editops = self._matching_blocks = None

    def __init__(self, isjunk=None, seq1='', seq2=''):
        """Store the two sequences to compare.

        :param isjunk: accepted for signature compatibility only; a truthy
            value triggers a warning and is otherwise ignored
        :param seq1: first sequence
        :param seq2: second sequence
        """
        if isjunk:
            warn("isjunk is NOT implemented, it will be ignored")
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seqs(self, seq1, seq2):
        """Replace both sequences and invalidate cached results."""
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seq1(self, seq1):
        """Replace the first sequence and invalidate cached results."""
        self._str1 = seq1
        self._reset_cache()

    def set_seq2(self, seq2):
        """Replace the second sequence and invalidate cached results."""
        self._str2 = seq2
        self._reset_cache()

    def get_opcodes(self):
        """Return the opcodes, derived from cached editops when available."""
        if self._opcodes is None:
            if self._editops:
                self._opcodes = opcodes(self._editops, self._str1, self._str2)
            else:
                self._opcodes = opcodes(self._str1, self._str2)
        return self._opcodes

    def get_editops(self):
        """Return the editops, derived from cached opcodes when available."""
        if self._editops is None:
            if self._opcodes:
                self._editops = editops(self._opcodes, self._str1, self._str2)
            else:
                self._editops = editops(self._str1, self._str2)
        return self._editops

    def get_matching_blocks(self):
        """Return the matching blocks computed from the opcodes."""
        if self._matching_blocks is None:
            self._matching_blocks = matching_blocks(self.get_opcodes(),
                                                    self._str1, self._str2)
        return self._matching_blocks

    def ratio(self):
        """Return the (cached) Levenshtein ratio of the two sequences."""
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def quick_ratio(self):
        """Return the same value as :meth:`ratio`."""
        # This is usually quick enough :o)
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def real_quick_ratio(self):
        """Return a length-only ratio: ``2 * min(len) / (len1 + len2)``.

        Two empty sequences yield ``1.0`` (the value
        ``difflib.SequenceMatcher`` reports for that case) instead of
        raising ``ZeroDivisionError``.
        """
        len1, len2 = len(self._str1), len(self._str2)
        total = len1 + len2
        if not total:
            return 1.0
        return 2.0 * min(len1, len2) / total

    def distance(self):
        """Return the (cached) Levenshtein distance of the two sequences."""
        if self._distance is None:
            self._distance = distance(self._str1, self._str2)
        return self._distance
21 | def get_editops(self) -> EditOpcodeT: ... 22 | def get_matching_blocks(self) -> MatchingBlocksT: ... 23 | def ratio(self) -> float: ... 24 | def quick_ratio(self) -> float: ... 25 | def real_quick_ratio(self) -> float: ... 26 | def distance(self) -> int: ... 27 | -------------------------------------------------------------------------------- /data/thefuzz/thefuzz/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.19.0' 2 | -------------------------------------------------------------------------------- /data/thefuzz/thefuzz/fuzz.pyi: -------------------------------------------------------------------------------- 1 | def ratio(s1: str, s2: str) -> int: ... 2 | def partial_ratio(s1: str, s2: str) -> int: ... 3 | def _process_and_sort(s: str, force_ascii: bool, full_process: bool = ...) -> str: ... 4 | def _token_sort(s1: str, s2: str, partial: bool = ..., force_ascii: bool = ..., full_process: bool = ...) -> int: ... 5 | def token_sort_ratio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ... 6 | def partial_token_sort_ratio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ... 7 | def _token_set(s1: str, s2: str, partial: bool = ..., force_ascii: bool = ..., full_process: bool = ...) -> int: ... 8 | def token_set_ratio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ... 9 | def partial_token_set_ratio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ... 10 | def QRatio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ... 11 | def UQRatio(s1: str, s2: str, full_process: bool = ...) -> int: ... 12 | def WRatio(s1: str, s2: str, force_ascii: bool = ..., full_process: bool = ...) -> int: ... 13 | def UWRatio(s1: str, s2: str, full_process: bool = ...) -> int: ... 
14 | -------------------------------------------------------------------------------- /data/thefuzz/thefuzz/process.pyi: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | import typing 3 | from typing import Any, Callable, Union, Tuple, Generator, TypeVar, Sequence 4 | 5 | 6 | ChoicesT = Union[Mapping[str, str], Sequence[str]] 7 | T = TypeVar('T') 8 | ProcessorT = Union[Callable[[str, bool], str], Callable[[Any], Any]] 9 | ScorerT = Callable[[str, str, bool, bool], int] 10 | 11 | 12 | @typing.overload 13 | def extractWithoutOrder(query: str, choices: Mapping[str, str], processor: ProcessorT, scorer: ScorerT, score_cutoff: int = ...) -> Generator[Tuple[str, int, str], None, None]: ... 14 | 15 | 16 | @typing.overload 17 | def extractWithoutOrder(query: str, choices: Sequence[str], processor: ProcessorT, scorer: ScorerT, score_cutoff: int = ...) -> Generator[Tuple[str, int], None, None]: ... 18 | -------------------------------------------------------------------------------- /data/thefuzz/thefuzz/string_processing.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | class StringProcessor: 5 | """ 6 | This class defines method to process strings in the most 7 | efficient way. Ideally all the methods below use unicode strings 8 | for both input and output. 9 | """ 10 | 11 | regex = re.compile(r"(?ui)\W") 12 | 13 | @classmethod 14 | def replace_non_letters_non_numbers_with_whitespace(cls, a_string): 15 | """ 16 | This function replaces any sequence of non letters and non 17 | numbers with a single white space. 
def validate_string(s):
    """
    Check input has length and that length > 0

    :param s: object to check
    :return: True if len(s) > 0 else False (also False when ``s`` does not
        support ``len()``)
    """
    try:
        return len(s) > 0
    except TypeError:
        return False


def _compared_pair(func, args, kwargs):
    """Return the first two arguments *func* will receive.

    Two or more positional arguments are returned directly.  Otherwise the
    call is bound against the signature of *func* so that values passed by
    keyword are found too.

    :raises TypeError: if the call does not match the signature of *func*
        or fewer than two arguments can be resolved
    """
    if len(args) >= 2:
        return args[0], args[1]
    # Local import: only the rare keyword-call path needs it, and the
    # positional fast path above stays free of any signature inspection.
    import inspect
    bound = inspect.signature(func).bind(*args, **kwargs)
    bound.apply_defaults()
    values = list(bound.arguments.values())
    if len(values) < 2:
        raise TypeError(
            "%s() needs two values to compare" % getattr(func, "__name__", "scorer"))
    return values[0], values[1]


def check_for_equivalence(func):
    """Decorator: return 100 without calling *func* when both inputs are equal."""
    @functools.wraps(func)
    def decorator(*args, **kwargs):
        first, second = _compared_pair(func, args, kwargs)
        if first == second:
            return 100
        return func(*args, **kwargs)
    return decorator


def check_for_none(func):
    """Decorator: return 0 without calling *func* when either input is None."""
    @functools.wraps(func)
    def decorator(*args, **kwargs):
        first, second = _compared_pair(func, args, kwargs)
        if first is None or second is None:
            return 0
        return func(*args, **kwargs)
    return decorator


def check_empty_string(func):
    """Decorator: return 0 without calling *func* when either input has length 0."""
    @functools.wraps(func)
    def decorator(*args, **kwargs):
        first, second = _compared_pair(func, args, kwargs)
        if len(first) == 0 or len(second) == 0:
            return 0
        return func(*args, **kwargs)
    return decorator


bad_chars = "".join([chr(i) for i in range(128, 256)])  # ascii dammit!
# Maps every code point in U+0080..U+00FF to None so str.translate drops it.
translation_table = {ord(c): None for c in bad_chars}


def ascii_only(s):
    """Return *s* with every character in U+0080..U+00FF removed.

    Characters above U+00FF are not in the translation table and are left
    unchanged.
    """
    return s.translate(translation_table)


def make_type_consistent(s1, s2):
    """If objects aren't both string instances force them to strings"""
    if isinstance(s1, str) and isinstance(s2, str):
        return s1, s2

    else:
        return str(s1), str(s2)


def full_process(s, force_ascii=False):
    """Process string by
        -- replacing every character matched by the StringProcessor regex
           with a space
        -- trim whitespace
        -- force to lower case
        if force_ascii == True, convert to str and strip U+0080..U+00FF first"""

    if force_ascii:
        s = ascii_only(str(s))
    # Keep only Letters and Numbers (see Unicode docs).
    string_out = StringProcessor.replace_non_letters_non_numbers_with_whitespace(s)
    # Remove leading and trailing whitespaces and force into lowercase.
    string_out = string_out.strip().lower()
    return string_out


def intr(n):
    '''Returns n rounded with the built-in round() (ties go to the even
    neighbour) as an int'''
    return int(round(n))
15 | -------------------------------------------------------------------------------- /data/thefuzz/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{37, 38, 39, 310, 311, py3} 3 | skip_missing_interpreters = True 4 | 5 | [testenv] 6 | deps = pytest 7 | pycodestyle 8 | hypothesis 9 | commands = pytest 10 | --------------------------------------------------------------------------------