├── figures ├── acq.png ├── logo.png ├── rea.png ├── tasks.png └── results.png ├── context_data ├── PaulGrahamEssays │ ├── rss.txt │ ├── pow.txt │ ├── todo.txt │ ├── nft.txt │ ├── weird.txt │ ├── rootsoflisp.txt │ ├── foundervisa.txt │ ├── iflisp.txt │ ├── sun.txt │ ├── want.txt │ ├── unions.txt │ ├── bias.txt │ ├── know.txt │ ├── mod.txt │ ├── island.txt │ ├── diff.txt │ ├── founders.txt │ ├── vw.txt │ ├── copy.txt │ ├── goodtaste.txt │ ├── ecw.txt │ ├── corpdev.txt │ ├── addiction.txt │ ├── newideas.txt │ ├── aord.txt │ ├── vcsqueeze.txt │ ├── vb.txt │ ├── hubs.txt │ ├── gba.txt │ ├── apple.txt │ └── submarine.txt ├── a_stars.txt └── r_stars.txt ├── LICENSE ├── README.md ├── viz.ipynb └── gen_test_data.ipynb /figures/acq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nick7nlp/Counting-Stars/HEAD/figures/acq.png -------------------------------------------------------------------------------- /figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nick7nlp/Counting-Stars/HEAD/figures/logo.png -------------------------------------------------------------------------------- /figures/rea.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nick7nlp/Counting-Stars/HEAD/figures/rea.png -------------------------------------------------------------------------------- /figures/tasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nick7nlp/Counting-Stars/HEAD/figures/tasks.png -------------------------------------------------------------------------------- /figures/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nick7nlp/Counting-Stars/HEAD/figures/results.png 
-------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/rss.txt: -------------------------------------------------------------------------------- 1 | Aaron Swartz created a scraped 2 | feed 3 | of the essays page. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/pow.txt: -------------------------------------------------------------------------------- 1 | January 2017 People who are powerful but uncharismatic will tend to be disliked. 2 | Their power makes them a target for criticism that they don't have 3 | the charisma to disarm. That was Hillary Clinton's problem. It also 4 | tends to be a problem for any CEO who is more of a builder than a 5 | schmoozer. And yet the builder-type CEO is (like Hillary) probably 6 | the best person for the job. I don't think there is any solution to this problem. It's human 7 | nature. The best we can do is to recognize that it's happening, and 8 | to understand that being a magnet for criticism is sometimes a sign 9 | not that someone is the wrong person for a job, but that they're 10 | the right one. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Mingyang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/todo.txt: -------------------------------------------------------------------------------- 1 | April 2012A palliative care nurse called Bronnie Ware made a list of the 2 | biggest regrets 3 | of the dying. Her list seems plausible. I could see 4 | myself — can see myself — making at least 4 of these 5 | 5 mistakes.If you had to compress them into a single piece of advice, it might 6 | be: don't be a cog. The 5 regrets paint a portrait of post-industrial 7 | man, who shrinks himself into a shape that fits his circumstances, 8 | then turns dutifully till he stops.The alarming thing is, the mistakes that produce these regrets are 9 | all errors of omission. You forget your dreams, ignore your family, 10 | suppress your feelings, neglect your friends, and forget to be 11 | happy. Errors of omission are a particularly dangerous type of 12 | mistake, because you make them by default.I would like to avoid making these mistakes. But how do you avoid 13 | mistakes you make by default? Ideally you transform your life so 14 | it has other defaults. But it may not be possible to do that 15 | completely. As long as these mistakes happen by default, you probably 16 | have to be reminded not to make them. 
So I inverted the 5 regrets, 17 | yielding a list of 5 commands 18 | 19 | Don't ignore your dreams; don't work too much; say what you 20 | think; cultivate friendships; be happy. 21 | 22 | which I then put at the top of the file I use as a todo list. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/nft.txt: -------------------------------------------------------------------------------- 1 | May 2021Noora Health, a nonprofit I've 2 | supported for years, just launched 3 | a new NFT. It has a dramatic name, Save Thousands of Lives, 4 | because that's what the proceeds will do.Noora has been saving lives for 7 years. They run programs in 5 | hospitals in South Asia to teach new mothers how to take care of 6 | their babies once they get home. They're in 165 hospitals now. And 7 | because they know the numbers before and after they start at a new 8 | hospital, they can measure the impact they have. It is massive. 9 | For every 1000 live births, they save 9 babies.This number comes from a study 10 | of 133,733 families at 28 different 11 | hospitals that Noora conducted in collaboration with the Better 12 | Birth team at Ariadne Labs, a joint center for health systems 13 | innovation at Brigham and Women’s Hospital and Harvard T.H. Chan 14 | School of Public Health.Noora is so effective that even if you measure their costs in the 15 | most conservative way, by dividing their entire budget by the number 16 | of lives saved, the cost of saving a life is the lowest I've seen. 17 | $1,235.For this NFT, they're going to issue a public report tracking how 18 | this specific tranche of money is spent, and estimating the number 19 | of lives saved as a result.NFTs are a new territory, and this way of using them is especially 20 | new, but I'm excited about its potential. 
And I'm excited to see 21 | what happens with this particular auction, because unlike an NFT 22 | representing something that has already happened, 23 | this NFT gets better as the price gets higher.The reserve price was about $2.5 million, because that's what it 24 | takes for the name to be accurate: that's what it costs to save 25 | 2000 lives. But the higher the price of this NFT goes, the more 26 | lives will be saved. What a sentence to be able to write. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/weird.txt: -------------------------------------------------------------------------------- 1 | August 2021When people say that in their experience all programming languages 2 | are basically equivalent, they're making a statement not about 3 | languages but about the kind of programming they've done.99.5% of programming consists of gluing together calls to library 4 | functions. All popular languages are equally good at this. So one 5 | can easily spend one's whole career operating in the intersection 6 | of popular programming languages.But the other .5% of programming is disproportionately interesting. 7 | If you want to learn what it consists of, the weirdness of weird 8 | languages is a good clue to follow.Weird languages aren't weird by accident. Not the good ones, at 9 | least. The weirdness of the good ones usually implies the existence 10 | of some form of programming that's not just the usual gluing together 11 | of library calls.A concrete example: Lisp macros. Lisp macros seem weird even to 12 | many Lisp programmers. They're not only not in the intersection of 13 | popular languages, but by their nature would be hard to implement 14 | properly in a language without turning it into a dialect of 15 | Lisp. And macros are definitely evidence of techniques that go 16 | beyond glue programming. 
For example, solving problems by first 17 | writing a language for problems of that type, and then writing 18 | your specific application in it. Nor is this all you can do with 19 | macros; it's just one region in a space of program-manipulating 20 | techniques that even now is far from fully explored.So if you want to expand your concept of what programming can be, 21 | one way to do it is by learning weird languages. Pick a language 22 | that most programmers consider weird but whose median user is smart, 23 | and then focus on the differences between this language and the 24 | intersection of popular languages. What can you say in this language 25 | that would be impossibly inconvenient to say in others? In the 26 | process of learning how to say things you couldn't previously say, 27 | you'll probably be learning how to think things you couldn't 28 | previously think. 29 | Thanks to Trevor Blackwell, Patrick Collison, Daniel Gackle, Amjad 30 | Masad, and Robert Morris for reading drafts of this. 31 | -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/rootsoflisp.txt: -------------------------------------------------------------------------------- 1 | May 2001 2 | 3 | (I wrote this article to help myself understand exactly 4 | what McCarthy discovered. You don't need to know this stuff 5 | to program in Lisp, but it should be helpful to 6 | anyone who wants to 7 | understand the essence of Lisp — both in the sense of its 8 | origins and its semantic core. The fact that it has such a core 9 | is one of Lisp's distinguishing features, and the reason why, 10 | unlike other languages, Lisp has dialects.)In 1960, John 11 | McCarthy published a remarkable paper in 12 | which he did for programming something like what Euclid did for 13 | geometry. He showed how, given a handful of simple 14 | operators and a notation for functions, you can 15 | build a whole programming language. 
16 | He called this language Lisp, for "List Processing," 17 | because one of his key ideas was to use a simple 18 | data structure called a list for both 19 | code and data.It's worth understanding what McCarthy discovered, not 20 | just as a landmark in the history of computers, but as 21 | a model for what programming is tending to become in 22 | our own time. It seems to me that there have been 23 | two really clean, consistent models of programming so 24 | far: the C model and the Lisp model. 25 | These two seem points of high ground, with swampy lowlands 26 | between them. As computers have grown more powerful, 27 | the new languages being developed have been moving 28 | steadily toward the Lisp model. A popular recipe 29 | for new programming languages in the past 20 years 30 | has been to take the C model of computing and add to 31 | it, piecemeal, parts taken from the Lisp model, 32 | like runtime typing and garbage collection.In this article I'm going to try to explain in the 33 | simplest possible terms what McCarthy discovered. 34 | The point is not just to learn about an interesting 35 | theoretical result someone figured out forty years ago, 36 | but to show where languages are heading. 37 | The unusual thing about Lisp — in fact, the defining 38 | quality of Lisp — is that it can be written in 39 | itself. To understand what McCarthy meant by this, 40 | we're going to retrace his steps, with his mathematical 41 | notation translated into running Common Lisp code. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/foundervisa.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | April 2009I usually avoid politics, but since we now seem to have an administration that's open to suggestions, I'm going to risk making one. 
The single biggest thing the government could do to increase the number of startups in this country is a policy that would cost nothing: establish a new class of visa for startup founders. The biggest constraint on the number of new startups that get created in the US is not tax policy or employment law or even Sarbanes-Oxley. It's that we won't let the people who want to start them into the country. Letting just 10,000 startup founders into the country each year could have a visible effect on the economy. If we assume 4 people per startup, which is probably an overestimate, that's 2500 new companies. Each year. They wouldn't all grow as big as Google, but out of 2500 some would come close. By definition these 10,000 founders wouldn't be taking jobs from Americans: it could be part of the terms of the visa that they couldn't work for existing companies, only new ones they'd founded. In fact they'd cause there to be 4 | more jobs for Americans, because the companies they started would hire more employees as they grew. The tricky part might seem to be how one defined a startup. But that could be solved quite easily: let the market decide. Startup investors work hard to find the best startups. The government could not do better than to piggyback on their expertise, and use investment by recognized startup investors as the test of whether a company was a real startup. How would the government decide who's a startup investor? The same way they decide what counts as a university for student visas. We'll establish our own accreditation procedure. We know who one another are. 10,000 people is a drop in the bucket by immigration standards, but would represent a huge increase in the pool of startup founders. I think this would have such a visible effect on the economy that it would make the legislator who introduced the bill famous. The only way to know for sure would be to try it, and that would cost practically nothing. 
5 | Thanks to Trevor Blackwell, Paul Buchheit, Jeff Clavier, David Hornik, Jessica Livingston, Greg Mcadoo, Aydin Senkut, and Fred Wilson for reading drafts of this. Related: -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/iflisp.txt: -------------------------------------------------------------------------------- 1 | May 2003 If Lisp is so great, why don't more people use it? I was 2 | asked this question by a student in the audience at a 3 | talk I gave recently. Not for the first time, either. In languages, as in so many things, there's not much 4 | correlation between popularity and quality. Why does 5 | John Grisham (King of Torts sales rank, 44) outsell 6 | Jane Austen (Pride and Prejudice sales rank, 6191)? 7 | Would even Grisham claim that it's because he's a better 8 | writer? Here's the first sentence of Pride and Prejudice: 9 | 10 | It is a truth universally acknowledged, that a single man 11 | in possession of a good fortune must be in want of a 12 | wife. 13 | 14 | "It is a truth universally acknowledged?" Long words for 15 | the first sentence of a love story. Like Jane Austen, Lisp looks hard. Its syntax, or lack 16 | of syntax, makes it look completely unlike 17 | the languages 18 | most people are used to. Before I learned Lisp, I was afraid 19 | of it too. I recently came across a notebook from 1983 20 | in which I'd written: 21 | 22 | I suppose I should learn Lisp, but it seems so foreign. 23 | 24 | Fortunately, I was 19 at the time and not too resistant to learning 25 | new things. I was so ignorant that learning 26 | almost anything meant learning new things. People frightened by Lisp make up other reasons for not 27 | using it. The standard 28 | excuse, back when C was the default language, was that Lisp 29 | was too slow. Now that Lisp dialects are among 30 | the faster 31 | languages available, that excuse has gone away. 
32 | Now the standard excuse is openly circular: that other languages 33 | are more popular.(Beware of such reasoning. It gets you Windows.)Popularity is always self-perpetuating, but it's especially 34 | so in programming languages. More libraries 35 | get written for popular languages, which makes them still 36 | more popular. Programs often have to work with existing programs, 37 | and this is easier if they're written in the same language, 38 | so languages spread from program to program like a virus. 39 | And managers prefer popular languages, because they give them 40 | more leverage over developers, who can more easily be replaced.Indeed, if programming languages were all more or less equivalent, 41 | there would be little justification for using any but the most 42 | popular. But they aren't all equivalent, not by a long 43 | shot. And that's why less popular languages, like Jane Austen's 44 | novels, continue to survive at all. When everyone else is reading 45 | the latest John Grisham novel, there will always be a few people 46 | reading Jane Austen instead. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/sun.txt: -------------------------------------------------------------------------------- 1 | September 2017The most valuable insights are both general and surprising. 2 | F = ma for example. But general and surprising is a hard 3 | combination to achieve. That territory tends to be picked 4 | clean, precisely because those insights are so valuable.Ordinarily, the best that people can do is one without the 5 | other: either surprising without being general (e.g. 6 | gossip), or general without being surprising (e.g. 7 | platitudes).Where things get interesting is the moderately valuable 8 | insights. You get those from small additions of whichever 9 | quality was missing. 
The more common case is a small 10 | addition of generality: a piece of gossip that's more than 11 | just gossip, because it teaches something interesting about 12 | the world. But another less common approach is to focus on 13 | the most general ideas and see if you can find something new 14 | to say about them. Because these start out so general, you 15 | only need a small delta of novelty to produce a useful 16 | insight.A small delta of novelty is all you'll be able to get most 17 | of the time. Which means if you take this route, your ideas 18 | will seem a lot like ones that already exist. Sometimes 19 | you'll find you've merely rediscovered an idea that did 20 | already exist. But don't be discouraged. Remember the huge 21 | multiplier that kicks in when you do manage to think of 22 | something even a little new.Corollary: the more general the ideas you're talking about, 23 | the less you should worry about repeating yourself. If you 24 | write enough, it's inevitable you will. Your brain is much 25 | the same from year to year and so are the stimuli that hit 26 | it. I feel slightly bad when I find I've said something 27 | close to what I've said before, as if I were plagiarizing 28 | myself. But rationally one shouldn't. You won't say 29 | something exactly the same way the second time, and that 30 | variation increases the chance you'll get that tiny but 31 | critical delta of novelty.And of course, ideas beget ideas. (That sounds 32 | familiar.) 33 | An idea with a small amount of novelty could lead to one 34 | with more. But only if you keep going. So it's doubly 35 | important not to let yourself be discouraged by people who 36 | say there's not much new about something you've discovered. 37 | "Not much new" is a real achievement when you're talking 38 | about the most general ideas. It's not true that there's nothing new under the sun. There 39 | are some domains where there's almost nothing new. 
But 40 | there's a big difference between nothing and almost nothing, 41 | when it's multiplied by the area under the sun. 42 | Thanks to Sam Altman, Patrick Collison, and Jessica 43 | Livingston for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/want.txt: -------------------------------------------------------------------------------- 1 | November 2022Since I was about 9 I've been puzzled by the apparent contradiction 2 | between being made of matter that behaves in a predictable way, and 3 | the feeling that I could choose to do whatever I wanted. At the 4 | time I had a self-interested motive for exploring the question. At 5 | that age (like most succeeding ages) I was always in trouble with 6 | the authorities, and it seemed to me that there might possibly be 7 | some way to get out of trouble by arguing that I wasn't responsible 8 | for my actions. I gradually lost hope of that, but the puzzle 9 | remained: How do you reconcile being a machine made of matter with 10 | the feeling that you're free to choose what you do? 11 | [1]The best way to explain the answer may be to start with a slightly 12 | wrong version, and then fix it. The wrong version is: You can do 13 | what you want, but you can't want what you want. Yes, you can control 14 | what you do, but you'll do what you want, and you can't control 15 | that.The reason this is mistaken is that people do sometimes change what 16 | they want. People who don't want to want something — drug addicts, 17 | for example — can sometimes make themselves stop wanting it. And 18 | people who want to want something — who want to like classical 19 | music, or broccoli — sometimes succeed.So we modify our initial statement: You can do what you want, but 20 | you can't want to want what you want.That's still not quite true. It's possible to change what you want 21 | to want. 
I can imagine someone saying "I decided to stop wanting 22 | to like classical music." But we're getting closer to the truth. 23 | It's rare for people to change what they want to want, and the more 24 | "want to"s we add, the rarer it gets.We can get arbitrarily close to a true statement by adding more "want 25 | to"s in much the same way we can get arbitrarily close to 1 by adding 26 | more 9s to a string of 9s following a decimal point. In practice 27 | three or four "want to"s must surely be enough. It's hard even to 28 | envision what it would mean to change what you want to want to want 29 | to want, let alone actually do it.So one way to express the correct answer is to use a regular 30 | expression. You can do what you want, but there's some statement 31 | of the form "you can't (want to)* want what you want" that's true. 32 | Ultimately you get back to a want that you don't control. 33 | [2] 34 | Notes[1] 35 | I didn't know when I was 9 that matter might behave randomly, 36 | but I don't think it affects the problem much. Randomness destroys 37 | the ghost in the machine as effectively as determinism.[2] 38 | If you don't like using an expression, you can make the same 39 | point using higher-order desires: There is some n such that you 40 | don't control your nth-order desires. 41 | Thanks to Trevor Blackwell, 42 | Jessica Livingston, Robert Morris, and 43 | Michael Nielsen for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/unions.txt: -------------------------------------------------------------------------------- 1 | May 2007People who worry about the increasing gap between rich and poor 2 | generally look back on the mid twentieth century as a golden age. 3 | In those days we had a large number of high-paying union manufacturing 4 | jobs that boosted the median income. 
I wouldn't quite call the 5 | high-paying union job a myth, but I think people who dwell on it 6 | are reading too much into it.Oddly enough, it was working with startups that made me realize 7 | where the high-paying union job came from. In a rapidly growing 8 | market, you don't worry too much about efficiency. It's more 9 | important to grow fast. If there's some mundane problem getting 10 | in your way, and there's a simple solution that's somewhat expensive, 11 | just take it and get on with more important things. EBay didn't 12 | win by paying less for servers than their competitors.Difficult though it may be to imagine now, manufacturing was a 13 | growth industry in the mid twentieth century. This was an era when 14 | small firms making everything from cars to candy were getting 15 | consolidated into a new kind of corporation with national reach and 16 | huge economies of scale. You had to grow fast or die. Workers 17 | were for these companies what servers are for an Internet startup. 18 | A reliable supply was more important than low cost.If you looked in the head of a 1950s auto executive, the attitude 19 | must have been: sure, give 'em whatever they ask for, so long as 20 | the new model isn't delayed.In other words, those workers were not paid what their work was 21 | worth. Circumstances being what they were, companies would have 22 | been stupid to insist on paying them so little.If you want a less controversial example of this phenomenon, ask 23 | anyone who worked as a consultant building web sites during the 24 | Internet Bubble. In the late nineties you could get paid huge sums 25 | of money for building the most trivial things. And yet does anyone 26 | who was there have any expectation those days will ever return? I 27 | doubt it. 
Surely everyone realizes that was just a temporary 28 | aberration.The era of labor unions seems to have been the same kind of aberration, 29 | just spread 30 | over a longer period, and mixed together with a lot of ideology 31 | that prevents people from viewing it with as cold an eye as they 32 | would something like consulting during the Bubble.Basically, unions were just Razorfish.People who think the labor movement was the creation of heroic union 33 | organizers have a problem to explain: why are unions shrinking now? 34 | The best they can do is fall back on the default explanation of 35 | people living in fallen civilizations. Our ancestors were giants. 36 | The workers of the early twentieth century must have had a moral 37 | courage that's lacking today.In fact there's a simpler explanation. The early twentieth century 38 | was just a fast-growing startup overpaying for infrastructure. And 39 | we in the present are not a fallen people, who have abandoned 40 | whatever mysterious high-minded principles produced the high-paying 41 | union job. We simply live in a time when the fast-growing companies 42 | overspend on different things. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/bias.txt: -------------------------------------------------------------------------------- 1 | October 2015This will come as a surprise to a lot of people, but in some cases 2 | it's possible to detect bias in a selection process without knowing 3 | anything about the applicant pool. 
Which is exciting because among 4 | other things it means third parties can use this technique to detect 5 | bias whether those doing the selecting want them to or not.You can use this technique whenever (a) you have at least 6 | a random sample of the applicants that were selected, (b) their 7 | subsequent performance is measured, and (c) the groups of 8 | applicants you're comparing have roughly equal distribution of ability.How does it work? Think about what it means to be biased. What 9 | it means for a selection process to be biased against applicants 10 | of type x is that it's harder for them to make it through. Which 11 | means applicants of type x have to be better to get selected than 12 | applicants not of type x. 13 | [1] 14 | Which means applicants of type x 15 | who do make it through the selection process will outperform other 16 | successful applicants. And if the performance of all the successful 17 | applicants is measured, you'll know if they do.Of course, the test you use to measure performance must be a valid 18 | one. And in particular it must not be invalidated by the bias you're 19 | trying to measure. 20 | But there are some domains where performance can be measured, and 21 | in those detecting bias is straightforward. Want to know if the 22 | selection process was biased against some type of applicant? Check 23 | whether they outperform the others. This is not just a heuristic 24 | for detecting bias. It's what bias means.For example, many suspect that venture capital firms are biased 25 | against female founders. This would be easy to detect: among their 26 | portfolio companies, do startups with female founders outperform 27 | those without? A couple months ago, one VC firm (almost certainly 28 | unintentionally) published a study showing bias of this type. First 29 | Round Capital found that among its portfolio companies, startups 30 | with female founders outperformed 31 | those without by 63%. 
32 | [2]The reason I began by saying that this technique would come as a 33 | surprise to many people is that we so rarely see analyses of this 34 | type. I'm sure it will come as a surprise to First Round that they 35 | performed one. I doubt anyone there realized that by limiting their 36 | sample to their own portfolio, they were producing a study not of 37 | startup trends but of their own biases when selecting companies.I predict we'll see this technique used more in the future. The 38 | information needed to conduct such studies is increasingly available. 39 | Data about who applies for things is usually closely guarded by the 40 | organizations selecting them, but nowadays data about who gets 41 | selected is often publicly available to anyone who takes the trouble 42 | to aggregate it. 43 | Notes[1] 44 | This technique wouldn't work if the selection process looked 45 | for different things from different types of applicants—for 46 | example, if an employer hired men based on their ability but women 47 | based on their appearance.[2] 48 | As Paul Buchheit points out, First Round excluded their most 49 | successful investment, Uber, from the study. And while it 50 | makes sense to exclude outliers from some types of studies, 51 | studies of returns from startup investing, which is all about 52 | hitting outliers, are not one of them. 53 | Thanks to Sam Altman, Jessica Livingston, and Geoff Ralston for reading 54 | drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/know.txt: -------------------------------------------------------------------------------- 1 | December 2014I've read Villehardouin's chronicle of the Fourth Crusade at least 2 | two times, maybe three. And yet if I had to write down everything 3 | I remember from it, I doubt it would amount to much more than a 4 | page. Multiply this times several hundred, and I get an uneasy 5 | feeling when I look at my bookshelves. 
What use is it to read all 6 | these books if I remember so little from them?A few months ago, as I was reading Constance Reid's excellent 7 | biography of Hilbert, I figured out if not the answer to this 8 | question, at least something that made me feel better about it. 9 | She writes: 10 | 11 | Hilbert had no patience with mathematical lectures which filled 12 | the students with facts but did not teach them how to frame a 13 | problem and solve it. He often used to tell them that "a perfect 14 | formulation of a problem is already half its solution." 15 | 16 | That has always seemed to me an important point, and I was even 17 | more convinced of it after hearing it confirmed by Hilbert.But how had I come to believe in this idea in the first place? A 18 | combination of my own experience and other things I'd read. None 19 | of which I could at that moment remember! And eventually I'd forget 20 | that Hilbert had confirmed it too. But my increased belief in the 21 | importance of this idea would remain something I'd learned from 22 | this book, even after I'd forgotten I'd learned it.Reading and experience train your model of the world. And even if 23 | you forget the experience or what you read, its effect on your model 24 | of the world persists. Your mind is like a compiled program you've 25 | lost the source of. It works, but you don't know why.The place to look for what I learned from Villehardouin's chronicle 26 | is not what I remember from it, but my mental models of the crusades, 27 | Venice, medieval culture, siege warfare, and so on. Which doesn't 28 | mean I couldn't have read more attentively, but at least the harvest 29 | of reading is not so miserably small as it might seem.This is one of those things that seem obvious in retrospect. 
But 30 | it was a surprise to me and presumably would be to anyone else who 31 | felt uneasy about (apparently) forgetting so much they'd read.Realizing it does more than make you feel a little better about 32 | forgetting, though. There are specific implications.For example, reading and experience are usually "compiled" at the 33 | time they happen, using the state of your brain at that time. The 34 | same book would get compiled differently at different points in 35 | your life. Which means it is very much worth reading important 36 | books multiple times. I always used to feel some misgivings about 37 | rereading books. I unconsciously lumped reading together with work 38 | like carpentry, where having to do something again is a sign you 39 | did it wrong the first time. Whereas now the phrase "already read" 40 | seems almost ill-formed.Intriguingly, this implication isn't limited to books. Technology 41 | will increasingly make it possible to relive our experiences. When 42 | people do that today it's usually to enjoy them again (e.g. when 43 | looking at pictures of a trip) or to find the origin of some bug in 44 | their compiled code (e.g. when Stephen Fry succeeded in remembering 45 | the childhood trauma that prevented him from singing). But as 46 | technologies for recording and playing back your life improve, it 47 | may become common for people to relive experiences without any goal 48 | in mind, simply to learn from them again as one might when rereading 49 | a book.Eventually we may be able not just to play back experiences but 50 | also to index and even edit them. So although not knowing how you 51 | know things may seem part of being human, it may not be. 52 | Thanks to Sam Altman, Jessica Livingston, and Robert Morris for reading 53 | drafts of this. 
-------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/mod.txt: -------------------------------------------------------------------------------- 1 | December 2019There are two distinct ways to be politically moderate: on purpose 2 | and by accident. Intentional moderates are trimmers, deliberately 3 | choosing a position mid-way between the extremes of right and left. 4 | Accidental moderates end up in the middle, on average, because they 5 | make up their own minds about each question, and the far right and 6 | far left are roughly equally wrong.You can distinguish intentional from accidental moderates by the 7 | distribution of their opinions. If the far left opinion on some 8 | matter is 0 and the far right opinion 100, an intentional moderate's 9 | opinion on every question will be near 50. Whereas an accidental 10 | moderate's opinions will be scattered over a broad range, but will, 11 | like those of the intentional moderate, average to about 50.Intentional moderates are similar to those on the far left and the 12 | far right in that their opinions are, in a sense, not their own. 13 | The defining quality of an ideologue, whether on the left or the 14 | right, is to acquire one's opinions in bulk. You don't get to pick 15 | and choose. Your opinions about taxation can be predicted from your 16 | opinions about sex. And although intentional moderates 17 | might seem to be the opposite of ideologues, their beliefs (though 18 | in their case the word "positions" might be more accurate) are also 19 | acquired in bulk. If the median opinion shifts to the right or left, 20 | the intentional moderate must shift with it. Otherwise they stop 21 | being moderate.Accidental moderates, on the other hand, not only choose their own 22 | answers, but choose their own questions. They may not care at all 23 | about questions that the left and right both think are terribly 24 | important. 
So you can only even measure the politics of an accidental 25 | moderate from the intersection of the questions they care about and 26 | those the left and right care about, and this can 27 | sometimes be vanishingly small.It is not merely a manipulative rhetorical trick to say "if you're 28 | not with us, you're against us," but often simply false.Moderates are sometimes derided as cowards, particularly by 29 | the extreme left. But while it may be accurate to call intentional 30 | moderates cowards, openly being an accidental moderate requires the 31 | most courage of all, because you get attacked from both right and 32 | left, and you don't have the comfort of being an orthodox member 33 | of a large group to sustain you.Nearly all the most impressive people I know are accidental moderates. 34 | If I knew a lot of professional athletes, or people in the entertainment 35 | business, that might be different. Being on the far left or far 36 | right doesn't affect how fast you run or how well you sing. But 37 | someone who works with ideas has to be independent-minded to do it 38 | well.Or more precisely, you have to be independent-minded about the ideas 39 | you work with. You could be mindlessly doctrinaire in your politics 40 | and still be a good mathematician. In the 20th century, a lot of 41 | very smart people were Marxists — just no one who was smart about 42 | the subjects Marxism involves. But if the ideas you use in your 43 | work intersect with the politics of your time, you have two choices: 44 | be an accidental moderate, or be mediocre.Notes[1] It's possible in theory for one side to be entirely right and 45 | the other to be entirely wrong. Indeed, ideologues must always 46 | believe this is the case. But historically it rarely has been.[2] For some reason the far right tend to ignore moderates rather 47 | than despise them as backsliders. I'm not sure why. Perhaps it 48 | means that the far right is less ideological than the far left. 
Or 49 | perhaps that they are more confident, or more resigned, or simply 50 | more disorganized. I just don't know.[3] Having heretical opinions doesn't mean you have to express 51 | them openly. It may be 52 | easier to have them if you don't. 53 | Thanks to Austen Allred, Trevor Blackwell, Patrick Collison, Jessica Livingston, 54 | Amjad Masad, Ryan Petersen, and Harj Taggar for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/island.txt: -------------------------------------------------------------------------------- 1 | July 2006I've discovered a handy test for figuring out what you're addicted 2 | to. Imagine you were going to spend the weekend at a friend's house 3 | on a little island off the coast of Maine. There are no shops on 4 | the island and you won't be able to leave while you're there. Also, 5 | you've never been to this house before, so you can't assume it will 6 | have more than any house might.What, besides clothes and toiletries, do you make a point of packing? 7 | That's what you're addicted to. For example, if you find yourself 8 | packing a bottle of vodka (just in case), you may want to stop and 9 | think about that.For me the list is four things: books, earplugs, a notebook, and a 10 | pen.There are other things I might bring if I thought of it, like music, 11 | or tea, but I can live without them. I'm not so addicted to caffeine 12 | that I wouldn't risk the house not having any tea, just for a 13 | weekend.Quiet is another matter. I realize it seems a bit eccentric to 14 | take earplugs on a trip to an island off the coast of Maine. If 15 | anywhere should be quiet, that should. But what if the person in 16 | the next room snored? What if there was a kid playing basketball? 17 | (Thump, thump, thump... thump.) Why risk it? Earplugs are small.Sometimes I can think with noise. If I already have momentum on 18 | some project, I can work in noisy places. 
I can edit an essay or 19 | debug code in an airport. But airports are not so bad: most of the 20 | noise is whitish. I couldn't work with the sound of a sitcom coming 21 | through the wall, or a car in the street playing thump-thump music.And of course there's another kind of thinking, when you're starting 22 | something new, that requires complete quiet. You never 23 | know when this will strike. It's just as well to carry plugs.The notebook and pen are professional equipment, as it were. Though 24 | actually there is something druglike about them, in the sense that 25 | their main purpose is to make me feel better. I hardly ever go 26 | back and read stuff I write down in notebooks. It's just that if 27 | I can't write things down, worrying about remembering one idea gets 28 | in the way of having the next. Pen and paper wick ideas.The best notebooks I've found are made by a company called Miquelrius. 29 | I use their smallest size, which is about 2.5 x 4 in. 30 | The secret to writing on such 31 | narrow pages is to break words only when you run out of space, like 32 | a Latin inscription. I use the cheapest plastic Bic ballpoints, 33 | partly because their gluey ink doesn't seep through pages, and 34 | partly so I don't worry about losing them.I only started carrying a notebook about three years ago. Before 35 | that I used whatever scraps of paper I could find. But the problem 36 | with scraps of paper is that they're not ordered. In a notebook 37 | you can guess what a scribble means by looking at the pages 38 | around it. In the scrap era I was constantly finding notes I'd 39 | written years before that might say something I needed to remember, 40 | if I could only figure out what.As for books, I know the house would probably have something to 41 | read. On the average trip I bring four books and only read one of 42 | them, because I find new books to read en route. 
Really bringing 43 | books is insurance.I realize this dependence on books is not entirely good—that what 44 | I need them for is distraction. The books I bring on trips are 45 | often quite virtuous, the sort of stuff that might be assigned 46 | reading in a college class. But I know my motives aren't virtuous. 47 | I bring books because if the world gets boring I need to be able 48 | to slip into another distilled by some writer. It's like eating 49 | jam when you know you should be eating fruit.There is a point where I'll do without books. I was walking in 50 | some steep mountains once, and decided I'd rather just think, if I 51 | was bored, rather than carry a single unnecessary ounce. It wasn't 52 | so bad. I found I could entertain myself by having ideas instead 53 | of reading other people's. If you stop eating jam, fruit starts 54 | to taste better.So maybe I'll try not bringing books on some future trip. They're 55 | going to have to pry the plugs out of my cold, dead ears, however. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/diff.txt: -------------------------------------------------------------------------------- 1 | December 2001 (rev. May 2002) 2 | 3 | (This article came about in response to some questions on 4 | the LL1 mailing list. It is now 5 | incorporated in Revenge of the Nerds.)When McCarthy designed Lisp in the late 1950s, it was 6 | a radical departure from existing languages, 7 | the most important of which was Fortran.Lisp embodied nine new ideas: 8 | 1. Conditionals. A conditional is an if-then-else 9 | construct. We take these for granted now. They were 10 | invented 11 | by McCarthy in the course of developing Lisp. 12 | (Fortran at that time only had a conditional 13 | goto, closely based on the branch instruction in the 14 | underlying hardware.) 
McCarthy, who was on the Algol committee, got 15 | conditionals into Algol, whence they spread to most other 16 | languages.2. A function type. In Lisp, functions are first class 17 | objects-- they're a data type just like integers, strings, 18 | etc, and have a literal representation, can be stored in variables, 19 | can be passed as arguments, and so on.3. Recursion. Recursion existed as a mathematical concept 20 | before Lisp of course, but Lisp was the first programming language to support 21 | it. (It's arguably implicit in making functions first class 22 | objects.)4. A new concept of variables. In Lisp, all variables 23 | are effectively pointers. Values are what 24 | have types, not variables, and assigning or binding 25 | variables means copying pointers, not what they point to.5. Garbage-collection.6. Programs composed of expressions. Lisp programs are 26 | trees of expressions, each of which returns a value. 27 | (In some Lisps expressions 28 | can return multiple values.) This is in contrast to Fortran 29 | and most succeeding languages, which distinguish between 30 | expressions and statements.It was natural to have this 31 | distinction in Fortran because (not surprisingly in a language 32 | where the input format was punched cards) the language was 33 | line-oriented. You could not nest statements. And 34 | so while you needed expressions for math to work, there was 35 | no point in making anything else return a value, because 36 | there could not be anything waiting for it.This limitation 37 | went away with the arrival of block-structured languages, 38 | but by then it was too late. The distinction between 39 | expressions and statements was entrenched. It spread from 40 | Fortran into Algol and thence to both their descendants.When a language is made entirely of expressions, you can 41 | compose expressions however you want. You can say either 42 | (using Arc syntax)(if foo (= x 1) (= x 2))or(= x (if foo 1 2))7. A symbol type. 
Symbols differ from strings in that 43 | you can test equality by comparing a pointer.8. A notation for code using trees of symbols.9. The whole language always available. 44 | There is 45 | no real distinction between read-time, compile-time, and runtime. 46 | You can compile or run code while reading, read or run code 47 | while compiling, and read or compile code at runtime.Running code at read-time lets users reprogram Lisp's syntax; 48 | running code at compile-time is the basis of macros; compiling 49 | at runtime is the basis of Lisp's use as an extension 50 | language in programs like Emacs; and reading at runtime 51 | enables programs to communicate using s-expressions, an 52 | idea recently reinvented as XML. 53 | When Lisp was first invented, all these ideas were far 54 | removed from ordinary programming practice, which was 55 | dictated largely by the hardware available in the late 1950s.Over time, the default language, embodied 56 | in a succession of popular languages, has 57 | gradually evolved toward Lisp. 1-5 are now widespread. 58 | 6 is starting to appear in the mainstream. 59 | Python has a form of 7, though there doesn't seem to be 60 | any syntax for it. 61 | 8, which (with 9) is what makes Lisp macros 62 | possible, is so far still unique to Lisp, 63 | perhaps because (a) it requires those parens, or something 64 | just as bad, and (b) if you add that final increment of power, 65 | you can no 66 | longer claim to have invented a new language, but only 67 | to have designed a new dialect of Lisp ; -)Though useful to present-day programmers, it's 68 | strange to describe Lisp in terms of its 69 | variation from the random expedients other languages 70 | adopted. That was not, probably, how McCarthy 71 | thought of it. Lisp wasn't designed to fix the mistakes 72 | in Fortran; it came about more as the byproduct of an 73 | attempt to axiomatize computation. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

Counting-Stars (★): A Multi-evidence, Position-aware, and Scalable Benchmark for Evaluating Long-Context Large Language Models

3 |
4 | 5 |
6 | 7 |
8 | 9 | In this work, we propose **a multi-evidence, position-aware, and scalable benchmark** for evaluating long-context LLMs, named **Counting-Stars**, which evaluates long-context LLMs by using two tasks: multi-evidence acquisition and multi-evidence reasoning. 10 | - **Multi-evidence**: *Counting-Stars is the most evidence-intensive evaluation in known long-context benchmarks*. 11 | - **Position-aware**: *The position of the evidence in the context can be adjusted as desired and tested in a targeted manner*. 12 | - **Scalable**: *Both the context length and the amount of evidence can be expanded arbitrarily*. 13 | 14 | 15 | Based on the Counting-Stars test, we conduct experiments to evaluate long-context LLMs (i.e., GPT-4 Turbo, Gemini 1.5 Pro, Claude3 Opus, GLM-4, and Moonshot-v1). Experimental results demonstrate that Gemini 1.5 Pro achieves the best overall results, while the performance of GPT-4 Turbo is the most stable across various tasks. Furthermore, our analysis of these LLMs, which are extended to handle long-context scenarios, indicates that there is still room for improvement as the length of the input context and the intricacy of the tasks increase. 16 | 17 | > Please find more details of this work in the [paper](https://arxiv.org/pdf/2403.11802). 18 | ## Note 19 | 20 | We'd like to encourage you to test the Counting-Stars using 21 | - Me-Acq. (EN) means the English version of Multi-evidence Acquisition in the Counting-Stars. 22 | - ```Counting_Stars_EN_acquisition_128000_32_32.jsonl``` 23 | - Me-Acq. (ZH) means the Chinese version of Multi-evidence Acquisition in the Counting-Stars. 24 | - ```Counting_Stars_ZH_acquisition_128000_32_32.jsonl``` 25 | - Me-Rea. (EN) means the English version of Multi-evidence Reasoning in the Counting-Stars. 26 | - ```Counting_Stars_EN_reasoning_128000_32_32.jsonl``` 27 | - Me-Rea. (ZH) means the Chinese version of Multi-evidence Reasoning in the Counting-Stars.
28 | - ```Counting_Stars_ZH_reasoning_128000_32_32.jsonl``` 29 | 30 | , the 128K English and Chinese versions of the Counting-Stars. 31 | 32 | 33 | |Rank|Models|Claimed Length|Me-Acq.(ZH)|Me-Acq.(EN)|Me-Rea.(ZH)|Me-Rea.(EN)|Avg.| 34 | |----|----|----|----|----|----|----|----| 35 | |1| Gemini 1.5 Pro|1M|0.775|0.833|0.575|0.371|0.639| 36 | |2| GPT-4 Turbo (1106)|128K|0.697|0.718|0.473|0.651|0.635| 37 | |3| Claude3 Opus|200K|0.807|0.705|0.488|0.374|0.594| 38 | |4| GPT-4 Turbo (0125)|128K|0.663|0.662|0.386|0.610|0.580| 39 | |5| Moonshot-v1|200K|0.606|0.559|0.344|0.460|0.492| 40 | |6| GLM-4|128K|0.682|0.389|0.475|0.179|0.431| 41 | |-| Claude3 Sonnet|200K|0.788|-|-|-|-| 42 | |-| Claude3 Haiku|200K|0.698|-|-|-|-| 43 | |-| Baichuan3-Turbo|128K|0.759|0.490|-|-|-| 44 | 45 | ## Task Description 46 | 47 |

48 | 49 |

50 | 51 | ## Evaluation Results 52 | 53 |

54 | 55 |

56 | 57 |

58 | 59 |

60 | 61 | > Visualization of the results on the Chinese version of the Counting-Stars-32-(Multi-evidence Acquisition). 62 | 63 |

64 | 65 |

66 | 67 | > Visualization of the results on the Chinese version of the Counting-Stars-32-(Multi-evidence Reasoning). 68 | 69 | ## Cite 70 | If you find our work helpful, please consider citing it. 71 | 72 | ``` 73 | @inproceedings{song-etal-2025-counting, 74 | title = "Counting-Stars: A Multi-evidence, Position-aware, and Scalable Benchmark for Evaluating Long-Context Large Language Models", 75 | author = "Song, Mingyang and Zheng, Mao and Luo, Xuan", 76 | booktitle = "Proceedings of the 31st International Conference on Computational Linguistics", 77 | year = "2025", 78 | address = "Abu Dhabi, UAE", 79 | publisher = "Association for Computational Linguistics", 80 | url = "https://aclanthology.org/2025.coling-main.253", 81 | pages = "3753--3763" 82 | } 83 | ``` 84 | 85 | ## CONTACT 86 | For any questions, feel free to create an issue, and we will try our best to solve it. \ 87 | **If the problem is urgent**, you can also email me (I check email almost daily). 88 | ``` 89 | NAME: Mingyang Song 90 | EMAIL: nickmysong@tencent.com 91 | ``` 92 | Our visualization code is built on the source code from [NeedleInAHaystack](https://github.com/gkamradt/LLMTest_NeedleInAHaystack). Thanks for their work. 93 | -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/founders.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Want to start a startup? Get funded by 4 | Y Combinator. 5 | 6 | 7 | 8 | 9 | October 2010 10 | 11 | (I wrote this for Forbes, who asked me to write something 12 | about the qualities we look for in founders. In print they had to cut 13 | the last item because they didn't have room.)1. DeterminationThis has turned out to be the most important quality in startup 14 | founders. We thought when we started Y Combinator that the most 15 | important quality would be intelligence. That's the myth in the 16 | Valley.
And certainly you don't want founders to be stupid. But 17 | as long as you're over a certain threshold of intelligence, what 18 | matters most is determination. You're going to hit a lot of 19 | obstacles. You can't be the sort of person who gets demoralized 20 | easily.Bill Clerico and Rich Aberman of WePay 21 | are a good example. They're 22 | doing a finance startup, which means endless negotiations with big, 23 | bureaucratic companies. When you're starting a startup that depends 24 | on deals with big companies to exist, it often feels like they're 25 | trying to ignore you out of existence. But when Bill Clerico starts 26 | calling you, you may as well do what he asks, because he is not 27 | going away. 28 | 2. FlexibilityYou do not however want the sort of determination implied by phrases 29 | like "don't give up on your dreams." The world of startups is so 30 | unpredictable that you need to be able to modify your dreams on the 31 | fly. The best metaphor I've found for the combination of determination 32 | and flexibility you need is a running back. 33 | He's determined to get 34 | downfield, but at any given moment he may need to go sideways or 35 | even backwards to get there.The current record holder for flexibility may be Daniel Gross of 36 | Greplin. He applied to YC with 37 | some bad ecommerce idea. We told 38 | him we'd fund him if he did something else. He thought for a second, 39 | and said ok. He then went through two more ideas before settling 40 | on Greplin. He'd only been working on it for a couple days when 41 | he presented to investors at Demo Day, but he got a lot of interest. 42 | He always seems to land on his feet. 43 | 3. ImaginationIntelligence does matter a lot of course. It seems like the type 44 | that matters most is imagination. It's not so important to be able 45 | to solve predefined problems quickly as to be able to come up with 46 | surprising new ideas. In the startup world, most good ideas 47 | seem 48 | bad initially. 
If they were obviously good, someone would already 49 | be doing them. So you need the kind of intelligence that produces 50 | ideas with just the right level of craziness.Airbnb is that kind of idea. 51 | In fact, when we funded Airbnb, we 52 | thought it was too crazy. We couldn't believe large numbers of 53 | people would want to stay in other people's places. We funded them 54 | because we liked the founders so much. As soon as we heard they'd 55 | been supporting themselves by selling Obama and McCain branded 56 | breakfast cereal, they were in. And it turned out the idea was on 57 | the right side of crazy after all. 58 | 4. NaughtinessThough the most successful founders are usually good people, they 59 | tend to have a piratical gleam in their eye. They're not Goody 60 | Two-Shoes type good. Morally, they care about getting the big 61 | questions right, but not about observing proprieties. That's why 62 | I'd use the word naughty rather than evil. They delight in 63 | breaking 64 | rules, but not rules that matter. This quality may be redundant 65 | though; it may be implied by imagination.Sam Altman of Loopt 66 | is one of the most successful alumni, so we 67 | asked him what question we could put on the Y Combinator application 68 | that would help us discover more people like him. He said to ask 69 | about a time when they'd hacked something to their advantage—hacked in the sense of beating the system, not breaking into 70 | computers. It has become one of the questions we pay most attention 71 | to when judging applications. 72 | 5. FriendshipEmpirically it seems to be hard to start a startup with just 73 | one 74 | founder. Most of the big successes have two or three. And the 75 | relationship between the founders has to be strong. They must 76 | genuinely like one another, and work well together. 
Startups do 77 | to the relationship between the founders what a dog does to a sock: 78 | if it can be pulled apart, it will be.Emmett Shear and Justin Kan of Justin.tv 79 | are a good example of close 80 | friends who work well together. They've known each other since 81 | second grade. They can practically read one another's minds. I'm 82 | sure they argue, like all founders, but I have never once sensed 83 | any unresolved tension between them.Thanks to Jessica Livingston and Chris Steiner for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/vw.txt: -------------------------------------------------------------------------------- 1 | January 2012A few hours before the Yahoo acquisition was announced in June 1998 2 | I took a snapshot of Viaweb's 3 | site. I thought it might be interesting to look at one day.The first thing one notices is how tiny the pages are. Screens 4 | were a lot smaller in 1998. If I remember correctly, our frontpage 5 | used to just fit in the size window people typically used then.Browsers then (IE 6 was still 3 years in the future) had few fonts 6 | and they weren't antialiased. If you wanted to make pages that 7 | looked good, you had to render display text as images.You may notice a certain similarity between the Viaweb and Y Combinator logos. We did that 8 | as an inside joke when we started YC. Considering how basic a red 9 | circle is, it seemed surprising to me when we started Viaweb how 10 | few other companies used one as their logo. A bit later I realized 11 | why.On the Company 12 | page you'll notice a mysterious individual called John McArtyem. 13 | Robert Morris (aka Rtm) was so publicity averse after the 14 | Worm that he 15 | didn't want his name on the site. I managed to get him to agree 16 | to a compromise: we could use his bio but not his name.
He has 17 | since relaxed a bit 18 | on that point.Trevor graduated at about the same time the acquisition closed, so in the 19 | course of 4 days he went from impecunious grad student to millionaire 20 | PhD. The culmination of my career as a writer of press releases 21 | was one celebrating 22 | his graduation, illustrated with a drawing I did of him during 23 | a meeting.(Trevor also appears as Trevino 24 | Bagwell in our directory of web designers merchants could hire 25 | to build stores for them. We inserted him as a ringer in case some 26 | competitor tried to spam our web designers. We assumed his logo 27 | would deter any actual customers, but it did not.)Back in the 90s, to get users you had to get mentioned in magazines 28 | and newspapers. There were not the same ways to get found online 29 | that there are today. So we used to pay a PR 30 | firm $16,000 a month to get us mentioned in the press. Fortunately 31 | reporters liked 32 | us.In our advice about 33 | getting traffic from search engines (I don't think the term SEO 34 | had been coined yet), we say there are only 7 that matter: Yahoo, 35 | AltaVista, Excite, WebCrawler, InfoSeek, Lycos, and HotBot. Notice 36 | anything missing? Google was incorporated that September.We supported online transactions via a company called 37 | Cybercash, 38 | since if we lacked that feature we'd have gotten beaten up in product 39 | comparisons. But Cybercash was so bad and most stores' order volumes 40 | were so low that it was better if merchants processed orders like phone orders. We had a page in our site trying to talk merchants 41 | out of doing real time authorizations.The whole site was organized like a funnel, directing people to the 42 | test drive. 43 | It was a novel thing to be able to try out software online. We put 44 | cgi-bin in our dynamic urls to fool competitors about how our 45 | software worked.We had some well 46 | known users. 
Needless to say, Frederick's of Hollywood got the 47 | most traffic. We charged a flat fee of $300/month for big stores, 48 | so it was a little alarming to have users who got lots of traffic. 49 | I once calculated how much Frederick's was costing us in bandwidth, 50 | and it was about $300/month.Since we hosted all the stores, which together were getting just 51 | over 10 million page views per month in June 1998, we consumed what 52 | at the time seemed a lot of bandwidth. We had 2 T1s (3 Mb/sec) 53 | coming into our offices. In those days there was no AWS. Even 54 | colocating servers seemed too risky, considering how often things 55 | went wrong with them. So we had our servers in our offices. Or 56 | more precisely, in Trevor's office. In return for the unique 57 | privilege of sharing his office with no other humans, he had to 58 | share it with 6 shrieking tower servers. His office was nicknamed 59 | the Hot Tub on account of the heat they generated. Most days his 60 | stack of window air conditioners could keep up.For describing pages, we had a template language called RTML, which 61 | supposedly stood for something, but which in fact I named after 62 | Rtm. RTML was Common Lisp augmented by some macros and libraries, 63 | and concealed under a structure editor that made it look like it 64 | had syntax.Since we did continuous releases, our software didn't actually have 65 | versions. But in those days the trade press expected versions, so 66 | we made them up. If we wanted to get lots of attention, we made 67 | the version number an 68 | integer. That "version 4.0" icon was generated by our own 69 | button generator, incidentally. The whole Viaweb site was made 70 | with our software, even though it wasn't an online store, because 71 | we wanted to experience what our users did.At the end of 1997, we released a general purpose shopping search 72 | engine called Shopfind. It 73 | was pretty advanced for the time. 
It had a programmable crawler 74 | that could crawl most of the different stores online and pick out 75 | the products. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/copy.txt: -------------------------------------------------------------------------------- 1 | July 2006 2 | When I was in high school I spent a lot of time imitating bad 3 | writers. What we studied in English classes was mostly fiction, 4 | so I assumed that was the highest form of writing. Mistake number 5 | one. The stories that seemed to be most admired were ones in which 6 | people suffered in complicated ways. Anything funny or 7 | gripping was ipso facto suspect, unless it was old enough to be hard to 8 | understand, like Shakespeare or Chaucer. Mistake number two. The 9 | ideal medium seemed the short story, which I've since learned had 10 | quite a brief life, roughly coincident with the peak of magazine 11 | publishing. But since their size made them perfect for use in 12 | high school classes, we read a lot of them, which gave us the 13 | impression the short story was flourishing. Mistake number three. 14 | And because they were so short, nothing really had to happen; you 15 | could just show a randomly truncated slice of life, and that was 16 | considered advanced. Mistake number four. The result was that I 17 | wrote a lot of stories in which nothing happened except that someone 18 | was unhappy in a way that seemed deep.For most of college I was a philosophy major. I was very impressed 19 | by the papers published in philosophy journals. They were so 20 | beautifully typeset, and their tone was just captivating—alternately 21 | casual and buffer-overflowingly technical. A fellow would be walking 22 | along a street and suddenly modality qua modality would spring upon 23 | him. I didn't ever quite understand these papers, but I figured 24 | I'd get around to that later, when I had time to reread them more 25 | closely. 
In the meantime I tried my best to imitate them. This 26 | was, I can now see, a doomed undertaking, because they weren't 27 | really saying anything. No philosopher ever refuted another, for 28 | example, because no one said anything definite enough to refute. 29 | Needless to say, my imitations didn't say anything either.In grad school I was still wasting time imitating the wrong things. 30 | There was then a fashionable type of program called an expert system, 31 | at the core of which was something called an inference engine. I 32 | looked at what these things did and thought "I could write that in 33 | a thousand lines of code." And yet eminent professors were writing 34 | books about them, and startups were selling them for a year's salary 35 | a copy. What an opportunity, I thought; these impressive things 36 | seem easy to me; I must be pretty sharp. Wrong. It was simply a 37 | fad. The books the professors wrote about expert systems are now 38 | ignored. They were not even on a path to anything interesting. 39 | And the customers paying so much for them were largely the same 40 | government agencies that paid thousands for screwdrivers and toilet 41 | seats.How do you avoid copying the wrong things? Copy only what you 42 | genuinely like. That would have saved me in all three cases. I 43 | didn't enjoy the short stories we had to read in English classes; 44 | I didn't learn anything from philosophy papers; I didn't use expert 45 | systems myself. I believed these things were good because they 46 | were admired.It can be hard to separate the things you like from the things 47 | you're impressed with. One trick is to ignore presentation. Whenever 48 | I see a painting impressively hung in a museum, I ask myself: how 49 | much would I pay for this if I found it at a garage sale, dirty and 50 | frameless, and with no idea who painted it? If you walk around a 51 | museum trying this experiment, you'll find you get some truly 52 | startling results. 
Don't ignore this data point just because it's 53 | an outlier.Another way to figure out what you like is to look at what you enjoy 54 | as guilty pleasures. Many things people like, especially if they're 55 | young and ambitious, they like largely for the feeling of virtue 56 | in liking them. 99% of people reading Ulysses are thinking 57 | "I'm reading Ulysses" as they do it. A guilty pleasure is 58 | at least a pure one. What do you read when you don't feel up to being 59 | virtuous? What kind of book do you read and feel sad that there's 60 | only half of it left, instead of being impressed that you're half 61 | way through? That's what you really like.Even when you find genuinely good things to copy, there's another 62 | pitfall to be avoided. Be careful to copy what makes them good, 63 | rather than their flaws. It's easy to be drawn into imitating 64 | flaws, because they're easier to see, and of course easier to copy 65 | too. For example, most painters in the eighteenth and nineteenth 66 | centuries used brownish colors. They were imitating the great 67 | painters of the Renaissance, whose paintings by that time were brown 68 | with dirt. Those paintings have since been cleaned, revealing 69 | brilliant colors; their imitators are of course still brown.It was painting, incidentally, that cured me of copying the wrong 70 | things. Halfway through grad school I decided I wanted to try being 71 | a painter, and the art world was so manifestly corrupt that it 72 | snapped the leash of credulity. These people made philosophy 73 | professors seem as scrupulous as mathematicians. It was so clearly 74 | a choice of doing good work xor being an insider that I was forced 75 | to see the distinction. It's there to some degree in almost every 76 | field, but I had till then managed to avoid facing it.That was one of the most valuable things I learned from painting: 77 | you have to figure out for yourself what's 78 | good. You can't trust 79 | authorities. 
They'll lie to you on this one. 80 | 81 | Comment on this essay. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/goodtaste.txt: -------------------------------------------------------------------------------- 1 | November 2021(This essay is derived from a talk at the Cambridge Union.)When I was a kid, I'd have said there wasn't. My father told me so. 2 | Some people like some things, and other people like other things, 3 | and who's to say who's right?It seemed so obvious that there was no such thing as good taste 4 | that it was only through indirect evidence that I realized my father 5 | was wrong. And that's what I'm going to give you here: a proof by 6 | reductio ad absurdum. If we start from the premise that there's no 7 | such thing as good taste, we end up with conclusions that are 8 | obviously false, and therefore the premise must be wrong.We'd better start by saying what good taste is. There's a narrow 9 | sense in which it refers to aesthetic judgements and a broader one 10 | in which it refers to preferences of any kind. The strongest proof 11 | would be to show that taste exists in the narrowest sense, so I'm 12 | going to talk about taste in art. You have better taste than me if 13 | the art you like is better than the art I like.If there's no such thing as good taste, then there's no such thing 14 | as good art. Because if there is such a 15 | thing as good art, it's 16 | easy to tell which of two people has better taste. Show them a lot 17 | of works by artists they've never seen before and ask them to 18 | choose the best, and whoever chooses the better art has better 19 | taste.So if you want to discard the concept of good taste, you also have 20 | to discard the concept of good art. And that means you have to 21 | discard the possibility of people being good at making it. Which 22 | means there's no way for artists to be good at their jobs. 
And not 23 | just visual artists, but anyone who is in any sense an artist. You 24 | can't have good actors, or novelists, or composers, or dancers 25 | either. You can have popular novelists, but not good ones. We don't realize how far we'd have to go if we discarded the concept 26 | of good taste, because we don't even debate the most obvious cases. 27 | But it doesn't just mean we can't say which of two famous painters 28 | is better. It means we can't say that any painter is better than a 29 | randomly chosen eight year old. That was how I realized my father was wrong. I started studying 30 | painting. And it was just like other kinds of work I'd done: you 31 | could do it well, or badly, and if you tried hard, you could get 32 | better at it. And it was obvious that Leonardo and Bellini were 33 | much better at it than me. That gap between us was not imaginary. 34 | They were so good. And if they could be good, then art could be 35 | good, and there was such a thing as good taste after all. Now that I've explained how to show there is such a thing as good 36 | taste, I should also explain why people think there isn't. There 37 | are two reasons. One is that there's always so much disagreement 38 | about taste. Most people's response to art is a tangle of unexamined 39 | impulses. Is the artist famous? Is the subject attractive? Is this 40 | the sort of art they're supposed to like? Is it hanging in a famous 41 | museum, or reproduced in a big, expensive book? In practice most 42 | people's response to art is dominated by such extraneous factors. And the people who do claim to have good taste are so often mistaken. 43 | The paintings admired by the so-called experts in one generation 44 | are often so different from those admired a few generations later. 45 | It's easy to conclude there's nothing real there at all.
It's only 46 | when you isolate this force, for example by trying to paint and 47 | comparing your work to Bellini's, that you can see that it does in 48 | fact exist. The other reason people doubt that art can be good is that there 49 | doesn't seem to be any room in the art for this goodness. The 50 | argument goes like this. Imagine several people looking at a work 51 | of art and judging how good it is. If being good art really is a 52 | property of objects, it should be in the object somehow. But it 53 | doesn't seem to be; it seems to be something happening in the heads 54 | of each of the observers. And if they disagree, how do you choose 55 | between them? The solution to this puzzle is to realize that the purpose of art 56 | is to work on its human audience, and humans have a lot in common. 57 | And to the extent the things an object acts upon respond in the 58 | same way, that's arguably what it means for the object to have the 59 | corresponding property. If everything a particle interacts with 60 | behaves as if the particle had a mass of m, then it has a mass of 61 | m. So the distinction between "objective" and "subjective" is not 62 | binary, but a matter of degree, depending on how much the subjects 63 | have in common. Particles interacting with one another are at one 64 | pole, but people interacting with art are not all the way at the 65 | other; their reactions aren't random. Because people's responses to art aren't random, art can be designed 66 | to operate on people, and be good or bad depending on how effectively 67 | it does so. Much as a vaccine can be. If someone were talking about 68 | the ability of a vaccine to confer immunity, it would seem very 69 | frivolous to object that conferring immunity wasn't really a property 70 | of vaccines, because acquiring immunity is something that happens 71 | in the immune system of each individual person.
Sure, people's 72 | immune systems vary, and a vaccine that worked on one might not 73 | work on another, but that doesn't make it meaningless to talk about 74 | the effectiveness of a vaccine.The situation with art is messier, of course. You can't measure 75 | effectiveness by simply taking a vote, as you do with vaccines. 76 | You have to imagine the responses of subjects with a deep knowledge 77 | of art, and enough clarity of mind to be able to ignore extraneous 78 | influences like the fame of the artist. And even then you'd still 79 | see some disagreement. People do vary, and judging art is hard, 80 | especially recent art. There is definitely not a total order either 81 | of works or of people's ability to judge them. But there is equally 82 | definitely a partial order of both. So while it's not possible to 83 | have perfect taste, it is possible to have good taste. 84 | Thanks to the Cambridge Union for inviting me, and to Trevor 85 | Blackwell, Jessica Livingston, and Robert Morris for reading drafts 86 | of this. 87 | -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/ecw.txt: -------------------------------------------------------------------------------- 1 | December 2014If the world were static, we could have monotonically increasing 2 | confidence in our beliefs. The more (and more varied) experience 3 | a belief survived, the less likely it would be false. Most people 4 | implicitly believe something like this about their opinions. And 5 | they're justified in doing so with opinions about things that don't 6 | change much, like human nature. But you can't trust your opinions 7 | in the same way about things that change, which could include 8 | practically everything else.When experts are wrong, it's often because they're experts on an 9 | earlier version of the world.Is it possible to avoid that? Can you protect yourself against 10 | obsolete beliefs? To some extent, yes. 
I spent almost a decade 11 | investing in early stage startups, and curiously enough protecting 12 | yourself against obsolete beliefs is exactly what you have to do 13 | to succeed as a startup investor. Most really good startup ideas 14 | look like bad ideas at first, and many of those look bad specifically 15 | because some change in the world just switched them from bad to 16 | good. I spent a lot of time learning to recognize such ideas, and 17 | the techniques I used may be applicable to ideas in general.The first step is to have an explicit belief in change. People who 18 | fall victim to a monotonically increasing confidence in their 19 | opinions are implicitly concluding the world is static. If you 20 | consciously remind yourself it isn't, you start to look for change.Where should one look for it? Beyond the moderately useful 21 | generalization that human nature doesn't change much, the unfortunate 22 | fact is that change is hard to predict. This is largely a tautology 23 | but worth remembering all the same: change that matters usually 24 | comes from an unforeseen quarter.So I don't even try to predict it. When I get asked in interviews 25 | to predict the future, I always have to struggle to come up with 26 | something plausible-sounding on the fly, like a student who hasn't 27 | prepared for an exam. 28 | [1] 29 | But it's not out of laziness that I haven't 30 | prepared. It seems to me that beliefs about the future are so 31 | rarely correct that they usually aren't worth the extra rigidity 32 | they impose, and that the best strategy is simply to be aggressively 33 | open-minded. Instead of trying to point yourself in the right 34 | direction, admit you have no idea what the right direction is, and 35 | try instead to be super sensitive to the winds of change.It's ok to have working hypotheses, even though they may constrain 36 | you a bit, because they also motivate you. It's exciting to chase 37 | things and exciting to try to guess answers. 
But you have to be 38 | disciplined about not letting your hypotheses harden into anything 39 | more. 40 | [2]I believe this passive m.o. works not just for evaluating new ideas 41 | but also for having them. The way to come up with new ideas is not 42 | to try explicitly to, but to try to solve problems and simply not 43 | discount weird hunches you have in the process.The winds of change originate in the unconscious minds of domain 44 | experts. If you're sufficiently expert in a field, any weird idea 45 | or apparently irrelevant question that occurs to you is ipso facto 46 | worth exploring. 47 | [3] 48 | Within Y Combinator, when an idea is described 49 | as crazy, it's a compliment—in fact, on average probably a 50 | higher compliment than when an idea is described as good.Startup investors have extraordinary incentives for correcting 51 | obsolete beliefs. If they can realize before other investors that 52 | some apparently unpromising startup isn't, they can make a huge 53 | amount of money. But the incentives are more than just financial. 54 | Investors' opinions are explicitly tested: startups come to them 55 | and they have to say yes or no, and then, fairly quickly, they learn 56 | whether they guessed right. The investors who say no to a Google 57 | (and there were several) will remember it for the rest of their 58 | lives.Anyone who must in some sense bet on ideas rather than merely 59 | commenting on them has similar incentives. Which means anyone who 60 | wants such incentives can have them, by turning their comments into 61 | bets: if you write about a topic in some fairly durable and public 62 | form, you'll find you worry much more about getting things right 63 | than most people would in a casual conversation. 64 | [4]Another trick I've found to protect myself against obsolete beliefs 65 | is to focus initially on people rather than ideas. 
Though the nature 66 | of future discoveries is hard to predict, I've found I can predict 67 | quite well what sort of people will make them. Good new ideas come 68 | from earnest, energetic, independent-minded people.Betting on people over ideas saved me countless times as an investor. 69 | We thought Airbnb was a bad idea, for example. But we could tell 70 | the founders were earnest, energetic, and independent-minded. 71 | (Indeed, almost pathologically so.) So we suspended disbelief and 72 | funded them.This too seems a technique that should be generally applicable. 73 | Surround yourself with the sort of people new ideas come from. If 74 | you want to notice quickly when your beliefs become obsolete, you 75 | can't do better than to be friends with the people whose discoveries 76 | will make them so.It's hard enough already not to become the prisoner of your own 77 | expertise, but it will only get harder, because change is accelerating. 78 | That's not a recent trend; change has been accelerating since the 79 | paleolithic era. Ideas beget ideas. I don't expect that to change. 80 | But I could be wrong. 81 | Notes[1] 82 | My usual trick is to talk about aspects of the present that 83 | most people haven't noticed yet.[2] 84 | Especially if they become well enough known that people start 85 | to identify them with you. You have to be extra skeptical about 86 | things you want to believe, and once a hypothesis starts to be 87 | identified with you, it will almost certainly start to be in that 88 | category.[3] 89 | In practice "sufficiently expert" doesn't require one to be 90 | recognized as an expert—which is a trailing indicator in any 91 | case. In many fields a year of focused work plus caring a lot would 92 | be enough.[4] 93 | Though they are public and persist indefinitely, comments on 94 | e.g. forums and places like Twitter seem empirically to work like 95 | casual conversation. The threshold may be whether what you write 96 | has a title. 
97 | Thanks to Sam Altman, Patrick Collison, and Robert Morris 98 | for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/corpdev.txt: -------------------------------------------------------------------------------- 1 | January 2015Corporate Development, aka corp dev, is the group within companies 2 | that buys other companies. If you're talking to someone from corp 3 | dev, that's why, whether you realize it yet or not.It's usually a mistake to talk to corp dev unless (a) you want to 4 | sell your company right now and (b) you're sufficiently likely to 5 | get an offer at an acceptable price. In practice that means startups 6 | should only talk to corp dev when they're either doing really well 7 | or really badly. If you're doing really badly, meaning the company 8 | is about to die, you may as well talk to them, because you have 9 | nothing to lose. And if you're doing really well, you can safely 10 | talk to them, because you both know the price will have to be high, 11 | and if they show the slightest sign of wasting your time, you'll 12 | be confident enough to tell them to get lost.The danger is to companies in the middle. Particularly to young 13 | companies that are growing fast, but haven't been doing it for long 14 | enough to have grown big yet. It's usually a mistake for a promising 15 | company less than a year old even to talk to corp dev.But it's a mistake founders constantly make. When someone from 16 | corp dev wants to meet, the founders tell themselves they should 17 | at least find out what they want. Besides, they don't want to 18 | offend Big Company by refusing to meet.Well, I'll tell you what they want. They want to talk about buying 19 | you. That's what the title "corp dev" means. So before agreeing 20 | to meet with someone from corp dev, ask yourselves, "Do we want to 21 | sell the company right now?" 
And if the answer is no, tell them 22 | "Sorry, but we're focusing on growing the company." They won't be 23 | offended. And certainly the founders of Big Company won't be 24 | offended. If anything they'll think more highly of you. You'll 25 | remind them of themselves. They didn't sell either; that's why 26 | they're in a position now to buy other companies. 27 | [1]Most founders who get contacted by corp dev already know what it 28 | means. And yet even when they know what corp dev does and know 29 | they don't want to sell, they take the meeting. Why do they do it? 30 | The same mix of denial and wishful thinking that underlies most 31 | mistakes founders make. It's flattering to talk to someone who wants 32 | to buy you. And who knows, maybe their offer will be surprisingly 33 | high. You should at least see what it is, right?No. If they were going to send you an offer immediately by email, 34 | sure, you might as well open it. But that is not how conversations 35 | with corp dev work. If you get an offer at all, it will be at the 36 | end of a long and unbelievably distracting process. And if the 37 | offer is surprising, it will be surprisingly low.Distractions are the thing you can least afford in a startup. And 38 | conversations with corp dev are the worst sort of distraction, 39 | because as well as consuming your attention they undermine your 40 | morale. One of the tricks to surviving a grueling process is not 41 | to stop and think how tired you are. Instead you get into a sort 42 | of flow. 43 | [2] 44 | Imagine what it would do to you if at mile 20 of a 45 | marathon, someone ran up beside you and said "You must feel really 46 | tired. Would you like to stop and take a rest?" Conversations 47 | with corp dev are like that but worse, because the suggestion of 48 | stopping gets combined in your mind with the imaginary high price 49 | you think they'll offer.And then you're really in trouble. 
If they can, corp dev people 50 | like to turn the tables on you. They like to get you to the point 51 | where you're trying to convince them to buy instead of them trying 52 | to convince you to sell. And surprisingly often they succeed. This is a very slippery slope, greased with some of the most powerful 53 | forces that can work on founders' minds, and attended by an experienced 54 | professional whose full time job is to push you down it. Their tactics in pushing you down that slope are usually fairly 55 | brutal. Corp dev people's whole job is to buy companies, and they 56 | don't even get to choose which. The only way their performance is 57 | measured is by how cheaply they can buy you, and the more ambitious 58 | ones will stop at nothing to achieve that. For example, they'll 59 | almost always start with a lowball offer, just to see if you'll 60 | take it. Even if you don't, a low initial offer will demoralize you 61 | and make you easier to manipulate. And that is the most innocent of their tactics. Just wait till 62 | you've agreed on a price and think you have a done deal, and then 63 | they come back and say their boss has vetoed the deal and won't do 64 | it for more than half the agreed upon price. Happens all the time. 65 | If you think investors can behave badly, it's nothing compared to 66 | what corp dev people can do. Even corp dev people at companies 67 | that are otherwise benevolent. I remember once complaining to a 68 | friend at Google about some nasty trick their corp dev people had 69 | pulled on a YC startup. "What happened to Don't be Evil?" I asked. "I don't think corp dev got the memo," he replied. The tactics you encounter in M&A conversations can be like nothing 70 | you've experienced in the otherwise comparatively 71 | upstanding world 72 | of Silicon Valley. It's as if a chunk of genetic material from the 73 | old-fashioned robber baron business world got incorporated into the 74 | startup world.
75 | [3]The simplest way to protect yourself is to use the trick that John 76 | D. Rockefeller, whose grandfather was an alcoholic, used to protect 77 | himself from becoming one. He once told a Sunday school class 78 | 79 | Boys, do you know why I never became a drunkard? Because I never 80 | took the first drink. 81 | 82 | Do you want to sell your company right now? Not eventually, right 83 | now. If not, just don't take the first meeting. They won't be 84 | offended. And you in turn will be guaranteed to be spared one of 85 | the worst experiences that can happen to a startup.If you do want to sell, there's another set of 86 | techniques 87 | for doing 88 | that. But the biggest mistake founders make in dealing with corp 89 | dev is not doing a bad job of talking to them when they're ready 90 | to, but talking to them before they are. So if you remember only 91 | the title of this essay, you already know most of what you need to 92 | know about M&A in the first year.Notes[1] 93 | I'm not saying you should never sell. I'm saying you should 94 | be clear in your own mind about whether you want to sell or not, 95 | and not be led by manipulation or wishful thinking into trying to 96 | sell earlier than you otherwise would have.[2] 97 | In a startup, as in most competitive sports, the task at hand 98 | almost does this for you; you're too busy to feel tired. But when 99 | you lose that protection, e.g. at the final whistle, the fatigue 100 | hits you like a wave. To talk to corp dev is to let yourself feel 101 | it mid-game.[3] 102 | To be fair, the apparent misdeeds of corp dev people are magnified 103 | by the fact that they function as the face of a large organization 104 | that often doesn't know its own mind. 
Acquirers can be surprisingly 105 | indecisive about acquisitions, and their flakiness is indistinguishable 106 | from dishonesty by the time it filters down to you.Thanks to Marc Andreessen, Jessica Livingston, Geoff 107 | Ralston, and Qasar Younis for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/addiction.txt: -------------------------------------------------------------------------------- 1 | July 2010What hard liquor, cigarettes, heroin, and crack have in common is 2 | that they're all more concentrated forms of less addictive predecessors. 3 | Most if not all the things we describe as addictive are. And the 4 | scary thing is, the process that created them is accelerating.We wouldn't want to stop it. It's the same process that cures 5 | diseases: technological progress. Technological progress means 6 | making things do more of what we want. When the thing we want is 7 | something we want to want, we consider technological progress good. 8 | If some new technique makes solar cells x% more efficient, that 9 | seems strictly better. When progress concentrates something we 10 | don't want to want—when it transforms opium into heroin—it seems 11 | bad. But it's the same process at work. 12 | [1]No one doubts this process is accelerating, which means increasing 13 | numbers of things we like will be transformed into things we like 14 | too much. 15 | [2]As far as I know there's no word for something we like too much. 16 | The closest is the colloquial sense of "addictive." That usage has 17 | become increasingly common during my lifetime. And it's clear why: 18 | there are an increasing number of things we need it for. At the 19 | extreme end of the spectrum are crack and meth. 
Food has been 20 | transformed by a combination of factory farming and innovations in 21 | food processing into something with way more immediate bang for the 22 | buck, and you can see the results in any town in America. Checkers 23 | and solitaire have been replaced by World of Warcraft and FarmVille. 24 | TV has become much more engaging, and even so it can't compete with Facebook.The world is more addictive than it was 40 years ago. And unless 25 | the forms of technological progress that produced these things are 26 | subject to different laws than technological progress in general, 27 | the world will get more addictive in the next 40 years than it did 28 | in the last 40.The next 40 years will bring us some wonderful things. I don't 29 | mean to imply they're all to be avoided. Alcohol is a dangerous 30 | drug, but I'd rather live in a world with wine than one without. 31 | Most people can coexist with alcohol; but you have to be careful. 32 | More things we like will mean more things we have to be careful 33 | about.Most people won't, unfortunately. Which means that as the world 34 | becomes more addictive, the two senses in which one can live a 35 | normal life will be driven ever further apart. One sense of "normal" 36 | is statistically normal: what everyone else does. The other is the 37 | sense we mean when we talk about the normal operating range of a 38 | piece of machinery: what works best.These two senses are already quite far apart. Already someone 39 | trying to live well would seem eccentrically abstemious in most of 40 | the US. That phenomenon is only going to become more pronounced. 41 | You can probably take it as a rule of thumb from now on that if 42 | people don't think you're weird, you're living badly.Societies eventually develop antibodies to addictive new things. 43 | I've seen that happen with cigarettes. 
When cigarettes first 44 | appeared, they spread the way an infectious disease spreads through 45 | a previously isolated population. Smoking rapidly became a 46 | (statistically) normal thing. There were ashtrays everywhere. We 47 | had ashtrays in our house when I was a kid, even though neither of 48 | my parents smoked. You had to for guests. As knowledge spread about the dangers of smoking, customs changed. 49 | In the last 20 years, smoking has been transformed from something 50 | that seemed totally normal into a rather seedy habit: from something 51 | movie stars did in publicity shots to something small huddles of 52 | addicts do outside the doors of office buildings. A lot of the 53 | change was due to legislation, of course, but the legislation 54 | couldn't have happened if customs hadn't already changed. It took a while though—on the order of 100 years. And unless the 55 | rate at which social antibodies evolve can increase to match the 56 | accelerating rate at which technological progress throws off new 57 | addictions, we'll be increasingly unable to rely on customs to 58 | protect us. 59 | [3] 60 | Unless we want to be canaries in the coal mine 61 | of each new addiction—the people whose sad example becomes a 62 | lesson to future generations—we'll have to figure out for ourselves 63 | what to avoid and how. It will actually become a reasonable strategy 64 | (or a more reasonable strategy) to suspect 65 | everything new. In fact, even that won't be enough. We'll have to worry not just 66 | about new things, but also about existing things becoming more 67 | addictive. That's what bit me. I've avoided most addictions, but 68 | the Internet got me because it became addictive while I was using 69 | it. 70 | [4] Most people I know have problems with Internet addiction. We're 71 | all trying to figure out our own customs for getting free of it.
72 | That's why I don't have an iPhone, for example; the last thing I 73 | want is for the Internet to follow me out into the world. 74 | [5] 75 | My latest trick is taking long hikes. I used to think running was a 76 | better form of exercise than hiking because it took less time. Now 77 | the slowness of hiking seems an advantage, because the longer I 78 | spend on the trail, the longer I have to think without interruption.Sounds pretty eccentric, doesn't it? It always will when you're 79 | trying to solve problems where there are no customs yet to guide 80 | you. Maybe I can't plead Occam's razor; maybe I'm simply eccentric. 81 | But if I'm right about the acceleration of addictiveness, then this 82 | kind of lonely squirming to avoid it will increasingly be the fate 83 | of anyone who wants to get things done. We'll increasingly be 84 | defined by what we say no to. 85 | Notes[1] 86 | Could you restrict technological progress to areas where you 87 | wanted it? Only in a limited way, without becoming a police state. 88 | And even then your restrictions would have undesirable side effects. 89 | "Good" and "bad" technological progress aren't sharply differentiated, 90 | so you'd find you couldn't slow the latter without also slowing the 91 | former. And in any case, as Prohibition and the "war on drugs" 92 | show, bans often do more harm than good.[2] 93 | Technology has always been accelerating. By Paleolithic 94 | standards, technology evolved at a blistering pace in the Neolithic 95 | period.[3] 96 | Unless we mass produce social customs. I suspect the recent 97 | resurgence of evangelical Christianity in the US is partly a reaction 98 | to drugs. In desperation people reach for the sledgehammer; if 99 | their kids won't listen to them, maybe they'll listen to God. But 100 | that solution has broader consequences than just getting kids to 101 | say no to drugs. You end up saying no to 102 | science as well. 
103 | I worry we may be heading for a future in which only a few people 104 | plot their own itinerary through no-land, while everyone else books 105 | a package tour. Or worse still, has one booked for them by the 106 | government.[4] 107 | People commonly use the word "procrastination" to describe 108 | what they do on the Internet. It seems to me too mild to describe 109 | what's happening as merely not-doing-work. We don't call it 110 | procrastination when someone gets drunk instead of working.[5] 111 | Several people have told me they like the iPad because it 112 | lets them bring the Internet into situations where a laptop would 113 | be too conspicuous. In other words, it's a hip flask. (This is 114 | true of the iPhone too, of course, but this advantage isn't as 115 | obvious because it reads as a phone, and everyone's used to those.)Thanks to Sam Altman, Patrick Collison, Jessica Livingston, and 116 | Robert Morris for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/newideas.txt: -------------------------------------------------------------------------------- 1 | May 2021There's one kind of opinion I'd be very afraid to express publicly. 2 | If someone I knew to be both a domain expert and a reasonable person 3 | proposed an idea that sounded preposterous, I'd be very reluctant 4 | to say "That will never work."Anyone who has studied the history of ideas, and especially the 5 | history of science, knows that's how big things start. Someone 6 | proposes an idea that sounds crazy, most people dismiss it, then 7 | it gradually takes over the world.Most implausible-sounding ideas are in fact bad and could be safely 8 | dismissed. But not when they're proposed by reasonable domain 9 | experts. If the person proposing the idea is reasonable, then they 10 | know how implausible it sounds. And yet they're proposing it anyway. 11 | That suggests they know something you don't. 
And if they have deep 12 | domain expertise, that's probably the source of it. 13 | [1]Such ideas are not merely unsafe to dismiss, but disproportionately 14 | likely to be interesting. When the average person proposes an 15 | implausible-sounding idea, its implausibility is evidence of their 16 | incompetence. But when a reasonable domain expert does it, the 17 | situation is reversed. There's something like an efficient market 18 | here: on average the ideas that seem craziest will, if correct, 19 | have the biggest effect. So if you can eliminate the theory that 20 | the person proposing an implausible-sounding idea is incompetent, 21 | its implausibility switches from evidence that it's boring to 22 | evidence that it's exciting. 23 | [2]Such ideas are not guaranteed to work. But they don't have to be. 24 | They just have to be sufficiently good bets — to have sufficiently 25 | high expected value. And I think on average they do. I think if you 26 | bet on the entire set of implausible-sounding ideas proposed by 27 | reasonable domain experts, you'd end up net ahead.The reason is that everyone is too conservative. The word "paradigm" 28 | is overused, but this is a case where it's warranted. Everyone is 29 | too much in the grip of the current paradigm. Even the people who 30 | have the new ideas undervalue them initially. Which means that 31 | before they reach the stage of proposing them publicly, they've 32 | already subjected them to an excessively strict filter. 33 | [3]The wise response to such an idea is not to make statements, but 34 | to ask questions, because there's a real mystery here. Why has this 35 | smart and reasonable person proposed an idea that seems so wrong? 36 | Are they mistaken, or are you? One of you has to be. If you're the 37 | one who's mistaken, that would be good to know, because it means 38 | there's a hole in your model of the world. But even if they're 39 | mistaken, it should be interesting to learn why. 
A trap that an 40 | expert falls into is one you have to worry about too.This all seems pretty obvious. And yet there are clearly a lot of 41 | people who don't share my fear of dismissing new ideas. Why do they 42 | do it? Why risk looking like a jerk now and a fool later, instead 43 | of just reserving judgement?One reason they do it is envy. If you propose a radical new idea 44 | and it succeeds, your reputation (and perhaps also your wealth) 45 | will increase proportionally. Some people would be envious if that 46 | happened, and this potential envy propagates back into a conviction 47 | that you must be wrong.Another reason people dismiss new ideas is that it's an easy way 48 | to seem sophisticated. When a new idea first emerges, it usually 49 | seems pretty feeble. It's a mere hatchling. Received wisdom is a 50 | full-grown eagle by comparison. So it's easy to launch a devastating 51 | attack on a new idea, and anyone who does will seem clever to those 52 | who don't understand this asymmetry.This phenomenon is exacerbated by the difference between how those 53 | working on new ideas and those attacking them are rewarded. The 54 | rewards for working on new ideas are weighted by the value of the 55 | outcome. So it's worth working on something that only has a 10% 56 | chance of succeeding if it would make things more than 10x better. 57 | Whereas the rewards for attacking new ideas are roughly constant; 58 | such attacks seem roughly equally clever regardless of the target.People will also attack new ideas when they have a vested interest 59 | in the old ones. It's not surprising, for example, that some of 60 | Darwin's harshest critics were churchmen. People build whole careers 61 | on some ideas. When someone claims they're false or obsolete, they 62 | feel threatened.The lowest form of dismissal is mere factionalism: to automatically 63 | dismiss any idea associated with the opposing faction. 
The lowest 64 | form of all is to dismiss an idea because of who proposed it.But the main thing that leads reasonable people to dismiss new ideas 65 | is the same thing that holds people back from proposing them: the 66 | sheer pervasiveness of the current paradigm. It doesn't just affect 67 | the way we think; it is the Lego blocks we build thoughts out of. 68 | Popping out of the current paradigm is something only a few people 69 | can do. And even they usually have to suppress their intuitions at 70 | first, like a pilot flying through cloud who has to trust his 71 | instruments over his sense of balance. 72 | [4]Paradigms don't just define our present thinking. They also vacuum 73 | up the trail of crumbs that led to them, making our standards for 74 | new ideas impossibly high. The current paradigm seems so perfect 75 | to us, its offspring, that we imagine it must have been accepted 76 | completely as soon as it was discovered — that whatever the church thought 77 | of the heliocentric model, astronomers must have been convinced as 78 | soon as Copernicus proposed it. Far, in fact, from it. Copernicus 79 | published the heliocentric model in 1532, but it wasn't till the 80 | mid seventeenth century that the balance of scientific opinion 81 | shifted in its favor. 82 | [5]Few understand how feeble new ideas look when they first appear. 83 | So if you want to have new ideas yourself, one of the most valuable 84 | things you can do is to learn what they look like when they're born. 85 | Read about how new ideas happened, and try to get yourself into the 86 | heads of people at the time. How did things look to them, when the 87 | new idea was only half-finished, and even the person who had it was 88 | only half-convinced it was right?But you don't have to stop at history. You can observe big new ideas 89 | being born all around you right now. 
Just look for a reasonable 90 | domain expert proposing something that sounds wrong.If you're nice, as well as wise, you won't merely resist attacking 91 | such people, but encourage them. Having new ideas is a lonely 92 | business. Only those who've tried it know how lonely. These people 93 | need your help. And if you help them, you'll probably learn something 94 | in the process.Notes[1] 95 | This domain expertise could be in another field. Indeed, 96 | such crossovers tend to be particularly promising.[2] 97 | I'm not claiming this principle extends much beyond math, 98 | engineering, and the hard sciences. In politics, for example, 99 | crazy-sounding ideas generally are as bad as they sound. Though 100 | arguably this is not an exception, because the people who propose 101 | them are not in fact domain experts; politicians are domain experts 102 | in political tactics, like how to get elected and how to get 103 | legislation passed, but not in the world that policy acts upon. 104 | Perhaps no one could be.[3] 105 | This sense of "paradigm" was defined by Thomas Kuhn in his 106 | Structure of Scientific Revolutions, but I also recommend his 107 | Copernican Revolution, where you can see him at work developing the 108 | idea.[4] 109 | This is one reason people with a touch of Asperger's may have 110 | an advantage in discovering new ideas. They're always flying on 111 | instruments.[5] 112 | Hall, Rupert. From Galileo to Newton. Collins, 1963. This 113 | book is particularly good at getting into contemporaries' heads.Thanks to Trevor Blackwell, Patrick Collison, Suhail Doshi, Daniel 114 | Gackle, Jessica Livingston, and Robert Morris for reading drafts of this. 
-------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/aord.txt: -------------------------------------------------------------------------------- 1 | October 2015When I talk to a startup that's been operating for more than 8 or 2 | 9 months, the first thing I want to know is almost always the same. 3 | Assuming their expenses remain constant and their revenue growth 4 | is what it has been over the last several months, do they make it to 5 | profitability on the money they have left? Or to put it more 6 | dramatically, by default do they live or die?The startling thing is how often the founders themselves don't know. 7 | Half the founders I talk to don't know whether they're default alive 8 | or default dead.If you're among that number, Trevor Blackwell has made a handy 9 | calculator you can use to find out.The reason I want to know first whether a startup is default alive 10 | or default dead is that the rest of the conversation depends on the 11 | answer. If the company is default alive, we can talk about ambitious 12 | new things they could do. If it's default dead, we probably need 13 | to talk about how to save it. We know the current trajectory ends 14 | badly. How can they get off that trajectory?Why do so few founders know whether they're default alive or default 15 | dead? Mainly, I think, because they're not used to asking that. 16 | It's not a question that makes sense to ask early on, any more than 17 | it makes sense to ask a 3 year old how he plans to support 18 | himself. But as the company grows older, the question switches from 19 | meaningless to critical. That kind of switch often takes people 20 | by surprise.I propose the following solution: instead of starting to ask too 21 | late whether you're default alive or default dead, start asking too 22 | early. It's hard to say precisely when the question switches 23 | polarity. 
But it's probably not that dangerous to start worrying 24 | too early that you're default dead, whereas it's very dangerous to 25 | start worrying too late.The reason is a phenomenon I wrote about earlier: the 26 | fatal pinch. 27 | The fatal pinch is default dead + slow growth + not enough 28 | time to fix it. And the way founders end up in it is by not realizing 29 | that's where they're headed.There is another reason founders don't ask themselves whether they're 30 | default alive or default dead: they assume it will be easy to raise 31 | more money. But that assumption is often false, and worse still, the 32 | more you depend on it, the falser it becomes.Maybe it will help to separate facts from hopes. Instead of thinking 33 | of the future with vague optimism, explicitly separate the components. 34 | Say "We're default dead, but we're counting on investors to save 35 | us." Maybe as you say that, it will set off the same alarms in your 36 | head that it does in mine. And if you set off the alarms sufficiently 37 | early, you may be able to avoid the fatal pinch.It would be safe to be default dead if you could count on investors 38 | saving you. As a rule their interest is a function of 39 | growth. If you have steep revenue growth, say over 5x a year, you 40 | can start to count on investors being interested even if you're not 41 | profitable. 42 | [1] 43 | But investors are so fickle that you can never 44 | do more than start to count on them. Sometimes something about your 45 | business will spook investors even if your growth is great. So no 46 | matter how good your growth is, you can never safely treat fundraising 47 | as more than a plan A. 
You should always have a plan B as well: you 48 | should know (as in write down) precisely what you'll need to do to 49 | survive if you can't raise more money, and precisely when you'll 50 | have to switch to plan B if plan A isn't working.In any case, growing fast versus operating cheaply is far from the 51 | sharp dichotomy many founders assume it to be. In practice there 52 | is surprisingly little connection between how much a startup spends 53 | and how fast it grows. When a startup grows fast, it's usually 54 | because the product hits a nerve, in the sense of hitting some big 55 | need straight on. When a startup spends a lot, it's usually because 56 | the product is expensive to develop or sell, or simply because 57 | they're wasteful.If you're paying attention, you'll be asking at this point not just 58 | how to avoid the fatal pinch, but how to avoid being default dead. 59 | That one is easy: don't hire too fast. Hiring too fast is by far 60 | the biggest killer of startups that raise money. 61 | [2]Founders tell themselves they need to hire in order to grow. But 62 | most err on the side of overestimating this need rather than 63 | underestimating it. Why? Partly because there's so much work to 64 | do. Naive founders think that if they can just hire enough 65 | people, it will all get done. Partly because successful startups have 66 | lots of employees, so it seems like that's what one does in order 67 | to be successful. In fact the large staffs of successful startups 68 | are probably more the effect of growth than the cause. And 69 | partly because when founders have slow growth they don't want to 70 | face what is usually the real reason: the product is not appealing 71 | enough.Plus founders who've just raised money are often encouraged to 72 | overhire by the VCs who funded them. Kill-or-cure strategies are 73 | optimal for VCs because they're protected by the portfolio effect. 
74 | VCs want to blow you up, in one sense of the phrase or the other. 75 | But as a founder your incentives are different. You want above all 76 | to survive. 77 | [3]Here's a common way startups die. They make something moderately 78 | appealing and have decent initial growth. They raise their first 79 | round fairly easily, because the founders seem smart and the idea 80 | sounds plausible. But because the product is only moderately 81 | appealing, growth is ok but not great. The founders convince 82 | themselves that hiring a bunch of people is the way to boost growth. 83 | Their investors agree. But (because the product is only moderately 84 | appealing) the growth never comes. Now they're rapidly running out 85 | of runway. They hope further investment will save them. But because 86 | they have high expenses and slow growth, they're now unappealing 87 | to investors. They're unable to raise more, and the company dies.What the company should have done is address the fundamental problem: 88 | that the product is only moderately appealing. Hiring people is 89 | rarely the way to fix that. More often than not it makes it harder. 90 | At this early stage, the product needs to evolve more than to be 91 | "built out," and that's usually easier with fewer people. 92 | [4]Asking whether you're default alive or default dead may save you 93 | from this. Maybe the alarm bells it sets off will counteract the 94 | forces that push you to overhire. Instead you'll be compelled to 95 | seek growth in other ways. For example, by doing 96 | things that don't scale, or by redesigning the product in the 97 | way only founders can. 98 | And for many if not most startups, these paths to growth will be 99 | the ones that actually work.Airbnb waited 4 months after raising money at the end of Y Combinator 100 | before they hired their first employee. In the meantime the founders 101 | were terribly overworked. 
But they were overworked evolving Airbnb 102 | into the astonishingly successful organism it is now.Notes[1] 103 | Steep usage growth will also interest investors. Revenue 104 | will ultimately be a constant multiple of usage, so x% usage growth 105 | predicts x% revenue growth. But in practice investors discount 106 | merely predicted revenue, so if you're measuring usage you need a 107 | higher growth rate to impress investors.[2] 108 | Startups that don't raise money are saved from hiring too 109 | fast because they can't afford to. But that doesn't mean you should 110 | avoid raising money in order to avoid this problem, any more than 111 | that total abstinence is the only way to avoid becoming an alcoholic.[3] 112 | I would not be surprised if VCs' tendency to push founders 113 | to overhire is not even in their own interest. They don't know how 114 | many of the companies that get killed by overspending might have 115 | done well if they'd survived. My guess is a significant number.[4] 116 | After reading a draft, Sam Altman wrote:"I think you should make the hiring point more strongly. I think 117 | it's roughly correct to say that YC's most successful companies 118 | have never been the fastest to hire, and one of the marks of a great 119 | founder is being able to resist this urge."Paul Buchheit adds:"A related problem that I see a lot is premature scaling—founders 120 | take a small business that isn't really working (bad unit economics, 121 | typically) and then scale it up because they want impressive growth 122 | numbers. This is similar to over-hiring in that it makes the business 123 | much harder to fix once it's big, plus they are bleeding cash really 124 | fast." 125 | Thanks to Sam Altman, Paul Buchheit, Joe Gebbia, Jessica Livingston, 126 | and Geoff Ralston for reading drafts of this. 
-------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/vcsqueeze.txt: -------------------------------------------------------------------------------- 1 | November 2005In the next few years, venture capital funds will find themselves 2 | squeezed from four directions. They're already stuck with a seller's 3 | market, because of the huge amounts they raised at the end of the 4 | Bubble and still haven't invested. This by itself is not the end 5 | of the world. In fact, it's just a more extreme version of the 6 | norm 7 | in the VC business: too much money chasing too few deals.Unfortunately, those few deals now want less and less money, because 8 | it's getting so cheap to start a startup. The four causes: open 9 | source, which makes software free; Moore's law, which makes hardware 10 | geometrically closer to free; the Web, which makes promotion free 11 | if you're good; and better languages, which make development a lot 12 | cheaper.When we started our startup in 1995, the first three were our biggest 13 | expenses. We had to pay $5000 for the Netscape Commerce Server, 14 | the only software that then supported secure http connections. We 15 | paid $3000 for a server with a 90 MHz processor and 32 meg of 16 | memory. And we paid a PR firm about $30,000 to promote our launch.Now you could get all three for nothing. You can get the software 17 | for free; people throw away computers more powerful than our first 18 | server; and if you make something good you can generate ten times 19 | as much traffic by word of mouth online than our first PR firm got 20 | through the print media.And of course another big change for the average startup is that 21 | programming languages have improved-- or rather, the median language has. At most startups ten years 22 | ago, software development meant ten programmers writing code in 23 | C++. 
Now the same work might be done by one or two using Python 24 | or Ruby.During the Bubble, a lot of people predicted that startups would 25 | outsource their development to India. I think a better model for 26 | the future is David Heinemeier Hansson, who outsourced his development 27 | to a more powerful language instead. A lot of well-known applications 28 | are now, like BaseCamp, written by just one programmer. And one 29 | guy is more than 10x cheaper than ten, because (a) he won't waste 30 | any time in meetings, and (b) since he's probably a founder, he can 31 | pay himself nothing.Because starting a startup is so cheap, venture capitalists now 32 | often want to give startups more money than the startups want to 33 | take. VCs like to invest several million at a time. But as one 34 | VC told me after a startup he funded would only take about half a 35 | million, "I don't know what we're going to do. Maybe we'll just 36 | have to give some of it back." Meaning give some of the fund back 37 | to the institutional investors who supplied it, because it wasn't 38 | going to be possible to invest it all.Into this already bad situation comes the third problem: Sarbanes-Oxley. 39 | Sarbanes-Oxley is a law, passed after the Bubble, that drastically 40 | increases the regulatory burden on public companies. And in addition 41 | to the cost of compliance, which is at least two million dollars a 42 | year, the law introduces frightening legal exposure for corporate 43 | officers. An experienced CFO I know said flatly: "I would not 44 | want to be CFO of a public company now."You might think that responsible corporate governance is an area 45 | where you can't go too far. But you can go too far in any law, and 46 | this remark convinced me that Sarbanes-Oxley must have. This CFO 47 | is both the smartest and the most upstanding money guy I know. 
If 48 | Sarbanes-Oxley deters people like him from being CFOs of public 49 | companies, that's proof enough that it's broken.Largely because of Sarbanes-Oxley, few startups go public now. For 50 | all practical purposes, succeeding now equals getting bought. Which 51 | means VCs are now in the business of finding promising little 2-3 52 | man startups and pumping them up into companies that cost $100 53 | million to acquire. They didn't mean to be in this business; it's 54 | just what their business has evolved into.Hence the fourth problem: the acquirers have begun to realize they 55 | can buy wholesale. Why should they wait for VCs to make the startups 56 | they want more expensive? Most of what the VCs add, acquirers don't 57 | want anyway. The acquirers already have brand recognition and HR 58 | departments. What they really want is the software and the developers, 59 | and that's what the startup is in the early phase: concentrated 60 | software and developers.Google, typically, seems to have been the first to figure this out. 61 | "Bring us your startups early," said Google's speaker at the Startup School. They're quite 62 | explicit about it: they like to acquire startups at just the point 63 | where they would do a Series A round. (The Series A round is the 64 | first round of real VC funding; it usually happens in the first 65 | year.) It is a brilliant strategy, and one that other big technology 66 | companies will no doubt try to duplicate. Unless they want to have 67 | still more of their lunch eaten by Google.Of course, Google has an advantage in buying startups: a lot of the 68 | people there are rich, or expect to be when their options vest. 69 | Ordinary employees find it very hard to recommend an acquisition; 70 | it's just too annoying to see a bunch of twenty year olds get rich 71 | when you're still working for salary. 
Even if it's the right thing 72 | for your company to do.The Solution(s)Bad as things look now, there is a way for VCs to save themselves. 73 | They need to do two things, one of which won't surprise them, and 74 | another that will seem an anathema.Let's start with the obvious one: lobby to get Sarbanes-Oxley 75 | loosened. This law was created to prevent future Enrons, not to 76 | destroy the IPO market. Since the IPO market was practically dead 77 | when it passed, few saw what bad effects it would have. But now 78 | that technology has recovered from the last bust, we can see clearly 79 | what a bottleneck Sarbanes-Oxley has become.Startups are fragile plants—seedlings, in fact. These seedlings 80 | are worth protecting, because they grow into the trees of the 81 | economy. Much of the economy's growth is their growth. I think 82 | most politicians realize that. But they don't realize just how 83 | fragile startups are, and how easily they can become collateral 84 | damage of laws meant to fix some other problem.Still more dangerously, when you destroy startups, they make very 85 | little noise. If you step on the toes of the coal industry, you'll 86 | hear about it. But if you inadvertently squash the startup industry, 87 | all that happens is that the founders of the next Google stay in 88 | grad school instead of starting a company.My second suggestion will seem shocking to VCs: let founders cash 89 | out partially in the Series A round. At the moment, when VCs invest 90 | in a startup, all the stock they get is newly issued and all the 91 | money goes to the company. They could buy some stock directly from 92 | the founders as well.Most VCs have an almost religious rule against doing this. They 93 | don't want founders to get a penny till the company is sold or goes 94 | public. VCs are obsessed with control, and they worry that they'll 95 | have less leverage over the founders if the founders have any money.This is a dumb plan.
In fact, letting the founders sell a little stock 96 | early would generally be better for the company, because it would 97 | cause the founders' attitudes toward risk to be aligned with the 98 | VCs'. As things currently work, their attitudes toward risk tend 99 | to be diametrically opposed: the founders, who have nothing, would 100 | prefer a 100% chance of $1 million to a 20% chance of $10 million, 101 | while the VCs can afford to be "rational" and prefer the latter.Whatever they say, the reason founders are selling their companies 102 | early instead of doing Series A rounds is that they get paid up 103 | front. That first million is just worth so much more than the 104 | subsequent ones. If founders could sell a little stock early, 105 | they'd be happy to take VC money and bet the rest on a bigger 106 | outcome.So why not let the founders have that first million, or at least 107 | half a million? The VCs would get the same number of shares for the 108 | money. So what if some of the money would go to the 109 | founders instead of the company?Some VCs will say this is 110 | unthinkable—that they want all their money to be put to work 111 | growing the company. But the fact is, the huge size of current VC 112 | investments is dictated by the structure 113 | of VC funds, not the needs of startups. Often as not these large 114 | investments go to work destroying the company rather than growing 115 | it.The angel investors who funded our startup let the founders sell 116 | some stock directly to them, and it was a good deal for everyone. 117 | The angels made a huge return on that investment, so they're happy. 118 | And for us founders it blunted the terrifying all-or-nothingness 119 | of a startup, which in its raw form is more a distraction than a 120 | motivator.If VCs are frightened at the idea of letting founders partially 121 | cash out, let me tell them something still more frightening: you 122 | are now competing directly with Google.
123 | Thanks to Trevor Blackwell, Sarah Harlin, Jessica 124 | Livingston, and Robert Morris for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/vb.txt: -------------------------------------------------------------------------------- 1 | January 2016Life is short, as everyone knows. When I was a kid I used to wonder 2 | about this. Is life actually short, or are we really complaining 3 | about its finiteness? Would we be just as likely to feel life was 4 | short if we lived 10 times as long?Since there didn't seem any way to answer this question, I stopped 5 | wondering about it. Then I had kids. That gave me a way to answer 6 | the question, and the answer is that life actually is short.Having kids showed me how to convert a continuous quantity, time, 7 | into discrete quantities. You only get 52 weekends with your 2 year 8 | old. If Christmas-as-magic lasts from say ages 3 to 10, you only 9 | get to watch your child experience it 8 times. And while it's 10 | impossible to say what is a lot or a little of a continuous quantity 11 | like time, 8 is not a lot of something. If you had a handful of 8 12 | peanuts, or a shelf of 8 books to choose from, the quantity would 13 | definitely seem limited, no matter what your lifespan was.Ok, so life actually is short. Does it make any difference to know 14 | that?It has for me. It means arguments of the form "Life is too short 15 | for x" have great force. It's not just a figure of speech to say 16 | that life is too short for something. It's not just a synonym for 17 | annoying. If you find yourself thinking that life is too short for 18 | something, you should try to eliminate it if you can.When I ask myself what I've found life is too short for, the word 19 | that pops into my head is "bullshit." I realize that answer is 20 | somewhat tautological. 
It's almost the definition of bullshit that 21 | it's the stuff that life is too short for. And yet bullshit does 22 | have a distinctive character. There's something fake about it. 23 | It's the junk food of experience. 24 | [1]If you ask yourself what you spend your time on that's bullshit, 25 | you probably already know the answer. Unnecessary meetings, pointless 26 | disputes, bureaucracy, posturing, dealing with other people's 27 | mistakes, traffic jams, addictive but unrewarding pastimes.There are two ways this kind of thing gets into your life: it's 28 | either forced on you, or it tricks you. To some extent you have to 29 | put up with the bullshit forced on you by circumstances. You need 30 | to make money, and making money consists mostly of errands. Indeed, 31 | the law of supply and demand insures that: the more rewarding some 32 | kind of work is, the cheaper people will do it. It may be that 33 | less bullshit is forced on you than you think, though. There has 34 | always been a stream of people who opt out of the default grind and 35 | go live somewhere where opportunities are fewer in the conventional 36 | sense, but life feels more authentic. This could become more common.You can do it on a smaller scale without moving. The amount of 37 | time you have to spend on bullshit varies between employers. Most 38 | large organizations (and many small ones) are steeped in it. But 39 | if you consciously prioritize bullshit avoidance over other factors 40 | like money and prestige, you can probably find employers that will 41 | waste less of your time.If you're a freelancer or a small company, you can do this at the 42 | level of individual customers. If you fire or avoid toxic customers, 43 | you can decrease the amount of bullshit in your life by more than 44 | you decrease your income.But while some amount of bullshit is inevitably forced on you, the 45 | bullshit that sneaks into your life by tricking you is no one's 46 | fault but your own. 
And yet the bullshit you choose may be harder 47 | to eliminate than the bullshit that's forced on you. Things that 48 | lure you into wasting your time have to be really good at 49 | tricking you. An example that will be familiar to a lot of people 50 | is arguing online. When someone 51 | contradicts you, they're in a sense attacking you. Sometimes pretty 52 | overtly. Your instinct when attacked is to defend yourself. But 53 | like a lot of instincts, this one wasn't designed for the world we 54 | now live in. Counterintuitive as it feels, it's better most of 55 | the time not to defend yourself. Otherwise these people are literally 56 | taking your life. 57 | [2]Arguing online is only incidentally addictive. There are more 58 | dangerous things than that. As I've written before, one byproduct 59 | of technical progress is that things we like tend to become more 60 | addictive. Which means we will increasingly have to make a conscious 61 | effort to avoid addictions — to stand outside ourselves and ask "is 62 | this how I want to be spending my time?"As well as avoiding bullshit, one should actively seek out things 63 | that matter. But different things matter to different people, and 64 | most have to learn what matters to them. A few are lucky and realize 65 | early on that they love math or taking care of animals or writing, 66 | and then figure out a way to spend a lot of time doing it. But 67 | most people start out with a life that's a mix of things that 68 | matter and things that don't, and only gradually learn to distinguish 69 | between them.For the young especially, much of this confusion is induced by the 70 | artificial situations they find themselves in. In middle school and 71 | high school, what the other kids think of you seems the most important 72 | thing in the world. 
But when you ask adults what they got wrong 73 | at that age, nearly all say they cared too much what other kids 74 | thought of them.One heuristic for distinguishing stuff that matters is to ask 75 | yourself whether you'll care about it in the future. Fake stuff 76 | that matters usually has a sharp peak of seeming to matter. That's 77 | how it tricks you. The area under the curve is small, but its shape 78 | jabs into your consciousness like a pin.The things that matter aren't necessarily the ones people would 79 | call "important." Having coffee with a friend matters. You won't 80 | feel later like that was a waste of time.One great thing about having small children is that they make you 81 | spend time on things that matter: them. They grab your sleeve as 82 | you're staring at your phone and say "will you play with me?" And 83 | odds are that is in fact the bullshit-minimizing option.If life is short, we should expect its shortness to take us by 84 | surprise. And that is just what tends to happen. You take things 85 | for granted, and then they're gone. You think you can always write 86 | that book, or climb that mountain, or whatever, and then you realize 87 | the window has closed. The saddest windows close when other people 88 | die. Their lives are short too. After my mother died, I wished I'd 89 | spent more time with her. I lived as if she'd always be there. 90 | And in her typical quiet way she encouraged that illusion. But an 91 | illusion it was. I think a lot of people make the same mistake I 92 | did.The usual way to avoid being taken by surprise by something is to 93 | be consciously aware of it. Back when life was more precarious, 94 | people used to be aware of death to a degree that would now seem a 95 | bit morbid. I'm not sure why, but it doesn't seem the right answer 96 | to be constantly reminding oneself of the grim reaper hovering at 97 | everyone's shoulder. Perhaps a better solution is to look at the 98 | problem from the other end. 
Cultivate a habit of impatience about 99 | the things you most want to do. Don't wait before climbing that 100 | mountain or writing that book or visiting your mother. You don't 101 | need to be constantly reminding yourself why you shouldn't wait. 102 | Just don't wait. I can think of two more things one does when one doesn't have much 103 | of something: try to get more of it, and savor what one has. Both 104 | make sense here. How you live affects how long you live. Most people could do better. 105 | Me among them. But you can probably get even more effect by paying closer attention 106 | to the time you have. It's easy to let the days rush by. The 107 | "flow" that imaginative people love so much has a darker cousin 108 | that prevents you from pausing to savor life amid the daily slurry 109 | of errands and alarms. One of the most striking things I've read 110 | was not in a book, but the title of one: James Salter's Burning 111 | the Days. It is possible to slow time somewhat. I've gotten better at it. 112 | Kids help. When you have small children, there are a lot of moments 113 | so perfect that you can't help noticing. It does help too to feel that you've squeezed everything out of 114 | some experience. The reason I'm sad about my mother is not just 115 | that I miss her but that I think of all the things we could have 116 | done that we didn't. My oldest son will be 7 soon. And while I 117 | miss the 3 year old version of him, I at least don't have any regrets 118 | over what might have been. We had the best time a daddy and a 3 119 | year old ever had. Relentlessly prune bullshit, don't wait to do things that matter, 120 | and savor the time you have. That's what you do when life is short. Notes [1] 121 | At first I didn't like it that the word that came to mind was 122 | one that had other meanings. But then I realized the other meanings 123 | are fairly closely related. 
Bullshit in the sense of things you 124 | waste your time on is a lot like intellectual bullshit.[2] 125 | I chose this example deliberately as a note to self. I get 126 | attacked a lot online. People tell the craziest lies about me. 127 | And I have so far done a pretty mediocre job of suppressing the 128 | natural human inclination to say "Hey, that's not true!"Thanks to Jessica Livingston and Geoff Ralston for reading drafts 129 | of this. -------------------------------------------------------------------------------- /context_data/a_stars.txt: -------------------------------------------------------------------------------- 1 | {'32': [15, 117, 42, 69, 58, 107, 9, 49, 113, 66, 26, 102, 81, 94, 77, 61, 5, 19, 109, 3, 35, 54, 86, 89, 127, 21, 46, 122, 38, 97, 74, 29], '64': [15, 117, 42, 69, 58, 107, 9, 49, 113, 66, 26, 102, 81, 94, 77, 61, 5, 19, 109, 3, 35, 54, 86, 89, 127, 21, 46, 122, 38, 97, 74, 29, 153, 167, 255, 198, 139, 201, 185, 230, 194, 171, 211, 175, 162, 130, 239, 213, 223, 158, 178, 243, 235, 190, 217, 227, 145, 183, 249, 206, 245, 150, 135, 142], '128': [15, 117, 42, 69, 58, 107, 9, 49, 113, 66, 26, 102, 81, 94, 77, 61, 5, 19, 109, 3, 35, 54, 86, 89, 127, 21, 46, 122, 38, 97, 74, 29, 153, 167, 255, 198, 139, 201, 185, 230, 194, 171, 211, 175, 162, 130, 239, 213, 223, 158, 178, 243, 235, 190, 217, 227, 145, 183, 249, 206, 245, 150, 135, 142, 333, 471, 305, 509, 418, 461, 371, 358, 373, 437, 382, 263, 342, 291, 365, 429, 479, 421, 483, 491, 393, 407, 386, 442, 294, 270, 273, 486, 499, 346, 446, 301, 287, 467, 391, 323, 379, 474, 402, 266, 409, 413, 502, 329, 494, 454, 397, 283, 338, 313, 319, 353, 299, 457, 310, 427, 327, 279, 449, 505, 434, 361, 257, 351], '256': [15, 117, 42, 69, 58, 107, 9, 49, 113, 66, 26, 102, 81, 94, 77, 61, 5, 19, 109, 3, 35, 54, 86, 89, 127, 21, 46, 122, 38, 97, 74, 29, 153, 167, 255, 198, 139, 201, 185, 230, 194, 171, 211, 175, 162, 130, 239, 213, 223, 158, 178, 243, 235, 190, 217, 227, 145, 183, 249, 206, 245, 150, 135, 
142, 333, 471, 305, 509, 418, 461, 371, 358, 373, 437, 382, 263, 342, 291, 365, 429, 479, 421, 483, 491, 393, 407, 386, 442, 294, 270, 273, 486, 499, 346, 446, 301, 287, 467, 391, 323, 379, 474, 402, 266, 409, 413, 502, 329, 494, 454, 397, 283, 338, 313, 319, 353, 299, 457, 310, 427, 327, 279, 449, 505, 434, 361, 257, 351, 534, 639, 723, 730, 606, 794, 653, 701, 1019, 690, 909, 698, 625, 922, 567, 834, 859, 683, 790, 710, 913, 633, 950, 927, 957, 677, 735, 773, 555, 558, 546, 609, 523, 894, 837, 759, 665, 934, 539, 879, 853, 765, 975, 782, 642, 530, 526, 662, 749, 619, 818, 581, 843, 1007, 901, 823, 814, 623, 890, 693, 649, 851, 741, 946, 570, 942, 514, 613, 1014, 755, 1021, 551, 873, 715, 962, 517, 631, 669, 847, 898, 562, 883, 829, 717, 739, 746, 870, 983, 937, 799, 1002, 1011, 991, 589, 955, 658, 885, 726, 578, 761, 997, 987, 686, 994, 967, 778, 598, 978, 809, 785, 907, 705, 807, 586, 575, 594, 543, 862, 674, 770, 827, 601, 802, 867, 645, 929, 919, 970], '512': [15, 117, 42, 69, 58, 107, 9, 49, 113, 66, 26, 102, 81, 94, 77, 61, 5, 19, 109, 3, 35, 54, 86, 89, 127, 21, 46, 122, 38, 97, 74, 29, 153, 167, 255, 198, 139, 201, 185, 230, 194, 171, 211, 175, 162, 130, 239, 213, 223, 158, 178, 243, 235, 190, 217, 227, 145, 183, 249, 206, 245, 150, 135, 142, 333, 471, 305, 509, 418, 461, 371, 358, 373, 437, 382, 263, 342, 291, 365, 429, 479, 421, 483, 491, 393, 407, 386, 442, 294, 270, 273, 486, 499, 346, 446, 301, 287, 467, 391, 323, 379, 474, 402, 266, 409, 413, 502, 329, 494, 454, 397, 283, 338, 313, 319, 353, 299, 457, 310, 427, 327, 279, 449, 505, 434, 361, 257, 351, 534, 639, 723, 730, 606, 794, 653, 701, 1019, 690, 909, 698, 625, 922, 567, 834, 859, 683, 790, 710, 913, 633, 950, 927, 957, 677, 735, 773, 555, 558, 546, 609, 523, 894, 837, 759, 665, 934, 539, 879, 853, 765, 975, 782, 642, 530, 526, 662, 749, 619, 818, 581, 843, 1007, 901, 823, 814, 623, 890, 693, 649, 851, 741, 946, 570, 942, 514, 613, 1014, 755, 1021, 551, 873, 715, 962, 517, 631, 669, 847, 898, 
562, 883, 829, 717, 739, 746, 870, 983, 937, 799, 1002, 1011, 991, 589, 955, 658, 885, 726, 578, 761, 997, 987, 686, 994, 967, 778, 598, 978, 809, 785, 907, 705, 807, 586, 575, 594, 543, 862, 674, 770, 827, 601, 802, 867, 645, 929, 919, 970, 1451, 1695, 1666, 1301, 1374, 1650, 1991, 1357, 1149, 1854, 1075, 1617, 2025, 1519, 1466, 1169, 1423, 1877, 1850, 1318, 1533, 1382, 1526, 1523, 1482, 1297, 1961, 1658, 1341, 1439, 1789, 1102, 1163, 1906, 1061, 1718, 1983, 1614, 2037, 1743, 1281, 1747, 1831, 1110, 1165, 1425, 1346, 1185, 1871, 1965, 1811, 1114, 1974, 1270, 2006, 2033, 1573, 1579, 1402, 1607, 1565, 1734, 1755, 1306, 1334, 1602, 1570, 1757, 1953, 1937, 1090, 1394, 2045, 1883, 1933, 1979, 1630, 1379, 1327, 1637, 1471, 1886, 2015, 1231, 1899, 1691, 1626, 1082, 1909, 1587, 1819, 1645, 1686, 1267, 1857, 1414, 1723, 1246, 1473, 1554, 2017, 1293, 1714, 1155, 1861, 1046, 1350, 1773, 1418, 1986, 1361, 1025, 1727, 1643, 1314, 1286, 1031, 1398, 1609, 1729, 1366, 1498, 1998, 1842, 1458, 1485, 1041, 1529, 1146, 1059, 1502, 1387, 1681, 1454, 1507, 1793, 1223, 1738, 1177, 1837, 1310, 1433, 1289, 1958, 1930, 1561, 1181, 1322, 1927, 1633, 1338, 1038, 1443, 1710, 1873, 1767, 1277, 1599, 2001, 1189, 1699, 1129, 1203, 1943, 1805, 1158, 1463, 1195, 1782, 1133, 1826, 1430, 1763, 2011, 1099, 1551, 1771, 1513, 1593, 1078, 1950, 1918, 2029, 1670, 1213, 1331, 1257, 1262, 1119, 1581, 1479, 1087, 1209, 1547, 1901, 1054, 1242, 1494, 1970, 1275, 1894, 1923, 1846, 1410, 1217, 1447, 1706, 1590, 1542, 1511, 2021, 1914, 1355, 1623, 1391, 1677, 1095, 1785, 1173, 1197, 1750, 1815, 1371, 1889, 1702, 1834, 1675, 1779, 1803, 1995, 1405, 1205, 1137, 1238, 1253, 1227, 1661, 1122, 1050, 1033, 1865, 1823, 1947, 1491, 1067, 1799, 1141, 1655, 1235, 1557, 1251, 1125, 1070, 1106, 2041, 1537], '1024': [15, 117, 42, 69, 58, 107, 9, 49, 113, 66, 26, 102, 81, 94, 77, 61, 5, 19, 109, 3, 35, 54, 86, 89, 127, 21, 46, 122, 38, 97, 74, 29, 153, 167, 255, 198, 139, 201, 185, 230, 194, 171, 211, 175, 162, 130, 239, 213, 
223, 158, 178, 243, 235, 190, 217, 227, 145, 183, 249, 206, 245, 150, 135, 142, 333, 471, 305, 509, 418, 461, 371, 358, 373, 437, 382, 263, 342, 291, 365, 429, 479, 421, 483, 491, 393, 407, 386, 442, 294, 270, 273, 486, 499, 346, 446, 301, 287, 467, 391, 323, 379, 474, 402, 266, 409, 413, 502, 329, 494, 454, 397, 283, 338, 313, 319, 353, 299, 457, 310, 427, 327, 279, 449, 505, 434, 361, 257, 351, 534, 639, 723, 730, 606, 794, 653, 701, 1019, 690, 909, 698, 625, 922, 567, 834, 859, 683, 790, 710, 913, 633, 950, 927, 957, 677, 735, 773, 555, 558, 546, 609, 523, 894, 837, 759, 665, 934, 539, 879, 853, 765, 975, 782, 642, 530, 526, 662, 749, 619, 818, 581, 843, 1007, 901, 823, 814, 623, 890, 693, 649, 851, 741, 946, 570, 942, 514, 613, 1014, 755, 1021, 551, 873, 715, 962, 517, 631, 669, 847, 898, 562, 883, 829, 717, 739, 746, 870, 983, 937, 799, 1002, 1011, 991, 589, 955, 658, 885, 726, 578, 761, 997, 987, 686, 994, 967, 778, 598, 978, 809, 785, 907, 705, 807, 586, 575, 594, 543, 862, 674, 770, 827, 601, 802, 867, 645, 929, 919, 970, 1451, 1695, 1666, 1301, 1374, 1650, 1991, 1357, 1149, 1854, 1075, 1617, 2025, 1519, 1466, 1169, 1423, 1877, 1850, 1318, 1533, 1382, 1526, 1523, 1482, 1297, 1961, 1658, 1341, 1439, 1789, 1102, 1163, 1906, 1061, 1718, 1983, 1614, 2037, 1743, 1281, 1747, 1831, 1110, 1165, 1425, 1346, 1185, 1871, 1965, 1811, 1114, 1974, 1270, 2006, 2033, 1573, 1579, 1402, 1607, 1565, 1734, 1755, 1306, 1334, 1602, 1570, 1757, 1953, 1937, 1090, 1394, 2045, 1883, 1933, 1979, 1630, 1379, 1327, 1637, 1471, 1886, 2015, 1231, 1899, 1691, 1626, 1082, 1909, 1587, 1819, 1645, 1686, 1267, 1857, 1414, 1723, 1246, 1473, 1554, 2017, 1293, 1714, 1155, 1861, 1046, 1350, 1773, 1418, 1986, 1361, 1025, 1727, 1643, 1314, 1286, 1031, 1398, 1609, 1729, 1366, 1498, 1998, 1842, 1458, 1485, 1041, 1529, 1146, 1059, 1502, 1387, 1681, 1454, 1507, 1793, 1223, 1738, 1177, 1837, 1310, 1433, 1289, 1958, 1930, 1561, 1181, 1322, 1927, 1633, 1338, 1038, 1443, 1710, 1873, 1767, 1277, 1599, 2001, 
1189, 1699, 1129, 1203, 1943, 1805, 1158, 1463, 1195, 1782, 1133, 1826, 1430, 1763, 2011, 1099, 1551, 1771, 1513, 1593, 1078, 1950, 1918, 2029, 1670, 1213, 1331, 1257, 1262, 1119, 1581, 1479, 1087, 1209, 1547, 1901, 1054, 1242, 1494, 1970, 1275, 1894, 1923, 1846, 1410, 1217, 1447, 1706, 1590, 1542, 1511, 2021, 1914, 1355, 1623, 1391, 1677, 1095, 1785, 1173, 1197, 1750, 1815, 1371, 1889, 1702, 1834, 1675, 1779, 1803, 1995, 1405, 1205, 1137, 1238, 1253, 1227, 1661, 1122, 1050, 1033, 1865, 1823, 1947, 1491, 1067, 1799, 1141, 1655, 1235, 1557, 1251, 1125, 1070, 1106, 2041, 1537, 3369, 3198, 3802, 2471, 2921, 3585, 3619, 3035, 3194, 4075, 3838, 2598, 2467, 2211, 3105, 2909, 2870, 3834, 2310, 2298, 3498, 3461, 2069, 3642, 2691, 3659, 4059, 3779, 2785, 2091, 3394, 2522, 3962, 2323, 3361, 2353, 2451, 3918, 2486, 3030, 3951, 2902, 3549, 2510, 2154, 3161, 4041, 3475, 2727, 3790, 2762, 2191, 2447, 4011, 3537, 2203, 3523, 3053, 3165, 2705, 2074, 2739, 4019, 2413, 2755, 3403, 2949, 3559, 3358, 3047, 4081, 3449, 2389, 3483, 2959, 3622, 4002, 3666, 2669, 2049, 3602, 2663, 2077, 2578, 2338, 3998, 3590, 3017, 3693, 3698, 3286, 2593, 3565, 2237, 3757, 3647, 2307, 2842, 3061, 3281, 2462, 2165, 3097, 2221, 2907, 3322, 3495, 2546, 2559, 3254, 3326, 3606, 3629, 3345, 3953, 2259, 2819, 3701, 2866, 2277, 4067, 2138, 2409, 3249, 2614, 2231, 3957, 2294, 2611, 3685, 3298, 2375, 3893, 4027, 3733, 3842, 2513, 3861, 2607, 2143, 2797, 2530, 3511, 3375, 3237, 2679, 2862, 3967, 2119, 3713, 2262, 3083, 3414, 2085, 3579, 3235, 3246, 2955, 4070, 3306, 2702, 3913, 3535, 2561, 2150, 3885, 2993, 3751, 3905, 2266, 3761, 2333, 3454, 2365, 2395, 3517, 2722, 3937, 3078, 3681, 2205, 4051, 2242, 3881, 3581, 3087, 2683, 3042, 2503, 2130, 3143, 3477, 3158, 2226, 3705, 3389, 2650, 3094, 2621, 2425, 2158, 2991, 2421, 2377, 2897, 4086, 3505, 2251, 2551, 3638, 2134, 3514, 3737, 2385, 2349, 2121, 3597, 2499, 2094, 2913, 3009, 3339, 2457, 2453, 2273, 2765, 2218, 3027, 2987, 2482, 3113, 2971, 3911, 3994, 4007, 3270, 
3169, 3845, 3823, 2179, 3137, 3150, 2657, 3650, 3001, 3831, 2098, 2794, 4014, 2937, 3765, 2811, 2851, 3798, 3398, 2183, 3243, 3274, 2065, 3214, 2966, 3873, 2195, 3670, 3718, 2383, 2710, 3555, 2582, 2127, 2555, 3175, 3121, 3351, 2927, 3290, 3826, 3378, 2935, 2255, 2630, 3211, 2997, 3542, 3662, 2646, 2185, 2342, 3690, 3769, 3387, 3975, 2198, 2813, 2878, 3747, 2635, 3773, 2667, 2569, 3178, 2145, 2773, 4054, 3135, 2675, 3111, 3922, 3978, 3731, 2715, 2781, 3901, 2803, 2590, 3727, 2171, 3405, 2734, 3226, 2831, 2745, 2887, 3435, 3309, 3117, 3153, 4029, 2286, 3431, 3038, 3146, 2347, 2405, 3741, 2833, 3722, 3574, 2979, 3437, 2478, 3989, 2494, 3491, 4037, 3470, 2329, 3343, 2945, 2490, 3890, 2102, 3006, 3934, 3783, 3229, 3853, 2214, 3457, 2397, 3205, 3074, 3219, 2361, 2919, 2741, 3570, 3446, 3417, 3982, 3711, 3186, 2083, 3898, 4079, 3130, 3201, 2637, 2431, 2695, 3221, 2617, 2602, 2963, 3070, 2327, 3442, 2642, 2437, 3526, 2517, 2107, 2699, 2174, 2687, 3545, 2845, 3805, 2822, 3927, 3091, 2858, 3813, 2585, 4089, 2805, 3278, 3381, 3679, 2319, 2359, 3022, 3877, 3754, 3485, 2853, 2435, 2771, 3594, 2537, 3293, 4023, 2891, 2247, 2113, 3849, 3969, 4063, 3265, 3502, 3931, 2163, 2789, 2403, 2234, 2269, 3262, 3794, 2826, 3317, 3531, 2109, 3059, 3303, 2063, 3787, 3103, 3366, 2474, 3674, 3191, 3610, 2838, 2370, 3330, 3625, 2730, 3409, 3333, 2942, 2506, 3811, 2315, 2973, 2929, 2882, 3051, 3865, 3946, 2750, 2535, 3015, 3181, 3315, 3353, 2654, 3655, 3259, 3615, 2573, 2874, 2719, 2055, 3817, 2757, 2895, 2543, 3986, 2626, 2059, 2417, 3871, 3422, 3859, 3066, 4046, 3634, 3425, 2981, 2291, 3466, 2301, 3126, 2282, 4034, 3941, 4094, 2527, 2779, 2441, 2567, 3561]} -------------------------------------------------------------------------------- /context_data/r_stars.txt: -------------------------------------------------------------------------------- 1 | {'32': [16, 116, 43, 70, 59, 106, 8, 48, 112, 67, 25, 101, 82, 93, 76, 62, 6, 18, 108, 4, 34, 53, 85, 90, 126, 22, 45, 121, 39, 96, 75, 30], '64': 
[16, 116, 43, 70, 59, 106, 8, 48, 112, 67, 25, 101, 82, 93, 76, 62, 6, 18, 108, 4, 34, 53, 85, 90, 126, 22, 45, 121, 39, 96, 75, 30, 154, 168, 256, 197, 140, 200, 184, 229, 195, 172, 210, 174, 163, 131, 238, 214, 222, 159, 177, 242, 236, 189, 216, 228, 146, 184, 248, 207, 244, 151, 136, 143], '128': [16, 116, 43, 70, 59, 106, 8, 48, 112, 67, 25, 101, 82, 93, 76, 62, 6, 18, 108, 4, 34, 53, 85, 90, 126, 22, 45, 121, 39, 96, 75, 30, 154, 168, 256, 197, 140, 200, 184, 229, 195, 172, 210, 174, 163, 131, 238, 214, 222, 159, 177, 242, 236, 189, 216, 228, 146, 184, 248, 207, 244, 151, 136, 143, 332, 472, 306, 510, 417, 462, 370, 359, 374, 438, 381, 262, 343, 290, 364, 430, 478, 422, 484, 492, 394, 406, 385, 441, 293, 271, 272, 487, 500, 345, 445, 302, 286, 466, 392, 324, 378, 473, 403, 265, 410, 412, 501, 328, 493, 453, 398, 284, 339, 312, 318, 354, 300, 458, 309, 426, 328, 280, 448, 506, 433, 360, 256, 352], '256': [16, 116, 43, 70, 59, 106, 8, 48, 112, 67, 25, 101, 82, 93, 76, 62, 6, 18, 108, 4, 34, 53, 85, 90, 126, 22, 45, 121, 39, 96, 75, 30, 154, 168, 256, 197, 140, 200, 184, 229, 195, 172, 210, 174, 163, 131, 238, 214, 222, 159, 177, 242, 236, 189, 216, 228, 146, 184, 248, 207, 244, 151, 136, 143, 332, 472, 306, 510, 417, 462, 370, 359, 374, 438, 381, 262, 343, 290, 364, 430, 478, 422, 484, 492, 394, 406, 385, 441, 293, 271, 272, 487, 500, 345, 445, 302, 286, 466, 392, 324, 378, 473, 403, 265, 410, 412, 501, 328, 493, 453, 398, 284, 339, 312, 318, 354, 300, 458, 309, 426, 328, 280, 448, 506, 433, 360, 256, 352, 533, 638, 724, 729, 605, 795, 652, 700, 1018, 691, 908, 699, 626, 923, 568, 835, 858, 682, 789, 709, 914, 634, 951, 926, 956, 676, 734, 774, 556, 559, 545, 608, 524, 893, 836, 760, 664, 935, 538, 880, 854, 766, 976, 783, 643, 531, 527, 663, 750, 620, 817, 580, 842, 1006, 900, 824, 815, 624, 891, 694, 648, 850, 742, 945, 569, 941, 513, 612, 1015, 754, 1022, 550, 872, 714, 963, 518, 632, 668, 848, 899, 561, 882, 830, 718, 740, 747, 869, 984, 936, 800, 1001, 
1010, 990, 590, 956, 659, 886, 725, 577, 760, 998, 988, 685, 995, 966, 779, 599, 979, 808, 786, 908, 704, 806, 587, 574, 593, 544, 863, 675, 771, 828, 602, 803, 866, 644, 928, 918, 971], '512': [16, 116, 43, 70, 59, 106, 8, 48, 112, 67, 25, 101, 82, 93, 76, 62, 6, 18, 108, 4, 34, 53, 85, 90, 126, 22, 45, 121, 39, 96, 75, 30, 154, 168, 256, 197, 140, 200, 184, 229, 195, 172, 210, 174, 163, 131, 238, 214, 222, 159, 177, 242, 236, 189, 216, 228, 146, 184, 248, 207, 244, 151, 136, 143, 332, 472, 306, 510, 417, 462, 370, 359, 374, 438, 381, 262, 343, 290, 364, 430, 478, 422, 484, 492, 394, 406, 385, 441, 293, 271, 272, 487, 500, 345, 445, 302, 286, 466, 392, 324, 378, 473, 403, 265, 410, 412, 501, 328, 493, 453, 398, 284, 339, 312, 318, 354, 300, 458, 309, 426, 328, 280, 448, 506, 433, 360, 256, 352, 533, 638, 724, 729, 605, 795, 652, 700, 1018, 691, 908, 699, 626, 923, 568, 835, 858, 682, 789, 709, 914, 634, 951, 926, 956, 676, 734, 774, 556, 559, 545, 608, 524, 893, 836, 760, 664, 935, 538, 880, 854, 766, 976, 783, 643, 531, 527, 663, 750, 620, 817, 580, 842, 1006, 900, 824, 815, 624, 891, 694, 648, 850, 742, 945, 569, 941, 513, 612, 1015, 754, 1022, 550, 872, 714, 963, 518, 632, 668, 848, 899, 561, 882, 830, 718, 740, 747, 869, 984, 936, 800, 1001, 1010, 990, 590, 956, 659, 886, 725, 577, 760, 998, 988, 685, 995, 966, 779, 599, 979, 808, 786, 908, 704, 806, 587, 574, 593, 544, 863, 675, 771, 828, 602, 803, 866, 644, 928, 918, 971, 1452, 1694, 1665, 1300, 1373, 1651, 1990, 1356, 1150, 1855, 1076, 1616, 2024, 1520, 1467, 1168, 1424, 1876, 1851, 1317, 1534, 1381, 1525, 1522, 1481, 1298, 1962, 1659, 1342, 1440, 1788, 1103, 1162, 1905, 1062, 1717, 1984, 1613, 2038, 1744, 1282, 1746, 1830, 1109, 1166, 1424, 1345, 1184, 1872, 1964, 1812, 1113, 1973, 1271, 2007, 2034, 1572, 1578, 1403, 1608, 1564, 1735, 1754, 1305, 1335, 1601, 1571, 1756, 1952, 1938, 1089, 1393, 2046, 1882, 1934, 1978, 1629, 1380, 1328, 1636, 1472, 1887, 2014, 1230, 1898, 1690, 1627, 1081, 1908, 1588, 1818, 
1644, 1685, 1266, 1858, 1415, 1724, 1247, 1474, 1555, 2016, 1292, 1713, 1154, 1860, 1047, 1351, 1774, 1419, 1985, 1360, 1024, 1726, 1644, 1313, 1285, 1030, 1399, 1610, 1730, 1367, 1497, 1997, 1841, 1457, 1486, 1042, 1528, 1145, 1058, 1503, 1386, 1680, 1453, 1506, 1792, 1222, 1739, 1178, 1836, 1309, 1432, 1290, 1957, 1931, 1560, 1182, 1321, 1928, 1632, 1339, 1037, 1444, 1709, 1872, 1768, 1278, 1598, 2000, 1190, 1698, 1128, 1204, 1944, 1804, 1157, 1464, 1194, 1781, 1134, 1827, 1429, 1762, 2010, 1100, 1552, 1772, 1512, 1592, 1077, 1951, 1919, 2030, 1669, 1214, 1332, 1258, 1263, 1118, 1580, 1480, 1088, 1210, 1548, 1900, 1055, 1243, 1495, 1969, 1276, 1893, 1922, 1845, 1409, 1216, 1446, 1707, 1589, 1543, 1510, 2020, 1913, 1354, 1624, 1390, 1678, 1096, 1784, 1172, 1196, 1749, 1814, 1372, 1890, 1703, 1835, 1674, 1778, 1804, 1994, 1406, 1204, 1136, 1237, 1252, 1226, 1660, 1121, 1051, 1034, 1864, 1824, 1948, 1490, 1066, 1798, 1140, 1656, 1234, 1556, 1250, 1126, 1071, 1105, 2042, 1538], '1024': [16, 116, 43, 70, 59, 106, 8, 48, 112, 67, 25, 101, 82, 93, 76, 62, 6, 18, 108, 4, 34, 53, 85, 90, 126, 22, 45, 121, 39, 96, 75, 30, 154, 168, 256, 197, 140, 200, 184, 229, 195, 172, 210, 174, 163, 131, 238, 214, 222, 159, 177, 242, 236, 189, 216, 228, 146, 184, 248, 207, 244, 151, 136, 143, 332, 472, 306, 510, 417, 462, 370, 359, 374, 438, 381, 262, 343, 290, 364, 430, 478, 422, 484, 492, 394, 406, 385, 441, 293, 271, 272, 487, 500, 345, 445, 302, 286, 466, 392, 324, 378, 473, 403, 265, 410, 412, 501, 328, 493, 453, 398, 284, 339, 312, 318, 354, 300, 458, 309, 426, 328, 280, 448, 506, 433, 360, 256, 352, 533, 638, 724, 729, 605, 795, 652, 700, 1018, 691, 908, 699, 626, 923, 568, 835, 858, 682, 789, 709, 914, 634, 951, 926, 956, 676, 734, 774, 556, 559, 545, 608, 524, 893, 836, 760, 664, 935, 538, 880, 854, 766, 976, 783, 643, 531, 527, 663, 750, 620, 817, 580, 842, 1006, 900, 824, 815, 624, 891, 694, 648, 850, 742, 945, 569, 941, 513, 612, 1015, 754, 1022, 550, 872, 714, 963, 518, 
632, 668, 848, 899, 561, 882, 830, 718, 740, 747, 869, 984, 936, 800, 1001, 1010, 990, 590, 956, 659, 886, 725, 577, 760, 998, 988, 685, 995, 966, 779, 599, 979, 808, 786, 908, 704, 806, 587, 574, 593, 544, 863, 675, 771, 828, 602, 803, 866, 644, 928, 918, 971, 1452, 1694, 1665, 1300, 1373, 1651, 1990, 1356, 1150, 1855, 1076, 1616, 2024, 1520, 1467, 1168, 1424, 1876, 1851, 1317, 1534, 1381, 1525, 1522, 1481, 1298, 1962, 1659, 1342, 1440, 1788, 1103, 1162, 1905, 1062, 1717, 1984, 1613, 2038, 1744, 1282, 1746, 1830, 1109, 1166, 1424, 1345, 1184, 1872, 1964, 1812, 1113, 1973, 1271, 2007, 2034, 1572, 1578, 1403, 1608, 1564, 1735, 1754, 1305, 1335, 1601, 1571, 1756, 1952, 1938, 1089, 1393, 2046, 1882, 1934, 1978, 1629, 1380, 1328, 1636, 1472, 1887, 2014, 1230, 1898, 1690, 1627, 1081, 1908, 1588, 1818, 1644, 1685, 1266, 1858, 1415, 1724, 1247, 1474, 1555, 2016, 1292, 1713, 1154, 1860, 1047, 1351, 1774, 1419, 1985, 1360, 1024, 1726, 1644, 1313, 1285, 1030, 1399, 1610, 1730, 1367, 1497, 1997, 1841, 1457, 1486, 1042, 1528, 1145, 1058, 1503, 1386, 1680, 1453, 1506, 1792, 1222, 1739, 1178, 1836, 1309, 1432, 1290, 1957, 1931, 1560, 1182, 1321, 1928, 1632, 1339, 1037, 1444, 1709, 1872, 1768, 1278, 1598, 2000, 1190, 1698, 1128, 1204, 1944, 1804, 1157, 1464, 1194, 1781, 1134, 1827, 1429, 1762, 2010, 1100, 1552, 1772, 1512, 1592, 1077, 1951, 1919, 2030, 1669, 1214, 1332, 1258, 1263, 1118, 1580, 1480, 1088, 1210, 1548, 1900, 1055, 1243, 1495, 1969, 1276, 1893, 1922, 1845, 1409, 1216, 1446, 1707, 1589, 1543, 1510, 2020, 1913, 1354, 1624, 1390, 1678, 1096, 1784, 1172, 1196, 1749, 1814, 1372, 1890, 1703, 1835, 1674, 1778, 1804, 1994, 1406, 1204, 1136, 1237, 1252, 1226, 1660, 1121, 1051, 1034, 1864, 1824, 1948, 1490, 1066, 1798, 1140, 1656, 1234, 1556, 1250, 1126, 1071, 1105, 2042, 1538, 3368, 3199, 3803, 2472, 2920, 3584, 3620, 3034, 3195, 4074, 3839, 2599, 2468, 2212, 3104, 2910, 2869, 3833, 2309, 2299, 3499, 3462, 2068, 3643, 2690, 3660, 4060, 3778, 2784, 2092, 3393, 2523, 3963, 
2322, 3360, 2354, 2452, 3917, 2485, 3031, 3952, 2901, 3550, 2511, 2155, 3160, 4040, 3474, 2726, 3791, 2761, 2190, 2446, 4012, 3538, 2204, 3524, 3052, 3166, 2706, 2075, 2738, 4018, 2414, 2754, 3402, 2950, 3558, 3357, 3048, 4080, 3450, 2388, 3484, 2960, 3621, 4001, 3665, 2670, 2050, 3601, 2664, 2078, 2579, 2337, 3997, 3591, 3016, 3694, 3699, 3285, 2594, 3564, 2236, 3756, 3648, 2306, 2843, 3060, 3282, 2463, 2164, 3096, 2220, 2908, 3321, 3496, 2545, 2560, 3253, 3325, 3607, 3628, 3346, 3954, 2260, 2820, 3700, 2867, 2278, 4066, 2139, 2408, 3250, 2613, 2232, 3958, 2295, 2610, 3686, 3299, 2376, 3892, 4028, 3732, 3843, 2514, 3862, 2608, 2144, 2796, 2531, 3512, 3376, 3238, 2680, 2861, 3966, 2118, 3712, 2263, 3084, 3413, 2086, 3578, 3234, 3247, 2956, 4071, 3307, 2703, 3912, 3536, 2560, 2151, 3886, 2992, 3750, 3906, 2265, 3762, 2332, 3453, 2364, 2396, 3518, 2721, 3936, 3079, 3680, 2204, 4052, 2243, 3882, 3582, 3086, 2684, 3043, 2502, 2131, 3144, 3478, 3159, 2225, 3704, 3390, 2651, 3095, 2620, 2424, 2159, 2990, 2420, 2378, 2896, 4087, 3506, 2250, 2552, 3639, 2133, 3515, 3736, 2386, 2348, 2120, 3598, 2498, 2093, 2912, 3010, 3338, 2458, 2452, 2274, 2764, 2219, 3026, 2986, 2483, 3112, 2972, 3912, 3993, 4006, 3269, 3168, 3844, 3824, 2178, 3136, 3151, 2658, 3649, 3000, 3832, 2097, 2795, 4013, 2936, 3766, 2810, 2852, 3799, 3399, 2182, 3244, 3273, 2064, 3215, 2965, 3874, 2194, 3671, 3719, 2382, 2709, 3554, 2583, 2126, 2556, 3176, 3120, 3352, 2928, 3291, 3827, 3377, 2934, 2256, 2631, 3212, 2996, 3541, 3661, 2645, 2186, 2341, 3691, 3768, 3386, 3974, 2199, 2812, 2879, 3748, 2634, 3772, 2666, 2570, 3177, 2146, 2772, 4055, 3134, 2674, 3110, 3921, 3979, 3732, 2716, 2782, 3902, 2802, 2591, 3728, 2172, 3406, 2733, 3225, 2830, 2746, 2886, 3434, 3308, 3118, 3154, 4030, 2287, 3430, 3039, 3147, 2348, 2404, 3742, 2832, 3723, 3575, 2980, 3438, 2477, 3990, 2493, 3492, 4038, 3471, 2328, 3344, 2946, 2489, 3889, 2101, 3005, 3933, 3784, 3230, 3854, 2215, 3456, 2396, 3206, 3073, 3220, 2362, 2918, 2742, 
3571, 3445, 3418, 3983, 3712, 3187, 2084, 3897, 4078, 3129, 3202, 2638, 2432, 2696, 3222, 2618, 2603, 2964, 3069, 2328, 3443, 2643, 2438, 3527, 2518, 2108, 2698, 2175, 2688, 3546, 2844, 3804, 2821, 3926, 3092, 2859, 3812, 2586, 4088, 2806, 3279, 3382, 3678, 2318, 2358, 3023, 3878, 3755, 3486, 2854, 2436, 2772, 3593, 2536, 3294, 4022, 2892, 2246, 2114, 3850, 3968, 4062, 3266, 3501, 3932, 2164, 2790, 2404, 2235, 2268, 3263, 3793, 2825, 3318, 3530, 2110, 3058, 3302, 2062, 3788, 3102, 3365, 2473, 3675, 3190, 3609, 2839, 2371, 3331, 3626, 2731, 3410, 3332, 2941, 2505, 3810, 2316, 2974, 2928, 2883, 3050, 3864, 3947, 2749, 2534, 3016, 3182, 3316, 3352, 2653, 3656, 3258, 3616, 2574, 2875, 2718, 2054, 3816, 2758, 2896, 2544, 3987, 2627, 2060, 2416, 3872, 3421, 3858, 3065, 4047, 3633, 3424, 2982, 2292, 3467, 2302, 3127, 2283, 4035, 3940, 4093, 2526, 2778, 2442, 2568, 3562]} -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/hubs.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Want to start a startup? Get funded by 4 | Y Combinator. 5 | 6 | 7 | 8 | 9 | October 2011If you look at a list of US cities sorted by population, the number 10 | of successful startups per capita varies by orders of magnitude. 11 | Somehow it's as if most places were sprayed with startupicide.I wondered about this for years. I could see the average town was 12 | like a roach motel for startup ambitions: smart, ambitious people 13 | went in, but no startups came out. But I was never able to figure 14 | out exactly what happened inside the motel—exactly what was 15 | killing all the potential startups. 16 | [1]A couple weeks ago I finally figured it out. I was framing the 17 | question wrong. The problem is not that most towns kill startups. 18 | It's that death is the default for startups, 19 | and most towns don't save them. 
Instead of thinking of most places 20 | as being sprayed with startupicide, it's more accurate to think of 21 | startups as all being poisoned, and a few places being sprayed with 22 | the antidote.Startups in other places are just doing what startups naturally do: 23 | fail. The real question is, what's saving startups in places 24 | like Silicon Valley? 25 | [2]EnvironmentI think there are two components to the antidote: being in a place 26 | where startups are the cool thing to do, and chance meetings with 27 | people who can help you. And what drives them both is the number 28 | of startup people around you.The first component is particularly helpful in the first stage of 29 | a startup's life, when you go from merely having an interest in 30 | starting a company to actually doing it. It's quite a leap to start 31 | a startup. It's an unusual thing to do. But in Silicon Valley it 32 | seems normal. 33 | [3]In most places, if you start a startup, people treat you as if 34 | you're unemployed. People in the Valley aren't automatically 35 | impressed with you just because you're starting a company, but they 36 | pay attention. Anyone who's been here any amount of time knows not 37 | to default to skepticism, no matter how inexperienced you seem or 38 | how unpromising your idea sounds at first, because they've all seen 39 | inexperienced founders with unpromising sounding ideas who a few 40 | years later were billionaires.Having people around you care about what you're doing is an 41 | extraordinarily powerful force. Even the 42 | most willful people are susceptible to it. About a year after we 43 | started Y Combinator I said something to a partner at a well known 44 | VC firm that gave him the (mistaken) impression I was considering 45 | starting another startup. He responded so eagerly that for about 46 | half a second I found myself considering doing it.In most other cities, the prospect of starting a startup just doesn't 47 | seem real. 
In the Valley it's not only real but fashionable. That 48 | no doubt causes a lot of people to start startups who shouldn't. 49 | But I think that's ok. Few people are suited to running a startup, 50 | and it's very hard to predict beforehand which are (as I know all 51 | too well from being in the business of trying to predict beforehand), 52 | so lots of people starting startups who shouldn't is probably the 53 | optimal state of affairs. As long as you're at a point in your 54 | life when you can bear the risk of failure, the best way to find 55 | out if you're suited to running a startup is to try 56 | it. Chance: The second component of the antidote is chance meetings with people 57 | who can help you. This force works in both phases: both in the 58 | transition from the desire to start a startup to starting one, and 59 | the transition from starting a company to succeeding. The power 60 | of chance meetings is more variable than people around you caring 61 | about startups, which is like a sort of background radiation that 62 | affects everyone equally, but at its strongest it is far stronger. Chance meetings produce miracles to compensate for the disasters 63 | that characteristically befall startups. In the Valley, terrible 64 | things happen to startups all the time, just like they do to startups 65 | everywhere. The reason startups are more likely to make it here 66 | is that great things happen to them too. In the Valley, lightning 67 | has a sign bit. For example, you start a site for college students and you decide 68 | to move to the Valley for the summer to work on it. And then on a 69 | random suburban street in Palo Alto you happen to run into Sean 70 | Parker, who understands the domain really well because he started 71 | a similar startup himself, and also knows all the investors.
And 72 | moreover has advanced views, for 2004, on founders retaining control of their companies. You can't say precisely what the miracle will be, or even for sure 73 | that one will happen. The best one can say is: if you're in a 74 | startup hub, unexpected good things will probably happen to you, 75 | especially if you deserve them. I bet this is true even for startups we fund. Even with us working 76 | to make things happen for them on purpose rather than by accident, 77 | the frequency of helpful chance meetings in the Valley is so high 78 | that it's still a significant increment on what we can deliver. Chance meetings play a role like the role relaxation plays in having 79 | ideas. Most people have had the experience of working hard on some 80 | problem, not being able to solve it, giving up and going to bed, 81 | and then thinking of the answer in the shower in the morning. What 82 | makes the answer appear is letting your thoughts drift a bit—and thus drift off the wrong 83 | path you'd been pursuing last night and onto the right one adjacent 84 | to it. Chance meetings let your acquaintance drift in the same way taking 85 | a shower lets your thoughts drift. The critical thing in both cases 86 | is that they drift just the right amount. The meeting between Larry 87 | Page and Sergey Brin was a good example. They let their acquaintance 88 | drift, but only a little; they were both meeting someone they had 89 | a lot in common with. For Larry Page the most important component of the antidote was 90 | Sergey Brin, and vice versa. The antidote is 91 | people. It's not the 92 | physical infrastructure of Silicon Valley that makes it work, or 93 | the weather, or anything like that.
Those helped get it started, 94 | but now that the reaction is self-sustaining what drives it is the 95 | people. Many observers have noticed that one of the most distinctive things 96 | about startup hubs is the degree to which people help one another 97 | out, with no expectation of getting anything in return. I'm not 98 | sure why this is so. Perhaps it's because startups are less of a 99 | zero sum game than most types of business; they are rarely killed 100 | by competitors. Or perhaps it's because so many startup founders 101 | have backgrounds in the sciences, where collaboration is encouraged. A large part of YC's function is to accelerate that process. We're 102 | a sort of Valley within the Valley, where the density of people 103 | working on startups and their willingness to help one another are 104 | both artificially amplified. Numbers: Both components of the antidote—an environment that encourages 105 | startups, and chance meetings with people who help you—are 106 | driven by the same underlying cause: the number of startup people 107 | around you. To make a startup hub, you need a lot of people 108 | interested in startups. There are three reasons. The first, obviously, is that if you don't 109 | have enough density, the chance meetings don't happen. 110 | [4] 111 | The second is that different startups need such different things, so 112 | you need a lot of people to supply each startup with what they need 113 | most. Sean Parker was exactly what Facebook needed in 2004. Another 114 | startup might have needed a database guy, or someone with connections 115 | in the movie business. This is one of the reasons we fund such a large number of companies, 116 | incidentally.
The bigger the community, the greater the chance it 117 | will contain the person who has that one thing you need most. The third reason you need a lot of people to make a startup hub is 118 | that once you have enough people interested in the same problem, 119 | they start to set the social norms. And it is a particularly 120 | valuable thing when the atmosphere around you encourages you to do 121 | something that would otherwise seem too ambitious. In most places 122 | the atmosphere pulls you back toward the mean. I flew into the Bay Area a few days ago. I notice this every time 123 | I fly over the Valley: somehow you can sense something is going on. 124 | Obviously you can sense prosperity in how well kept a 125 | place looks. But there are different kinds of prosperity. Silicon 126 | Valley doesn't look like Boston, or New York, or LA, or DC. I tried 127 | asking myself what word I'd use to describe the feeling the Valley 128 | radiated, and the word that came to mind was optimism. Notes: [1] 129 | I'm not saying it's impossible to succeed in a city with few 130 | other startups, just harder. If you're sufficiently good at 131 | generating your own morale, you can survive without external 132 | encouragement. Wufoo was based in Tampa and they succeeded. But 133 | the Wufoos are exceptionally disciplined. [2] 134 | Incidentally, this phenomenon is not limited to startups. Most 135 | unusual ambitions fail, unless the person who has them manages to 136 | find the right sort of community. [3] 137 | Starting a company is common, but starting a startup is rare. 138 | I've talked about the distinction between the two elsewhere, but 139 | essentially a startup is a new business designed for scale. Most 140 | new businesses are service businesses and except in rare cases those 141 | don't scale. [4] 142 | As I was writing this, I had a demonstration of the density of 143 | startup people in the Valley.
Jessica and I bicycled to University 144 | Ave in Palo Alto to have lunch at the fabulous Oren's Hummus. As 145 | we walked in, we met Charlie Cheever sitting near the door. Selina 146 | Tobaccowala stopped to say hello on her way out. Then Josh Wilson 147 | came in to pick up a take out order. After lunch we went to get 148 | frozen yogurt. On the way we met Rajat Suri. When we got to the 149 | yogurt place, we found Dave Shen there, and as we walked out we ran 150 | into Yuri Sagalov. We walked with him for a block or so and we ran 151 | into Muzzammil Zaveri, and then a block later we met Aydin Senkut. 152 | This is everyday life in Palo Alto. I wasn't trying to meet people; 153 | I was just having lunch. And I'm sure for every startup founder 154 | or investor I saw that I knew, there were 5 more I didn't. If Ron 155 | Conway had been with us he would have met 30 people he knew.Thanks to Sam Altman, Paul Buchheit, Jessica Livingston, and 156 | Harj Taggar for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/gba.txt: -------------------------------------------------------------------------------- 1 | April 2004To the popular press, "hacker" means someone who breaks 2 | into computers. Among programmers it means a good programmer. 3 | But the two meanings are connected. To programmers, 4 | "hacker" connotes mastery in the most literal sense: someone 5 | who can make a computer do what he wants—whether the computer 6 | wants to or not.To add to the confusion, the noun "hack" also has two senses. It can 7 | be either a compliment or an insult. It's called a hack when 8 | you do something in an ugly way. But when you do something 9 | so clever that you somehow beat the system, that's also 10 | called a hack. 
The word is used more often in the former than 11 | the latter sense, probably because ugly solutions are more 12 | common than brilliant ones.Believe it or not, the two senses of "hack" are also 13 | connected. Ugly and imaginative solutions have something in 14 | common: they both break the rules. And there is a gradual 15 | continuum between rule breaking that's merely ugly (using 16 | duct tape to attach something to your bike) and rule breaking 17 | that is brilliantly imaginative (discarding Euclidean space).Hacking predates computers. When he 18 | was working on the Manhattan Project, Richard Feynman used to 19 | amuse himself by breaking into safes containing secret documents. 20 | This tradition continues today. 21 | When we were in grad school, a hacker friend of mine who spent too much 22 | time around MIT had 23 | his own lock picking kit. 24 | (He now runs a hedge fund, a not unrelated enterprise.)It is sometimes hard to explain to authorities why one would 25 | want to do such things. 26 | Another friend of mine once got in trouble with the government for 27 | breaking into computers. This had only recently been declared 28 | a crime, and the FBI found that their usual investigative 29 | technique didn't work. Police investigation apparently begins with 30 | a motive. The usual motives are few: drugs, money, sex, 31 | revenge. Intellectual curiosity was not one of the motives on 32 | the FBI's list. Indeed, the whole concept seemed foreign to 33 | them.Those in authority tend to be annoyed by hackers' 34 | general attitude of disobedience. But that disobedience is 35 | a byproduct of the qualities that make them good programmers. 36 | They may laugh at the CEO when he talks in generic corporate 37 | newspeech, but they also laugh at someone who tells them 38 | a certain problem can't be solved. 39 | Suppress one, and you suppress the other.This attitude is sometimes affected. 
Sometimes young programmers 40 | notice the eccentricities of eminent hackers and decide to 41 | adopt some of their own in order to seem smarter. 42 | The fake version is not merely 43 | annoying; the prickly attitude of these posers 44 | can actually slow the process of innovation.But even factoring in their annoying eccentricities, 45 | the disobedient attitude of hackers is a net win. I wish its 46 | advantages were better understood.For example, I suspect people in Hollywood are 47 | simply mystified by 48 | hackers' attitudes toward copyrights. They are a perennial 49 | topic of heated discussion on Slashdot. 50 | But why should people who program computers 51 | be so concerned about copyrights, of all things?Partly because some companies use mechanisms to prevent 52 | copying. Show any hacker a lock and his first thought is 53 | how to pick it. But there is a deeper reason that 54 | hackers are alarmed by measures like copyrights and patents. 55 | They see increasingly aggressive measures to protect 56 | "intellectual property" 57 | as a threat to the intellectual 58 | freedom they need to do their job. 59 | And they are right.It is by poking about inside current technology that 60 | hackers get ideas for the next generation. No thanks, 61 | intellectual homeowners may say, we don't need any 62 | outside help. But they're wrong. 63 | The next generation of computer technology has 64 | often—perhaps more often than not—been developed by outsiders.In 1977 there was no doubt some group within IBM developing 65 | what they expected to be 66 | the next generation of business computer. They were mistaken. 67 | The next generation of business computer was 68 | being developed on entirely different lines by two long-haired 69 | guys called Steve in a garage in Los Altos. At about the 70 | same time, the powers that be 71 | were cooperating to develop the 72 | official next generation operating system, Multics. 
73 | But two guys who thought Multics excessively complex went off 74 | and wrote their own. They gave it a name that 75 | was a joking reference to Multics: Unix.The latest intellectual property laws impose 76 | unprecedented restrictions on the sort of poking around that 77 | leads to new ideas. In the past, a competitor might use patents 78 | to prevent you from selling a copy of something they 79 | made, but they couldn't prevent you from 80 | taking one apart to see how it worked. The latest 81 | laws make this a crime. How are we 82 | to develop new technology if we can't study current 83 | technology to figure out how to improve it?Ironically, hackers have brought this on themselves. 84 | Computers are responsible for the problem. The control systems 85 | inside machines used to be physical: gears and levers and cams. 86 | Increasingly, the brains (and thus the value) of products is 87 | in software. And by this I mean software in the general sense: 88 | i.e. data. A song on an LP is physically stamped into the 89 | plastic. A song on an iPod's disk is merely stored on it.Data is by definition easy to copy. And the Internet 90 | makes copies easy to distribute. So it is no wonder 91 | companies are afraid. But, as so often happens, fear has 92 | clouded their judgement. The government has responded 93 | with draconian laws to protect intellectual property. 94 | They probably mean well. But 95 | they may not realize that such laws will do more harm 96 | than good.Why are programmers so violently opposed to these laws? 97 | If I were a legislator, I'd be interested in this 98 | mystery—for the same reason that, if I were a farmer and suddenly 99 | heard a lot of squawking coming from my hen house one night, 100 | I'd want to go out and investigate. Hackers are not stupid, 101 | and unanimity is very rare in this world. 
102 | So if they're all squawking, 103 | perhaps there is something amiss.Could it be that such laws, though intended to protect America, 104 | will actually harm it? Think about it. There is something 105 | very American about Feynman breaking into safes during 106 | the Manhattan Project. It's hard to imagine the authorities 107 | having a sense of humor about such things over 108 | in Germany at that time. Maybe it's not a coincidence.Hackers are unruly. That is the essence of hacking. And it 109 | is also the essence of Americanness. It is no accident 110 | that Silicon Valley 111 | is in America, and not France, or Germany, 112 | or England, or Japan. In those countries, people color inside 113 | the lines.I lived for a while in Florence. But after I'd been there 114 | a few months I realized that what I'd been unconsciously hoping 115 | to find there was back in the place I'd just left. 116 | The reason Florence is famous is that in 1450, it was New York. 117 | In 1450 it was filled with the kind of turbulent and ambitious 118 | people you find now in America. (So I went back to America.)It is greatly to America's advantage that it is 119 | a congenial atmosphere for the right sort of unruliness—that 120 | it is a home not just for the smart, but for smart-alecks. 121 | And hackers are invariably smart-alecks. If we had a national 122 | holiday, it would be April 1st. It says a great deal about 123 | our work that we use the same word for a brilliant or a 124 | horribly cheesy solution. When we cook one up we're not 125 | always 100% sure which kind it is. But as long as it has 126 | the right sort of wrongness, that's a promising sign. 127 | It's odd that people 128 | think of programming as precise and methodical. Computers 129 | are precise and methodical. Hacking is something you do 130 | with a gleeful laugh.In our world some of the most characteristic solutions 131 | are not far removed from practical 132 | jokes. 
IBM was no doubt rather surprised by the consequences 133 | of the licensing deal for DOS, just as the hypothetical 134 | "adversary" must be when Michael Rabin solves a problem by 135 | redefining it as one that's easier to solve.Smart-alecks have to develop a keen sense of how much they 136 | can get away with. And lately hackers 137 | have sensed a change 138 | in the atmosphere. 139 | Lately hackerliness seems rather frowned upon.To hackers the recent contraction in civil liberties seems 140 | especially ominous. That must also mystify outsiders. 141 | Why should we care especially about civil 142 | liberties? Why programmers, more than 143 | dentists or salesmen or landscapers?Let me put the case in terms a government official would appreciate. 144 | Civil liberties are not just an ornament, or a quaint 145 | American tradition. Civil liberties make countries rich. 146 | If you made a graph of 147 | GNP per capita vs. civil liberties, you'd notice a definite 148 | trend. Could civil liberties really be a cause, rather 149 | than just an effect? I think so. I think a society in which 150 | people can do and say what they want will also tend to 151 | be one in which the most efficient solutions win, rather than 152 | those sponsored by the most influential people. 153 | Authoritarian countries become corrupt; 154 | corrupt countries become poor; and poor countries are weak. 155 | It seems to me there is 156 | a Laffer curve for government power, just as for 157 | tax revenues. At least, it seems likely enough that it 158 | would be stupid to try the experiment and find out. Unlike 159 | high tax rates, you can't repeal totalitarianism if it 160 | turns out to be a mistake.This is why hackers worry. The government spying on people doesn't 161 | literally make programmers write worse code. It just leads 162 | eventually to a world in which bad ideas win. And because 163 | this is so important to hackers, they're especially sensitive 164 | to it. 
They can sense totalitarianism approaching from a 165 | distance, as animals can sense an approaching 166 | thunderstorm.It would be ironic if, as hackers fear, recent measures 167 | intended to protect national security and intellectual property 168 | turned out to be a missile aimed right at what makes 169 | America successful. But it would not be the first time that 170 | measures taken in an atmosphere of panic had 171 | the opposite of the intended effect.There is such a thing as Americanness. 172 | There's nothing like living abroad to teach you that. 173 | And if you want to know whether something will nurture or squash 174 | this quality, it would be hard to find a better focus 175 | group than hackers, because they come closest of any group 176 | I know to embodying it. Closer, probably, than 177 | the men running our government, 178 | who for all their talk of patriotism 179 | remind me more of Richelieu or Mazarin 180 | than Thomas Jefferson or George Washington.When you read what the founding fathers had to say for 181 | themselves, they sound more like hackers. 182 | "The spirit of resistance to government," 183 | Jefferson wrote, "is so valuable on certain occasions, that I wish 184 | it always to be kept alive."Imagine an American president saying that today. 185 | Like the remarks of an outspoken old grandmother, the sayings of 186 | the founding fathers have embarrassed generations of 187 | their less confident successors. They remind us where we come from. 188 | They remind us that it is the people who break rules that are 189 | the source of America's wealth and power.Those in a position to impose rules naturally want them to be 190 | obeyed. But be careful what you ask for. 
You might get it.Thanks to Ken Anderson, Trevor Blackwell, Daniel Giffin, 191 | Sarah Harlin, Shiro Kawai, Jessica Livingston, Matz, 192 | Jackie McDonough, Robert Morris, Eric Raymond, Guido van Rossum, 193 | David Weinberger, and 194 | Steven Wolfram for reading drafts of this essay. 195 | (The image shows Steves Jobs and Wozniak 196 | with a "blue box." 197 | Photo by Margret Wozniak. Reproduced by permission of Steve 198 | Wozniak.) -------------------------------------------------------------------------------- /viz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "090f0080", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import uuid\n", 11 | "import pandas as pd\n", 12 | "import json\n", 13 | "import os\n", 14 | "import glob\n", 15 | "import jsonlines\n", 16 | "import requests\n", 17 | "from tqdm import trange\n", 18 | "import random\n", 19 | "import json_repair\n", 20 | "import seaborn as sns\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "from matplotlib.colors import LinearSegmentedColormap\n", 23 | "import pandas as pd\n", 24 | "from collections import Counter" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "id": "b9cdf0ef", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "def get_reasoning_score(index, pre):\n", 35 | " file = open(\"context_data/a_stars.txt\", \"r\")\n", 36 | " a_stars = eval(file.readline())[\"32\"]\n", 37 | " file = open(\"context_data/r_stars.txt\", \"r\")\n", 38 | " r_stars = eval(file.readline())[\"32\"]\n", 39 | " if a_stars[index] in pre and r_stars[index] in pre:\n", 40 | " return 0.5\n", 41 | " elif a_stars[index] in pre and r_stars[index] not in pre:\n", 42 | " return 1\n", 43 | " elif a_stars[index] not in pre and r_stars[index] in pre:\n", 44 | " return 0.25\n", 45 | " else:\n", 46 | " return 0\n", 47 | "# get formate context size\n", 
48 | "def get_context_size(max_context_length, n):\n", 49 | " intervel = int(max_context_length / n)\n", 50 | " return [i for i in range(intervel, max_context_length + 1, intervel)]\n", 51 | "\n", 52 | "# reduce duplicate from the predicted results\n", 53 | "def reduce_duplicate(predicted, m):\n", 54 | " if len(predicted) > m:\n", 55 | " predicted = predicted[:m]\n", 56 | " predicted = list(set(predicted))\n", 57 | " else:\n", 58 | " predicted = list(set(predicted))\n", 59 | " return predicted\n", 60 | "\n", 61 | "# get results from English version of the Counting-Stars\n", 62 | "def get_data_EN(folder_path, max_context_length, m, n, test_type):\n", 63 | " context_size = get_context_size(max_context_length, n)\n", 64 | " data = []\n", 65 | " average_score = 0\n", 66 | " indicator = 0\n", 67 | " if test_type == \"Acquisition\":\n", 68 | " scalar = 0.82\n", 69 | " elif test_type == \"Reasoning\":\n", 70 | " scalar = 0.815\n", 71 | " for item in jsonlines.Reader(folder_path):\n", 72 | " if \"```\" in item['answer']:\n", 73 | " predicted = json_repair.loads(item['answer'].replace('```','').replace(\"json\",'').strip())['little_penguin']\n", 74 | " else:\n", 75 | " try:\n", 76 | " predicted = json.loads(item['answer'])['little_penguin']\n", 77 | " except:\n", 78 | " predicted = item['answer']['little_penguin']\n", 79 | " predicted = reduce_duplicate(predicted, m)\n", 80 | " for i in range(1, m+1): \n", 81 | " counting_times = i\n", 82 | " if test_type == \"Acquisition\":\n", 83 | " try:\n", 84 | " if item[\"reference_counting_results\"][i-1] in predicted:\n", 85 | " score = 1\n", 86 | " else:\n", 87 | " score = 0\n", 88 | " except:\n", 89 | " score = 0\n", 90 | " else:\n", 91 | " score = get_reasoning_score(counting_times-1, predicted)\n", 92 | " average_score += score\n", 93 | " data.append({\n", 94 | " \"Counting Times\": counting_times,\n", 95 | " \"Context Size\": int(item['context_size'] / scalar),\n", 96 | " \"Score\": score\n", 97 | " })\n", 98 | " df = 
pd.DataFrame(data)\n", 99 | " print (df.head())\n", 100 | " print (f\"You have {len(df)} rows\")\n", 101 | " pivot_table = pd.pivot_table(df, values='Score', index=['Counting Times', 'Context Size'], aggfunc='mean').reset_index()\n", 102 | " pivot_table = pivot_table.pivot(index=\"Counting Times\", columns=\"Context Size\", values=\"Score\")\n", 103 | " return pivot_table, pivot_table.mean(axis=None).round(3)\n", 104 | "\n", 105 | "# get results from Chinese version of the Counting-Stars\n", 106 | "def get_data_ZH(folder_path, max_context_length, m, n, test_type):\n", 107 | " context_size = get_context_size(max_context_length, n)\n", 108 | " data = []\n", 109 | " average_score = 0\n", 110 | " indicator = 0\n", 111 | " if test_type == \"Acquisition\":\n", 112 | " scalar = 0.725\n", 113 | " elif test_type == \"Reasoning\":\n", 114 | " scalar = 0.72\n", 115 | " for item in jsonlines.Reader(folder_path):\n", 116 | " if \"```\" in item['answer']:\n", 117 | " predicted = json_repair.loads(item['answer'].replace('```','').replace(\"json\",'').strip())['小企鹅']\n", 118 | " else:\n", 119 | " try:\n", 120 | " predicted = json_repair.loads(item['answer'])['小企鹅'] \n", 121 | " except:\n", 122 | " predicted = item['answer']['小企鹅']\n", 123 | " predicted = reduce_duplicate(predicted, m)\n", 124 | " for i in range(1, m+1): \n", 125 | " counting_times = i\n", 126 | " if test_type == \"Acquisition\":\n", 127 | " try:\n", 128 | " if item[\"reference_counting_results\"][i-1] in predicted:\n", 129 | " score = 1\n", 130 | " else:\n", 131 | " score = 0\n", 132 | " except:\n", 133 | " score = 0\n", 134 | " else:\n", 135 | " score = get_reasoning_score(counting_times-1, predicted)\n", 136 | " average_score += score\n", 137 | " data.append({\n", 138 | " \"Counting Times\": counting_times,\n", 139 | " \"Context Size\": int(item['context_size'] / scalar),\n", 140 | " \"Score\": score\n", 141 | " })\n", 142 | " df = pd.DataFrame(data)\n", 143 | " print (df.head())\n", 144 | " print (f\"You have 
{len(df)} rows\")\n", 145 | " pivot_table = pd.pivot_table(df, values='Score', index=['Counting Times', 'Context Size'], aggfunc='mean').reset_index()\n", 146 | " pivot_table = pivot_table.pivot(index=\"Counting Times\", columns=\"Context Size\", values=\"Score\")\n", 147 | " return pivot_table, pivot_table.mean(axis=None).round(3)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "0506824c", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "import numpy\n", 158 | "m = 32\n", 159 | "n = 32\n", 160 | "max_context_length = 128000\n", 161 | "\n", 162 | "testing_type = \"Acquisition\"\n", 163 | "#testing_type = \"Reasoning\"\n", 164 | "\n", 165 | "folder_path_test = open(\"xxx\",\"r\")\n", 166 | "viz_data_gpt, mean_gpt = get_data_ZH(folder_path_test, max_context_length, m, n, testing_type)\n", 167 | " \n", 168 | "folder_path_test = open(\"xxx\",\"r\")\n", 169 | "viz_data_gemini, mean_gemini = get_data_ZH(folder_path_test, max_context_length, m, n, testing_type)\n", 170 | " \n", 171 | "folder_path_test = open(\"xxx\",\"r\")\n", 172 | "viz_data_claude, mean_claude = get_data_ZH(folder_path_test, max_context_length, m, n, testing_type)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "id": "44e53cf2", 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "# Create a custom colormap. 
Go to https://coolors.co/ and pick cool colors\n", 183 | "cmap = LinearSegmentedColormap.from_list(\"custom_cmap\", [\"#184E77\", \"#1E6091\", \"#1A759F\", \"#168AAD\", \"#34A0A4\", \"#52B69A\", \"#76C893\", \"#99D98C\", \"#B5E48C\", \"#D9ED92\"])\n", 184 | "\n", 185 | "fig = plt.figure(figsize=(17, 14))\n", 186 | "ax1 = fig.add_subplot(3, 1, 1)\n", 187 | "# Create the heatmap with better aesthetics\n", 188 | "sns.heatmap(\n", 189 | " viz_data_gpt,\n", 190 | " #annot=True,\n", 191 | " fmt=\"g\",\n", 192 | " cmap=cmap,\n", 193 | " linewidths=0.3,\n", 194 | " cbar_kws={'label': 'Score', \"pad\": 0.02}\n", 195 | ")\n", 196 | "\n", 197 | "labels = [i for i in range(2, m+1, 2)]\n", 198 | "x = [i-0.5 for i in range(2, m+1, 2)]\n", 199 | "\n", 200 | "# More aesthetics\n", 201 | "plt.title(f'Counting-Stars-({m})-(Multi-evidence {testing_type}): GPT-4 Turbo (Acc: {mean_gpt})', size=11) # Adds a title\n", 202 | "plt.xlabel('Context Length', size=11) # X-axis label\n", 203 | "plt.ylabel('Counting Times', size=11) # Y-axis label\n", 204 | "plt.xticks(rotation=45) # Rotates the x-axis labels to prevent overlap\n", 205 | "plt.yticks(x, labels, rotation=45) # Ensures the y-axis labels are horizontal\n", 206 | "\n", 207 | "\n", 208 | "\n", 209 | "ax2 = fig.add_subplot(3, 1, 2)\n", 210 | "# Create the heatmap with better aesthetics\n", 211 | "sns.heatmap(\n", 212 | " viz_data_claude,\n", 213 | " #annot=True,\n", 214 | " fmt=\"g\",\n", 215 | " cmap=cmap,\n", 216 | " linewidths=0.3,\n", 217 | " cbar_kws={'label': 'Score', \"pad\": 0.02}\n", 218 | ")\n", 219 | "\n", 220 | "labels = [i for i in range(2, m+1, 2)]\n", 221 | "x = [i-0.5 for i in range(2, m+1, 2)]\n", 222 | "\n", 223 | "# More aesthetics\n", 224 | "plt.title(f'Counting-Stars-({m})-(Multi-evidence {testing_type}): Claude3 Opus (Acc: {mean_claude})', size=11) # Adds a title\n", 225 | "plt.xlabel('Context Length', size=11) # X-axis label\n", 226 | "plt.ylabel('Counting Times', size=11) # Y-axis label\n", 227 | 
"plt.xticks(rotation=45) # Rotates the x-axis labels to prevent overlap\n", 228 | "plt.yticks(x, labels, rotation=45) # Ensures the y-axis labels are horizontal\n", 229 | "\n", 230 | "\n", 231 | "ax3 = fig.add_subplot(3, 1, 3)\n", 232 | "\n", 233 | "# Create the heatmap with better aesthetics\n", 234 | "sns.heatmap(\n", 235 | " viz_data_gemini,\n", 236 | " #annot=True,\n", 237 | " fmt=\"g\",\n", 238 | " cmap=cmap,\n", 239 | " linewidths=0.3,\n", 240 | " cbar_kws={'label': 'Score', \"pad\": 0.02}\n", 241 | ")\n", 242 | "\n", 243 | "labels = [i for i in range(2, m+1, 2)]\n", 244 | "x = [i-0.5 for i in range(2, m+1, 2)]\n", 245 | "\n", 246 | "# More aesthetics\n", 247 | "plt.title(f'Counting-Stars-({m})-(Multi-evidence {testing_type}): Gemini Pro 1.5 (Acc: {mean_gemini})', size=11) # Adds a title\n", 248 | "plt.xlabel('Context Length', size=11) # X-axis label\n", 249 | "plt.ylabel('Counting Times', size=11) # Y-axis label\n", 250 | "plt.xticks(rotation=45) # Rotates the x-axis labels to prevent overlap\n", 251 | "plt.yticks(x, labels, rotation=45) # Ensures the y-axis labels are horizontal\n", 252 | "\n", 253 | "fig.subplots_adjust(hspace=0.4)\n", 254 | "plt.savefig(f\"results.pdf\", dpi=2380, bbox_inches='tight')\n", 255 | "plt.show()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "id": "d2bec855", 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "id": "7f0a7a38", 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [] 273 | } 274 | ], 275 | "metadata": { 276 | "kernelspec": { 277 | "display_name": "Python 3 (ipykernel)", 278 | "language": "python", 279 | "name": "python3" 280 | }, 281 | "language_info": { 282 | "codemirror_mode": { 283 | "name": "ipython", 284 | "version": 3 285 | }, 286 | "file_extension": ".py", 287 | "mimetype": "text/x-python", 288 | "name": "python", 289 | "nbconvert_exporter": "python", 290 | 
"pygments_lexer": "ipython3", 291 | "version": "3.11.5" 292 | } 293 | }, 294 | "nbformat": 4, 295 | "nbformat_minor": 5 296 | } 297 | -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/apple.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Want to start a startup? Get funded by 4 | Y Combinator. 5 | 6 | 7 | 8 | 9 | November 2009I don't think Apple realizes how badly the App Store approval process 10 | is broken. Or rather, I don't think they realize how much it matters 11 | that it's broken.The way Apple runs the App Store has harmed their reputation with 12 | programmers more than anything else they've ever done. 13 | Their reputation with programmers used to be great. 14 | It used to be the most common complaint you heard 15 | about Apple was that their fans admired them too uncritically. 16 | The App Store has changed that. Now a lot of programmers 17 | have started to see Apple as evil.How much of the goodwill Apple once had with programmers have they 18 | lost over the App Store? A third? Half? And that's just so far. 19 | The App Store is an ongoing karma leak.* * *How did Apple get into this mess? Their fundamental problem is 20 | that they don't understand software.They treat iPhone apps the way they treat the music they sell through 21 | iTunes. Apple is the channel; they own the user; if you want to 22 | reach users, you do it on their terms. The record labels agreed, 23 | reluctantly. But this model doesn't work for software. It doesn't 24 | work for an intermediary to own the user. The software business 25 | learned that in the early 1980s, when companies like VisiCorp showed 26 | that although the words "software" and "publisher" fit together, 27 | the underlying concepts don't. Software isn't like music or books. 28 | It's too complicated for a third party to act as an intermediary 29 | between developer and user. 
And yet that's what Apple is trying 30 | to be with the App Store: a software publisher. And a particularly 31 | overreaching one at that, with fussy tastes and a rigidly enforced 32 | house style.If software publishing didn't work in 1980, it works even less now 33 | that software development has evolved from a small number of big 34 | releases to a constant stream of small ones. But Apple doesn't 35 | understand that either. Their model of product development derives 36 | from hardware. They work on something till they think it's finished, 37 | then they release it. You have to do that with hardware, but because 38 | software is so easy to change, its design can benefit from evolution. 39 | The standard way to develop applications now is to launch fast and 40 | iterate. Which means it's a disaster to have long, random delays 41 | each time you release a new version.Apparently Apple's attitude is that developers should be more careful 42 | when they submit a new version to the App Store. They would say 43 | that. But powerful as they are, they're not powerful enough to 44 | turn back the evolution of technology. Programmers don't use 45 | launch-fast-and-iterate out of laziness. They use it because it 46 | yields the best results. By obstructing that process, Apple is 47 | making them do bad work, and programmers hate that as much as Apple 48 | would.How would Apple like it if when they discovered a serious bug in 49 | OS X, instead of releasing a software update immediately, they had 50 | to submit their code to an intermediary who sat on it for a month 51 | and then rejected it because it contained an icon they didn't like?By breaking software development, Apple gets the opposite of what 52 | they intended: the version of an app currently available in the App 53 | Store tends to be an old and buggy one. One developer told me: 54 | 55 | As a result of their process, the App Store is full of half-baked 56 | applications. 
I make a new version almost every day that I release 57 | to beta users. The version on the App Store feels old and crappy. 58 | I'm sure that a lot of developers feel this way: One emotion is 59 | "I'm not really proud about what's in the App Store", and it's 60 | combined with the emotion "Really, it's Apple's fault." 61 | 62 | Another wrote: 63 | 64 | I believe that they think their approval process helps users by 65 | ensuring quality. In reality, bugs like ours get through all the 66 | time and then it can take 4-8 weeks to get that bug fix approved, 67 | leaving users to think that iPhone apps sometimes just don't work. 68 | Worse for Apple, these apps work just fine on other platforms 69 | that have immediate approval processes. 70 | 71 | Actually I suppose Apple has a third misconception: that all the 72 | complaints about App Store approvals are not a serious problem. 73 | They must hear developers complaining. But partners and suppliers 74 | are always complaining. It would be a bad sign if they weren't; 75 | it would mean you were being too easy on them. Meanwhile the iPhone 76 | is selling better than ever. So why do they need to fix anything?They get away with maltreating developers, in the short term, because 77 | they make such great hardware. I just bought a new 27" iMac a 78 | couple days ago. It's fabulous. The screen's too shiny, and the 79 | disk is surprisingly loud, but it's so beautiful that you can't 80 | make yourself care.So I bought it, but I bought it, for the first time, with misgivings. 81 | I felt the way I'd feel buying something made in a country with a 82 | bad human rights record. That was new. In the past when I bought 83 | things from Apple it was an unalloyed pleasure. Oh boy! They make 84 | such great stuff. This time it felt like a Faustian bargain. They 85 | make such great stuff, but they're such assholes. Do I really want 86 | to support this company?* * *Should Apple care what people like me think? 
What difference does 87 | it make if they alienate a small minority of their users?There are a couple reasons they should care. One is that these 88 | users are the people they want as employees. If your company seems 89 | evil, the best programmers won't work for you. That hurt Microsoft 90 | a lot starting in the 90s. Programmers started to feel sheepish 91 | about working there. It seemed like selling out. When people from 92 | Microsoft were talking to other programmers and they mentioned where 93 | they worked, there were a lot of self-deprecating jokes about having 94 | gone over to the dark side. But the real problem for Microsoft 95 | wasn't the embarrassment of the people they hired. It was the 96 | people they never got. And you know who got them? Google and 97 | Apple. If Microsoft was the Empire, they were the Rebel Alliance. 98 | And it's largely because they got more of the best people that 99 | Google and Apple are doing so much better than Microsoft today.Why are programmers so fussy about their employers' morals? Partly 100 | because they can afford to be. The best programmers can work 101 | wherever they want. They don't have to work for a company they 102 | have qualms about.But the other reason programmers are fussy, I think, is that evil 103 | begets stupidity. An organization that wins by exercising power 104 | starts to lose the ability to win by doing better work. And it's 105 | not fun for a smart person to work in a place where the best ideas 106 | aren't the ones that win. I think the reason Google embraced "Don't 107 | be evil" so eagerly was not so much to impress the outside world 108 | as to inoculate themselves against arrogance. 109 | [1]That has worked for Google so far. They've become more 110 | bureaucratic, but otherwise they seem to have held true to their 111 | original principles. With Apple that seems less the case. 
When you 112 | look at the famous 113 | 1984 ad 114 | now, it's easier to imagine Apple as the 115 | dictator on the screen than the woman with the hammer. 116 | [2] 117 | In fact, if you read the dictator's speech it sounds uncannily like a 118 | prophecy of the App Store. 119 | 120 | We have triumphed over the unprincipled dissemination of facts.We have created, for the first time in all history, a garden of 121 | pure ideology, where each worker may bloom secure from the pests 122 | of contradictory and confusing truths. 123 | 124 | The other reason Apple should care what programmers think of them 125 | is that when you sell a platform, developers make or break you. If 126 | anyone should know this, Apple should. VisiCalc made the Apple II.And programmers build applications for the platforms they use. Most 127 | applications—most startups, probably—grow out of personal projects. 128 | Apple itself did. Apple made microcomputers because that's what 129 | Steve Wozniak wanted for himself. He couldn't have afforded a 130 | minicomputer. 131 | [3] 132 | Microsoft likewise started out making interpreters 133 | for little microcomputers because 134 | Bill Gates and Paul Allen were interested in using them. It's a 135 | rare startup that doesn't build something the founders use.The main reason there are so many iPhone apps is that so many programmers 136 | have iPhones. They may know, because they read it in an article, 137 | that Blackberry has such and such market share. But in practice 138 | it's as if RIM didn't exist. If they're going to build something, 139 | they want to be able to use it themselves, and that means building 140 | an iPhone app.So programmers continue to develop iPhone apps, even though Apple 141 | continues to maltreat them. They're like someone stuck in an abusive 142 | relationship. They're so attracted to the iPhone that they can't 143 | leave. But they're looking for a way out. 
One wrote: 144 | 145 | While I did enjoy developing for the iPhone, the control they 146 | place on the App Store does not give me the drive to develop 147 | applications as I would like. In fact I don't intend to make any 148 | more iPhone applications unless absolutely necessary. 149 | [4] 150 | 151 | Can anything break this cycle? No device I've seen so far could. 152 | Palm and RIM haven't a hope. The only credible contender is Android. 153 | But Android is an orphan; Google doesn't really care about it, not 154 | the way Apple cares about the iPhone. Apple cares about the iPhone 155 | the way Google cares about search.* * *Is the future of handheld devices one locked down by Apple? It's 156 | a worrying prospect. It would be a bummer to have another grim 157 | monoculture like we had in the 1990s. In 1995, writing software 158 | for end users was effectively identical with writing Windows 159 | applications. Our horror at that prospect was the single biggest 160 | thing that drove us to start building web apps.At least we know now what it would take to break Apple's lock. 161 | You'd have to get iPhones out of programmers' hands. If programmers 162 | used some other device for mobile web access, they'd start to develop 163 | apps for that instead.How could you make a device programmers liked better than the iPhone? 164 | It's unlikely you could make something better designed. Apple 165 | leaves no room there. So this alternative device probably couldn't 166 | win on general appeal. It would have to win by virtue of some 167 | appeal it had to programmers specifically.One way to appeal to programmers is with software. 
If you 168 | could think of an application programmers had to have, but that 169 | would be impossible in the circumscribed world of the iPhone, 170 | you could presumably get them to switch.That would definitely happen if programmers started to use handhelds 171 | as development machines—if handhelds displaced laptops the 172 | way laptops displaced desktops. You need more control of a development 173 | machine than Apple will let you have over an iPhone.Could anyone make a device that you'd carry around in your pocket 174 | like a phone, and yet would also work as a development machine? 175 | It's hard to imagine what it would look like. But I've learned 176 | never to say never about technology. A phone-sized device that 177 | would work as a development machine is no more miraculous by present 178 | standards than the iPhone itself would have seemed by the standards 179 | of 1995.My current development machine is a MacBook Air, which I use with 180 | an external monitor and keyboard in my office, and by itself when 181 | traveling. If there was a version half the size I'd prefer it. 182 | That still wouldn't be small enough to carry around everywhere like 183 | a phone, but we're within a factor of 4 or so. Surely that gap is 184 | bridgeable. In fact, let's make it an 185 | RFS. Wanted: 186 | Woman with hammer.Notes[1] 187 | When Google adopted "Don't be evil," they were still so small 188 | that no one would have expected them to be, yet. 189 | [2] 190 | The dictator in the 1984 ad isn't Microsoft, incidentally; 191 | it's IBM. IBM seemed a lot more frightening in those days, but 192 | they were friendlier to developers than Apple is now.[3] 193 | He couldn't even afford a monitor. That's why the Apple 194 | I used a TV as a monitor.[4] 195 | Several people I talked to mentioned how much they liked the 196 | iPhone SDK. The problem is not Apple's products but their policies. 
197 | Fortunately policies are software; Apple can change them instantly 198 | if they want to. Handy that, isn't it?Thanks to Sam Altman, Trevor Blackwell, Ross Boucher, 199 | James Bracy, Gabor Cselle, 200 | Patrick Collison, Jason Freedman, John Gruber, Joe Hewitt, Jessica Livingston, 201 | Robert Morris, Teng Siong Ong, Nikhil Pandit, Savraj Singh, and Jared Tame for reading drafts of this. -------------------------------------------------------------------------------- /context_data/PaulGrahamEssays/submarine.txt: -------------------------------------------------------------------------------- 1 | April 2005"Suits make a corporate comeback," says the New 2 | York Times. Why does this sound familiar? Maybe because 3 | the suit was also back in February, 4 | 5 | September 6 | 2004, June 7 | 2004, March 8 | 2004, September 9 | 2003, 10 | 11 | November 12 | 2002, 13 | April 2002, 14 | and February 15 | 2002. 16 | 17 | Why do the media keep running stories saying suits are back? Because 18 | PR firms tell 19 | them to. One of the most surprising things I discovered 20 | during my brief business career was the existence of the PR industry, 21 | lurking like a huge, quiet submarine beneath the news. Of the 22 | stories you read in traditional media that aren't about politics, 23 | crimes, or disasters, more than half probably come from PR firms.I know because I spent years hunting such "press hits." Our startup spent 24 | its entire marketing budget on PR: at a time when we were assembling 25 | our own computers to save money, we were paying a PR firm $16,000 26 | a month. And they were worth it. PR is the news equivalent of 27 | search engine optimization; instead of buying ads, which readers 28 | ignore, you get yourself inserted directly into the stories. [1]Our PR firm 29 | was one of the best in the business. In 18 months, they got press 30 | hits in over 60 different publications. 31 | And we weren't the only ones they did great things for. 
32 | In 1997 I got a call from another 33 | startup founder considering hiring them to promote his company. I 34 | told him they were PR gods, worth every penny of their outrageous 35 | fees. But I remember thinking his company's name was odd. 36 | Why call an auction site "eBay"? 37 | SymbiosisPR is not dishonest. Not quite. In fact, the reason the best PR 38 | firms are so effective is precisely that they aren't dishonest. 39 | They give reporters genuinely valuable information. A good PR firm 40 | won't bug reporters just because the client tells them to; they've 41 | worked hard to build their credibility with reporters, and they 42 | don't want to destroy it by feeding them mere propaganda.If anyone is dishonest, it's the reporters. The main reason PR 43 | firms exist is that reporters are lazy. Or, to put it more nicely, 44 | overworked. Really they ought to be out there digging up stories 45 | for themselves. But it's so tempting to sit in their offices and 46 | let PR firms bring the stories to them. After all, they know good 47 | PR firms won't lie to them.A good flatterer doesn't lie, but tells his victim selective truths 48 | (what a nice color your eyes are). Good PR firms use the same 49 | strategy: they give reporters stories that are true, but whose truth 50 | favors their clients.For example, our PR firm often pitched stories about how the Web 51 | let small merchants compete with big ones. This was perfectly true. 52 | But the reason reporters ended up writing stories about this 53 | particular truth, rather than some other one, was that small merchants 54 | were our target market, and we were paying the piper.Different publications vary greatly in their reliance on PR firms. 55 | At the bottom of the heap are the trade press, who make most of 56 | their money from advertising and would give the magazines away for 57 | free if advertisers would let them. 
[2] The average 58 | trade publication is a bunch of ads, glued together by just enough 59 | articles to make it look like a magazine. They're so desperate for 60 | "content" that some will print your press releases almost verbatim, 61 | if you take the trouble to write them to read like articles.At the other extreme are publications like the New York Times 62 | and the Wall Street Journal. Their reporters do go out and 63 | find their own stories, at least some of the time. They'll listen 64 | to PR firms, but briefly and skeptically. We managed to get press 65 | hits in almost every publication we wanted, but we never managed 66 | to crack the print edition of the Times. [3]The weak point of the top reporters is not laziness, but vanity. 67 | You don't pitch stories to them. You have to approach them as if 68 | you were a specimen under their all-seeing microscope, and make it 69 | seem as if the story you want them to run is something they thought 70 | of themselves.Our greatest PR coup was a two-part one. We estimated, based on 71 | some fairly informal math, that there were about 5000 stores on the 72 | Web. We got one paper to print this number, which seemed neutral 73 | enough. But once this "fact" was out there in print, we could quote 74 | it to other publications, and claim that with 1000 users we had 20% 75 | of the online store market.This was roughly true. We really did have the biggest share of the 76 | online store market, and 5000 was our best guess at its size. But 77 | the way the story appeared in the press sounded a lot more definite.Reporters like definitive statements. For example, many of the 78 | stories about Jeremy Jaynes's conviction say that he was one of the 79 | 10 worst spammers. This "fact" originated in Spamhaus's ROKSO list, 80 | which I think even Spamhaus would admit is a rough guess at the top 81 | spammers. The first stories about Jaynes cited this source, but 82 | now it's simply repeated as if it were part of the indictment. 
83 | [4]All you can say with certainty about Jaynes is that he was a fairly 84 | big spammer. But reporters don't want to print vague stuff like 85 | "fairly big." They want statements with punch, like "top ten." And 86 | PR firms give them what they want. 87 | Wearing suits, we're told, will make us 88 | 3.6 89 | percent more productive.BuzzWhere the work of PR firms really does get deliberately misleading is in 90 | the generation of "buzz." They usually feed the same story to 91 | several different publications at once. And when readers see similar 92 | stories in multiple places, they think there is some important trend 93 | afoot. Which is exactly what they're supposed to think.When Windows 95 was launched, people waited outside stores 94 | at midnight to buy the first copies. None of them would have been 95 | there without PR firms, who generated such a buzz in 96 | the news media that it became self-reinforcing, like a nuclear chain 97 | reaction.I doubt PR firms realize it yet, but the Web makes it possible to 98 | track them at work. If you search for the obvious phrases, you 99 | turn up several efforts over the years to place stories about the 100 | return of the suit. For example, the Reuters article 101 | 102 | that got picked up by USA 103 | Today in September 2004. "The suit is back," it begins.Trend articles like this are almost always the work of 104 | PR firms. Once you know how to read them, it's straightforward to 105 | figure out who the client is. With trend stories, PR firms usually 106 | line up one or more "experts" to talk about the industry generally. 107 | In this case we get three: the NPD Group, the creative director of 108 | GQ, and a research director at Smith Barney. [5] When 109 | you get to the end of the experts, look for the client. And bingo, 110 | there it is: The Men's Wearhouse.Not surprising, considering The Men's Wearhouse was at that moment 111 | running ads saying "The Suit is Back." 
Talk about a successful 112 | press hit-- a wire service article whose first sentence is your own 113 | ad copy.The secret to finding other press hits from a given pitch 114 | is to realize that they all started from the same document back at 115 | the PR firm. Search for a few key phrases and the names of the 116 | clients and the experts, and you'll turn up other variants of this 117 | story.Casual 118 | fridays are out and dress codes are in writes Diane E. Lewis 119 | in The Boston Globe. In a remarkable coincidence, Ms. Lewis's 120 | industry contacts also include the creative director of GQ.Ripped jeans and T-shirts are out, writes Mary Kathleen Flynn in 121 | US News & World Report. And she too knows the 122 | creative director of GQ.Men's suits 123 | are back writes Nicole Ford in Sexbuzz.Com ("the ultimate men's 124 | entertainment magazine").Dressing 125 | down loses appeal as men suit up at the office writes Tenisha 126 | Mercer of The Detroit News. 127 | Now that so many news articles are online, I suspect you could find 128 | a similar pattern for most trend stories placed by PR firms. I 129 | propose we call this new sport "PR diving," and I'm sure there are 130 | far more striking examples out there than this clump of five stories.OnlineAfter spending years chasing them, it's now second nature 131 | to me to recognize press hits for what they are. But before we 132 | hired a PR firm I had no idea where articles in the mainstream media 133 | came from. I could tell a lot of them were crap, but I didn't 134 | realize why.Remember the exercises in critical reading you did in school, where 135 | you had to look at a piece of writing and step back and ask whether 136 | the author was telling the whole truth? 
If you really want to be 137 | a critical reader, it turns out you have to step back one step 138 | further, and ask not just whether the author is telling the truth, 139 | but why he's writing about this subject at all.Online, the answer tends to be a lot simpler. Most people who 140 | publish online write what they write for the simple reason that 141 | they want to. You 142 | can't see the fingerprints of PR firms all over the articles, as 143 | you can in so many print publications-- which is one of the reasons, 144 | though they may not consciously realize it, that readers trust 145 | bloggers more than Business Week.I was talking recently to a friend who works for a 146 | big newspaper. He thought the print media were in serious trouble, 147 | and that they were still mostly in denial about it. "They think 148 | the decline is cyclic," he said. "Actually it's structural."In other words, the readers are leaving, and they're not coming 149 | back. 150 | Why? I think the main reason is that the writing online is more honest. 151 | Imagine how incongruous the New York Times article about 152 | suits would sound if you read it in a blog: 153 | The urge to look corporate-- sleek, commanding, 154 | prudent, yet with just a touch of hubris on your well-cut sleeve-- 155 | is an unexpected development in a time of business disgrace. 156 | 157 | The problem 158 | with this article is not just that it originated in a PR firm. 159 | The whole tone is bogus. This is the tone of someone writing down 160 | to their audience.Whatever its flaws, the writing you find online 161 | is authentic. It's not mystery meat cooked up 162 | out of scraps of pitch letters and press releases, and pressed into 163 | molds of zippy 164 | journalese. It's people writing what they think.I didn't realize, till there was an alternative, just how artificial 165 | most of the writing in the mainstream media was. I'm not saying 166 | I used to believe what I read in Time and Newsweek. 
Since high 167 | school, at least, I've thought of magazines like that more as 168 | guides to what ordinary people were being 169 | told to think than as 170 | sources of information. But I didn't realize till the last 171 | few years that writing for publication didn't have to mean writing 172 | that way. I didn't realize you could write as candidly and 173 | informally as you would if you were writing to a friend.Readers aren't the only ones who've noticed the 174 | change. The PR industry has too. 175 | A hilarious article 176 | on the site of the PR Society of America gets to the heart of the 177 | matter: 178 | Bloggers are sensitive about becoming mouthpieces 179 | for other organizations and companies, which is the reason they 180 | began blogging in the first place. 181 | PR people fear bloggers for the same reason readers 182 | like them. And that means there may be a struggle ahead. As 183 | this new kind of writing draws readers away from traditional media, we 184 | should be prepared for whatever PR mutates into to compensate. 185 | When I think 186 | how hard PR firms work to score press hits in the traditional 187 | media, I can't imagine they'll work any less hard to feed stories 188 | to bloggers, if they can figure out how. 189 | Notes[1] PR has at least 190 | one beneficial feature: it favors small companies. If PR didn't 191 | work, the only alternative would be to advertise, and only big 192 | companies can afford that.[2] Advertisers pay 193 | less for ads in free publications, because they assume readers 194 | ignore something they get for free. This is why so many trade 195 | publications nominally have a cover price and yet give away free 196 | subscriptions with such abandon.[3] Different sections 197 | of the Times vary so much in their standards that they're 198 | practically different papers. 
Whoever fed the style section reporter 199 | this story about suits coming back would have been sent packing by 200 | the regular news reporters.[4] The most striking 201 | example I know of this type is the "fact" that the Internet worm 202 | of 1988 infected 6000 computers. I was there when it was cooked up, 203 | and this was the recipe: someone guessed that there were about 204 | 60,000 computers attached to the Internet, and that the worm might 205 | have infected ten percent of them.Actually no one knows how many computers the worm infected, because 206 | the remedy was to reboot them, and this destroyed all traces. But 207 | people like numbers. And so this one is now replicated 208 | all over the Internet, like a little worm of its own.[5] Not all were 209 | necessarily supplied by the PR firm. Reporters sometimes call a few 210 | additional sources on their own, like someone adding a few fresh 211 | vegetables to a can of soup. 212 | Thanks to Ingrid Basset, Trevor Blackwell, Sarah Harlin, Jessica 213 | Livingston, Jackie McDonough, Robert Morris, and Aaron Swartz (who 214 | also found the PRSA article) for reading drafts of this.Correction: Earlier versions used a recent 215 | Business Week article mentioning del.icio.us as an example 216 | of a press hit, but Joshua Schachter tells me 217 | it was spontaneous. 
-------------------------------------------------------------------------------- /gen_test_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 9, 6 | "id": "46b362f2", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "./test_data/Counting_Stars_EN_multi-evidence-retrieval-searching_128000_32_32.jsonl\n", 14 | "撒了32次星星\n", 15 | "18866\n", 16 | "撒了32次星星\n", 17 | "37223\n", 18 | "撒了32次星星\n", 19 | "55676\n", 20 | "撒了32次星星\n", 21 | "72449\n", 22 | "撒了32次星星\n", 23 | "90226\n", 24 | "撒了32次星星\n", 25 | "108808\n", 26 | "撒了32次星星\n", 27 | "126043\n", 28 | "撒了32次星星\n", 29 | "143722\n", 30 | "撒了32次星星\n", 31 | "161214\n", 32 | "撒了32次星星\n", 33 | "178738\n", 34 | "撒了32次星星\n", 35 | "196217\n", 36 | "撒了32次星星\n", 37 | "214366\n", 38 | "撒了32次星星\n", 39 | "232257\n", 40 | "撒了32次星星\n", 41 | "250212\n", 42 | "撒了32次星星\n", 43 | "267685\n", 44 | "撒了32次星星\n", 45 | "284783\n", 46 | "撒了32次星星\n", 47 | "302384\n", 48 | "撒了32次星星\n", 49 | "320187\n", 50 | "撒了32次星星\n", 51 | "338073\n", 52 | "撒了32次星星\n", 53 | "355557\n", 54 | "撒了32次星星\n", 55 | "374231\n", 56 | "撒了32次星星\n", 57 | "392587\n", 58 | "撒了32次星星\n", 59 | "410318\n", 60 | "撒了32次星星\n", 61 | "428433\n", 62 | "撒了32次星星\n", 63 | "446051\n", 64 | "撒了32次星星\n", 65 | "463849\n", 66 | "撒了32次星星\n", 67 | "482058\n", 68 | "撒了32次星星\n", 69 | "499498\n", 70 | "撒了32次星星\n", 71 | "516604\n", 72 | "撒了32次星星\n", 73 | "533982\n", 74 | "撒了32次星星\n", 75 | "552214\n", 76 | "撒了32次星星\n", 77 | "570151\n", 78 | "共计32条数据\n", 79 | "./test_data/Counting_Stars_ZH_multi-evidence-retrieval-searching_128000_32_32.jsonl\n", 80 | "撒了32次星星\n", 81 | "3289\n", 82 | "撒了32次星星\n", 83 | "6089\n", 84 | "撒了32次星星\n", 85 | "8889\n", 86 | "撒了32次星星\n", 87 | "11689\n", 88 | "撒了32次星星\n", 89 | "14489\n", 90 | "撒了32次星星\n", 91 | "17289\n", 92 | "撒了32次星星\n", 93 | "20089\n", 94 | "撒了32次星星\n", 95 | "22889\n", 96 | 
"撒了32次星星\n", 97 | "25689\n", 98 | "撒了32次星星\n", 99 | "28489\n", 100 | "撒了32次星星\n", 101 | "31288\n", 102 | "撒了32次星星\n", 103 | "34089\n", 104 | "撒了32次星星\n", 105 | "36889\n", 106 | "撒了32次星星\n", 107 | "39689\n", 108 | "撒了32次星星\n", 109 | "42489\n", 110 | "撒了32次星星\n", 111 | "45289\n", 112 | "撒了32次星星\n", 113 | "48089\n", 114 | "撒了32次星星\n", 115 | "50889\n", 116 | "撒了32次星星\n", 117 | "53689\n", 118 | "撒了32次星星\n", 119 | "56489\n", 120 | "撒了32次星星\n", 121 | "59288\n", 122 | "撒了32次星星\n", 123 | "62088\n", 124 | "撒了32次星星\n", 125 | "64888\n", 126 | "撒了32次星星\n", 127 | "67689\n", 128 | "撒了32次星星\n", 129 | "70489\n", 130 | "撒了32次星星\n", 131 | "73289\n", 132 | "撒了32次星星\n", 133 | "76089\n", 134 | "撒了32次星星\n", 135 | "78889\n", 136 | "撒了32次星星\n", 137 | "81689\n", 138 | "撒了32次星星\n", 139 | "84489\n", 140 | "撒了32次星星\n", 141 | "87289\n", 142 | "撒了32次星星\n", 143 | "90089\n", 144 | "共计32条数据\n", 145 | "./test_data/Counting_Stars_EN_multi-evidence-retrieval-reasoning_128000_32_32.jsonl\n", 146 | "撒了32次星星\n", 147 | "22568\n", 148 | "撒了32次星星\n", 149 | "40925\n", 150 | "撒了32次星星\n", 151 | "59378\n", 152 | "撒了32次星星\n", 153 | "76151\n", 154 | "撒了32次星星\n", 155 | "93928\n", 156 | "撒了32次星星\n", 157 | "112510\n", 158 | "撒了32次星星\n", 159 | "129745\n", 160 | "撒了32次星星\n", 161 | "147424\n", 162 | "撒了32次星星\n", 163 | "164916\n", 164 | "撒了32次星星\n", 165 | "182440\n", 166 | "撒了32次星星\n", 167 | "199919\n", 168 | "撒了32次星星\n", 169 | "218068\n", 170 | "撒了32次星星\n", 171 | "235959\n", 172 | "撒了32次星星\n", 173 | "253914\n", 174 | "撒了32次星星\n", 175 | "271387\n", 176 | "撒了32次星星\n", 177 | "288485\n", 178 | "撒了32次星星\n", 179 | "306086\n", 180 | "撒了32次星星\n", 181 | "323889\n", 182 | "撒了32次星星\n", 183 | "341775\n", 184 | "撒了32次星星\n", 185 | "359259\n", 186 | "撒了32次星星\n", 187 | "377933\n", 188 | "撒了32次星星\n", 189 | "396289\n", 190 | "撒了32次星星\n", 191 | "414020\n", 192 | "撒了32次星星\n", 193 | "432135\n", 194 | "撒了32次星星\n", 195 | "449753\n", 196 | "撒了32次星星\n", 197 | "467551\n", 198 | "撒了32次星星\n", 199 | "485760\n", 200 | "撒了32次星星\n", 201 | "503200\n", 
# Generates the Counting-Stars test sets: a long context (English essays or a
# Chinese novel) into which m "the little penguin counted N ★" sentences are
# inserted, followed by a question asking the model to list the counted numbers.
import ast
import glob
import json
import os
import random
import re
import uuid

import jsonlines
import pandas as pd
import requests
from tqdm import trange


def get_context(language="EN"):
    """Load the haystack text for the given language as one long string.

    Args:
        language: "EN" reads every ``*.txt`` essay under
            ``./context_data/PaulGrahamEssays`` and joins them with newlines
            replaced by spaces. Any other value reads
            ``./context_data/The_Story_of_the_Stone.txt`` and strips
            whitespace, ASCII punctuation and ASCII letters.

    Returns:
        The concatenated context string.
    """
    if language == "EN":
        essays = []
        # glob order depends on the file system; sort so that the generated
        # data is reproducible across machines.
        for path in sorted(glob.glob("./context_data/PaulGrahamEssays/*.txt")):
            with open(path, "r", encoding="utf-8") as f:
                essays.append(f.read().replace("\n", " "))
        return "".join(essays)

    string_punctuation = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
    with open("./context_data/The_Story_of_the_Stone.txt", "r", encoding="utf-8") as context_file:
        # Per line: drop surrounding whitespace, separator rules and every
        # remaining space so the text becomes one uninterrupted run.
        context = "".join(
            line.strip().replace("------------", " ").replace("\n", " ").replace(" ", "")
            for line in context_file
        )
    context = re.sub('[{}]'.format(string_punctuation), "", context)
    context = re.sub('[a-zA-Z]', '', context)
    return context


def get_stars(stars_dir, counting_times):
    """Return the list of star counts stored for ``counting_times`` insertions.

    The first line of the file at ``stars_dir`` is parsed as a literal mapping
    and indexed by ``str(counting_times)``.

    Raises:
        KeyError: if the mapping has no entry for ``counting_times``.
        ValueError, SyntaxError: if the first line is not a valid literal.
    """
    with open(stars_dir, "r", encoding="utf-8") as stars_file:
        # literal_eval instead of eval: the file only needs to hold a literal
        # mapping, so there is no reason to execute arbitrary expressions.
        return ast.literal_eval(stars_file.readline().strip())[str(counting_times)]


def sentence_with_star(language, test_type, indicator):
    """Build the sentence that hides the ``indicator``-th star count.

    For the searching task the sentence states ``a_stars[indicator]`` only.
    For any other task type it first states ``r_stars[indicator]`` as a
    miscount and then ``a_stars[indicator]`` as the corrected count.
    Reads the module-level ``a_stars`` and ``r_stars`` lists.
    """
    searching = test_type == "multi-evidence-retrieval-searching"
    if language == "ZH":
        if searching:
            return f"\n小企鹅数了{a_stars[indicator]}颗★\n"
        return f"\n小企鹅数了{r_stars[indicator]}颗★,但发现数错了,于是又数了一遍,这次数对了,是{a_stars[indicator]}颗★\n"
    if searching:
        return f"\nThe little penguin counted {a_stars[indicator]} ★\n"
    return f"\nThe little penguin counted {r_stars[indicator]} ★, but found that a mistake had been made, so the counting was done again, and this time {a_stars[indicator]} ★ was counted correctly.\n"


def select_question(language, test_type):
    """Return the final instruction appended after the context.

    The prompt is chosen by language ("ZH" or anything else for English) and
    by task type (searching, or anything else for reasoning).
    """
    if language == "ZH":
        searching_question = "\n\n\n\n在这个月光皎洁、云雾缭绕的夜晚,小企鹅正望向天空,全神贯注地数★。请帮助小企鹅收集所数★的颗数,按照如下格式:{\"小企鹅\":[x,x,x,...]},不要求和,[x,x,x,...]中数字为小企鹅每次数★的颗数,仅以JSON格式输出结果,不需要输出任何解释。"
        reasoning_question = "\n\n\n\n在这个月光皎洁、云雾缭绕的夜晚,小企鹅正望向天空,全神贯注地数★。请帮助小企鹅收集所数★的正确颗数,按照如下格式:{\"小企鹅\":[x,x,x,...]},不要求和,[x,x,x,...]中数字为小企鹅正确数★的颗数,仅以JSON格式输出结果,不需要输出任何解释。"
    else:
        searching_question = "\n\n\n\n" + "On this moonlit and misty night, the little penguin is looking up at the sky and concentrating on counting ★. Please help the little penguin collect the number of ★, for example: {\"little_penguin\": [x, x, x,...]}. The summation is not required, and the numbers in [x, x, x,...] represent the counted number of ★ by the little penguin. Only output the results in JSON format without any explanation."
        reasoning_question = "\n\n\n\n" + "On this moonlit and misty night, the little penguin is looking up at the sky and concentrating on counting ★. Please help the little penguin collect the correct number of ★, for example: {\"little_penguin\": [x, x, x,...]}. The summation is not required, and the numbers in [x, x, x,...] represent the correctly counted number of ★ by the little penguin. Only output the results in JSON format without any explanation."
    if test_type == "multi-evidence-retrieval-searching":
        return searching_question
    return reasoning_question


def _sprinkle_stars(base_text, language, task_type, j, m):
    """Insert up to ``m`` star sentences into ``base_text`` at regular steps.

    Positions are counted in characters for "ZH" and in space-separated words
    otherwise. The offset added for earlier insertions is the length of the
    *current* sentence times the number of sentences already inserted, which
    is how the released data was produced, so it is kept as is.
    """
    step = int(j / m)
    text = base_text
    indicator = 0
    for k in range(0, j, step):
        single_star = sentence_with_star(language, task_type, indicator)
        if language == "ZH":
            cut = k + step + len(single_star) * indicator
            text = text[:cut] + single_star + text[cut:]
        else:
            words = text.split(" ")
            cut = len(single_star.split(" ")) * indicator + k + step
            text = " ".join(words[:cut]) + single_star + " ".join(words[cut:])
        indicator += 1
        if indicator == m:
            # Message reads "sprinkled {indicator} stars".
            print(f"撒了{indicator}次星星")
            break
    return text


m = 32
n = 32
version = [[m, n]]
language_types = ["EN", "ZH"]
task_types = ["multi-evidence-retrieval-searching", "multi-evidence-retrieval-reasoning"]
# MeRS-ZH, MeRS-EN, MeRR-EN, MeRR-ZH
# NOTE: the star lists are loaded for the module-level m; an entry in
# `version` with a different m would still use these lists.
a_stars = get_stars("./context_data/a_stars.txt", m)
r_stars = get_stars("./context_data/r_stars.txt", m)
max_context_length = 128000

if __name__ == '__main__':
    for task_type in task_types:
        for language in language_types:
            # presumably scales the nominal context length into words (EN) or
            # characters (ZH) -- confirm against the benchmark description
            scalar = 0.8 if language == "EN" else 0.7
            context = get_context(language=language)
            # Split once per language instead of once per context size.
            context_words = context.split(" ") if language == "EN" else None
            question = select_question(language, task_type)
            for m, n in version:
                line_count = 0
                interval = int(max_context_length/n)
                context_size = [int(i*scalar) for i in range(interval, max_context_length+1, interval)]
                file_name = f"./test_data/Counting_Stars_{language}_{task_type}_{max_context_length}_{m}_{n}.jsonl"
                # The output directory is not guaranteed to exist on a fresh checkout.
                os.makedirs(os.path.dirname(file_name), exist_ok=True)
                print(file_name)
                with open(file_name, "w", encoding="utf-8") as test_data:
                    for j in context_size:
                        if language == "EN":
                            base_text = " ".join(context_words[:j])
                        else:
                            base_text = context[:j]
                        sprinkle_stars_context = _sprinkle_stars(base_text, language, task_type, j, m)
                        full_prompt = sprinkle_stars_context + question
                        print(len(full_prompt))
                        output_template = {"question": full_prompt, "context_size": j, "retrieval_question": question,
                                           "reference_counting_results": a_stars, "parameters": {"temperature": 0.0}}
                        print(json.dumps(output_template, ensure_ascii=False), file=test_data)
                        line_count += 1
                # Message reads "{line_count} records in total".
                print(f"共计{line_count}条数据")