├── .gitignore ├── tests ├── requirements.txt ├── samples │ ├── test_utils │ │ ├── 1.html │ │ └── 2.html │ └── test_apiv1 │ │ └── go-resolutions.html ├── test_utils.py ├── conftest.py ├── test_models.py └── test_apiv1.py ├── requirements.txt ├── application.py ├── .travis.yml ├── Dockerfile ├── tldr ├── __init__.py ├── utils.py ├── models.py └── apiv1 │ └── __init__.py ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .*.swp 3 | __pycache__ 4 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest>=3.0.5 2 | pytest-cov>=2.4.0 3 | coveralls>=1.1 4 | -------------------------------------------------------------------------------- /tests/samples/test_utils/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suminb/tldr/HEAD/tests/samples/test_utils/1.html -------------------------------------------------------------------------------- /tests/samples/test_utils/2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suminb/tldr/HEAD/tests/samples/test_utils/2.html -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | flask>=0.11.1 2 | JPype1-py3>=0.5.5.2 3 | konlpy>=0.4.4 4 | requests>=2.12.1 5 | textrankr>=0.3 6 | logbook>=1.0.0 7 | beautifulsoup4>=4.5.1 8 | newspaper3k>=0.1.7 9 | -------------------------------------------------------------------------------- /application.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from tldr import create_app 4 | 5 | 6 | if __name__ == '__main__': 7 | application = create_app() 8 | 
host = os.environ.get('HOST', '0.0.0.0') 9 | port = int(os.environ.get('PORT', 8004)) 10 | 11 | application.run(host=host, port=port) 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: python 4 | python: 5 | - "3.5" 6 | 7 | env: 8 | global: 9 | - PYTHONPATH=. 10 | 11 | install: 12 | - pip install -r requirements.txt 13 | - pip install -r tests/requirements.txt 14 | 15 | script: 16 | - py.test -v tests --cov tldr --cov-report term-missing 17 | 18 | after_success: 19 | - coveralls 20 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.10 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y g++ openjdk-8-jdk \ 5 | python3-dev python3-dev python3-pip \ 6 | git 7 | 8 | RUN git clone https://github.com/suminb/tldr.git 9 | WORKDIR tldr 10 | RUN git checkout develop 11 | RUN pip3 install -r requirements.txt 12 | 13 | ENV PORT=8804 14 | EXPOSE 8804 15 | CMD python3 application.py 16 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from tldr.utils import extract_text 6 | 7 | 8 | @pytest.mark.parametrize('sample_file', os.listdir('tests/samples/test_utils')) 9 | def test_extract_text(sample_file): 10 | path = os.path.join('tests', 'samples', 'test_utils', sample_file) 11 | with open(path, encoding='euc-kr') as fin: 12 | html = fin.read() 13 | text = extract_text(html) 14 | # NOTE: Not sure if this is good enough 15 | assert len(text) > 250 16 | -------------------------------------------------------------------------------- /tests/conftest.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from tldr import create_app 4 | 5 | 6 | @pytest.fixture(scope='module') 7 | def app(request): 8 | """Session-wide test `Flask` application.""" 9 | settings_override = { 10 | 'TESTING': True, 11 | } 12 | app = create_app(__name__, config=settings_override) 13 | 14 | # Establish an application context before running the tests. 15 | ctx = app.app_context() 16 | ctx.push() 17 | 18 | def teardown(): 19 | ctx.pop() 20 | 21 | request.addfinalizer(teardown) 22 | return app 23 | 24 | 25 | @pytest.fixture 26 | def testapp(app): 27 | return app.test_client() 28 | -------------------------------------------------------------------------------- /tldr/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from flask import Flask 4 | 5 | __version__ = '0.1.3' 6 | __author__ = 'Sumin Byeon' 7 | __email__ = 'suminb@gmail.com' 8 | 9 | 10 | def create_app(name=__name__, config={}, static_folder='static', 11 | template_folder='templates'): 12 | 13 | app = Flask(name, static_folder=static_folder, 14 | template_folder=template_folder) 15 | app.secret_key = os.environ.get('SECRET', 'secret') 16 | app.config['DEBUG'] = bool(os.environ.get('DEBUG', False)) 17 | app.config.update(config) 18 | 19 | from tldr.apiv1 import apiv1_module 20 | app.register_blueprint(apiv1_module, url_prefix='/api/v1/') 21 | 22 | return app 23 | -------------------------------------------------------------------------------- /tests/test_models.py: -------------------------------------------------------------------------------- 1 | # import pytest 2 | 3 | from tldr.models import Article 4 | 5 | 6 | def test_canonical_url(): 7 | html = """ 8 | 9 | 10 | 11 | 12 | 13 | """ # noqa 14 | article = Article(html) 15 | assert article.canonical_url == 'https://techcrunch.com/2016/12/22/aws-catapulted-amazon-into-a-breakout-2016-on-wall-street/' # noqa 16 | 17 
| 18 | # def test_canonical_url_not_found(): 19 | # article = Article('') 20 | # with pytest.raises(ValueError): 21 | # article.canonical_url 22 | -------------------------------------------------------------------------------- /tldr/utils.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import jpype 3 | from newspaper import fulltext 4 | from textrankr import TextRank 5 | 6 | 7 | TEXT_HTML_RATIO_THRESHOLD = 0.01 8 | 9 | 10 | def extract_text(html): 11 | """Extracts text from raw HTML.""" 12 | text = fulltext(html, 'ko') 13 | 14 | # NOTE: Is this an appropriate condition to check? 15 | if float(len(text)) / len(html) > TEXT_HTML_RATIO_THRESHOLD: 16 | return text 17 | 18 | soup = BeautifulSoup(html, 'html.parser') 19 | article_tag_text = extract_text_from_article_tag(soup) 20 | 21 | if len(article_tag_text) > len(text): 22 | return article_tag_text 23 | 24 | return text 25 | 26 | 27 | def extract_text_from_article_tag(soup): 28 | article_tag = soup.find('article') 29 | if article_tag: 30 | return article_tag.text 31 | else: 32 | return '' 33 | 34 | 35 | def summarize_text(text): 36 | jpype.attachThreadToJVM() 37 | textrank = TextRank(text) 38 | return textrank.summarize() 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Sumin Byeon. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software without 15 | specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Travis](https://img.shields.io/travis/suminb/tldr.svg)](https://travis-ci.org/suminb/tldr) 2 | [![Coveralls](https://img.shields.io/coveralls/suminb/tldr.svg)](https://coveralls.io/github/suminb/tldr) 3 | 4 | TL;DR 5 | ----- 6 | TL;DR, which stands for "Too Long; Didn't Read", is a text summarization service. 
7 | 8 | Obtaining Docker Image 9 | ---------------------- 10 | The pre-built Docker image can be obtained by running the following command: 11 | 12 | docker pull sumin/tldr 13 | 14 | Code Build 15 | ---------- 16 | If you would like to build the code yourself, you will need the following 17 | things: 18 | 19 | - Docker 20 | - Python 2.x or 3.x 21 | - Java 6 or higher 22 | 23 | The Docker image can be built by running the following command: 24 | 25 | docker build ${directory containing Dockerfile} 26 | 27 | Run 28 | --- 29 | 30 | docker run -d -p ${host port}:8804 sumin/tldr 31 | 32 | For example, if you would like to map port 8804 of the container to the 33 | identical port number (8804) of the host, you may want to run the following 34 | command. 35 | 36 | docker run -d -p 8804:8804 sumin/tldr 37 | 38 | Invocation 39 | ---------- 40 | RESTful APIs can be invoked via any standard HTTP client. Use of `curl` or 41 | `wget` is advised. 42 | 43 | 44 | ### Text Summarization 45 | 46 | curl -XPOST -d "text=This is a text" http://localhost:8804/api/v1/summarize 47 | 48 | ### Text Extraction 49 | 50 | curl -XPOST -d "html=Example" \ 51 | http://localhost:8804/api/v1/extract-text 52 | 53 | Tests 54 | ----- 55 | 56 | py.test -v tests 57 | -------------------------------------------------------------------------------- /tests/test_apiv1.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | 6 | def test_extract_text(testapp): 7 | html = '

Some text

' 8 | resp = testapp.post('/api/v1/extract-text', data={'html': html}) 9 | assert resp.status_code == 200 10 | assert resp.data.decode('utf-8') == 'Some text' 11 | 12 | 13 | @pytest.mark.parametrize('filename, start, end', [ 14 | ('go-resolutions.html', 15 | 'My Go Resolutions for 2017', 16 | 'I understand the solution space better.') 17 | ]) 18 | def test_extract_test_long(testapp, filename, start, end): 19 | path = os.path.join('tests', 'samples', 'test_apiv1', filename) 20 | with open(path) as fin: 21 | html = fin.read() 22 | 23 | resp = testapp.post('/api/v1/extract-text', data={'html': html}) 24 | assert resp.status_code == 200 25 | 26 | text = resp.data.decode('utf-8') 27 | assert text.startswith(start) 28 | assert text.endswith(end) 29 | 30 | 31 | # TODO: Need to test against longer text 32 | def test_summarize_text(testapp): 33 | text = 'This is some sample text' 34 | resp = testapp.post('/api/v1/summarize', data={'text': text}) 35 | assert resp.status_code == 200 36 | 37 | 38 | @pytest.mark.parametrize('content_type', ['application/json', 'text/plain']) 39 | def test_summarize_url(testapp, content_type): 40 | url = 'https://github.com/suminb/finance' 41 | headers = {'Accept': content_type} 42 | data = {'url': url} 43 | resp = testapp.post('/api/v1/summarize-url', data=data, headers=headers) 44 | assert resp.status_code == 200 45 | assert resp.headers['Content-Type'].split(';')[0] == content_type 46 | -------------------------------------------------------------------------------- /tldr/models.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from tldr.utils import extract_text, summarize_text 4 | 5 | 6 | class Article(object): 7 | 8 | #: BeautifulSoup object 9 | soup = None 10 | 11 | _title = None 12 | _text = None 13 | _summary = None 14 | 15 | def __init__(self, html): 16 | self.html = html 17 | 18 | def init_soup(self): 19 | if self.soup is None: 20 | self.soup = 
BeautifulSoup(self.html, 'html.parser') 21 | return self.soup 22 | 23 | @property 24 | def published_at(self): 25 | raise NotImplementedError() 26 | 27 | @property 28 | def fetched_at(self): 29 | raise NotImplementedError() 30 | 31 | @property 32 | def title(self): 33 | if self._title is None: 34 | soup = self.init_soup() 35 | self._title = soup.title.get_text().strip() 36 | return self._title 37 | 38 | @property 39 | def url(self): 40 | raise NotImplementedError() 41 | 42 | @property 43 | def canonical_url(self): 44 | soup = self.init_soup() 45 | link = soup.find('link', {'rel': 'canonical'}) 46 | if link is None: 47 | # NOTE: Should we return None or raise an error? 48 | # raise ValueError('Canonical URL not found') 49 | return None 50 | 51 | href = link.get('href') 52 | if href is None: 53 | # raise ValueError('Canonical URL not found') 54 | return None 55 | 56 | return href 57 | 58 | # TODO: Make @cached_property 59 | @property 60 | def text(self): 61 | """Extracts text body (an article) from HTML.""" 62 | # FIXME: What's going to happen when no article is found? 
63 | 64 | if self._text is None: 65 | self._text = extract_text(self.html) 66 | return self._text 67 | 68 | @property 69 | def summary(self): 70 | if self._summary is None: 71 | self._summary = summarize_text(self.text) 72 | return self._summary 73 | 74 | def as_dict(self): 75 | keys = ['canonical_url', 'title', 'html', 'text', 'summary'] 76 | return {key: getattr(self, key) for key in keys} 77 | -------------------------------------------------------------------------------- /tldr/apiv1/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from flask import Blueprint, jsonify, request 4 | from logbook import Logger, StreamHandler 5 | import requests 6 | 7 | from tldr.models import Article 8 | from tldr.utils import summarize_text 9 | 10 | 11 | apiv1_module = Blueprint('apiv1', __name__, template_folder='templates') 12 | 13 | log = Logger(__name__) 14 | log.handlers.append(StreamHandler(sys.stdout, level='INFO')) 15 | 16 | 17 | # TODO: Make this more generic 18 | # TODO: Move elsewhere 19 | def json_requested(): 20 | """Determines whether the requested content type is application/json.""" 21 | best = request.accept_mimetypes \ 22 | .best_match(['application/json', 'text/plain']) 23 | return best == 'application/json' and \ 24 | request.accept_mimetypes[best] > \ 25 | request.accept_mimetypes['text/plain'] 26 | 27 | 28 | @apiv1_module.route('summarize', methods=['POST']) 29 | def summarize(): 30 | """Summarizes given text.""" 31 | text = request.form['text'] 32 | return summarize_text(text) 33 | 34 | 35 | @apiv1_module.route('summarize-url', methods=['POST']) 36 | def summarize_url(): 37 | """Summarizes a given URL.""" 38 | url = request.form['url'] 39 | 40 | log.info('Fetching url {}', url) 41 | resp, html = fetch_url(url) 42 | 43 | article = Article(html) 44 | 45 | if json_requested(): 46 | data = article.as_dict() 47 | data['url'] = resp.url 48 | return jsonify(data) 49 | else: 50 | headers = 
{'Content-Type': 'text/plain; charset=utf-8'} 51 | return article.summary, 200, headers 52 | 53 | 54 | @apiv1_module.route('extract-text', methods=['POST']) 55 | def extract_text(): 56 | """Extracts text from an HTML document.""" 57 | html = request.form['html'] 58 | article = Article(html) 59 | try: 60 | return article.text 61 | except AttributeError as e: 62 | log.warn(e) 63 | # NOTE: When a parsing error occurs, an AttributeError is raised. 64 | # We'll deal with this exception later. 65 | return '' 66 | 67 | 68 | def fetch_url(url, params={}): 69 | resp = requests.get(url, params=params) 70 | # FIXME: This is a temporary workaround, as some Korean websites do not 71 | # specify which encoding they use. 72 | try: 73 | return resp, resp.content.decode('utf-8') 74 | except UnicodeDecodeError: 75 | return resp, resp.content.decode('euc-kr') 76 | -------------------------------------------------------------------------------- /tests/samples/test_apiv1/go-resolutions.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | research!rsc: My Go Resolutions for 2017 5 | 6 | 7 | 8 | 9 | 10 | 218 | 219 | 220 | 221 | 222 |
223 |

research!rsc

224 |

Thoughts and links about programming, 225 | by

226 | 227 |
228 | 229 |
230 |
231 |

My Go Resolutions for 2017 232 |
233 |
234 | 235 | Posted on Wednesday, January 18, 2017. 236 | 237 | 238 |
239 |
240 |

241 |

’Tis the season for resolutions, 242 | and I thought it would make sense to write a little 243 | about what I hope to work on this year as far as Go is concerned.

244 | 245 |

My goal every year is to help Go developers. 246 | I want to make sure that the work we do on the Go team 247 | has a significant, positive impact on Go developers. 248 | That may sound obvious, but there are a variety of common ways to fail to achieve that: 249 | for example, spending too much time cleaning up or optimizing code that doesn’t need it; 250 | responding only to the most common or recent complaints or requests; 251 | or focusing too much on short-term improvements. 252 | It’s important to step back and make sure we’re focusing 253 | our development work where it does the most good.

254 | 255 |

This post outlines a few of my own major focuses for this year. 256 | This is only my personal list, not the Go team’s list.

257 | 258 |

One reason for posting this is to gather feedback. 259 | If these spark any ideas or suggestions of your own, 260 | please feel free to comment below or on the linked GitHub issues.

261 | 262 |

Another reason is to make clear that I’m aware of these issues as important. 263 | I think too often people interpret lack of action by the Go team 264 | as a signal that we think everything is perfect, when instead 265 | there is simply other, higher priority work to do first.

266 | 267 |

Type aliases

268 | 269 |

There is a recurring problem with moving types 270 | from one package to another during large codebase refactorings. 271 | We tried to solve it last year with general aliases, 272 | which didn’t work for at least two reasons: we didn’t explain the change well enough, 273 | and we didn’t deliver it on time, so it wasn’t ready for Go 1.8. 274 | Learning from that experience, 275 | I gave a talk 276 | and wrote an article 277 | about the underlying problem, 278 | and that started a productive discussion 279 | on the Go issue tracker about the solution space. 280 | It looks like more limited type aliases 281 | are the right next step. 282 | I want to make sure those land smoothly in Go 1.9. #18130.

283 | 284 |

Package management

285 | 286 |

I designed the Go support for downloading published packages 287 | (“goinstall”, which became “go get”) in February 2010. 288 | A lot has happened since then. 289 | In particular, other language ecosystems have really raised the bar 290 | for what people expect from package management, 291 | and the open source world has mostly agreed on 292 | semantic versioning, which provides a useful base 293 | for inferring version compatibility. 294 | Go needs to do better here, and a group of contributors have been 295 | working on a solution. 296 | I want to make sure these ideas are integrated well 297 | into the standard Go toolchain and to make package management 298 | a reason that people love Go.

299 | 300 |

Build improvements

301 | 302 |

There are a handful of shortcomings in the design of 303 | the go command’s build system that are overdue to be fixed. 304 | Here are three representative examples that I intend to 305 | address with a bit of a redesign of the internals of the go command.

306 | 307 |

Builds can be too slow, 308 | because the go command doesn’t cache build results as aggressively as it should. 309 | Many people don’t realize that go install saves its work while go build does not, 310 | and then they run repeated go build commands that are slow 311 | because the later builds do more work than they should need to. 312 | The same for repeated go test without go test -i when dependencies are modified. 313 | All builds should be as incremental as possible. 314 | #4719.

315 | 316 |

Test results should be cached too: 317 | if none of the inputs to a test have changed, 318 | then usually there is no need to rerun the test. 319 | This will make it very cheap to run “all tests” when little or nothing has changed. 320 | #11193.

321 | 322 |

Work outside GOPATH should be supported nearly as well 323 | as work inside GOPATH. 324 | In particular, it should be possible to git clone a repo, 325 | cd into it, and run go commands and have them work fine. 326 | Package management only makes that more important: 327 | you’ll need to be able to work on different versions of a package (say, v1 and v2) 328 | without having entirely separate GOPATHs for them. 329 | #17271.

330 | 331 |

Code corpus

332 | 333 |

I think it helped to have concrete examples from real projects 334 | in the talk and article I prepared about codebase refactoring (see above). 335 | We’ve also defined that additions to vet 336 | must target problems that happen frequently in real programs. 337 | I’d like to see that kind of analysis of actual practice—examining 338 | the effects on and possible improvements to real programs—become a 339 | standard way we discuss and evaluate changes to Go.

340 | 341 |

Right now there’s not an agreed-upon representative corpus of code to use for 342 | those analyses: everyone must first create their own, which is too much work. 343 | I’d like to put together a single, self-contained Git repo people can check out that 344 | contains our official baseline corpus for those analyses. 345 | A possible starting point could be the top 100 Go language repos 346 | on GitHub by stars or forks or both.

347 | 348 |

Automatic vet

349 | 350 |

The Go distribution ships with this powerful tool, 351 | go vet, 352 | that points out correctness bugs. 353 | We have a high bar for checks, so that when vet speaks, you should listen. 354 | But everyone has to remember to run it. 355 | It would be better if you didn’t have to remember. 356 | In particular, I think we could probably run vet 357 | in parallel with the final compile and link of the test binary 358 | during go test without slowing the compile-edit-test cycle at all. 359 | If we can do that, and if we limit the enabled vet checks to a subset 360 | that is essentially 100% accurate, 361 | we can make passing vet a precondition for running a test at all. 362 | Then developers don’t need to remember to run go vet. 363 | They run go test, 364 | and once in a while vet speaks up with something important 365 | and avoids a debugging session. 366 | #18084, 367 | #18085.

368 | 369 |

Errors & best practices

370 | 371 |

Part of the intended contract for error reporting in Go is that functions 372 | include relevant available context, including the operation being attempted 373 | (such as the function name and its arguments). 374 | For example, this program:

375 | 376 |
err := os.Remove("/tmp/nonexist")
377 | fmt.Println(err)
378 | 
379 | 380 |

prints this output:

381 | 382 |
remove /tmp/nonexist: no such file or directory
383 | 
384 | 385 |

Not enough Go code adds context like os.Remove does. Too much code does only

386 | 387 |
if err != nil {
388 |     return err
389 | }
390 | 
391 | 392 |

all the way up the call stack, 393 | discarding useful context that should be reported 394 | (like remove /tmp/nonexist: above). 395 | I would like to try to understand whether our expectations 396 | for including context are wrong, or if there is something 397 | we can do to make it easier to write code that returns better errors.

398 | 399 |

There are also various discussions in the community about 400 | agreed-upon interfaces for stripping error context. 401 | I would like to try to understand when that makes sense and 402 | whether we should adopt an official recommendation.

403 | 404 |

Context & best practices

405 | 406 |

We added the new context package 407 | in Go 1.7 for holding request-scoped information like 408 | timeouts, cancellation state, and credentials. 409 | An individual context is immutable (like an individual string or int): 410 | it is only possible to derive a new, updated context and 411 | pass that context explicitly further down the call stack or 412 | (less commonly) back up to the caller. 413 | The context is now carried through APIs such as 414 | database/sql 415 | and 416 | net/http, 417 | mainly so that those can stop processing a request when the caller 418 | is no longer interested in the result. 419 | Timeout information is appropriate to carry in a context, 420 | but—to use a real example we removed—database options 421 | are not, because they are unlikely to apply equally well to all possible 422 | database operations carried out during a request. 423 | What about the current clock source, or logging sink? 424 | Is either of those appropriate to store in a context? 425 | I would like to try to understand and characterize the 426 | criteria for what is and is not an appropriate use of context.

427 | 428 |

Memory model

429 | 430 |

Go’s memory model is intentionally low-key, 431 | making few promises to users, compared to other languages. 432 | In fact it starts by discouraging people from reading the rest of the document. 433 | At the same time, it demands more of the compiler than other languages: 434 | in particular, a race on an integer value is not sufficient license 435 | for your program to misbehave in arbitrary ways. 436 | But there are some complete gaps, in particular no mention of 437 | the sync/atomic package. 438 | I think the core compiler and runtime developers all agree 439 | that the behavior of those atomics should be roughly the same as 440 | C++ seqcst atomics or Java volatiles, 441 | but we still need to write that down carefully in the memory model, 442 | and probably also in a long blog post. 443 | #5045, 444 | #7948, 445 | #9442.

446 | 447 |

Immutability

448 | 449 |

The race detector 450 | is one of Go’s most loved features. 451 | But not having races would be even better. 452 | I would love it if there were some reasonable way to integrate 453 | reference immutability into Go, 454 | so that programmers can make clear, checked assertions about what can and cannot 455 | be written and thereby eliminate certain races at compile time. 456 | Go already has one immutable type, string; it would 457 | be nice to retroactively define that 458 | string is a named type (or type alias) for immutable []byte. 459 | I don’t think that will happen this year, 460 | but I’d like to understand the solution space better. 461 | Javari, Midori, Pony, and Rust have all staked out interesting points 462 | in the solution space, and there are plenty of research papers 463 | beyond those.

464 | 465 |

In the long-term, if we could statically eliminate the possibility of races, 466 | that would eliminate the need for most of the memory model. 467 | That may well be an impossible dream, 468 | but again I’d like to understand the solution space better.

469 | 470 |

Generics

471 | 472 |

Nothing sparks more heated arguments 473 | among Go and non-Go developers than the question of whether Go should 474 | have support for generics (or how many years ago that should have happened). 475 | I don’t believe the Go team has ever said “Go does not need generics.” 476 | What we have said is that there are higher-priority issues facing Go. 477 | For example, I believe that better support for package management 478 | would have a much larger immediate positive impact on most Go developers 479 | than adding generics. 480 | But we do certainly understand that for a certain subset of Go use cases, 481 | the lack of parametric polymorphism is a significant hindrance.

482 | 483 |

Personally, I would like to be able to write general channel-processing 484 | functions like:

485 | 486 |
// Join makes all messages received on the input channels
487 | // available for receiving from the returned channel.
488 | func Join(inputs ...<-chan T) <-chan T
489 | 
490 | // Dup duplicates messages received on c to both c1 and c2.
491 | func Dup(c <-chan T) (c1, c2 <-chan T)
492 | 
493 | 494 |

I would also like to be able to write 495 | Go support for high-level data processing abstractions, 496 | analogous to 497 | FlumeJava or 498 | C#’s LINQ, 499 | in a way that catches type errors at compile time instead of at run time. 500 | There are also any number of data structures or generic algorithms 501 | that might be written, 502 | but I personally find these broader applications more compelling.

503 | 504 |

We’ve struggled off and on 505 | for years 506 | to find the right way to add generics to Go. 507 | At least a few of the past proposals got hung up on trying to design 508 | something that provided both general parametric polymorphism 509 | (like chan T) and also a unification of string and []byte. 510 | If the latter is handled by parameterization over immutability, 511 | as described in the previous section, then maybe that simplifies 512 | the demands on a design for generics.

513 | 514 |

When I first started thinking about generics for Go in 2008, 515 | the main examples to learn from were C#, Java, Haskell, and ML. 516 | None of the approaches in those languages seemed like a 517 | perfect fit for Go. 518 | Today, there are newer attempts to learn from as well, 519 | including Dart, Midori, Rust, and Swift.

520 | 521 |

It’s been a few years since we ventured out and explored the design space. 522 | It is probably time to look around again, 523 | especially in light of the insight about mutability and 524 | the additional examples set by newer languages. 525 | I don’t think generics will happen this year, 526 | but I’d like to be able to say I understand the solution space better.

527 | 528 |
529 | 530 | 531 |
532 | 544 | 545 |
546 | 547 | 548 | 549 | 550 | 554 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | --------------------------------------------------------------------------------