├── tests ├── py.test.ini ├── __init__.py ├── test_install.py ├── fixtures │ ├── activity_endpoints │ │ ├── https%3A%2F%2Fwww.linkedin.com%2Fcountserv%2Fcount%2Fshare%3Furl%3Dhttps%3A%2F%2Ftheintercept.com%2F2016%2F11%2F26%2Flaura-ingraham-lifezette%2F%2F%26format%3Djson.json │ │ ├── https%3A%2F%2Fgraph.facebook.com%2F%3Fid%3Dhttps%3A%2F%2Ftheintercept.com%2F2016%2F11%2F26%2Flaura-ingraham-lifezette%2F.json │ │ └── https%3A%2F%2Fbuttons.reddit.com%2Fbutton_info.json%3Furl%3Dhttps%3A%2F%2Ftheintercept.com%2F2016%2F11%2F26%2Flaura-ingraham-lifezette%2F.json │ ├── invalid │ │ └── invalid.html │ ├── netzpolitik.org │ │ └── index.html │ └── businessinsider.com │ │ └── dropbox-vp-todd-jackson-leaves-for-first-round-capital-2018-4.html ├── test_extract.py ├── test_pos.py ├── test_domain.py ├── test_social.py ├── test_module.py └── test_html.py ├── metadoc ├── extract │ ├── __init__.py │ ├── ner.py │ ├── extractor.py │ ├── html.py │ └── pos.py ├── social │ ├── __init__.py │ ├── providers.py │ └── activity.py ├── domain │ ├── __init__.py │ ├── check.py │ ├── lookup.py │ ├── domaintools.py │ └── blacklists.py ├── install.py └── __init__.py ├── requirements-dev.txt ├── MANIFEST.in ├── .gitignore ├── requirements.txt ├── .travis.yml ├── LICENSE.md ├── serve.py ├── setup.py └── README.md /tests/py.test.ini: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /metadoc/extract/__init__.py: -------------------------------------------------------------------------------- 1 | from .extractor import Extractor -------------------------------------------------------------------------------- /metadoc/social/__init__.py: -------------------------------------------------------------------------------- 1 | from .activity import ActivityCount -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | asynctest==0.9.0 3 | pytest==3.0.5 4 | pytest-cov==2.4.0 5 | 6 | -------------------------------------------------------------------------------- /metadoc/domain/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from .domaintools import Domaintools 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include requirements-dev.txt 3 | include README.md 4 | recursive-include metadoc/extract/data * 5 | #global-exclude *.zip 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .cache 3 | .coverage 4 | **/__pycache__ 5 | **.pickle 6 | *.egg 7 | *.egg-info 8 | *-sdist 9 | dist 10 | venv 11 | htmlcov 12 | *.swp 13 | venv36 14 | metadoc/extract/data/* 15 | .pytest_cache -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 
aiohttp==1.1.5 2 | bottle==0.12.10 3 | python-dateutil==2.6.1 4 | jmespath==0.9.0 5 | langdetect==1.0.7 6 | goose3==3.0.9 7 | nltk==3.2.1 8 | numpy==1.13.3 9 | requests==2.18.4 10 | tldextract==2.0.2 11 | whois==0.7 12 | -------------------------------------------------------------------------------- /tests/test_install.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import asynctest 3 | from metadoc.install import install_nltk_sets 4 | 5 | class MetadocInstallTest(asynctest.TestCase): 6 | def setUp(self): 7 | return 8 | 9 | @asynctest.ignore_loop 10 | def test_install(self): 11 | install_nltk_sets() 12 | -------------------------------------------------------------------------------- /tests/fixtures/activity_endpoints/https%3A%2F%2Fwww.linkedin.com%2Fcountserv%2Fcount%2Fshare%3Furl%3Dhttps%3A%2F%2Ftheintercept.com%2F2016%2F11%2F26%2Flaura-ingraham-lifezette%2F%2F%26format%3Djson.json: -------------------------------------------------------------------------------- 1 | {"count":76,"fCnt":"76","fCntPlusOne":"77","url":"https:\/\/theintercept.com\/2016\/11\/26\/laura-ingraham-lifezette\/\/"} -------------------------------------------------------------------------------- /metadoc/domain/check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import itertools 5 | from .blacklists import blacklists 6 | 7 | def check_credibility(url): 8 | plain_lists = [l for l in list(blacklists.values())] 9 | consolidated_list = list(itertools.chain.from_iterable(plain_lists)) 10 | confidence = consolidated_list.count(url) / len(blacklists) 11 | unique_set = set(consolidated_list) 12 | 13 | return { 14 | "is_blacklisted": url in consolidated_list, 15 | "fake_confidence": "{0:.2f}".format(confidence) 16 | } -------------------------------------------------------------------------------- /metadoc/domain/lookup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import urllib 5 | import whois 6 | import datetime 7 | 8 | def whois_date_registered(domain): 9 | try: 10 | query = whois.query(domain) # silently fails in corporate env, vocally fails behind proxy 11 | except Exception as e: 12 | query = None 13 | pass 14 | 15 | # if query.creation_date == "before aug-1996": query.creation_date = datetime.datetime(1996) # .co.uk edge case 16 | # elif type(query.creation_date) is not "date": query = None 17 | return query.creation_date if query else None -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | before_install: 5 | - sudo apt-get -qq update 6 | - sudo apt-get install -y python3 libxml2-dev libxslt1-dev libtiff-dev libjpeg-dev webp whois 7 | # command to install dependencies 8 | install: 9 | - pip3 install -r requirements-dev.txt 10 | - pip3 install codecov coveralls 11 | before_script: 12 | - python -m nltk.downloader brown punkt maxent_treebank_pos_tagger wordnet stopwords averaged_perceptron_tagger words maxent_ne_chunker 13 | # command to run tests 14 | script: 15 | - py.test --cov=metadoc -v tests 16 | after_success: 17 | - coveralls 18 | notifications: 19 | email: false 20 | -------------------------------------------------------------------------------- 
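The two domain helpers above, `check_credibility` (check.py) and `whois_date_registered` (lookup.py), are combined further down by the `Domaintools` class. Below is a minimal standalone sketch of calling them directly; the domain value is a purely hypothetical placeholder:

```python
# Minimal usage sketch for the domain helpers above; "example.com" is a
# hypothetical placeholder, not a claim about any real site.
from metadoc.domain.check import check_credibility
from metadoc.domain.lookup import whois_date_registered

domain = "example.com"

credibility = check_credibility(domain)     # {"is_blacklisted": bool, "fake_confidence": "0.00"}
registered = whois_date_registered(domain)  # datetime of registration, or None if the whois query fails

print(credibility, registered)
```

`Domaintools.get_all()`, shown later in metadoc/domain/domaintools.py, wires both helpers together and bumps `fake_confidence` by 0.2 for domains registered within the last year.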
/tests/fixtures/invalid/invalid.html: -------------------------------------------------------------------------------- 1 | CTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 3 | 4 | 5 | 6 | 7 | 8 | 9 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/fixtures/activity_endpoints/https%3A%2F%2Fgraph.facebook.com%2F%3Fid%3Dhttps%3A%2F%2Ftheintercept.com%2F2016%2F11%2F26%2Flaura-ingraham-lifezette%2F.json: -------------------------------------------------------------------------------- 1 | { 2 | "og_object": { 3 | "id": "1537693146247498", 4 | "description": "Macedonian teens and Russian propagandists have been blamed for the scourge of \"fake news,\" but much originated from shady sites tied to Donald Trump allies.", 5 | "title": "Some Fake News Publishers Just Happen to Be Donald Trump\u2019s Cronies", 6 | "type": "article", 7 | "updated_time": "2016-12-03T15:02:34+0000" 8 | }, 9 | "share": { 10 | "comment_count": 3, 11 | "share_count": 13768 12 | }, 13 | "id": "https://theintercept.com/2016/11/26/laura-ingraham-lifezette/" 14 | } -------------------------------------------------------------------------------- /tests/test_extract.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | from unittest.mock import patch 4 | from metadoc.extract import Extractor 5 | from metadoc.extract.pos import do_train 6 | 7 | class MetadocExtractorTest(unittest.TestCase): 8 | def setUp(self): 9 | article_path = "tests/fixtures/theintercept.com/laura-ingraham-lifezette.html" 10 | with open(article_path, 'r') as article: 11 | self.article_html=article.read() 12 | 13 | self.extractor = Extractor(self.article_html) 14 | 15 | def test_init(self): 16 | assert self.extractor.html == self.article_html 17 | 18 | def test_without_ft(self): 19 | self.extractor.fulltext = "" 20 | self.extractor.detect_language() 21 | assert self.extractor 22 | 23 | def test_get_all_local(self): 24 | do_train() 25 | self.extractor.get_all() 26 | assert self.extractor.contenthash == "2b374ca41d42bd582e500e6cdbc936ef" 27 | assert self.extractor.title == "Some Fake News Publishers Just Happen to Be Donald Trump’s Cronies" 28 | -------------------------------------------------------------------------------- /metadoc/social/providers.py: -------------------------------------------------------------------------------- 1 | """While we're only interested in share, like, up counts for now, 2 | there's a lot of interesting metadata in e.g. reddit responses like 3 | user_reports, report_reasons, num_reports, that might be useful 4 | in building certain heuristics. 
5 | """ 6 | 7 | providers = [ 8 | { 9 | "provider": "facebook", 10 | "endpoint": "https://graph.facebook.com/?id={0}", 11 | "metrics": [{ 12 | "label": "sharecount", 13 | "path": "share.share_count" 14 | }] 15 | }, 16 | { 17 | "provider": "linkedin", 18 | "endpoint": "https://www.linkedin.com/countserv/count/share?url={0}/&format=json", 19 | "metrics": [{ 20 | "label": "sharecount", 21 | "path": "count" 22 | }] 23 | }, 24 | { 25 | "provider": "reddit", 26 | "endpoint": "https://buttons.reddit.com/button_info.json?url={0}", 27 | "metrics": [{ 28 | "label": "upvotes", 29 | "path": "data.children[0].data.ups" 30 | }, 31 | { 32 | "label": "num_reports", 33 | "path": "data.children[0].data.num_reports" 34 | }] 35 | } 36 | ] -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Paul Solbach 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /metadoc/install.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import glob 4 | import nltk 5 | import os 6 | import time 7 | 8 | def remove_zips(data_dir): 9 | glob_path = os.path.join(data_dir, '**/*.zip') 10 | for filename in glob.iglob(glob_path, recursive=True): 11 | print("Removing {}...".format(filename)) 12 | os.remove(filename) 13 | 14 | def install_nltk_sets(): 15 | DATA_DIR = os.path.join(os.path.dirname(__file__), "extract/data") 16 | REQUIRED_CORPORA = [ 17 | 'brown', # Required for FastNPExtractor 18 | 'punkt', # Required for WordTokenizer 19 | 'wordnet', # Required for lemmatization and Wordnet 20 | 'maxent_ne_chunker', 21 | 'stopwords', 22 | 'words' 23 | ] 24 | 25 | for each in REQUIRED_CORPORA: 26 | print(('[+] Downloading corpus: "{0}"'.format(each))) 27 | nltk.download(each, download_dir=DATA_DIR) 28 | 29 | from metadoc.extract.pos import do_train 30 | print('[+] Training tagger now.') 31 | do_train() 32 | remove_zips(DATA_DIR) 33 | return 34 | 35 | if __name__ == "__main__": 36 | install_nltk_sets() 37 | -------------------------------------------------------------------------------- /tests/test_pos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import asyncio 5 | import asynctest 6 | import pytest 7 | 8 | from asynctest.mock import patch 9 | from metadoc.extract.pos import do_train, AveragedPerceptronTagger 10 | 11 | class MetadocPerceptronTest(asynctest.TestCase): 12 | def setUp(self): 13 | return 14 | 15 | @asynctest.ignore_loop 16 | def test_init(self): 17 | self.perceptron_tagger = AveragedPerceptronTagger(autoload=True) 18 | tags = self.perceptron_tagger.tag("Rami Eid is studying at Stony Brook University in NY") 19 | assert len(tags) == 10 20 | 21 | @asynctest.ignore_loop 22 | def test_string_ends_with_nnp(self): 23 | self.perceptron_tagger = AveragedPerceptronTagger(autoload=True) 24 | test_sentence = "The extraordinary phenomenon of fake news spread by Facebook and other \ 25 | social media during the 2016 presidential election has been largely portrayed as a lucky break for Donald Trump" 26 | 27 | tags = self.perceptron_tagger.tag(test_sentence) 28 | entities = self.perceptron_tagger.named_entities(tags) 29 | 30 | assert tags[len(tags)-1][1] == "NNP" 31 | assert "Donald Trump" in entities 32 | 33 | @asynctest.ignore_loop 34 | @patch('metadoc.extract.pos.pickle.load') 35 | def test_no_pickle_found(self, _mocked_func): 36 | _mocked_func.side_effect = IOError('foo') 37 | with pytest.raises(IOError): 38 | AveragedPerceptronTagger(autoload=True) 39 | -------------------------------------------------------------------------------- /tests/fixtures/activity_endpoints/https%3A%2F%2Fbuttons.reddit.com%2Fbutton_info.json%3Furl%3Dhttps%3A%2F%2Ftheintercept.com%2F2016%2F11%2F26%2Flaura-ingraham-lifezette%2F.json: -------------------------------------------------------------------------------- 1 | {"kind": "Listing", "data": {"modhash": "nyjj03f66efdc11f1eac41664cff6d7bcff5571ad854a1b681", "children": [{"kind": "t3", "data": {"contest_mode": false, "banned_by": null, "domain": "theintercept.com", "subreddit": "hillaryclinton", "selftext_html": null, "selftext": "", "likes": null, "suggested_sort": null, "user_reports": [], "secure_media": null, "saved": false, "id": "5gjkag", "gilded": 0, 
"secure_media_embed": {}, "clicked": false, "report_reasons": null, "author": "NYLaw", "media": null, "name": "t3_5gjkag", "score": 1, "approved_by": null, "over_18": false, "removal_reason": null, "hidden": false, "thumbnail": "default", "subreddit_id": "t5_2u1c9", "edited": false, "link_flair_css_class": null, "author_flair_css_class": null, "downs": 0, "mod_reports": [], "archived": false, "media_embed": {}, "is_self": false, "hide_score": false, "spoiler": false, "permalink": "/r/hillaryclinton/comments/5gjkag/some_fake_news_publishers_just_happen_to_be/", "locked": false, "stickied": false, "created": 1480935974.0, "url": "https://theintercept.com/2016/11/26/laura-ingraham-lifezette/", "author_flair_text": null, "quarantine": false, "title": "Some Fake News Publishers Just Happen to be Donald Trump's Cronies", "created_utc": 1480907174.0, "link_flair_text": null, "distinguished": null, "num_comments": 0, "visited": false, "num_reports": null, "ups": 1}}], "after": null, "before": null}} -------------------------------------------------------------------------------- /metadoc/domain/domaintools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import math 3 | import time 4 | import logging 5 | import tldextract 6 | from datetime import datetime, timedelta 7 | from .lookup import whois_date_registered 8 | from .check import check_credibility 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | class Domaintools(object): 13 | """Gather various metadata like whois informaion 14 | and blacklist status about any given hostname 15 | """ 16 | def __init__(self, url=None): 17 | self.url = url or None 18 | self.get_domain(url) 19 | 20 | def get_domain(self, url): 21 | no_fetch_extract = tldextract.TLDExtract(suffix_list_urls=None) 22 | tld = no_fetch_extract(url) 23 | self.domain = "{}.{}".format(tld.domain, tld.suffix) 24 | 25 | def get_date_registered(self): 26 | self.date_registered = whois_date_registered(self.domain) 27 | 28 | def check_credibility(self): 29 | self.credibility = check_credibility(self.domain) 30 | 31 | def get_all(self): 32 | start_time = time.time() 33 | if not self.domain: return 34 | self.get_date_registered() 35 | self.check_credibility() 36 | 37 | if self.date_registered: 38 | self.recalculate_fake_confidence() 39 | self.date_registered_iso = self.date_registered.isoformat() 40 | 41 | logger.debug("--- domain module %s seconds ---" % (time.time() - start_time)) 42 | 43 | def recalculate_fake_confidence(self): 44 | # Adds .2 to fake_confidence if website was registered delta 1y 45 | one_year_ago = datetime.now() - timedelta(days=1*365) 46 | if self.date_registered < one_year_ago: return 47 | 48 | confidence = self.credibility.get("fake_confidence", 0) 49 | self.credibility["fake_confidence"] = float(confidence) + .2 50 | -------------------------------------------------------------------------------- /serve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | __title__ = 'Metadoc - Postmodern news article metadata service' 5 | __copyright__ = 'Copyright 2016, Paul Solbach' 6 | __author__ = 'Paul Solbach' 7 | __license__ = 'MIT' 8 | 9 | import concurrent 10 | import json 11 | import bottle 12 | from bottle import response, request, get, route, run, abort, error 13 | from metadoc import Metadoc 14 | 15 | bottle.BaseRequest.MEMFILE_MAX = 1024 * 1024 # up max POST payload size to 1MB 16 | 17 | @error(404) 18 | 
def error404(error): 19 | return json.dumps({'code': 404,'message': 'url param is missing.'}) 20 | 21 | @get('/social') 22 | def social_article(): 23 | """GET data url required""" 24 | response.content_type = 'application/json' 25 | url = request.query.getone("url") 26 | if not url: 27 | abort(404) 28 | 29 | metadoc = Metadoc(url=url) 30 | payload = metadoc.query(mode="social", fmt="social") 31 | 32 | return json.dumps(payload) 33 | 34 | @get('/extract') 35 | def extract_article(): 36 | """GET data url required""" 37 | response.content_type = 'application/json' 38 | url = request.query.getone("url") 39 | if not url: 40 | abort(404) 41 | 42 | metadoc = Metadoc(url=url) 43 | metadoc._prepare() 44 | metadoc._query_domain() 45 | metadoc._query_extract() 46 | 47 | payload = metadoc._render() # Preserve order 48 | return json.dumps(payload) 49 | 50 | @get('/full') 51 | def full_article(): 52 | """GET data url required""" 53 | response.content_type = 'application/json' 54 | url = request.query.getone("url") 55 | if not url: 56 | abort(404) 57 | 58 | metadoc = Metadoc(url=url) 59 | payload = metadoc.query() 60 | 61 | return json.dumps(payload) 62 | 63 | 64 | run(host='localhost', reloader=True, port=6060) 65 | -------------------------------------------------------------------------------- /metadoc/social/activity.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import asyncio 3 | import jmespath 4 | import json 5 | import logging 6 | import requests 7 | import signal 8 | import time 9 | 10 | from aiohttp import ClientSession 11 | from .providers import providers 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | class ActivityCount(object): 16 | """Gather activity/share stats from social APIs""" 17 | 18 | def __init__(self, url=None): 19 | self.url = url or None 20 | self.responses = [] 21 | 22 | def get_all(self, loop): 23 | activity_tasks = [] 24 | for provider in providers: 25 | url = provider["endpoint"].format(self.url) 26 | task = asyncio.ensure_future(self.collect_sharecount(url, provider)) 27 | activity_tasks.append(task) 28 | 29 | return asyncio.gather(*activity_tasks) 30 | 31 | async def get_json(self, url): 32 | async with ClientSession() as session: 33 | async with session.get(url) as response: 34 | return await response.read() 35 | 36 | async def collect_sharecount(self, url, provider): 37 | try: 38 | response = await self.get_json(url) 39 | j = json.loads(response) 40 | 41 | data = { 42 | "provider": provider["provider"], 43 | "metrics": [] 44 | } 45 | 46 | for m in provider["metrics"]: 47 | data["metrics"].append({ 48 | "count": jmespath.search(m["path"], j), 49 | "label": m["label"] 50 | }) 51 | self.responses.append(data) 52 | except Exception as exc: 53 | logger.error("Collecting sharecount failed!") 54 | logger.exception(exc) 55 | 56 | -------------------------------------------------------------------------------- /tests/test_domain.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import datetime 4 | 5 | import unittest 6 | from unittest.mock import patch 7 | from metadoc.domain import Domaintools 8 | 9 | class MetadocDomaintoolsTest(unittest.TestCase): 10 | 11 | def setUp(self): 12 | article_path = "tests/fixtures/theintercept.com/laura-ingraham-lifezette.html" 13 | self.title = "Some Fake News Publishers Just Happen to Be Donald Trump’s Cronies" 14 | self.url = 
"https://theintercept.com/2016/11/26/laura-ingraham-lifezette/" 15 | self.date_registered = datetime.datetime(2008, 10, 1, 0, 0) 16 | self.domaintools = Domaintools(url=self.url) 17 | 18 | with open(article_path, 'r') as article: 19 | self.article_html=article.read() 20 | 21 | def test_init(self): 22 | assert self.domaintools.url == self.url 23 | assert self.domaintools.domain == "theintercept.com" 24 | 25 | @patch('metadoc.domain.domaintools.whois_date_registered') 26 | def test_get_all_local(self, _mocked_func): 27 | _mocked_func.return_value = self.date_registered 28 | self.domaintools.get_all()#self.loop) 29 | assert self.domaintools.date_registered == self.date_registered 30 | 31 | credibility_resp = { 32 | "is_blacklisted": False, 33 | "fake_confidence": "0.00" 34 | } 35 | 36 | assert self.domaintools.credibility == credibility_resp 37 | assert self.domaintools.date_registered == self.date_registered 38 | 39 | def test_get_all_remote(self): 40 | self.domaintools.get_all() 41 | assert self.domaintools.date_registered is not self.date_registered 42 | 43 | def test_new_domain(self): 44 | today = datetime.datetime.now() 45 | self.domaintools.date_registered = today 46 | self.domaintools.check_credibility() 47 | self.domaintools.recalculate_fake_confidence() 48 | 49 | assert self.domaintools.credibility["fake_confidence"] == 0.2 50 | 51 | -------------------------------------------------------------------------------- /tests/test_social.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import asyncio 5 | import asynctest 6 | import datetime 7 | import json 8 | import jmespath 9 | import urllib.parse 10 | 11 | from asynctest.mock import patch 12 | from metadoc.social import ActivityCount 13 | from metadoc.social.providers import providers 14 | 15 | class MetadocActivityCountTest(asynctest.TestCase): 16 | def setUp(self): 17 | self.url = "https://theintercept.com/2016/11/26/laura-ingraham-lifezette/" 18 | self.activity = ActivityCount(url=self.url) 19 | 20 | @asynctest.ignore_loop 21 | def test_init(self): 22 | assert self.activity.url == self.url 23 | 24 | def mocked_get_json(self, url): 25 | escaped_url = urllib.parse.quote(url, safe='') 26 | with open("tests/fixtures/activity_endpoints/{0}.json".format(escaped_url), 'r') as file: 27 | file_content=file.read() 28 | 29 | json_response = json.loads(file_content) 30 | provider = urllib.parse.urlparse(url).netloc.split(".")[1] 31 | setattr(self, provider, json_response) 32 | 33 | return file_content 34 | 35 | @patch.object(ActivityCount, 'get_json') 36 | async def test_get_all_local(self, _mocked_func): 37 | _mocked_func.side_effect = self.mocked_get_json 38 | 39 | for metrics in self.activity.responses: 40 | provider_data = [p for p in providers if p["provider"] == metrics["provider"]] 41 | test_data = getattr(self, metrics["provider"], None) 42 | test_metric_count = jmespath.search(provider_data[0]["metrics"][0]["path"], test_data) 43 | returned_metric_count = metrics["metrics"][0]["count"] 44 | assert test_metric_count == returned_metric_count 45 | 46 | async def test_get_all_remote(self): 47 | await self.activity.get_all(self.loop) 48 | assert len(self.activity.responses) > 0 49 | 50 | async def test_invalid_url(self): 51 | activity = ActivityCount(url="nourlatall") 52 | res = await activity.collect_sharecount(url="nourlatall", provider="foo") 53 | assert res == None 54 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import os.path 4 | import sys 5 | import re 6 | from subprocess import call 7 | from setuptools import setup, find_packages 8 | from setuptools.command.install import install as _install 9 | from setuptools.command.sdist import sdist as _sdist 10 | from wheel.bdist_wheel import bdist_wheel as _bdist_wheel 11 | 12 | with open('./README.md') as f: 13 | long_description = f.read() 14 | 15 | requirements_txt = open("./requirements.txt").read() 16 | main_py = open('metadoc/__init__.py').read() 17 | metadata = dict(re.findall("__([a-z]+)__ = '([^']+)'", main_py)) 18 | 19 | 20 | def _post_install(): 21 | from metadoc.install import install_nltk_sets 22 | install_nltk_sets() 23 | 24 | class DevInstall(_install): 25 | def run(self): 26 | call(["pip install -r ./requirements-dev.txt --no-clean"], shell=True) 27 | self.execute(_post_install, (), msg="Installing nltk sets!") 28 | _install.run(self) 29 | 30 | class CustomInstall(_sdist): 31 | def run(self): 32 | call(["pip install -r ./requirements.txt --no-clean"], shell=True) 33 | self.execute(_post_install, (), msg="Installing nltk sets!") 34 | _sdist.run(self) 35 | 36 | class BdistEggInstall(_bdist_wheel): 37 | def run(self): 38 | call(["pip install -r ./requirements.txt --no-clean"], shell=True) 39 | self.execute(_post_install, (), msg="Installing nltk sets!") 40 | _bdist_wheel.run(self) 41 | 42 | setup( 43 | name='metadoc', 44 | version=metadata["version"], 45 | description="Post-truth era news article metadata service.", 46 | long_description=long_description, 47 | long_description_content_type='text/markdown', 48 | classifiers=[ # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers 49 | "Programming Language :: Python :: 3.5", 50 | "Topic :: Internet :: WWW/HTTP", 51 | "Development Status :: 3 - Alpha", 52 | "License :: OSI Approved :: MIT License", 53 | "Intended Audience :: Developers", 54 | "Operating System :: POSIX :: Linux", 55 | "Environment :: Web Environment", 56 | ], 57 | keywords=["scraping", "metadata", "news article"], 58 | author=metadata["author"], 59 | author_email='p@psolbach.com', 60 | url='https://github.com/praise-internet/metadoc', 61 | license=metadata["license"], 62 | cmdclass={'sdist': CustomInstall, 'develop': DevInstall}, 63 | packages=find_packages(exclude=['tests']), 64 | install_requires=requirements_txt.strip().split("\n"), 65 | include_package_data=True, 66 | zip_safe=False 67 | ) 68 | -------------------------------------------------------------------------------- /tests/test_module.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import asynctest 3 | import pytest 4 | from metadoc import Metadoc 5 | 6 | class MetadocModuleTest(asynctest.TestCase): 7 | def setUp(self): 8 | self.url = "https://theintercept.com/2016/11/26/laura-ingraham-lifezette/" 9 | article_path = "tests/fixtures/theintercept.com/laura-ingraham-lifezette.html" 10 | with open(article_path, 'r') as article: 11 | self.article_html=article.read() 12 | 13 | self.metadoc = Metadoc(url=self.url, html=self.article_html) 14 | 15 | @asynctest.ignore_loop 16 | def test_init(self): 17 | assert self.metadoc.url == self.url 18 | assert self.metadoc.html == self.article_html 19 | 20 | @asynctest.ignore_loop 21 | def test_query_all(self): 22 | 
result = self.metadoc.query() 23 | assert result 24 | 25 | @asynctest.ignore_loop 26 | def test_extract(self): 27 | self.metadoc.query("extract") 28 | assert self.metadoc.extractor 29 | 30 | @asynctest.ignore_loop 31 | def test_social(self): 32 | self.metadoc.query("social") 33 | assert self.metadoc.activity 34 | 35 | @asynctest.ignore_loop 36 | def test_social_return(self): 37 | result = self.metadoc.query("social", "social") 38 | assert list(result.keys()) == ["url", "social", "__version__"] 39 | 40 | @asynctest.ignore_loop 41 | def test_domain(self): 42 | self.metadoc.query("domain") 43 | assert self.metadoc.domain 44 | 45 | @asynctest.ignore_loop 46 | def test_no_url_fail(self): 47 | with pytest.raises(AttributeError): 48 | Metadoc() 49 | 50 | @asynctest.ignore_loop 51 | def test_invalid_url_fail(self): 52 | metadoc = Metadoc(url="https://theintercept.com/404/", html=None) 53 | result = metadoc.query() 54 | assert result["errors"][0] == "Requesting article body failed with 404 status code." 55 | 56 | @asynctest.ignore_loop 57 | def test_no_html(self): 58 | metadoc = Metadoc(url=self.url) 59 | metadoc.query() 60 | 61 | @asynctest.ignore_loop 62 | def test_check_result(self): 63 | self.metadoc._check_result({}) 64 | 65 | @asynctest.ignore_loop 66 | def test_invalid_charset_check(self): 67 | s = "Von da an beginnt fär die meisten jedoch der hektische Teil." 68 | assert self.metadoc._check_invalid_encoding(s) == True 69 | s = "Von da an beginnt für die meisten jedoch der hektische Teil." 70 | assert self.metadoc._check_invalid_encoding(s) == True 71 | s = "Von da an beginnt för die meisten jedoch der hektische Teil." 72 | assert self.metadoc._check_invalid_encoding(s) == True 73 | s = "Von da an beginnt für die meisten jedoch der hektische Teil." 74 | assert self.metadoc._check_invalid_encoding(s) == True 75 | 76 | s = "DE PÊRA" 77 | assert self.metadoc._check_invalid_encoding(s) == False 78 | 79 | @asynctest.ignore_loop 80 | def test_invalid_t3n(self): 81 | metadoc = Metadoc(url="https://t3n.de/news/remote-work-home-office-heimarbeit-erfahrungsbericht-1018248/", html=None) 82 | result = metadoc.query() 83 | assert result["title"] == "Remote Workers Life: „Das Homeoffice löst viele Probleme, schafft aber auch neue“" 84 | -------------------------------------------------------------------------------- /metadoc/extract/ner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import imp 5 | import sys 6 | import os 7 | 8 | if os.environ.get("LAMBDA_TASK_ROOT", False): 9 | # overwrite sqlite with dummy modules, for AWS Lambda 10 | sys.modules["sqlite"] = imp.new_module("sqlite") 11 | sys.modules["sqlite3.dbapi2"] = imp.new_module("sqlite.dbapi2") 12 | import nltk 13 | 14 | import difflib 15 | import operator 16 | import numpy 17 | import string 18 | import re 19 | 20 | from nltk.tokenize import RegexpTokenizer 21 | from .pos import AveragedPerceptronTagger 22 | 23 | tokenizer = RegexpTokenizer(r'\w+') 24 | 25 | # add path, for AWS Lambda 26 | LOCAL_DATA_PATH = os.path.join(os.path.dirname(__file__), "data") 27 | nltk.data.path.append(LOCAL_DATA_PATH) 28 | 29 | def isPunct(word): 30 | pattern = r"(`|\.|#|\$|%|&|\'|\(|\)|\*|\||\+|,|-|—|/|:|;|<|=|>|\?|@|\[|\]|\^|_|`|{|}|~|”|“|’)" 31 | return re.search(pattern, word) is not None 32 | 33 | class EntityExtractor(object): 34 | def __init__(self, text): 35 | self.perceptron_tagger = AveragedPerceptronTagger(autoload=True) 36 | self.stopwords = 
set(nltk.corpus.stopwords.words()) 37 | self.top_fraction = 70 # consider top candidate keywords only 38 | self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle') 39 | self.sentences = self.sent_detector.tokenize(text) 40 | 41 | def _calculate_word_scores(self, word_list): 42 | """Quick and dirty, inspired by Sujit Pal's RAKE implementation. 43 | """ 44 | word_freq = nltk.FreqDist() 45 | for word in word_list: 46 | word_freq[word] += 1 47 | 48 | word_scores = {k:v for k, v in word_freq.items() if v > 0} 49 | return word_scores 50 | 51 | # def _get_mt_median(self, word_scores): 52 | # median = numpy.median([v for k, v in word_scores.items()]) 53 | # return {k: v for k, v in word_scores.items() if v > median} 54 | 55 | def _filter_distance(self, words): 56 | close_matches = [] 57 | wordlist = set(words[:]) # deepcopy 58 | 59 | for word in words: 60 | if word in close_matches: continue 61 | matches = difflib.get_close_matches(word, wordlist, 2) 62 | if len(matches) > 1: 63 | close_matches += matches[1:] 64 | 65 | return wordlist.difference(close_matches) 66 | 67 | def _sort_and_filter(self, word_scores): 68 | n_words = len(word_scores) 69 | sorted_word_scores = sorted(word_scores.items(), key=operator.itemgetter(1), reverse=True) 70 | top_words = sorted_word_scores[0:int(n_words/100*self.top_fraction)] 71 | punct_filtered = [k[0] for k in top_words if not isPunct(k[0])] 72 | distance_filtered = self._filter_distance(punct_filtered) 73 | return list(distance_filtered) 74 | 75 | def _contains_stopword(self, ent): 76 | filtered = [word.lower() in self.stopwords for word in ent.split(" ")] 77 | return True in filtered 78 | 79 | def get_scored_entities(self): 80 | named_ents = [] 81 | 82 | for sent in self.sentences: 83 | pos_tags = self.perceptron_tagger.tag(" ".join(nltk.word_tokenize(sent))) 84 | entities = self.perceptron_tagger.named_entities(pos_tags) 85 | named_ents += [ent for ent in entities if not self._contains_stopword(ent)] 86 | 87 | ent_scores = self._calculate_word_scores(named_ents) 88 | self.ent_scores = ent_scores 89 | return ent_scores 90 | 91 | def get_names(self): 92 | filtered_names = {k: v for k, v in self.ent_scores.items() if len(k.split(" ")) > 1} 93 | top_names = self._sort_and_filter(filtered_names) 94 | return top_names[:8] 95 | 96 | def get_keywords(self): 97 | filtered_keywords = {k.lower(): v for k, v in self.ent_scores.items() if len(k.split(" ")) == 1} 98 | top_keywords = self._sort_and_filter(filtered_keywords) 99 | return top_keywords[:8] 100 | -------------------------------------------------------------------------------- /metadoc/extract/extractor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | import logging 4 | import lxml 5 | import math 6 | import time 7 | import hashlib 8 | 9 | from langdetect import detect 10 | from goose3 import Goose, Configuration 11 | 12 | from .ner import EntityExtractor 13 | from .html import HtmlMeta 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | class Extractor(object): 18 | """Entity recognition, pullquote extraction etc. 
19 | """ 20 | def __init__(self, html=None, title=" ", **kwargs): 21 | self.html = html or None 22 | self.title = title or None 23 | self.entities = [] 24 | self.keywords = [] 25 | self.names = [] 26 | self.fulltext = None 27 | self.language = None 28 | self.description = None 29 | self.canonical_url = None 30 | self.image = None 31 | self.published_date = None 32 | self.modified_date = None 33 | self.scraped_date = None 34 | self.contenthash = None 35 | self.reading_time = None 36 | 37 | config = Configuration() 38 | config.enable_image_fetching = False 39 | self.goose = Goose(config=config) 40 | 41 | self.tree = None 42 | 43 | def detect_language(self): 44 | """Langdetect is non-deterministic, so to achieve a higher probability 45 | we attempt detection multiple times and only report success if we get identical results. 46 | """ 47 | if self.language: 48 | return 49 | 50 | try: 51 | nondet_attempts = [detect(self.fulltext) for i in range(0,2)] 52 | is_unique = len(set(nondet_attempts)) == 1 53 | self.language = nondet_attempts[0] if is_unique else False 54 | except: 55 | pass 56 | 57 | def sanitize_html(self): 58 | # Lxml bails out on html w/ emojis 59 | emoji_pattern = re.compile("[" 60 | u"\U0001F600-\U0001F64F" # emoticons 61 | u"\U0001F300-\U0001F5FF" # symbols & pictographs 62 | u"\U0001F680-\U0001F6FF" # transport & map symbols 63 | u"\U0001F1E0-\U0001F1FF" # flags (iOS) 64 | "]+", flags=re.UNICODE) 65 | 66 | self.html = emoji_pattern.sub(r'', self.html) 67 | 68 | # empty charset derails goose3 69 | self.html = self.html.replace('', '') 70 | 71 | def extract_text(self): 72 | """Parse fulltext, do keyword extraction using the newspaper lib 73 | => newspaper.readthedocs.io 74 | """ 75 | res = self.goose.extract(url=None, raw_html=self.html.encode("utf-8")) 76 | self.tree = res.raw_doc 77 | self.fulltext = res.cleaned_text 78 | self.language = res.meta_lang 79 | 80 | entities = EntityExtractor(self.fulltext) 81 | entities.get_scored_entities() # Averaged Perceptron Tagger 82 | self.keywords = entities.get_keywords() # Above median? 
83 | self.names = entities.get_names() # Filter top 84 | 85 | def extract_metadata(self): 86 | """Sniff for essential and additional metadata via 87 | either metatags and or json-ld""" 88 | html_meta = HtmlMeta(self.html, tree=self.tree) 89 | html_meta.extract() 90 | 91 | # data 92 | self.authors = html_meta.authors 93 | self.title = html_meta.title 94 | self.description = html_meta.description 95 | self.canonical_url = html_meta.canonical_url 96 | self.image = html_meta.image 97 | self.published_date = html_meta.published_date 98 | self.modified_date = html_meta.modified_date 99 | self.scraped_date = html_meta.scraped_date 100 | 101 | def get_contenthash(self): 102 | """Generate md5 hash over title and body copy in order to keep track 103 | of changes made to a text, do diffs if necessary 104 | """ 105 | contentstring = (self.title + self.fulltext).encode("utf-8") 106 | self.contenthash = hashlib.md5(contentstring).hexdigest() 107 | return self.contenthash 108 | 109 | def get_reading_time(self): 110 | """Calculate average reading time in seconds""" 111 | if not self.fulltext: return None 112 | wordcount = len(self.fulltext.split()) 113 | self.reading_time = math.floor(wordcount / 300 * 60) 114 | 115 | def get_all(self): 116 | start_time = time.time() 117 | self.sanitize_html() 118 | self.extract_text() 119 | self.extract_metadata() 120 | self.detect_language() 121 | self.get_contenthash() 122 | self.get_reading_time() 123 | logger.debug("--- extraction module %s seconds ---" % (time.time() - start_time)) 124 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🔹 metadoc 2 | [![Coverage Status](https://coveralls.io/repos/github/psolbach/metadoc/badge.svg?branch=master)](https://coveralls.io/github/psolbach/metadoc?branch=master) 3 | 4 | Metadoc is a lightning-fast news article metadata extraction library. It does social media activity lookup, source authenticity rating, checksum creation, json-ld and metatag parsing as well as information extraction for named entities, pullquotes, fulltext and other useful things based off of arbitrary article URLs. 5 | 6 | ## Example 7 | 8 | You just throw it any news article URL, and Metadoc will yield. 9 | ```python 10 | from metadoc import Metadoc 11 | url = "https://theintercept.com/2016/11/17/iphones-secretly-send-call-history-to-apple-security-firm-says" 12 | metadoc = Metadoc(url=url) 13 | res = metadoc.query() 14 | ``` 15 | => 16 | ```python 17 | { 18 | '__version__': '0.9.0', 19 | 'authors': ['Kim Zetter'], 20 | 'canonical_url': 'https://theintercept.com/2016/11/17/iphones-secretly-send-call-history-to-apple-security-firm-says/', 21 | 'domain': { 22 | 'credibility': { 23 | 'fake_confidence': '0.00', 24 | 'is_blacklisted': False 25 | }, 26 | 'date_registered': None, 27 | 'favicon': 'https://logo.clearbit.com/theintercept.com?size=200', 28 | 'name': 'theintercept.com'}, 29 | 'entities': { 30 | 'keywords': [ 31 | 'cellebrite', 32 | 'fbi', 33 | 'skype', 34 | 'intercept' 35 | ... 
36 | ] 37 | } 38 | }, 39 | 'image': 'https://theintercept.imgix.net/wp-uploads/sites/1/2016/11/GettyImages-578052668-s.jpg?auto=compress%2Cformat&q=90&fit=crop&w=1200&h=800', 40 | 'language': 'en', 41 | 'modified_date': None, 42 | 'published_date': '2016-11-17T11:00:36+00:00', 43 | 'scraped_date': '2018-07-10T12:13:46+00:00', 44 | 'social': [{ 45 | 'metrics': [{ 46 | 'count': 7340, 'label': 'sharecount' 47 | }], 48 | 'provider': 'facebook' 49 | }], 50 | 'text': { 51 | 'contenthash': '940a62c70db255b4aec378529ae7a2c8', 52 | 'fulltext': 'a guardian of user privacy this year after fighting FBI 53 | demands to help crack into San Bernardino shooter Syed ...', 54 | 'reading_time': 439, 55 | 'summary': 'Your call logs get sent to Apple’s servers whenever iCloud is on — something Apple does not disclose.' 56 | }, 57 | 'title': 'iPhones Secretly Send Call\xa0History to Apple, Security Firm Says', 58 | 'url': 'https://theintercept.com/2016/11/17/iphones-secretly-send-call-history-to-apple-security-firm-says' 59 | } 60 | ``` 61 | 62 | ## Trustworthiness Check 63 | Metadoc does a basic background check on article sources. This means a simple blacklist lookup on the domain name, combined with a `whois` check of its registration date. Blacklists taken into account include the controversial [PropOrNot](http://www.propornot.com/p/the-list.html). Thus, only if a domain is found on every blacklist do we spit out a `fake_confidence` of 1. The resulting metadata should be taken with a grain of salt. 64 | 65 | ## Part-of-speech tagging 66 | For speed and simplicity, we decided against `nltk` and instead rely on the Averaged Perceptron as imagined by Matthew Honnibal [@explosion](https://github.com/explosion). The pip install comes pre-trained with a [CoNLL 2000](http://www.cnts.ua.ac.be/conll2000/) training set which works reasonably well to detect proper nouns. Since training is non-deterministic, unwanted stopwords might slip through. If you want to try out other datasets, simply replace `metadoc/extract/data/training_set.txt` with your own and run `metadoc.extract.pos.do_train`. 67 | 68 | ## Install 69 | Requires python 3.5. 70 | 71 | #### Using pip 72 | ```shell 73 | pip install metadoc 74 | ``` 75 | 76 | ## Develop 77 | 78 | #### Mac OS 79 | ```shell 80 | brew install python3 libxml2 libxslt libtiff libjpeg webp little-cms2 81 | ``` 82 | #### Ubuntu 83 | ```shell 84 | apt-get install -y python3 libxml2-dev libxslt-dev libtiff-dev libjpeg-dev webp whois 85 | ``` 86 | #### Fedora/Redhat 87 | ```shell 88 | dnf install libxml2-devel libxslt-devel libtiff-devel libjpeg-devel libjpeg-turbo-devel libwebp whois 89 | ``` 90 | #### Then 91 | ```shell 92 | pip3 install -r requirements-dev.txt 93 | python serve.py => serving @ 6060 94 | ``` 95 | 96 | ## Test 97 | ```shell 98 | py.test -v tests 99 | ``` 100 | If you happen to run into an error with OSX 10.11 concerning a lazily bound library in PIL, 101 | just remove `/PIL/.dylibs/liblzma.5.dylib`. 102 | 103 | ## Todo 104 | * Page concatenation is needed in order to properly calculate wordcount and reading time. 105 | * Authenticity heuristic with sharecount deviance detection (requires state). 106 | * ~~Perf: Worst offender is nltk's pos tagger. Roll own w/ Average Perceptron.~~ 107 | * ~~Newspaper's summarize produces pullquotes, fulltext takes a while. Move to libextract?~~ 108 | 109 | ## Contributors 110 | [Martin Borho](https://github.com/mborho) 111 | [Paul Solbach](https://github.com/___paul) 112 | 113 | --- 114 | 115 | Metadoc is a software product of FanMatics, Hamburg.
116 | Metadoc stems from a pedigree of nice libraries like [goose3](https://github.com/goose3/goose3/tree/master/goose3), [langdetect](https://github.com/Mimino666/langdetect) and [nltk](https://github.com/nltk/nltk). 117 | Metadoc leans on [this](https://github.com/hankcs/AveragedPerceptronPython) perceptron implementation inspired by Matthew Honnibal. 118 | Metadoc is a work-in-progress. 119 | -------------------------------------------------------------------------------- /tests/test_html.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import asyncio 5 | import asynctest 6 | 7 | from asynctest.mock import patch 8 | from metadoc.extract.html import HtmlMeta 9 | 10 | def get_html_meta(article_path): 11 | with open(article_path, 'r') as article: 12 | html = article.read() 13 | meta = HtmlMeta(html) 14 | meta.extract() 15 | return meta 16 | return None 17 | 18 | class MetadocHtmMetaTest(asynctest.TestCase): 19 | 20 | @asynctest.ignore_loop 21 | def test_extract(self): 22 | paths = [ 23 | "guardian.com/florida-shooting-suspect-charged-questions-nikolas-cruz.html", 24 | "zeit.de/pressefreiheit-tuerkei-inhaftierte-journalisten-deniz-yuecel-freedeniz.html", 25 | "theintercept.com/iphones-secretly-send-call-history-to-apple-security-firm-says.html", 26 | "nytimes/skeleton-ghana-jamaica.html", 27 | "wired.com/inside-the-mind-of-amanda-feilding-countess-of-psychedelic-science.html", 28 | "theverge.com/spacex-falcon-9-launch-starlink-microsat-2a-2b-paz-watch-live.html", 29 | "faz.net/dass-wir-ueberwacht-werden-ist-klar-aber-von-wem-und-wie-eine-spurensuche-15445555.html", 30 | "time.com/jared-kushner-security-clearance-trump-kelly.html", 31 | "netzpolitik.org/index.html", 32 | "invalid/invalid.html", 33 | "bloomberg.com/brexit-talks-in-peril-as-may-rejects-eu-draft-as-unacceptable", 34 | "buzzfeed.com/so-viel-dreck", 35 | "bostonreview.net/thad-williamson-almost-inevitable-failure-justice", 36 | "washingtonpost.com/i-need-loyalty-james-comeys-riveting-prepared-testimony-about-what-trump-asked-him-annotated.html", 37 | "washingtonpost.com/trump-to-nominate-carson-to-lead-u-s-housing-urban-policy.html", 38 | "bellingcat.com/six-months-medical-facilities-still-fire.html", 39 | "slate.com/how_facebook_s_news_feed_algorithm_works.html", 40 | "mashable.com/australia-heat-records-bom.html", 41 | "telegraph.co.uk/When-Stephen-Fry-met-Jony-Ive-the-self-confessed-fanboi-meets-Apples-newly-promoted-chief-design-officer.html", 42 | "nautil.us/the-strange-persistence-of-first-languages.html", 43 | "businessinsider.com/dropbox-vp-todd-jackson-leaves-for-first-round-capital-2018-4.html", 44 | ] 45 | objs = [get_html_meta("tests/fixtures/"+path) for path in paths] 46 | 47 | # published_data 48 | assert objs[0].published_date == "2018-02-16T00:01:52+00:00" 49 | assert objs[1].published_date == "2018-02-16T10:59:47+00:00" 50 | assert objs[2].published_date == "2016-11-17T11:00:36+00:00" 51 | assert objs[3].published_date == "2018-02-15T18:44:34+00:00" 52 | assert objs[4].published_date == "2018-02-15T20:40:04+00:00" 53 | assert objs[5].published_date == "2018-02-15T18:54:21+00:00" 54 | assert objs[6].published_date == "2018-02-15T08:22:05+00:00" 55 | assert objs[7].published_date == "2018-02-28T03:11:27+00:00" 56 | assert objs[8].published_date == "2018-02-16T13:46:24+00:00" 57 | assert objs[9].published_date == None 58 | 59 | # modified_date 60 | assert objs[0].modified_date == "2018-02-16T09:51:54+00:00" 61 | assert 
objs[1].modified_date == "2018-02-16T10:59:47+00:00" 62 | assert objs[2].modified_date == None 63 | assert objs[3].modified_date == "2018-02-16T05:45:23+00:00" 64 | assert objs[4].modified_date == "2018-02-15T20:40:03+00:00" 65 | assert objs[5].modified_date == "2018-02-15T18:54:21+00:00" 66 | assert objs[6].modified_date == "2018-02-15T09:29:16+00:00" 67 | assert objs[7].modified_date == "2018-02-28T15:45:06+00:00" 68 | assert objs[8].modified_date == "2018-02-16T17:16:57+00:00" 69 | assert objs[9].modified_date == None 70 | 71 | # title 72 | assert objs[4].title == "Inside the Mind of Amanda Feilding, Countess of Psychedelic Science" 73 | assert objs[8].title == "Bundeswehr bereitet sich auf den Kampf gegen Killer-Roboter vor" 74 | assert objs[9].title == None 75 | 76 | # authors 77 | assert objs[2].authors == ["Kim Zetter"] 78 | assert objs[3].authors == ["Randal C. Archibold"] 79 | assert objs[5].authors == ["Loren Grush"] 80 | assert objs[8].authors == ["Alexander Fanta"] 81 | assert objs[9].authors == [] 82 | assert objs[10].authors == ["Tim Ross", "Ian Wishart"] 83 | assert objs[11].authors == ["Becky Barnicoat"] 84 | assert objs[12].authors == ["Thad Williamson"] 85 | assert objs[13].authors == ["Amber Phillips", "Peter W. Stevenson"] 86 | assert objs[14].authors == ["Elise Viebeck"] 87 | assert objs[15].authors == [] # link stripped 88 | assert objs[16].authors == ["Will Oremus"] 89 | assert objs[17].authors == ["Johnny Lieu"] 90 | assert objs[18].authors == ["Stephen Fry"] 91 | assert objs[19].authors == ["Julie Sedivy"] 92 | assert objs[20].authors == ["Becky Peterson, Business Insider"] 93 | 94 | # summary 95 | assert objs[8].description.startswith("Wissenschafter und Aktivisten warnen seit") == True 96 | assert objs[9].description == "" 97 | 98 | # canonical url 99 | assert objs[4].canonical_url == "https://www.wired.com/story/inside-the-mind-of-amanda-feilding-countess-of-psychedelic-science/" 100 | assert objs[9].canonical_url == None 101 | 102 | # images 103 | assert objs[6].image == "http://media2.faz.net/ppmedia/1912312546/1.5445566/article_multimedia_overview/scoring-teaser.png" 104 | assert objs[9].image== None 105 | 106 | """for x, obj in enumerate(objs): 107 | #print(x, obj.jsonld) 108 | print(x, obj.canonical_url) 109 | print(x, obj.image)""" 110 | 111 | 112 | -------------------------------------------------------------------------------- /metadoc/extract/html.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import json 4 | import logging 5 | import lxml.etree, lxml.html 6 | from datetime import datetime 7 | from dateutil.parser import parse 8 | from dateutil.tz import tzoffset 9 | from collections import ChainMap 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | class HtmlMeta(object): 14 | """Extract metadata from html. 15 | Needs work, e.g. handling multiple @property=author tags, 16 | detect if author content is a social media destination. 
17 | """ 18 | def __init__(self, html, encoding="UTF-8", tree=None): 19 | self.html = html or None 20 | if tree is not None: 21 | # reuse tree already parsed 22 | self.document = tree 23 | else: 24 | self.parser = lxml.html.HTMLParser(encoding=encoding) 25 | self.document = lxml.html.fromstring(html, parser=self.parser) 26 | self._jsonld_xpath = lxml.etree.XPath('descendant-or-self::script[@type="application/ld+json"]') 27 | self._metatag_xpath = lxml.etree.XPath("//meta") 28 | self._links_xpath = lxml.etree.XPath("//link") 29 | 30 | self.links = {} 31 | self.jsonld = {} 32 | self.metatags = {} 33 | 34 | @property 35 | def title(self): 36 | return self.jsonld.get("headline") \ 37 | or self.metatags.get("og:title") \ 38 | or self.extract_title() 39 | 40 | @property 41 | def description(self): 42 | return self.metatags.get("og:description") \ 43 | or self.metatags.get("description", "").strip() 44 | 45 | @property 46 | def canonical_url(self): 47 | return self.links.get("canonical") 48 | 49 | @property 50 | def image(self): 51 | return self.metatags.get("og:image") \ 52 | or self.jsonld.get("thumbnailUrl") 53 | 54 | def _extract_ld_authors(self): 55 | # extract from jsonld 56 | ld_authors = self.jsonld.get("author", {}) 57 | 58 | # Return if unparseable 59 | if not ld_authors: 60 | return None 61 | 62 | # sanitize ld structure 63 | if type(ld_authors) == str: 64 | ld_authors = {"name": ld_authors} 65 | 66 | ld_authors = [a["name"] for a in ld_authors] if type(ld_authors) == list else ld_authors.get("name", False) 67 | return ld_authors 68 | 69 | @property 70 | def authors(self): 71 | # get a value from trove 72 | authors = self._extract_ld_authors() \ 73 | or self.metatags.get("author") \ 74 | or self.metatags.get("article:author") \ 75 | or self.metatags.get("dcterms.creator") \ 76 | or self.metatags.get("article:authorName") \ 77 | or self.metatags.get("citation_author") \ 78 | or self.jsonld.get("authors") # intercept 79 | 80 | if authors: 81 | # ensure list 82 | if type(authors) != list: 83 | authors = [authors] 84 | # strip links 85 | authors = [a for a in authors if a.startswith("http") == False] 86 | 87 | if not authors: 88 | # washingtonpost 89 | xauthors = self.document.xpath("(//span[@itemprop='author'])[1]//span[@itemprop='name']/text()") 90 | if xauthors: 91 | authors = xauthors 92 | 93 | return authors if authors else [] 94 | 95 | @property 96 | def published_date(self): 97 | res = None 98 | xpaths = [ 99 | "//meta[@name='date']/@content", 100 | "//meta[@property='article:published_time']/@content", 101 | "//meta[@property='article:published']/@content", 102 | "//meta[@name='parsely-pub-date']/@content", 103 | "//meta[@name='DC.date.issued']/@content", 104 | "//time[@itemprop='datePublished']/@datetime", 105 | ] 106 | res = self._query_date(xpaths) 107 | if res is None: 108 | ld_date = self.jsonld.get("datePublished") or self.jsonld.get("dateCreated") 109 | if ld_date: 110 | res = self._format_date(ld_date) 111 | return res 112 | 113 | @property 114 | def modified_date(self): 115 | res = None 116 | xpaths = [ 117 | "//meta[@property='article:modified_time']/@content", 118 | "//meta[@property='article:modified']/@content", 119 | "//meta[@name='last-modified']/@content", 120 | ] 121 | res = self._query_date(xpaths) 122 | if res is None: 123 | ld_date = self.jsonld.get("dateModified") 124 | if ld_date: 125 | res = self._format_date(ld_date) 126 | return res 127 | 128 | @property 129 | def scraped_date(self): 130 | return self._format_date(datetime.now()) 131 | 132 | def 
extract(self): 133 | self.metatags = self._extract_items(self._get_metatag_item, self._metatag_xpath) 134 | self.jsonld = self._extract_items(self._get_jsonld_item, self._jsonld_xpath) 135 | self.links = self._extract_items(self._get_link_item, self._links_xpath) 136 | 137 | def _extract_items(self, get_item, xpath): 138 | items = [item for item in map(get_item, xpath(self.document)) if item] 139 | return dict(ChainMap(*items)) 140 | 141 | def _get_metatag_item(self, node): 142 | name = node.xpath('@property') or node.xpath('@itemprop') or node.xpath('@name') 143 | content = node.xpath('@content') 144 | 145 | return {name[0]: content[0]} \ 146 | if (name and content) else None 147 | 148 | def _get_link_item(self, node): 149 | name = node.xpath('@rel') 150 | content = node.xpath('@href') 151 | 152 | return {name[0]: content[0]} \ 153 | if (name and content) else None 154 | 155 | def _get_jsonld_item(self, node): 156 | ld = None 157 | try: 158 | ld_text = node.text.strip() 159 | # sanitize if neccessary 160 | if ld_text.find(" -1: 161 | ld_text = ld_text[ld_text.find("{"):ld_text.rfind("}")+1] 162 | 163 | ld = json.loads(ld_text) 164 | if type(ld) is list: 165 | for item in[i for i in ld if i.get("@type") == "NewsArticle"]: 166 | return item 167 | except Exception as exc: 168 | logger.error("JSON-LD parsing failed") 169 | logger.exception(exc) 170 | return ld if ld else {} 171 | 172 | def extract_title(self): 173 | title = self.document.xpath("(//title)[1]//text()") 174 | return title[0] if len(title) else None 175 | 176 | def _format_date(self, date_in): 177 | date = parse(date_in) if type(date_in) is str else date_in 178 | return date.astimezone().astimezone( 179 | tzoffset(None, 0)).replace(microsecond=0).isoformat() 180 | 181 | def _query_date(self, xpath_rules): 182 | for xpath_rule in xpath_rules: 183 | dates = self.document.xpath(xpath_rule) 184 | if len(dates) > 0: 185 | try: 186 | return self._format_date(str(dates[0]))#.get("content")) 187 | except: 188 | pass 189 | return None 190 | -------------------------------------------------------------------------------- /metadoc/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __title__ = 'Metadoc - Postmodern news article metadata service' 4 | __copyright__ = 'Copyright 2016, Paul Solbach' 5 | __author__ = 'Paul Solbach' 6 | __license__ = 'MIT' 7 | __version__ = '0.10.5' 8 | 9 | import asyncio 10 | import time 11 | import concurrent 12 | import requests 13 | import urllib.parse 14 | import os 15 | import re 16 | import sys 17 | import logging 18 | 19 | from .domain import Domaintools 20 | from .extract import Extractor 21 | from .social import ActivityCount 22 | 23 | logger = logging.getLogger() 24 | logger.setLevel(os.environ.get("LOGLEVEL", "INFO")) 25 | formatter = logging.Formatter('%(asctime)s [%(name)s] %(levelname)s %(message)s') 26 | 27 | # set user agent 28 | USER_AGENT = os.environ.get("USER_AGENT", 29 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36") 30 | 31 | if not os.environ.get("LAMBDA_TASK_ROOT", False): 32 | # add stream handler, except for AWS Lambda 33 | ch = logging.StreamHandler(sys.stdout) 34 | ch.setFormatter(formatter) 35 | logger.addHandler(ch) 36 | 37 | class Metadoc(object): 38 | 39 | def __init__(self, url=None, html=None, **kwargs): 40 | """Metadoc API, initialize with 41 | :param url: The article url we shall investigate, required. 
42 | :param html: You can pass in the article html manually, optional. 43 | """ 44 | logger.info("Processing url: {}".format(url)) 45 | 46 | self.errors = [] 47 | self.html = html or None 48 | self.url = url or None 49 | 50 | if not self.url: 51 | raise AttributeError('Missing \"url\" attribute.') 52 | 53 | self.extractor = None 54 | self.activity = None 55 | self.domain = None 56 | 57 | def _prepare(self): 58 | if not self.html: 59 | self.html = self._request_url() 60 | self.extractor = Extractor(html=self.html) # Named entities, synthetic summaries 61 | self.activity = ActivityCount(url=self.url) # Social activity from various networks 62 | self.domain = Domaintools(url=self.url) # Domain whois date, blacklisting 63 | 64 | def query(self, mode=None, fmt=None): 65 | data = None 66 | try: 67 | self._prepare() 68 | calls = { 69 | "social": self._query_social, 70 | "domain": self._query_domain, 71 | "extract": self._query_extract, 72 | } 73 | calls.get(mode, self._query_all)() 74 | data = self._render_social() if fmt == "social" else self._render() 75 | if mode is None: 76 | self._check_result(data) 77 | except Exception as exc: 78 | logger.error("Error when processing {}".format(self.url)) 79 | logger.exception(exc) 80 | self.errors.append(str(exc)) 81 | 82 | # return data or error 83 | if data is None or self.errors: 84 | return self._render_errors() 85 | return data 86 | 87 | def _query_all(self): 88 | """Combine all available resources""" 89 | subtasks = [] 90 | loop = asyncio.new_event_loop() 91 | asyncio.set_event_loop(loop) 92 | 93 | executor = concurrent.futures.ThreadPoolExecutor(max_workers=3) 94 | subtasks.append(loop.run_in_executor(executor, self.extractor.get_all)) 95 | subtasks.append(loop.run_in_executor(executor, self.domain.get_all)) 96 | subtasks.append(self.activity.get_all(loop)) 97 | 98 | loop.run_until_complete(asyncio.wait(subtasks, loop=loop)) 99 | loop.close() 100 | 101 | def _query_domain(self): 102 | self.domain.get_all() 103 | 104 | def _query_social(self): 105 | loop = asyncio.new_event_loop() 106 | asyncio.set_event_loop(loop) 107 | 108 | loop.run_until_complete(self.activity.get_all(loop)) 109 | loop.close() 110 | 111 | def _query_extract(self): 112 | self.extractor.get_all() 113 | 114 | def _render_errors(self): 115 | return { 116 | "errors": self.errors 117 | } 118 | 119 | def _render_social(self): 120 | return { 121 | "url": self.url, 122 | "social": getattr(self.activity, "responses", None), 123 | "__version__": __version__ 124 | } 125 | 126 | def _render(self): 127 | """Construct response dict after partial or complete 128 | queries to various sources 129 | """ 130 | return { 131 | "url": self.url, 132 | "title": getattr(self.extractor, "title", None), 133 | "authors": getattr(self.extractor, "authors", None), 134 | "canonical_url": getattr(self.extractor, "canonical_url", None), 135 | "image": getattr(self.extractor, "image", None), 136 | "social": getattr(self.activity, "responses", None), 137 | "language": getattr(self.extractor, "language", None), 138 | "published_date": getattr(self.extractor, "published_date", None), 139 | "modified_date": getattr(self.extractor, "modified_date", None), 140 | "scraped_date": getattr(self.extractor, "scraped_date", None), 141 | "text": { 142 | "fulltext": getattr(self.extractor, "fulltext", None), 143 | "summary": getattr(self.extractor, "description", "No summary found."), 144 | "reading_time": getattr(self.extractor, "reading_time", None), 145 | "contenthash": getattr(self.extractor, "contenthash", None) 146 | }, 
147 | "entities": { 148 | "names": getattr(self.extractor, "names", None), 149 | "keywords": getattr(self.extractor, "keywords", None), 150 | }, 151 | "domain": { 152 | "name": getattr(self.domain, "domain", None), 153 | "credibility": getattr(self.domain, "credibility", None), 154 | "date_registered": getattr(self.domain, "date_registered_iso", None), 155 | "favicon": "https://logo.clearbit.com/{0}?size=200".format(getattr(self.domain, "domain", None)), 156 | }, 157 | "__version__": __version__ 158 | } 159 | 160 | def _check_result(self, res): 161 | if not res.get("title"): 162 | logger.warning("No title: {}".format(self.url)) 163 | if not res.get("canonical_url"): 164 | logger.warning("No canonical url: {}".format(self.url)) 165 | if len(res.get("text", {}).get("fulltext", [])) < 50: 166 | logger.warning("No or little text: {}".format(self.url)) 167 | if not res.get("entities", {}).get("names"): 168 | logger.warning("No names: {}".format(self.url)) 169 | if not res.get("entities", {}).get("keywords"): 170 | logger.warning("No keywords: {}".format(self.url)) 171 | if not res.get("domain", {}).get("name"): 172 | logger.warning("No domain name: {}".format(self.url)) 173 | 174 | def _request_url(self): 175 | """In case no html parameter was provided to the constructor""" 176 | 177 | p = urllib.parse.urlparse(self.url) 178 | netloc = p.netloc or p.path 179 | path = p.path if p.netloc else '' 180 | # if not netloc.startswith('www.'): 181 | # netloc = 'www.' + netloc 182 | 183 | p = urllib.parse.ParseResult(p.scheme, netloc, path, *p[3:]) 184 | url = p.geturl() 185 | 186 | req = requests.get(url, headers={ 187 | 'Accept-Encoding': 'identity, gzip, deflate, *', 188 | 'User-Agent': USER_AGENT 189 | }) 190 | 191 | if req.status_code != 200: 192 | raise Exception('Requesting article body failed with {} status code.'.format(req.status_code)) 193 | 194 | if self._check_invalid_encoding(req.text): 195 | # check for encoding conflicts (e.g. 
t3n.de) 196 | enc_apparent = req.apparent_encoding.lower() 197 | if req.encoding.lower() != enc_apparent and \ 198 | enc_apparent != "windows-1254": 199 | logger.info("Switching html encoding: {} -> {}".format(req.encoding, enc_apparent)) 200 | req.encoding = enc_apparent 201 | return req.text 202 | 203 | def _check_invalid_encoding(self, html): 204 | r=r'(ü|ä|ö|ü)' 205 | return True if re.search(r, html, re.I|re.M) else False 206 | -------------------------------------------------------------------------------- /metadoc/domain/blacklists.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | blacklists = { 5 | "propornot": [ 6 | "4thmedia.org", 7 | "nsnbc.me", 8 | "presstv.com", 9 | "theunhivedmind.com", 10 | "sana.sy", 11 | "activistpost.com", 12 | "americasfreedomfighters.com", 13 | "beforeitsnews.com", 14 | "corbettreport.com", 15 | "drudgereport.com", 16 | "endingthefed.com", 17 | "globalresearch.ca", 18 | "hangthebankers.com", 19 | "infowars.com", 20 | "naturalnews.com", 21 | "paulcraigroberts.org", 22 | "ronpaulinstitute.org", 23 | "southfront.org", 24 | "theantimedia.org", 25 | "trueactivist.com", 26 | "veteranstoday.com", 27 | "washingtonsblog.com", 28 | "yournewswire.com", 29 | "zerohedge.com", 30 | "4threvolutionarywar.wordpress.com", 31 | "abeldanger.net", 32 | "ahtribune.com", 33 | "allnewspipeline.com", 34 | "americanlookout.com", 35 | "amren.com", 36 | "amtvmedia.com", 37 | "ancient-code.com", 38 | "anonews.co", 39 | "anonhq.com", 40 | "antiwar.com", 41 | "asia-pacificresearch.com", 42 | "assassinationscience.com", 43 | "baltimoregazette.com", 44 | "barenakedislam.com", 45 | "bignuggetnews.com", 46 | "blackagendareport.com", 47 | "blacklistednews.com", 48 | "christianfightback.com", 49 | "collective-evolution.com", 50 | "conservativedailypost.com", 51 | "consortiumnews.com", 52 | "cosmicscientist.com", 53 | "countercurrents.org", 54 | "counterinformation.wordpress.com", 55 | "dailyoccupation.com", 56 | "dailystormer.com", 57 | "darkmoon.me", 58 | "darkpolitricks.com", 59 | "davidstockmanscontracorner.com", 60 | "dcclothesline.com", 61 | "dcleaks.com", 62 | "defenddemocracy.press", 63 | "dennismichaellynch.com", 64 | "disclose.tv", 65 | "disclosuremedia.net", 66 | "educate-yourself.org", 67 | "educateinspirechange.org", 68 | "endoftheamericandream.com", 69 | "endtime.com", 70 | "eutopia.buzz", 71 | "ewao.com", 72 | "eyeopening.info", 73 | "fellowshipoftheminds.com", 74 | "filmsforaction.org", 75 | "floridasunpost.com", 76 | "foreignpolicyjournal.com", 77 | "fourwinds10.net", 78 | "freedomoutpost.com", 79 | "gaia.com", 80 | "galacticconnection.com", 81 | "gatesofvienna.net", 82 | "geopolmonitor.com", 83 | "godlikeproductions.com", 84 | "govtslaves.info", 85 | "greanvillepost.com", 86 | "guccifer2.wordpress.com", 87 | "healthnutnews.com", 88 | "henrymakow.com", 89 | "heresyblog.net", 90 | "humansarefree.com", 91 | "ihavethetruth.com", 92 | "ihavethetruth.com", 93 | "in5d.com", 94 | "informationclearinghouse.info", 95 | "intellihub.com", 96 | "intrepidreport.com", 97 | "investmentresearchdynamics.com", 98 | "investmentwatchblog.com", 99 | "jackpineradicals.com", 100 | "jamesrgrangerjr.com", 101 | "jewsnews.co.il", 102 | "journal-neo.org", 103 | "katehon.com", 104 | "katehon.org", 105 | "kingworldnews.com", 106 | "lewrockwell.com", 107 | "libertyblitzkrieg.com", 108 | "libertywritersnews.com", 109 | "makeamericagreattoday.com", 110 | "mintpressnews.com", 111 | "moonofalabama.org", 112 | 
"nakedcapitalism.com", 113 | "naturalblaze.com", 114 | "newcoldwar.org", 115 | "newstarget.com", 116 | "newswithviews.com", 117 | "nowtheendbegins.com", 118 | "off-guardian.org", 119 | "oftwominds.com", 120 | "opednews.com", 121 | "orientalreview.org", 122 | "patriotrising.com", 123 | "platosguns.com", 124 | "pravda.ru", 125 | "pravdareport.com", 126 | "prepperwebsite.com", 127 | "prisonplanet.com", 128 | "rbth.com", 129 | "readynutrition.com", 130 | "redflagnews.com", 131 | "regated.com", 132 | "rense.com", 133 | "righton.com", 134 | "rinf.com", 135 | "rt.com", 136 | "rumormillnews.com", 137 | "ruptly.tv", 138 | "russia-insider.com", 139 | "sentinelblog.com", 140 | "sgtreport.com", 141 | "shiftfrequency.com", 142 | "shtfplan.com", 143 | "silentmajoritypatriots.com", 144 | "silverdoctors.com", 145 | "sott.net", 146 | "sputniknews.com", 147 | "stormcloudsgathering.com", 148 | "strategic-culture.org", 149 | "superstation95.com", 150 | "survivopedia.com", 151 | "the-newspapers.com", 152 | "thecommonsenseshow.com", 153 | "thedailybell.com", 154 | "thedailysheeple.com", 155 | "theduran.com", 156 | "theearthchild.co.za", 157 | "theeconomiccollapseblog.com", 158 | "theeventchronicle.com", 159 | "thefederalistpapers.org", 160 | "thefreethoughtproject.com", 161 | "themindunleashed.org", 162 | "thenewsdoctors.com", 163 | "therebel.media", 164 | "therussophile.org", 165 | "thesaker.is", 166 | "thesleuthjournal.com", 167 | "thetruenews.info", 168 | "thetruthseeker.co.uk", 169 | "thirdworldtraveler.com", 170 | "toprightnews.com", 171 | "trunews.com", 172 | "truth-out.org", 173 | "truthandaction.org", 174 | "truthdig.com", 175 | "truthfeed.com", 176 | "truthkings.com", 177 | "ufoholic.com", 178 | "undergroundworldnews.com", 179 | "unz.com", 180 | "usanewshome.com", 181 | "usapoliticsnow.com", 182 | "usasupreme.com", 183 | "usdcrisis.com", 184 | "usslibertyveterans.org", 185 | "vdare.com", 186 | "veteransnewsnow.com", 187 | "vigilantcitizen.com", 188 | "viralliberty.com", 189 | "voltairenet.org", 190 | "wakeupthesheep.com", 191 | "wakingtimes.com", 192 | "wearechange.org", 193 | "weshapelife.org", 194 | "whatdoesitmean.com", 195 | "whatreallyhappened.com", 196 | "wikileaks.com", 197 | "wikileaks.org", 198 | "wikispooks.com", 199 | "worldnewspolitics.com", 200 | "worldpoliticsus.com", 201 | "www.fort-russ.com", 202 | "oilgeopolitics.net", 203 | "gangstergovernment.com", 204 | "memoryholeblog.com", 205 | "eutimes.net", 206 | "intersectionproject.eu" 207 | ], 208 | "fortliberty.org": [ 209 | "21stcenturywire.com", 210 | "800whistleblower.com", 211 | "activistpost.com", 212 | "alternet.org", 213 | "americannews.com", 214 | "antiwar.com", 215 | "beforeitsnews.com", 216 | "bigpzone.com", 217 | "chronicle.su", 218 | "consciouslifenews.com", 219 | "conspiracywire.com", 220 | "countdowntozerotime.com", 221 | "counterpsyops.com", 222 | "dailybuzzlive.com", 223 | "dailycurrant.com", 224 | "dcclothesline.com", 225 | "disclose.tv", 226 | "duffelblog.com", 227 | "duhprogressive.com", 228 | "elitereaders.com", 229 | "empirenews.net", 230 | "english.ruvr.ru", 231 | "eutimes.net", 232 | "federalistpress.com", 233 | "freepatriot.org", 234 | "fromthetrenchesworldreport.com", 235 | "geoengineeringwatch.org", 236 | "globalresearch.ca", 237 | "gonzoglobe.com", 238 | "govtslaves.info", 239 | "guardianlv.com", 240 | "gulagbound.com", 241 | "hangthebankers.com", 242 | "healthimpactnews.com", 243 | "humansarefree.com", 244 | "huzlers.com", 245 | "infowars.com", 246 | "intellihub.com", 247 | "lewrockwell.com", 248 | 
"libertynews.com", 249 | "livefreelivenatural.com", 250 | "nationalreport.net", 251 | "naturalcuresnotmedicine.com", 252 | "naturalnews.com", 253 | "newswire-24.com", 254 | "nodisinfo.com", 255 | "notallowedto.com", 256 | "now8news.com", 257 | "nowtheendbegins.com", 258 | "pakalertpress.com", 259 | "politicalblindspot.com", 260 | "presstv.ir", 261 | "prisonplanet.com", 262 | "randpaulreview.com", 263 | "rawforbeauty.com", 264 | "realfarmacy.com", 265 | "redflagnews.com", 266 | "responsibletechnology.org", 267 | "rt.com", 268 | "secretsofthefed.com", 269 | "southweb.org", 270 | "thecommonsenseshow.com", 271 | "thecontroversialfiles.net", 272 | "thedailysheeple.com", 273 | "thefreethoughtproject", 274 | "thelastgreatstand.com", 275 | "thenewamerican.com", 276 | "theracketreport.com", 277 | "therightplanet.com", 278 | "therundownlive.com", 279 | "theuspatriot.com", 280 | "topinfopost.com", 281 | "truthandaction.org", 282 | "truthbroadcastnetwork.com", 283 | "turnerradionetwork.com", 284 | "undergroundhealth.com", 285 | "usahitman.com", 286 | "veteranstoday.com", 287 | "westernjournalism.com", 288 | "whydontyoutrythis.com", 289 | "worldnewsdailyreport.com", 290 | "worldtruth.tv", 291 | "yournewswire.com" 292 | ], 293 | "zimdar": [ 294 | "enduringvision.com", 295 | "70news.wordpress.com", 296 | "abcnews.com.co", 297 | "politicalo.com", 298 | "americannews.com", 299 | "indecisionforever.com", 300 | "realnewsrightnow.com", 301 | "infowars.com", 302 | "rilenews.com", 303 | "civictribune.com", 304 | "mediamass.net", 305 | "megynkelly.us", 306 | "msnbc.com.co", 307 | "msnbc.website", 308 | "nationalreport.net", 309 | "creambmp.com", 310 | "news-hound.com", 311 | "newsbiscuit.com", 312 | "dcgazette.com", 313 | "politicops.com", 314 | "newsmutiny.com", 315 | "drudgereport.com.co", 316 | "empirenews.net" 317 | ] 318 | } -------------------------------------------------------------------------------- /metadoc/extract/pos.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Averaged perceptron classifier. Implementation geared for simplicity rather than 3 | efficiency. Adapted from @hankcs, cf. https://github.com/hankcs/AveragedPerceptronPython/blob/master/LICENSE 4 | Based on http://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/ 5 | """ 6 | from collections import defaultdict 7 | import pickle 8 | import random 9 | import logging 10 | import os 11 | 12 | PICKLE = os.path.join(os.path.dirname(__file__), "data/tagger.pickle") 13 | TRAINING_SET = os.path.join(os.path.dirname(__file__), "data/training_set.txt") 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | def do_train(): 18 | tagger = AveragedPerceptronTagger(autoload=False) 19 | logger.info('Reading corpus.') 20 | training_data = [] 21 | sentence = ([], []) 22 | 23 | for line in open(TRAINING_SET): 24 | params = line.split(' ') 25 | if len(params) != 2: continue 26 | 27 | sentence[0].append(params[0]) 28 | sentence[1].append(params[1]) 29 | 30 | if params[0] == '.': 31 | training_data.append(sentence) 32 | sentence = ([], []) 33 | 34 | logger.info('training corpus size : %d', len(training_data)) 35 | logger.info('Start training...') 36 | tagger.train(training_data, save_loc=PICKLE) 37 | 38 | class AveragedPerceptron(object): 39 | '''An averaged perceptron, as implemented by Matthew Honnibal. 
40 | ''' 41 | 42 | def __init__(self): 43 | # Each feature gets its own weight vector, so weights is a dict-of-dicts 44 | self.weights = {} 45 | self.classes = set() 46 | # The accumulated values, for the averaging. These will be keyed by 47 | # feature/clas tuples 48 | self._totals = defaultdict(int) 49 | # The last time the feature was changed, for the averaging. Also 50 | # keyed by feature/clas tuples 51 | # (tstamps is short for timestamps) 52 | self._tstamps = defaultdict(int) 53 | # Number of instances seen 54 | self.i = 0 55 | 56 | def predict(self, features): 57 | '''Dot-product the features and current weights and return the best label.''' 58 | scores = defaultdict(float) 59 | for feat, value in features.items(): 60 | if feat not in self.weights or value == 0: 61 | continue 62 | weights = self.weights[feat] 63 | for label, weight in weights.items(): 64 | scores[label] += value * weight 65 | # Do a secondary alphabetic sort, for stability 66 | return max(self.classes, key=lambda label: (scores[label], label)) 67 | 68 | def update(self, truth, guess, features): 69 | '''Update the feature weights.''' 70 | def upd_feat(c, f, w, v): 71 | param = (f, c) 72 | self._totals[param] += (self.i - self._tstamps[param]) * w 73 | self._tstamps[param] = self.i 74 | self.weights[f][c] = w + v 75 | 76 | self.i += 1 77 | if truth == guess: 78 | return None 79 | for f in features: 80 | weights = self.weights.setdefault(f, {}) 81 | upd_feat(truth, f, weights.get(truth, 0.0), 1.0) 82 | upd_feat(guess, f, weights.get(guess, 0.0), -1.0) 83 | return None 84 | 85 | def average_weights(self): 86 | '''Average weights from all iterations.''' 87 | for feat, weights in self.weights.items(): 88 | new_feat_weights = {} 89 | for clas, weight in weights.items(): 90 | param = (feat, clas) 91 | total = self._totals[param] 92 | total += (self.i - self._tstamps[param]) * weight 93 | averaged = round(total / float(self.i), 3) 94 | if averaged: 95 | new_feat_weights[clas] = averaged 96 | self.weights[feat] = new_feat_weights 97 | return None 98 | 99 | # def save(self, path): 100 | # '''Save the pickled model weights.''' 101 | # return pickle.dump(dict(self.weights), open(path, 'w')) 102 | 103 | # def load(self, path): 104 | # '''Load the pickled model weights.''' 105 | # self.weights = pickle.load(open(path)) 106 | # return None 107 | 108 | class AveragedPerceptronTagger(object): 109 | '''Greedy Averaged Perceptron tagger, as implemented by Matthew Honnibal. 110 | :param load: Load the pickled model upon instantiation. 
111 | ''' 112 | START = ['-START-', '-START2-'] 113 | END = ['-END-', '-END2-'] 114 | AP_MODEL_LOC = PICKLE 115 | 116 | def __init__(self, autoload=False): 117 | self.model = AveragedPerceptron() 118 | self.tagdict = {} 119 | self.classes = set() 120 | 121 | if autoload: 122 | self.load(self.AP_MODEL_LOC) 123 | 124 | def tag(self, corpus): 125 | '''Tags a string `corpus`.''' 126 | # Assume untokenized corpus has \n between sentences and ' ' between words 127 | s_split = lambda t: t.split('\n') 128 | w_split = lambda s: s.split() 129 | 130 | def split_sents(corpus): 131 | for s in s_split(corpus): 132 | yield w_split(s) 133 | 134 | prev, prev2 = self.START 135 | tokens = [] 136 | 137 | for words in split_sents(corpus): 138 | context = self.START + [self._normalize(w) for w in words] + self.END 139 | for i, word in enumerate(words): 140 | tag = self.tagdict.get(word) 141 | if not tag: 142 | features = self._get_features(i, word, context, prev, prev2) 143 | tag = self.model.predict(features) 144 | 145 | tokens.append((word, tag.strip())) 146 | prev2 = prev 147 | prev = tag 148 | 149 | return tokens 150 | 151 | def named_entities(self, tags): 152 | '''return sequential named entities, 153 | IO classification isn't as accurate here, since we're not differentiating between PERSON and ORGANIZATION. 154 | Still, this is fast and in many cases suited to the task. 155 | 156 | [('The', 'DT'), ('extraordinary', 'JJ'), ('phenomenon', 'NN'), ('of', 'IN'), ('fake', 'JJ'), 157 | ('news', 'NN'), ('spread', 'NN'), ('by', 'IN'), ('Facebook', 'NNP'), ('and', ''), ('other', 'JJ'), 158 | ('social', 'JJ'), ('media', 'NNS'), ('during', 'IN'), ('the', 'DT'), ('2016', 'CD'), ('presidential', 'JJ'), 159 | ('election', 'NN'), ('has', 'VBZ'), ('been', 'VBN'), ('largely', 'RB'), ('portrayed', 'VBN'), ('as', 'IN'), 160 | ('a', 'DT'), ('lucky', 'JJ'), ('break', 'NN'), ('for', 'IN'), ('Donald', 'NNP'), ('Trump', 'NNP')] 161 | ''' 162 | 163 | ent, entities = [], [] 164 | tags_len = len(tags)-1 165 | push_ent = lambda x: entities.append(" ".join(ent)) 166 | 167 | for i, tag in enumerate(tags): 168 | if tag[1] == "NNP": 169 | ent.append(tag[0]) 170 | if i == tags_len: 171 | push_ent(ent) 172 | 173 | elif len(ent): 174 | push_ent(ent) 175 | ent = [] 176 | 177 | return entities 178 | 179 | def train(self, sentences, save_loc=None, nr_iter=5): 180 | '''Train a model from sentences, and save it at ``save_loc``. ``nr_iter`` 181 | controls the number of Perceptron training iterations. 182 | :param sentences: A list of (words, tags) tuples. 183 | :param save_loc: If not ``None``, saves a pickled model in this location. 184 | :param nr_iter: Number of training iterations. 
185 | ''' 186 | self._make_tagdict(sentences) 187 | self.model.classes = self.classes 188 | for iter_ in range(nr_iter): 189 | c = 0 190 | n = 0 191 | for words, tags in sentences: 192 | prev, prev2 = self.START 193 | context = self.START + [self._normalize(w) for w in words] \ 194 | + self.END 195 | for i, word in enumerate(words): 196 | guess = self.tagdict.get(word) 197 | if not guess: 198 | feats = self._get_features(i, word, context, prev, prev2) 199 | guess = self.model.predict(feats) 200 | self.model.update(tags[i], guess, feats) 201 | prev2 = prev 202 | prev = guess 203 | c += guess == tags[i] 204 | n += 1 205 | random.shuffle(sentences) 206 | logger.info("Iter {0}: {1}/{2}={3}".format(iter_, c, n, _pc(c, n))) 207 | self.model.average_weights() 208 | 209 | # Pickle as a binary file 210 | if save_loc is not None: 211 | pickle.dump((self.model.weights, self.tagdict, self.classes), 212 | open(save_loc, 'wb'), -1) 213 | 214 | return None 215 | 216 | def load(self, loc=None): 217 | '''Load a pickled model.''' 218 | try: 219 | w_td_c = pickle.load(open(loc, 'rb')) 220 | except IOError: 221 | raise IOError("Invalid perceptrontagger.pickle file.") 222 | 223 | self.model.weights, self.tagdict, self.classes = w_td_c 224 | self.model.classes = self.classes 225 | return None 226 | 227 | def _normalize(self, word): 228 | '''Normalization used in pre-processing. 229 | - All words are lower cased 230 | - Digits in the range 1800-2100 are represented as !YEAR; 231 | - Other digits are represented as !DIGITS 232 | :rtype: str 233 | ''' 234 | if '-' in word and word[0] != '-': 235 | return '!HYPHEN' 236 | elif word.isdigit() and len(word) == 4: 237 | return '!YEAR' 238 | elif word[0].isdigit(): 239 | return '!DIGITS' 240 | else: 241 | return word.lower() 242 | 243 | def _get_features(self, i, word, context, prev, prev2): 244 | '''Map tokens into a feature representation, implemented as a 245 | {hashable: float} dict. If the features change, a new model must be 246 | trained. 
247 | ''' 248 | 249 | def add(name, *args): 250 | features[' '.join((name,) + tuple(args))] += 1 251 | 252 | i += len(self.START) 253 | features = defaultdict(int) 254 | # It's useful to have a constant feature, which acts sort of like a prior 255 | add('bias') 256 | add('i suffix', word[-3:]) 257 | add('i pref1', word[0]) 258 | add('i-1 tag', prev) 259 | add('i-2 tag', prev2) 260 | add('i tag+i-2 tag', prev, prev2) 261 | add('i word', context[i]) 262 | add('i-1 tag+i word', prev, context[i]) 263 | add('i-1 word', context[i - 1]) 264 | add('i-1 suffix', context[i - 1][-3:]) 265 | add('i-2 word', context[i - 2]) 266 | add('i+1 word', context[i + 1]) 267 | add('i+1 suffix', context[i + 1][-3:]) 268 | add('i+2 word', context[i + 2]) 269 | return features 270 | 271 | def _make_tagdict(self, sentences): 272 | '''Make a tag dictionary for single-tag words.''' 273 | counts = defaultdict(lambda: defaultdict(int)) 274 | 275 | for words, tags in sentences: 276 | for word, tag in zip(words, tags): 277 | counts[word][tag] += 1 278 | self.classes.add(tag) 279 | 280 | freq_thresh = 20 281 | ambiguity_thresh = 0.97 282 | 283 | for word, tag_freqs in counts.items(): 284 | tag, mode = max(tag_freqs.items(), key=lambda item: item[1]) 285 | n = sum(tag_freqs.values()) 286 | # Don't add rare words to the tag dictionary 287 | # Only add quite unambiguous words 288 | if n >= freq_thresh and (float(mode) / n) >= ambiguity_thresh: 289 | self.tagdict[word] = tag 290 | 291 | 292 | def _pc(n, d): 293 | return (float(n) / d) * 100 294 | -------------------------------------------------------------------------------- /tests/fixtures/netzpolitik.org/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Bundeswehr bereitet sich auf den Kampf gegen Killer-Roboter vor – netzpolitik.org 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 35 | 36 | 37 | 38 | 39 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 |
89 | 91 | 92 | 151 | 152 |
153 | 154 |
155 |
156 | 157 | 158 |
159 |
160 | 161 | Linkschleuder 162 |

Bundeswehr bereitet sich auf den Kampf gegen Killer-Roboter vor

163 |
164 | 165 | 166 | 167 | 171 | 172 |
173 | 174 |
175 |
176 | Putzig, aber tödlich: Autonome Waffensysteme stehen nicht unter direkter menschlicher Kontrolle. CC public domain Pascal
177 |
178 | 179 |
180 | 181 |

Wissenschafter und Aktivisten warnen seit längerem vor dem Einsatz autonomer Waffensysteme. Die deutsche Bundeswehr hat nun offiziell ausgeschlossen, solche Systeme ankaufen zu wollen. „Wir haben eine sehr klare Position. Wir haben keine Absicht, autonome Systeme zu erwerben“, sagte Generalleutnant Ludwig Leinhos, dem das „Kommando Cyber- und Informationsraum“ untersteht, gestern im Vorfeld der Münchner Sicherheitskonferenz. Er betonte aber, dass Deutschland sich darauf vorbereiten müsse, sich gegen den Einsatz von Killer-Robotern durch andere Staaten zu verteidigen.

182 | 189 |
190 |

Wir finanzieren uns zu fast 100 % aus Spenden von Leserinnen und Lesern. Unterstütze unsere Arbeit mit einer Spende oder einem Dauerauftrag.

191 |
192 |
193 |

194 |

Einer internationale Kampagne zur Ächtung von Roboter-Kampfsystemen haben sich inzwischen 22 Staaten angeschlossen, berichtet die Nachrichtenseite Politico. Dennoch arbeiten einige Staaten, allen voran der Rüstungsweltmeister USA, an solchen Systemen. Politico schildert in seiner Geschichte die Möglichkeiten der Technologie:

195 |

Im Oktober 2016 lud das US-Verteidigungsministerium eine TV-Crew in die Wüste Kaliforniens ein und entließ dort aus einem Flugzeug einen Schwarm von 103 Drohnen in Vogelgröße in den blauen Himmel. Wie Schwalben flog der Schwarm in Formation, änderte akkordiert seine Richtung, positionierte sich laufend neu und reagierte auf seine Umgebung – die dafür nötigen Entscheidungen traf der Schwarm kollektiv, ohne menschliche Hilfe. Nichts illustriert die revolutionäre Natur von vollständig autonomen Waffensystemen besser als die Neuentwicklung solcher „Schwarm-Drohnen“ – kleiner, unbemannter Flugobjekte, die in Gruppen operieren und schon bald die existierende Militärtechnologie in der Leistung überholen könnten, zu einem Bruchteil der Kosten. [Eigene Übersetzung]

196 |

197 | 198 | 208 | 209 | 210 |
211 | 212 | 213 | Weitersagen und Unterstützen. Danke! 214 | 215 | 216 | 217 | 226 | 227 | 236 | 237 | 246 | 247 | 256 | 257 | 258 | 267 | 268 | 277 | 278 | 287 | 288 |
289 | 290 |
291 | 292 | 304 | 305 | 306 | 317 | 318 |
319 |
320 | 321 | 484 | 485 | 486 | 487 | 488 |
489 | 490 |
491 | 492 | 493 | 7 Kommentare 494 | 495 | 496 | 497 |
    498 |
  1. 499 |
    500 | 512 | 513 |
    514 |

    Witzig dabei ist das die Bundeswehr nicht einmal in der Lage ist den konventionellen Job ordentlich zu erledigen. Völlig unnormal ist das solche Systeme überhaupt entwickelt werden. Es mag abgedreht klingen, aber ich denke zuerst an T2 und frage mich ob solche Systeme jemals völlig kontrollierbar sind und wie es sein kann das man Tod automatisiert.
    515 | Unfassbar.

    516 |
    517 | 518 |
    519 |
  2. 520 |
  3. 521 |
    522 | 534 | 535 |
    536 |

    Was soll daraus werden? Autonome Truppen kämpfen gegen autonome Truppen? Das ist eine Fortführung der abstrusitäten im Krieg. Ab dem Moment wo der Oberbefehlshaber nicht mehr voranritt hat der Krieg seine „ehrenhaftigkeit“ verloren.

    537 |
    538 | 539 |
    540 |
      541 |
    1. 542 |
      543 | 555 | 556 |
      557 |

      Die Vorstellung von Krieg als „ehrenhaft“ ist Romantik. In der Realitaet geht es um das Durchsetzen von Zielen mit Gewalt, da will man primaer gewinnen und nicht ehrenvoller aber uU toter Zweiter werden.

      558 |

      Das ist nicht notwendigermassen ein Widerspruch zur Regulierung oder Aechtung von Waffensystemen, bei denen keiner wirklich etwas gewinnt. Weswegen wir teilautonome Waffensysteme bereits haben und behalten werden, und Verteidugungswaffen immer schneller und dafuer autonomer werden. Letzteres ist halt nicht so problematisch, da sich idR keine friedlichen Objekte mit Schallgeschwidigkeit auf Kollisionskurs begeben…

      559 |
      560 | 561 |
      562 |
        563 |
      1. 564 |
        565 | 577 | 578 |
        579 |

        Wie sagte Worf? Nur der Sieg ist ehrenhaft.
        580 | Mein Zusatz, in erster Linie deshalb, weil man danach die Geschichtsbücher schreiben kann.
        581 | Bitte jetzt nicht mit Nazis kommen. Ich weiß das das Verbrecher waren.

        582 |
        583 | 584 |
        585 |
      2. 586 |
      587 |
    2. 588 |
    589 |
  4. 590 |
  5. 591 |
    592 | 604 | 605 |
    606 |

    16. Februar 2018, von MSC

    607 |

    MSC 2018 – AGENDA UND TEILNEHMERLISTE VERÖFFENTLICHT
    608 | Von Freitag bis Sonntag kommen mehr als 30 Staats- und Regierungschefs und über 100 Minister aus aller Welt zur Münchner Sicherheitskonferenz (MSC) zusammen, um über aktuelle Herausforderungen in der internationalen…

    609 |

    Ergo :Die MSC- beginnt erst heute. Das ist wohl so eine Sache mit den vorgefertigten Pressemitteilungen. Meistens ist das Gegenteil richtig.

    610 |
    611 | 612 |
    613 |
      614 |
    1. 615 |
      616 | 628 | 629 |
      630 |

      Stimmt, die eigentliche Konferenz beginnt erst heute. Die Veranstaltung, bei der Leinhos sprach, fand im Vorfeld dazu statt.

      631 |
      632 | 633 |
      634 |
    2. 635 |
    636 |
  6. 637 |
  7. 638 |
    639 | 651 | 652 |
    653 |

    Wie immer ganz vorne mit dabei: die EU-Kommission.

    654 |

    https://www.heise.de/newsticker/meldung/Dual-Use-EU-soll-autonome-nicht-toedliche-Waffensysteme-vorantreiben-3975637.html

    655 |

    Weil nicht-toedliche Waffensysteme ja kein Problem sind, klar. Und dann wundern, dass Leute der EU-Kommission nicht ueber den Weg trauen oder aus dieser EU raus wollen.

    656 |
    657 | 658 |
    659 |
  8. 660 |
661 | 662 |
663 |

Schreibe einen Kommentar

664 |

Deine E-Mail-Adresse wird nicht veröffentlicht. Erforderliche Felder sind mit * markiert.

665 | 666 |

667 |

668 | 669 |

670 |
671 | 672 |
673 |
674 | 675 |
676 |
677 | 678 | 679 |
680 | 681 | 723 |
724 | 725 | 731 | 736 | 737 | 738 | 743 | 744 | 745 | 746 | 747 | 748 | -------------------------------------------------------------------------------- /tests/fixtures/businessinsider.com/dropbox-vp-todd-jackson-leaves-for-first-round-capital-2018-4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Dropbox VP Todd Jackson leaves for First Round Capital - Business Insider Deutschland 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 45 | 46 | 47 | 48 | 52 | 56 | 60 | 70 | 83 | 84 | 85 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 163 | 164 | 167 | 170 | 174 | 175 | 179 | 180 | 181 | 182 | 188 | 189 | 190 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 242 | 243 | 244 | 245 | 246 | 247 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 270 | 319 | 320 | 321 | 322 | 323 | 332 | 333 | 334 | 335 | 336 | 370 | 371 | 372 | 373 | 374 | 375 | 376 |
377 | 378 | 379 | 380 | 381 |
382 | 383 | 384 | 394 | 395 | 396 |
397 |
398 |
399 |
400 | 407 | 408 |
409 |
410 |

International

411 |
412 | 413 | 452 |
453 |
454 | 455 | 462 | 463 | 498 |
499 | 500 | 501 | 502 | 503 |
504 | 508 | 509 |
510 | 511 |
512 | 513 |
514 |
515 | 516 | 520 | 521 |
522 | 523 |
524 | 525 |
526 |
527 | 531 |
532 |
533 | 534 |
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 | 543 | 544 | 545 | 546 |
547 | 588 |
589 | 590 | 591 | 592 | 593 | 594 |
595 |
596 | 597 | 598 | 599 | 600 | 601 |

A key Dropbox exec quit just weeks after its blockbuster IPO — but experts say it could be good in the long run

602 | 603 |
604 |
605 | 606 | 633 | 634 | 674 |
675 | 676 | 677 | 678 | 679 |
680 |
681 | 682 | 683 |
Dropbox CEO Drew HoustonDropbox CEO Drew Houston needs to find a new VP of product.Reuters/Mike Blake

684 |
    685 |
  • Dropbox VP of product Todd Jackson has left the company to join First Round Capital as a founder in residence. 
  • 686 |
  • His move comes on the heels of the cloud storage company's successful IPO.
  • 687 |
  • It also comes just as Dropbox is looking to expand its focus on enterprise products as a means of growing revenue and, eventually, reaching profitability.
  • 688 |
  • Despite Jackson's status a superstar in the consumer space, investors think his departure could actually help Dropbox accomplish its product strategy.
  • 689 |
690 |
691 |


Dropbox lost its superstar VP of product Todd Jackson, just two weeks after the file-sharing service's huge IPO. And analysts say it could be just what Dropbox needs to execute on its promise to refocus on selling its services to larger businesses. 

692 |

Jackson announced via Twitter on Monday a new role at First Round Capital as its first-ever founder in residence. His job will be to advise startup founders in the First Round portfolio, and to launch a Los Angeles edition of the firm's Product Program — a masterclass for early-career product managers.  

693 |

"We're grateful for all the contributions Todd made to Dropbox over the past 2.5 years. While we'll miss him greatly, we wish him all the best in his new role at First Round," a Dropbox spokesperson tells Business Insider. Jackson could not be reached for comment.

694 |

Dropbox hasn't named a replacement for Jackson yet, but for the time being, the company has confirmed that its product organization will be managed by Quentin Clark, Dropbox’s senior VP of engineering, product and design. Clark, who was Jackson's boss, joined Dropbox in September, after two years at SAP and two decades at Microsoft — which is to say he's extremely familiar with business-to-business technology products. 

695 |

"From the looks of things, one would say this news was a hard-to-turn-down opportunity for Todd, but also a leadership adjustment to put Dropbox on a more solid money-making path. That might be welcomed news for investors," Gartner research director Karen Hobert said. 

696 |

Before joining Dropbox in 2015, Jackson spent his career doing product management across some of the biggest companies in tech: Google, Facebook and Twitter. He's a big name in Silicon Valley — but not one associated with enterprise-grade subscription market that Dropbox is looking to conquer. 

697 |

Todd JacksonDropbox VP of product Todd Jackson left the company this week.First Round

"Todd’s background is predominantly startups for consumer products over pure business products," Hobert said. "Given that, his leaving might be an indication of more investment in monetizing and growing the enterprise side of the business — which Dropbox needs to do." 

698 |

When Dropbox filed for its IPO at the end of February, the 11-year-old company made public for the first time that the company isn't profitable. 

699 |

Only 11 million of Dropbox's 500 million registered users pay to use the service, according to its S-1. But the company has 300 million users that it has identified as likely to convert to the paid model, and winning over these users is part of its strategy to become profitable. 

700 |

"They need a solid enterprise leader for that plan," Hobert said.

701 |

And while it's a big deal for any company to lose an executive so soon after its IPO, analysts don't foresee there being a big impact on business in the near term.

702 |

"Customers are a trailing indicator and product changes will begin to show up in the roadmap in six months or beyond," IDC research director Terry Frazier said. "If the current product and roadmap are acceptable, they will buy. If that changes in six months or a year, they will not. That’s when customers will begin voting on the change. "

703 |

First Round was an early investor in Jackson's startup 

704 |

What may have been a liability at Dropbox is an asset in the eyes of First Round. 

705 |

"This career arc makes Todd a rare breed," First Round said in an announcement. "He’s experienced enough to have helped build some of the most iconic products in tech, including Gmail. But he’s also worked at the scrappiest end of the spectrum, scaling high-performance teams from scratch."

706 |

Jackson first met with First Round in 2013 when he was fundraising for Cover, a startup he founded. Cover, which made a smart Android lock screen, was acquired by Twitter in 2014, and Jackson became Twitter's director of product management. In 2014, his wife Arielle — a veteran of Google and Square — joined First Round as a marketing expert in residence. 

707 |

Jackson left Twitter for Dropbox in 2015, just a few months after CEO Jack Dorsey reclaimed his role at the helm of social media company.

708 |

Incidentally, when Jackson first joined Dropbox, he replaced Ilya Fushman, who left the company to join Index Ventures as a general partner. Fushman has since moved to another VC firm, Kleiner Perkins Caufield & Byers.

709 | 710 | 711 |
712 | 713 | 714 | 715 |
716 | 717 |
718 | Mehr: 719 | 720 | Dropbox 721 | First Round 722 | Venture Capital 723 | BI Prime 724 | 725 |
726 |
727 | 728 |
729 |
730 | 731 | 732 | 733 | 734 | 735 | 736 | 737 | 738 |
739 | 742 |
743 | 744 |
745 | 746 | 747 | 748 | 749 | 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 |
802 |

A key Dropbox exec quit just weeks after its blockbuster IPO — but experts say it could be good in the long run

803 | 804 | 805 |

Dropbox VP of product Todd Jackson has left the...

806 |
807 | 808 | 809 | 810 |
811 |
812 |
813 |
814 | 815 |
816 |
817 | 818 | 819 | 820 | 821 | 822 | 823 | 824 | 825 | 826 | 827 | 828 |
829 | 830 |
831 |
832 | 833 | 834 | 835 | 836 | 837 | 838 | 839 | 840 | 841 | 842 | 843 | 844 | 845 | 846 | 847 | 848 | 849 | 850 | 866 | 867 |
868 | 869 |
870 | 875 |
876 | 877 |
878 | 879 | 880 | 881 | 882 | 883 | 884 | 885 | 886 | 887 |
888 | 889 | 890 |
891 | 892 | 893 | 894 |
895 |

Junior-Depot

896 |
897 | 898 |
899 | 908 |
Anzeige
909 |
910 | 911 |

 

912 |

 

913 |

WhatsApp-Newsletter

914 |
915 | 922 |
923 |
924 | 925 | 926 | 927 |
928 | 929 | 930 |
931 |
932 | 933 |
934 | 935 |
936 |
937 |
938 |
939 | 940 |
941 |
942 | 943 | 944 |
945 |
946 | 947 |
948 | 949 |
950 | 951 |
952 | 1011 | 1012 | 1013 | 1014 | 1015 | 1017 | 1018 | 1019 | 1021 | 1026 | 1027 | 1028 | 1029 | 1038 | 1039 | 1040 | 1041 | 1042 | 1043 | 1050 | 1053 | 1054 | 1055 | 1056 | 1081 | 1082 | 1083 | 1084 | 1085 | 1103 | 1104 | 1105 | 1106 | 1117 | 1118 | 1119 | 1120 | 1121 | 1122 | 1123 | 1124 | 1125 | 1128 | 1139 | 1140 | 1141 |
1142 | 1143 |
1144 |
1145 |
1146 | 1147 |
1148 |
1149 | 1150 | 1151 | 1152 | --------------------------------------------------------------------------------