├── bazel
├── BUILD
└── six.BUILD
├── .gitignore
├── api
├── .gitignore
├── __init__.py
├── hicache.py
└── op.py
├── util
├── .gitignore
├── __init__.py
├── misc.py
├── http.py
├── unionfind.py
├── attrib.py
├── dumper.py
├── keyquote.py
├── mwdatetime.py
├── parsedt.py
└── pattern.py
├── formats
├── .gitignore
├── __init__.py
├── uniqstr.py
├── http.py
├── image.py
└── contenttype.py
├── emql
├── .gitignore
├── adapters
│ ├── test
│ │ ├── .gitignore
│ │ ├── __init__.py
│ │ ├── test_nytimes.py
│ │ ├── test_twitter.py
│ │ └── test_weblink.py
│ ├── .gitignore
│ ├── __init__.py
│ ├── lib.py
│ ├── metacritic.py
│ ├── stats.py
│ ├── twitter.py
│ ├── quote.py
│ ├── text.py
│ ├── nytimes.py
│ └── search.py
├── __init__.py
├── docs
│ └── documentation.css
└── apikeys.py
├── mql
├── .gitignore
├── graph
│ ├── __init__.py
│ └── conn_mock.py
├── __init__.py
├── grquoting.py
├── mid.py
├── pathexpr.py
├── benchmark.py
└── grparse.py
├── OWNERS
├── bootstrap
├── BUILD
└── bootstrap.py
├── test
├── config.cfg
├── __init__.py
├── query_sort_test.py
├── regression_id_test.py
├── mql_exceptions_test.py
├── best_hrid_test.py
├── mql_fixture_test.py
├── BUILD
├── cost_test.py
├── return_test.py
└── regression_misc_test.py
├── tid.py
├── log
├── __init__.py
├── log_util.py
└── log.py
├── WORKSPACE
├── pymql_import_test.py
├── CONTRIBUTING.md
├── BUILD
├── mqlbin.py
├── README.md
└── error.py
/bazel/BUILD:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bazel-*
2 |
--------------------------------------------------------------------------------
/api/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 |
--------------------------------------------------------------------------------
/util/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 |
--------------------------------------------------------------------------------
/formats/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 |
--------------------------------------------------------------------------------
/emql/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | /*.pyo
3 |
--------------------------------------------------------------------------------
/emql/adapters/test/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 |
--------------------------------------------------------------------------------
/emql/adapters/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | /*.pyo
3 |
--------------------------------------------------------------------------------
/mql/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | /*.out
3 | /*.tmp
4 | /*.err
5 |
--------------------------------------------------------------------------------
/OWNERS:
--------------------------------------------------------------------------------
1 | rtp
2 | warrenharris
3 | file://depot/google3/metaweb/freebase/OWNERS
4 |
--------------------------------------------------------------------------------
/bootstrap/BUILD:
--------------------------------------------------------------------------------
1 |
2 | py_binary(
3 | name = "bootstrap",
4 | srcs = ["bootstrap.py"],
5 | python_version = "PY2",
6 | deps = [
7 | "//:mql",
8 | ],
9 | )
10 |
--------------------------------------------------------------------------------
/test/config.cfg:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.4
2 | #
3 | # Author: bneutra@google.com (Brendan Neutra)
4 | # flags to run mql tests
5 | --graphd_addr=blade:freebase-graphd-sandbox
6 | # replay|record|nomock
7 | # NOTE: with the introduction of randomized hashing of dicts in 2.7 mocking no longer functions
8 | --mockmode=nomock
9 |
--------------------------------------------------------------------------------
/bazel/six.BUILD:
--------------------------------------------------------------------------------
1 | # Description:
2 | # Six provides simple utilities for wrapping over differences between Python 2
3 | # and Python 3.
4 |
5 | licenses(["notice"]) # MIT
6 |
7 | exports_files(["LICENSE"])
8 |
9 | py_library(
10 | name = "six",
11 | srcs = ["six.py"],
12 | visibility = ["//visibility:public"],
13 | )
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
--------------------------------------------------------------------------------
/formats/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 |
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 |
--------------------------------------------------------------------------------
/emql/adapters/test/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
--------------------------------------------------------------------------------
/emql/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # emql package
16 |
--------------------------------------------------------------------------------
/tid.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | from pymql.log import generate_tid
17 | generate_transaction_id = generate_tid
18 |
--------------------------------------------------------------------------------
/util/misc.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | import re
17 |
# Pre-compiled pattern matching a run of one or more whitespace characters.
# A raw string is used so that ``\s`` reaches the regex engine unchanged and
# does not trigger Python's "invalid escape sequence" warning.
wsplit_re = re.compile(r'\s+')


def wsplit(s):
    """Split ``s`` into words on runs of whitespace.

    Leading and trailing whitespace is stripped before splitting, so no empty
    strings appear at either end of the result for non-blank input.

    Args:
      s: the string to split.

    Returns:
      A list of the whitespace-separated pieces of ``s``.  For an empty or
      all-whitespace ``s`` the result is ``['']`` (``re.split`` always yields
      at least one element), not an empty list.
    """
    return wsplit_re.split(s.strip())
22 |
23 |
--------------------------------------------------------------------------------
/emql/adapters/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # adapters package
16 |
17 | # do not add any import here since if it were to fail, all python adapters
18 | # would fail to load
19 |
20 |
--------------------------------------------------------------------------------
/log/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.6
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Backward compatible support for mql LOG calls"""
17 |
18 | __author__ = 'bneutra@google.com (Brendan Neutra)'
19 |
20 | from log import *
21 |
--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
1 | workspace(name = "pymql")
2 |
3 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
4 |
5 | # Abseil-py
6 | http_archive(
7 | name = "absl_py",
8 | sha256 = "fe3948746ca0543f01fb7767fb00bb739c7fe7e2514180c1575100b988b66542",
9 | strip_prefix = "abseil-py-master",
10 | urls = ["https://github.com/abseil/abseil-py/archive/master.zip"],
11 | )
12 |
13 | http_archive(
14 | name = "six_archive",
15 | build_file = "@//bazel:six.BUILD",
16 | sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a",
17 | strip_prefix = "six-1.10.0",
18 | urls = [
19 | "http://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
20 | "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
21 | ],
22 | )
23 |
--------------------------------------------------------------------------------
/mql/graph/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.6
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | #
17 |
18 | __author__ = 'nicholasv@google.com (Nicholas Veeser)'
19 |
20 | __all__ = ['TcpGraphConnector', 'MockRecordConnector', 'MockReplayConnector']
21 |
22 | from conn_tcp import TcpGraphConnector
23 | from conn_mock import MockRecordConnector
24 | from conn_mock import MockReplayConnector
25 |
--------------------------------------------------------------------------------
/log/log_util.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.6
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Backward compatible support for mql LOG calls, Levels."""
16 |
17 | __author__ = 'bneutra@google.com (Brendan Neutra)'
18 |
19 | from absl import logging
20 |
# Severity names expected by legacy mql LOG callers, expressed in terms of
# the levels exposed by the imported ``logging`` module.
FATAL = logging.FATAL
ERROR = logging.ERROR
ALERT = ERROR  # legacy name, same severity as ERROR
CRIT = ERROR  # legacy name, same severity as ERROR
WARN = logging.WARN
WARNING = WARN
INFO = logging.INFO
NOTICE = INFO  # legacy name, same severity as INFO
DEBUG = logging.DEBUG
SPEW = 2  # e.g. mql.utils.dumplog: for things that are expensive and verbose
30 |
--------------------------------------------------------------------------------
/pymql_import_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.4
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | """Import unittest for pymql"""
17 |
18 | __author__ = 'rtp@google.com (Tyler Pirtle)'
19 |
20 | import google3
21 | from google3.testing.pybase import googletest
22 |
23 |
class PymqlImportTest(googletest.TestCase):
  """Smoke checks that the pymql packages import and can be instantiated.

  NOTE(review): none of the method names below start with ``test``.  If
  googletest.TestCase follows the standard unittest naming convention these
  checks are never collected by the runner -- confirm whether that is
  intentional before relying on this file for coverage.
  """

  def canImport(self):
    """Import the top-level pymql package."""
    import pymql

  def canInit(self):
    """Construct an MQLService aimed at a local graphd address."""
    import pymql
    pymql.MQLService(graphd_addrs=['localhost:8100'])

  def emqlCanImport(self):
    """Import the emql implementation module."""
    import pymql.emql.emql
35 |
36 |
37 | if __name__ == '__main__':
38 | googletest.main()
39 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement (CLA). You (or your employer) retain the copyright to your
10 | contribution; this simply gives us permission to use and redistribute your
11 | contributions as part of the project. Head over to
12 | <https://cla.developers.google.com/> to see your current agreements on file or
13 | to sign a new one.
14 |
15 | You generally only need to submit a CLA once, so if you've already submitted one
16 | (even if it was for a different project), you probably don't need to do it
17 | again.
18 |
19 | ## Code reviews
20 |
21 | All submissions, including submissions by project members, require review. We
22 | use GitHub pull requests for this purpose. Consult
23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
24 | information on using pull requests.
25 |
26 | ## Community Guidelines
27 |
28 | This project follows
29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
--------------------------------------------------------------------------------
/emql/docs/documentation.css:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2020 Google LLC
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | @import url(html4css1.css);
18 |
19 | html {
20 | color: black;
21 | background-color: white;
22 | }
23 |
24 | body {
25 | margin-left: 10ex;
26 | margin-top: 5ex;
27 | padding-left: 1ex;
28 | border-left: 1px solid #006;
29 | width: 75ex;
30 | background-color: white;
31 | }
32 |
33 | h1 {
34 | border-bottom: 2px solid #006;
35 | }
36 |
37 | dt {
38 | font-weight: bold;
39 | }
40 |
41 | h1, h2, h3, h4, h5, h6 {
42 | font-family: Helvetica, Arial, sans-serif;
43 | padding: 4px;
44 | font-size: 100%;
45 | }
46 |
47 | h1.title {
48 | font-size: 120%;
49 | background-color: orange;
50 | }
51 |
--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
1 | # Author: rtp@google.com (Tyler Pirtle)
2 | #
3 | # Description:
4 | # mql - implementation(s) of the Metaweb Query Language
5 |
6 | package(default_visibility = ["//visibility:public"])
7 |
8 | py_library(
9 | name = "mql",
10 | srcs = [
11 | "__init__.py",
12 | "tid.py",
13 | "error.py",
14 | "api/__init__.py",
15 | "api/envelope.py",
16 | "formats/__init__.py",
17 | "formats/http.py",
18 | "util/__init__.py",
19 | "util/dumper.py",
20 | "util/keyquote.py",
21 | "util/mwdatetime.py",
22 | ] + glob([
23 | "log/*.py",
24 | "mql/*.py",
25 | "mql/graph/*.py",
26 | ]),
27 | deps = [
28 | "@absl_py//absl:app",
29 | "@absl_py//absl/flags",
30 | "@absl_py//absl/logging",
31 | ],
32 | )
33 |
34 | #py_test(
35 | # name = "pymql_import_test",
36 | # size = "small",
37 | # srcs = ["pymql_import_test.py"],
38 | # deps = [
39 | # ":mql",
40 | # "//pyglib",
41 | # "//testing/pybase",
42 | # ],
43 | #)
44 |
45 | py_binary(
46 | name = "mqlbin",
47 | srcs = ["mqlbin.py"],
48 | python_version = "PY2",
49 | deps = [
50 | ":mql",
51 | ],
52 | )
53 |
54 | test_suite(
55 | name = "AllTests",
56 | tests = [
57 | "//third_party/py/pymql/test:AllTests",
58 | ],
59 | )
60 |
--------------------------------------------------------------------------------
/emql/adapters/lib.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import mw
16 |
def bdb_lookup(me, guid, bdb):
    """Fetch and decode the blob stored for ``guid`` in the ``bdb`` store.

    The guid's ``#`` is rewritten to ``/guid/`` to form the blob path, the
    first address in ``me.mss.ctx.clobd_read_addrs`` is contacted through the
    session's ``http_connect``, and a GET is issued for the resulting URL.

    Returns:
      The JSON-decoded response body when the status is 200; ``None`` for
      404 and for any other status.
    """
    blob_path = mw.blob.blobclient.BLOBClient.get_static_relative_url(
        bdb, guid.replace('#', '/guid/'))
    host, port = me.mss.ctx.clobd_read_addrs[0]
    url, connection = me.get_session().http_connect(
        host + ':' + str(port), blob_path)
    connection.request('GET', url)
    response = connection.getresponse()
    # The body is always read, even when the status turns out to be an error.
    payload = response.read()
    # TODO: how to do debugging? LOG if debug?
    if response.status != 200:
        # 404 means "no such blob"; every other status is treated the same.
        # TODO: Log unexpected status from BDB
        return None
    return mw.json.loads(payload)
35 |
--------------------------------------------------------------------------------
/api/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from service import Session, ServiceContext
18 | from content import Content, ContentWrapper, NEW_DOCUMENT
19 | from envelope import MQLEnvelope
20 | from mw.mql.error import (MQLError, MQLParseError, MQLInternalError,
21 | MQLTypeError, MQLResultError, MQLInternalParseError,
22 | NamespaceException)
23 |
24 | Session # PYFLAKES
25 | ServiceContext # PYFLAKES
26 | Content # PYFLAKES
27 | ContentWrapper # PYFLAKES
28 | NEW_DOCUMENT # PYFLAKES
29 | MQLEnvelope # PYFLAKES
30 | MQLError # PYFLAKES
31 | MQLParseError # PYFLAKES
32 | MQLInternalError # PYFLAKES
33 | MQLTypeError # PYFLAKES
34 | MQLResultError # PYFLAKES
35 | MQLInternalParseError # PYFLAKES
36 | NamespaceException # PYFLAKES
37 |
--------------------------------------------------------------------------------
/util/http.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import re
16 | import urllib2
17 |
# Matcher for a dotted-quad IPv4 literal such as "10.0.0.1".  Every dot is
# escaped; an unescaped "." would match any character and let hostnames like
# "1.2.3x4" be mistaken for IP addresses.
ip_address = re.compile(r"^\d+\.\d+\.\d+\.\d+$").match


def parse_domain_from_host(host):
    """Reduce an HTTP host value to its domain.

    Anything after the first ``:`` (normally the port) is discarded.  A
    dotted-quad IPv4 address is returned unchanged; for any other host the
    result is its last one or two dot-separated labels.

    Args:
      host: a host string, optionally carrying a ``:port`` suffix.

    Returns:
      The IP address itself, or the trailing (at most two) labels of the
      hostname joined with ``.``.
    """
    host = host.split(':')[0]

    if ip_address(host):
        return host

    # the domain is the last one or two dot-separated words
    return '.'.join(host.rsplit(".", 2)[-2:])
30 |
def get_http_proxy_opener(mss):
    """Choose the callable used to open URLs, honoring the configured proxy.

    Reads ``me.external_proxy`` from ``mss.full_config``.  When that value is
    missing or blank, plain ``urllib2.urlopen`` is returned; otherwise the
    ``open`` method of an opener that sends ``http`` traffic through the
    configured proxy address is returned.
    """
    proxy_addr = mss.full_config.get('me.external_proxy', '').strip()
    if proxy_addr:
        handler = urllib2.ProxyHandler({'http': proxy_addr})
        return urllib2.build_opener(handler).open
    return urllib2.urlopen
43 |
def proxied_urlopen(request, mss):
    """Open ``request`` with the opener ``get_http_proxy_opener`` picks for ``mss``."""
    return get_http_proxy_opener(mss)(request)
47 |
--------------------------------------------------------------------------------
/mql/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """This is the beginning of a public API for doing MQL/LoJSON reads and writes.
15 |
16 | To use (using mql_read as an example)
17 |
18 | from mw.mql import mql_read, MiniContext
19 |
20 | query = {
21 | "query":[{
22 | "id":"/common/topic",
23 | "type":"/type/type",
24 | "properties":[{}]
25 | }]
26 | }
27 |
28 | ctx = MiniContext(("localhost", 1234))
29 | result = mql_read(ctx, query)
30 |
31 | """
32 |
33 | #from pathexpr import wrap_query
34 | #from mw.log import LOG
35 | #
36 | #__all__ = ['mql_read', 'mql_write', 'MiniContext']
37 | #
38 | #def mql_read(ctx, query, varenv=None, transaction_id=None):
39 | # LOG.error("deprecated", "mw.mql.mql_read()")
40 | # return wrap_query(ctx.high_querier.read, query, varenv, transaction_id)
41 | #
42 | #def mql_write(ctx, query, varenv=None, transaction_id=None):
43 | # LOG.error("deprecated", "mw.mql.mql_write()")
44 | # assert not ctx.gc.readonly, "Context must be created with readonly=False"
45 | # return wrap_query(ctx.high_querier.write, query, varenv, transaction_id)
46 |
--------------------------------------------------------------------------------
/mqlbin.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A simple wrapper to demonstrate basic mql reads and writes."""
15 |
16 | __author__ = "bneutra@google.com (Brendan Neutra)"
17 |
18 | import json
19 |
20 | from absl import app
21 | from absl import flags
22 | from collections import OrderedDict
23 | from pymql import MQLService
24 | from pymql.mql.graph import TcpGraphConnector
25 |
26 | FLAGS = flags.FLAGS
27 | flags.DEFINE_string(
28 | "mqlenv", None, "a dict in the form of a string which "
29 | "contains valid mql env key/val pairs")
30 |
31 | flags.DEFINE_string("mqlcmd", None, "'read' or 'write'")
32 | flags.DEFINE_string("graphd_addr", "localhost:9100",
33 | "host:port of graphd server")
34 |
35 |
def main(argv):
    """Run one MQL read or write against graphd and print the result.

    Args:
      argv: positional command-line arguments left after flag parsing;
        ``argv[1]`` must be the MQL query encoded as a JSON string.

    Raises:
      Exception: if ``--graphd_addr`` is empty or not of the form host:port.
      app.UsageError: if no query argument was supplied.
    """
    if not FLAGS.graphd_addr:
        raise Exception("Must specify a --graphd_addr")
    if len(argv) < 2:
        raise app.UsageError(
            "expected the MQL query as a JSON string argument")

    # Connect to the address the user asked for rather than a hard-coded
    # endpoint; the connector takes (host, port) pairs.
    host, sep, port = FLAGS.graphd_addr.rpartition(":")
    if not (sep and host and port.isdigit()):
        raise Exception(
            "--graphd_addr must be of the form host:port, got %r"
            % FLAGS.graphd_addr)
    conn = TcpGraphConnector(addrs=[(host, int(port))])
    mql = MQLService(connector=conn)

    # OrderedDict keeps the query's keys in the order they were written.
    q = json.loads(argv[1], object_pairs_hook=OrderedDict)
    env = json.loads(FLAGS.mqlenv) if FLAGS.mqlenv else {}

    # print(<single expression>) behaves the same as a statement under
    # Python 2 and as a function call under Python 3.
    if FLAGS.mqlcmd == "read":
        print(mql.read(q, **env))
    elif FLAGS.mqlcmd == "write":
        print(mql.write(q, **env))
    else:
        print("you must provide a --mqlcmd, either 'read' or 'write'")
54 |
55 |
56 | if __name__ == "__main__":
57 | app.run(main)
58 |
--------------------------------------------------------------------------------
/util/unionfind.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | #
16 | #
17 | # flexible union-find operations
18 | #
19 | # you can use a particular attribute of the objects you're working
20 | # with as the union chain attribute.
21 | #
22 | #
23 | # NOT WELL-TESTED
24 | #
25 | #
26 |
def union(node1, node2, chainattr):
    """Union-find merge of the sets containing ``node1`` and ``node2``.

    ``chainattr`` names the attribute that holds each node's chain link.  The
    root of ``node1``'s set is favored as the new common root: the root of
    ``node2``'s set is re-pointed at it.  Nothing changes when both nodes
    already share a root.
    """
    keep = find(node1, chainattr)
    absorb = find(node2, chainattr)
    if not (keep == absorb):
        setattr(absorb, chainattr, keep)
36 |
def find(node, chain_attr=None, chain_get=None, chain_put=None):
    """Union-find lookup: return the definitive member (root) of node's set.

    The chain link of a member is read with ``chain_get(member)`` and written
    with ``chain_put(member, value)``.  When either callable is omitted it
    defaults to getting/setting the attribute named ``chain_attr``.  A root
    is a member whose link compares equal to itself.

    As a side effect the lookup chain is collapsed: members on the path from
    ``node`` whose link is not already the root are re-pointed straight at
    it.
    """
    if chain_get is None:
        def chain_get(member):
            return getattr(member, chain_attr)
    if chain_put is None:
        def chain_put(member, target):
            setattr(member, chain_attr, target)

    # Pass 1: climb the chain until a member links to itself.
    root = node
    link = chain_get(root)
    while not (root == link):
        root = link
        link = chain_get(root)

    # Pass 2: walk the same path again, pointing each member at the root.
    member = node
    successor = chain_get(member)
    while not (successor == root):
        chain_put(member, root)
        member = successor
        successor = chain_get(member)

    return root
72 |
73 |
--------------------------------------------------------------------------------
/emql/adapters/test/test_nytimes.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from mw.tests.helpers import TestFixture
16 | from mw.emql import emql
17 |
class TestNytimes_adapter(TestFixture):
    """Exercises the "/base/topics/news/nytimes" property through emqlread."""

    def setUp(self):
        """Run the fixture setup and create a fresh emql cache."""
        super(TestNytimes_adapter, self).setUp()
        self.cache = emql.emql_cache()

    def run_query(self, q):
        """Run ``q`` through ``self.mss.emqlread`` and return only the results.

        The NYTimes API key is read from the session config entry
        'extensions.nytimes_articles'.
        """
        api_key = self.mss.ctx.config['extensions.nytimes_articles']
        control = {'debug': True, 'cache': False}
        response = self.mss.emqlread(None, q, control,
                                     api_keys={'nytimes_articles': api_key},
                                     cache=self.cache)
        debug, cursors, results = response
        return results

    def test_stephen_colbert(self):
        """A limit of 4 on the nytimes property yields exactly 4 entries."""
        query = {"id": "/en/stephen_colbert",
                 "/base/topics/news/nytimes": [{'limit': 4}]}
        r = self.run_query(query)
        assert r["/base/topics/news/nytimes"]
        self.assertEqual(len(r["/base/topics/news/nytimes"]), 4)

    def test_us_presidents(self):
        """Each of the 3 requested presidents carries exactly 1 article."""
        query = [{"id": None,
                  "/base/topics/news/nytimes": [{"limit": 1}],
                  "limit": 3,
                  "/people/person/date_of_birth": None,
                  "sort": "-/people/person/date_of_birth",
                  "type": "/government/us_president"}]
        for r in self.run_query(query):
            assert r["/base/topics/news/nytimes"]
            self.assertEqual(len(r["/base/topics/news/nytimes"]), 1)
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/util/attrib.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from mw.mql import scope
16 | from mw.log import LOG
17 |
def set_oauth_attribution_if_needed(mss):
    """Push an 'attribution' variable for writes made via an authorized app.

    Does nothing when ``mss.authorized_app_id`` is falsy.  Otherwise it
    looks up an existing /freebase/written_by node created by the current
    user for that application; if none is found, one is created with
    ``mss.mqlwrite`` under temporarily pushed permission variables.  The
    resulting node id (or None when there is no result) is then pushed as
    the ``attribution`` variable.
    """
    if not mss.authorized_app_id:
        return

    user_id = mss.get_user_id()

    lookup = [{
        "id": None,
        "creator": user_id,
        "type": "/freebase/written_by",
        "/freebase/written_by/application": {"id": mss.authorized_app_id}
    }]
    found = mss.mqlread(lookup, cache=False)

    if not found:
        creation = {
            "create": "unconditional",
            "id": None,
            "/freebase/written_by/application": {
                "connect": "insert",
                "id": mss.authorized_app_id
            },
            "type": ["/freebase/written_by", "/type/attribution"]
        }
        with mss.push_variables(permission="/boot/oauth_permission",
                                privileged=scope.Privileged,
                                authority=None):
            attribution = mss.mqlwrite(creation)
    else:
        if len(found) > 1:
            # More than one attribution exists for this app/user pair:
            # log it and carry on with the first one.
            LOG.warn("set_oauth_attribution_if_needed.duplicate",
                     "duplicate attributions for %s and %s" % (mss.authorized_app_id, user_id),
                     application_id=mss.authorized_app_id,
                     user_id=user_id,
                     attributions=found)
        attribution = found[0]

    if attribution:
        attribution_id = attribution['id']
    else:
        attribution_id = None
    mss.push_variables(attribution=attribution_id)
57 |
--------------------------------------------------------------------------------
/test/query_sort_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | """Query sorting unittest for pymql."""
17 |
18 | __author__ = 'bneutra@google.com (Brendan Neutra)'
19 |
20 | import collections
21 | import json
22 |
23 | import google3
24 | import pymql
25 |
26 | from google3.testing.pybase import googletest
27 |
# Fixture for the sorting test.  The keys are deliberately listed out of
# sorted order.  The OrderedDict is built from a list of pairs so that this
# order is actually preserved: building it from a plain dict literal would
# inherit the dict's own iteration order, which is arbitrary on Python 2.
testdictpart = collections.OrderedDict([
    ('propd', None),
    ('propc', 'foo'),
    ('propb', []),
    ('prope', {}),
    ('propf', 1.1),
    (11, False),
])

testdict = testdictpart.copy()

# Nest copies of the part as a dict value, inside a list, and inside a dict
# that is itself inside a list.
testdict['propa'] = testdictpart.copy()
testdict['propg'] = [testdictpart.copy(), testdictpart.copy()]
testdict['propg'][1]['propa'] = testdictpart.copy()
42 |
43 |
def IsSorted(part):
  """Check that all keys are sorted.

  Recurses through lists and dicts.  A dict is sorted when its keys, in
  iteration order, equal their sorted order and every value is itself
  sorted; a list is sorted when every element is.  Anything else counts as
  sorted.

  Args:
    part: a dict, list, or leaf value.

  Returns:
    True or False.
  """
  if isinstance(part, list):
    return all(IsSorted(item) for item in part)

  if isinstance(part, dict):
    # Materialize the keys: on Python 3 a keys() view never compares equal
    # to a list, which would report every dict as unsorted.
    keys = list(part)
    if sorted(keys) != keys:
      return False
    return all(IsSorted(value) for value in part.values())

  return True
58 |
59 |
class PymqlSortTest(googletest.TestCase):
  """Checks pymql.sort_query_keys against the module-level testdict."""

  def testSorting(self):
    """basic sorting test."""
    result = pymql.sort_query_keys(testdict)

    # The output must be sorted at every level; the input must not be.
    self.assertTrue(IsSorted(result))
    self.assertFalse(IsSorted(testdict))

    # Sorting must not change the meaning of the query.  Round-trip both
    # through JSON to get plain dicts, then compare order-independently.
    plain_input = json.loads(json.dumps(testdict))
    plain_result = json.loads(json.dumps(result))
    self.assertDictEqual(plain_result, plain_input)
74 |
75 |
76 | if __name__ == '__main__':
77 | googletest.main()
78 |
--------------------------------------------------------------------------------
/emql/adapters/metacritic.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import mw
16 | from lib import bdb_lookup
17 |
18 | #TODO: python docs
19 | #TODO: log exceptions?
20 |
class metacritic_adapter(mw.emql.adapter.Adapter):
    """Adapter that returns Metacritic key and score data per guid.

    Data comes from ``bdb_lookup``; each request must carry the shared
    secret in its query.
    """

    # NOTE(review): the shared secret is hard-coded in source - consider
    # moving it to configuration.
    SECRET = 'random_rodent'

    def make_result(self, key, scores):
        """Build the result dict for one entry from its key and scores."""
        result = {}
        result['key'] = key
        result['url'] = 'http://www.metacritic.com/video/titles/%s' % key
        result['score'] = scores['metascore']
        result['userscore'] = scores['userscore']
        result['attribution_html'] = 'TODO'
        return result

    def check_secret(self, params, guid, result):
        """Return True when params['query']['secret'] matches SECRET.

        On mismatch an error entry is stored in ``result[guid]`` and False
        is returned.
        """
        query = params.get('query')
        if query and query.get('secret') == self.SECRET:
            return True
        result[guid] = { 'error':'Invalid auth' }
        return False

    def get_key(self, me, guid):
        """Return the first 'source-metacritic-movie' lookup hit, or None."""
        found = bdb_lookup(me, guid, 'source-metacritic-movie')
        return found[0] if found else None

    def get_scores(self, me, guid):
        """Return the 'metacritic-scores' lookup result for ``guid``."""
        return bdb_lookup(me, guid, 'metacritic-scores')

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Return a dict mapping guid to its result for each entry in args.

        Entries failing the secret check get an error entry; entries with
        no key or no scores are left out.
        """
        result = {}
        for mqlres in args:
            guid = mqlres['guid']
            if self.check_secret(params, guid, result):
                key = self.get_key(me, guid)
                # Scores are only looked up once a key was found.
                scores = self.get_scores(me, guid) if key else None
                if scores:
                    result[guid] = self.make_result(key, scores)
        return result
67 |
68 |
--------------------------------------------------------------------------------
/test/regression_id_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.4
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # -*- coding: utf-8 -*-
17 | #
18 | """test regressions around id resolution."""
19 |
20 | __author__ = 'bneutra@google.com (Brendan Neutra)'
21 |
22 | import google3
23 | from pymql.mql import error
24 | from pymql.test import mql_fixture
25 |
26 |
27 | class MQLTest(mql_fixture.MQLTest):
28 |
29 | def setUp(self):
30 | self.SetMockPath('data/regression_id.yaml')
31 | super(MQLTest, self).setUp()
32 | self.env = {'as_of_time': '2009-10-01'}
33 |
34 | def testDeepId(self):
35 | # buganizer: 4363162
36 | query = """
37 | {"id":
38 | "/en/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a"}
39 | """
40 | self.DoQuery(query, exp_response='null')
41 |
42 | def testTooDeepId(self):
43 | # buganizer: 4363162
44 | # id path limit is 200 deep
45 | query = """
46 | {"id":
47 | "/en/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a"}
48 | """
49 | exc_response = (
50 | error.MQLParseError,
51 | 'Id has too many segments. Maximum is 200'
52 | )
53 |
54 | self.DoQuery(query, exc_response=exc_response)
55 |
56 | if __name__ == '__main__':
57 | mql_fixture.main()
58 |
--------------------------------------------------------------------------------
/util/dumper.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import pprint
16 |
try:
    # Python 2: basestring covers str/unicode, long is a separate int type.
    _DUMP_SCALAR_TYPES = (basestring, bool, int, float, long)
except NameError:
    # Python 3: str and bytes stand in for the old string types.
    _DUMP_SCALAR_TYPES = (str, bytes, bool, int, float)


def dump(object, depth=10, ctx=None):
    """Convert ``object`` into nested dicts/lists suitable for pprint.

    Args:
      object: the value to render.
      depth: remaining recursion depth; below zero a "!!DEPTH!!" marker
        string is returned instead of descending further.
      ctx: dict shared across the recursion.  It maps ``id()`` of each
        visited object to its "<Type instance at 0x...>" label, which is
        how repeated objects are detected.  A caller may also pre-seed it
        with type names (rendered as "!!SKIPPED!!") or attribute names
        (omitted from instance dumps).

    Returns:
      Scalars, None and empty plain dict/list/tuple are returned as-is.
      Dicts become dicts with an extra '!!REPR!!' entry; lists and tuples
      become lists whose first element is the label; other objects become
      a dict of their ``__dict__`` entries plus '!!REPR!!'.
    """
    if ctx is None:
        ctx = {}

    # don't subclass these types. Please!
    if object is None or isinstance(object, _DUMP_SCALAR_TYPES):
        return object

    # subclasses of these types are interesting.
    if type(object) in (dict, list, tuple) and len(object) == 0:
        return object

    oid = id(object)
    if oid in ctx:
        return "!!REPEAT!!" + ctx[oid]

    typename = type(object).__name__
    if typename == 'instance':
        # Python 2 old-style instance: report the real class name.
        typename = object.__class__.__name__
    label = '<' + typename + ' instance at ' + hex(oid) + '>'
    ctx[oid] = label

    if typename in ctx:
        return "!!SKIPPED!!" + label

    if depth < 0:
        return "!!DEPTH!!" + label

    if isinstance(object, dict):
        result = {'!!REPR!!': label}
        for k in object:
            result[k] = dump(object[k], depth - 1, ctx)
        return result

    if isinstance(object, (list, tuple)):
        result = [label]
        for item in object:
            result.append(dump(item, depth - 1, ctx))
        return result

    result = {'!!REPR!!': label}
    try:
        for key in object.__dict__:
            if key not in ctx:
                result[key] = dump(object.__dict__[key], depth - 1, ctx)
    except Exception:
        # Best effort: objects without a usable __dict__ are rendered with
        # whatever was collected so far.  KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        pass
    return result
68 |
def dumper(object,depth=10,ctx=None):
    """Pretty-print the ``dump`` rendering of ``object`` to stdout.

    ``depth`` and ``ctx`` are passed through to ``dump``; a fresh dict is
    used when ``ctx`` is None.
    """
    seen = {} if ctx is None else ctx
    rendered = dump(object, depth, seen)
    pprint.pprint(rendered)
73 |
def dumps(object, **kws):
    """Return the ``dump`` rendering of ``object`` as a pprint-formatted string.

    Extra keyword arguments are forwarded to ``dump``.
    """
    rendered = dump(object, **kws)
    return pprint.pformat(rendered)
76 |
77 |
--------------------------------------------------------------------------------
/emql/adapters/test/test_twitter.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from mw.tests.helpers import TestFixture
16 | from mw.emql import emql
17 |
class TestTwitter_adapter(TestFixture):
    """Exercises the "/base/topics/news/twitter_from" property via emqlread."""

    def setUp(self):
        """Run the fixture setup and create a fresh emql cache."""
        super(TestTwitter_adapter, self).setUp()
        self.cache = emql.emql_cache()

    def run_query(self, q):
        """Run ``q`` through ``self.mss.emqlread`` and return only the results."""
        debug, cursors, results = self.mss.emqlread(None, q, {'debug': True, 'cache': False},
                                                    cache=self.cache)
        return results

    def test_stephen_colbert(self):
        """'limit' bounds the tweet count; 'raw' controls the 'raw' field."""
        r = self.run_query({"id": "/en/stephen_colbert",
                            "/base/topics/news/twitter_from": [{'limit': 4}]})
        assert r["/base/topics/news/twitter_from"]
        self.assertEqual(len(r["/base/topics/news/twitter_from"]), 4)
        for tweet in r["/base/topics/news/twitter_from"]:
            # assertFalse replaces the deprecated failIf alias.
            self.assertFalse('raw' in tweet)

        r = self.run_query({"id": "/en/stephen_colbert",
                            "/base/topics/news/twitter_from": [{'limit': 3,
                                                                'raw': True}]})
        assert r["/base/topics/news/twitter_from"]
        self.assertEqual(len(r["/base/topics/news/twitter_from"]), 3)
        for tweet in r["/base/topics/news/twitter_from"]:
            # assertTrue replaces the deprecated failUnless alias.
            self.assertTrue('raw' in tweet)

    def test_george_washington(self):
        """The property is empty for /en/george_washington."""
        r = self.run_query({"id": "/en/george_washington",
                            "/base/topics/news/twitter_from": None})
        assert not r["/base/topics/news/twitter_from"]
50 |
51 | #
52 | # def test_us_presidents(self):
53 | # results = self.run_query([{"id": None,
54 | # "/base/topics/news/nytimes": [{"limit": 1}],
55 | # "limit": 3,
56 | # "/people/person/date_of_birth": None,
57 | # "sort": "-/people/person/date_of_birth",
58 | # "type": "/government/us_president"}])
59 | # for r in results:
60 | # assert r["/base/topics/news/nytimes"]
61 | # self.assertEqual(len(r["/base/topics/news/nytimes"]), 1)
62 | #
63 | #
64 | #
65 |
--------------------------------------------------------------------------------
/emql/adapters/stats.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | import math
17 | from itertools import izip, chain
18 |
19 | from mw.emql.adapter import Adapter, AdapterUserError
20 | from mw.emql.emql import id_guid, formatted_id_guid
21 |
22 |
class stats_adapter(Adapter):
    """Adapter that reduces a list of MQL results to one statistic.

    The statistic is chosen by the property name (optionally prefixed with
    '@'): average, median, min, max, total or sigma.
    """

    def reduce(self, tid, graph, mql, me, control, mqlres, params, api_keys):
        """Compute the requested statistic over ``mqlres``.

        Args:
          mqlres: iterable of MQL results to aggregate.
          params: dict carrying 'property' (the operation name) and 'query',
            a dict whose 'value' entry is a dot-separated path to the value
            inside each result.
          (the remaining arguments are not used here)

        Returns:
          ``dict(value=...)`` with the statistic, or ``dict(value=None)``
          when no non-None values were found.

        Raises:
          ValueError: 'query' is not a dict or has no usable 'value' path.
          AdapterUserError: the collected values do not support the
            operation (a TypeError was raised while computing it).
          NotImplementedError: the operation name is not recognised.
        """
        # functools.reduce is available on Python 2.6+ and 3; the builtin
        # exists on Python 2 only.
        from functools import reduce as fold

        op = params.get('property')
        query = params.get('query')

        path = None
        if isinstance(query, dict):
            value_path = query.get('value')
            # Only split a real path: ''.split('.') yields [''], which is
            # truthy and used to slip past the missing-argument check.
            if value_path:
                path = value_path.split('.')

        if not path:
            raise ValueError("%s: missing 'value' argument" % (op))

        def get(res, prop):
            # Step one path segment down; for a list, step into its first
            # element.
            if isinstance(res, dict):
                return res[prop]
            value = res[0]
            if isinstance(value, dict):
                value = value[prop]
            return value

        values = []
        for _mqlres in mqlres:
            value = fold(get, path, _mqlres)
            if value is not None:
                values.append(value)

        if values:
            if op.startswith('@'):
                op = op[1:]

            try:
                if op == 'average':
                    return dict(value=float(sum(values)) / len(values))

                if op == 'median':
                    values.sort()
                    # Floor division keeps the index an int on Python 3;
                    # for an even count this picks the upper middle value.
                    return dict(value=values[len(values) // 2])

                if op == 'min':
                    return dict(value=min(values))

                if op == 'max':
                    return dict(value=max(values))

                if op == 'total':
                    return dict(value=sum(values))

                if op == 'sigma':
                    # Population standard deviation (divides by N).
                    average = float(sum(values)) / len(values)
                    squares = sum((value - average) * (value - average)
                                  for value in values)
                    return dict(value=math.sqrt(squares / len(values)))

            except TypeError as e:
                raise AdapterUserError('reduce', op, self.uri, str(e))

            raise NotImplementedError(op)

        return dict(value=None)

    def help(self, tid, graph, mql, me, control, params):
        """Return the help text as a (content type, text) pair."""
        from docs import stats_adapter_help

        return 'text/x-rst;', stats_adapter_help
91 |
--------------------------------------------------------------------------------
/formats/uniqstr.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 |
17 | UniqueStr is a base class for implementing enums
18 | as strings.
19 |
20 | see MediaType and TextEncoding subclasses for example
21 | subclasses.
22 |
23 | """
24 |
class UniqueStr(str):
    """
    UniqueStr looks like an ascii str, but it has been normalized.

    It's a string that behaves like an enum: constructing the same
    normalized value (or a registered alias) twice on the same class
    returns the same object.

    Subclass this for values like media-types, charsets,
    language names, locale, etc.
    """

    # dictionary mapping names to known values.
    # multiple names may match to the same unique str if it has aliases.
    # this looks like a mapping from str to str but it's really a mapping
    # from str to UniqueStr.
    _known = dict()

    # if set, attempts to create new values will fail
    _exclusive = False

    def __new__(cls, s):
        """Return the interned instance for ``s``, creating it if needed.

        Raises ValueError when the class is exclusive and the normalized
        value is not already known.
        """
        # make sure cls has its own _known and _exclusive, so subclasses
        # do not share the registry of their parent.
        if '_known' not in cls.__dict__:
            cls._known = {}
            cls._exclusive = False

        s = cls.normalize(s)
        mt = cls._known.get(s)
        # Compare against None: an interned empty string is falsy but is
        # still a known value.
        if mt is None:
            if cls._exclusive:
                raise ValueError("Unknown unique string")

            mt = str.__new__(cls, s)
            cls._known[s] = mt
        return mt

    @classmethod
    def normalize(cls, s):
        """
        normalize a string before intern-ing it.

        this is useful when there are multiple values of a string
        that are acceptable but you want to convert them to a
        preferred format, e.g. using a particular capitalization
        style for case-insensitive identifiers.

        this is also an opportunity to reject (with ValueError)
        invalid values.

        the base implementation converts non-str input with str()
        and strips surrounding whitespace.
        """
        if not isinstance(s, str):
            s = str(s)

        return s.strip()

    def addalias(self, alias):
        """
        add an alias for this unique string.

        re-adding an alias that already points at this value is a no-op;
        raises ValueError if the alias already points at another value.

        you can do more powerful things by overriding .normalize().
        """
        if alias in self._known:
            if self is not self._known[alias]:
                raise ValueError('attempt to change UniqueStr alias')
            # XXX should log a warning here, but it's safe to continue
            return
        self._known[alias] = self
95 |
--------------------------------------------------------------------------------
/test/mql_exceptions_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # -*- coding: utf-8 -*-
16 | #
17 | """Making sure new exceptions are called properly."""
18 |
19 | __author__ = 'bobbyrullo@google.com (Bobby Rullo)'
20 |
21 | import google3
22 | import json
23 | from pymql.mql import error
24 | from pymql.test import mql_fixture
25 |
class MQLExceptionTest(mql_fixture.MQLTest):
  """Checks that specific MQL error conditions raise the expected exceptions."""

  def setUp(self):
    super(MQLExceptionTest, self).setUp()
    self.env = {'user': '/user/mw_brendan'}

  def getFuzzKey(self, test_id):
    """Return 'key_<fuzz>' using the part of the fuzz value before any '.'."""
    fuzz = self.getFuzz(test_id)
    # partition keeps the whole value when there is no '.'; slicing with
    # find() would silently drop the last character in that case.
    fuzzKey = 'key_{0}'.format(fuzz.partition('.')[0])
    return fuzzKey

  def newNode(self):
    """Create a new node with an unconditional write and return its id."""
    query = json.dumps({
        "id": None,
        "create": "unconditional",
    })

    self.DoQuery(query, mqlwrite=True)
    new_id = self.mql_result.result['id']
    return new_id

  def testMQLValueAlreadyInUseError(self):
    """Inserting the same namespace key on a second node must fail."""
    key = self.getFuzzKey('alreadyInUse')

    new_id = self.newNode()

    query = {
        "id": new_id,
        "key": {
            "namespace": "/user/mw_brendan/default_domain",
            "value": key,
            "connect": "insert"
        }
    }

    self.DoQuery(json.dumps(query), mqlwrite=True)

    # Same key, different node: this write is expected to be rejected.
    new_id = self.newNode()

    query['id'] = new_id

    self.DoQuery(json.dumps(query), mqlwrite=True,
                 exc_response = (
                     error.MQLValueAlreadyInUseError,
                     'This value is already in use. Please delete it first.'
                 ))

  def testMQLTooManyValuesForUniqueQuery(self):
    """A unique (non-list) query with several matching values must fail."""
    query = {
        "type": None,
        "id": "/en/sofia_coppola",
        "name": None
    }

    exc_response = (
        error.MQLTooManyValuesForUniqueQuery,
        "Unique query may have at most one result. Got 25"
    )
    self.DoQuery(json.dumps(query), exc_response=exc_response)

  def testMQLTooManyWrites(self):
    """A write with a max_writes limit of 0 must raise MQLWriteQuotaError."""
    query = """
    {
      "create":"unconditional",
      "type":"/user/mw_brendan/default_domain/note",
      "name":"foobartoomanywrites",
      "id":null
    }
    """
    self.env = {
        'user': '/user/mw_brendan',
        'max_writes': {
            'limit': 0,
            'guid': '9202a8c04000641f80000000011af200'
        }
    }
    exc_response = (
        error.MQLWriteQuotaError,
        'Daily write limit of 0 was exceeded.'
    )
    self.DoQuery(query, mqlwrite=True, exc_response=exc_response)
110 |
111 | if __name__ == '__main__':
112 | mql_fixture.main()
113 |
--------------------------------------------------------------------------------
/emql/adapters/twitter.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import mw, urllib, rfc822, time, datetime
16 | from collections import defaultdict
17 | from mw.emql.adapter import Adapter
18 |
def rfc822_to_iso(d):
    """Convert an RFC 822 style date string to an ISO 8601 string.

    The date and time fields are taken exactly as written in ``d``; any
    timezone offset in the string is ignored, so the result is a naive
    timestamp such as '2008-08-27T13:08:45'.

    Args:
      d: date string, e.g. 'Wed Aug 27 13:08:45 +0000 2008'.

    Returns:
      The ISO 8601 representation, without timezone information.

    Raises:
      ValueError: ``d`` cannot be parsed as a date.
    """
    # email.utils.parsedate is the maintained equivalent of the deprecated
    # rfc822.parsedate.
    import email.utils

    parsed = email.utils.parsedate(d)
    if parsed is None:
        raise ValueError('unparseable date: %r' % (d,))
    # Build the datetime straight from the parsed fields.  Going through
    # time.mktime()/fromtimestamp() made the result depend on the server's
    # local timezone and DST rules.
    return datetime.datetime(*parsed[:6]).isoformat()
24 |
class tweets_from_adapter(Adapter):
    """Adapter that fetches a user's recent tweets by Twitter screen name."""

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Return the extra MQL clauses needed before ``fetch``: the twitter id."""
        twitter_id_clause = {'value': None, 'limit': 1, 'optional': True}
        return {
            '/internet/social_network_user/twitter_id': twitter_id_clause,
            ':extras': {'foo': 'bar'}
        }

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Fetch tweets for every result in ``args`` that has a twitter id.

        The query may carry 'limit' (default 5) and 'raw'; when 'raw' is
        truthy each tweet also includes the unprocessed entry under 'raw'.

        Returns:
          dict mapping each result's guid to at most ``limit`` tweet dicts.

        Raises:
          Exception: the decoded response contains an 'error' entry.
        """
        result = defaultdict(list)
        query = params['query'] or {}
        if isinstance(query, list):
            query = query[0]

        limit = query.get('limit', 5)
        raw = query.get('raw', None)

        for mqlres in args:
            twitter_id = mqlres['/internet/social_network_user/twitter_id']
            if not twitter_id:
                continue

            session = me.get_session()
            url, connection = session.http_connect('twitter.com',
                                                   "/statuses/user_timeline.json")
            qs = urllib.urlencode({
                'count': limit,
                'screen_name': twitter_id['value']
            })
            connection.request('GET', "%s?%s" % (url, qs))
            response = connection.getresponse()
            payload = mw.json.loads(response.read())
            if 'error' in payload:
                me.log('error', 'emql.adapters.twitter', payload['error'], response=payload)
                raise Exception(payload['error'])

            tweets = []
            for j in payload:
                user = j['user']
                screen_name = user['screen_name']
                tweet = {
                    'timestamp': rfc822_to_iso(j['created_at']),
                    'key': j['id'],
                    'text': j['text'],
                    'user': {'name': user['name'],
                             'profile_image_url': user['profile_image_url'],
                             'screen_name': screen_name,
                             'url': 'http://twitter.com/%s' % screen_name},
                    'url': 'http://twitter.com/%s/status/%s' % (screen_name, j['id'])
                }
                if raw:
                    tweet['raw'] = j
                tweets.append(tweet)

            result[mqlres['guid']].extend(tweets)

        # Trim every per-guid list to the requested limit.
        trimmed = {}
        for guid, tweets in result.iteritems():
            trimmed[guid] = tweets[:limit]
        return trimmed

    def help(self, tid, graph, mql, me, control, params):
        """Return the help text as a (content type, text) pair."""
        from docs import twitter_adapter_help

        return 'text/x-rst;', twitter_adapter_help
86 |
--------------------------------------------------------------------------------
/test/best_hrid_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests /freebase/object_hints/best_hrid resolution.
16 |
17 | /freebase/object_hints/best_hrid specifies a persistent HRID
18 | for an entity. This should be favored over the earlier MQL
19 | algorithm for choosing an HRID based on namespace traversal
20 | and various heuristics.
21 | """
22 | __author__ = 'nix@google.com (Nick Thompson)'
23 |
24 | import json
25 | import random
26 | import string
27 |
28 | import google3
29 | from pymql.mql import error
30 | from pymql.test import mql_fixture
31 |
class HRIDTest(mql_fixture.MQLTest):
  """Tests HRID queries using mqlread."""

  def setUp(self):
    # NOTE: the mock graphd support is broken, so there is no best_hrid.yaml
    #self.SetMockPath('data/best_hrid.yaml')
    super(HRIDTest, self).setUp()
    self.env = {'user': '/user/mw_brendan'}

  def newNodeWithHRID(self, best_hrid):
    """Create a node carrying ``best_hrid`` and return its guid.

    Asserts that the write actually created the node.
    """
    query = """
    {
      "create":"unless_exists",
      "/freebase/object_hints/best_hrid": "%s",
      "guid":null
    }
    """ % best_hrid
    self.DoQuery(query, mqlwrite=True)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(self.mql_result.result["create"],
                     "created")
    return self.mql_result.result["guid"]

  def query_assert(self, q, r, exc_response=None, type="mqlread", asof=None):
    """Run query ``q`` and check it against expected response ``r``.

    Resets ``self.env`` to an empty dict, setting 'as_of_time' when
    ``asof`` is given.  ``type`` is accepted but not used here.
    """
    self.env = {}
    if asof is not None:
      self.env["as_of_time"] = asof
    self.DoQuery(q, exp_response=r, exc_response=exc_response)

  def test_missing_hrid(self):
    """Test that MQL still finds an id even if best_hrid is not present"""
    q= '{"id":null, "guid":"#9202a8c04000641f8000000000092a01", "mid":null}'
    r= ('{"guid": "#9202a8c04000641f8000000000092a01",'
        '"id": "/en/sting","mid":"/m/0lbj1"}')
    self.query_assert(q,r)

  def test_good_hrid(self):
    """Test /type/type, a best_hrid that agrees with the MQL heuristics"""
    # /m/0j == /type/type
    q= '{"id":null, "mid":"/m/0j", "/freebase/object_hints/best_hrid":null}'
    r= ('{"id": "/type/type","mid":"/m/0j",'
        '"/freebase/object_hints/best_hrid":"/type/type"}')
    self.query_assert(q, r)

  def test_hrid_override(self):
    """Create a new node with a bogus best_hrid.

    The old MQL heuristics will fail; check that best_hrid works.
    """
    # A random 16-letter suffix keeps each run's hrid distinct.
    best_hrid = ('/user/nix/random_test_hrid/' +
                 ''.join(random.choice(string.ascii_lowercase)
                         for _ in range(16)))
    guid = self.newNodeWithHRID(best_hrid)

    q= (('{"id":null, "guid":"%(guid)s",'
         '"/freebase/object_hints/best_hrid":null}' %
         {"guid":guid}))
    r= (('{"id": "%(best_hrid)s","guid":"%(guid)s",'
         '"/freebase/object_hints/best_hrid":"%(best_hrid)s"}') %
        {"guid":guid,"best_hrid":best_hrid})
    self.query_assert(q, r)
92 |
93 | if __name__ == '__main__':
94 | mql_fixture.main()
95 |
--------------------------------------------------------------------------------
/api/hicache.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | from locache import LojsonCachePolicy
17 | from mw.log import LOG
18 |
class LWTCachePolicy(LojsonCachePolicy):
    """Cache policy that takes its varenv, costs and timing from `mss`.

    Long run, we can probably factor 'mss' out of here completely, right?
    """
    cost_prefix = 'c'

    def __init__(self, mss, tag='mql'):
        # The base class gets None for ctx/varenv on purpose: every use is
        # overridden here, so any stray access in LojsonCachePolicy should
        # blow up loudly instead of silently using a bad value.
        super(LWTCachePolicy, self).__init__(None, None, tag,
                                             start_time=mss.time_start)
        self.mss = mss

    def _get_varenv(self):
        """Forward reads of `varenv` to the wrapped mss."""
        return self.mss.varenv

    def _set_varenv(self, varenv):
        """Ignore writes; the varenv always lives on self.mss."""
        pass

    # wrap the existing varenv
    varenv = property(_get_varenv, _set_varenv)

    def annotate_key_object(self, key_obj):
        """Wrap `key_obj` in an envelope built from selected varenv keys."""
        envelope_keys = ("cursor", "macro", "escape",
                         "uniqueness_failure", "$lang",
                         "asof", "normalize_only", "unicode_text")
        return self.get_varenv_envelope(key_obj, envelope_keys)

    def annotate_result(self, result):
        """Add the transaction id (and cursor, if any) to the base annotation."""
        annotated = super(LWTCachePolicy, self).annotate_result(result)
        annotated["tid"] = self.mss.transaction_id

        env = self.mss.varenv
        if 'cursor' in env:
            annotated['cursor'] = env['cursor']

        return annotated

    def extract_result(self, full_result):
        """Update cache age and cursor on mss, then defer to the base class."""
        # all of this should maybe be done in the mqlread itself?

        # set the age header to at least this old; this is the other place
        # where the use of mss.time_start is important
        age = self.start_time - full_result['time']
        self.mss.cache_age = max(self.mss.cache_age, age)

        if 'cursor' in full_result:
            self.mss.varenv['cursor'] = full_result['cursor']

        return super(LWTCachePolicy, self).extract_result(full_result)

    def add_cost(self, costkey, value=1):
        """Record a cost on mss under `cost_prefix` + `costkey`."""
        self.mss.add_cost(self.cost_prefix + costkey, value)

    def should_read_cache(self):
        """Return the varenv "cache" setting, defaulting to True."""
        return self.varenv.get("cache", True)

    def should_write_cache(self):
        """Return whether this read may be written through to the cache."""
        env = self.varenv

        # allow certain reads to not write-through to the cache (for
        # instance, crawlers and results with cursors)
        if env.get('no_store_cache', False):
            return False

        # we don't cache past the first page in a cursor'ed query
        return not ('cursor' in env and env['cursor'] != True)
90 |
--------------------------------------------------------------------------------
/emql/adapters/quote.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | from itertools import izip, chain
17 | from urllib import urlencode
18 |
19 | from mw.emql.adapter import Adapter, REQUEST_HEADERS
20 | from mw.emql.emql import id_guid, formatted_id_guid
21 |
22 |
class quote_adapter(Adapter):
    """eMQL adapter that looks up stock quote fields by ticker symbol."""

    ticker = "/business/stock_ticker_symbol/ticker_symbol"

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Ask MQL to supply the ticker symbol for each result."""
        return {self.ticker: None}

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Fetch quote data for every MQL result in `args`.

        `params['query']` selects the output: None yields the raw CSV line
        per guid, a dict yields a dict holding the requested fields
        ('volume', 'price', 'high', 'low' and optionally 'ticker'); any
        other non-dict value is handled like a request for 'price'.

        Returns a dict keyed by guid.

        Raises:
            ValueError: if the query dict contains an unsupported key.
        """
        query = params.get('query')

        # format documented at http://alexle.net/archives/196
        field_codes = {'volume': 'v', 'price': 'l1', 'high': 'h', 'low': 'g'}

        if isinstance(query, dict):
            keys = []
            for key in query:
                if key == 'ticker':
                    # answered from the MQL result, not from the quote feed
                    continue
                if key not in field_codes:
                    raise ValueError(key)
                keys.append(key)
            # codes are emitted in the same order as `keys` so that the CSV
            # columns can be zipped back onto the keys below
            fmt = ''.join(field_codes[key] for key in keys)
        else:
            keys = ['price']
            fmt = 'l1'

        symbols = ','.join(mqlres[self.ticker] for mqlres in args)
        url, connection = me.get_session().http_connect(
            'download.finance.yahoo.com', '/d/quotes.csv')
        connection.request('POST', url,
                           urlencode({'s': symbols, 'f': fmt}),
                           REQUEST_HEADERS)
        response = connection.getresponse().read()

        results = {}
        # one CSV line per requested symbol, in request order
        for mqlres, values in izip(args, response.rstrip().split('\r\n')):
            if query is None:
                results[mqlres['guid']] = values
                continue

            result = {}
            for key, value in izip(keys, values.split(',')):
                if value == "N/A":
                    value = None
                elif key in ('high', 'low', 'price'):
                    value = float(value)
                elif key == 'volume':
                    value = long(value)
                result[key] = value
            if 'ticker' in query:
                result['ticker'] = mqlres[self.ticker]
            results[mqlres['guid']] = result

        return results

    def help(self, tid, graph, mql, me, control, params):
        """Return the (content type, text) pair documenting this adapter."""
        from docs import quote_adapter_help

        return 'text/x-rst;', quote_adapter_help
95 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MQL, the Metaweb Query Language
2 |
3 | This repository contains the original implementation of the Metaweb Query
4 | Language, written in Python.
5 |
6 | ## Building / Using MQL
7 |
8 | Even though MQL is written in Python, this particular version of it requires
9 | [Bazel](https://bazel.build) to operate properly. You can build the simple
10 | command-line MQL query tool like so:
11 |
12 | ```
13 | [¬º-°]¬ bazel build :mqlbin
14 | INFO: Analyzed target //:mqlbin (6 packages loaded, 36 targets configured).
15 | INFO: Found 1 target...
16 | Target //:mqlbin up-to-date:
17 | bazel-out/k8-py2-fastbuild/bin/mqlbin
18 | INFO: Elapsed time: 0.771s, Critical Path: 0.02s
19 | INFO: 0 processes.
20 | INFO: Build completed successfully, 1 total action
21 | ```
22 |
23 | Then, it can be executed out of the bazel build directory:
24 |
25 | *IMPORTANT!* This will only work if you have an instance of
26 | [graphd](https://github.com/google/graphd) running, and that instance has
27 | been properly **bootstrapped** for MQL (see below).
28 |
29 | ```
30 | [¬º-°]¬ bazel-out/k8-py2-fastbuild/bin/mqlbin --graphd_addr=localhost:8100 --mqlcmd=read '{"id": "/type/object/type", "guid": null}'
31 |
32 | MQLResult(result={'guid': '#d119a8c0400062d1800000000000000c', 'id': '/type/object/type'}, cost=defaultdict(, {'pr': 0.0, 'va': 38742.0, 'tu': 22.0, 'in': 3975.0, 'ir': 0.0, 'tr': 23.0, 'ts': 0.0, 'iw': 0.0, 'te': 26.0, 'mql_utime': 0.047658000000000006, 'mql_dbreqs': 11, 'dw': 0.0, 'tg': 0.030711889266967773, 'tf': 0.04290890693664551, 'pf': 0.0, 'mql_rtime': 1.1784470081329346, 'dr': 5619.0, 'gqr': 0, 'mql_stime': 0.0009940000000000018}), dateline=None, cursor=None)
33 |
34 | ```
35 |
36 | ## Bootstrapping a graphd for MQL
37 |
38 | PyMQL comes with a graphd bootstrap program that you can use to bootstrap an
39 | empty graphd for use with MQL. The bootstrap program itself writes the set of
40 | core types required for MQL to operate.
41 |
42 | First, ensure you have a graphd running:
43 |
44 | ```
45 | [¬º-°]¬ git clone https://github.com/google/graphd
46 | Cloning into 'graphd'...
47 | remote: Enumerating objects: 1259, done.
48 | remote: Total 1259 (delta 0), reused 0 (delta 0), pack-reused 1259
49 | Receiving objects: 100% (1259/1259), 2.57 MiB | 14.95 MiB/s, done.
50 | Resolving deltas: 100% (482/482), done.
51 | [¬º-°]¬ cd graphd
52 | [¬º-°]¬ bazel build graphd
53 | ...(graphd builds)
54 | Target //graphd:graphd up-to-date:
55 | bazel-bin/graphd/graphd
56 | INFO: Elapsed time: 29.584s, Critical Path: 0.87s
57 | INFO: 373 processes: 373 linux-sandbox.
58 | INFO: Build completed successfully, 377 total actions
59 | [¬º-°]¬ bazel-bin/graphd/graphd -d /tmp/data-dir -p /tmp/graphd.pid -n
60 |
61 | ```
62 |
63 | In another terminal, run the bootstrap:
64 |
65 | ```
66 | [¬º-°]¬ ./bazel-out/k8-py2-fastbuild/bin/bootstrap/bootstrap --load bootstrap/otg.bootstrap
67 | ```
68 |
69 | The bootstrap takes a few minutes to run and you'll see lots of
70 | `graphd.request.start` and `graphd.request.end` lines. This is normal.
71 |
72 | After this is done, you can run MQL queries via mqlbin.
73 |
74 | ## History
75 |
76 | This code was originally authored by Tim Sturge, then maintained by Warren
77 | Harris after his departure.
78 |
79 | Dime ("2 MQL's") was the implementation written by Warren in OCaml that offered
80 | significant improvements over this initial implementation. However, when Metaweb
81 | was acquired by Google nearing the end of the productionization of Dime, it was
82 | only used partially until Freebase was turned down a few years later. In the
83 | meantime, Warren had gone on to develop other tools used during the early days
84 | of the Knowledge Graph projects at Google.
85 |
--------------------------------------------------------------------------------
/log/log.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.6
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Backward compatible support for mql LOG calls."""
16 |
17 | __author__ = 'bneutra@google.com (Brendan Neutra)'
18 |
19 | import datetime
20 | import os
21 | import socket
22 | from pymql.log import log_util
23 | from pymql.util import dumper
24 | from absl import logging
25 |
26 | __all__ = ['generate_tid', 'LOG', 'pprintlog', 'dumplog']
27 |
28 | # allow google logging to discover the caller
29 | # i.e. ignore these local functions
30 | skip = [
31 | '_logit', 'fatal', 'error', 'warn', 'info', 'debug', 'spew', 'exception',
32 | 'warning', 'alert', 'notice', 'log', 'pprintlog', 'dumplog'
33 | ]
34 |
35 |
def _logit(level, s, args=None, kwargs=None):
  """Emit `s` plus tab-separated extras at verbosity `level`.

  Args:
    level: verbosity level; nothing happens when it exceeds the current
      verbosity.
    s: the leading message text.
    args: optional sequence of positional extras, rendered with str().
    kwargs: optional dict of keyword extras, rendered as key=value.
  """
  # let's not waste any cycles
  if level > logging.get_verbosity():
    return
  # Collect every extra first so that positional and keyword parts share one
  # separator; concatenating two separately joined strings glued the last
  # positional value to the first key=value pair.
  parts = []
  if args:
    parts.extend(str(arg) for arg in args)
  if kwargs:
    parts.extend('%s=%s' % pair for pair in kwargs.items())
  logging.vlog(level, '%s %s' % (s, '\t'.join(parts)))
46 |
47 |
class LOG(object):
  """Namespace of static logging helpers that all forward to _logit()."""

  @staticmethod
  def fatal(s, *args, **kwargs):
    """Log at logging.FATAL."""
    _logit(logging.FATAL, s, args, kwargs)

  @staticmethod
  def error(s, *args, **kwargs):
    """Log at logging.ERROR."""
    _logit(logging.ERROR, s, args, kwargs)

  @staticmethod
  def warn(s, *args, **kwargs):
    """Log at logging.WARN."""
    _logit(logging.WARN, s, args, kwargs)

  @staticmethod
  def info(s, *args, **kwargs):
    """Log at logging.INFO."""
    _logit(logging.INFO, s, args, kwargs)

  @staticmethod
  def debug(s, *args, **kwargs):
    """Log at logging.DEBUG."""
    _logit(logging.DEBUG, s, args, kwargs)

  @staticmethod
  def spew(s, *args, **kwargs):
    """Log at log_util.SPEW."""
    _logit(log_util.SPEW, s, args, kwargs)

  @staticmethod
  def log(level, s, *args, **kwargs):
    """Log at an explicitly supplied level."""
    _logit(level, s, args, kwargs)

  # Alternate names that share an implementation with a method above.
  warning = warn
  alert = warn
  notice = info
  exception = fatal
82 |
83 |
def dumplog(string, obj, level=log_util.SPEW):
  """Log `string` followed by dumper.dumps(obj) when `level` is enabled."""
  if level > logging.get_verbosity():
    return
  dumped = dumper.dumps(obj)
  LOG.log(level, string, dumped)
87 |
88 |
def pprintlog(string, obj, level=log_util.DEBUG, **kwargs):
  """Log `string` followed by repr(obj) when `level` is enabled.

  Extra keyword arguments are accepted and ignored.
  """
  if level > logging.get_verbosity():
    return
  rendered = repr(obj)
  LOG.log(level, string, rendered)
92 |
93 |
# Module-wide sequence number; generate_tid() increments it for every tid.
tid_seqno = 0
# Fully qualified host name, looked up once when the module is imported.
hostname = socket.getfqdn()
# The socket module is only needed for the lookup above, so drop the name.
del socket
# Process id, captured once when the module is imported.
pid = os.getpid()
98 |
99 |
def generate_tid(token=None, hostport=None):
  """Build a transaction id string.

  The result has the form
  `token;host:port;pid;UTC-timestampZ;sequence-number`.

  Args:
    token: leading label; 'me' is used when it is empty or None.
    hostport: 'host:port' string, a bare port (number or string), or nothing;
      bare ports are prefixed with this machine's host name and a missing
      value becomes 'hostname:0'.
  """
  global tid_seqno

  # can't determine port without looking at WSGI environ or apache
  # config? perhaps we could read this from a config file?
  if not hostport:
    address = '%s:0' % hostname
  elif isinstance(hostport, (int, long)):
    # hostport could be just a port, we prefix it with hostname then
    address = '%s:%d' % (hostname, hostport)
  elif ':' in hostport:
    address = hostport
  else:
    address = '%s:%s' % (hostname, hostport)

  label = token or 'me'

  # small race condition here
  tid_seqno += 1

  stamp = datetime.datetime.utcnow().isoformat('T')
  return '%s;%s;%05d;%sZ;%04d' % (label, address, pid, stamp, tid_seqno)
122 |
--------------------------------------------------------------------------------
/test/mql_fixture_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.4
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # -*- coding: utf-8 -*-
17 | #
18 | """test the test fixture."""
19 |
20 | __author__ = 'bneutra@google.com (Brendan Neutra)'
21 |
22 | import sys
23 | import google3
24 | from pymql.mql import error
25 | from pymql.test import mql_fixture
26 | from google3.pyglib import logging
27 |
28 |
class MQLTest(mql_fixture.MQLTest):
  """Tests the MQL test fixture itself using basic mqlread queries."""

  def setUp(self):
    self.SetMockPath('data/mql_fixture.yaml')
    super(MQLTest, self).setUp()
    self.env = {'as_of_time': '2009-10-01'}

  def DoQueryException(self, query, expected, **kwargs):
    """Run `query` and require the fixture to fail with `expected`.

    Args:
      query: the MQL query text handed to DoQuery.
      expected: substring that must appear in the AssertionError message.
      **kwargs: passed through to DoQuery.

    Fails the test when DoQuery raises no AssertionError at all, or when
    the message does not contain `expected`.
    """
    try:
      self.DoQuery(query, **kwargs)
    except AssertionError:
      msg = str(sys.exc_info()[1])
      if expected not in msg:
        self.fail('expected: %s\ngot: %s' % (expected, msg))
      logging.debug('assertion raised, as expected! got: %s', expected)
    else:
      # Without this branch a fixture that stopped detecting mismatches
      # would let every negative test pass silently.
      self.fail('expected: %s\ngot: no assertion failure' % expected)

  def testPositive(self):
    query = """
    {
      "id": "/en/bob_dylan"
    }
    """
    exp_response = """
    {
      "id": "/en/bob_dylan"
    }
    """
    self.DoQuery(query, exp_response=exp_response)

  def testUnexpectedResponse(self):
    query = """
    {
      "id": "/en/bob_dylan"
    }
    """
    exp_response = """
    {
      "id": "/n/bob_dylan"
    }
    """
    self.DoQueryException(
        query,
        '!=',
        exp_response=exp_response
    )

  def testUnexpectedError(self):
    query = """
    {
      "invalidkey": "/en/bob_dylan"
    }
    """
    exp_response = """
    {
      "id": "/n/bob_dylan"
    }
    """
    self.DoQueryException(
        query,
        'exception. was not expected',
        exp_response=exp_response
    )

  def testExpectError(self):
    query = """
    {
      "guid": "#9202a8c04000641f8000000003abd178",
      "id": "/en/bob_dylan"
    }
    """
    exc_response = (
        error.MQLParseError,
        "Can't specify an id more than once in a single clause"
    )
    self.DoQuery(query, exc_response=exc_response)

  def testExpectNoError(self):
    query = """
    {
      "guid": "#9202a8c04000641f8000000003abd178",
      "id": "/en/bob_dylan"
    }
    """
    self.DoQueryException(
        query,
        'exception. was not expected',
        exp_response='whatev'
    )

  def testExpectOtherError(self):
    query = """
    {
      "guid": "#9202a8c04000641f8000000003abd178",
      "id": "/en/bob_dylan"
    }
    """
    self.DoQueryException(
        query,
        "MQLParseError'> != ",
        exc_response=(KeyError, 'whatev')
    )
133 |
134 | if __name__ == '__main__':
135 | mql_fixture.main()
136 |
--------------------------------------------------------------------------------
/mql/grquoting.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import re
16 | from xml.sax import saxutils
17 | import urllib
18 | import cgi
19 |
20 | from pymql.error import FormattingError
21 |
22 | ######################################################################
23 |
# Quoting rules, as (raw character, escaped form) pairs: a double quote, a
# backslash and a newline are each written as a backslash escape.
_internal_quoting_rules = [
    ('\"', '\\\"'),
    ('\\', '\\\\'),
    ('\n', '\\n'),
]

# raw character -> escaped form (used when quoting)
_internal_to_quote = dict(_internal_quoting_rules)
# escaped form -> raw character (used when unquoting)
_internal_from_quote = dict([(a, b) for b, a in _internal_quoting_rules])
# NOTE(review): bare quote characters map to the empty string here, but
# re_qs_part below only matches backslash escapes, so these two entries look
# unreachable through unquote() -- confirm before relying on them.
_internal_from_quote['\''] = ''
_internal_from_quote['\"'] = ''

# I love REs (aka read it and weep)
# A whole quoted string: a double quote, then any run of characters that are
# neither backslash nor double quote, or one of the escapes \\ \" \n, then a
# closing double quote. Group 1 is the text between the quotes.
re_quoted_string_text = '^\"((?:[^\\\\\"]|\\\\[\\\\\"n])*)\"$'
# A single escape sequence inside a quoted string: \\ or \" or \n.
re_quoted_string_part = '\\\\[\\\\\"n]'
# everything matches this, so we don't test (ie. all unquoted strings are legal)
re_unquoted_string_text = '^(?:[^\\\\\n\"]|([\\\\\n\"]))*$'
# A single raw character that needs escaping: backslash, newline or quote.
re_unquoted_string_part = '[\\\\\n\"]'

# Compiled forms used by quote() and unquote().
re_qs = re.compile(re_quoted_string_text)
re_qs_part = re.compile(re_quoted_string_part)
re_us_part = re.compile(re_unquoted_string_part)
46 |
47 |
def _internal_quote_sub(m):
  """Return the escaped form of the raw character matched by `m`."""
  raw = m.group(0)
  return _internal_to_quote[raw]
50 |
51 |
def _internal_unquote_sub(m):
  """Return the raw character for the escape sequence matched by `m`."""
  escaped = m.group(0)
  return _internal_from_quote[escaped]
54 |
55 |
56 | def _internal_leading_trailing(m):
57 | return
58 |
59 |
60 | ######################################################################
61 |
62 |
def quote(string):
  """Return `string` wrapped in double quotes with specials escaped.

  Backslashes, double quotes and newlines are replaced by their backslash
  escapes; every other character is copied through unchanged.
  """
  escaped = re_us_part.sub(_internal_quote_sub, string)
  return '"' + escaped + '"'
65 |
66 |
def unquote(string):
  """Strip the surrounding quotes from `string` and undo its escapes.

  Raises:
    FormattingError: if `string` is not a well-formed quoted string.
  """
  whole = re_qs.match(string)
  if whole is not None:
    inner = whole.group(1)
    return re_qs_part.sub(_internal_unquote_sub, inner)
  raise FormattingError('Badly formatted quoted string %s ' % string)
72 |
73 |
74 | ######################################################################
75 |
76 | #
77 | # html escaping
78 | # url escaping
79 | #
80 | # originally from mw/client/escaping.py
81 | #
82 |
83 |
def escapeAttribute(data):
  """Return `data` quoted and escaped for use as an attribute value.

  This is a thin wrapper around xml.sax.saxutils.quoteattr, so the result
  already includes the surrounding quote characters and can be dropped
  straight into markup:

      >>> print "<element attr=%s>" % quoteattr("ab ' cd \" ef")
      <element attr="ab ' cd &quot; ef">
  """
  quoted = saxutils.quoteattr(data)
  return quoted
93 |
94 |
def escapeUrl(data):
  """Return `data` with special characters replaced by "%xx" escapes.

  Delegates to urllib.quote: letters, digits, and the characters "/_.-"
  are never escaped.
  """
  quoted = urllib.quote(data)
  return quoted
102 |
103 |
def escapeMarkup(data):
  """Return `data` with "&", "<" and ">" turned into HTML-safe sequences.

  Delegates to cgi.escape with its default arguments.
  """
  escaped = cgi.escape(data)
  return escaped
110 |
111 |
112 | ######################################################################
113 |
114 | if __name__ == '__main__':
115 | print quote("\n\r\t\"\\foo\\\"") # result is "\n\r\t\"\\foo\\\"" (duh)
116 | print unquote(
117 | "\"foo\\n\\\"\\\\\""
118 | ) # result is foo"\ -- note that python sees "foo\n\"\\"
119 | print unquote(
120 | "\"foo\\\"\\\"") # should die with an "illegal quoted string" exception
121 |
--------------------------------------------------------------------------------
/emql/adapters/text.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | from itertools import izip, chain
17 |
18 | from mw.emql.adapter import Adapter
19 | from mw.emql.emql import id_guid, formatted_id_guid
20 |
21 |
class text_adapter(Adapter):
    """eMQL adapter that returns the text of a /common/document."""

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Ask MQL for the document's content blob, source uri and guid."""
        return {"/common/document/content":
                    {"optional": True, "blob_id": None, "media_type": None},
                "/common/document/source_uri": None,
                "guid": None}

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Fetch text for every MQL result in `args`.

        Documents with content of a text media type are read from the blob
        store; documents without content but with a source uri are fetched
        through the trans API. With no query (`params['query']` is None) the
        value per guid is the text itself; otherwise it is a copy of the
        query dict with 'chars' and 'length' filled in when present
        ('maxlength' truncates the text).

        Returns a dict keyed by guid.

        Raises:
            ValueError: if the query asks for a mode other than 'blurb' or
                'raw'.
        """
        params = params.get('query')
        results = {}

        for mqlres in args:
            guid = mqlres['guid']
            content = mqlres["/common/document/content"]
            if content is not None:
                mediatype = content["media_type"]
                if mediatype and mediatype.startswith("/media_type/text"):
                    blob_id = content["blob_id"]
                    if blob_id:
                        chars = me.get_session().fetch_blob(tid, blob_id)
                        try:
                            chars = unicode(chars, 'utf-8')
                        except (TypeError, ValueError):
                            # Best effort: keep the raw value when it is not
                            # valid UTF-8 (UnicodeDecodeError is a
                            # ValueError) or is not a byte string.
                            pass

                        if params is None:
                            results[guid] = chars
                        else:
                            results[guid] = result = params.copy()
                            if 'maxlength' in result:
                                chars = chars[:result['maxlength']]
                            if 'chars' in result:
                                result['chars'] = chars
                            if 'length' in result:
                                result['length'] = len(chars)

            elif mqlres["/common/document/source_uri"] is not None:
                if params is None:
                    maxlength = None
                    mode = 'blurb'
                else:
                    maxlength = params.get('maxlength')
                    mode = params.get('mode', 'blurb')
                    if mode not in ('blurb', 'raw'):
                        raise ValueError("invalid mode: '%s'" % (mode))

                query = '/guid/%s' % (guid[1:])
                if maxlength:
                    query += '?maxlength=%d' % (maxlength)

                url, connection = me.get_session().http_connect(
                    'api.freebase.com', '/api/trans/%s' % (mode) + query)
                connection.request('GET', url)
                response = connection.getresponse()
                chars = response.read()

                if params is None:
                    results[guid] = chars
                else:
                    results[guid] = result = params.copy()
                    if 'chars' in result:
                        result['chars'] = chars
                    if 'length' in result:
                        result['length'] = len(chars)

        return results

    def help(self, tid, graph, mql, me, control, params):
        """Return the (content type, text) pair documenting this adapter."""
        from docs import text_adapter_help

        return 'text/x-rst;', text_adapter_help
95 |
--------------------------------------------------------------------------------
/test/BUILD:
--------------------------------------------------------------------------------
1 | # Author: bneutra@google.com (Brendan Neutra)
2 | #
3 | # Description: mql query language tests.
4 | #
5 |
6 | package(default_visibility = ["//visibility:public"])
7 |
8 | licenses(["unencumbered"]) # Google acquisition
9 |
10 | exports_files(["LICENSE"])
11 |
12 | py_library(
13 | name = "testing_deps",
14 | testonly = 1,
15 | srcs = [
16 | "mql_fixture.py",
17 | ],
18 | data = [
19 | ":config.cfg",
20 | ] + glob([
21 | "data/*.yaml",
22 | ]),
23 | deps = [
24 | "//base",
25 | "//loadbalancer/gslb/client/public:pywrapgslbchannel",
26 | "//metaweb/graphd/server:graphd_py_pb2",
27 | "//pyglib",
28 | "//testing/pybase",
29 | "//third_party/py/pymql:mql",
30 | "//third_party/py/simplejson:simplejson_fast",
31 | "//third_party/py/yaml",
32 | ],
33 | )
34 |
35 | py_test(
36 | name = "type_link_test",
37 | size = "large",
38 | srcs = [
39 | "type_link_test.py",
40 | ],
41 | deps = [
42 | ":testing_deps",
43 | ],
44 | )
45 |
46 | py_test(
47 | name = "cost_test",
48 | size = "large",
49 | srcs = [
50 | "cost_test.py",
51 | ],
52 | deps = [
53 | ":testing_deps",
54 | ],
55 | )
56 |
57 | py_test(
58 | name = "sort_test",
59 | size = "large",
60 | srcs = [
61 | "sort_test.py",
62 | ],
63 | deps = [
64 | ":testing_deps",
65 | ],
66 | )
67 |
68 | py_test(
69 | name = "basic_mql_test",
70 | size = "large",
71 | srcs = [
72 | "basic_mql_test.py",
73 | ],
74 | deps = [
75 | ":testing_deps",
76 | ],
77 | )
78 |
79 | py_test(
80 | name = "mids_test",
81 | size = "large",
82 | srcs = [
83 | "mids_test.py",
84 | ],
85 | deps = [
86 | ":testing_deps",
87 | ],
88 | )
89 |
90 | py_test(
91 | name = "best_hrid_test",
92 | size = "medium",
93 | srcs = [
94 | "best_hrid_test.py",
95 | ],
96 | deps = [
97 | ":testing_deps",
98 | ],
99 | )
100 |
101 | py_test(
102 | name = "mql_fixture_test",
103 | size = "large",
104 | srcs = [
105 | "mql_fixture_test.py",
106 | ],
107 | deps = [
108 | ":testing_deps",
109 | ],
110 | )
111 |
112 | py_test(
113 | name = "regression_id_test",
114 | size = "large",
115 | srcs = [
116 | "regression_id_test.py",
117 | ],
118 | deps = [
119 | ":testing_deps",
120 | ],
121 | )
122 |
123 | py_test(
124 | name = "regression_misc_test",
125 | size = "large",
126 | srcs = [
127 | "regression_misc_test.py",
128 | ],
129 | deps = [
130 | ":testing_deps",
131 | ],
132 | )
133 |
134 | py_test(
135 | name = "mql_manual_test",
136 | size = "large",
137 | srcs = [
138 | "mql_manual_test.py",
139 | ],
140 | deps = [
141 | ":testing_deps",
142 | ],
143 | )
144 |
145 | py_test(
146 | name = "mql_manual_two_test",
147 | size = "large",
148 | srcs = [
149 | "mql_manual_two_test.py",
150 | ],
151 | deps = [
152 | ":testing_deps",
153 | ],
154 | )
155 |
156 | py_test(
157 | name = "mql_manual_write_test",
158 | size = "large",
159 | srcs = [
160 | "mql_manual_write_test.py",
161 | ],
162 | deps = [
163 | ":testing_deps",
164 | ],
165 | )
166 |
167 | py_test(
168 | name = "return_test",
169 | size = "large",
170 | srcs = [
171 | "return_test.py",
172 | ],
173 | deps = [
174 | ":testing_deps",
175 | ],
176 | )
177 |
178 | py_test(
179 | name = "mql_exceptions_test",
180 | size = "large",
181 | srcs = [
182 | "mql_exceptions_test.py",
183 | ],
184 | deps = [
185 | ":testing_deps",
186 | ],
187 | )
188 |
189 | py_test(
190 | name = "query_sort_test",
191 | size = "small",
192 | srcs = [
193 | "query_sort_test.py",
194 | ],
195 | deps = [
196 | ":testing_deps",
197 | ],
198 | )
199 |
--------------------------------------------------------------------------------
/bootstrap/bootstrap.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """bootstrap -- dump and load a bootstrap from an existing graph."""
3 |
4 | import json
5 | import re
6 | import sys
7 |
8 | from absl import app
9 | from absl import flags
10 | from absl import logging
11 |
12 | from pymql import MQLService
13 |
14 | from pymql.mql import graph
15 | from pymql.mql import lojson
16 |
17 | FLAGS = flags.FLAGS
18 | flags.DEFINE_string(
19 | 'mqlenv', None, 'a dict in the form of a string which '
20 | 'contains valid mql env key/val pairs')
21 | flags.DEFINE_string('graphd_addr', 'localhost:9100',
22 | 'host:port of graphd server')
23 | flags.DEFINE_string('load', '', 'load bootstrap from given file')
24 |
25 |
class BootstrapError(Exception):
  """Raised for a bootstrap version mismatch or a dangling link."""
28 |
29 |
class Bootstrap(object):
  """Loads a dumped bootstrap file and writes it into an empty graph.

  `gc` is the graph connection; it must offer write_varenv(query, varenv)
  and read_varenv(query, varenv).
  """
  # Dump format version this loader understands.
  version = 1

  def __init__(self, gc):
    self.gc = gc

  def load_from_file(self, filename):
    """Read the JSON dump in `filename` into bootstrap/nodes/links.

    Raises:
      BootstrapError: if the dump's version differs from `self.version`.
    """
    # The context manager closes the file even when reading fails.
    with open(filename, 'r') as loadfile:
      data = loadfile.read()
    # Runs of newlines and tabs are collapsed to one space before parsing.
    data = re.sub('[\n\t]+', ' ', data)

    d = json.loads(data)
    if d['0_version'] != self.version:
      raise BootstrapError('version mismatch')

    self.bootstrap = d['1_bootstrap']
    self.nodes = d['2_nodes']
    self.links = d['3_links']

  def mkprim(self, **kwds):
    """Write one primitive built from `kwds` and return the first result.

    Once the root user exists it is used as the default scope.
    """
    if 'scope' not in kwds and self.root_user:
      kwds['scope'] = self.root_user
    params = ' '.join('%s=%s' % (k, v) for (k, v) in kwds.items())
    result = self.gc.write_varenv('(%s)' % params, {})
    return result[0]

  def load_bootstrap(self):
    """Create the three root primitives and record their translations."""
    self.xlate = {}
    self.xlate_link = {}

    if len(self.gc.read_varenv('(pagesize=1 result=(guid))', {})):
      logging.fatal("Can't bootstrap a non-empty graph")

    self.root_user = None  # avoid forward ref in mkprim
    self.root_user = self.mkprim(name='"ROOT_USER"')
    self.root_namespace = self.mkprim(name='"ROOT_NAMESPACE"')
    self.has_key = self.mkprim(name='"HAS_KEY"')

    self.xlate[self.bootstrap['ROOT_USER']] = self.root_user
    self.xlate[self.bootstrap['ROOT_NAMESPACE']] = self.root_namespace
    self.xlate[self.bootstrap['HAS_KEY']] = self.has_key

  def load_root_user(self):
    """Write all nodes and links, merged in ascending guid order."""
    # we dumped them separately, but we want to load them together...
    node_pos = 0
    link_pos = 0
    node_count = len(self.nodes)
    link_count = len(self.links)
    while node_pos < node_count or link_pos < link_count:
      node_is_next = link_pos >= link_count or (
          node_pos < node_count and
          self.nodes[node_pos] < self.links[link_pos]['guid'])
      if node_is_next:
        self.write_node(self.nodes[node_pos])
        node_pos += 1
      else:
        self.write_link(self.links[link_pos])
        link_pos += 1

  def write_node(self, node):
    """Create a primitive for `node` unless it already has a translation."""
    if node not in self.xlate:
      self.xlate[node] = self.mkprim()

  def write_link(self, link):
    """Write `link` with its pointers translated to the new guids.

    Raises:
      BootstrapError: if a pointer refers to a node not written yet.
    """
    new_link = {'datatype': link['datatype'], 'value': link['value']}
    for ptr in ('left', 'right', 'scope', 'typeguid'):
      if ptr not in link:
        continue
      target = link[ptr]
      if target == 'null':
        new_link[ptr] = 'null'
      elif target in self.xlate:
        # translate the link
        new_link[ptr] = self.xlate[target]
      else:
        raise BootstrapError('Saw dangling link %s' % repr(link))
    new_link['guid'] = self.mkprim(**new_link)

    self.xlate_link[link['guid']] = new_link
109 |
110 |
def main(argv):
    """Load the bootstrap file named by --load into the graph.

    Raises:
      Exception: --graphd_addr was not given.
    """
    if not FLAGS.graphd_addr:
        raise Exception('Must specify a --graphd_addr')

    # NOTE(review): --graphd_addr is required above, but its value is not
    # used; the connection always goes to localhost:8100.  Confirm whether
    # the flag should be parsed into `addrs`.
    connector = graph.TcpGraphConnector(addrs=[('localhost', 8100)])

    loader = Bootstrap(connector)
    loader.load_from_file(FLAGS.load)
    loader.load_bootstrap()
    loader.load_root_user()
121 |
122 |
# Script entry point: hand main() to the app runner when executed directly.
if __name__ == '__main__':
    app.run(main)
125 |
--------------------------------------------------------------------------------
/util/keyquote.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import string
16 | from pymql.mql import error
17 |
def quotekey(ustr):
    """
    quote a unicode string to turn it into a valid namespace key

    Letters, digits and '_' are kept everywhere; '-' is kept only when it
    is neither the first nor the last character.  Every other character is
    written as '$' followed by its code point as at least four upper-case
    hex digits.

    ustr may be a utf-8 encoded byte string or a unicode string; anything
    else raises ValueError.  The result is a native str.
    """
    valid_always = string.ascii_letters + string.digits + '_'
    valid_interior_only = valid_always + '-'

    # bytes is str on Python 2, so this is the same check as before there,
    # while also accepting utf-8 bytes on Python 3.
    if isinstance(ustr, bytes):
        s = ustr.decode('utf-8')
    elif isinstance(ustr, type(u'')):
        s = ustr
    else:
        raise ValueError('quotekey() expects utf-8 string or unicode')

    if len(s) == 0:
        return str(s)

    last = len(s) - 1
    output = []
    for i, c in enumerate(s):
        # '-' is only legal strictly inside the key
        allowed = valid_interior_only if 0 < i < last else valid_always
        if c in allowed:
            output.append(c)
        else:
            output.append('$%04X' % ord(c))

    return str(''.join(output))
55 |
56 |
def unquotekey(key, encoding=None):
    """
    unquote a namespace key and turn it into a unicode string

    Letters, digits and '_' are copied through, '-' is accepted only
    strictly inside the key, and '$XXXX' is replaced by the character with
    that hexadecimal code point.

    Returns the unicode string, or that string encoded with `encoding`
    when one is given.

    Raises error.MQLInternalError for any other character, and ValueError
    when the four characters after '$' are not a hexadecimal number.
    """

    valid_always = string.ascii_letters + string.digits + "_"
    last = len(key) - 1

    output = []
    i = 0
    while i < len(key):
        c = key[i]
        if c in valid_always:
            output.append(c)
            i += 1
        elif c == '-' and 0 < i < last:
            # '-' is interior-only.  The old test compared i against
            # len(key), which never holds inside this loop, so a trailing
            # '-' used to slip through.
            output.append(c)
            i += 1
        elif c == '$' and i + 4 < len(key):
            # may raise ValueError if there are invalid characters
            output.append(unichr(int(key[i + 1:i + 5], 16)))
            i += 5
        else:
            msg = "key %s has invalid character %s at position %d" % (
                key,
                c,
                i
            )
            raise error.MQLInternalError(None, msg)

    ustr = u''.join(output)

    if encoding is None:
        return ustr

    return ustr.encode(encoding)
91 |
92 |
def unquote_id(id):
    """
    Make an id readable by unquoting each of its '/'-separated keys, e.g.
    /media_type/application/rss$002Bxml becomes
    /media_type/application/rss+xml
    """

    segments = id.split('/')
    if len(segments) == 1:
        # no slash at all: the id is a single key
        return unquotekey(id)

    return '/'.join(unquotekey(segment) for segment in segments)
104 |
def id_to_urlid(id):
    """
    convert a mql id to an id suitable for embedding in a url path.

    '~...' ids are returned unchanged, '#...' ids get their '#' replaced by
    '%23', and '/...' paths have every key unquoted and url-encoded and are
    rejoined without the leading slash.

    Raises AssertionError for an id that is not a non-empty str (or a
    '~'/'#' id containing '/'), and ValueError for any other leading
    character.
    """

    # Check the type before calling any str method on it, so a bad id is
    # reported by the assertion rather than by an AttributeError.
    assert isinstance(id, str) and id != '', 'bad id "%s"' % id

    segs = id.split('/')

    if id[0] == '~':
        assert len(segs) == 1
        # assume valid, should check
        return id

    if id[0] == '#':
        assert len(segs) == 1
        # assume valid, should check
        return '%23' + id[1:]

    if id[0] != '/':
        raise ValueError('unknown id format %s' % id)

    # Imported only on the path that needs it, so '~' and '#' ids do not
    # depend on this module being importable.
    # XXX shouldn't be in metaweb.api!
    from mw.formats.http import urlencode_pathseg

    # ok, we have a slash-path
    # requote components as keys and rejoin.
    # urlids do not have leading slashes!!!
    return '/'.join(urlencode_pathseg(unquotekey(seg)) for seg in segs[1:])
134 |
135 |
--------------------------------------------------------------------------------
/util/mwdatetime.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import datetime
16 | import re
17 |
18 |
19 | # a datetime is a non-empty string containing one of
20 | # yyyy, yyyy-mm, yyyy-mm-dd,
21 | # Thh, Thh:mm, Thh:mm:ss Thh:mm:ss.dddd
22 | # or yyyy-mm-dd followed by one of the T constructs.
23 | # Note that this is more lenient than valid_timestamp in lojson - it matches the @timestamp clause only,
24 | # not our extended ISO 8601 syntax
25 |
26 |
27 | # Python datetime classes support only a year range between MINYEAR (1) and MAXYEAR(9999)
28 | # we want to support anything from -9999 (== 10000BC) to 9999 (== 9999AD)
29 | # and possibly support more in the future.
30 |
31 | # and some other useful methods:
# Capture groups, as read by coerce_datetime():
#   1-3    year, month, day of a date-only value (month and day optional)
#   4-6    year, month, day of a full date followed by 'T' and a time
#   7-9    hour, minute, second (minute and second optional)
#   10     fractional seconds, 1 to 6 digits
#   11-12  timezone ('Z' or a +/-hh:mm offset) and the offset's minutes
__datetime_re = re.compile(r'^(?:(?:(-?\d{4})(?:-(\d\d)(?:-(\d\d))?)?)|(?:(-?\d{4})-(\d\d)-(\d\d)T)?(\d\d)(?:\:(\d\d)(?:\:(\d\d)(?:\.(\d{1,6}))?)?)?(Z|[-+](?:0\d|1[0-4])\:(00|15|30|45))?)$')
33 |
34 | # returns the graph format datetime (like ISO except for a leading T on times)
def coerce_datetime(dt):
    """Validate dt and return it in graph format, or None if it is invalid.

    '__now__' and '__today__' are replaced by the current UTC timestamp
    and date.  Dates and date/times are returned unchanged once their
    fields check out; a bare time gets a leading 'T'.
    """
    try:
        if dt == '__now__':
            return datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        if dt == '__today__':
            return datetime.datetime.utcnow().strftime("%Y-%m-%d")

        match = __datetime_re.match(dt)
        if not match:
            return None

        if match.group(1):
            # a date on its own
            return dt if check_date(*match.group(1, 2, 3)) else None

        if match.group(4):
            # a date/time
            # we don't do subseconds as python thinks that '2' is "2 microseconds" not "2 deciseconds".
            valid = (check_date(*match.group(4, 5, 6)) and
                     check_time(*match.group(7, 8, 9)))
            return dt if valid else None

        if match.group(7):
            # a time on its own
            if check_time(*match.group(7, 8, 9)):
                return 'T' + dt
            return None

        # no idea what the problem is, but it is invalid
        return None

    except (TypeError, ValueError):
        return None
73 |
def check_date(year, month, day):
    """Return True when year/month/day form a valid date, else False.

    The fields are strings; month and day may be None, which means the
    value stops at that precision and is valid so far.
    """
    fakeyear = int(year)
    if not -9999 <= fakeyear <= 9999:
        return False

    if month is None:
        return True
    if not 1 <= int(month) <= 12:
        return False
    if day is None:
        return True

    # datetime.date cannot represent years below 1, so shift the year up
    # in steps of 8000 until it can.
    while fakeyear <= 0:
        fakeyear += 8000

    try:
        datetime.date(fakeyear, int(month), int(day))
    except ValueError:
        return False
    return True
96 |
def check_time(hour, minute, second):
    """Return True when hour[:minute[:second]] is a valid time of day.

    The fields are strings.  The hour is required; minute and second may
    be None, which ends the check successfully at that precision.
    """
    if hour is None:
        return False
    if not 0 <= int(hour) <= 23:
        return False

    for part in (minute, second):
        if part is None:
            return True
        if not 0 <= int(part) <= 59:
            return False

    return True
112 |
113 |
def uncoerce_datetime(graphdt):
    """Strip the leading 'T' that the graph format puts on bare times.

    Any other value, including the empty string, is returned unchanged.
    """
    # startswith() rather than graphdt[0], which raised IndexError on ''
    if graphdt.startswith('T'):
        return graphdt[1:]
    return graphdt
119 |
--------------------------------------------------------------------------------
/emql/adapters/test/test_weblink.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from mw.tests.helpers import TestFixture
16 | from mw.emql import emql
# JSON-style aliases so the query dicts below can be written with
# null/true/false.
null = None
true = True
false = False
# property exercised by every test in this module
WEBLINK = "/common/topic/weblink"
class TestWeblinks_adapter(TestFixture):
    """eMQL read tests for the /common/topic/weblink property."""

    def setUp(self):
        super(TestWeblinks_adapter, self).setUp()
        self.cache = emql.emql_cache()

    def run_query(self, q):
        """Run q through emqlread with debugging on and return the results."""
        debug, cursors, results = self.mss.emqlread(None, q, {'debug': True, 'cache': False},
                                                    cache=self.cache)
        return results

    def test_bob_dylan(self):

        r = self.run_query({
            "id": "/en/bob_dylan",
            WEBLINK: []
        })

        weblinks = r[WEBLINK]
        self.assertTrue(weblinks, "Basic sanity test - make sure there are some weblinks returning which indiciate that at least emql is working and that the weblinks adapter is returning results.")

        #XXXXXX UNCOMMENT AFTER https://bugs.freebase.com/browse/DA-1093 ######

        #self.assert_("http://www.bobdylan.com/" in weblinks, "Test a key hanging off of a resource")

        self.assertTrue("http://en.wikipedia.org/wiki/Bob_Dylan" in weblinks, "Test a key hanging off a topic")

    def test_list_shape(self):
        """
        Let's test to make sure weblink works with just a [] shape, in which case it should
        just return a list of strings
        """

        r = self.run_query({
            "id": "/en/migraine",
            WEBLINK: []
        })

        weblinks = r[WEBLINK]
        self.assertTrue(len(weblinks), "there should be some weblinks in here!")

        for w in weblinks:
            self.assertTrue(isinstance(w, str))

    def test_topic_with_all_types_of_weblinks(self):
        """
        This particular topic has a weblinks generated from keys in all three
        places - off the topic, off the annotation, off the resource
        """
        q = {
            "id": "/en/royal_mail",
            WEBLINK: [{
                "url": null,
                "template": {
                    "id": null,
                    "template": null,
                    "ns": null
                },
                "category": {
                    "id": null,
                    "name": null,
                    "optional": true
                },
                "key": null
            }]
        }
        r = self.run_query(q)

        # index the returned weblinks by url for the lookups below
        weblink_dict = {}
        for w in r[WEBLINK]:
            weblink_dict[w['url']] = w

        official_link = weblink_dict.get("http://www.royalmailgroup.com/")
        self.assertTrue(official_link, "The official link for royal mail is present. Key Hangs off resource.")
        self.assertTrue(official_link['category']['name'] == "Official Website", "Official Website category is....Official Website")

        guardian_link = weblink_dict.get("http://www.guardian.co.uk//uk/post")
        self.assertTrue(guardian_link, "Guardian link is present. Key hangs off annotation.")
        self.assertTrue(guardian_link['category']['name'] == "Tag", "Category is Tag")

        wiki_link = weblink_dict.get("http://en.wikipedia.org/wiki/index.html?curid=349823")
        self.assertTrue(wiki_link, "Wiki link is present. Key hangs off topic itself.")
107 |
108 |
109 |
--------------------------------------------------------------------------------
/mql/mid.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # mid.py - machine ids.
17 |
18 | from cStringIO import StringIO
19 | import sys
20 |
21 | ################################################################################
22 | ## version 1 constants
VERSION = 1L  # the only version to_guid() accepts
MAX_BITS = 40  # presumably the total width of a MID: version + graph id + object id
VERSION_BITS = 2
OBJID_BITS = 34
GRAPHID_BITS = MAX_BITS - VERSION_BITS - OBJID_BITS  # 40 - 2 - 34 = 4
# graphid_of_guid() subtracts this from the first 16 hex digits of a guid
# to get the graph number
GRAPHID0 = 0x9202a8c04000641f
GUID_BASE = 0x8000000000000000L  # OR-ed into the low 64 bits of a guid by to_guid()

VERSION_MASK = (1L << VERSION_BITS) - 1L
MAX_GRAPHS = 1L << GRAPHID_BITS  # graph numbers are 0 .. MAX_GRAPHS - 1
GRAPHID_MASK = MAX_GRAPHS - 1L
OBJID_MASK = (1L << OBJID_BITS) - 1L

# both are 0 while VERSION == 1
VERSION_LEFT = (VERSION - 1) << 38L
VERSION_RIGHT = (VERSION - 1) << 3L
38 |
39 | ################################################################################
40 | ## Exceptions
41 |
42 |
class InvalidMunch(Exception):
    """Raised by char_of_munch() and munch_of_char() for a value or character outside the munch alphabet."""
    pass


class InvalidGraphID(Exception):
    """Not raised by any code visible in this part of the module."""
    pass


class UnknownGraphID(Exception):
    """Raised by graphid_of_guid() when the guid does not belong to a known graph."""
    pass


class InvalidMIDVersion(Exception):
    """Raised by to_guid() when the MID's version character does not decode to VERSION."""
    pass


class InvalidMID(Exception):
    """Raised by to_guid() when the MID fails the length/prefix check."""
    pass


class InvalidObjID(Exception):
    """Not raised by any code visible in this part of the module."""
    pass
65 |
66 |
# Lookup table from character code (0-255) to munch value; -1 marks
# characters that are not in the munch alphabet.  Read by munch_of_char().
munch_map = [-1] * 256
for i, c in enumerate("0123456789bcdfghjklmnpqrstvwxyz_"):
    munch_map[ord(c)] = long(i)
70 |
71 |
72 | ## a Munch (copyright W. Harris, 2010) is 5 bits.
def char_of_munch(c):
    """Return the alphabet character for munch value c (0..31).

    Raises InvalidMunch when c is outside that range.
    """
    if 0 <= c <= 31:
        return "0123456789bcdfghjklmnpqrstvwxyz_"[c]
    raise InvalidMunch(c)
77 |
78 |
def munch_of_char(c):
    """Return the munch value (0..31) of the single character c.

    Raises InvalidMunch when c is not in the munch alphabet.  That includes
    characters whose code lies beyond the end of munch_map, which used to
    surface as an IndexError.
    """
    code = ord(c)
    value = munch_map[code] if code < len(munch_map) else -1
    if value == -1:
        raise InvalidMunch(c)
    return value
84 |
85 |
def munchstr_of_int(n):
    """Return n written in munches (5 bits per character), high digit first.

    0 is encoded as the empty string.  There is no length limit; the
    previous fixed 16-slot buffer wrapped around silently for larger
    values.

    Raises ValueError for a negative n, which has no munch encoding.
    """
    if n < 0:
        raise ValueError('cannot encode negative number %r' % (n,))

    digits = []
    while n:
        digits.append(char_of_munch(n & 0x1f))
        n >>= 5

    # digits were collected lowest first
    digits.reverse()
    return "".join(digits)
96 |
97 |
def int_of_munchstr(str, ofs, l):
    """Decode the l munch characters of str starting at index ofs into an integer.

    Characters are read high digit first, 5 bits each.  munch_of_char()
    raises InvalidMunch for a character outside the alphabet.
    """
    rv = 0
    for pos in range(ofs, ofs + l):
        rv = (rv << 5) | munch_of_char(str[pos])
    return rv
107 |
108 |
def graphid_of_guid(guid):
    """Return the graph number encoded in the hex guid string.

    The number is the offset of the first 16 hex digits from GRAPHID0.

    Raises UnknownGraphID when that offset is outside 0..MAX_GRAPHS-1 or
    when hex digits 16-23, with the two lowest bits masked off, are not
    0x80000000.
    """
    high = long(guid[:16], 16)
    marker = long(guid[16:24], 16) & 0xfffffffc
    n = high - GRAPHID0

    if marker != 0x80000000 or not 0 <= n < MAX_GRAPHS:
        raise UnknownGraphID(n)
    return n
117 |
118 |
def objid_of_guid(guid):
    """Return the object id: hex digits 23-31 of guid, masked with OBJID_MASK."""
    low_digits = guid[23:32]
    return long(low_digits, 16) & OBJID_MASK
121 |
122 |
def of_guid(guid):
    """Return the MID for a hex guid string.

    The result is "/m/", one character encoding the version and graph
    number, then the munch string of the combined graph and object id.
    Errors from graphid_of_guid() propagate.
    """
    graphid = graphid_of_guid(guid)
    objid = objid_of_guid(guid)

    n = VERSION_LEFT | graphid << 34 | objid
    version_munch = VERSION_RIGHT << 3 | graphid

    return "/m/" + char_of_munch(version_munch) + munchstr_of_int(n)
130 |
131 |
def to_guid(mid):
    """Return the hex guid string for a MID such as the ones of_guid() builds.

    Raises:
      InvalidMID: mid is not 4 to 11 characters long or does not start
        with "/m/".
      InvalidMIDVersion: the version character does not decode to VERSION.
      InvalidMunch: mid contains a character outside the munch alphabet.
    """
    len_mid = len(mid)
    # Both conditions must hold.  The previous check joined them with
    # `or`, so it only rejected strings that failed both.
    if not (4 <= len_mid <= 11 and mid.startswith("/m/")):
        raise InvalidMID(mid)

    version_munch = munch_of_char(mid[3])
    # NOTE(review): with `<< 3` only a version character of '0' passes;
    # confirm whether `>> 3` was intended before touching this.
    ver = (version_munch << 3) + 1
    if ver != VERSION:
        raise InvalidMIDVersion(mid)

    graphid = GRAPHID0 | version_munch & GRAPHID_MASK
    graphid = graphid << 64
    objid = GUID_BASE | int_of_munchstr(mid, 4, len_mid - 4)
    guid = graphid | objid
    # %x gives the bare hex digits, with no "0x" prefix or "L" suffix to
    # slice off.
    return '%x' % guid
147 |
148 |
# Command-line use: print the guid for the MID given as the first argument.
if __name__ == "__main__":
    #o_guid = "9202a8c04000641f800000000172fcb8"
    #o_guid = "9202a8c04000641f800000000164382e"
    #o_guid = "9202a8c04000641f800000000172fcb8"
    # only referenced by the commented-out round-trip check below
    o_guid = "9202a8c04000641f80000000013e068e"

    if len(sys.argv) < 2:
        # NOTE(review): the usage text names no argument -- it may have lost
        # a placeholder for the MID; confirm.
        print "usage: mid.py "
        sys.exit(1)

    mid = sys.argv[1]
    print to_guid(mid)
    #mid = of_guid(o_guid)
    #print mid
    #n_guid = to_guid(mid)
    #print n_guid
165 |
--------------------------------------------------------------------------------
/emql/adapters/nytimes.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import mw, urllib, urlparse
16 | from datetime import datetime
17 | from collections import defaultdict
18 | from mw.emql.adapter import Adapter
19 | from lxml import etree
20 |
class nytimes_articles_adapter(Adapter):
    """eMQL adapter that attaches New York Times articles to a topic.

    pre() asks MQL for the topic's keys in the /user/jamie/nytdataid
    namespace, fetch() turns every key into articles through
    get_articles(), and help() returns the adapter documentation.
    """

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Return the MQL clause that fetches up to 10 nytdataid keys."""
        return {
            "key": [{
                "optional": True,
                "value": None,
                "namespace": "/user/jamie/nytdataid",
                "limit": 10
            }]
        }

    @staticmethod
    def _article_of_result(j):
        """Reshape one search-API result dict into the adapter's article dict."""
        if j.get('small_image_url'):
            img = {'url': j['small_image_url'],
                   'height': j.get('small_image_height') or None,
                   # was read from the misspelled key 'small_image_widget',
                   # so the width always came back as None
                   'width': j.get('small_image_width') or None}
        else:
            img = None

        return {
            'headline': j['nytd_title'],
            'text': j['nytd_lead_paragraph'],
            'byline': j.get('nytd_byline', j.get('byline', None)),
            'source': j.get('source_facet', None),
            'date': datetime.strptime(j['date'], '%Y%m%d').isoformat(),
            'img': img,
            'url': j['url']
        }

    def get_articles(self, me, nytd_key, api_keys):
        """Return the list of article dicts for one nytdataid key.

        Downloads the key's RDF document from data.nytimes.com, takes the
        search API query it advertises and runs that query with the
        'nytimes_articles' API key.  Returns [] when the RDF document
        contains no search query.
        """
        url, connection = me.get_session().http_connect('data.nytimes.com', "/%s.rdf" % nytd_key)
        connection.request('GET', url)
        response = connection.getresponse()
        rdf = response.read()
        rdf = etree.fromstring(rdf)

        # Grab the search api call
        search_url = rdf.xpath("//nyt:search_api_query", namespaces=rdf.nsmap)
        if not search_url:
            return []

        search_url = urlparse.urlparse(search_url[0].text)
        params = urlparse.parse_qs(search_url.query)
        params['api-key'] = api_keys['nytimes_articles']
        params['fields'] = ','.join([
            'date',
            'url',
            'nytd_lead_paragraph',
            'nytd_title',
            'byline',
            'nytd_byline',
            'small_image_url',
            'small_image_height',
            'small_image_width',
            'source_facet'
        ])

        # build the actual query
        url, connection = me.get_session().http_connect(search_url.hostname, search_url.path)
        qs = urllib.urlencode(params, doseq=True)
        connection.request('GET', "%s?%s" % (url, qs))

        response = connection.getresponse()
        payload = mw.json.loads(response.read())

        return [self._article_of_result(j) for j in payload['results']]

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Return {guid: articles} for every MQL result that has keys.

        The articles of all keys of one result are concatenated and then
        cut to the query's 'limit' (default 5).

        Raises Exception when no 'nytimes_articles' API key is supplied.
        """
        result = defaultdict(list)
        query = params['query'] or {}

        if isinstance(query, list):
            query = query[0]

        limit = query.get('limit', 5)

        if not (api_keys and api_keys.get('nytimes_articles')):
            raise Exception('This property requires a New York Times API key. '
                            'Get one here: http://developer.nytimes.com/apps/register')

        for mqlres in args:
            if not mqlres['key']:
                continue

            for key in mqlres['key']:
                articles = self.get_articles(me, key['value'], api_keys)
                result[mqlres['guid']].extend(articles)

        return dict((k, v[:limit]) for k, v in result.iteritems())

    def help(self, tid, graph, mql, me, control, params):
        """Return the content type and text of the adapter documentation."""
        from docs import nytimes_adapter_help

        return 'text/x-rst;', nytimes_adapter_help
112 |
113 |
114 |
--------------------------------------------------------------------------------
/util/parsedt.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | # imported from Client
17 |
# given a graphd datetime string (iso8601 format)
19 | # parse it and format it
20 |
21 | import re, datetime
22 |
23 |
# Time of day: hour, then optional minute, second and fractional seconds.
# The group names are the keys parse_isodate() reads through LABELS.
ISO8601_TIME_PATTERN = r"(?P<hour>[0-9]{2})(:(?P<minute>[0-9]{2}))?(:(?P<second>[0-9]{2})(.(?P<fraction>[0-9]+))?)?"

ISO8601_TIME_REGEX = re.compile(ISO8601_TIME_PATTERN)

# Optional leading '-' (group 'bc'), a four-digit year, then optional
# month, day, time of day and timezone.
ISO8601_REGEX = \
    re.compile(r"(?P<bc>-)?(?P<year>[0-9]{4})(-(?P<month>[0-9]{1,2})(-(?P<day>[0-9]{1,2})"
               r"((?P<separator>.)" + ISO8601_TIME_PATTERN +
               r"(?P<timezone>Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?")

# regex group names, from coarsest to finest component
LABELS = ('year', 'month', 'day', 'hour', 'minute', 'second')

# This essentially maps the number of date components to a format,
# Especially annoying: these can't be unicode, strftime doesn't like that
FORMATS = [
    "%Y",                    # year only
    "%b %Y",                 # year, month
    "%b %e, %Y",             # year, month, day
    "%b %e, %Y %l%p",        # year, month, day, hour
    "%b %e, %Y %l:%M%p",     # year, month, day, hour, minute
    "%b %e, %Y %l:%M:%S%p",  # year, month, day, hour, minute, second
]
# The loop variable is called fmt so that it does not shadow the builtin
# format() (list comprehension variables leak into module scope on
# Python 2).
BC_FORMATS = [fmt.replace("%Y", "%Y B.C.E.") for fmt in FORMATS]
CE_FORMATS = [fmt.replace("%Y", "%Y C.E.") for fmt in FORMATS]
47 |
48 |
def parse_isodate(iso_date):
    """
    Parse an iso8601-formatted string (or fraction thereof) and return a
    (datetime, format) tuple, where format is the strftime() format that
    shows exactly the components the string contained.  A string holding
    only a time is placed on today's date.  Returns (None, None) when the
    string cannot be parsed or does not describe a valid date.
    """

    match = ISO8601_REGEX.match(iso_date)
    time_only = False
    if not match:
        match = ISO8601_TIME_REGEX.match(iso_date)
        if not match:  # bad data in the graph
            return None, None
        time_only = True

    values = match.groupdict()

    if time_only:
        today = datetime.date.today()
        args = [today.year, today.month, today.day]
        start = 3
    else:
        args = []
        start = 0

    # count ends up as the number of components present, which selects the
    # display format; absent components are filled in with 1
    count = start
    for label in LABELS[start:6]:
        value = values[label]
        if value is None:
            args.append(1)
        else:
            count += 1
            args.append(int(value))

    try:
        d = datetime.datetime(*args)
    except ValueError:
        return None, None

    if values.get('bc'):
        table = BC_FORMATS
    elif 0 <= d.year < 1000:
        table = CE_FORMATS
    else:
        table = FORMATS

    fmt = table[count - 1]
    if time_only:
        # drop the date part of the format
        fmt = fmt[10:]

    if iso_date.endswith('Z'):
        fmt += ' UTC'

    return d, fmt
104 |
105 |
def format_isodate(iso_date):
    """
    Given an iso8601 formatted string (or fraction thereof) return
    a timezone-independent display of the string, or None when
    parse_isodate() cannot parse it.
    """

    d, fmt = parse_isodate(iso_date)
    if d is None:
        return None

    if d.year >= 1900:
        result = d.strftime(fmt)
    else:
        # make sure to pick something that is a leapyear, so that
        # 29-Feb is available! Note that 1900 is NOT a leapyear
        d_1904 = d.replace(year=1904)
        result = d_1904.strftime(fmt).replace("1904", str(d.year))

    if fmt.endswith("%p"):
        # lower-case the trailing AM/PM
        result = result[:-2] + result[-2:].lower()

    # %e and %l pad single digits with a space; collapse the resulting
    # double space.  The previous replace(" ", " ") changed nothing.
    return result.replace("  ", " ").lstrip()
128 |
129 |
# Command-line use: print the display form of the date given as the first
# argument.
if __name__ == "__main__":
    import sys
    print format_isodate(sys.argv[1])
133 |
--------------------------------------------------------------------------------
/test/cost_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.6
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # -*- coding: utf-8 -*-
17 | #
18 | """mql cost tests."""
19 |
20 | __author__ = 'bneutra@google.com (Brendan Neutra)'
21 |
22 | import google3
23 | from pymql.mql import error
24 | from pymql.test import mql_fixture
25 |
26 | # stuff we care about
# Cost keys that costs_exist() requires to be floats.
# NOTE(review): 'mql_stime' is listed twice -- harmless, but confirm
# whether a different key was meant for one of the entries.
FLOAT_COSTS = ['mql_stime',
               'mql_stime',
               'mql_utime',
               'mql_rtime',
               'mql_dbtime'
              ]
# Cost keys that costs_exist() requires to be ints.
INT_COSTS = ['pf',
             'mql_dbtries',
             'tu',
             'ts',
             'te'
            ]
39 |
40 | # important note: in mock replay mode, stored graph response costs
41 | # are tallied. But mql_[x]time will be calculated in realtime
42 | # so those costs will be quite different than when the mock was
43 | # recorded (they will be smaller, kinda the point of mocking)
44 |
class MQLTest(mql_fixture.MQLTest):
    """mql cost tests."""

    def setUp(self):
        # The mock path has to be set before the fixture's own setUp runs.
        self.SetMockPath('data/cost.yaml')
        super(MQLTest, self).setUp()
        self.env = {'as_of_time': '2010-05-01'}

    def testCost(self):
        """simple positive test."""

        query = """
        {
          "/people/person/place_of_birth": null,
          "id": "/en/bob_dylan"
        }
        """
        exp_response = """
        {
          "/people/person/place_of_birth": "Duluth",
          "id": "/en/bob_dylan"
        }
        """
        self.DoQuery(query, exp_response=exp_response)
        cost = self.mql_result.cost
        self.costs_exist(cost)
        self.assertGreater(cost['te'], 10, 'te cost should be something')
        self.assertEqual(cost['mql_dbreqs'], 4, 'four graphd requests')

    def testCostError(self):
        """a query that gets a GQL error."""

        query = """
        {
          "guid": "foobar"
        }
        """
        exc_response = (
            error.MQLParseError,
            'Can only use a hexadecimal guid here'
        )
        self.DoQuery(query, exc_response=exc_response)
        # the query failed, so the cost is read from the service rather
        # than from a result object
        cost = self.mql_service.get_cost()
        self.costs_exist(cost)
        self.assertEqual(cost['mql_dbreqs'], 1, 'only one graphd request')

    def testCostComplex(self):
        """query that does a lot of GQL."""

        query = """
        [{
          "/people/person/date_of_birth" : [],
          "/music/artist/album" : [],
          "/film/actor/film" : [],
          "/film/director/film" : [],
          "/film/producer/film" : [],
          "/tv/tv_actor/starring_roles" : [],
          "/tv/tv_producer/programs_produced" : [],
          "type": "/music/artist",
          "b:type": "/film/actor",
          "c:type": "/film/director",
          "d:type": "/film/producer",
          "e:type": "/tv/tv_actor",
          "f:type": "/tv/tv_producer",
          "id": null
        }]
        """
        self.DoQuery(query)
        cost = self.mql_result.cost
        self.costs_exist(cost)
        self.assertEqual(cost['mql_dbreqs'], 12, '12 graphd requests')
        self.assertGreater(cost['tu'], 100, 'tu cost should be something')


    def testQueryTimeout(self):
        """a query that exceeds an integer query_timeout_tu."""

        self.env['query_timeout_tu'] = 50
        query = """
        [{
          "type": "/people/person",
          "date_of_birth": null,
          "sort": "date_of_birth"
        }]
        """
        exc_response = (
            error.MQLTimeoutError,
            'Query too difficult.'
        )
        self.DoQuery(query, exc_response=exc_response)
        cost = self.mql_service.get_cost()
        self.costs_exist(cost)

    def testQueryTimeoutFloat(self):
        """same as testQueryTimeout, with a float query_timeout_tu."""

        # float is allowed
        self.env['query_timeout_tu'] = 50.1
        query = """
        [{
          "type": "/people/person",
          "date_of_birth": null,
          "sort": "date_of_birth"
        }]
        """
        exc_response = (
            error.MQLTimeoutError,
            'Query too difficult.'
        )
        self.DoQuery(query, exc_response=exc_response)
        cost = self.mql_service.get_cost()
        self.costs_exist(cost)

    def costs_exist(self, cost):
        """Assert that every FLOAT_COSTS key is a float and every INT_COSTS key is an int."""
        for c in FLOAT_COSTS:
            self.assertIsInstance(cost[c], float, 'cost %s exists' % c)
        for c in INT_COSTS:
            self.assertIsInstance(cost[c], int, 'cost %s exists' % c)
161 |
# Run the tests through the fixture's own entry point when executed directly.
if __name__ == '__main__':
    mql_fixture.main()
164 |
--------------------------------------------------------------------------------
/formats/http.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | #
17 | # utilities for dealing with http
18 | #
19 | # url escaping
20 | # content-type parsing and graph lookup
21 | #
22 | # originally from mw/client/escaping.py
23 | # duplicated in mw/mql/grquoting.py
24 | #
25 |
26 |
27 | import urllib
28 |
29 | # Table mapping response codes to messages; entries have the
30 | # form {code: (shortmessage, longmessage)}.
31 | # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
32 | from BaseHTTPServer import BaseHTTPRequestHandler
33 | http_status_codes = BaseHTTPRequestHandler.responses
34 |
35 |
36 | # some useful uri splitting code in the "urischemes" thirdparty module.
37 | #
38 | # later i found that the most complete uri manipulation module
39 | # seems to be in 4Suite:
40 | #
41 | # from Ft.Lib import Uri, Iri
42 |
43 |
44 | #
45 | #
46 | # ALLOW:
47 | #
48 | # '~' is in the unreserved set, so they should be available like "_.-"
49 | # ':' is in pchar
50 | # '@' is in pchar (though naive text parsers may think it's an email address)
51 | #
52 | # "$" is a valid sub-delim
53 | # "!" is a valid sub-delim
54 | # "*" is a valid sub-delim
55 | # "," is a valid sub-delim
56 | # ";" is a valid sub-delim
57 | #
58 | # GENERALLY DISALLOW:
59 | #
60 | # "&" is in sub-delims but has special meaning to form parsers
61 | # "=" is in sub-delims but excluded due to avoid any possible confusion
62 | # "+" is in sub-delims but excluded due to avoid any possible confusion
63 | # with form-encoded queries
64 |
65 | # ALWAYS DISALLOW
66 | #
67 | # "'" is in sub-delims but likely to confuse
68 | # "(" is in sub-delims but definitely confuses email text parsers
69 | # ")" is in sub-delims but definitely confuses email text parsers
70 |
71 | # [A-Za-z0-9] and "_.-" are never escaped by urllib.quote.
72 | # In addition we allow the characters listed under ALLOW above:
73 | our_safe = "~:@$!*,;"
74 |
75 | # this handles unicode
76 | def base_urlencode(data, safe):
77 | if isinstance(data, unicode):
78 | data = data.encode('utf_8')
79 | return urllib.quote(data, safe)
80 |
81 |
def urlencode(data):
    """Default url-encoder.

    Please move to one of the more specific encoders, depending on
    whether a path segment or a query argument is being quoted.
    """
    # urllib.quote never touches "_.-"; the only extra safe character is ","
    extra_safe = ','
    return base_urlencode(data, extra_safe)
90 |
91 |
92 |
93 | # within path segments (between slashes) we don't need
94 | # to follow the same rules as for forms parsing.
95 | #
96 | # "=" is only special to form parsers
97 | # "&" is only special to form parsers
98 | # "+" is only special to form parsers
def urlencode_pathseg(data):
    """Encode ``data`` for placement between two slashes of a url.

    "=", "&" and "+" are kept literal in addition to the ``our_safe``
    set, since they are only special to form parsers.
    """
    path_safe = our_safe + "=&+"
    return base_urlencode(data, path_safe)
104 |
105 |
106 | # "/" is allowed in query but reserved in path segments
107 | # "?" is allowed in query but reserved in path segments
def urlencode_querykey(data):
    """Encode ``data`` for placement before the '=' of a query argument.

    "/" and "?" are kept literal in addition to the ``our_safe`` set.
    """
    key_safe = our_safe + '/?'
    return base_urlencode(data, key_safe)
115 |
116 |
117 | # "/" is allowed in query but reserved in path segments
118 | # "?" is allowed in query but reserved in path segments
119 | # "=" should be allowed by form parsers after the key=
def urlencode_queryvalue(data):
    """Encode ``data`` for placement after the '=' of a query argument.

    "/", "?" and "=" are kept literal in addition to the ``our_safe``
    set.  "&" and "+" are still escaped because they are special to
    form parsers.
    """
    # "=" used to be missing from the safe set, which contradicted the
    # documented contract and made this function identical to
    # urlencode_querykey.  Decoders see the same value either way.
    return base_urlencode(data, our_safe + '/?=')
127 |
128 |
129 | # "/" is allowed in query but reserved in path segments
130 | # "?" is allowed in query but reserved in path segments
131 | # "=" is only special to form parsers
132 | # "&" is only special to form parsers
133 | # "+" is only special to form parsers
def urlencode_fragment(data):
    """Encode ``data`` for use as a url fragment.

    "/", "?", "=", "&" and "+" are kept literal in addition to the
    ``our_safe`` set.
    """
    fragment_safe = our_safe + '/?=&+'
    return base_urlencode(data, fragment_safe)
141 |
142 | #
143 | # who knows what browsers do? it ain't rfc3986 that's for sure.
144 | #
def urlencode_formtext(data):
    """Encode a form key or value, pretending to be a browser.

    Space is encoded as '+' rather than '%20', which is what you want
    when imitating a browser form submit.  A unicode argument is
    converted to UTF-8 bytes first.
    """
    form_bytes = data.encode('utf_8') if isinstance(data, unicode) else data
    return urllib.quote_plus(form_bytes, our_safe)
156 |
157 |
158 | def urldecode(data):
159 | '''
160 | replace "%xx" with character equivalent
161 | '''
162 | return urllib.unquote(data)
163 |
--------------------------------------------------------------------------------
/test/return_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.4
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # -*- coding: utf-8 -*-
17 | #
18 | """mql return directive."""
19 |
20 | __author__ = 'bneutra@google.com (Brendan Neutra)'
21 |
22 | # thanks warren for these dimetests
23 |
24 | import google3
25 | from pymql.mql import error
26 | from pymql.test import mql_fixture
27 |
28 | class MQLTest(mql_fixture.MQLTest):
29 | """Tests for the mql "return" directive (count and estimate-count)."""
30 |
31 | def setUp(self):
32 | self.SetMockPath('data/return.yaml')
33 | super(MQLTest, self).setUp()
34 | self.env = {'as_of_time': '2010-05-01'}
35 |
36 |
37 | def testReturnCountOfObject(self):
38 | """return count of object."""
39 |
40 | query = """
41 | {
42 | "/people/person/children": {
43 | "count": null,
44 | "return": "count"
45 | },
46 | "id": "/en/bob_dylan"
47 | }
48 | """
49 | exp_response = """
50 | {
51 | "/people/person/children": 6,
52 | "id": "/en/bob_dylan"
53 | }
54 | """
55 | self.DoQuery(query, exp_response=exp_response)
56 |
57 | def testReturnCountOfArray(self):
58 | """return count of array."""
59 |
60 | query = """
61 | {
62 | "/people/person/children": [
63 | {
64 | "count": null,
65 | "return": "count"
66 | }
67 | ],
68 | "id": "/en/bob_dylan"
69 | }
70 | """
71 | exp_response = """
72 | {
73 | "/people/person/children": [
74 | 6
75 | ],
76 | "id": "/en/bob_dylan"
77 | }
78 | """
79 | self.DoQuery(query, exp_response=exp_response)
80 |
81 | def testReturnEstimateCountOfArray(self):
82 | """return estimate-count of array."""
83 |
84 | query = """
85 | {
86 | "/people/person/children": [
87 | {
88 | "return": "estimate-count",
89 | "estimate-count": null
90 | }
91 | ],
92 | "id": "/en/bob_dylan"
93 | }
94 | """
95 | exp_response = """
96 | {
97 | "/people/person/children": [
98 | 6
99 | ],
100 | "id": "/en/bob_dylan"
101 | }
102 | """
103 | self.DoQuery(query, exp_response=exp_response)
104 |
105 | def testReturnCountNullWhenNone(self):
106 | """return count null when none."""
107 |
108 | query = """
109 | {
110 | "album": {
111 | "return": "count",
112 | "name": "Arrested"
113 | },
114 | "type": "/music/artist",
115 | "name": "The Police"
116 | }
117 | """
118 | exp_response = """
119 | null
120 | """
121 | self.DoQuery(query, exp_response=exp_response)
122 |
123 | def testReturnCount0WhenNoneAndOptional(self):
124 | """return count 0 when none and optional."""
125 |
126 | query = """
127 | {
128 | "album": {
129 | "optional": true,
130 | "return": "count",
131 | "name": "Arrested"
132 | },
133 | "type": "/music/artist",
134 | "name": "The Police"
135 | }
136 | """
137 | exp_response = """
138 | {
139 | "album": 0,
140 | "type": "/music/artist",
141 | "name": "The Police"
142 | }
143 | """
144 | self.DoQuery(query, exp_response=exp_response)
145 |
146 | def testReturnIgnoresOtherResultValues(self):
147 | """return ignores other result values."""
148 |
149 | query = """
150 | {
151 | "/people/person/children": [
152 | {
153 | "count": null,
154 | "nationality": {
155 | "id": "/en/united_states",
156 | "name": null
157 | },
158 | "return": "count",
159 | "id": null
160 | }
161 | ],
162 | "id": "/en/bob_dylan"
163 | }
164 | """
165 | exp_response = """
166 | {
167 | "/people/person/children": [
168 | 2
169 | ],
170 | "id": "/en/bob_dylan"
171 | }
172 | """
173 | self.DoQuery(query, exp_response=exp_response)
174 |
175 | def testReturnImplicitCount(self):
176 | """return implicit count (no explicit "count": null in the clause)."""
177 |
178 | query = """
179 | {
180 | "/people/person/children": {
181 | "return": "count",
182 | "id": null
183 | },
184 | "id": "/en/bob_dylan"
185 | }
186 | """
187 | exp_response = """
188 | {
189 | "/people/person/children": 6,
190 | "id": "/en/bob_dylan"
191 | }
192 | """
193 | self.DoQuery(query, exp_response=exp_response)
194 |
195 | def testReturnIdFail(self):
196 | """return id is rejected with an MQLParseError."""
197 |
198 | query = """
199 | {
200 | "/people/person/children": {
201 | "date_of_birth": null,
202 | "return": "id",
203 | "id": null
204 | },
205 | "id": "/en/bob_dylan"
206 | }
207 | """
208 | exc_response = (
209 | error.MQLParseError,
210 | "'return' currently only supports 'count' and 'estimate-count'"
211 | )
212 | self.DoQuery(query, exc_response=exc_response)
213 |
214 | if __name__ == '__main__':  # script entry point
215 | mql_fixture.main()  # hands control to the shared fixture module's main()
216 |
--------------------------------------------------------------------------------
/emql/adapters/search.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | from itertools import izip, chain
17 |
18 | from mw.emql.adapter import Adapter
19 | from mw.emql.emql import id_guid, formatted_id_guid, MQL_LIMIT
20 |
21 |
class search_adapter(Adapter):
    """eMQL adapter that turns a text search into a constraint on guids.

    ``pre`` runs a relevance query and returns a MQL clause restricted to
    the matching guids.  ``fetch`` maps each MQL result back to its guid,
    attaching the original query and, when they were requested, the
    relevance scores collected in ``pre``.
    """

    # Keys of the query that are forwarded to relevance_query().  Any other
    # key except 'score' is copied into the MQL clause returned by pre().
    _SEARCH_ARGS = frozenset((
        'query', 'prefix', 'prefixed',
        'type', 'type_strict', 'domain', 'domain_strict',
        'type_exclude', 'type_exclude_strict',
        'domain_exclude', 'domain_exclude_strict',
        'limit', 'denylist', 'related', 'property',
        'mql_filter', 'geo_filter', 'as_of_time', 'timeout',
    ))

    # guid used when the search matched nothing, so the clause matches nothing
    _NO_MATCH_GUID = '#00000000000000000000000000000000'

    @staticmethod
    def _constraint_query(constraints):
        """Return the operand of the first '~=' constraint, or None."""
        if constraints is not None:
            for operator, operand in constraints:
                if operator == '~=':
                    return operand
        return None

    @staticmethod
    def _plain_name(arg):
        """Return ``arg`` as a str with a trailing '|=' removed."""
        if arg.endswith('|='):
            return str(arg[:-2])
        return str(arg)

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Run the relevance query and build the MQL clause for its matches.

        Returns a dict with a 'guid|=' list, any pass-through keys of the
        query, and an ':extras' entry carrying {guid: score} when the query
        asked for 'score'.  Raises ValueError('no query') when no query
        text can be found in ``params`` or its '~=' constraint.
        """
        constraints = params.get('constraints')
        params = params.get('query')

        # the query text may arrive as a '~=' constraint instead
        if params is None:
            params = self._constraint_query(constraints)

        if isinstance(params, dict) and params.get('query') is None:
            operand = self._constraint_query(constraints)
            if operand is not None:
                params['query'] = operand

        if isinstance(params, list):
            params = params[0] if params else None

        if isinstance(params, (str, unicode)):
            params = {'query': params}
        elif params is None or params.get('query') is None:
            raise ValueError('no query')

        args = {}
        result = {}

        for arg, value in params.iteritems():
            name = self._plain_name(arg)
            if name in self._SEARCH_ARGS:
                args[name] = value
            elif name != 'score':
                result[name] = value

        # inherit 'limit' and a string 'type' from the parent clause unless
        # the query set them itself
        for arg, value in parent.iteritems():
            name = self._plain_name(arg)
            if name not in args:
                if name == 'limit':
                    args[name] = value
                elif name == 'type' and isinstance(value, basestring):
                    args['type_strict'] = 'any'
                    args[name] = value

        if 'limit' not in args:
            args['limit'] = MQL_LIMIT  # plug-in default MQL limit

        want_scores = 'score' in params
        if want_scores:
            matches = me.get_session().relevance_query(tid, format='ac', **args)
            guids = ['#' + match['guid'] for match in matches]
        else:
            matches = me.get_session().relevance_query(tid, format='guids',
                                                       **args)
            guids = ['#' + guid for guid in matches]

        result['guid|='] = guids or [self._NO_MATCH_GUID]

        if want_scores:
            result[':extras'] = {
                "fetch-data": dict((match['guid'], match['score'])
                                   for match in matches)
            }

        return result

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Map each MQL result in ``args`` to the value for its guid.

        For a plain string query every guid maps to that string.  Otherwise
        each result dict gets 'query' (and 'score' when scores were
        collected) and is keyed by its guid; the value is wrapped in a list
        when the query was given as a list.
        """
        constraints = params.get('constraints')
        scores = params.get(':extras', {}).get('fetch-data')
        params = params.get('query')

        was_list = False
        if isinstance(params, list):
            if params:
                params = params[0]
                was_list = True
            else:
                params = None

        if params is None:
            params = self._constraint_query(constraints)

        if isinstance(params, (str, unicode)):
            return dict((mqlres['guid'], params) for mqlres in args)

        if scores is not None:
            for mqlres in args:
                # score keys have no leading '#', result guids do
                mqlres['score'] = scores[mqlres['guid'][1:]]

        # keep 'guid' in the result only when the query asked for it
        take_guid = dict.get if 'guid' in params else dict.pop

        results = {}
        for mqlres in args:
            mqlres['query'] = params['query']
            results[take_guid(mqlres, 'guid')] = (
                [mqlres] if was_list else mqlres)

        return results

    def help(self, tid, graph, mql, me, control, params):
        """Return the (content type, text) pair documenting this adapter."""
        from docs import search_adapter_help

        return 'text/x-rst;', search_adapter_help
151 |
152 |
153 |
--------------------------------------------------------------------------------
/test/regression_misc_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | #
16 | """Test misc. regressions."""
17 |
18 | __author__ = 'bneutra@google.com (Brendan Neutra)'
19 |
20 | import google3
21 | from pymql.test import mql_fixture
22 |
23 |
24 | class MQLTest(mql_fixture.MQLTest):
25 | """Regression tests for miscellaneous mql bugs (utf-8 names, cursors)."""
26 | def setUp(self):
27 | self.SetMockPath('data/regression_misc.yaml')
28 | super(MQLTest, self).setUp()
29 | self.env = {'as_of_time': '2009-10-01'}
30 |
31 | def testUtf8(self):
32 | """Regression test for issue 4970606."""
33 |
34 | query = u"""
35 | [{"name":"Beyonc\u00e9", "id": null}]
36 | """
37 | exp_response = u"""
38 | [
39 | {
40 | "id": "/en/beyonce",
41 | "name": "Beyonc\u00e9"
42 | },
43 | {
44 | "id": "/m/07ldnn6",
45 | "name": "Beyonc\u00e9"
46 | }
47 | ]
48 | """
49 | self.DoQuery(query.encode('utf-8'),
50 | exp_response=exp_response.encode('utf-8'))
51 |
52 | def testCursor(self):
53 | """JIRA API-62 bug."""
54 |
55 | # not sure the bug is valid but I just wanted to capture
56 | # this style of query. the bug was that it timed out
57 | # but i can't reproduce that -brendan
58 |
59 | query = """
60 | [
61 | {
62 | "attribution": {
63 | "guid": null,
64 | "optional": true,
65 | "id": null
66 | },
67 | "reverse": null,
68 | "creator": {
69 | "guid": null,
70 | "optional": true,
71 | "id": null
72 | },
73 | "timestamp": null,
74 | "timestamp>=": "2012-01-01T20",
75 | "source": {
76 | "guid": null,
77 | "optional": true,
78 | "id": null
79 | },
80 | "valid": null,
81 | "limit": 1000,
82 | "master_property": null,
83 | "operation": null,
84 | "type": "/type/link",
85 | "target_value": null,
86 | "target": {
87 | "guid": null,
88 | "optional": true,
89 | "id": null
90 | }
91 | }
92 | ]
93 | """
94 | cursor = True
95 | while 1:
96 | self.env = {'cursor': cursor, 'as_of_time': '2012-01-02'}
97 | self.MQLQuerier(query)
98 | cursor = self.mql_result.cursor
99 | if cursor is False: break
100 |
101 | def testCursorComplex(self):
102 | """random hash ordering cursor bug b/8323666."""
103 | # TODO(bneutra) how to repro the bug, testing in process
104 | # doesn't tickle it.
105 |
106 | query = """
107 | [
108 | {
109 | "sort": "-timestamp",
110 | "type": "/type/link",
111 | "reverse": null,
112 | "creator": null,
113 | "timestamp": null,
114 | "source": {
115 | "mid": null
116 | },
117 | "a:creator": {
118 | "type": "/dataworld/provenance",
119 | "optional": "forbidden"
120 | },
121 | "valid": null,
122 | "limit": 10,
123 | "master_property": null,
124 | "operation": null,
125 | "target": {
126 | "mid": null
127 | },
128 | "target_value": null,
129 | "b:creator": {
130 | "usergroup": {
131 | "id|=": [
132 | "/freebase/bots",
133 | "/en/metaweb_staff",
134 | "/en/current_metaweb_staff"
135 | ],
136 | "optional": "forbidden"
137 | }
138 | }
139 | }
140 | ]
141 | """
142 | cursor = True
143 | i = 0
144 | while i < 30:
145 | i+=1
146 | self.env = {'cursor': cursor}
147 | self.MQLQuerier(query)
148 | self.assertEquals(len(self.mql_result.result), 10)
149 | # we should have a new cursor
150 | self.assertNotEquals(cursor, self.mql_result.cursor)
151 | cursor = self.mql_result.cursor
152 | # it should be a cursor
153 | self.assertNotEquals(cursor, False)
154 |
155 | def testCursorComplex2(self):
156 | """random hash ordering cursor bug b/8323666 freeq."""
157 |
158 | # TODO(bneutra) how to repro the bug, testing in process
159 | # doesn't tickle it.
160 |
161 | query = """
162 | [
163 | {
164 | "master_property": {
165 | "id": null,
166 | "reverse_property": null
167 | },
168 | "limit": 3,
169 | "type": "/type/link",
170 | "target": {
171 | "guid": null,
172 | "type": [],
173 | "id": "#9202a8c04000641f8000000003b50f85"
174 | },
175 | "source": {
176 | "guid": null,
177 | "type": [],
178 | "id": null
179 | }
180 | }
181 | ]
182 | """
183 | cursor = True
184 | i = 0
185 | while i < 30:
186 | i+=1
187 | self.env = {'cursor': cursor, 'as_of_time': '2013-03-01'}
188 | self.MQLQuerier(query)
189 | self.assertEquals(len(self.mql_result.result), 3)
190 | # we should have a new cursor
191 | self.assertNotEquals(cursor, self.mql_result.cursor)
192 | cursor = self.mql_result.cursor
193 | # it should be a cursor
194 | self.assertNotEquals(cursor, False)
195 |
196 |
197 | if __name__ == '__main__':  # script entry point
198 | mql_fixture.main()  # hands control to the shared fixture module's main()
199 |
--------------------------------------------------------------------------------
/formats/image.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 |
17 | functions for manipulating image content
18 |
19 | everything is done in memory, we assume images
20 | aren't too large.
21 |
22 | """
23 |
24 | import os, contenttype
25 | from StringIO import StringIO
26 | from mw.log import LOG
27 |
28 | from mw.api.content import Content, ContentWrapper
29 | from mw.error import ContentLoadError
30 | import mw.siteconfig
31 | from mw.mql import scope
32 |
33 | TN_MODES = ['fit', 'fill', 'fillcrop', 'fillcropmid']  # presumably the thumbnail modes; not used in the visible code - TODO confirm
34 | DEF_TN_MODE = 'fit'  # first entry of TN_MODES
35 |
class ImageContent(ContentWrapper):
    """
    methods for dealing with image content
    """

    # ie6 uses some bizarre content_types for PNG and JPEG images
    # XXX it would be nice to fix the content_type in the
    # /type/content object, but it may already have been uploaded.
    # so for now, images uploaded from ie6 will have the "wrong"
    # content-type and we'll need to garden them.
    remap_dumb_ie_mime_types = {
        'image/pjpeg': contenttype.MediaType('image/jpeg'),
        'image/x-png': contenttype.MediaType('image/png')
    }

    @classmethod
    def match(cls, c):
        """
        true if this ContentWrapper subclass applies to the content argument.

        only gif, png, jpeg and x-icon images match, after the ie6
        content types have been mapped to their standard names.
        """
        media_type = cls.remap_dumb_ie_mime_types.get(c.media_type,
                                                      c.media_type)
        if not c.media_type.startswith('image/'):
            return False

        subtype = media_type.split('/')[1]

        return subtype in ('gif', 'png', 'jpeg', 'x-icon')

    def __init__(self, content):
        super(ImageContent, self).__init__(content)
        # (x, y) pair, filled in by load() or parse()
        self.size = None

    def load(self, mss):
        """
        read the image size from the /common/image facet in the graph.

        leaves self.size untouched when the query returns nothing.
        """
        result = mss.mqlread(dict(id=self.content.content_id,
                                  type='/common/image',
                                  size=dict(x=None, y=None)))

        if result is None:
            return

        self.size = (result['size']['x'], result['size']['y'])

    def upload(self, mss):
        """
        add a /common/image facet to the type/content
        """
        self.load(mss)
        if self.size is None:
            # nothing in the graph yet: measure the image itself
            self.parse(mss)

        w = {'id': self.content.content_id,
             'type': {'connect': 'insert',
                      'id': '/common/image'}}
        # only write a size when both dimensions are known and non-zero
        if self.size[0] and self.size[1]:
            w['/common/image/size'] = {'create': 'unless_exists',
                                       'type': '/measurement_unit/rect_size',
                                       'x': self.size[0],
                                       'y': self.size[1]}

        with mss.push_variables(authority="/user/content_administrator",
                                privileged=scope.Authority):
            mss.mqlwrite(w)

    def parse(self, mss):
        """
        extract data from the image

        currently only the size is extracted.  raises ContentLoadError
        when the body cannot be parsed as an image, and re-raises
        ImportError when PIL is not installed.
        """
        # exif tags from digital cameras?

        self.content.fetch_body(mss)
        try:
            # XXXarielb move to pygmy as soon as pygmy doesn't crash within threads
            from PIL import Image
            img = Image.open(StringIO(self.content.body))
            # believe the image parser over anything in the graph
            self.size = img.size
        except ImportError as ie:
            # this handler used to log an unbound name and so raised
            # NameError instead of reporting the missing PIL
            LOG.error("format.image.no_pil", str(ie))
            raise
        except Exception as e:
            LOG.error("format.image.parse", str(e))
            raise ContentLoadError('Invalid image file',
                                   app_code="upload/invalid_image_data",
                                   error=e)

    def update_content(self):
        """
        replace an ie6 content type on the content with its standard name.
        """
        media_type = self.content.media_type
        remapped = self.remap_dumb_ie_mime_types.get(media_type, media_type)
        LOG.info('update_content',
                 "Image Updating content from %s to %s" % (media_type,
                                                           remapped))
        self.content.media_type = remapped

    @classmethod
    def get_fallback_image_path(cls):
        """
        return the configured fallback thumbnail path, or None (after
        logging an error) when it is not configured or does not exist.
        """
        try:
            config = mw.siteconfig.get_config2()
            path = config.get('me.img_thumb_fallback')
            if path and os.path.exists(path):
                return path
        except KeyError:
            # treated the same as a missing setting; reported below
            pass

        LOG.error("image.thumb",
                  "Could not find fallback image for thumbnailing service.")
        return None

    # failover for thumnailing operation in the event that
    # the image is too large to thumbnail
    def thumb_fallback(self, mss):
        """
        return a Content holding the fallback image, or None if there is none.
        """
        path = ImageContent.get_fallback_image_path()
        if path is None:
            return None
        # load data; binary mode because this is image data, and the
        # with-block closes the file even if the read fails
        with open(path, 'rb') as fd:
            data = fd.read()
        # the fallback image is a known GIF image.
        thumb_mt = 'image/gif'
        c = Content(media_type=thumb_mt)
        c.set_body(data)
        return c
159 |
--------------------------------------------------------------------------------
/error.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # Table mapping response codes to messages; entries have the
16 | # form {code: (shortmessage, longmessage)}.
17 | # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
18 | from BaseHTTPServer import BaseHTTPRequestHandler
19 | import traceback
20 | from pymql.log import LOG
21 |
22 |
23 | # HTTP error code messages
24 | # XXX: We really really need to move to py2.5
25 | def is_valid_HTTP_code(code):
26 | return code in BaseHTTPRequestHandler.responses.keys()
27 |
28 |
29 | def get_HTTP_err(code):
30 | return '%d %s' % (code, BaseHTTPRequestHandler.responses[code][0])
31 |
32 |
class ParameterizedError(Exception):
    """
    This is a special Exception class that is used to format messages
    where the contents of the message itself are important. Use it
    exactly how you would use the python % format operator:

    class MyException(ParameterizedError):
        pass

    raise MyException('Got an error in query %(query)s', query=q)

    The message is stored unformatted, together with the keyword
    arguments, so that exception handlers can unpack the relevant data
    and optionally reinsert it into the result string.
    """
    DEF_PFX = '/api/status/error'
    DEF_ME_CODE = '/unknown/unknown'

    def __init__(self,
                 msg,
                 http_code=400,
                 app_code=DEF_ME_CODE,
                 inner_exc=None,
                 **kwds):
        """Record the message and error codes, then log the error.

        msg: message template; it is not formatted here.
        http_code: HTTP status code; unknown codes are replaced by 500.
        app_code: '/component/code...' path; anything with fewer than
            two components falls back to DEF_ME_CODE.
        inner_exc: when given, the error is logged with LOG.exception
            instead of LOG.debug.
        kwds: extra data, stored under 'info' in the message and on
            self.kwds.
        """
        self.msg = msg
        Exception.__init__(self, msg)

        if not is_valid_HTTP_code(http_code):
            http_code = 500
        self.http_status = get_HTTP_err(http_code)
        self.http_code = http_code

        # app_code and api code setup
        codes = app_code.split('/')
        if len(codes) < 3:
            codes = self.DEF_ME_CODE.split('/')
        self.comp_code = '%s/%s' % (self.DEF_PFX, codes[1])
        self.app_code = '%s' % '/'.join(codes[2:])
        # built before the traceback is added below, so the message's
        # 'info' never contains the traceback
        self.messages = [self.gen_msgs(**kwds)]

        if 'error' not in kwds:
            # don't extract the current frame (__init__)
            stack = traceback.extract_stack()[:-1]
            kwds['traceback'] = '\r\n'.join(traceback.format_list(stack))

        # log inner exception or self
        if inner_exc and inner_exc is not self:
            LOG.exception(msg, **kwds)
        else:
            # the log component uses app_code as passed in, not the fallback
            comp = app_code[1:].replace('/', '.')
            LOG.debug(comp, msg, **kwds)
        self.kwds = kwds

    def gen_msgs(self, **kwds):
        """Return the message dict: full code, raw message, copy of kwds."""
        return {
            'code': '%s/%s' % (self.DEF_PFX, self.app_code),
            'message': self.msg,
            'info': kwds.copy()
        }

    def get_err_dict(self):
        """Return the error as a dict with status, code and messages."""
        return {
            'status': self.http_status,
            'code': self.comp_code,
            'messages': self.messages
        }

    def __str__(self):
        return str(self.get_err_dict())
105 |
106 |
class NetworkAddressError(ParameterizedError):
    """ParameterizedError subclass for network-address errors; adds no behavior."""
109 |
110 |
class ContentLoadError(ParameterizedError):
    """ParameterizedError subclass for content-load errors; adds no behavior."""
113 |
114 |
class TypeVerifyError(ParameterizedError):
    """ParameterizedError subclass for type-verification errors; adds no behavior."""
117 |
118 |
class EmailError(ParameterizedError):
    """ParameterizedError subclass for email errors; adds no behavior."""
121 |
122 |
class SubscriptionError(ParameterizedError):
    """ParameterizedError subclass for subscription errors; adds no behavior."""
125 |
126 |
class MSSError(ParameterizedError):
    """ParameterizedError subclass for MSS errors; adds no behavior."""
129 |
130 |
class UserLookupError(ParameterizedError):
    """ParameterizedError subclass for user-lookup errors; adds no behavior."""
133 |
134 |
class UserAuthError(ParameterizedError):
    """ParameterizedError subclass for user-auth errors; adds no behavior."""
137 |
138 |
class BlobError(ParameterizedError):
    """ParameterizedError subclass for blob errors; adds no behavior."""
141 |
142 |
class BLOBClientError(ParameterizedError):
    """ParameterizedError subclass for BLOB client errors; adds no behavior."""
145 |
146 |
class RelevanceError(ParameterizedError):
    """ParameterizedError subclass for relevance errors; adds no behavior."""
149 |
150 |
class TextSearchError(ParameterizedError):
    """ParameterizedError subclass for text-search errors; adds no behavior."""
153 |
154 |
class AutocompleteError(ParameterizedError):
    """ParameterizedError subclass for autocomplete errors; adds no behavior."""
157 |
158 |
class EmptyResult(ParameterizedError):
    """ParameterizedError subclass signalling an empty result; adds no behavior."""
161 |
162 |
class GraphConnectionError(ParameterizedError):
    """ParameterizedError subclass for graph-connection errors; adds no behavior."""
165 |
166 |
class FormattingError(ParameterizedError):
    """ParameterizedError subclass for formatting errors; adds no behavior."""
169 |
170 |
class SessionError(ParameterizedError):
    """ParameterizedError subclass for session errors; adds no behavior."""
173 |
174 |
class ConfigError(ParameterizedError):
    """ParameterizedError subclass for configuration errors; adds no behavior."""
177 |
178 |
class SanitizationError(ParameterizedError):
    """ParameterizedError subclass for sanitization errors; adds no behavior."""
181 |
182 |
class BlurbError(ParameterizedError):
    """ParameterizedError subclass for blurb errors; adds no behavior."""
185 |
186 |
class DomainOperationError(ParameterizedError):
    """ParameterizedError subclass for domain-operation errors; adds no behavior."""
189 |
190 |
class GenericRuntimeError(ParameterizedError):
    """ParameterizedError subclass for generic runtime errors; adds no behavior."""
193 |
194 |
class OAuthDisabledError(ParameterizedError):
    """ParameterizedError subclass for OAuth-disabled errors; adds no behavior."""
197 |
198 |
class RecaptchaError(ParameterizedError):
    """ParameterizedError whose http_code defaults to 500 instead of 400.

    The message is additionally stored on ``self.message``.
    """

    def __init__(self, msg, http_code=500,
                 app_code=ParameterizedError.DEF_ME_CODE,
                 inner_exc=None, **kwds):
        self.message = msg
        # forward everything to the base class under the same keywords
        base_kwds = dict(kwds)
        base_kwds.update(http_code=http_code,
                         app_code=app_code,
                         inner_exc=inner_exc)
        ParameterizedError.__init__(self, msg, **base_kwds)
215 |
216 |
class ReadOnlyDatabaseError(ParameterizedError):
    """ParameterizedError that supplies a default 'cannot save' message."""

    def __init__(self, msg=None, *args, **kwds):
        if not msg:
            # no (or empty) message given: use the stock text
            msg = 'You cannot save right now. Please try again later'
        ParameterizedError.__init__(self, msg, *args, **kwds)
222 |
--------------------------------------------------------------------------------
/mql/graph/conn_mock.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Connector classes for mocked graphd query and response strings.
15 |
16 | Use these connectors when using the pymql library.
17 | See test/mql_fixture.py in pymql for a reference
18 | of how to use the record and replay connectors.
19 | """
20 |
21 | __author__ = 'bneutra@google.com (Brendan Neutra)'
22 | import sys
23 | import hashlib
24 | import re
25 | import time
26 | from pymql.mql import error
27 | from pymql.mql.graph.connector import GraphConnector
28 | from pymql.mql.grparse import ReplyParser
29 | from absl import logging
30 |
31 |
class GraphMockException(Exception):
    """Raised by MockReplayConnector when a query has no mocked response."""
34 |
35 |
36 | TIMEOUT_POLICIES = {  # policy_map MockRecordConnector passes on when none is given
37 | 'default': {
38 | 'timeout': 8.0,  # presumably seconds - TODO confirm against GraphConnector
39 | 'stubby_deadline': 10.0,
40 | 'fail_fast': False,
41 | },
42 | 'bootstrap': {
43 | 'timeout': 2.0,
44 | 'stubby_deadline': 4.0,
45 | 'fail_fast': False,
46 | },
47 | }
48 |
49 |
class MockRecordConnector(GraphConnector):
    """Connector that records graphd responses while proxying a live one.

    Every query is forwarded to the connector handed to the constructor
    (e.g. the mock stubby connector), and its raw response is stored in
    the ``mockdata`` dictionary under the hash of the stripped query.
    See test/mql_fixture.py for an implementation example.
    """

    def __init__(self, mockdata, connector, **kwargs):
        # fall back to the module's policies when none (or empty) is given
        kwargs['policy_map'] = kwargs.get('policy_map', None) or TIMEOUT_POLICIES
        GraphConnector.__init__(self, **kwargs)
        self.mockdata = mockdata
        self._conn = connector
        # ask the live connector to keep the raw reply so it can be recorded
        self._conn._save_raw_response = True
        self._mocked = {}

    def open(self, policy=None):
        """Open the wrapped connector."""
        self._conn.open(policy)

    def _capture(self, q):
        """Record the wrapped connector's last raw response and costs."""
        live = self._conn
        self.gen_mock_data(q, live._raw_response)
        self.totalcost = live.totalcost

    def transmit_query(self, q, policy, deadline, **kwargs):
        """Send ``q`` through the wrapped connector and record the reply.

        A timeout is recorded as well before MQLTimeoutError is re-raised.
        """
        try:
            reply = self._conn.transmit_query(q, policy, deadline)
        except error.MQLTimeoutError:
            self._capture(q)
            raise
        else:
            self._capture(q)
            return reply

    def reset_cost(self):
        """Reset the wrapped connector's cost, once there is a connector."""
        if not hasattr(self, '_conn'):
            return
        self._conn.reset_cost()

    def gen_mock_data(self, q, result):
        """Store ``result`` in mockdata, keyed by the hash of the query.

        A query seen before gets a '_<n>' suffix on its key, so every
        repetition keeps its own recorded response.
        """
        stripped, digest = strip_mock_query(q)
        seen = self._mocked
        if digest not in seen:
            seen[digest] = 0
            key = digest
        else:
            seen[digest] += 1
            key = digest + '_' + str(seen[digest])
        self.mockdata[key] = [stripped, result]
102 |
103 |
class MockReplayConnector(GraphConnector):
    """Mock connector for replaying recorded graphd responses.

    This class will read from the mockdata dictionary that it
    is handed. It doesn't connect or interact with graphd.
    It's faster and more reliable than talking to a live db.
    See test/mql_fixture.py for an implementation example.
    """

    def __init__(self, mockdata):
        # don't connect to a graph, do not call __init__
        self.no_timeouts = False
        self.totalcost = {}
        self.mockdata = mockdata
        # hash -> number of repeats seen, mirroring the record connector
        self._mocked = {}

    def open(self, policy=None):
        """Nothing to open: there is no graph connection."""
        pass

    def transmit_query(self, q, policy, deadline, **kwargs):
        """Return the parsed recorded reply for ``q``.

        Raises GraphMockException when no response was recorded for the
        query (or for this repetition of it).  Also sets ``self.dateline``
        from the recorded reply, or to None when it has none.
        """
        start_time = time.time()
        logging.debug('mocking query: %s', q)
        k, hsh = strip_mock_query(q)

        if hsh in self._mocked:
            # we've seen this query before for this test
            # so increment as we did in record mode
            self._mocked[hsh] += 1
            hsh = hsh + '_' + str(self._mocked[hsh])
        else:
            self._mocked[hsh] = 0

        if hsh not in self.mockdata:
            msg = '%s NO MOCKED REPONSE for this query: %s' % (hsh, k)
            logging.error(msg)
            raise GraphMockException(msg)

        m = self.mockdata[hsh]
        logging.debug('mock query found %s: %s', hsh, m[0])
        logging.debug('mock response found: %s', m[1])
        # raw string: same pattern as before, without relying on how
        # Python treats backslashes in an ordinary string literal
        rg = re.search(r' dateline\="(\S+)" ', m[1])
        self.dateline = rg.group(1) if rg else None

        reply_parser = ReplyParser()
        reply_parser.parse_full_reply(m[1])
        ret = reply_parser.get_reply()
        dbtime = time.time() - start_time
        self.add_graph_costs(ret.cost, dbtime, tries=1)
        return ret

    def _get_policy(self, policy=None):
        """There are no timeout policies in replay mode."""
        return None
159 |
160 |
def strip_mock_query(q):
    """Normalize a graphd query string and hash it.

    Two things are normalized so that the same logical query always maps
    to the same mock-data key:

    * the first `` id=... `` directive is removed (the query may be spread
      over multiple lines, but the directives should be on the first one);
    * a ``timestamp>20..`` constraint is rewritten to a fixed timestamp.

    Args:
      q: the query text.

    Returns:
      A ``(k, hsh)`` tuple: the normalized query text and the hex SHA-1
      digest of it (computed over the UTF-8 encoding when ``k`` is text).
    """
    # strip off the id
    k = re.sub(r' (id=\S+) ', ' ', q, count=1)

    # exception cases
    # timestamp stuff generated when creating mock responses is fine when it
    # comes time to replay, but mql does a scope query in realtime, not sure why
    # TODO(bneutra): why must MQL do this?
    # NOTE(review): the '.' before the fractional seconds is unescaped and so
    # matches any character; left as-is so existing recordings keep their keys.
    p = re.compile(r'timestamp\>20\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d+ ')
    if p.search(k):
        logging.debug('we saw a timestamp in the query %s', k)
        k = p.sub('timestamp>2010-09-23T00:00:00.000001 ', k)

    # hashlib only accepts bytes: encode text explicitly instead of relying
    # on an implicit ASCII conversion that fails for non-ASCII queries.
    # (ASCII queries hash exactly as before.)
    data = k if isinstance(k, bytes) else k.encode('utf-8')
    hsh = hashlib.sha1(data).hexdigest()
    return k, hsh
180 |
--------------------------------------------------------------------------------
/formats/contenttype.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | """
17 | routines for working with content-type headers
18 | and other sources of media_types and text_encodings.
19 |
20 | """
21 |
22 | import cgi
23 | from mw.formats.uniqstr import UniqueStr
24 | from mw.util import keyquote
25 |
class MediaType(UniqueStr):
    """
    a media-type that behaves like an ordinary python str.
    it carries a few extra properties and constructors that are
    useful for the metaweb.
    """

    metaweb_type = '/common/media_type'

    # the only values accepted before the '/' of a media type
    _valid_part0 = ('application', 'audio', 'image', 'message', 'model', 'multipart',
                    'text', 'text_encoding', 'video')

    @property
    def id(self):
        """the id property holds the metaweb id: value """
        quoted = [keyquote.quotekey(piece) for piece in self.split('/')]
        return '/media_type/%s' % '/'.join(quoted)

    @property
    def type(self):
        # the part before the '/', surrounding whitespace removed
        return str(self).split('/')[0].strip()

    @property
    def subtype(self):
        # the part after the '/', surrounding whitespace removed
        return str(self).split('/')[1].strip()

    @classmethod
    def normalize(cls, s):
        """
        validate a media type string.

        raises ValueError if it is longer than 128 characters, is not of
        the form "type/subtype", or has an unknown type.
        """
        s = UniqueStr.normalize(s)

        too_long = len(s) > 128
        pieces = s.lower().split('/')
        if too_long or len(pieces) != 2 or pieces[0] not in cls._valid_part0:
            raise ValueError('invalid media type "%s"' % s)

        return s

    ###################################################

    @classmethod
    def from_id(cls, id):
        """
        turn a '/media_type/...' id into its unquoted form;
        None is passed through.
        """
        if id is None:
            return None
        prefix = "/media_type/"
        assert id.startswith(prefix)
        return keyquote.unquote_id(id[len(prefix):])
72 |
class TextEncoding(UniqueStr):
    """
    canonicalized text encoding string.

    normalize() upper-cases the name and rejects names longer than
    20 characters.

    # see http://WWW.IANA.ORG/assignments/character-sets
    """

    metaweb_type = '/common/text_encoding'

    @property
    def id(self):
        """the id property holds the metaweb id: value """
        return '/media_type/text_encoding/%s' % keyquote.quotekey(self.lower())

    @property
    def codec(self):
        """the codec property holds the python codec

        raises AttributeError when no codec has been assigned to this
        instance (the setter below is the only place it gets stored).
        """
        return self._codec

    @codec.setter
    def codec(self, value):
        self._codec = value

    @classmethod
    def normalize(cls, s):
        """
        return the canonical (upper case) form of charset name s.

        raises ValueError if the name is longer than 20 characters.
        """
        s = UniqueStr.normalize(s)

        # XXX check for valid token

        if len(s) > 20:
            # call form of raise: same style as MediaType.normalize, and
            # valid on both python 2 and python 3
            raise ValueError('invalid charset "%s"' % s)

        # STANDARDS PEOPLE DIG ALL CAPS.
        return s.upper()

    @classmethod
    def from_id(cls, id):
        """
        build a TextEncoding from a '/media_type/text_encoding/...' id;
        None is passed through.
        """
        if id is None:
            return None

        # better be ASCII, but make sure it's not unicode
        id = str(id)
        # XXX this is a bad namespace location!
        assert id.startswith('/media_type/text_encoding/')
        idpath = id[len('/media_type/text_encoding/'):]
        return cls(keyquote.unquotekey(idpath))
119 |
120 |
121 | #
122 | # for now we list (and preload) some text encoding names.
123 | #
124 |
125 | # some well-known text-encodings
126 | # official names from http://www.iana.org/assignments/character-sets
127 | # python codec names are at .../lib/standard-encodings.html
# US-ASCII: created under the name 'us-ascii', with 'ascii' added as an
# alias; decoded with python's 'ascii' codec.
ascii = TextEncoding('us-ascii')
ascii.addalias('ascii')
ascii.codec = 'ascii'

# UTF-8, decoded with python's 'utf_8' codec.
utf8 = TextEncoding('utf-8')
utf8.codec = 'utf_8'

# UTF-16, decoded with python's 'utf_16' codec.
utf16 = TextEncoding('utf-16')
utf16.codec = 'utf_16'
137 |
138 | # XXX fill in the rest of the character sets we care about and
139 | # then turn on _exclusive
140 | #TextEncoding._exclusive = True
141 |
142 |
def ContentType(value):
    """
    parse a content-type header value.

    returns a (MediaType, TextEncoding) tuple; the second item is None
    when the header carries no charset parameter.
    """
    media, params = cgi.parse_header(value)
    media_type = MediaType(media)

    charset = params.get('charset')
    if charset is None:
        return (media_type, None)

    # XXX whatever this is for, it's ugly...
    # (single quotes are removed from the charset before it is used)
    return (media_type, TextEncoding(charset.replace("'", '')))
156 |
class LanguageCode(UniqueStr):
    """
    normalized language code string.

    mumble rfc-3066 inspired but more about common
    practice and the content we have.

    normalization may do surprising things.
    "en-US" gets normalized to "en".
    """

    metaweb_type = '/type/lang'

    @property
    def id(self):
        """the id property holds the metaweb id: value """
        return '/lang/%s' % keyquote.quotekey(self)

    @classmethod
    def normalize(cls, s):
        """
        return the canonical form of language code s: an optional
        leading '/lang/' is dropped, and so is everything from the
        first '-' on.

        raises ValueError if s is longer than 20 characters.
        """
        s = UniqueStr.normalize(s)

        if len(s) > 20:
            # call form of raise: same style as MediaType.normalize, and
            # valid on both python 2 and python 3
            raise ValueError('invalid language code "%s"' % s)

        # XXX for now we accept but do not require a leading '/lang/'
        # choose one, i think.
        if s.startswith('/lang/'):
            s = s[len('/lang/'):]

        # cut off anything following '-' (e.g. "en-US" -> "en")
        # XXX this should be specified and documented
        return s.split('-', 1)[0]

    @classmethod
    def from_id(cls, id):
        """
        build a LanguageCode from a '/lang/...' id;
        None is passed through.
        """
        if id is None:
            return None

        # better be ASCII, but make sure it's not unicode
        id = str(id)
        assert id.startswith('/lang/')
        return cls(keyquote.unquotekey(id[len('/lang/'):]))
200 |
--------------------------------------------------------------------------------
/emql/apikeys.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import hmac, hashlib
16 |
17 | null = None
18 | from mw.user.sqlmodel import mwOAuthProviderToken, get_sql_connection
19 | from sqlobject import AND, IN
20 |
def get_context(mss):
    """
    Get a unique string representing the combined user/app
    context.

    Note that this depends on mss.authenticate() having been
    called, if appropriate. This allows the context to be null if the
    call did not require authentication.

    Returns None when neither a user id nor an app id is available,
    otherwise the hex HMAC-SHA1 digest of "user_id&app_id".
    """
    user_id = mss.get_user_id() or ''
    app_id = mss.get_app_id() or ''

    if not user_id and not app_id:
        return None

    # user_id&app_id, user_id&, or &app_id
    context = "%s&%s" % (user_id, app_id)

    # hmac works on bytes: encode text explicitly rather than relying on an
    # implicit ASCII conversion, which fails for non-ASCII ids.
    # (ASCII ids produce exactly the same digest as before.)
    if not isinstance(context, bytes):
        context = context.encode('utf-8')

    # hmac-sha1 just like oauth
    # NOTE(review): the signing secret is hard-coded in source; consider
    # loading it from configuration. Changing it invalidates every context
    # already stored.
    magic_secret = b"Sup3rAuth3nticated!eMQL"
    return hmac.new(magic_secret, context, hashlib.sha1).hexdigest()
44 |
def get_extension_api_query(extension_id=None, optional=False):
    """
    Build the MQL query listing foreign APIs together with the tokens and
    keys they use.

    When `extension_id` is given the query is restricted to that
    extension; when `optional` is true the top-level clause is marked
    optional. A fresh one-element list is returned on every call.
    """
    optional_id = lambda: {"id": None, "optional": True}

    api_clause = {
        "id": None,
        "type": "/freebase/foreign_api",
        "consumer_token": optional_id(),
        "access_token": optional_id(),
        "api_keys": [optional_id()],
    }

    if extension_id:
        api_clause["extension"] = {"id": extension_id}

    if optional:
        api_clause["optional"] = True

    return [api_clause]
64 |
65 |
def get_api_keys(mss, extension_id, apis=None):
    """
    For a given extension, get all the API keys out of the database

    `apis` is the result of something like get_extension_api_query() -
    if you don't provide it then mqlread will be run to fill it in for
    the given extension_id

    Returns None when there are no APIs, otherwise a list with one entry
    per API, shaped like the mqlread result but with the stored "key" /
    "secret" values filled in where the provider database has them:

      [{ "id": "/netflix/queue_info",
         "consumer_token": {
           "id": "/netflix/consumer_token",
           "key": "ccc",
           "secret": "secretccc",
         },
         "access_token": {
           "id": "/netflix/access_token",
           "key": "aaa",
           "secret": "secretaaa",
         },
       },
       { "id": "/netflix/movie_info",
         "consumer_token": {
           "id": "/netflix/consumer_token",
           "key": "ccc",
           "secret": "secretccc",
         },
         "api_keys": [{
           "id": "/netflix/affiliate_code",
           "key": "fff"
         }]
       }]
    """
    special_keys = ("consumer_token", "access_token")

    # get a list of all keys that this extension needs, grouped by API
    # (because, in fact, an extension might use APIs that share
    # overlapping keys)
    if apis is None:
        q = get_extension_api_query(extension_id, optional=False)
        apis = mss.mqlread(q)

    if not apis:
        return None

    # ok, now authenticate
    mss.authenticate()
    context = get_context(mss)

    # to fetch them from the database, we want a flat list of all unique ids
    all_keys = set()
    for api in apis:
        for api_key in api["api_keys"]:
            # each entry is a {"id": ...} clause; collect the id itself
            # (the clause is a dict and cannot be put into a set)
            all_keys.add(api_key["id"])
        for special_key in special_keys:
            token = api.get(special_key)
            if token:
                all_keys.add(token["id"])

    conn = get_sql_connection(mss)

    # now query the provider database for all of these specific keys
    foreign_key_list = mwOAuthProviderToken.select(
        AND(mwOAuthProviderToken.q.context == context,
            IN(mwOAuthProviderToken.q.apiKeyId, all_keys)),
        connection=conn
    )

    # generate a map of id->key data so we can access it below
    foreign_keys = {}
    for foreign_key in foreign_key_list:
        info = {
            "id": foreign_key.apiKeyId,
            "key": foreign_key.key
        }
        if foreign_key.secret:
            info["secret"] = foreign_key.secret

        foreign_keys[foreign_key.apiKeyId] = info

    # now generate a datastructure similar to the mqlread
    api_manifest = []
    for api in apis:
        api_info = {"id": api["id"]}
        api_manifest.append(api_info)

        for special_key in special_keys:
            if api.get(special_key):
                # map "consumer_token" to "/netflix/consumer_token"
                special_key_id = api[special_key]["id"]

                # even if we don't have the key, include dummy entry
                # meaning that the API requires the key
                api_info[special_key] = {
                    "id": special_key_id
                }
                if special_key_id in foreign_keys:
                    # key and secret MUST be there
                    # (a stored token without a secret raises KeyError here)
                    foreign_key = foreign_keys[special_key_id]
                    api_info[special_key]["key"] = foreign_key["key"]
                    api_info[special_key]["secret"] = foreign_key["secret"]

        for api_key in api["api_keys"]:
            api_key_id = api_key["id"]

            # put a dummy entry in, meaning the API requires/expects
            # the key
            api_key_info = {
                "id": api_key_id,
            }
            api_info.setdefault("api_keys", []).append(api_key_info)

            if api_key_id in foreign_keys:
                foreign_key = foreign_keys[api_key_id]

                if foreign_key.get("key"):
                    api_key_info["key"] = foreign_key["key"]

                if foreign_key.get("secret"):
                    api_key_info["secret"] = foreign_key["secret"]

    return api_manifest
189 |
--------------------------------------------------------------------------------
/api/op.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import os
16 | from mw.log import LOG
17 | import logging
18 |
19 | from optparse import OptionParser
20 | from ConfigParser import ConfigParser, NoSectionError, NoOptionError
21 | from mw.user.cache import get_user_by_name
22 |
class OP(OptionParser):
    """OptionParser preloaded with the options shared by the command-line tools.

    Besides parsing, ``parse_args`` loads the site configuration and creates
    a service session; ``parse_args_only``, ``load_config`` and
    ``create_session`` expose the individual steps.
    """

    def __init__(self, *args, **kws):
        """Create the parser; ``usage`` (if given) is appended to the standard usage line."""
        usage = kws.get('usage', '')
        kws['usage'] = "%%prog [-d] [-g HOST:PORT] %s [...]" % usage
        OptionParser.__init__(self, *args, **kws)

        config_file = self._default_config_file()

        self.add_option('-c', '--config', dest='config_file',
                        default=config_file,
                        help="location of _site.cfg with graph configuration")

        self.add_option('-d', '--debug', dest='debug',
                        default=False, action='store_true',
                        help="turn on debugging output")

        self.add_option('-l', '--loglevel', dest='loglevel',
                        default='WARNING', action='store',
                        help="set the log level")
        self.add_option('-g', '--graph', dest='graphd_addr',
                        metavar="HOST:PORT",
                        help="address of graphd in the form host:port")
        self.add_option('-b', '--blob', dest='blobd_addr',
                        metavar="HOST:PORT",
                        help="address of blobd in the form host:port")
        self.add_option('-D', '--define', dest='defines',
                        default=[], action='append',
                        help='override other site.cfg options in the form section.entry=value')
        self.add_option("-a", "--as_user", dest="as_user",
                        metavar="/user/USERID",
                        help="User ID to write with")

        self.add_option("-r", "--relevance", dest="relevance_addr",
                        metavar="HOST:PORT",
                        help="host:port of relevance server")
        self.add_option("-s", "--geo", dest="geo_addr",
                        metavar="HOST:PORT",
                        help="host:port of geo server")

        self.add_option("-T", "--no_timeouts", dest="no_timeouts",
                        default=False, action='store_true',
                        help="turn off socket timeouts (off by default)")

    @staticmethod
    def _default_config_file():
        """Return the default _site.cfg path, or None when there is none.

        $ME_SITE_CONFIG wins when it names an existing file. Otherwise the
        directories above this module are searched for _site.cfg, stopping
        at the directory that contains project.mw4 (i.e. the root of
        whatever project we're in) or at the filesystem root.
        """
        config_file = os.environ.get('ME_SITE_CONFIG')
        if config_file is not None and os.path.exists(config_file):
            return config_file

        path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
        while True:
            config_file = os.path.join(path, "_site.cfg")
            if os.path.exists(config_file):
                return config_file
            if os.path.exists(os.path.join(path, "project.mw4")):
                return None
            parent = os.path.abspath(os.path.join(path, ".."))
            if parent == path:
                # reached the filesystem root: give up instead of looping
                # forever on the same directory
                return None
            path = parent

    def parse_args(self, *args, **kws):
        """Parse the arguments, load the config and create the session.

        This is an all-in-one function; most of the time in simple scripts
        you don't need any more control than this.

        Returns the ``(options, args)`` pair from the option parser.
        """
        options, args = self.parse_args_only(*args, **kws)

        config = self.load_config(options)

        self.create_session(config, options)

        return (options, args)

    def parse_args_only(self, *args, **kws):
        """Strictly parse the args without loading the config or creating the session."""
        return OptionParser.parse_args(self, *args, **kws)

    def load_config(self, options):
        """Load the configuration without attempting to connect to any services.

        The config file named by ``options.config_file`` is read (if any),
        then ``-D key=value`` defines and the address options are applied on
        top. The result is stored on ``self.config`` and returned.
        """
        from paste.deploy import appconfig

        config = {}
        if options.config_file is not None:
            LOG.debug("parse.args", "Trying to open %s" % options.config_file)
            try:
                config = appconfig("config:%s" % options.config_file)
            except LookupError as e:
                LOG.debug("parse.args", "Error loading config file, missing paste sections", options.config_file, e)
                # fall through

        for k, v in (li.split('=', 1)
                     for li in options.defines):
            config[k] = v

        loglevels = 'EMERG ALERT CRIT ERR WARNING NOTICE INFO DEBUG'.split()
        if options.loglevel in loglevels:
            LOG.setLevel(logging.getLevelName(options.loglevel))
        else:
            # OptionParser.error() prints the usage and exits the process
            self.error('unknown log level %s\n  valid log levels are %s'
                       % (options.loglevel, ', '.join(loglevels)))

        # go through the config file for these options, keeps things
        # simple
        if options.graphd_addr:
            config["graphd.address"] = options.graphd_addr

        if options.blobd_addr:
            config["clobd.address"] = options.blobd_addr
            config["clobd.masteraddress"] = options.blobd_addr

        if options.relevance_addr:
            config["relevance.address"] = options.relevance_addr

        if options.geo_addr:
            config["geo.address"] = options.geo_addr

        if options.no_timeouts:
            config["debug.no_timeouts"] = 'true'

        self.config = config
        return config

    def create_session(self, config, options):
        """Open the connections to services and return the new session.

        When ``options.as_user`` is set it must have the form /user/USERID;
        the session is then set up to act as that user.
        """
        from mw.api.service import ServiceContext, Session
        self.ctx = ServiceContext()

        self.ctx.load_config(config)
        self.ctx.connect()

        self.session = Session(self.ctx)

        # do further configuration of Session

        self.session.finish_init()

        if options.as_user:
            if not options.as_user.startswith("/user/"):
                raise Exception("User must be in the form /user/USERID")
            user_name = options.as_user[len("/user/"):]
            self.session.push_variables(user=options.as_user)
            self.session._signed_user = get_user_by_name(user_name)
            self.session.get_user().validate(self.session)

        return self.session
177 |
--------------------------------------------------------------------------------
/mql/pathexpr.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | #
16 | # code for dealing with metaweb path expressions
17 | #
18 | # there will be a lot of variants of this language based
19 | # on embedding, it would be nice to have them all abstracted
20 | # out at some point but for now we're still figuring out
21 | # what the differences are.
22 | #
23 |
24 | import sys, os, re
25 |
26 | if __name__ == '__main__':
27 | sys.path.append(os.path.abspath('../..'))
28 |
29 | from pymql.log import LOG
30 | from error import MQLInternalError
31 |
32 | from pymql import json
33 | from pymql.error import EmptyResult, ParameterizedError
34 |
35 |
class JSONResponse(object):
    """Accumulates the envelope of a JSON response in ``self.response``.

    The envelope starts out as a 500 server error with an empty
    ``messages`` list; callers overwrite fields with ``extend`` and add
    messages with ``log``.
    """

    def __init__(self, **kws):
        self.response = {
            'status': '500 Internal Server Error',
            'code': '/api/status/error/server',
            'messages': []
        }
        self.extend(**kws)

    def extend(self, **kws):
        """Merge ``kws`` into the envelope.

        ``messages`` is appended to the existing message list; every other
        key replaces the current value.
        """
        # items() rather than the python-2-only iteritems(): same result,
        # and it keeps this class usable on python 3 as well
        for k, v in kws.items():
            if k == 'messages':
                self.response[k] += v
            else:
                self.response[k] = v

    def log(self, text, **kws):
        """Append a message dict made of ``kws`` plus ``message=text``."""
        kws['message'] = text
        self.response['messages'].append(kws)
56 |
57 |
58 | #
59 | #
60 | # snipped from mod_python 3.1.3 apache.py
61 | #
62 | # - modified to format result as a json-like structure.
63 | #
64 | import traceback
65 |
66 |
def json_traceback(response=None, exception=None, **kws):
    """
    Log the exception currently being handled and pack it, with its
    stack, into a json-like response structure.

    `response` is the JSONResponse to add the error to (a fresh 500
    response is created when it is None) and `kws` are merged into it.
    The `exception` argument is not consulted: the details come from
    sys.exc_info(). Returns the response dictionary.
    """

    etype, evalue, etb = sys.exc_info()

    try:  # try/finally
        try:  # try/except

            if etype is IOError and str(evalue)[:5] == 'Write':
                # an IOError while writing to the client: just note it
                LOG.error('json_traceback', 'skipping error write to client')

            # write to log, one formatted chunk at a time and without the
            # trailing newline
            for chunk in traceback.format_exception(etype, evalue, etb):
                LOG.error('json_traceback', '%s' % chunk[:-1])

            if response is None:
                response = JSONResponse(
                    status='500 Internal Server Error',
                    code='/api/status/error/server')
            response.extend(**kws)

            fields = 'file,line,func,source'.split(',')
            frames = traceback.extract_tb(etb, None)
            stack = [dict(zip(fields, quad)) for quad in frames]

            response.log('%s: %s' % (etype, evalue), stack=stack, level='error')

            return response.response

        except Exception:
            # hit the backstop. must be a bug in the normal exception handling
            # code, do something simple.
            return {
                'status': '500 Internal Server Error',
                'messages': [{
                    'level': 'error',
                    'text': traceback.format_exc()
                }],
            }

    finally:
        # erase the traceback
        etb = None
122 |
123 |
def _querier_label(querier):
    """Return 'Class.method' for a bound method, else just the callable's name."""
    owner = getattr(querier, 'im_class', None)
    if owner is None:
        bound_to = getattr(querier, '__self__', None)
        if bound_to is not None:
            owner = type(bound_to)
    name = getattr(querier, '__name__', repr(querier))
    if owner is None:
        return name
    return '%s.%s' % (owner.__name__, name)


def wrap_query(querier, sq, varenv=None, transaction_id=None):
    """
    Run a query with the given querier (usually something like
    ctx.low_querier.read) - performing appropriate envelope packing and
    unpacking, multiple queries, error handling, etc

    `sq` is the query envelope, either as a dictionary or as a JSON
    string. Every key of the envelope that is not a reserved name is run
    as a separate query through `querier(subq, varenv)`.

    Returns the response dictionary; errors are reported inside it rather
    than raised (except for a ValueError from the JSON parser whose text
    does not look like a parse error, which is re-raised).

    DEPRECATED.
    """

    LOG.error(
        'deprecated',
        'mw.mql.pathexpr.wrap_query() is DEPRECATED and will go away soon!')

    if isinstance(sq, basestring):
        # convert to json query
        try:
            # XXX should eventually use unicode, for now utf8
            sq = json.loads(sq, encoding='utf-8', result_encoding='utf-8')

        except ValueError as e:
            # debug ME-907
            LOG.exception('mql.pathexpr.wrap_query()', sq=sq, varenv=varenv)

            m = re.match(r'^(.+): line (\d+) column (\d+)', str(e))
            if not m:
                raise
            response = JSONResponse(
                status='400 Bad Request', code='/api/status/error/request')
            text = 'json parse error: ' + m.group(1)
            response.log(
                text, line=int(m.group(2)), column=int(m.group(3)),
                level='error')
            return response.response

        except Exception as e:
            return json_traceback(
                exception=e,
                status='400 Bad Request',
                code='/api/status/error/request')

    if not isinstance(sq, dict):
        response = JSONResponse(
            status='400 Bad Request', code='/api/status/error/request')
        text = 'json type error: query was not a dictionary'
        response.log(text, level='error')
        return response.response

    if varenv is None:
        varenv = {}

    # backwards compatibility until we remove the transaction_id parameter
    if 'tid' not in varenv:
        varenv['tid'] = transaction_id

    if 'cursor' in sq:
        varenv['cursor'] = sq['cursor']

    try:
        # should be JSONResponse(query=sq['query']) 'queries' to match
        # envelope spec
        response = JSONResponse(query=sq)
        results = {}

        # filter out these special keys for now - eventually some of
        # these will be filled in by the caller but only if we trust
        # them!
        reserved_names = ('request_id', 'cost', 'lang', 'transaction_id',
                          'permission', 'cursor', 'user')

        valid_queries = (
            (k, v) for k, v in sq.iteritems() if k not in reserved_names)

        # make sure to copy the request_id
        # (JSONResponse has no item assignment; extend() is its setter)
        if 'request_id' in sq:
            response.extend(request_id=sq['request_id'])

        # should only looking either at sq['query'] for a single query or
        # sq['queries'] for multiple queries
        for id, subq in valid_queries:
            # label works for bound methods and for plain callables
            LOG.notice('Query', _querier_label(querier), subq=subq)
            try:
                results[id] = querier(subq, varenv)

                response.extend(status='200 OK')

            except EmptyResult as e:
                # no entry is added to results for this query
                LOG.info('emptyresult', '%s' % e)
                response.log('empty result for query %s' % subq)

            # exceptions should be packed into response['error']
            except ParameterizedError as e:
                if isinstance(e, MQLInternalError):
                    response.extend(status='500 Internal Server Error')
                else:
                    response.extend(status='400 Bad Request')

                # called for its effect on response: logs the traceback and
                # records the stack as an error message
                json_traceback(response=response, exception=e)
                response.log('parse exception: %s' % e, level='error')

            except Exception as e:
                LOG.exception('python.exception')
                return json_traceback(response=response, exception=e)

        response.extend(result=results)
        if 'cursor' in varenv:
            response.extend(cursor=varenv['cursor'])

        return response.response

    except Exception as e:
        LOG.exception('python.exception')
        return json_traceback(response=response, exception=e)
240 |
--------------------------------------------------------------------------------
/util/pattern.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | import re, zlib
17 | from urlparse import urlparse
18 |
# Regex literals are raw strings so the backslashes reach the regex engine
# untouched; the pattern text itself is unchanged.

# "$XXXX": a '$' followed by exactly four upper-case hex digits
RE_KEY = re.compile(r'\$([0-9A-F][0-9A-F][0-9A-F][0-9A-F])')
# "{name}": a variable placeholder inside a pattern
RE_VARS = re.compile(r'{([^}]+)}')
# "prop[ns]": a property name followed by a bracketed namespace
RE_NS = re.compile(r'([^]]+)\[([^]]+)\]')
22 |
23 |
24 | class Pattern(object):
25 |
26 | def __init__(self, pattern, guid=None, key=None, error=None):
27 |
28 | self.pattern = pattern
29 | self.guid = guid
30 | self.vars = dict((var, None) for var in RE_VARS.findall(pattern))
31 | self.error = error
32 | if 'key' in self.vars:
33 | self.vars['key'] = self.decode_key(key)
34 |
35 | # returns a utf-8 encoded string of the pattern with variables
36 | # whose value is not None expanded.
37 | # if error is not None, the entire pattern is replaced with error
38 | # when a None variable value is encountered
39 | def __str__(self):
40 |
41 | string = self.pattern
42 | error = self.error
43 |
44 | if isinstance(string, unicode):
45 | for var, value in self.vars.iteritems():
46 | if value is not None:
47 | if isinstance(value, str):
48 | value = unicode(value, 'utf-8')
49 | elif not isinstance(value, unicode):
50 | value = unicode(value)
51 | string = string.replace(u'{%s}' %(var), value)
52 | elif error is not None:
53 | if isinstance(error, str):
54 | string = unicode(error, 'utf-8')
55 | elif not isinstance(error, unicode):
56 | string = unicode(error)
57 | else:
58 | string = error
59 | break
60 | string = string.encode('utf-8')
61 | else:
62 | for var, value in self.vars.iteritems():
63 | if value is not None:
64 | if isinstance(value, unicode):
65 | value = value.encode('utf-8')
66 | elif not isinstance(value, str):
67 | value = str(value)
68 | string = string.replace('{%s}' %(var), value)
69 | elif error is not None:
70 | if isinstance(error, unicode):
71 | string = error.encode('utf-8')
72 | elif not isinstance(error, str):
73 | string = str(error)
74 | else:
75 | string = error
76 | break
77 |
78 | return string
79 |
80 | # returns a unicode string of the pattern with variables
81 | # whose value is not None expanded.
82 | # if error is not None, the entire pattern is replaced with error
83 | # when a None variable value is encountered
def __unicode__(self):
    """Render the pattern as a unicode string.

    Each ``{name}`` placeholder whose variable is not None is replaced by
    the variable's value.  When a None variable is met and ``self.error``
    is not None, the whole result becomes ``self.error`` and expansion
    stops; when ``self.error`` is None the placeholder is left in place.
    """
    text = self.pattern
    fallback = self.error
    is_unicode = isinstance(text, unicode)

    if is_unicode:
        marker = u'{%s}'

        def coerce(obj):
            # byte strings are decoded as UTF-8, other objects via unicode()
            if isinstance(obj, str):
                return unicode(obj, 'utf-8')
            if isinstance(obj, unicode):
                return obj
            return unicode(obj)
    else:
        marker = '{%s}'

        def coerce(obj):
            # unicode is encoded as UTF-8, other objects via str()
            if isinstance(obj, unicode):
                return obj.encode('utf-8')
            if isinstance(obj, str):
                return obj
            return str(obj)

    for name, val in self.vars.iteritems():
        if val is not None:
            replacement = coerce(val)
            text = text.replace(marker % (name), replacement)
        elif fallback is not None:
            text = coerce(fallback)
            break

    # a byte-string pattern was expanded as bytes; decode it as UTF-8
    if not is_unicode:
        text = unicode(text, 'utf-8')

    return text
124 |
def decode_key(self, key):
    """Decode the escapes matched by RE_KEY in ``key``.

    Every RE_KEY match is rewritten as a ``\\u`` escape followed by the
    match's first group; if that changed anything, the result is decoded
    with the ``unicode-escape`` codec and re-encoded as UTF-8.

    Returns None when ``key`` is None, and ``key`` itself when nothing
    matched.
    """
    if key is None:
        return None

    value = RE_KEY.sub('\\u\\1', key)
    # Compare by value: whether re.sub hands back the very same object
    # when nothing matched is an implementation detail, not a guarantee.
    if value != key:
        value = value.decode('unicode-escape').encode('utf-8')

    return value
134 |
135 | def _prop_name(self, prefix, var, prop):
136 |
137 | # use adler32 as it's shorter than hash on 64-bit and just as fast
138 | return "%s_%x:%s" %(prefix or "p", zlib.adler32(var) & 0xffffffff, prop)
139 |
def mql_query(self, prefix=None):
    """Build a query dict for every variable whose value is still None.

    The 'key' variable is skipped.  A dotted variable name is walked one
    segment at a time, each plain segment adding a nested
    ``[{"limit": 1}]`` clause under a name produced by ``_prop_name``.
    A segment matched by RE_NS becomes a key/namespace clause and ends
    the walk.  If no segment matched RE_NS, the last clause is replaced
    by None.  "guid" is added only when the query is non-empty.
    """
    result = {}

    for name, current in self.vars.iteritems():
        if name == 'key' or current is not None:
            continue

        node = parent = result
        leaf = None
        ended_on_namespace = False

        for segment in name.split('.'):
            ns_match = RE_NS.search(segment)
            if ns_match is None:
                leaf = self._prop_name(prefix, name, segment)
                node[leaf] = [{"limit": 1}]
                parent = node
                node = node[leaf][0]
                continue

            bare, namespace = ns_match.groups()
            leaf = self._prop_name(prefix, name, bare)
            node[leaf] = {
                "key": [{
                    "limit": 1, "namespace": namespace, "value": None
                }]
            }
            ended_on_namespace = True
            break

        if not ended_on_namespace:
            # last prop is assumed to be prop: null compatible
            # so that name or literal queries require no hacks
            parent[leaf] = None

    if result:
        result["guid"] = self.guid

    return result
171 |
def set_key(self, key):
    """Store the decoded ``key`` in the 'key' variable, if there is one.

    Nothing is stored when 'key' is not among ``self.vars``.
    Returns ``self`` so calls can be chained.
    """
    variables = self.vars
    if 'key' in variables:
        variables['key'] = self.decode_key(key)

    return self
178 |
def set_mqlres(self, mqlres, prefix=None, clear=False):
    """Fill the still-None variables from an MQL result.

    With ``clear`` true, every variable except 'key' is reset to None
    first.  For each variable (other than 'key') whose value is None, the
    dotted name is walked through ``mqlres`` using the names produced by
    ``_prop_name``; a list result is replaced by its first element.  A
    segment matched by RE_NS reads ``[prop]['key'][0]['value']`` and ends
    the walk.  Any lookup failure leaves the variable as None.

    Returns ``self`` so calls can be chained.
    """
    if clear:
        for var in self.vars.iterkeys():
            if var != 'key':
                self.vars[var] = None

    for var, value in self.vars.iteritems():
        if var != 'key' and value is None:
            value = mqlres
            for prop in var.split('.'):
                nsprop = RE_NS.search(prop)
                try:
                    if nsprop is not None:
                        prop, ns = nsprop.groups()
                        prop = self._prop_name(prefix, var, prop)
                        value = value[prop]['key'][0]['value']
                        break
                    else:
                        prop = self._prop_name(prefix, var, prop)
                        value = value[prop]
                        if isinstance(value, list):
                            value = value[0]
                except Exception:
                    # Best effort: a missing or oddly shaped result means
                    # "no value".  Exception (not a bare except) so that
                    # KeyboardInterrupt and SystemExit still propagate.
                    value = None
                    break

            self.vars[var] = value

    return self
209 |
def set_uri(self, uri):
    """Split ``uri`` with urlparse and store the parts as variables.

    Sets 'scheme', 'host', 'path', 'query' and 'fragment'; the params
    component (index 3 of the urlparse result) is not stored.
    Returns ``self`` so calls can be chained.
    """
    scheme, host, path, _params, query, fragment = urlparse(uri)

    variables = self.vars
    variables['scheme'] = scheme
    variables['host'] = host
    variables['path'] = path
    variables['query'] = query
    variables['fragment'] = fragment

    return self
217 |
--------------------------------------------------------------------------------
/mql/benchmark.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import os, sys, re
16 | if __name__ == "__main__":
17 | sys.path.append(os.path.abspath("../.."))
18 |
19 | from pymql.log import LOG
20 | from pymql import json
21 | import time
22 |
# Pick the profiler: cProfile when it can be imported, hotshot otherwise.
# `profiler` records which one was chosen so cmdline_main can branch on it.
try:
    import cProfile
    profiler = "cProfile"
except ImportError:
    import hotshot
    profiler = "hotshot"
29 |
30 |
def wildcard_mql_query():
    """Return a one-clause MQL query list asking for "id" and "*"."""
    clause = {"id": None, "*": None}
    return [clause]
33 |
34 |
def get_all_domains_mql_query():
    """Return an MQL query list for objects of type /type/domain.

    Each domain clause nests, through "/type/namespace/keys", a
    /type/type clause which in turn nests a /type/property clause.
    A fresh structure is built on every call.
    """
    property_clause = {
        "type": "/type/property",
        "unique": None,
        "id": None,
        "schema": None,
        "expected_type": None,
        "master_property": None,
        "name": None,
        "reverse_property": [],
    }
    type_clause = {
        "type": "/type/type",
        "name": None,
        "id": None,
        "domain": None,
        "/type/namespace/keys": [{
            "value": None,
            "type": None,
            "namespace": property_clause,
        }],
    }
    domain_clause = {
        "id": None,
        "name": None,
        "type": "/type/domain",
        "key": {"value": None, "namespace": "/"},
        "/type/namespace/keys": [{
            "value": None,
            "type": None,
            "namespace": type_clause,
        }],
    }
    return [domain_clause]
76 |
77 |
def get_domain_mql_query():
    """Return an MQL query dict for the /type/domain object with id "/type".

    The clause nests, through "/type/namespace/keys", a /type/type clause
    which in turn nests a /type/property clause.  A fresh structure is
    built on every call.
    """
    property_clause = {
        "type": "/type/property",
        "unique": None,
        "id": None,
        "schema": None,
        "expected_type": None,
        "master_property": None,
        "name": None,
        "reverse_property": [],
    }
    type_clause = {
        "type": "/type/type",
        "name": None,
        "id": None,
        "domain": None,
        "/type/namespace/keys": [{
            "value": None,
            "type": None,
            "namespace": property_clause,
        }],
    }
    return {
        "id": "/type",
        "name": None,
        "type": "/type/domain",
        "/type/namespace/keys": [{
            "value": None,
            "type": None,
            "namespace": type_clause,
        }],
    }
115 |
116 |
def get_type_mql_query():
    """Return an MQL query dict for the object with id "/type/object".

    Its "/type/namespace/keys" entry nests a /type/property clause.
    A fresh structure is built on every call.
    """
    property_clause = {
        "type": "/type/property",
        "unique": None,
        "id": None,
        "schema": None,
        "expected_type": None,
        "master_property": None,
        "name": None,
        "reverse_property": [],
    }
    key_clause = {
        "value": None,
        "type": None,
        "namespace": property_clause,
    }
    return {
        "type": [],
        "name": None,
        "id": "/type/object",
        "/type/type/domain": None,
        "/type/namespace/keys": [key_clause],
    }
141 |
142 |
def get_schema_query(guid):
    """Return a graph query dict for the /type/type node with ``guid``.

    ``guid`` is stored under "@guid".  The "has_key" list holds one
    optional /type/property clause.  A fresh structure is built on every
    call.
    """

    def unique_flag():
        # built anew each time so the two occurrences stay independent
        return {":value": None, ":datatype": "boolean", ":optional": True}

    master_property = {
        ":optional": True,
        "@guid": None,
        "is_unique_property": unique_flag(),
    }
    property_clause = {
        ":optional": True,
        "@guid": None,
        ":value": None,
        "has_schema": {"@guid": None},
        "has_expected_concept_type": {":optional": True, "@guid": None},
        "has_master_property": master_property,
        "is_unique_property": unique_flag(),
        "is_instance_of": {"@id": "/type/property"},
    }
    return {
        "@guid": guid,
        "is_instance_of": {"@id": "/type/type"},
        "uses_properties_from": {"@guid": None, ":optional": True},
        "has_default_property_name": {":value": None, ":optional": True},
        "has_key": [property_clause],
    }
188 |
189 |
def get_object_query():
    """Return the schema query (guid None) with "@id" set to "/type/object"."""
    schema_query = get_schema_query(None)
    schema_query["@id"] = "/type/object"
    return schema_query
194 |
195 |
def get_domain_query():
    """Return a graph query dict for the /type/domain node with id "/type".

    Its single "has_key" entry is the schema query (guid None) with
    ":value" set to None and "has_domain" pointing at "/type".
    """
    type_entry = get_schema_query(None)
    type_entry[":value"] = None
    type_entry["has_domain"] = {"@id": "/type"}

    return {
        "@id": "/type",
        "is_instance_of": {
            "@id": "/type/domain"
        },
        "has_key": [type_entry],
    }
207 |
208 |
def get_wildcard_query():
    """Return a one-clause graph query list with an optional "*" sub-clause.

    A fresh structure is built on every call.
    """
    any_link = {
        "@guid": None,
        ":guid": None,
        ":value": None,
        ":optional": True,
    }
    node = {"@guid": None, "*": [any_link]}
    return [node]
219 |
220 |
def test_run(ctx, varenv, options, query):
    """Run ``query`` ``options.num`` times and record the elapsed time.

    The cost counters on ``ctx.gc`` are reset first.  When
    ``options.flush`` is set, the schema factory is flushed before every
    iteration.  With ``options.type == "graph"`` the query goes to
    ``ctx.gc.read`` (using ``varenv["tid"]`` and ``varenv["policy"]``);
    otherwise it goes to ``ctx.high_querier.read``.

    The wall-clock duration of the whole loop is stored in
    ``ctx.gc.totalcost["dt"]``.

    Returns the result of the last iteration, or None when no iteration
    ran.
    """
    ctx.gc.reset_cost()

    result = None

    start_time = time.time()

    for _ in xrange(options.num):
        if options.flush:
            ctx.high_querier.schema_factory.flush("")

        if options.type == "graph":
            result = ctx.gc.read(
                query, transaction_id=varenv["tid"], policy=varenv["policy"])
        else:
            result = ctx.high_querier.read(query, varenv)

    stop_time = time.time()

    ctx.gc.totalcost["dt"] = stop_time - start_time

    return result
245 |
246 |
def cmdline_main():
    """Command-line entry point for the benchmark.

    Parses the options, obtains the query (from the ``-f`` file, from the
    module-level function named by ``-c``, or from the single positional
    argument), then runs ``test_run`` either directly or under the
    profiler chosen at import time, and logs the accumulated cost.
    """
    LOG.warning("benchmark", "test start")
    start_time = time.time()

    from mql.mql import cmdline
    op = cmdline.OP("testing")

    op.add_option(
        "-n", dest="num", default=1000, type="int", help="number of iterations")

    op.add_option(
        "-P",
        dest="profile",
        default=None,
        help="run profiler with output to file")

    op.add_option("-c", dest="call", default=None, help="function to call")

    op.add_option(
        "-f", dest="query_file", default=None, help="file containing query")

    op.add_option(
        "--flush",
        dest="flush",
        default=None,
        help="flush cache between every request")

    op.add_option("-t", dest="type", default="mql", help="graph or MQL query")

    options, args = op.parse_args()

    stop_time = time.time()
    op.ctx.gc.totalcost["dt"] = stop_time - start_time

    LOG.warning("start cost", {
        "nreqs": op.ctx.gc.nrequests,
        "cost": op.ctx.gc.totalcost
    })

    # NOTE(review): parse_args() is called a second time here.  It looks
    # redundant, but its side effects are not visible from this file, so
    # the call is kept -- confirm and remove.
    options, args = op.parse_args()

    queryfile = options.query_file
    if queryfile is not None:
        qf = open(queryfile, "r")
        try:
            query = qf.read()
        finally:
            # close the file even when reading fails
            qf.close()
        # collapse newlines and tabs so the query is a single line
        query = re.sub("[\n\t]+", " ", query)
    elif options.call:
        # NOTE: calls whatever module-level name was given on the command
        # line; only meant for the query builder functions in this file.
        query = globals()[options.call]()
    elif len(args) == 1:
        query = args[0]
    else:
        op.error("Must specify a query argument")

    if options.type == "mql":
        # A query built by a -c function is already a Python structure;
        # only text needs to be parsed.
        if not isinstance(query, (list, dict)):
            # XXX should eventually use unicode, for now utf8
            query = json.loads(query, encoding="utf-8", result_encoding="utf-8")
    elif options.type == "graph":
        pass
    else:
        op.error("-t must be 'mql' or 'graph'")

    if options.profile:
        if profiler == "hotshot":
            profile = hotshot.Profile(options.profile)
            try:
                profile.runcall(test_run, op.ctx, op.varenv, options, query)
            finally:
                # hotshot only finishes writing its log file on close()
                profile.close()
            LOG.warning(
                "benchmark",
                "Saving hotshot profile in Stats format to %s" % options.profile)

        elif profiler == "cProfile":
            profile = cProfile.Profile()
            profile.runcall(test_run, op.ctx, op.varenv, options, query)

            LOG.warning(
                "benchmark",
                "Saving cProfile data in kcachegrind format to %s" % options.profile)
            # get from http://jcalderone.livejournal.com/21124.html
            # and put in thirdparty/pyroot
            from mql.mql import lsprofcalltree
            k = lsprofcalltree.KCacheGrind(profile)
            outfile = open(options.profile, "w")
            try:
                k.output(outfile)
            finally:
                outfile.close()
        else:
            LOG.warning("benchmark", "No profiler available, not running benchmark")
    else:
        test_run(op.ctx, op.varenv, options, query)

    LOG.warning("run cost", {
        "nreqs": op.ctx.gc.nrequests,
        "cost": op.ctx.gc.totalcost
    })
343 |
344 |
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    cmdline_main()
347 |
--------------------------------------------------------------------------------
/mql/grparse.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google LLC
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """quick and dirty parsing of graphd query language strings into python lists.
15 | """
16 | import re
17 | from grquoting import quote, unquote
18 |
19 | from error import MQLGraphError, MQLDatelineInvalidError, MQLTimeoutError,\
20 | MQLCursorInvalidError, GraphIsSnapshottingError
21 |
22 | from pymql.log import LOG
23 |
# gstr_escape is grquoting.quote under a second name.
gstr_escape = quote
25 |
26 | # there are several places in pymi where gstr_unescape is called on a string that
27 | # is not escaped. One example is the result of result=(datatype) - a bareword
28 | # like boolean is returned, not a quoted string.
29 | #
30 | # Rather than try and fix these cases, I've made gstr_unescape preserve
31 | # this behaviour. Please use mw.mql.grquoting.unquote() instead.
32 |
33 |
def gstr_unescape(string):
    """Unquote ``string`` when it starts with a double quote.

    Anything else (a bareword such as ``boolean``, or an empty string) is
    returned unchanged.
    """
    # slice rather than index: an empty string is passed through instead
    # of raising IndexError
    if string[:1] == '"':
        return unquote(string)
    return string
39 |
40 |
# Table of cost counters.  Each entry is a 3-tuple:
#   (short code, label, description)
# costcode_dict below is built from it, keyed by the short code.
cost_parameters = [
    ('tr', 'time/real',
     'number of milliseconds graphd spent executing to answer this query in '
     'general. This number will get larger on a system that is busy with other'
     ' things, even if graphd isn\'t involved in them.'
    ),
    ('tu', 'time/user',
     'number of milliseconds graphd spent executing in user mode while '
     'computing the answer to this request.'
    ),
    ('ts', 'time/system',
     'number of milliseconds graphd spent executing in system mode while '
     'computing the answer to this requests. "Executing in system mode" almost'
     ' always means "reading a lot of data from disk".'
    ),
    ('pr', 'page reclaims',
     'a benevolent form of page fault that doesn\'t actually do any work '
     'because the page is still in the local cache.'
    ),
    ('pf', 'page faults',
     'the thing we\'re trying to minimize. Higher pf will usually be '
     'accompanied by a higher ts.'
    ),
    ('dw', 'primitive data writes',
     'Usually, these will be what you expect, except for queries that create '
     'implicit type links and type system fragments.'
    ),
    ('dr', 'primitive data reads',
     'how many single primitive structs were read from disk (for example, as '
     'part of dismissing them as candiates for a qualified search).'
    ),
    ('in', 'index size reads',
     'how many indices were looked up with their starting address and size.'),
    ('ir', 'index element reads', 'get one member of one index.'),
    ('iw', 'index element write', 'add an element to an index.'),
    ('va', 'value allocation',
     'allocate a (possibly temporary or transient) result data structure.'),
    ('te', 'time/overall',
     'number of milliseconds from receipt of this query by the graph, to the '
     'start of sending the response'
    ),
    ('tg', 'time/graph',
     'time me observes from sending the first byte of the request to receiving'
     ' the last byte'
    ),
    ('tf', 'time/formatted',
     'time me takes from sending the request to handing off the formatted '
     'response'
    ), ('tm', 'time/mql', 'time taken inside the MQL subroutines'),
    ('cr', 'cache/read', 'number of requests sent to memcache'),
    ('cm', 'cache/miss', 'number of memcache misses'),
    ('ch', 'cache/hit', 'number of memcache hits'),
    ('lr', 'lojson-cache/read', 'number of schema requests sent to memcache'),
    ('lm', 'lojson-cache/miss', 'number of schema memcache misses'),
    ('lh', 'lojson-cache/hit', 'number of schema memcache hits'),
    ('rt', 'relevance/time',
     'time taken inside the relevance server (as measured by ME)'),
    ('gcr', 'graph connect retries',
     'the number of times that ME tried to open a connection to a graph'),
    ('gqr', 'graph query retries',
     'the number of times that ME tried to service a query from a single graph')
]
103 |
# Maps each short cost code to its (label, description) pair.
costcode_dict = dict(
    (code, (label, description))
    for code, label, description in cost_parameters)
105 |
# One "name=digits" item of a cost string, plus any trailing whitespace.
costitem_re = re.compile(r'([a-zA-Z]+)=(\d+)\s*')


def coststr_to_dict(coststr):
    """Turn a cost string such as ``"tr=10 tu=5"`` into ``{code: int}``.

    Returns None for an empty or None ``coststr``.  Text that does not
    look like ``name=digits`` is ignored; when a code appears more than
    once the last value wins.
    """
    if coststr:
        return dict(
            (item.group(1), int(item.group(2)))
            for item in costitem_re.finditer(coststr))
    return None
114 |
115 |
# Tokenizer for graphd replies.  A token is one of: "(" or ")", a single
# space, the arrows "->" and "<-", a run of lowercase letters followed by
# "=", a bareword made of letters, digits and "-", ":", ".", "_", or a
# double-quoted string in which a backslash may precede a backslash, a
# double quote or "n".
graphresult_re = re.compile(
    r'(\(|\)| |\-\>|\<\-|[a-z]+\=|[\-\:\._A-Za-z0-9]+|\"(?:[^\"\\]|\\[\\\"n])*\")'
)
119 |
120 |
class GraphResult(list):
    """A list that can also carry attributes.

    ReplyParser.get_reply sets status, cost and dateline on it, and
    errcode/errmsg for error replies.
    """
123 |
124 |
class ReplyParser:
    """Splits graphd output into lines and parses each line into lists.

    Text is fed in with parsestr(); every completed line (terminated by a
    newline) is tokenized with graphresult_re and turned into nested
    Python lists, one per parenthesised group.  All leaf elements are
    strings.  Parsed replies wait in ``replyqueue`` until get_reply() or
    get_reply_raw() takes them.
    """

    def __init__(self):
        self.inbuf = []
        self.replyqueue = []

        self.reset_parser()

    def reset_parser(self):
        """Reset the per-line parser state."""
        # instring, escaped and curstr are not read by any method of this
        # class; they are kept because they are public attributes.
        self.instring = 0  # true if we have read an open " but no close
        self.escaped = 0  # true if we just read a backslash
        # if instring is 1, curstr is a list of characters that
        # will be joined to make the string
        self.curstr = []
        # pieces of the line being collected - joined when the line is
        # complete (faster than string concat)
        self.curreply = []

    def parsestr(self, s):
        """Feed a chunk of text; parse every line it completes.

        Text after the last newline is kept in ``curreply`` until a later
        chunk completes it.
        """
        if '\n' not in s:
            self.curreply.append(s)
            return

        reply_list = s.split('\n')

        # the first piece completes whatever was collected so far
        self.curreply.append(reply_list.pop(0))

        for reply in reply_list:
            # parse the previous, now complete, line
            replystr = ''.join(self.curreply)
            self.parse_full_reply(replystr)
            self.reset_parser()

            # now start collecting the current line
            self.curreply.append(reply)

        # the last piece is not parsed here because it is incomplete

    def parse_full_reply(self, replystr):
        """Parse one complete reply line and queue the result.

        The reply is turned into nested lists of tokens, of the form:
            [ 'ok', 'id=', '"me;..."', [[['010000..', '01...', ...]]]]

        Raises:
            MQLGraphError: if the parentheses in the reply do not balance.
        """
        LOG.debug('graph.result', replystr)
        token_list = graphresult_re.findall(replystr)

        curlist = []
        stack = []

        for tok in token_list:
            if tok == '(':
                stack.append(curlist)
                curlist = []
            elif tok == ')':
                if not stack:
                    # a stray ")" is a malformed reply, not an IndexError
                    raise MQLGraphError(
                        None,
                        'unbalanced closing paren in reply',
                        reply=replystr,
                        tokens=token_list)
                sublist = curlist
                curlist = stack.pop()
                curlist.append(sublist)
            elif tok == '\n':
                raise MQLGraphError(
                    None,
                    'Not allowed a newline in parse_full_reply',
                    reply=replystr,
                    tokens=token_list)
            elif tok == ' ' or tok == '':
                pass
            else:
                curlist.append(tok)

        # len() rather than the loop index: correct count, and defined
        # even when the reply contained no tokens at all
        LOG.debug('graph.result.parsed',
                  'Parsed %d tokens' % len(token_list))
        if len(stack) != 0:
            raise MQLGraphError(
                None,
                'got linefeed in the middle of a reply?',
                reply=replystr,
                tokens=token_list,
                depth=len(stack))

        self.replyqueue.append(curlist)

    def get_reply_raw(self):
        """Remove and return the oldest parsed reply as plain nested lists."""
        return self.replyqueue.pop(0)

    def get_reply(self):
        """Remove the oldest parsed reply and return it as a GraphResult.

        The first token becomes ``status``.  For 'ok' the last element is
        the payload; for 'error' the next token is ``errcode`` and the
        last element, unquoted, is ``errmsg``.  Remaining 'cost=',
        'dateline=' and 'id=' pairs are stored as attributes.

        Raises:
            MQLGraphError: unknown status, unknown modifier, or an error
                reply whose code is none of the ones below or 'EMPTY'.
            MQLCursorInvalidError: error code 'BADCURSOR'.
            MQLDatelineInvalidError: error code 'DATELINE'.
            GraphIsSnapshottingError: error code 'AGAIN'.
            MQLTimeoutError: error code 'COST'.
        """
        l = self.get_reply_raw()
        result = GraphResult()
        result.status = l.pop(0)
        result.cost = None
        result.dateline = None

        if result.status == 'ok':
            result += l.pop()
        elif result.status == 'error':
            result.errcode = l.pop(0)
            result.errmsg = unquote(l.pop())
        else:
            raise MQLGraphError(
                None, 'grparse: unknown graphd reply type', header=l[0], reply=l)

        # what's left is info messages from graphd, as name=/value pairs
        li = 0
        while li < len(l):
            rv = l[li]
            if isinstance(rv, str) and rv in ('cost=', 'dateline=', 'id='):
                modifier = rv[:-1]
                setattr(result, modifier, unquote(l[li + 1]))
                li += 2
            else:
                raise MQLGraphError(
                    None,
                    'unknown response modifier from graphd',
                    header=l[li],
                    reply=l)

        if result.status == 'error':
            errcode = result.errcode
            if errcode == 'BADCURSOR':
                raise MQLCursorInvalidError(None, result.errmsg)
            if errcode == 'DATELINE':
                raise MQLDatelineInvalidError(None, result.errmsg)
            if errcode == 'AGAIN':
                raise GraphIsSnapshottingError(None, result.errmsg)
            if errcode == 'COST':
                raise MQLTimeoutError(
                    None, 'Query too difficult.', cost=result.cost)
            # 'EMPTY' is the one error code that is returned, not raised
            if errcode != 'EMPTY':
                raise MQLGraphError(
                    None,
                    'error %(subclass)s: %(detail)s',
                    detail=result.errmsg,
                    subclass=errcode,
                    dateline=result.dateline)
        return result

    def put_buf(self, buf):
        """Append ``buf`` to ``inbuf``."""
        self.inbuf.append(buf)

    def isready(self):
        """Return True when at least one parsed reply is waiting."""
        return len(self.replyqueue) > 0
274 |
275 |
276 | # this is different from a normal list printer because it
277 | # assumes that any sublists will come at the end.
278 | # of course that's wrong. hmmph.
def print_result(l, indent=''):
    """Print a nested list to stdout.

    None is printed as ``indent + 'None'``.  For a list, consecutive
    non-list items are printed on one line that starts with ``indent``;
    each nested list is printed by a recursive call with ``indent``
    extended.  A value that is neither None nor a list prints nothing.
    """
    if l is None:
        print indent + 'None'
        return
    #print type(l)
    if isinstance(l, list):
        # dangle is 1 while a line of plain items is still open
        dangle = 0
        for li in l:
            if isinstance(li, list):
                if dangle:
                    # finish the open line before descending
                    print
                    dangle = 0
                print_result(li, indent + ' ')
            else:
                if not dangle:
                    # first plain item on this line: emit the indent
                    print indent,
                    dangle = 1
                print str(li),
        if dangle:
            # terminate the last open line
            print
299 |
--------------------------------------------------------------------------------