├── bazel
    ├── BUILD
    └── six.BUILD
├── .gitignore
├── api
    ├── .gitignore
    ├── __init__.py
    ├── hicache.py
    └── op.py
├── util
    ├── .gitignore
    ├── __init__.py
    ├── misc.py
    ├── http.py
    ├── unionfind.py
    ├── attrib.py
    ├── dumper.py
    ├── keyquote.py
    ├── mwdatetime.py
    ├── parsedt.py
    └── pattern.py
├── formats
    ├── .gitignore
    ├── __init__.py
    ├── uniqstr.py
    ├── http.py
    ├── image.py
    └── contenttype.py
├── emql
    ├── .gitignore
    ├── adapters
    │   ├── test
    │   │   ├── .gitignore
    │   │   ├── __init__.py
    │   │   ├── test_nytimes.py
    │   │   ├── test_twitter.py
    │   │   └── test_weblink.py
    │   ├── .gitignore
    │   ├── __init__.py
    │   ├── lib.py
    │   ├── metacritic.py
    │   ├── stats.py
    │   ├── twitter.py
    │   ├── quote.py
    │   ├── text.py
    │   ├── nytimes.py
    │   └── search.py
    ├── __init__.py
    ├── docs
    │   └── documentation.css
    └── apikeys.py
├── mql
    ├── .gitignore
    ├── graph
    │   ├── __init__.py
    │   └── conn_mock.py
    ├── __init__.py
    ├── grquoting.py
    ├── mid.py
    ├── pathexpr.py
    ├── benchmark.py
    └── grparse.py
├── OWNERS
├── bootstrap
    ├── BUILD
    └── bootstrap.py
├── test
    ├── config.cfg
    ├── __init__.py
    ├── query_sort_test.py
    ├── regression_id_test.py
    ├── mql_exceptions_test.py
    ├── best_hrid_test.py
    ├── mql_fixture_test.py
    ├── BUILD
    ├── cost_test.py
    ├── return_test.py
    └── regression_misc_test.py
├── tid.py
├── log
    ├── __init__.py
    ├── log_util.py
    └── log.py
├── WORKSPACE
├── pymql_import_test.py
├── CONTRIBUTING.md
├── BUILD
├── mqlbin.py
├── README.md
└── error.py


/bazel/BUILD:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bazel-*
2 | 


--------------------------------------------------------------------------------
/api/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | 


--------------------------------------------------------------------------------
/util/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | 


--------------------------------------------------------------------------------
/formats/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | 


--------------------------------------------------------------------------------
/emql/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | /*.pyo
3 | 


--------------------------------------------------------------------------------
/emql/adapters/test/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | 


--------------------------------------------------------------------------------
/emql/adapters/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | /*.pyo
3 | 


--------------------------------------------------------------------------------
/mql/.gitignore:
--------------------------------------------------------------------------------
1 | /*.pyc
2 | /*.out
3 | /*.tmp
4 | /*.err
5 | 


--------------------------------------------------------------------------------
/OWNERS:
--------------------------------------------------------------------------------
1 | rtp
2 | warrenharris
3 | file://depot/google3/metaweb/freebase/OWNERS
4 | 


--------------------------------------------------------------------------------
/bootstrap/BUILD:
--------------------------------------------------------------------------------
 1 | 
 2 | py_binary(
 3 |     name = "bootstrap",
 4 |     srcs = ["bootstrap.py"],
 5 |     python_version = "PY2",
 6 |     deps = [
 7 |         "//:mql",
 8 |     ],
 9 | )
10 | 


--------------------------------------------------------------------------------
/test/config.cfg:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.4
2 | #
3 | # Author: bneutra@google.com (Brendan Neutra)
4 | # flags to run mql tests
5 | --graphd_addr=blade:freebase-graphd-sandbox
6 | # replay|record|nomock
7 | # NOTE: with the introduction of randomized hashing of dicts in 2.7 mocking no longer functions
8 | --mockmode=nomock
9 | 


--------------------------------------------------------------------------------
/bazel/six.BUILD:
--------------------------------------------------------------------------------
 1 | # Description:
 2 | #   Six provides simple utilities for wrapping over differences between Python 2
 3 | #   and Python 3.
 4 | 
 5 | licenses(["notice"])  # MIT
 6 | 
 7 | exports_files(["LICENSE"])
 8 | 
 9 | py_library(
10 |     name = "six",
11 |     srcs = ["six.py"],
12 |     visibility = ["//visibility:public"],
13 | )


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 


--------------------------------------------------------------------------------
/formats/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/emql/adapters/test/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 


--------------------------------------------------------------------------------
/emql/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # emql package
16 | 


--------------------------------------------------------------------------------
/tid.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from pymql.log import generate_tid
17 | generate_transaction_id = generate_tid
18 | 


--------------------------------------------------------------------------------
/util/misc.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import re
17 | 
18 | # wsplit
19 | wsplit_re = re.compile('\s+')
20 | def wsplit(s):
21 |     return wsplit_re.split(s.strip())
22 | 
23 | 


--------------------------------------------------------------------------------
/emql/adapters/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # adapters package
16 | 
17 | # do not add any import here since if it were to fail, all python adapters
18 | # would fail to load
19 | 
20 | 


--------------------------------------------------------------------------------
/log/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python2.6
 2 | # Copyright 2020 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Backward compatible support for mql LOG calls"""
17 | 
18 | __author__ = 'bneutra@google.com (Brendan Neutra)'
19 | 
20 | from log import *
21 | 


--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
 1 | workspace(name = "pymql")
 2 | 
 3 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 4 | 
 5 | # Abseil-py
 6 | http_archive(
 7 |     name = "absl_py",
 8 |     sha256 = "fe3948746ca0543f01fb7767fb00bb739c7fe7e2514180c1575100b988b66542",
 9 |     strip_prefix = "abseil-py-master",
10 |     urls = ["https://github.com/abseil/abseil-py/archive/master.zip"],
11 | )
12 | 
13 | http_archive(
14 |     name = "six_archive",
15 |     build_file = "@//bazel:six.BUILD",
16 |     sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a",
17 |     strip_prefix = "six-1.10.0",
18 |     urls = [
19 |         "http://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
20 |         "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
21 |     ],
22 | )
23 | 


--------------------------------------------------------------------------------
/mql/graph/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python2.6
 2 | # Copyright 2020 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | #
17 | 
18 | __author__ = 'nicholasv@google.com (Nicholas Veeser)'
19 | 
20 | __all__ = ['TcpGraphConnector', 'MockRecordConnector', 'MockReplayConnector']
21 | 
22 | from conn_tcp import TcpGraphConnector
23 | from conn_mock import MockRecordConnector
24 | from conn_mock import MockReplayConnector
25 | 


--------------------------------------------------------------------------------
/log/log_util.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python2.6
 2 | # Copyright 2020 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Backward compatible support for mql LOG calls, Levels."""
16 | 
17 | __author__ = 'bneutra@google.com (Brendan Neutra)'
18 | 
19 | from absl import logging
20 | 
21 | FATAL = logging.FATAL
22 | ERROR = logging.ERROR
23 | CRIT = ALERT = ERROR
24 | WARN = logging.WARN
25 | WARNING = WARN
26 | INFO = logging.INFO
27 | NOTICE = INFO
28 | DEBUG = logging.DEBUG
29 | SPEW = 2  # e.g. mql.utils.dumplog: for things that are expensive and verbose
30 | 


--------------------------------------------------------------------------------
/pymql_import_test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python2.4
 2 | # Copyright 2020 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | """Import unittest for pymql"""
17 | 
18 | __author__ = 'rtp@google.com (Tyler Pirtle)'
19 | 
20 | import google3
21 | from google3.testing.pybase import googletest
22 | 
23 | 
24 | class PymqlImportTest(googletest.TestCase):
25 | 
26 |   def canImport(self):
27 |     import pymql
28 | 
29 |   def canInit(self):
30 |     import pymql
31 |     mql = pymql.MQLService(graphd_addrs=['localhost:8100'])
32 | 
33 |   def emqlCanImport(self):
34 |     import pymql.emql.emql
35 | 
36 | 
37 | if __name__ == '__main__':
38 |   googletest.main()
39 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute
 2 | 
 3 | We'd love to accept your patches and contributions to this project. There are
 4 | just a few small guidelines you need to follow.
 5 | 
 6 | ## Contributor License Agreement
 7 | 
 8 | Contributions to this project must be accompanied by a Contributor License
 9 | Agreement (CLA). You (or your employer) retain the copyright to your
10 | contribution; this simply gives us permission to use and redistribute your
11 | contributions as part of the project. Head over to
12 | <https://cla.developers.google.com/> to see your current agreements on file or
13 | to sign a new one.
14 | 
15 | You generally only need to submit a CLA once, so if you've already submitted one
16 | (even if it was for a different project), you probably don't need to do it
17 | again.
18 | 
19 | ## Code reviews
20 | 
21 | All submissions, including submissions by project members, require review. We
22 | use GitHub pull requests for this purpose. Consult
23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
24 | information on using pull requests.
25 | 
26 | ## Community Guidelines
27 | 
28 | This project follows
29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).


--------------------------------------------------------------------------------
/emql/docs/documentation.css:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2020 Google LLC
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *      http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | @import url(html4css1.css);
18 | 
19 | html {
20 |     color: black;
21 |     background-color: white;
22 | }
23 | 
24 | body {
25 |     margin-left: 10ex;
26 |     margin-top: 5ex;
27 |     padding-left: 1ex;
28 |     border-left: 1px solid #006;
29 |     width: 75ex;
30 |     background-color: white;
31 | }
32 | 
33 | h1 {
34 |     border-bottom: 2px solid #006;
35 | }
36 | 
37 | dt {
38 |     font-weight: bold;
39 | }
40 | 
41 | h1, h2, h3, h4, h5, h6 {
42 |     font-family: Helvetica, Arial, sans-serif;
43 |     padding: 4px;
44 |     font-size: 100%;
45 | }
46 | 
47 | h1.title {
48 |     font-size: 120%;
49 |     background-color: orange;
50 | }
51 | 


--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
 1 | # Author: rtp@google.com (Tyler Pirtle)
 2 | #
 3 | # Description:
 4 | #  mql - implementation(s) of the Metaweb Query Language
 5 | 
 6 | package(default_visibility = ["//visibility:public"])
 7 | 
 8 | py_library(
 9 |     name = "mql",
10 |     srcs = [
11 |         "__init__.py",
12 |         "tid.py",
13 |         "error.py",
14 |         "api/__init__.py",
15 |         "api/envelope.py",
16 |         "formats/__init__.py",
17 |         "formats/http.py",
18 |         "util/__init__.py",
19 |         "util/dumper.py",
20 |         "util/keyquote.py",
21 |         "util/mwdatetime.py",
22 |     ] + glob([
23 |         "log/*.py",
24 |         "mql/*.py",
25 |         "mql/graph/*.py",
26 |     ]),
27 |     deps = [
28 |         "@absl_py//absl:app",
29 |         "@absl_py//absl/flags",
30 |         "@absl_py//absl/logging",
31 |     ],
32 | )
33 | 
34 | #py_test(
35 | #    name = "pymql_import_test",
36 | #    size = "small",
37 | #    srcs = ["pymql_import_test.py"],
38 | #    deps = [
39 | #        ":mql",
40 | #        "//pyglib",
41 | #        "//testing/pybase",
42 | #    ],
43 | #)
44 | 
45 | py_binary(
46 |     name = "mqlbin",
47 |     srcs = ["mqlbin.py"],
48 |     python_version = "PY2",
49 |     deps = [
50 |         ":mql",
51 |     ],
52 | )
53 | 
54 | test_suite(
55 |     name = "AllTests",
56 |     tests = [
57 |         "//third_party/py/pymql/test:AllTests",
58 |     ],
59 | )
60 | 


--------------------------------------------------------------------------------
/emql/adapters/lib.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import mw
16 | 
17 | def bdb_lookup(me,guid,bdb):
18 |     guid = guid.replace('#','/guid/')
19 |     path = mw.blob.blobclient.BLOBClient.get_static_relative_url(bdb, guid)
20 |     hostname,port=me.mss.ctx.clobd_read_addrs[0]
21 |     hostname=hostname + ':' + str(port)
22 |     url, connection = me.get_session().http_connect(hostname, path)
23 |     connection.request('GET', url)
24 |     response = connection.getresponse()
25 |     result   = response.read()
26 |     #TODO: how to do debugging? LOG if debug?
27 |     #print "metacritic_adapter: result: %s" % result
28 |     if response.status==200:
29 |         return mw.json.loads(result)
30 |     elif response.status==404:
31 |         return None
32 |     else:
33 |         #TODO: Log unexpected status from BDB
34 |         return None
35 | 


--------------------------------------------------------------------------------
/api/__init__.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | # Copyright 2020 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
17 | from service import Session, ServiceContext                     
18 | from content import Content, ContentWrapper, NEW_DOCUMENT
19 | from envelope import MQLEnvelope
20 | from mw.mql.error import (MQLError, MQLParseError, MQLInternalError, 
21 |                           MQLTypeError, MQLResultError, MQLInternalParseError, 
22 |                           NamespaceException)
23 | 
24 | Session               # PYFLAKES
25 | ServiceContext        # PYFLAKES
26 | Content               # PYFLAKES
27 | ContentWrapper        # PYFLAKES
28 | NEW_DOCUMENT          # PYFLAKES
29 | MQLEnvelope           # PYFLAKES
30 | MQLError              # PYFLAKES
31 | MQLParseError         # PYFLAKES
32 | MQLInternalError      # PYFLAKES
33 | MQLTypeError          # PYFLAKES
34 | MQLResultError        # PYFLAKES
35 | MQLInternalParseError # PYFLAKES 
36 | NamespaceException    # PYFLAKES
37 | 


--------------------------------------------------------------------------------
/util/http.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import re
16 | import urllib2
17 | 
18 | ip_address = re.compile(r"^\d+\.\d+\.\d+.\d+$").match
19 | 
20 | def parse_domain_from_host(host):
21 |     host = host.split(':')[0]
22 | 
23 |     if not ip_address(host):
24 |         # the domain is the last one or two dot-separated words
25 |         domain = '.'.join(host.rsplit(".", 2)[-2:])
26 |     else:
27 |         domain = host
28 |     
29 |     return domain
30 | 
31 | def get_http_proxy_opener(mss):
32 |     """
33 |     Lazily retrieve proxy info
34 |     """
35 |     config = mss.full_config
36 | 
37 |     proxy_addr = config.get('me.external_proxy', '').strip()
38 |     if not proxy_addr:
39 |         return urllib2.urlopen
40 |     else:
41 |         proxy_handler = urllib2.ProxyHandler({'http': proxy_addr})
42 |         return urllib2.build_opener(proxy_handler).open
43 | 
44 | def proxied_urlopen(request, mss):
45 |     opener = get_http_proxy_opener(mss)
46 |     return opener(request)
47 | 


--------------------------------------------------------------------------------
/mql/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """This is the beginning of a public API for doing MQL/LoJSON reads and writes.
15 | 
16 | Intended usage (using mql_read as an example; note that the functions are currently commented out below)
17 | 
18 | from mw.mql import mql_read, MiniContext
19 | 
20 | query = {
21 |   "query":[{
22 |     "id":"/common/topic",
23 |     "type":"/type/type",
24 |     "properties":[{}]
25 |   }]
26 | }
27 | 
28 | ctx = MiniContext(("localhost", 1234))
29 | result = mql_read(ctx, query)
30 | 
31 | """
32 | 
33 | #from pathexpr import wrap_query
34 | #from mw.log import LOG
35 | #
36 | #__all__ = ['mql_read', 'mql_write', 'MiniContext']
37 | #
38 | #def mql_read(ctx, query, varenv=None, transaction_id=None):
39 | #    LOG.error("deprecated", "mw.mql.mql_read()")
40 | #    return wrap_query(ctx.high_querier.read, query, varenv, transaction_id)
41 | #
42 | #def mql_write(ctx, query, varenv=None, transaction_id=None):
43 | #    LOG.error("deprecated", "mw.mql.mql_write()")
44 | #    assert not ctx.gc.readonly, "Context must be created with readonly=False"
45 | #    return wrap_query(ctx.high_querier.write, query, varenv, transaction_id)
46 | 


--------------------------------------------------------------------------------
/mqlbin.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """A simple wrapper to demonstrate basic mql reads and writes."""
15 | 
16 | __author__ = "bneutra@google.com (Brendan Neutra)"
17 | 
18 | import json
19 | 
20 | from absl import app
21 | from absl import flags
22 | from collections import OrderedDict
23 | from pymql import MQLService
24 | from pymql.mql.graph import TcpGraphConnector
25 | 
26 | FLAGS = flags.FLAGS
27 | flags.DEFINE_string(
28 |     "mqlenv", None, "a dict in the form of a string which "
29 |     "contains valid mql env key/val pairs")
30 | 
31 | flags.DEFINE_string("mqlcmd", None, "'read' or 'write'")
32 | flags.DEFINE_string("graphd_addr", "localhost:9100",
33 |                     "host:port of graphd server")
34 | 
35 | 
36 | def main(argv):
37 |   if not FLAGS.graphd_addr:
38 |     raise Exception("Must specify a --graphd_addr")
39 | 
40 |   conn = TcpGraphConnector(addrs=[("localhost", 8100)])
41 |   mql = MQLService(connector=conn)
42 | 
43 |   q = json.loads(argv[1], object_pairs_hook=OrderedDict)
44 |   env = {}
45 |   if FLAGS.mqlenv:
46 |     env = json.loads(FLAGS.mqlenv)
47 | 
48 |   if FLAGS.mqlcmd == "read":
49 |     print mql.read(q, **env)
50 |   elif FLAGS.mqlcmd == "write":
51 |     print mql.write(q, **env)
52 |   else:
53 |     print "you must provie a --mqlcmd, either 'read' or 'write'"
54 | 
55 | 
56 | if __name__ == "__main__":
57 |   app.run(main)
58 | 


--------------------------------------------------------------------------------
/util/unionfind.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | #
16 | #
17 | #   flexible union-find operations
18 | #
19 | #   you can use a particular attribute of the objects you're working
20 | #     with as the union chain attribute.
21 | #    
22 | #
23 | #   NOT WELL-TESTED  
24 | #
25 | #
26 | 
27 | #
28 | # union-find: merge two nodes
29 | #  the first argument is favored as the new common root
30 | #
def union(node1, node2, chainattr):
    """Merge the sets that contain node1 and node2.

    Both roots are located with find().  When they differ, the root of
    node2's set is re-pointed, through the attribute named by chainattr, at
    the root of node1's set, so node1's root becomes the common root.
    Returns None.
    """
    keep = find(node1, chainattr)
    absorb = find(node2, chainattr)
    if not (keep == absorb):
        setattr(absorb, chainattr, keep)
36 | 
37 | #
38 | # union-find: find the definitive member of a set,
39 | #  collapsing lookup chains along the way
40 | #
def find(node, chain_attr=None, chain_get=None, chain_put=None):
    """Return the root of node's set, re-pointing the walked chain at it.

    The parent link of a member is read with chain_get(member) and written
    with chain_put(member, parent).  When either callable is omitted it
    defaults to getattr/setattr on the attribute named chain_attr.  A root
    is a member whose parent link compares equal to itself.
    """
    if chain_get is None:
        def chain_get(member):
            return getattr(member, chain_attr)
    if chain_put is None:
        def chain_put(member, parent):
            return setattr(member, chain_attr, parent)

    # First pass: follow parent links until a member points at itself.
    root = node
    parent = chain_get(root)
    while not (root == parent):
        root = parent
        parent = chain_get(root)

    # Second pass: walk the same chain again and point each member straight
    # at the root; stop at the first member that already does.
    cursor = node
    following = chain_get(cursor)
    while not (following == root):
        chain_put(cursor, root)
        cursor = following
        following = chain_get(cursor)

    return root
72 | 
73 | 


--------------------------------------------------------------------------------
/emql/adapters/test/test_nytimes.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from mw.tests.helpers import TestFixture
16 | from mw.emql import emql
17 | 
class TestNytimes_adapter(TestFixture):
    """Exercises the /base/topics/news/nytimes extension through emqlread."""

    def setUp(self):
        super(TestNytimes_adapter, self).setUp()
        # One emql cache instance is passed to every query of a test.
        self.cache = emql.emql_cache()

    def run_query(self, q):
        """Run q through emqlread with the configured API key; return results."""
        nyt_key = self.mss.ctx.config['extensions.nytimes_articles']
        envelope = {'debug': True, 'cache': False}
        response = self.mss.emqlread(None, q, envelope,
                                     api_keys={'nytimes_articles': nyt_key},
                                     cache=self.cache)
        # emqlread yields a (debug, cursors, results) triple.
        _debug, _cursors, results = response
        return results

    def test_stephen_colbert(self):
        prop = "/base/topics/news/nytimes"
        r = self.run_query({"id": "/en/stephen_colbert",
                            prop: [{'limit': 4}]})
        articles = r[prop]
        assert articles
        self.assertEqual(len(articles), 4)

    def test_us_presidents(self):
        prop = "/base/topics/news/nytimes"
        query = [{"id": None,
                  prop: [{"limit": 1}],
                  "limit": 3,
                  "/people/person/date_of_birth": None,
                  "sort": "-/people/person/date_of_birth",
                  "type": "/government/us_president"}]
        for president in self.run_query(query):
            articles = president[prop]
            assert articles
            self.assertEqual(len(articles), 1)
48 |         
49 | 
50 | 
51 | 


--------------------------------------------------------------------------------
/util/attrib.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from mw.mql import scope
16 | from mw.log import LOG
17 | 
def set_oauth_attribution_if_needed(mss):
    """Look up, or create, the attribution node for the authorized app.

    Does nothing when mss.authorized_app_id is falsy.  Otherwise it queries
    for a /freebase/written_by node created by the current user for that
    application, writes a new one if none exists, and passes the node's id
    to mss.push_variables(attribution=...).
    """
    app_id = mss.authorized_app_id
    if not app_id:
        return

    user_id = mss.get_user_id()

    lookup = [{
        "id": None,
        "creator": user_id,
        "type": "/freebase/written_by",
        "/freebase/written_by/application": {"id": app_id}
    }]
    found = mss.mqlread(lookup, cache=False)

    if not found:
        # No attribution yet: create one under the oauth permission.
        create = {
             "create": "unconditional",
             "id": None,
             "/freebase/written_by/application": {
                "connect": "insert",
                "id": app_id
             },
             "type": ["/freebase/written_by", "/type/attribution"]
        }
        with mss.push_variables(permission="/boot/oauth_permission",
                                privileged=scope.Privileged,
                                authority=None):
            attribution = mss.mqlwrite(create)
    else:
        if len(found) > 1:
            # More than one attribution exists: log it and use the first.
            LOG.warn("set_oauth_attribution_if_needed.duplicate",
                     "duplicate attributions for %s and %s" % (app_id, user_id),
                     application_id=app_id,
                     user_id=user_id,
                     attributions=found)
        attribution = found[0]

    # NOTE(review): push_variables is used as a context manager above, but
    # here its return value is discarded -- confirm the attribution takes
    # effect without entering it.
    mss.push_variables(attribution=attribution['id'] if attribution else None)
57 | 


--------------------------------------------------------------------------------
/test/query_sort_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | """Query sorting unittest for pymql."""
17 | 
18 | __author__ = 'bneutra@google.com (Brendan Neutra)'
19 | 
20 | import collections
21 | import json
22 | 
23 | import google3
24 | import pymql
25 | 
26 | from google3.testing.pybase import googletest
27 | 
# Flat fixture mixing value kinds (None, str, list, dict, float, bool) and
# one integer key.
# NOTE(review): the OrderedDict is built from a plain dict literal, so its
# key order is whatever that dict iterates in, not the order written here.
testdictpart = collections.OrderedDict({
    'propd': None,
    'propc': 'foo',
    'propb': [],
    'prope': {},
    'propf': 1.1,
    11: False
})

# Query under test: starts as a copy of the flat fixture ...
testdict = testdictpart.copy()

# ... then gains a nested dict, a list of dicts, and a dict nested inside
# the second list element.
testdict['propa'] = testdictpart.copy()
testdict['propg'] = [testdictpart.copy(), testdictpart.copy()]
testdict['propg'][1]['propa'] = testdictpart.copy()
42 | 
43 | 
def IsSorted(part):
  """Check that all keys are sorted.

  Lists are checked element by element and dicts recursively through their
  values; any other value counts as sorted.

  Args:
    part: a dict, a list, or any other value.

  Returns:
    True if every dict reachable from part iterates its keys in sorted
    order, False otherwise.
  """
  if isinstance(part, list):
    return all(IsSorted(item) for item in part)

  if isinstance(part, dict):
    # Compare two real lists: on Python 3, keys() is a view that never
    # compares equal to the list returned by sorted().
    keys = list(part)
    if sorted(keys) != keys:
      return False
    # values() exists on both Python 2 and 3, unlike iteritems().
    return all(IsSorted(value) for value in part.values())

  return True
58 | 
59 | 
class PymqlSortTest(googletest.TestCase):
  """Checks pymql.sort_query_keys against the module-level testdict."""

  def testSorting(self):
    """basic sorting test."""
    result = pymql.sort_query_keys(testdict)
    self.assertTrue(IsSorted(result))
    # The input itself is expected to be unsorted.
    self.assertFalse(IsSorted(testdict))

    def as_plain(query):
      # A JSON round trip turns the ordered structures into plain dicts.
      return json.loads(json.dumps(query))

    # Sorting must not change the content; assertDictEqual ignores order.
    self.assertDictEqual(as_plain(result), as_plain(testdict))
74 | 
75 | 
76 | if __name__ == '__main__':
77 |   googletest.main()
78 | 


--------------------------------------------------------------------------------
/emql/adapters/metacritic.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import mw
16 | from lib import bdb_lookup
17 | 
18 | #TODO: python docs
19 | #TODO: log exceptions?
20 | 
class metacritic_adapter(mw.emql.adapter.Adapter):
    """Adapter that answers with metacritic scores looked up via bdb_lookup."""

    # NOTE(review): shared secret hard-coded in source -- confirm this is
    # intended.
    SECRET='random_rodent'

    def make_result(self, key, scores):
        """Build the per-guid result dict from a metacritic key and scores."""
        url = 'http://www.metacritic.com/video/titles/%s' % key
        return dict(key=key,
                    url=url,
                    score=scores['metascore'],
                    userscore=scores['userscore'],
                    attribution_html='<span>TODO</span>')

    def check_secret(self, params, guid, result):
        """Return True if params['query']['secret'] matches SECRET.

        On mismatch an 'Invalid auth' error entry is stored in result[guid]
        and False is returned.
        """
        query = params.get('query')
        if query and query.get('secret') == self.SECRET:
            return True
        result[guid] = { 'error':'Invalid auth' }
        return False

    def get_key(self, me, guid):
        """Return the first 'source-metacritic-movie' hit for guid, or None."""
        hits = bdb_lookup(me, guid, 'source-metacritic-movie')
        return hits[0] if hits else None

    def get_scores(self, me, guid):
        """Return whatever bdb_lookup has under 'metacritic-scores' for guid."""
        return bdb_lookup(me, guid, 'metacritic-scores')

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Map each guid in args to its metacritic result.

        Guids that fail the secret check get an error entry; guids with no
        key or no scores are left out of the returned dict.
        """
        result = {}
        for mqlres in args:
            guid = mqlres['guid']
            if self.check_secret(params, guid, result):
                key = self.get_key(me, guid)
                if key:
                    scores = self.get_scores(me, guid)
                    if scores:
                        result[guid] = self.make_result(key, scores)
        return result
67 |     
68 | 


--------------------------------------------------------------------------------
/test/regression_id_test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python2.4
 2 | # Copyright 2020 Google LLC
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # -*- coding: utf-8 -*-
17 | #
18 | """test regressions around id resolution."""
19 | 
20 | __author__ = 'bneutra@google.com (Brendan Neutra)'
21 | 
22 | import google3
23 | from pymql.mql import error
24 | from pymql.test import mql_fixture
25 | 
26 | 
27 | class MQLTest(mql_fixture.MQLTest):
28 | 
  def setUp(self):
    """Register the mock data path, run the base setUp, and set self.env."""
    # The mock path is set before the base class setUp runs.
    self.SetMockPath('data/regression_id.yaml')
    super(MQLTest, self).setUp()
    # Environment used by the tests in this class: as_of_time 2009-10-01.
    self.env = {'as_of_time': '2009-10-01'}
33 | 
34 |   def testDeepId(self):
35 |     # buganizer: 4363162
36 |     query = """
37 | {"id":
38 | "/en/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a"}
39 | """
40 |     self.DoQuery(query, exp_response='null')
41 | 
42 |   def testTooDeepId(self):
43 |     # buganizer: 4363162
44 |     # id path limit is 200 deep
45 |     query = """
46 | {"id":
47 | "/en/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a"}
48 | """
49 |     exc_response = (
50 |         error.MQLParseError,
51 |         'Id has too many segments. Maximum is 200'
52 |     )
53 | 
54 |     self.DoQuery(query, exc_response=exc_response)
55 | 
56 | if __name__ == '__main__':
57 |   mql_fixture.main()
58 | 


--------------------------------------------------------------------------------
/util/dumper.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pprint
16 | 
# Types that dump() returns unchanged.  basestring and long exist only on
# Python 2; on Python 3 the equivalent set is used instead.
try:
    _SCALAR_TYPES = (basestring, str, bool, int, float, long)
except NameError:
    _SCALAR_TYPES = (str, bytes, bool, int, float)


def dump(object, depth=10, ctx=None):
    """Convert an object graph into plain structures suitable for pprint.

    Args:
        object: the value to convert.
        depth: remaining levels of nesting to expand; once it is below zero
            a "!!DEPTH!!" marker string is returned instead of descending.
        ctx: bookkeeping dict shared across the recursion.  It maps id() of
            every visited container or instance to its
            "<Type instance at 0x...>" label.  It is also consulted for type
            names (a match yields "!!SKIPPED!!") and for attribute names to
            leave out.  A fresh dict is used when None.

    Returns:
        Scalars, None and empty dict/list/tuple objects are returned as is.
        A dict becomes a new dict with an extra '!!REPR!!' entry.  A list or
        tuple becomes a list whose first element is the label.  Any other
        object becomes a dict holding '!!REPR!!' plus its dumped __dict__
        entries.  A value seen before yields "!!REPEAT!!" plus its label.
    """
    if ctx is None:
        ctx = {}

    # don't subclass these types. Please!
    if object is None or isinstance(object, _SCALAR_TYPES):
        return object

    # subclasses of these types are interesting.
    if (type(object) in [dict, list, tuple]) and len(object) == 0:
        return object

    oid = id(object)
    if oid in ctx:
        return "!!REPEAT!!" + ctx[oid]

    typename = type(object).__name__
    if typename == 'instance':
        # Python 2 old-style instances all report the type name 'instance'.
        typename = object.__class__.__name__
    label = '<' + typename + ' instance at ' + hex(oid) + '>'
    ctx[oid] = label

    if typename in ctx:
        return "!!SKIPPED!!" + label

    if depth < 0:
        return "!!DEPTH!!" + label

    if isinstance(object, dict):
        result = { '!!REPR!!' : label }
        for k in object:
            result[k] = dump(object[k], depth - 1, ctx)
        return result

    if isinstance(object, (list, tuple)):
        result = [ label ]
        for item in object:
            result.append(dump(item, depth - 1, ctx))
        return result

    result = { '!!REPR!!' : label }
    try:
        for key in object.__dict__:
            if key not in ctx:
                result[key] = dump(object.__dict__[key], depth - 1, ctx)
    except Exception:
        # Best effort: objects without a usable __dict__ are shown by label
        # only.  Unlike a bare except, this lets KeyboardInterrupt and
        # SystemExit propagate.
        pass
    return result
68 |     
def dumper(object,depth=10,ctx=None):
    """Pretty-print the dump() of object using pprint.pprint.

    depth and ctx are forwarded to dump(); a fresh ctx dict is used when
    none is given.
    """
    seen = {} if ctx is None else ctx
    tree = dump(object, depth, seen)
    pprint.pprint(tree)
73 | 
def dumps(object, **kws):
    """Return the pprint-formatted text of dump(object, **kws)."""
    tree = dump(object, **kws)
    return pprint.pformat(tree)
76 | 
77 | 


--------------------------------------------------------------------------------
/emql/adapters/test/test_twitter.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from mw.tests.helpers import TestFixture
16 | from mw.emql import emql
17 | 
class TestTwitter_adapter(TestFixture):
    """Exercises the /base/topics/news/twitter_from extension via emqlread."""

    def setUp(self):
        super(TestTwitter_adapter, self).setUp()
        # One emql cache instance is passed to every query of a test.
        self.cache = emql.emql_cache()

    def run_query(self, q):
        """Run q through emqlread and return only the results part."""
        debug, cursors, results = self.mss.emqlread(None, q, {'debug': True, 'cache': False},
                                                    cache=self.cache)
        return results

    def test_stephen_colbert(self):
        # Without 'raw' in the query, tweets must not carry a 'raw' member.
        r = self.run_query({"id": "/en/stephen_colbert",
                            "/base/topics/news/twitter_from": [{'limit': 4}]})
        assert r["/base/topics/news/twitter_from"]
        self.assertEqual(len(r["/base/topics/news/twitter_from"]), 4)
        for tweet in r["/base/topics/news/twitter_from"]:
            # assertFalse replaces the deprecated failIf alias.
            self.assertFalse('raw' in tweet)

        # With 'raw': True every tweet must carry a 'raw' member.
        r = self.run_query({"id": "/en/stephen_colbert",
                            "/base/topics/news/twitter_from": [{'limit': 3,
                                                                'raw': True}]})
        assert r["/base/topics/news/twitter_from"]
        self.assertEqual(len(r["/base/topics/news/twitter_from"]), 3)
        for tweet in r["/base/topics/news/twitter_from"]:
            # assertTrue replaces the deprecated failUnless alias.
            self.assertTrue('raw' in tweet)


    def test_george_washington(self):
        # The query is expected to come back with an empty/falsy value.
        r = self.run_query({"id": "/en/george_washington",
                            "/base/topics/news/twitter_from": None})
        assert not r["/base/topics/news/twitter_from"]
50 | 
51 | #
52 | #    def test_us_presidents(self):
53 | #        results = self.run_query([{"id": None,
54 | #                                   "/base/topics/news/nytimes": [{"limit": 1}],
55 | #                                   "limit": 3,
56 | #                                   "/people/person/date_of_birth": None,
57 | #                                   "sort": "-/people/person/date_of_birth",
58 | #                                   "type": "/government/us_president"}])
59 | #        for r in results:
60 | #            assert r["/base/topics/news/nytimes"]
61 | #            self.assertEqual(len(r["/base/topics/news/nytimes"]), 1)
62 | #        
63 | #
64 | #
65 | 


--------------------------------------------------------------------------------
/emql/adapters/stats.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import math
17 | from itertools import izip, chain
18 | 
19 | from mw.emql.adapter import Adapter, AdapterUserError
20 | from mw.emql.emql import id_guid, formatted_id_guid
21 | 
22 | 
class stats_adapter(Adapter):
    """Adapter whose reduce phase folds MQL results into one statistic."""

    def reduce(self, tid, graph, mql, me, control, mqlres, params, api_keys):
        """Compute a statistic over one value taken from each MQL result.

        params['property'] names the operation (a leading '@' is ignored):
        'average', 'median', 'min', 'max', 'total' or 'sigma'.
        params['query']['value'] is a dot-separated path that is followed
        into every element of mqlres; values that are None are skipped.

        Returns:
            dict(value=<statistic>), or dict(value=None) when no values
            were collected.

        Raises:
            ValueError: if the query carries no 'value' path.
            AdapterUserError: if the collected values do not support the
                requested arithmetic (TypeError).
            NotImplementedError: if the operation is not one of the above.
        """
        # Python 3 has no builtin reduce; functools.reduce exists on both.
        from functools import reduce as fold

        op = params.get('property')
        query = params.get('query')

        # An absent or empty 'value' must be rejected here.  Splitting an
        # empty default string would give [''], which is truthy and used to
        # slip past this check.
        path = None
        if isinstance(query, dict):
            dotted = query.get('value')
            if dotted:
                path = dotted.split('.')

        if not path:
            raise ValueError("%s: missing 'value' argument" % (op))

        def get(res, prop):
            # One step along the path: index a dict directly; for anything
            # else take its first element and index that if it is a dict.
            if isinstance(res, dict):
                return res[prop]
            value = res[0]
            if isinstance(value, dict):
                value = value[prop]
            return value

        values = []
        for _mqlres in mqlres:
            value = fold(get, path, _mqlres)
            if value is not None:
                values.append(value)

        if not values:
            return dict(value=None)

        if op.startswith('@'):
            op = op[1:]

        try:
            if op == 'average':
                return dict(value=float(sum(values)) / len(values))

            if op == 'median':
                values.sort()
                # Floor division keeps the index an int on Python 3; for an
                # even count this picks the upper middle element.
                return dict(value=values[len(values) // 2])

            if op == 'min':
                return dict(value=min(values))

            if op == 'max':
                return dict(value=max(values))

            if op == 'total':
                return dict(value=sum(values))

            if op == 'sigma':
                # Population standard deviation (divides by len(values)).
                average = float(sum(values)) / len(values)
                squares = sum((value - average) * (value - average)
                              for value in values)
                return dict(value=math.sqrt(squares / len(values)))

        except TypeError as e:
            raise AdapterUserError('reduce', op, self.uri, str(e))

        raise NotImplementedError(op)

    def help(self, tid, graph, mql, me, control, params):
        """Return the help text for this adapter with its content type."""
        from docs import stats_adapter_help

        return 'text/x-rst;', stats_adapter_help
91 | 


--------------------------------------------------------------------------------
/formats/uniqstr.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """
16 | 
17 | UniqueStr is a base class for implementing enums
18 | as strings.
19 | 
20 | see MediaType and TextEncoding subclasses for example
21 | subclasses.
22 | 
23 | """
24 | 
class UniqueStr(str):
    """
    UniqueStr looks like an ascii str, but it has been normalized.

    It's a string that behaves like an enum: constructing the class twice
    with values that normalize to the same text returns the same object.

    Subclass this for values like media-types, charsets,
    language names, locale, etc.
    """

    # dictionary mapping names to known values.
    # multiple names may match to the same unique str if it has aliases.
    # this looks like a mapping from str to str but it's really a mapping
    # from str to UniqueStr.
    _known = dict()

    # if set, attempts to create new values will fail
    _exclusive = False


    def __new__(cls, s):
        """
        return the interned instance for s, creating it if necessary.

        raises ValueError if the value is unknown and the class is
        exclusive.
        """
        # make sure cls has its own _known and _exclusive -
        #  i'm sure there is a better way to do this...
        if '_known' not in cls.__dict__:
            cls._known = {}
            cls._exclusive = False

        s = cls.normalize(s)
        mt = cls._known.get(s)
        # test against None, not truthiness: an interned empty string is
        # falsy and would otherwise be re-created on every call.
        if mt is None:
            if cls._exclusive:
                raise ValueError("Unknown unique string")

            mt = str.__new__(cls, s)
            cls._known[s] = mt
        return mt


    @classmethod
    def normalize(cls, s):
        """
        normalize a string before intern-ing it.

        this is useful when there are multiple values of a string
        that are acceptable but you want to convert them to a
        preferred format, e.g. using a particular capitalization
        style for case-insensitive identifiers.

        this is also an opportunity to reject (with ValueError)
        invalid values.

        the base implementation converts non-str input with str() and
        strips surrounding whitespace.
        """
        if not isinstance(s, str):
            s = str(s)
            #raise ValueError('%s must be a string' % cls.__name__)

        return s.strip()


    def addalias(self, alias):
        """
        add an alias for this unique string.

        re-adding an alias that already maps to this object is a no-op;
        an alias that maps to a different object raises ValueError.

        you can do more powerful things by overriding .normalize().
        """
        if alias in self._known:
            if self is not self._known[alias]:
                raise ValueError('attempt to change UniqueStr alias')
            # XXX should log a warning here, but it's safe to continue
            return
        self._known[alias] = self
95 | 


--------------------------------------------------------------------------------
/test/mql_exceptions_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # -*- coding: utf-8 -*-
 16 | #
 17 | """Making sure new exceptions are called properly."""
 18 | 
 19 | __author__ = 'bobbyrullo@google.com (Bobby Rullo)'
 20 | 
 21 | import google3
 22 | import json
 23 | from pymql.mql import error
 24 | from pymql.test import mql_fixture
 25 | 
 26 | class MQLExceptionTest(mql_fixture.MQLTest):
 27 | 
 28 |   def setUp(self):
 29 |     super(MQLExceptionTest, self).setUp()
 30 |     self.env = {'user': '/user/mw_brendan'}
 31 | 
 32 | 
 33 |   def getFuzzKey(self, test_id):
 34 |     fuzz = self.getFuzz(test_id)
 35 |     fuzzKey = 'key_{0}'.format(fuzz[:fuzz.find('.')])
 36 |     return fuzzKey
 37 | 
 38 |   def newNode(self):
 39 |     query = json.dumps({
 40 |       "id": None,
 41 |       "create": "unconditional",
 42 |       })
 43 | 
 44 |     self.DoQuery(query, mqlwrite=True)
 45 |     new_id = self.mql_result.result['id']
 46 |     return new_id
 47 | 
 48 |   def testMQLValueAlreadyInUseError(self):
 49 |     key = self.getFuzzKey('alreadyInUse')
 50 | 
 51 |     new_id = self.newNode()
 52 | 
 53 |     query = {
 54 |         "id": new_id,
 55 |         "key": {
 56 |         "namespace": "/user/mw_brendan/default_domain",
 57 |         "value": key,
 58 |         "connect": "insert"
 59 |         }
 60 |       }
 61 | 
 62 |     self.DoQuery(json.dumps(query), mqlwrite=True)
 63 | 
 64 |     new_id = self.newNode()
 65 | 
 66 |     query['id'] = new_id
 67 | 
 68 |     self.DoQuery(json.dumps(query), mqlwrite=True,
 69 |                  exc_response = (
 70 |                      error.MQLValueAlreadyInUseError,
 71 |                      'This value is already in use. Please delete it first.'
 72 |                      ))
 73 | 
 74 | 
 75 |   def testMQLTooManyValuesForUniqueQuery(self):
 76 |     query = {
 77 |         "type": None,
 78 |         "id": "/en/sofia_coppola",
 79 |         "name": None
 80 |     }
 81 | 
 82 |     exc_response = (
 83 |         error.MQLTooManyValuesForUniqueQuery,
 84 |         "Unique query may have at most one result. Got 25"
 85 |     )
 86 |     self.DoQuery(json.dumps(query), exc_response=exc_response)
 87 | 
 88 | 
 89 |   def testMQLTooManyWrites(self):
 90 |     query = """
 91 |     {
 92 |       "create":"unconditional",
 93 |       "type":"/user/mw_brendan/default_domain/note",
 94 |       "name":"foobartoomanywrites",
 95 |       "id":null
 96 |     }
 97 |     """
 98 |     self.env = {
 99 |       'user': '/user/mw_brendan',
100 |       'max_writes': {
101 |         'limit': 0,
102 |         'guid': '9202a8c04000641f80000000011af200'
103 |       }
104 |     }
105 |     exc_response = (
106 |         error.MQLWriteQuotaError,
107 |         'Daily write limit of 0 was exceeded.'
108 |     )
109 |     self.DoQuery(query, mqlwrite=True, exc_response=exc_response)
110 | 
111 | if __name__ == '__main__':
112 |   mql_fixture.main()
113 | 


--------------------------------------------------------------------------------
/emql/adapters/twitter.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import mw, urllib, rfc822, time, datetime
16 | from collections import defaultdict
17 | from mw.emql.adapter import Adapter
18 | 
def rfc822_to_iso(d):
    """Convert an RFC 822 style date string to a naive ISO 8601 timestamp.

    The date and time fields are taken exactly as written in the input; any
    UTC offset in the string is ignored (as it was before), so for Twitter's
    "+0000" timestamps the result is the UTC wall-clock time.

    Args:
        d: date string such as 'Wed, 27 Aug 2008 13:08:45 +0000' or
           Twitter's 'Wed Aug 27 13:08:45 +0000 2008'.

    Returns:
        A string like '2008-08-27T13:08:45'.

    Raises:
        ValueError: if the string cannot be parsed as a date.
    """
    # email.utils.parsedate is the maintained equivalent of rfc822.parsedate.
    from email.utils import parsedate

    parsed = parsedate(d)
    if parsed is None:
        raise ValueError('unparseable RFC 822 date: %r' % (d,))

    # Build the datetime straight from the parsed fields.  The previous
    # mktime()/fromtimestamp() round trip interpreted the fields as local
    # standard time (tm_isdst=0) and converted back with DST applied, which
    # shifts the result by an hour on hosts whose local zone is in DST.
    return datetime.datetime(*parsed[:6]).isoformat()
24 | 
class tweets_from_adapter(Adapter):
    """eMQL adapter that fetches a user's timeline from twitter.com."""

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Return the MQL clause asking for an optional, single twitter_id."""
        twitter_id_clause = {'value': None, 'limit': 1, 'optional': True}
        return {
            '/internet/social_network_user/twitter_id': twitter_id_clause,
            ':extras': {'foo': 'bar'}
        }

    def _tweet_from_status(self, status, raw):
        """Map one timeline entry onto the adapter's tweet dictionary."""
        tweet = {
            'timestamp': rfc822_to_iso(status['created_at']),
            'key': status['id'],
            'text': status['text'],
            'user': {'name': status['user']['name'],
                     'profile_image_url': status['user']['profile_image_url'],
                     'screen_name': status['user']['screen_name'],
                     'url': 'http://twitter.com/%s' % status['user']['screen_name']},
            'url': 'http://twitter.com/%s/status/%s' % (status['user']['screen_name'], status['id'])
        }
        if raw:
            # caller asked for the unprocessed entry as well
            tweet['raw'] = status
        return tweet

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Fetch tweets for every MQL result that carries a twitter_id.

        Reads 'limit' (default 5) and 'raw' from the query; when the query is
        a list its first element is used.  Returns a dict mapping each
        result's guid to at most `limit` tweet dictionaries.  Raises
        Exception with the service's message when the decoded response
        contains an 'error' entry.
        """
        id_key = '/internet/social_network_user/twitter_id'

        query = params['query'] or {}
        if isinstance(query, list):
            query = query[0]

        limit = query.get('limit', 5)
        raw = query.get('raw', None)

        tweets_by_guid = defaultdict(list)
        for mqlres in args:
            if not mqlres[id_key]:
                continue

            url, connection = me.get_session().http_connect(
                'twitter.com', "/statuses/user_timeline.json")
            qs = urllib.urlencode({
                    'count': limit,
                    'screen_name': mqlres[id_key]['value']
            })
            connection.request('GET', "%s?%s" % (url, qs))
            payload = mw.json.loads(connection.getresponse().read())

            if 'error' in payload:
                me.log('error', 'emql.adapters.twitter', payload['error'], response=payload)
                raise Exception(payload['error'])

            tweets = [self._tweet_from_status(status, raw)
                      for status in payload]
            tweets_by_guid[mqlres['guid']].extend(tweets)

        return dict((guid, found[:limit])
                    for guid, found in tweets_by_guid.iteritems())

    def help(self, tid, graph, mql, me, control, params):
        """Return the content type and reST help text for this adapter."""
        from docs import twitter_adapter_help

        return 'text/x-rst;', twitter_adapter_help
86 | 


--------------------------------------------------------------------------------
/test/best_hrid_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | """Tests /freebase/object_hints/best_hrid resolution.
16 | 
17 | /freebase/object_hints/best_hrid specifies a persistent HRID
18 | for an entity. This should be favored over the earlier MQL
19 | algorithm for choosing an HRID based on namespace traversal
20 | and various heuristics.
21 | """
22 | __author__ = 'nix@google.com (Nick Thompson)'
23 | 
24 | import json
25 | import random
26 | import string
27 | 
28 | import google3
29 | from pymql.mql import error
30 | from pymql.test import mql_fixture
31 | 
class HRIDTest(mql_fixture.MQLTest):
  """Tests HRID queries using mqlread."""

  def setUp(self):
    """Run the base fixture setup, then act as user /user/mw_brendan."""
    # NOTE: the mock graphd support is broken, so there is no best_hrid.yaml
    #self.SetMockPath('data/best_hrid.yaml')
    super(HRIDTest, self).setUp()
    self.env = {'user': '/user/mw_brendan'}

  def newNodeWithHRID(self, best_hrid):
    """Write a node carrying the given best_hrid and return its guid.

    Asserts that the write reports "created", i.e. that no node with this
    best_hrid existed before.
    """
    query = """
    {
      "create":"unless_exists",
      "/freebase/object_hints/best_hrid": "%s",
      "guid":null
    }
    """ % best_hrid
    self.DoQuery(query, mqlwrite=True)
    self.assertEquals(self.mql_result.result["create"],
                      "created")
    return self.mql_result.result["guid"]

  def query_assert(self, q, r, exc_response=None, type="mqlread", asof=None):
    """Run query q and compare against expected response r.

    Args:
      q: query as a JSON string.
      r: expected response as a JSON string.
      exc_response: forwarded unchanged to DoQuery.
      type: accepted but not used by this method.
      asof: if not None, stored in the environment as "as_of_time".
    """
    # Resets the environment, dropping the 'user' entry set in setUp.
    self.env = {}
    if asof is not None:
      self.env["as_of_time"] = asof
    self.DoQuery(q, exp_response=r, exc_response=exc_response)

  def test_missing_hrid(self):
    """Test that MQL still finds an id even if best_hrid is not present"""
    q= '{"id":null, "guid":"#9202a8c04000641f8000000000092a01", "mid":null}'
    r= ('{"guid": "#9202a8c04000641f8000000000092a01",'
        '"id": "/en/sting","mid":"/m/0lbj1"}')
    self.query_assert(q,r)

  def test_good_hrid(self):
    """Test /type/type, a best_hrid that agrees with the MQL heuristics"""
    #  /m/0j == /type/type
    q= '{"id":null, "mid":"/m/0j", "/freebase/object_hints/best_hrid":null}'
    r= ('{"id": "/type/type","mid":"/m/0j",'
        '"/freebase/object_hints/best_hrid":"/type/type"}')
    self.query_assert(q, r)

  def test_hrid_override(self):
    """Create a new node with a bogus best_hrid.

    The old MQL heuristics will fail; check that best_hrid works.
    """
    # 16 random lowercase letters make the HRID different on each run, which
    # newNodeWithHRID relies on when it asserts the node was "created".
    best_hrid = ('/user/nix/random_test_hrid/' +
                 ''.join(random.choice(string.ascii_lowercase)
                         for x in range(16)))
    guid = self.newNodeWithHRID(best_hrid)

    # Read the node back by guid; both "id" and best_hrid should come back as
    # the value written above.
    q= (('{"id":null, "guid":"%(guid)s",'
         '"/freebase/object_hints/best_hrid":null}' %
         {"guid":guid}))
    r= (('{"id": "%(best_hrid)s","guid":"%(guid)s",'
         '"/freebase/object_hints/best_hrid":"%(best_hrid)s"}') %
         {"guid":guid,"best_hrid":best_hrid})
    self.query_assert(q, r)
92 | 
93 | if __name__ == '__main__':
94 |   mql_fixture.main()
95 | 


--------------------------------------------------------------------------------
/api/hicache.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from locache import LojsonCachePolicy
17 | from mw.log import LOG
18 | 
class LWTCachePolicy(LojsonCachePolicy):
    """
    Cache policy that takes its varenv, timing and cost accounting from an
    'mss' object instead of the ctx/varenv normally given to
    LojsonCachePolicy.

    Long run, we can probably factor 'mss' out of here completely, right?
    """
    # prefix prepended to every cost key reported through add_cost
    cost_prefix = 'c'

    def __init__(self, mss, tag='mql'):
        """Initialise the base policy with mss.time_start and keep mss."""
        # give fake ctx/varenv because we'll be overriding all uses
        # and want to make sure that any time LojsonCachePolicy tries
        # to access ctx/varenv, that it explodes loudly, rather than
        # silently using a bad value
        super(LWTCachePolicy, self).__init__(None, None, tag,
                                             start_time=mss.time_start)
        # assigned only after the base constructor has run
        self.mss = mss

    def _set_varenv(self, varenv):
        """Ignore assignments to varenv (see the property below)."""
        # this is a no-op because we're forwarding to self.mss.varenv
        pass

    def _get_varenv(self):
        """Return the varenv owned by self.mss."""
        return self.mss.varenv

    # wrap the existing varenv
    varenv = property(_get_varenv, _set_varenv)

    def annotate_key_object(self, key_obj):
        """Return key_obj wrapped by get_varenv_envelope for the listed names.

        NOTE(review): get_varenv_envelope is defined in the base class;
        presumably it folds these varenv entries into the cache key --
        confirm.
        """
        return self.get_varenv_envelope(key_obj, ("cursor", "macro", "escape",
                                                  "uniqueness_failure", "$lang",
                                                  "asof", "normalize_only", "unicode_text"))

    def annotate_result(self, result):
        """Extend the base annotation with the transaction id and cursor."""
        full_result = super(LWTCachePolicy, self).annotate_result(result)

        full_result["tid"] = self.mss.transaction_id

        # carry the cursor along with the stored result when one is set
        if 'cursor' in self.mss.varenv:
            full_result['cursor'] = self.mss.varenv['cursor']

        return full_result

    def extract_result(self, full_result):
        """Update mss.cache_age and cursor from full_result, then unwrap it."""
        # all of this should maybe be done in the mqlread itself?

        # set the age header to at least this old

        # this is the other place where the use of
        # mss.time_start is important
        self.mss.cache_age = max(self.mss.cache_age,
                                 self.start_time - full_result['time'])

        # restore the cursor that was stored by annotate_result
        if 'cursor' in full_result:
            self.mss.varenv['cursor'] = full_result['cursor']

        return super(LWTCachePolicy, self).extract_result(full_result)


    def add_cost(self, costkey, value=1):
        """Report a cost to mss under cost_prefix + costkey."""
        self.mss.add_cost(self.cost_prefix + costkey, value)

    def should_read_cache(self):
        """Return the varenv "cache" setting, defaulting to True."""
        return self.varenv.get("cache",True)

    def should_write_cache(self):
        """Return True unless writes are disabled or a cursor is in progress."""
        # allow certain reads to not write-through to the cache (for
        # instance, crawlers and results with cursors
        cache_writes = not self.varenv.get('no_store_cache', False)

        # we don't cache past the first page in a cursor'ed query
        # (a cursor equal to True is not treated as a working cursor)
        has_working_cursor = 'cursor' in self.varenv and self.varenv['cursor'] != True

        return cache_writes and not has_working_cursor
90 | 


--------------------------------------------------------------------------------
/emql/adapters/quote.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from itertools import izip, chain
17 | from urllib import urlencode
18 | 
19 | from mw.emql.adapter import Adapter, REQUEST_HEADERS
20 | from mw.emql.emql import id_guid, formatted_id_guid
21 | 
22 | 
class quote_adapter(Adapter):
    """eMQL adapter that looks up stock quote fields by ticker symbol."""

    ticker = "/business/stock_ticker_symbol/ticker_symbol"

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Return the MQL clause asking for the ticker symbol."""
        return {self.ticker: None}

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Fetch quotes for all tickers in args with a single request.

        A dict query may name 'volume', 'price', 'high', 'low' and 'ticker';
        any other key raises ValueError.  Any other query requests the price
        only.  Returns a dict keyed by guid: the raw CSV line when the query
        is None, otherwise a dict of the converted fields.
        """
        query = params.get('query')

        # format documented at http://alexle.net/archives/196
        field_codes = {'volume': 'v', 'price': 'l1', 'high': 'h', 'low': 'g'}

        if isinstance(query, dict):
            keys = []
            for key in query:
                if key == 'ticker':
                    # answered from the MQL result, not from the quote service
                    continue
                if key not in field_codes:
                    raise ValueError(key)
                keys.append(key)
            field_spec = ''.join(field_codes[key] for key in keys)
        else:
            keys = ['price']
            field_spec = 'l1'

        url, connection = me.get_session().http_connect(
            'download.finance.yahoo.com', '/d/quotes.csv')
        body = urlencode({'s': ','.join(mqlres[self.ticker]
                                        for mqlres in args),
                          'f': field_spec })
        connection.request('POST', url, body, REQUEST_HEADERS)
        payload = connection.getresponse().read()

        # one CSV line per requested ticker, in request order
        results = {}
        for mqlres, values in izip(args, payload.rstrip().split('\r\n')):
            if query is None:
                results[mqlres['guid']] = values
                continue

            result = {}
            for key, value in izip(keys, values.split(',')):
                if value == "N/A":
                    result[key] = None
                elif key == 'volume':
                    result[key] = long(value)
                elif key in ('high', 'low', 'price'):
                    result[key] = float(value)
                else:
                    result[key] = value
            if 'ticker' in query:
                result['ticker'] = mqlres[self.ticker]
            results[mqlres['guid']] = result

        return results

    def help(self, tid, graph, mql, me, control, params):
        """Return the content type and reST help text for this adapter."""
        from docs import quote_adapter_help

        return 'text/x-rst;', quote_adapter_help
95 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MQL, the Metaweb Query Language
 2 | 
 3 | This repository contains the original implementation of the Metaweb Query
 4 | Language, written in Python.
 5 | 
 6 | ## Building / Using MQL
 7 | 
 8 | Even though MQL is written in Python, this particular version of it requires
 9 | [Bazel](https://bazel.build) to operate properly. You can build the simple
10 | command-line MQL query tool like so:
11 | 
12 | ```
13 | [¬º-°]¬ bazel build :mqlbin
14 | INFO: Analyzed target //:mqlbin (6 packages loaded, 36 targets configured).
15 | INFO: Found 1 target...
16 | Target //:mqlbin up-to-date:
17 |   bazel-out/k8-py2-fastbuild/bin/mqlbin
18 |   INFO: Elapsed time: 0.771s, Critical Path: 0.02s
19 |   INFO: 0 processes.
20 |   INFO: Build completed successfully, 1 total action
21 | ```
22 | 
23 | Then, it can be executed out of the bazel build directory:
24 | 
 25 | *IMPORTANT!* This will only work if you have an instance of
 26 | [graphd](https://github.com/google/graphd) running, and it needs to be
 27 | properly **bootstrapped** for MQL (see below).
28 | 
29 | ```
30 | [¬º-°]¬ bazel-out/k8-py2-fastbuild/bin/mqlbin --graphd_addr=localhost:8100 --mqlcmd=read '{"id": "/type/object/type", "guid": null}'
31 | <logs...>
32 | MQLResult(result={'guid': '#d119a8c0400062d1800000000000000c', 'id': '/type/object/type'}, cost=defaultdict(<type 'float'>, {'pr': 0.0, 'va': 38742.0, 'tu': 22.0, 'in': 3975.0, 'ir': 0.0, 'tr': 23.0, 'ts': 0.0, 'iw': 0.0, 'te': 26.0, 'mql_utime': 0.047658000000000006, 'mql_dbreqs': 11, 'dw': 0.0, 'tg': 0.030711889266967773, 'tf': 0.04290890693664551, 'pf': 0.0, 'mql_rtime': 1.1784470081329346, 'dr': 5619.0, 'gqr': 0, 'mql_stime': 0.0009940000000000018}), dateline=None, cursor=None)
33 | 
34 | ```
35 | 
36 | ## Bootstrapping a graphd for MQL
37 | 
38 | PyMQL comes with a graphd bootstrap program that you can use to bootstrap an
39 | empty graphd for use with MQL. The bootstrap program itself writes the set of
40 | core types required for MQL to operate.
41 | 
42 | First, ensure you have a graphd running:
43 | 
44 | ```
45 | [¬º-°]¬ git clone https://github.com/google/graphd
46 | Cloning into 'graphd'...
47 | remote: Enumerating objects: 1259, done.
48 | remote: Total 1259 (delta 0), reused 0 (delta 0), pack-reused 1259
49 | Receiving objects: 100% (1259/1259), 2.57 MiB | 14.95 MiB/s, done.
50 | Resolving deltas: 100% (482/482), done.
51 | [¬º-°]¬ cd graphd
52 | [¬º-°]¬ bazel build graphd
53 | ...(graphd builds)
54 | Target //graphd:graphd up-to-date:
55 |   bazel-bin/graphd/graphd
56 |   INFO: Elapsed time: 29.584s, Critical Path: 0.87s
57 |   INFO: 373 processes: 373 linux-sandbox.
58 |   INFO: Build completed successfully, 377 total actions
59 | [¬º-°]¬ bazel-bin/graphd/graphd -d /tmp/data-dir -p /tmp/graphd.pid -n
60 | <graphd is now running in the foreground>
61 | ```
62 | 
63 | In another terminal, run the bootstrap:
64 | 
65 | ```
66 | [¬º-°]¬ ./bazel-out/k8-py2-fastbuild/bin/bootstrap/bootstrap --load bootstrap/otg.bootstrap
67 | ```
68 | 
69 | The bootstrap takes a few minutes to run and you'll see lots of
70 | `graphd.request.start` and `graphd.request.end` lines. This is normal.
71 | 
72 | After this is done, you can run MQL queries via mqlbin.
73 | 
74 | ## History
75 | 
76 | This code was originally authored by Tim Sturge, then maintained by Warren
77 | Harris after his departure.
78 | 
79 | Dime ("2 MQL's") was the implementation written by Warren in OCaml that offered
80 | significant improvements over this initial implementation. However, when Metaweb
81 | was acquired by Google nearing the end of the productionization of Dime, it was
82 | only used partially until Freebase was turned down a few years later. In the
83 | meantime, Warren had gone on to develop other tools used during the early days
84 | of the Knowledge Graph projects at Google.
85 | 


--------------------------------------------------------------------------------
/log/log.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python2.6
  2 | # Copyright 2020 Google LLC
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #      http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | """Backward compatible support for mql LOG calls."""
 16 | 
 17 | __author__ = 'bneutra@google.com (Brendan Neutra)'
 18 | 
 19 | import datetime
 20 | import os
 21 | import socket
 22 | from pymql.log import log_util
 23 | from pymql.util import dumper
 24 | from absl import logging
 25 | 
 26 | __all__ = ['generate_tid', 'LOG', 'pprintlog', 'dumplog']
 27 | 
 28 | # allow google logging to discover the caller
 29 | # i.e. ignore these local functions
 30 | skip = [
 31 |     '_logit', 'fatal', 'error', 'warn', 'info', 'debug', 'spew', 'exception',
 32 |     'warning', 'alert', 'notice', 'log', 'pprintlog', 'dumplog'
 33 | ]
 34 | 
 35 | 
 36 | def _logit(level, s, args=None, kwargs=None):
 37 |   # let's not waste any cycles
 38 |   if level > logging.get_verbosity():
 39 |     return
 40 |   msg = ''
 41 |   if args:
 42 |     msg += '\t'.join(str(arg) for arg in args)
 43 |   if kwargs:
 44 |     msg += '\t'.join('%s=%s' % (pair) for pair in kwargs.iteritems())
 45 |   logging.vlog(level, '%s %s' % (s, msg))
 46 | 
 47 | 
class LOG(object):
  """Static logging helpers kept for callers of the old LOG.<level>() API.

  Every method forwards its message and extra positional/keyword arguments
  to _logit with the corresponding level constant.
  """

  @staticmethod
  def fatal(s, *args, **kwargs):
    """Log s at logging.FATAL."""
    # NOTE(review): absl treats FATAL records as process-terminating --
    # confirm callers (including the `exception` alias below) expect that.
    _logit(logging.FATAL, s, args, kwargs)

  @staticmethod
  def error(s, *args, **kwargs):
    """Log s at logging.ERROR."""
    _logit(logging.ERROR, s, args, kwargs)

  @staticmethod
  def warn(s, *args, **kwargs):
    """Log s at logging.WARN."""
    _logit(logging.WARN, s, args, kwargs)

  @staticmethod
  def info(s, *args, **kwargs):
    """Log s at logging.INFO."""
    _logit(logging.INFO, s, args, kwargs)

  @staticmethod
  def debug(s, *args, **kwargs):
    """Log s at logging.DEBUG."""
    _logit(logging.DEBUG, s, args, kwargs)

  @staticmethod
  def spew(s, *args, **kwargs):
    """Log s at log_util.SPEW."""
    _logit(log_util.SPEW, s, args, kwargs)

  @staticmethod
  def log(level, s, *args, **kwargs):
    """Log s at an explicitly given level."""
    _logit(level, s, args, kwargs)

  # Aliases: these names share the implementation of the method on the right.
  exception = fatal
  notice = info
  warning = warn
  alert = warn
 82 | 
 83 | 
 84 | def dumplog(string, obj, level=log_util.SPEW):
 85 |   if level <= logging.get_verbosity():
 86 |     LOG.log(level, string, dumper.dumps(obj))
 87 | 
 88 | 
 89 | def pprintlog(string, obj, level=log_util.DEBUG, **kwargs):
 90 |   if level <= logging.get_verbosity():
 91 |     LOG.log(level, string, repr(obj))
 92 | 
 93 | 
 94 | tid_seqno = 0
 95 | hostname = socket.getfqdn()
 96 | del socket
 97 | pid = os.getpid()
 98 | 
 99 | 
def generate_tid(token=None, hostport=None):
  """Build a transaction id string.

  The id has the form 'token;host:port;pid;UTC-timestampZ;sequence', where
  the sequence number comes from the module-level tid_seqno counter.

  Args:
    token: leading field of the id; 'me' is used when it is empty or None.
    hostport: 'host:port' string, a bare port (int or string) that gets the
      local hostname prefixed, or empty/None for '<hostname>:0'.

  Returns:
    The formatted transaction id.
  """
  global tid_seqno

  if not hostport:
    # can't determine port without looking at WSGI environ or apache
    # config? perhaps we could read this from a config file?
    endpoint = '%s:0' % hostname
  elif isinstance(hostport, (int, long)):
    # a bare numeric port: prefix it with the hostname
    endpoint = '%s:%d' % (hostname, hostport)
  elif ':' in hostport:
    endpoint = hostport
  else:
    endpoint = '%s:%s' % (hostname, hostport)

  # small race condition here
  tid_seqno += 1

  stamp = datetime.datetime.utcnow().isoformat('T')
  fields = (token or 'me', endpoint, pid, stamp, tid_seqno)
  return '%s;%s;%05d;%sZ;%04d' % fields
122 | 


--------------------------------------------------------------------------------
/test/mql_fixture_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python2.4
  2 | # Copyright 2020 Google LLC
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #      http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | # -*- coding: utf-8 -*-
 17 | #
 18 | """test the test fixture."""
 19 | 
 20 | __author__ = 'bneutra@google.com (Brendan Neutra)'
 21 | 
 22 | import sys
 23 | import google3
 24 | from pymql.mql import error
 25 | from pymql.test import mql_fixture
 26 | from google3.pyglib import logging
 27 | 
 28 | 
 29 | class MQLTest(mql_fixture.MQLTest):
 30 |   """for testing basic mqlread queries."""
 31 | 
 32 |   def setUp(self):
 33 |     self.SetMockPath('data/mql_fixture.yaml')
 34 |     super(MQLTest, self).setUp()
 35 |     self.env = {'as_of_time': '2009-10-01'}
 36 | 
 37 |   def DoQueryException(self, query, expected, **kwargs):
 38 |     """expect a failure."""
 39 |     try:
 40 |       self.DoQuery(query, **kwargs)
 41 |     except AssertionError:
 42 |       msg = str(sys.exc_info()[1])
 43 |       if not expected in msg:
 44 |         self.fail('expected: %s\ngot: %s' % (expected, msg))
 45 |       else:
 46 |         logging.debug('assertion raised, as expected! got: %s', expected)
 47 | 
 48 |   def testPositive(self):
 49 |     query = """
 50 |     {
 51 |       "id": "/en/bob_dylan"
 52 |     }
 53 |     """
 54 |     exp_response = """
 55 |     {
 56 |       "id": "/en/bob_dylan"
 57 |     }
 58 |     """
 59 |     self.DoQuery(query, exp_response=exp_response)
 60 | 
 61 |   def testUnexpectedResponse(self):
 62 |     query = """
 63 |     {
 64 |       "id": "/en/bob_dylan"
 65 |     }
 66 |     """
 67 |     exp_response = """
 68 |     {
 69 |       "id": "/n/bob_dylan"
 70 |     }
 71 |     """
 72 |     self.DoQueryException(
 73 |         query,
 74 |         '!=',
 75 |         exp_response=exp_response
 76 |     )
 77 | 
 78 |   def testUnexpectedError(self):
 79 |     query = """
 80 |     {
 81 |       "invalidkey": "/en/bob_dylan"
 82 |     }
 83 |     """
 84 |     exp_response = """
 85 |     {
 86 |       "id": "/n/bob_dylan"
 87 |     }
 88 |     """
 89 |     self.DoQueryException(
 90 |         query,
 91 |         'exception. was not expected',
 92 |         exp_response=exp_response
 93 |     )
 94 | 
 95 |   def testExpectError(self):
 96 |     query = """
 97 |     {
 98 |       "guid": "#9202a8c04000641f8000000003abd178",
 99 |       "id": "/en/bob_dylan"
100 |     }
101 |     """
102 |     exc_response = (
103 |         error.MQLParseError,
104 |         "Can't specify an id more than once in a single clause"
105 |     )
106 |     self.DoQuery(query, exc_response=exc_response)
107 | 
108 |   def testExpectNoError(self):
109 |     query = """
110 |     {
111 |       "guid": "#9202a8c04000641f8000000003abd178",
112 |       "id": "/en/bob_dylan"
113 |     }
114 |     """
115 |     self.DoQueryException(
116 |         query,
117 |         'exception. was not expected',
118 |         exp_response='whatev'
119 |     )
120 | 
121 |   def testExpectOtherError(self):
122 |     query = """
123 |     {
124 |       "guid": "#9202a8c04000641f8000000003abd178",
125 |       "id": "/en/bob_dylan"
126 |     }
127 |     """
128 |     self.DoQueryException(
129 |         query,
130 |         "MQLParseError'> != <type 'exceptions.KeyError'>",
131 |         exc_response=(KeyError, 'whatev')
132 |     )
133 | 
134 | if __name__ == '__main__':
135 |   mql_fixture.main()
136 | 


--------------------------------------------------------------------------------
/mql/grquoting.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import re
 16 | from xml.sax import saxutils
 17 | import urllib
 18 | import cgi
 19 | 
 20 | from pymql.error import FormattingError
 21 | 
 22 | ######################################################################
 23 | 
 24 | # quoting rules
 25 | _internal_quoting_rules = [
 26 |     ('\"', '\\\"'),
 27 |     ('\\', '\\\\'),
 28 |     ('\n', '\\n'),
 29 | ]
 30 | 
 31 | _internal_to_quote = dict(_internal_quoting_rules)
 32 | _internal_from_quote = dict([(a, b) for b, a in _internal_quoting_rules])
 33 | _internal_from_quote['\''] = ''
 34 | _internal_from_quote['\"'] = ''
 35 | 
 36 | # I love REs (aka read it and weep)
 37 | re_quoted_string_text = '^\"((?:[^\\\\\"]|\\\\[\\\\\"n])*)\"$'
 38 | re_quoted_string_part = '\\\\[\\\\\"n]'
 39 | # everything matches this, so we don't test (ie. all unquoted strings are legal)
 40 | re_unquoted_string_text = '^(?:[^\\\\\n\"]|([\\\\\n\"]))*$'
 41 | re_unquoted_string_part = '[\\\\\n\"]'
 42 | 
 43 | re_qs = re.compile(re_quoted_string_text)
 44 | re_qs_part = re.compile(re_quoted_string_part)
 45 | re_us_part = re.compile(re_unquoted_string_part)
 46 | 
 47 | 
 48 | def _internal_quote_sub(m):
 49 |   return _internal_to_quote[m.group()]
 50 | 
 51 | 
 52 | def _internal_unquote_sub(m):
 53 |   return _internal_from_quote[m.group()]
 54 | 
 55 | 
def _internal_leading_trailing(m):
  """Placeholder that ignores m and returns None."""
  # NOTE(review): nothing in this module calls this function -- confirm
  # whether it is still needed.
  return
 58 | 
 59 | 
 60 | ######################################################################
 61 | 
 62 | 
 63 | def quote(string):
 64 |   return '"' + re_us_part.sub(_internal_quote_sub, string) + '"'
 65 | 
 66 | 
 67 | def unquote(string):
 68 |   middlem = re_qs.match(string)
 69 |   if middlem is None:
 70 |     raise FormattingError('Badly formatted quoted string %s ' % string)
 71 |   return re_qs_part.sub(_internal_unquote_sub, middlem.group(1))
 72 | 
 73 | 
 74 | ######################################################################
 75 | 
 76 | #
 77 | #  html escaping
 78 | #  url escaping
 79 | #
 80 | #  originally from mw/client/escaping.py
 81 | #
 82 | 
 83 | 
 84 | def escapeAttribute(data):
 85 |   """
 86 |     Prepares data to be used as an attribute value. The return value
 87 |     is a quoted version of data. The resulting string can be used
 88 |     directly as an attribute value:
 89 |     >>> print "<element attr=%s>" % quoteattr("ab ' cd \" ef")
 90 |     <element attr="ab ' cd &quot; ef">
 91 |     """
 92 |   return (saxutils.quoteattr(data))
 93 | 
 94 | 
def escapeUrl(data):
  """
    Replace special characters in string using the "%xx"
    escape. Letters, digits, and the characters "/_.-" are never
    escaped.

    Thin wrapper around urllib.quote called with its default arguments.
    """
  return (urllib.quote(data))
102 | 
103 | 
def escapeMarkup(data):
  """
    Convert the characters "&", "<" and ">" in data to HTML-safe
    sequences.

    Thin wrapper around cgi.escape called with its default arguments, so
    quote characters are left unchanged.
    """
  return (cgi.escape(data))
110 | 
111 | 
112 | ######################################################################
113 | 
if __name__ == '__main__':
  # Manual smoke run of quote()/unquote(); only executes when the module is
  # run as a script.
  quoted = quote("\n\r\t\"\\foo\\\"")
  print(quoted)  # result is "\n\r\t\"\\foo\\\"" (duh)

  # note that python sees "foo\n\"\\" -- result is foo<newline>"\ (quote, backslash)
  unquoted = unquote("\"foo\\n\\\"\\\\\"")
  print(unquoted)

  # should die with an "illegal quoted string" exception
  print(unquote("\"foo\\\"\\\""))
121 | 


--------------------------------------------------------------------------------
/emql/adapters/text.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google LLC
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from itertools import izip, chain
17 | 
18 | from mw.emql.adapter import Adapter
19 | from mw.emql.emql import id_guid, formatted_id_guid
20 | 
21 | 
class text_adapter(Adapter):
    """eMQL adapter that returns the text behind a /common/document.

    The text comes either from the document's content blob (fetched through
    the session) or, when there is no content but a source_uri, from the
    /api/trans/<mode> HTTP service on api.freebase.com.
    """

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Return the MQL clauses whose results fetch() consumes."""

        return {"/common/document/content":
                  {"optional": True, "blob_id": None, "media_type": None},
                "/common/document/source_uri": None,
                "guid": None}

    def _shape_result(self, params, chars, truncate):
        """Build the per-guid result for `chars`.

        With no query params the raw text is the result. Otherwise a copy of
        the params is returned with its 'chars' and 'length' slots filled in
        when the caller asked for them. `truncate` applies 'maxlength'
        locally; the HTTP branch passes it to the server instead.
        """
        if params is None:
            return chars

        result = params.copy()
        if truncate and 'maxlength' in result:
            chars = chars[:result['maxlength']]
        if 'chars' in result:
            result['chars'] = chars
        if 'length' in result:
            result['length'] = len(chars)
        return result

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Return a dict mapping each guid in `args` to its text result.

        `params.get('query')` is the (possibly absent) query dict; see
        _shape_result() for how it shapes each value. Entries whose content
        is not a "/media_type/text..." blob, or that have neither content
        nor a source_uri, are left out of the result.

        Raises:
            ValueError: if the query's 'mode' is neither 'blurb' nor 'raw'
                (only checked for source_uri documents).
        """

        params = params.get('query')
        results = {}

        for mqlres in args:
            guid = mqlres['guid']
            content = mqlres["/common/document/content"]
            if content is not None:
                mediatype = content["media_type"]
                if not (mediatype and mediatype.startswith("/media_type/text")):
                    continue
                blob_id = content["blob_id"]
                if not blob_id:
                    continue

                chars = me.get_session().fetch_blob(tid, blob_id)
                try:
                    chars = unicode(chars, 'utf-8')
                except (TypeError, UnicodeDecodeError):
                    # Best effort: keep the blob as fetched when it is not
                    # valid utf-8 or is not a byte string to begin with.
                    pass

                results[guid] = self._shape_result(params, chars, True)

            elif mqlres["/common/document/source_uri"] is not None:
                if params is None:
                    maxlength = None
                    mode = 'blurb'
                else:
                    maxlength = params.get('maxlength')
                    mode = params.get('mode', 'blurb')
                    if mode not in ('blurb', 'raw'):
                        raise ValueError("invalid mode: '%s'" % (mode))

                query = '/guid/%s' % (guid[1:])
                if maxlength:
                    query += '?maxlength=%d' % (maxlength)

                path = '/api/trans/%s' % (mode) + query
                url, connection = me.get_session().http_connect(
                    'api.freebase.com', path)
                connection.request('GET', url)
                response = connection.getresponse()
                chars = response.read()

                # maxlength was already sent to the service, so no local
                # truncation here.
                results[guid] = self._shape_result(params, chars, False)

        return results

    def help(self, tid, graph, mql, me, control, params):
        """Return the (content type, reStructuredText help) pair."""
        from docs import text_adapter_help

        return 'text/x-rst;', text_adapter_help
95 | 


--------------------------------------------------------------------------------
/test/BUILD:
--------------------------------------------------------------------------------
  1 | # Author: bneutra@google.com (Brendan Neutra)
  2 | #
  3 | # Description: mql query language tests.
  4 | #
  5 | 
# Every target in this package is visible to all other packages.
package(default_visibility = ["//visibility:public"])

licenses(["unencumbered"])  # Google acquisition

exports_files(["LICENSE"])

# Shared, test-only library: the mql_fixture module plus its config and YAML
# data files. Every py_test in this file depends on it.
py_library(
    name = "testing_deps",
    testonly = 1,
    srcs = [
        "mql_fixture.py",
    ],
    # config.cfg plus every YAML file directly under data/.
    data = [
        ":config.cfg",
    ] + glob([
        "data/*.yaml",
    ]),
    deps = [
        "//base",
        "//loadbalancer/gslb/client/public:pywrapgslbchannel",
        "//metaweb/graphd/server:graphd_py_pb2",
        "//pyglib",
        "//testing/pybase",
        "//third_party/py/pymql:mql",
        "//third_party/py/simplejson:simplejson_fast",
        "//third_party/py/yaml",
    ],
)
 34 | 
 35 | py_test(
 36 |     name = "type_link_test",
 37 |     size = "large",
 38 |     srcs = [
 39 |         "type_link_test.py",
 40 |     ],
 41 |     deps = [
 42 |         ":testing_deps",
 43 |     ],
 44 | )
 45 | 
 46 | py_test(
 47 |     name = "cost_test",
 48 |     size = "large",
 49 |     srcs = [
 50 |         "cost_test.py",
 51 |     ],
 52 |     deps = [
 53 |         ":testing_deps",
 54 |     ],
 55 | )
 56 | 
 57 | py_test(
 58 |     name = "sort_test",
 59 |     size = "large",
 60 |     srcs = [
 61 |         "sort_test.py",
 62 |     ],
 63 |     deps = [
 64 |         ":testing_deps",
 65 |     ],
 66 | )
 67 | 
 68 | py_test(
 69 |     name = "basic_mql_test",
 70 |     size = "large",
 71 |     srcs = [
 72 |         "basic_mql_test.py",
 73 |     ],
 74 |     deps = [
 75 |         ":testing_deps",
 76 |     ],
 77 | )
 78 | 
 79 | py_test(
 80 |     name = "mids_test",
 81 |     size = "large",
 82 |     srcs = [
 83 |         "mids_test.py",
 84 |     ],
 85 |     deps = [
 86 |         ":testing_deps",
 87 |     ],
 88 | )
 89 | 
 90 | py_test(
 91 |     name = "best_hrid_test",
 92 |     size = "medium",
 93 |     srcs = [
 94 |         "best_hrid_test.py",
 95 |     ],
 96 |     deps = [
 97 |         ":testing_deps",
 98 |     ],
 99 | )
100 | 
101 | py_test(
102 |     name = "mql_fixture_test",
103 |     size = "large",
104 |     srcs = [
105 |         "mql_fixture_test.py",
106 |     ],
107 |     deps = [
108 |         ":testing_deps",
109 |     ],
110 | )
111 | 
112 | py_test(
113 |     name = "regression_id_test",
114 |     size = "large",
115 |     srcs = [
116 |         "regression_id_test.py",
117 |     ],
118 |     deps = [
119 |         ":testing_deps",
120 |     ],
121 | )
122 | 
123 | py_test(
124 |     name = "regression_misc_test",
125 |     size = "large",
126 |     srcs = [
127 |         "regression_misc_test.py",
128 |     ],
129 |     deps = [
130 |         ":testing_deps",
131 |     ],
132 | )
133 | 
134 | py_test(
135 |     name = "mql_manual_test",
136 |     size = "large",
137 |     srcs = [
138 |         "mql_manual_test.py",
139 |     ],
140 |     deps = [
141 |         ":testing_deps",
142 |     ],
143 | )
144 | 
145 | py_test(
146 |     name = "mql_manual_two_test",
147 |     size = "large",
148 |     srcs = [
149 |         "mql_manual_two_test.py",
150 |     ],
151 |     deps = [
152 |         ":testing_deps",
153 |     ],
154 | )
155 | 
156 | py_test(
157 |     name = "mql_manual_write_test",
158 |     size = "large",
159 |     srcs = [
160 |         "mql_manual_write_test.py",
161 |     ],
162 |     deps = [
163 |         ":testing_deps",
164 |     ],
165 | )
166 | 
167 | py_test(
168 |     name = "return_test",
169 |     size = "large",
170 |     srcs = [
171 |         "return_test.py",
172 |     ],
173 |     deps = [
174 |         ":testing_deps",
175 |     ],
176 | )
177 | 
178 | py_test(
179 |     name = "mql_exceptions_test",
180 |     size = "large",
181 |     srcs = [
182 |         "mql_exceptions_test.py",
183 |     ],
184 |     deps = [
185 |         ":testing_deps",
186 |     ],
187 | )
188 | 
189 | py_test(
190 |     name = "query_sort_test",
191 |     size = "small",
192 |     srcs = [
193 |         "query_sort_test.py",
194 |     ],
195 |     deps = [
196 |         ":testing_deps",
197 |     ],
198 | )
199 | 


--------------------------------------------------------------------------------
/bootstrap/bootstrap.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """bootstrap -- dump and load a bootstrap from an existing graph."""
  3 | 
  4 | import json
  5 | import re
  6 | import sys
  7 | 
  8 | from absl import app
  9 | from absl import flags
 10 | from absl import logging
 11 | 
 12 | from pymql import MQLService
 13 | 
 14 | from pymql.mql import graph
 15 | from pymql.mql import lojson
 16 | 
# Command-line flags for the bootstrap tool (absl.flags).
FLAGS = flags.FLAGS
# NOTE(review): --mqlenv is defined here but nothing in this file reads it.
flags.DEFINE_string(
    'mqlenv', None, 'a dict in the form of a string which '
    'contains valid mql env key/val pairs')
flags.DEFINE_string('graphd_addr', 'localhost:9100',
                    'host:port of graphd server')
# Path of the JSON bootstrap dump that main() hands to load_from_file().
flags.DEFINE_string('load', '', 'load bootstrap from given file')
 24 | 
 25 | 
 26 | class BootstrapError(Exception):
 27 |   pass
 28 | 
 29 | 
 30 | class Bootstrap(object):
 31 |   version = 1
 32 | 
 33 |   def __init__(self, gc):
 34 |     self.gc = gc
 35 | 
 36 |   def load_from_file(self, filename):
 37 |     loadfile = open(filename, 'r')
 38 |     data = ''.join(loadfile.readlines())
 39 |     regex = re.compile('[\n\t]+')
 40 |     data = regex.sub(' ', data)
 41 |     loadfile.close()
 42 | 
 43 |     d = json.loads(data)
 44 |     if d['0_version'] != self.version:
 45 |       raise BootstrapError('version mismatch')
 46 | 
 47 |     self.bootstrap = d['1_bootstrap']
 48 |     self.nodes = d['2_nodes']
 49 |     self.links = d['3_links']
 50 | 
 51 |   def mkprim(self, **kwds):
 52 |     if 'scope' not in kwds and self.root_user:
 53 |       kwds['scope'] = self.root_user
 54 |     params = ' '.join(['%s=%s' % (k, v) for (k, v) in kwds.items()])
 55 |     result = self.gc.write_varenv('(%s)' % params, {})
 56 |     return result[0]
 57 | 
 58 |   def load_bootstrap(self):
 59 |     self.xlate = {}
 60 |     self.xlate_link = {}
 61 | 
 62 |     if len(self.gc.read_varenv('(pagesize=1 result=(guid))', {})):
 63 |       logging.fatal("Can't bootstrap a non-empty graph")
 64 | 
 65 |     self.root_user = None  # avoid forward ref in mkprim
 66 |     self.root_user = self.mkprim(name='"ROOT_USER"')
 67 |     self.root_namespace = self.mkprim(name='"ROOT_NAMESPACE"')
 68 |     self.has_key = self.mkprim(name='"HAS_KEY"')
 69 | 
 70 |     self.xlate[self.bootstrap['ROOT_USER']] = self.root_user
 71 |     self.xlate[self.bootstrap['ROOT_NAMESPACE']] = self.root_namespace
 72 |     self.xlate[self.bootstrap['HAS_KEY']] = self.has_key
 73 | 
 74 |   def load_root_user(self):
 75 |     # we dumped them separately, but we want to load them together...
 76 |     node_pos = 0
 77 |     link_pos = 0
 78 |     while node_pos < len(self.nodes) or link_pos < len(self.links):
 79 |       if link_pos >= len(
 80 |           self.links) or (node_pos < len(self.nodes) and
 81 |                           self.nodes[node_pos] < self.links[link_pos]['guid']):
 82 |         # we will do the next node
 83 |         node = self.nodes[node_pos]
 84 |         self.write_node(node)
 85 |         node_pos += 1
 86 |       else:
 87 |         link = self.links[link_pos]
 88 |         self.write_link(link)
 89 |         link_pos += 1
 90 | 
 91 |   def write_node(self, node):
 92 |     if node not in self.xlate:
 93 |       self.xlate[node] = self.mkprim()
 94 | 
 95 |   def write_link(self, link):
 96 |     new_link = {'datatype': link['datatype'], 'value': link['value']}
 97 |     for ptr in ('left', 'right', 'scope', 'typeguid'):
 98 |       # translate the link
 99 |       if ptr in link:
100 |         if link[ptr] == 'null':
101 |           new_link[ptr] = 'null'
102 |         elif link[ptr] not in self.xlate:
103 |           raise BootstrapError('Saw dangling link %s' % repr(link))
104 |         else:
105 |           new_link[ptr] = self.xlate[link[ptr]]
106 |     new_link['guid'] = self.mkprim(**new_link)
107 | 
108 |     self.xlate_link[link['guid']] = new_link
109 | 
110 | 
def main(argv):
  """Load the bootstrap file named by --load into the graph at --graphd_addr.

  Raises:
    Exception: if --graphd_addr is empty or is not of the form host:port.
  """
  if not FLAGS.graphd_addr:
    raise Exception('Must specify a --graphd_addr')

  # Honour the flag instead of a hard-coded address.
  host, sep, port = FLAGS.graphd_addr.rpartition(':')
  if not (sep and host and port.isdigit()):
    raise Exception(
        '--graphd_addr must look like host:port, got %r' % FLAGS.graphd_addr)

  conn = graph.TcpGraphConnector(addrs=[(host, int(port))])

  bootstrap = Bootstrap(conn)
  bootstrap.load_from_file(FLAGS.load)
  bootstrap.load_bootstrap()
  bootstrap.load_root_user()


if __name__ == '__main__':
  app.run(main)
125 | 


--------------------------------------------------------------------------------
/util/keyquote.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import string
 16 | from pymql.mql import error
 17 | 
 18 | def quotekey(ustr):
 19 |     """
 20 |     quote a unicode string to turn it into a valid namespace key
 21 | 
 22 |     """
 23 |     valid_always = string.ascii_letters + string.digits + '_'
 24 |     valid_interior_only = valid_always + '-'
 25 | 
 26 |     if isinstance(ustr, str):
 27 |         s = unicode(ustr,'utf-8')
 28 |     elif isinstance(ustr, unicode):
 29 |         s = ustr
 30 |     else:
 31 |         raise ValueError, 'quotekey() expects utf-8 string or unicode'
 32 | 
 33 |     if len(s) == 0:
 34 |         return str(s)
 35 | 
 36 |     output = []
 37 |     if s[0] in valid_always:
 38 |         output.append(s[0])
 39 |     else:
 40 |         output.append('$%04X' % ord(s[0]))
 41 | 
 42 |     for c in s[1:-1]:
 43 |         if c in valid_interior_only:
 44 |             output.append(c)
 45 |         else:
 46 |             output.append('$%04X' % ord(c))
 47 | 
 48 |     if len(s) > 1:
 49 |         if s[-1] in valid_always:
 50 |             output.append(s[-1])
 51 |         else:
 52 |             output.append('$%04X' % ord(s[-1]))
 53 | 
 54 |     return str(''.join(output))
 55 | 
 56 | 
 57 | def unquotekey(key, encoding=None):
 58 |     """
 59 |     unquote a namespace key and turn it into a unicode string
 60 |     """
 61 | 
 62 |     valid_always = string.ascii_letters + string.digits + "_"
 63 | 
 64 |     output = []
 65 |     i = 0
 66 |     while i < len(key):
 67 |         if key[i] in valid_always:
 68 |             output.append(key[i])
 69 |             i += 1
 70 |         elif key[i] in '_-' and i != 0 and i != len(key):
 71 |             output.append(key[i])
 72 |             i += 1
 73 |         elif key[i] == '$' and i+4 < len(key):
 74 |             # may raise ValueError if there are invalid characters
 75 |             output.append(unichr(int(key[i+1:i+5],16)))
 76 |             i += 5
 77 |         else:
 78 |             msg = "key %s has invalid character %s at position %d" % (
 79 |                 key,
 80 |                 key[i],
 81 |                 i
 82 |             )
 83 |             raise error.MQLInternalError(None, msg)
 84 | 
 85 |     ustr = u''.join(output)
 86 | 
 87 |     if encoding is None:
 88 |         return ustr
 89 | 
 90 |     return ustr.encode(encoding)
 91 | 
 92 | 
 93 | def unquote_id(id):
 94 |     """
 95 |     Turn an id into a user-readable string, for instance turning
 96 |     /media_type/application/rss$002Bxml into
 97 |     /media_type/application/rss+xml
 98 |     """
 99 | 
100 |     if '/' not in id:
101 |         return unquotekey(id)
102 | 
103 |     return '/'.join(unquotekey(k) for k in id.split('/'))
104 | 
def id_to_urlid(id):
    """
    convert a mql id to an id suitable for embedding in a url path.

    '~...' ids are returned unchanged, '#...' ids get the '#' replaced by
    '%23', and '/a/b' paths have each key unquoted and url-encoded and are
    re-joined without the leading slash.

    Raises AssertionError for an id that is not a non-empty str (or a
    '~'/'#' id containing '/'), and ValueError for any other format.
    """

    # validate before touching the value, so a bad id fails the assertion
    # rather than blowing up inside split()
    assert isinstance(id, str) and id != '', 'bad id "%s"' % id

    segs = id.split('/')

    if id[0] == '~':
        assert len(segs) == 1
        # assume valid, should check
        return id

    if id[0] == '#':
        assert len(segs) == 1
        # assume valid, should check
        return '%23' + id[1:]

    if id[0] != '/':
        raise ValueError('unknown id format %s' % id)

    # XXX shouldn't be in metaweb.api!
    # imported here so the '~' and '#' forms above do not need it
    from mw.formats.http import urlencode_pathseg

    # ok, we have a slash-path
    # unquote each key, url-encode it, and rejoin.
    # urlids do not have leading slashes!!!
    return '/'.join(urlencode_pathseg(unquotekey(seg)) for seg in segs[1:])
134 | 
135 | 


--------------------------------------------------------------------------------
/util/mwdatetime.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import datetime
 16 | import re
 17 | 
 18 | 
 19 | # a datetime is a non-empty string containing one of
 20 | # yyyy, yyyy-mm, yyyy-mm-dd,
 21 | # Thh, Thh:mm, Thh:mm:ss Thh:mm:ss.dddd
 22 | # or yyyy-mm-dd followed by one of the T constructs.
 23 | # Note that this is more lenient than valid_timestamp in lojson - it matches the @timestamp clause only,
 24 | # not our extended ISO 8601 syntax
 25 | 
 26 | 
 27 | # Python datetime classes support only a year range between MINYEAR (1) and MAXYEAR(9999)
 28 | # we want to support anything from -9999 (== 10000BC) to 9999 (== 9999AD)
 29 | # and possibly support more in the future.
 30 | 
 31 | # and some other useful methods:
# Pattern used by coerce_datetime(). Capture groups:
#   1-3   year, month, day of a date-only value (yyyy, yyyy-mm, yyyy-mm-dd)
#   4-6   year, month, day of the date part that precedes 'T' in a date/time
#   7-9   hour, minute, second
#   10    fractional seconds (1 to 6 digits)
#   11    timezone: 'Z' or a signed hh:mm offset (hours 00-14)
#   12    minutes of that offset (00, 15, 30 or 45)
__datetime_re = re.compile(r'^(?:(?:(-?\d{4})(?:-(\d\d)(?:-(\d\d))?)?)|(?:(-?\d{4})-(\d\d)-(\d\d)T)?(\d\d)(?:\:(\d\d)(?:\:(\d\d)(?:\.(\d{1,6}))?)?)?(Z|[-+](?:0\d|1[0-4])\:(00|15|30|45))?)$')
 33 | 
 34 | # returns the graph format datetime (like ISO except for a leading T on times)
 35 | def coerce_datetime(dt):
 36 |     try:
 37 |         if dt == '__now__':
 38 |             return datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
 39 |         if dt == '__today__':
 40 |             return datetime.datetime.utcnow().strftime("%Y-%m-%d")
 41 | 
 42 |         match = __datetime_re.match(dt)
 43 |         if not match:
 44 |             return None
 45 |         elif match.group(1):
 46 |             if check_date(*match.group(1,2,3)):
 47 |                 return dt
 48 |             else:
 49 |                 return None
 50 |         elif match.group(4):
 51 |             # a date/time
 52 |             # we don't do subseconds as python thinks that '2' is "2 microseconds" not "2 deciseconds".
 53 |             if not check_date(*match.group(4,5,6)):
 54 |                 return None
 55 |             if not check_time(*match.group(7,8,9)):
 56 |                 return None
 57 | 
 58 |             return dt
 59 | 
 60 |         elif match.group(7):
 61 |             if not check_time(*match.group(7,8,9)):
 62 |                 return None
 63 | 
 64 |             return 'T' + dt
 65 |         else:
 66 |             # no idea what the problem is, but it is invalid
 67 |             return None
 68 | 
 69 |     except TypeError:
 70 |         return None
 71 |     except ValueError:
 72 |         return None
 73 | 
 74 | def check_date(year,month,day):
 75 |     # returns true or false depending on whether the day is valid
 76 |     # handles strings and nulls
 77 |     fakeyear = int(year)
 78 |     if int(fakeyear) > 9999 or int(fakeyear) < -9999:
 79 |         return False
 80 | 
 81 |     if month is None:
 82 |         return True
 83 |     elif int(month) < 1 or int(month) > 12:
 84 |         return False
 85 |     elif day is None:
 86 |         return True
 87 |     else:
 88 |         while fakeyear <= 0:
 89 |             fakeyear += 8000
 90 | 
 91 |         try:
 92 |             datetime.date(fakeyear,int(month),int(day))
 93 |             return True
 94 |         except ValueError:
 95 |             return False
 96 | 
 97 | def check_time(hour,minute,second):
 98 |     if hour is None:
 99 |         return False
100 |     elif int(hour) < 0 or int(hour) > 23:
101 |         return False
102 |     elif minute is None:
103 |         return True
104 |     elif int(minute) < 0 or int(minute) > 59:
105 |         return False
106 |     elif second is None:
107 |         return True
108 |     elif int(second) < 0 or int(second) > 59:
109 |         return False
110 |     else:
111 |         return True
112 | 
113 | 
def uncoerce_datetime(graphdt):
    """Undo coerce_datetime(): drop the leading 'T' of a time-only value.

    Any other value, including the empty string, is returned unchanged.
    """
    # slice rather than index so an empty string does not raise IndexError
    if graphdt[:1] == 'T':
        return graphdt[1:]
    return graphdt
119 | 


--------------------------------------------------------------------------------
/emql/adapters/test/test_weblink.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from mw.tests.helpers import TestFixture
 16 | from mw.emql import emql
# JSON-style aliases so the query literals below can be written the way
# they appear in JSON.
null = None
true = True
false = False
# The property whose adapter these tests exercise.
WEBLINK = "/common/topic/weblink"
 21 | class TestWeblinks_adapter(TestFixture):
 22 |     
 23 |     def setUp(self):
 24 |         super(TestWeblinks_adapter, self).setUp()
 25 |         self.cache = emql.emql_cache()
 26 | 
 27 | 
 28 |     def run_query(self, q):
 29 |         debug, cursors, results = self.mss.emqlread(None, q, {'debug': True, 'cache': False},
 30 |                                                     cache=self.cache)
 31 |         return results
 32 | 
 33 | 
 34 |     def test_bob_dylan(self):
 35 |         
 36 |         r = self.run_query({
 37 |                 "id":"/en/bob_dylan",
 38 |                 WEBLINK:[]
 39 |         })
 40 | 
 41 |         weblinks = r[WEBLINK] 
 42 |         self.assert_(weblinks, "Basic sanity test - make sure there are some weblinks returning which indiciate that at least emql is working and that the weblinks adapter is returning results.")
 43 | 
 44 |         #XXXXXX UNCOMMENT AFTER https://bugs.freebase.com/browse/DA-1093 ######
 45 | 
 46 |         #self.assert_("http://www.bobdylan.com/" in weblinks, "Test a key hanging off of a resource")
 47 | 
 48 |         self.assert_("http://en.wikipedia.org/wiki/Bob_Dylan" in weblinks, "Test a key hanging off a topic")
 49 | 
 50 |         
 51 |     def test_list_shape(self):
 52 |         """
 53 |         Let's test to make sure weblink works with just a [] shape, in which case it should
 54 |         just return a list of strings
 55 |         """
 56 |         
 57 |         r = self.run_query({
 58 |                 "id":"/en/migraine",
 59 |                 "/common/topic/weblink":[]
 60 |                 })
 61 |         
 62 |         weblinks = r[WEBLINK]
 63 |         self.assert_(len(weblinks), "there should be some weblinks in here!")
 64 | 
 65 |         for w in weblinks:
 66 |             self.assert_(isinstance(w, str))
 67 | 
 68 |     def test_topic_with_all_types_of_weblinks(self):
 69 |         """
 70 |         This particular topic has a weblinks generated from keys in all three 
 71 |         places - off the topic, off the annotation, off the resource
 72 |         """
 73 |         q = {
 74 |            "id": "/en/royal_mail",
 75 |            WEBLINK: [{
 76 |                "url":null,
 77 |                "template":{
 78 |                    "id":null,
 79 |                    "template":null,
 80 |                    "ns":null
 81 |                },
 82 |                "category":{
 83 |                   "id":null,
 84 |                   "name":null,
 85 |                   "optional":true
 86 |                },
 87 |                "key":null
 88 |            }]
 89 |         }
 90 |         r = self.run_query(q)
 91 |         
 92 |         weblink_dict = {}
 93 |         for w in r[WEBLINK]:
 94 |             weblink_dict[w['url']] = w
 95 |         
 96 |         official_link = weblink_dict.get("http://www.royalmailgroup.com/")
 97 |         self.assert_(official_link, "The official link for royal mail is present. Key Hangs off resource.")
 98 |         self.assert_(official_link['category']['name'] == "Official Website", "Official Website category is....Official Website")
 99 |         
100 | 
101 |         guardian_link = weblink_dict.get("http://www.guardian.co.uk//uk/post")
102 |         self.assert_(guardian_link, "Guardian link is present. Key hangs off annotation.")
103 |         self.assert_(guardian_link['category']['name'] == "Tag", "Category is Tag")
104 |         
105 |         wiki_link = weblink_dict.get("http://en.wikipedia.org/wiki/index.html?curid=349823")
106 |         self.assert_(wiki_link, "Wiki link is present. Key hangs off topic itself.")
107 | 
108 |         
109 | 


--------------------------------------------------------------------------------
/mql/mid.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Copyright 2020 Google LLC
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #      http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | # mid.py - machine ids.
 17 | 
 18 | from cStringIO import StringIO
 19 | import sys
 20 | 
 21 | ################################################################################
## version 1 constants
# A mid packs MAX_BITS bits: a version field, a graph id and an object id.
VERSION = 1L
MAX_BITS = 40
VERSION_BITS = 2
OBJID_BITS = 34
GRAPHID_BITS = MAX_BITS - VERSION_BITS - OBJID_BITS
# Value of the first 16 hex digits of a guid that belongs to graph 0;
# graphid_of_guid() subtracts it to get the graph number.
GRAPHID0 = 0x9202a8c04000641f
# OR-ed into the decoded object id by to_guid() to form the guid's low 64 bits.
GUID_BASE = 0x8000000000000000L

VERSION_MASK = (1L << VERSION_BITS) - 1L
MAX_GRAPHS = 1L << GRAPHID_BITS
GRAPHID_MASK = MAX_GRAPHS - 1L
OBJID_MASK = (1L << OBJID_BITS) - 1L

# Both evaluate to 0 for VERSION == 1, since VERSION - 1 is 0.
VERSION_LEFT = (VERSION - 1) << 38L
VERSION_RIGHT = (VERSION - 1) << 3L
 38 | 
 39 | ################################################################################
 40 | ## Exceptions
 41 | 
 42 | 
 43 | class InvalidMunch(Exception):
 44 |   pass
 45 | 
 46 | 
 47 | class InvalidGraphID(Exception):
 48 |   pass
 49 | 
 50 | 
 51 | class UnknownGraphID(Exception):
 52 |   pass
 53 | 
 54 | 
 55 | class InvalidMIDVersion(Exception):
 56 |   pass
 57 | 
 58 | 
 59 | class InvalidMID(Exception):
 60 |   pass
 61 | 
 62 | 
 63 | class InvalidObjID(Exception):
 64 |   pass
 65 | 
 66 | 
# Reverse lookup used by munch_of_char(): indexed by ord(character), it holds
# that character's munch value, or -1 for characters outside the alphabet.
munch_map = [-1] * 256
for i, c in enumerate("0123456789bcdfghjklmnpqrstvwxyz_"):
  munch_map[ord(c)] = long(i)
 70 | 
 71 | 
 72 | ## a Munch (copyright W. Harris, 2010) is 5 bits.
 73 | def char_of_munch(c):
 74 |   if not 0 <= c <= 31:
 75 |     raise InvalidMunch(c)
 76 |   return "0123456789bcdfghjklmnpqrstvwxyz_"[c]
 77 | 
 78 | 
 79 | def munch_of_char(c):
 80 |   value = munch_map[ord(c)]
 81 |   if value == -1:
 82 |     raise InvalidMunch(c)
 83 |   return value
 84 | 
 85 | 
 86 | def munchstr_of_int(n):
 87 |   buf = [""] * 16  #....
 88 | 
 89 |   def loop(i, n):
 90 |     if n == 0:
 91 |       return "".join(buf[16 - i:])
 92 |     buf[15 - i] = char_of_munch(n & 0x1f)
 93 |     return loop(i + 1, n >> 5)
 94 | 
 95 |   return loop(0, n)
 96 | 
 97 | 
 98 | def int_of_munchstr(str, ofs, l):
 99 |   rv = 0
100 |   i = ofs
101 |   while i <= (ofs + l) - 1:
102 |     v = munch_of_char(str[i])
103 |     rv = rv << 5 | v
104 |     i += 1
105 | 
106 |   return rv
107 | 
108 | 
def graphid_of_guid(guid):
  """Return the graph number encoded in the hex string `guid`.

  The first 16 hex digits, minus GRAPHID0, give the graph number. The next
  8 digits (ignoring their two lowest bits) must equal 0x80000000.

  Raises:
    UnknownGraphID: if the number is outside 0..MAX_GRAPHS-1 or the
      following digits do not have the expected value.
  """
  prefix = long(guid[:16], 16)
  marker = long(guid[16:24], 16) & 0xfffffffc
  n = prefix - GRAPHID0
  if marker != 0x80000000 or not 0 <= n < MAX_GRAPHS:
    raise UnknownGraphID(n)
  return n
117 | 
118 | 
def objid_of_guid(guid):
  """Return the object id: hex digits 23..31 of `guid`, masked by OBJID_MASK."""
  tail = long(guid[23:32], 16)
  return tail & OBJID_MASK
121 | 
122 | 
def of_guid(guid):
  """Build the "/m/..." mid for the hex string `guid`.

  The mid is "/m/", then one character carrying the version and graph id,
  then the munch encoding of the packed version / graph id / object id.
  graphid_of_guid() raises UnknownGraphID for a guid from an unknown graph.
  """
  graphid = graphid_of_guid(guid)
  objid = objid_of_guid(guid)
  packed = VERSION_LEFT | (graphid << 34) | objid
  version_char = char_of_munch((VERSION_RIGHT << 3) | graphid)
  return "/m/" + version_char + munchstr_of_int(packed)
130 | 
131 | 
def to_guid(mid):
  """Decode the mid string `mid` ("/m/" plus munch characters) into a guid.

  Returns the guid as a lowercase hex string without any prefix or suffix.

  Raises:
    InvalidMID: if `mid` is not 4..11 characters long or lacks the "/m/"
      prefix.
    InvalidMIDVersion: if the version character does not decode to VERSION.
    InvalidMunch: if `mid` contains a character outside the munch alphabet.
  """
  len_mid = len(mid)
  # Both conditions must hold: the length check is what guarantees that
  # mid[3] exists, and the prefix check rejects non-mid strings.
  if not (4 <= len_mid <= 11 and mid.startswith("/m/")):
    raise InvalidMID(mid)

  version_munch = munch_of_char(mid[3])
  # NOTE(review): with this shift only version munch 0 passes the check
  # below -- confirm whether `>> 3` was intended.
  ver = (version_munch << 3) + 1
  if ver != VERSION:
    raise InvalidMIDVersion(mid)

  graphid = GRAPHID0 | version_munch & GRAPHID_MASK
  graphid = graphid << 64
  objid = GUID_BASE | int_of_munchstr(mid, 4, len_mid - 4)
  guid = graphid | objid
  # '%x' yields the bare hex digits on every Python version, unlike
  # slicing hex() output, which depended on the trailing 'L' of a long.
  return "%x" % guid
147 | 
148 | 
if __name__ == "__main__":
  # Command-line helper: decode the mid given as the first argument.
  # Sample guids kept around for manual round-trip experiments:
  #o_guid = "9202a8c04000641f800000000172fcb8"
  #o_guid = "9202a8c04000641f800000000164382e"
  #o_guid = "9202a8c04000641f800000000172fcb8"
  o_guid = "9202a8c04000641f80000000013e068e"

  cli_args = sys.argv[1:]
  if not cli_args:
    print("usage: mid.py <mid to decode>")
    sys.exit(1)

  mid = cli_args[0]
  print(to_guid(mid))
  #mid    = of_guid(o_guid)
  #print mid
  #n_guid = to_guid(mid)
  #print n_guid
165 | 


--------------------------------------------------------------------------------
/emql/adapters/nytimes.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import mw, urllib, urlparse
 16 | from datetime import datetime
 17 | from collections import defaultdict
 18 | from mw.emql.adapter import Adapter
 19 | from lxml import etree
 20 | 
 21 | class nytimes_articles_adapter(Adapter):
 22 | 
 23 |     def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
 24 |         return {
 25 |             "key": [{ 
 26 |                 "optional": True, 
 27 |                 "value": None, 
 28 |                 "namespace" : "/user/jamie/nytdataid",
 29 |                 "limit": 10
 30 |              }]
 31 |         }
 32 |     
 33 |     def get_articles(self, me, nytd_key, api_keys):
 34 |         url, connection = me.get_session().http_connect('data.nytimes.com', "/%s.rdf" % nytd_key)
 35 |         connection.request('GET', url)
 36 |         response = connection.getresponse()
 37 |         rdf = response.read()
 38 |         rdf = etree.fromstring(rdf)
 39 |         
 40 |         # Grab the search api call
 41 |         search_url = rdf.xpath("//nyt:search_api_query", namespaces=rdf.nsmap)
 42 |         if not search_url:
 43 |             return []
 44 |         
 45 |         search_url = urlparse.urlparse(search_url[0].text)
 46 |         params = urlparse.parse_qs(search_url.query)
 47 |         params['api-key'] = api_keys['nytimes_articles']
 48 |         params['fields'] = ','.join([
 49 |             'date', 
 50 |             'url', 
 51 |             'nytd_lead_paragraph',
 52 |             'nytd_title', 
 53 |             'byline',
 54 |             'nytd_byline',
 55 |             'small_image_url',
 56 |             'small_image_height',
 57 |             'small_image_width',
 58 |             'source_facet'
 59 |         ])
 60 |         
 61 |         # build the actual query
 62 |         url, connection = me.get_session().http_connect(search_url.hostname, search_url.path)
 63 |         qs = urllib.urlencode(params, doseq=True)
 64 |         connection.request('GET', "%s?%s" % (url, qs))
 65 |     
 66 |         response = connection.getresponse()
 67 |         json = mw.json.loads(response.read())
 68 |     
 69 |         json = [{
 70 |             'headline': j['nytd_title'],
 71 |             'text': j['nytd_lead_paragraph'],
 72 |             'byline': j.get('nytd_byline', j.get('byline', None)),
 73 |             'source': j.get('source_facet', None),
 74 |             'date': datetime.strptime(j['date'], '%Y%m%d').isoformat(),
 75 |             'img': ({'url': j['small_image_url'],
 76 |                      'height': j.get('small_image_height') or None,
 77 |                      'width': j.get('small_image_widget') or None}
 78 |                     if j.get('small_image_url')
 79 |                     else None),
 80 |             'url': j['url']
 81 |         } for j in json['results']]
 82 |         
 83 |         return json
 84 |     
 85 |     def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
 86 |         result = defaultdict(list)
 87 |         query = params['query'] or {}
 88 |         
 89 |         if isinstance(query, list):
 90 |             query = query[0]
 91 |         
 92 |         limit = query.get('limit', 5)
 93 |         
 94 |         if not (api_keys and api_keys.get('nytimes_articles')):
 95 |             raise Exception('This property requires a New York Times API key. '
 96 |                             'Get one here: http://developer.nytimes.com/apps/register')
 97 |         
 98 |         for mqlres in args:
 99 |             if not mqlres['key']:
100 |                 continue
101 |             
102 |             for key in mqlres['key']:
103 |                 articles = self.get_articles(me, key['value'], api_keys)
104 |                 result[mqlres['guid']].extend(articles)
105 |             
106 |         return dict((k, v[:limit]) for k,v in result.iteritems())
107 | 
108 |     def help(self, tid, graph, mql, me, control, params):
109 |         from docs import nytimes_adapter_help
110 | 
111 |         return 'text/x-rst;', nytimes_adapter_help
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/util/parsedt.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | # imported from Client
 17 | 
 18 | # given a graphd datetime string (ISO 8601 format),
 19 | # parse it and format it for display
 20 | 
 21 | import re, datetime
 22 | 
 23 | 
 24 | ISO8601_TIME_PATTERN = r"(?P<hour>[0-9]{2})(:(?P<minute>[0-9]{2}))?(:(?P<second>[0-9]{2})(.(?P<fraction>[0-9]+))?)?"
 25 | 
 26 | ISO8601_TIME_REGEX = re.compile(ISO8601_TIME_PATTERN)
 27 | 
 28 | ISO8601_REGEX = \
 29 |     re.compile(r"(?P<bc>-)?(?P<year>[0-9]{4})(-(?P<month>[0-9]{1,2})(-(?P<day>[0-9]{1,2})"
 30 |                r"((?P<separator>.)" + ISO8601_TIME_PATTERN +
 31 |                r"(?P<timezone>Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?")
 32 | 
 33 | LABELS = ('year', 'month', 'day', 'hour', 'minute', 'second')
 34 | 
 35 | # This essentially maps the number of date components to a format,
 36 | # Especially annoying: these can't be unicode, strftime doesn't like that
 37 | FORMATS = [
 38 |     "%Y",                   # year only
 39 |     "%b %Y",                # year, month
 40 |     "%b %e, %Y",            # year, month, day
 41 |     "%b %e, %Y %l%p",       # year, month, day, hour
 42 |     "%b %e, %Y %l:%M%p",    # year, month, day, hour, minute
 43 |     "%b %e, %Y %l:%M:%S%p", # year, month, day, hour, minute, second
 44 | ]
 45 | BC_FORMATS = [format.replace("%Y", "%Y B.C.E.") for format in FORMATS]
 46 | CE_FORMATS = [format.replace("%Y", "%Y C.E.") for format in FORMATS]
 47 | 
 48 | 
 49 | def parse_isodate(iso_date):
 50 |      """
 51 |      Given an iso8601-formatted string (or fraction thereof) return a
 52 |      tuple containing a python datetime object and a format string that
 53 |      should be used to display it. The format is passible to strftime()
 54 |      and should be locale-sensitive about ordering (though today it is
 55 |      not)
 56 |      """
 57 | 
 58 |      m = ISO8601_REGEX.match(iso_date)
 59 |      if not m:
 60 |          m = ISO8601_TIME_REGEX.match(iso_date)
 61 |          if not m: # bad data in the graph
 62 |              return None, None
 63 |          time_only = True
 64 |      else:
 65 |          time_only = False
 66 | 
 67 |      values = m.groupdict()
 68 | 
 69 |      args = []
 70 |      if time_only:
 71 |          today = datetime.date.today()
 72 |          args = [today.year, today.month, today.day]
 73 |          start = 3
 74 |      else:
 75 |          start = 0
 76 | 
 77 |      count = start
 78 |      for k in xrange(start, 6):
 79 |          value = values[LABELS[k]]
 80 |          if value is None:
 81 |              args.append(1)
 82 |          else:
 83 |              count += 1
 84 |              args.append(int(value))
 85 | 
 86 |      try:
 87 |          d = datetime.datetime(*args)
 88 |      except ValueError:
 89 |          return None, None
 90 | 
 91 |      if values.get('bc'):
 92 |          format = BC_FORMATS[count - 1]
 93 |      elif 0 <= d.year < 1000:
 94 |          format = CE_FORMATS[count - 1]
 95 |      else:
 96 |          format = FORMATS[count - 1]
 97 |          if time_only:
 98 |              format = format[10:]
 99 | 
100 |      if iso_date.endswith('Z'):
101 |          format += ' UTC'
102 | 
103 |      return d, format
104 | 
105 | 
def format_isodate(iso_date):
    """
    Given an iso8601 formatted string (or fraction thereof) return
    a timezone-independent display of the string, or None when the
    string cannot be parsed.
    """

    parsed, fmt = parse_isodate(iso_date)
    if parsed is None:
        return None

    if parsed.year < 1900:
        # Format a stand-in year and patch the real one back in afterwards
        # (presumably because Python 2's strftime rejects years before
        # 1900). The stand-in has to be a leap year so that 29-Feb is
        # available; 1900 itself is NOT a leap year.
        stand_in = parsed.replace(year=1904)
        text = stand_in.strftime(fmt).replace("1904", str(parsed.year))
    else:
        text = parsed.strftime(fmt)

    # NOTE(review): formats that end in " UTC" skip this lower-casing, so
    # their AM/PM marker stays upper case - confirm that this is intended.
    if fmt.endswith("%p"):
        head, meridiem = text[:-2], text[-2:]
        text = head + meridiem.lower()

    collapsed = text.replace("  ", " ")
    return collapsed.lstrip()
128 | 
129 | 
if __name__ == "__main__":
    # Script mode: print the display form of the ISO 8601 string passed as
    # the first command-line argument.
    import sys
    iso_arg = sys.argv[1]
    print(format_isodate(iso_arg))
133 | 


--------------------------------------------------------------------------------
/test/cost_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python2.6
  2 | # Copyright 2020 Google LLC
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #      http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | # -*- coding: utf-8 -*-
 17 | #
 18 | """mql cost tests."""
 19 | 
 20 | __author__ = 'bneutra@google.com (Brendan Neutra)'
 21 | 
 22 | import google3
 23 | from pymql.mql import error
 24 | from pymql.test import mql_fixture
 25 | 
 26 | # stuff we care about
 27 | FLOAT_COSTS = ['mql_stime',
 28 |     'mql_stime',
 29 |     'mql_utime',
 30 |     'mql_rtime',
 31 |     'mql_dbtime'
 32 |     ]
 33 | INT_COSTS = ['pf',
 34 |     'mql_dbtries',
 35 |     'tu',
 36 |     'ts',
 37 |     'te'
 38 |   ]
 39 | 
 40 | # important note: in mock replay mode, stored graph response costs
 41 | # are tallied. But mql_[x]time will be calculated in realtime
 42 | # so those costs will be quite different than when the mock was 
 43 | # recorded (they will be smaller, kinda the point of mocking)
 44 | 
 45 | class MQLTest(mql_fixture.MQLTest):
 46 |   """mql cost tests."""
 47 | 
 48 |   def setUp(self):
 49 |     self.SetMockPath('data/cost.yaml')
 50 |     super(MQLTest, self).setUp()
 51 |     self.env = {'as_of_time': '2010-05-01'}
 52 | 
 53 |   def testCost(self):
 54 |     """simple positive test."""
 55 | 
 56 |     query = """
 57 |     {
 58 |       "/people/person/place_of_birth": null,
 59 |       "id": "/en/bob_dylan"
 60 |     }
 61 |     """
 62 |     exp_response = """
 63 |     {
 64 |       "/people/person/place_of_birth": "Duluth",
 65 |       "id": "/en/bob_dylan"
 66 |     }
 67 |     """
 68 |     self.DoQuery(query, exp_response=exp_response)
 69 |     cost = self.mql_result.cost
 70 |     self.costs_exist(cost)
 71 |     self.assertGreater(cost['te'], 10, 'te cost should be something')
 72 |     self.assertEqual(cost['mql_dbreqs'], 4, 'four graphd requests')
 73 | 
 74 |   def testCostError(self):
 75 |     """a query that gets a GQL error."""
 76 | 
 77 |     query = """
 78 |     {
 79 |       "guid": "foobar"
 80 |     }
 81 |     """
 82 |     exc_response = (
 83 |         error.MQLParseError,
 84 |         'Can only use a hexadecimal guid here'
 85 |     )
 86 |     self.DoQuery(query, exc_response=exc_response)
 87 |     cost = self.mql_service.get_cost()
 88 |     self.costs_exist(cost)
 89 |     self.assertEqual(cost['mql_dbreqs'], 1, 'only one graphd request')
 90 | 
 91 |   def testCostComplex(self):
 92 |     """query that does a lot of GQL."""
 93 | 
 94 |     query = """
 95 |     [{
 96 |       "/people/person/date_of_birth" : [],
 97 |       "/music/artist/album" : [],
 98 |       "/film/actor/film" : [],
 99 |       "/film/director/film" : [],
100 |       "/film/producer/film" : [],
101 |       "/tv/tv_actor/starring_roles" : [],
102 |       "/tv/tv_producer/programs_produced" : [],
103 |       "type": "/music/artist",
104 |       "b:type": "/film/actor",
105 |       "c:type": "/film/director",
106 |       "d:type": "/film/producer",
107 |       "e:type": "/tv/tv_actor",
108 |       "f:type": "/tv/tv_producer",
109 |       "id": null
110 |     }]
111 |     """
112 |     self.DoQuery(query)
113 |     cost = self.mql_result.cost
114 |     self.costs_exist(cost)
115 |     self.assertEqual(cost['mql_dbreqs'], 12, '12 graphd requests')
116 |     self.assertGreater(cost['tu'], 100, 'tu cost should be something')
117 | 
118 | 
119 |   def testQueryTimeout(self):
120 | 
121 |     self.env['query_timeout_tu'] = 50
122 |     query = """
123 |     [{
124 |       "type": "/people/person",
125 |       "date_of_birth": null,
126 |       "sort": "date_of_birth"
127 |     }]
128 |     """
129 |     exc_response = (
130 |         error.MQLTimeoutError,
131 |         'Query too difficult.'
132 |     )
133 |     self.DoQuery(query, exc_response=exc_response)
134 |     cost = self.mql_service.get_cost()
135 |     self.costs_exist(cost)
136 | 
137 |   def testQueryTimeoutFloat(self):
138 | 
139 |     # float is allowed
140 |     self.env['query_timeout_tu'] = 50.1
141 |     query = """
142 |     [{
143 |       "type": "/people/person",
144 |       "date_of_birth": null,
145 |       "sort": "date_of_birth"
146 |     }]
147 |     """
148 |     exc_response = (
149 |         error.MQLTimeoutError,
150 |         'Query too difficult.'
151 |     )
152 |     self.DoQuery(query, exc_response=exc_response)
153 |     cost = self.mql_service.get_cost()
154 |     self.costs_exist(cost)
155 | 
156 |   def costs_exist(self, cost):
157 |     for c in FLOAT_COSTS:
158 |       self.assertIsInstance(cost[c], float, 'cost %s exists' % c)
159 |     for c in INT_COSTS:
160 |       self.assertIsInstance(cost[c], int, 'cost %s exists' % c)
161 | 
# When run as a script, hand control to the fixture module's main().
if __name__ == '__main__':
  mql_fixture.main()
164 | 


--------------------------------------------------------------------------------
/formats/http.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | #
 17 | #  utilities for dealing with http
 18 | #
 19 | #  url escaping
 20 | #  content-type parsing and graph lookup
 21 | #
 22 | #  originally from mw/client/escaping.py
 23 | #  duplicated in mw/mql/grquoting.py
 24 | #
 25 | 
 26 | 
 27 | import urllib
 28 | 
# Table mapping response codes to messages; entries have the
# form {code: (shortmessage, longmessage)}.
# See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
# The table is taken from BaseHTTPRequestHandler.responses rather than
# being maintained by hand in this module.
from BaseHTTPServer import BaseHTTPRequestHandler
http_status_codes = BaseHTTPRequestHandler.responses
 34 | 
 35 | 
 36 | # some useful uri splitting code in the "urischemes" thirdparty module.
 37 | #
 38 | # later i found that the most complete uri manipulation module 
 39 | # seems to be in 4Suite:
 40 | #
 41 | # from Ft.Lib import Uri, Iri
 42 | 
 43 | 
 44 | #
 45 | #
 46 | # ALLOW:
 47 | #
 48 | # '~' is in the unreserved set, so it should be available like "_.-"
 49 | # ':' is in pchar
 50 | # '@' is in pchar (though naive text parsers may think it's an email address)
 51 | #
 52 | # "$" is a valid sub-delim
 53 | # "!" is a valid sub-delim
 54 | # "*" is a valid sub-delim
 55 | # "," is a valid sub-delim
 56 | # ";" is a valid sub-delim
 57 | #
 58 | # GENERALLY DISALLOW:
 59 | #
 60 | # "&" is in sub-delims but has special meaning to form parsers
 61 | # "=" is in sub-delims but excluded to avoid any possible confusion
 62 | # "+" is in sub-delims but excluded to avoid any possible confusion
 63 | #     with form-encoded queries
 64 | 
 65 | # ALWAYS DISALLOW
 66 | #
 67 | # "'" is in sub-delims but likely to confuse
 68 | # "(" is in sub-delims but definitely confuses email text parsers
 69 | # ")" is in sub-delims but definitely confuses email text parsers
 70 | 
# [A-Za-z0-9] and "_.-" are always safe in urllib.quote
# additionally, we allow:
#   "~" (unreserved), ":" and "@" (pchar) and the sub-delims "$", "!", "*",
#   "," and ";". The specific encoders in this module add further characters
#   to this base set.
our_safe = "~:@$!*,;"
 74 | 
 75 | # this handles unicode
 76 | def base_urlencode(data, safe):
 77 |     if isinstance(data, unicode):
 78 |         data = data.encode('utf_8')
 79 |     return urllib.quote(data, safe)
 80 | 
 81 | 
 82 | def urlencode(data):
 83 |     '''
 84 |     default url-encoder - please shift to one of the more
 85 |     specific versions, depending on whether you're quoting
 86 |     a path segment or a query arg.
 87 |     '''
 88 |     # "_.-" are always untouched
 89 |     return base_urlencode(data, ',')
 90 | 
 91 | 
 92 | 
 93 | # within path segments (between slashes) we don't need
 94 | #  to follow the same rules as for forms parsing.
 95 | #
 96 | # "=" is only special to form parsers
 97 | # "&" is only special to form parsers
 98 | # "+" is only special to form parsers
 99 | def urlencode_pathseg(data):
100 |     '''
101 |     urlencode for placement between slashes in an url.
102 |     '''
103 |     return base_urlencode(data, our_safe + "=&+")
104 | 
105 | 
# "/" and "?" are allowed in query but reserved in path segments
def urlencode_querykey(data):
    '''
    encode for placement before '=' in a query argument

    this allows '/?'
    '''
    key_safe = our_safe + '/?'
    return base_urlencode(data, key_safe)
115 | 
116 | 
# "/" is allowed in query but reserved in path segments
# "?" is allowed in query but reserved in path segments
# "=" should be allowed by form parsers after the key=
def urlencode_queryvalue(data):
    '''
    encode for placement after '=' in a query argument

    this allows '/?='
    '''
    # '=' was missing from the safe set, which contradicted the notes above
    # and made this function identical to urlencode_querykey.
    return base_urlencode(data, our_safe + '/?=')
127 | 
128 | 
# "/" and "?" are allowed in query but reserved in path segments
# "=", "&" and "+" are only special to form parsers
def urlencode_fragment(data):
    '''
    encode for use as an url fragment

    this allows '/?=&+'
    '''
    fragment_safe = our_safe + '/?=&+'
    return base_urlencode(data, fragment_safe)
141 | 
#
# who knows what browsers do?  it ain't rfc3986 that's for sure.
#
def urlencode_formtext(data):
    '''
    encode a form key or value, pretending to be a browser.

    space becomes '+' rather than '%20' here, which is what a
    browser form submit sends. unicode input is encoded as
    utf-8 first.
    '''
    raw = data.encode('utf_8') if isinstance(data, unicode) else data
    return urllib.quote_plus(raw, our_safe)
156 | 
157 | 
def urldecode(data):
    '''
    inverse of the encoders above: turn every "%xx" escape
    back into the character it stands for
    '''
    decoded = urllib.unquote(data)
    return decoded
163 | 


--------------------------------------------------------------------------------
/test/return_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python2.4
  2 | # Copyright 2020 Google LLC
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #      http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | 
 16 | # -*- coding: utf-8 -*-
 17 | #
 18 | """mql return directive."""
 19 | 
 20 | __author__ = 'bneutra@google.com (Brendan Neutra)'
 21 | 
 22 | # thanks warren for these dimetests
 23 | 
 24 | import google3
 25 | from pymql.mql import error
 26 | from pymql.test import mql_fixture
 27 | 
 28 | class MQLTest(mql_fixture.MQLTest):
 29 |   """mql return directive."""
 30 | 
 31 |   def setUp(self):
 32 |     self.SetMockPath('data/return.yaml')
 33 |     super(MQLTest, self).setUp()
 34 |     self.env = {'as_of_time': '2010-05-01'}
 35 | 
 36 | 
 37 |   def testReturnCountOfObject(self):
 38 |     """return count of object."""
 39 | 
 40 |     query = """
 41 |     {
 42 |       "/people/person/children": {
 43 |         "count": null,
 44 |         "return": "count"
 45 |       },
 46 |       "id": "/en/bob_dylan"
 47 |     }
 48 |     """
 49 |     exp_response = """
 50 |     {
 51 |       "/people/person/children": 6,
 52 |       "id": "/en/bob_dylan"
 53 |     }
 54 |     """
 55 |     self.DoQuery(query, exp_response=exp_response)
 56 | 
 57 |   def testReturnCountOfArray(self):
 58 |     """return count of array."""
 59 | 
 60 |     query = """
 61 |     {
 62 |       "/people/person/children": [
 63 |         {
 64 |           "count": null,
 65 |           "return": "count"
 66 |         }
 67 |       ],
 68 |       "id": "/en/bob_dylan"
 69 |     }
 70 |     """
 71 |     exp_response = """
 72 |     {
 73 |       "/people/person/children": [
 74 |         6
 75 |       ],
 76 |       "id": "/en/bob_dylan"
 77 |     }
 78 |     """
 79 |     self.DoQuery(query, exp_response=exp_response)
 80 | 
 81 |   def testReturnEstimateCountOfArray(self):
 82 |     """return estimate-count of array."""
 83 | 
 84 |     query = """
 85 |     {
 86 |       "/people/person/children": [
 87 |         {
 88 |           "return": "estimate-count",
 89 |           "estimate-count": null
 90 |         }
 91 |       ],
 92 |       "id": "/en/bob_dylan"
 93 |     }
 94 |     """
 95 |     exp_response = """
 96 |     {
 97 |       "/people/person/children": [
 98 |         6
 99 |       ],
100 |       "id": "/en/bob_dylan"
101 |     }
102 |     """
103 |     self.DoQuery(query, exp_response=exp_response)
104 | 
105 |   def testReturnCountNullWhenNone(self):
106 |     """return count null when none."""
107 | 
108 |     query = """
109 |     {
110 |       "album": {
111 |         "return": "count",
112 |         "name": "Arrested"
113 |       },
114 |       "type": "/music/artist",
115 |       "name": "The Police"
116 |     }
117 |     """
118 |     exp_response = """
119 |     null
120 |     """
121 |     self.DoQuery(query, exp_response=exp_response)
122 | 
123 |   def testReturnCount0WhenNoneAndOptional(self):
124 |     """return count 0 when none and optional."""
125 | 
126 |     query = """
127 |     {
128 |       "album": {
129 |         "optional": true,
130 |         "return": "count",
131 |         "name": "Arrested"
132 |       },
133 |       "type": "/music/artist",
134 |       "name": "The Police"
135 |     }
136 |     """
137 |     exp_response = """
138 |     {
139 |       "album": 0,
140 |       "type": "/music/artist",
141 |       "name": "The Police"
142 |     }
143 |     """
144 |     self.DoQuery(query, exp_response=exp_response)
145 | 
146 |   def testReturnIgnoresOtherResultValues(self):
147 |     """return ignores other result values."""
148 | 
149 |     query = """
150 |     {
151 |       "/people/person/children": [
152 |         {
153 |           "count": null,
154 |           "nationality": {
155 |             "id": "/en/united_states",
156 |             "name": null
157 |           },
158 |           "return": "count",
159 |           "id": null
160 |         }
161 |       ],
162 |       "id": "/en/bob_dylan"
163 |     }
164 |     """
165 |     exp_response = """
166 |     {
167 |       "/people/person/children": [
168 |         2
169 |       ],
170 |       "id": "/en/bob_dylan"
171 |     }
172 |     """
173 |     self.DoQuery(query, exp_response=exp_response)
174 | 
175 |   def testReturnImplicitCount(self):
176 |     """return implicit count."""
177 | 
178 |     query = """
179 |     {
180 |       "/people/person/children": {
181 |         "return": "count",
182 |         "id": null
183 |       },
184 |       "id": "/en/bob_dylan"
185 |     }
186 |     """
187 |     exp_response = """
188 |     {
189 |       "/people/person/children": 6,
190 |       "id": "/en/bob_dylan"
191 |     }
192 |     """
193 |     self.DoQuery(query, exp_response=exp_response)
194 | 
195 |   def testReturnIdFail(self):
196 |     """return id."""
197 | 
198 |     query = """
199 |     {
200 |       "/people/person/children": {
201 |         "date_of_birth": null,
202 |         "return": "id",
203 |         "id": null
204 |       },
205 |       "id": "/en/bob_dylan"
206 |     }
207 |     """
208 |     exc_response = (
209 |         error.MQLParseError,
210 | 	"'return' currently only supports 'count' and 'estimate-count'"
211 |     )
212 |     self.DoQuery(query, exc_response=exc_response)
213 | 
# When run as a script, hand control to the fixture module's main().
if __name__ == '__main__':
  mql_fixture.main()
216 | 


--------------------------------------------------------------------------------
/emql/adapters/search.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | from itertools import izip, chain
 17 | 
 18 | from mw.emql.adapter import Adapter
 19 | from mw.emql.emql import id_guid, formatted_id_guid, MQL_LIMIT
 20 | 
 21 | 
 22 | class search_adapter(Adapter):
 23 | 
 24 |     def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
 25 | 
 26 |         constraints = params.get('constraints')
 27 |         params = params.get('query')
 28 | 
 29 |         if params is None:
 30 |             if constraints is not None:
 31 |                 for operator, _params in constraints:
 32 |                     if operator == '~=':
 33 |                         params = _params
 34 |                         break
 35 |         
 36 |         if isinstance(params, dict) and params.get('query') is None:
 37 |             if constraints is not None:
 38 |                 for operator, _params in constraints:
 39 |                     if operator == '~=':
 40 |                         params['query'] = _params
 41 |                         break
 42 | 
 43 |         if isinstance(params, list):
 44 |             if params:
 45 |                 params = params[0]
 46 |             else:
 47 |                 params = None
 48 | 
 49 |         if isinstance(params, (str, unicode)):
 50 |             params = { 'query': params }
 51 |         elif params is None or params.get('query') is None:
 52 |             raise ValueError, 'no query'
 53 | 
 54 |         args = {}
 55 |         result = {}
 56 | 
 57 |         for arg, value in params.iteritems():
 58 |             if arg.endswith('|='):
 59 |                 name = str(arg[:-2])
 60 |             else:
 61 |                 name = str(arg)
 62 |             if name in ('query', 'prefix', 'prefixed',
 63 |                         'type', 'type_strict', 'domain', 'domain_strict',
 64 |                         'type_exclude', 'type_exclude_strict',
 65 |                         'domain_exclude', 'domain_exclude_strict',
 66 |                         'limit', 'denylist', 'related', 'property',
 67 |                         'mql_filter', 'geo_filter', 'as_of_time', 'timeout'):
 68 |                 args[name] = value
 69 |             elif name != 'score':
 70 |                 result[name] = value
 71 | 
 72 |         for arg, value in parent.iteritems():
 73 |             if arg.endswith('|='):
 74 |                 name = str(arg[:-2])
 75 |             else:
 76 |                 name = str(arg)
 77 |             if name not in args:
 78 |                 if name == 'limit':
 79 |                     args[name] = value
 80 |                 elif name == 'type' and isinstance(value, basestring):
 81 |                     args['type_strict'] = 'any'
 82 |                     args[name] = value
 83 | 
 84 |         if 'limit' not in args:
 85 |             args['limit'] = MQL_LIMIT # plug-in default MQL limit
 86 | 
 87 |         if 'score' in params:
 88 |             matches = me.get_session().relevance_query(tid, format='ac', **args)
 89 |             guids = ['#' + match['guid'] for match in matches]
 90 |         else:
 91 |             matches = me.get_session().relevance_query(tid, format='guids', **args)
 92 |             guids = ['#' + guid for guid in matches]
 93 | 
 94 |         if guids:
 95 |             result['guid|='] = guids
 96 |         else:
 97 |             result['guid|='] = ['#00000000000000000000000000000000']
 98 | 
 99 |         if 'score' in params:
100 |             result[':extras'] = {
101 |                 "fetch-data": dict((match['guid'], match['score'])
102 |                                    for match in matches)
103 |             }
104 | 
105 |         return result
106 | 
107 |     def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
108 | 
109 |         constraints = params.get('constraints')
110 |         scores = params.get(':extras', {}).get('fetch-data')
111 |         params = params.get('query')
112 | 
113 |         was_list = False
114 |         if isinstance(params, list):
115 |             if params:
116 |                 params = params[0]
117 |                 was_list = True
118 |             else:
119 |                 params = None
120 | 
121 |         if params is None:
122 |             if constraints is not None:
123 |                 for operator, _params in constraints:
124 |                     if operator == '~=':
125 |                         params = _params
126 |                         break
127 | 
128 |         if isinstance(params, (str, unicode)):
129 |             results = dict((mqlres['guid'], params) for mqlres in args)
130 |         else:
131 |             if scores is not None:
132 |                 for mqlres in args:
133 |                     mqlres['score'] = scores[mqlres['guid'][1:]]
134 | 
135 |             if 'guid' in params:
136 |                 fn = dict.get
137 |             else:
138 |                 fn = dict.pop
139 | 
140 |             results = {}
141 |             for mqlres in args:
142 |                 mqlres['query'] = params['query']
143 |                 results[fn(mqlres, 'guid')] = [mqlres] if was_list else mqlres
144 | 
145 |         return results
146 | 
147 |     def help(self, tid, graph, mql, me, control, params):
148 |         from docs import search_adapter_help
149 | 
150 |         return 'text/x-rst;', search_adapter_help
151 | 
152 | 
153 | 


--------------------------------------------------------------------------------
/test/regression_misc_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | #
 16 | """Test misc. regressions."""
 17 | 
 18 | __author__ = 'bneutra@google.com (Brendan Neutra)'
 19 | 
 20 | import google3
 21 | from pymql.test import mql_fixture
 22 | 
 23 | 
 24 | class MQLTest(mql_fixture.MQLTest):
 25 | 
 26 |   def setUp(self):
 27 |     self.SetMockPath('data/regression_misc.yaml')
 28 |     super(MQLTest, self).setUp()
 29 |     self.env = {'as_of_time': '2009-10-01'}
 30 | 
 31 |   def testUtf8(self):
 32 |     """Regression test for issue 4970606."""
 33 | 
 34 |     query = u"""
 35 |       [{"name":"Beyonc\u00e9", "id": null}]
 36 |     """
 37 |     exp_response = u"""
 38 |     [
 39 |       {
 40 |         "id": "/en/beyonce",
 41 |         "name": "Beyonc\u00e9"
 42 |       },
 43 |       {
 44 |         "id": "/m/07ldnn6",
 45 |         "name": "Beyonc\u00e9"
 46 |       }
 47 |     ]
 48 |     """
 49 |     self.DoQuery(query.encode('utf-8'),
 50 |                  exp_response=exp_response.encode('utf-8'))
 51 | 
 52 |   def testCursor(self):
 53 |     """JIRA API-62 bug."""
 54 | 
 55 |     # not sure the bug is valid but I just wanted to capture
 56 |     # this style of query. the bug was that it timed out
 57 |     # but i can't reproduce that -brendan
 58 | 
 59 |     query = """
 60 |       [
 61 |         {
 62 |           "attribution": {
 63 |             "guid": null,
 64 |             "optional": true,
 65 |             "id": null
 66 |           },
 67 |           "reverse": null,
 68 |           "creator": {
 69 |             "guid": null,
 70 |             "optional": true,
 71 |             "id": null
 72 |           },
 73 |           "timestamp": null,
 74 |           "timestamp>=": "2012-01-01T20",
 75 |           "source": {
 76 |             "guid": null,
 77 |             "optional": true,
 78 |             "id": null
 79 |           },
 80 |           "valid": null,
 81 |           "limit": 1000,
 82 |           "master_property": null,
 83 |           "operation": null,
 84 |           "type": "/type/link",
 85 |           "target_value": null,
 86 |           "target": {
 87 |             "guid": null,
 88 |             "optional": true,
 89 |             "id": null
 90 |           }
 91 |         }
 92 |       ]
 93 |     """
 94 |     cursor = True
 95 |     while 1:
 96 |       self.env = {'cursor': cursor, 'as_of_time': '2012-01-02'}
 97 |       self.MQLQuerier(query)
 98 |       cursor = self.mql_result.cursor
 99 |       if cursor is False: break
100 | 
101 |   def testCursorComplex(self):
102 |     """random hash ordering cursor bug b/8323666."""
103 |     # TODO(bneutra) how to repro the bug, testing in process
104 |     # doesn't tickle it.
105 | 
106 |     query = """
107 |     [
108 |       {
109 |         "sort": "-timestamp",
110 |         "type": "/type/link",
111 |         "reverse": null,
112 |         "creator": null,
113 |         "timestamp": null,
114 |         "source": {
115 |           "mid": null
116 |         },
117 |         "a:creator": {
118 |           "type": "/dataworld/provenance",
119 |           "optional": "forbidden"
120 |         },
121 |         "valid": null,
122 |         "limit": 10,
123 |         "master_property": null,
124 |         "operation": null,
125 |         "target": {
126 |           "mid": null
127 |         },
128 |         "target_value": null,
129 |         "b:creator": {
130 |           "usergroup": {
131 |             "id|=": [
132 |               "/freebase/bots",
133 |               "/en/metaweb_staff",
134 |               "/en/current_metaweb_staff"
135 |             ],
136 |             "optional": "forbidden"
137 |           }
138 |         }
139 |       }
140 |     ]
141 | """
142 |     cursor = True
143 |     i = 0
144 |     while i < 30:
145 |       i+=1
146 |       self.env = {'cursor': cursor}
147 |       self.MQLQuerier(query)
148 |       self.assertEquals(len(self.mql_result.result), 10)
149 |       # we should have a new cursor
150 |       self.assertNotEquals(cursor, self.mql_result.cursor)
151 |       cursor = self.mql_result.cursor
152 |       # it should be a cursor
153 |       self.assertNotEquals(cursor, False)
154 | 
155 |   def testCursorComplex2(self):
156 |     """random hash ordering cursor bug b/8323666 freeq."""
157 | 
158 |     # TODO(bneutra) how to repro the bug, testing in process
159 |     # doesn't tickle it.
160 | 
161 |     query = """
162 |     [
163 |       {
164 |         "master_property": {
165 |           "id": null,
166 |           "reverse_property": null
167 |         },
168 |         "limit": 3,
169 |         "type": "/type/link",
170 |         "target": {
171 |           "guid": null,
172 |           "type": [],
173 |           "id": "#9202a8c04000641f8000000003b50f85"
174 |         },
175 |         "source": {
176 |           "guid": null,
177 |           "type": [],
178 |           "id": null
179 |         }
180 |       }
181 |     ]
182 |     """
183 |     cursor = True
184 |     i = 0
185 |     while i < 30:
186 |       i+=1
187 |       self.env = {'cursor': cursor, 'as_of_time': '2013-03-01'}
188 |       self.MQLQuerier(query)
189 |       self.assertEquals(len(self.mql_result.result), 3)
190 |       # we should have a new cursor
191 |       self.assertNotEquals(cursor, self.mql_result.cursor)
192 |       cursor = self.mql_result.cursor
193 |       # it should be a cursor
194 |       self.assertNotEquals(cursor, False)
195 | 
196 | 
# Run the tests through the fixture's entry point when executed as a script.
if __name__ == '__main__':
  mql_fixture.main()
199 | 


--------------------------------------------------------------------------------
/formats/image.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | """
 16 | 
 17 | functions for manipulating image content
 18 | 
 19 | everything is done in memory, we assume images
 20 | aren't too large.
 21 | 
 22 | """
 23 | 
 24 | import os, contenttype
 25 | from StringIO import StringIO
 26 | from mw.log import LOG
 27 | 
 28 | from mw.api.content import Content, ContentWrapper
 29 | from mw.error import ContentLoadError
 30 | import mw.siteconfig
 31 | from mw.mql import scope
 32 | 
# Mode names and the default mode; presumably thumbnail scaling modes
# (TODO confirm). Neither constant is referenced elsewhere in this module.
TN_MODES = ['fit', 'fill', 'fillcrop', 'fillcropmid']
DEF_TN_MODE = 'fit'
 35 | 
 36 | class ImageContent(ContentWrapper):
 37 |     """
 38 |     methods for dealing with image content
 39 |     """
 40 | 
 41 |     # ie6 uses some bizarre content_types for PNG and JPEG images
 42 |     # XXX it would be nice to fix the content_type in the
 43 |     #  /type/content object, but it may already have been uploaded.
 44 |     # so for now, images uploaded from ie6 will have the "wrong"
 45 |     #  content-type and we'll need to garden them.
 46 |     remap_dumb_ie_mime_types = {
 47 |         'image/pjpeg': contenttype.MediaType('image/jpeg'),
 48 |         'image/x-png': contenttype.MediaType('image/png')
 49 |     }
 50 | 
 51 | 
 52 |     @classmethod
 53 |     def match(cls, c):
 54 |         """
 55 |         true if this ContentWrapper subclass applies to the content argument.
 56 |         """
 57 |         media_type = cls.remap_dumb_ie_mime_types.get(c.media_type, c.media_type)
 58 |         if not c.media_type.startswith('image/'):
 59 |             return False
 60 | 
 61 |         subtype = media_type.split('/')[1]
 62 |         
 63 |         return subtype in ('gif', 'png', 'jpeg', 'x-icon')
 64 | 
 65 |     def __init__(self, content):
 66 |         super(ImageContent, self).__init__(content)
 67 |         self.size = None
 68 | 
 69 |     def load(self, mss):
 70 |         result = mss.mqlread(dict(id=self.content.content_id,
 71 |                              type='/common/image',
 72 |                              size=dict(x=None, y=None)))
 73 | 
 74 |         if result is None:
 75 |             return
 76 |         
 77 |         self.size = (result['size']['x'], result['size']['y'])
 78 | 
 79 |     def upload(self, mss):
 80 |         """
 81 |         add a /common/image facet to the type/content
 82 |         """
 83 |         self.load(mss)
 84 |         if self.size is None:
 85 |             self.parse(mss)
 86 |             
 87 |         w = { 'id': self.content.content_id,
 88 |               'type': { 'connect': 'insert',
 89 |                         'id': '/common/image' }}
 90 |         if self.size[0] and self.size[1]:
 91 |             w['/common/image/size'] = { 'create': 'unless_exists',
 92 |                                         'type': '/measurement_unit/rect_size',
 93 |                                         'x': self.size[0],
 94 |                                         'y': self.size[1] }
 95 | 
 96 |         with mss.push_variables(authority="/user/content_administrator",
 97 |                                 privileged=scope.Authority):
 98 |             result = mss.mqlwrite(w)
 99 | 
100 |     def parse(self, mss):
101 |         """
102 |         extract data from the image
103 | 
104 |         exif tags from digital cameras
105 |         """
106 |         # exif tags from digital cameras?
107 |         
108 |         self.content.fetch_body(mss)
109 |         try:
110 |             # XXXarielb move to pygmy as soon as pygmy doesn't crash within threads
111 |             from PIL import Image
112 |             img = Image.open(StringIO(self.content.body))
113 |             # believe the image parser over anything in the graph
114 |             self.size = img.size
115 |         except ImportError, ie:
116 |             LOG.error("format.image.no_pil", str(e))
117 |             raise
118 |         except Exception, e: 
119 |             LOG.error("format.image.parse", str(e))
120 |             raise ContentLoadError('Invalid image file', 
121 |                                    app_code="upload/invalid_image_data", 
122 |                                    error=e)
123 | 
124 |     def update_content(self):
125 |         media_type = self.content.media_type
126 |         LOG.info('update_content', "Image Updating content from %s to %s" % (media_type,
127 |                                                       self.remap_dumb_ie_mime_types.get(media_type)))
128 |         self.content.media_type = self.remap_dumb_ie_mime_types.get(media_type, media_type)
129 |   
130 |     @classmethod
131 |     def get_fallback_image_path(cls):
132 | 	try:
133 |             config = mw.siteconfig.get_config2()
134 |             path = config.get('me.img_thumb_fallback')
135 |             if path and os.path.exists(path):
136 |                 return path
137 | 	except KeyError, e:
138 | 	    pass
139 | 
140 |         LOG.error("image.thumb", "Could not find fallback image for thumbnailing service.")
141 |         return None
142 | 
143 | 
144 |     # failover for thumnailing operation in the event that 
145 |     # the image is too large to thumbnail
146 |     def thumb_fallback(self, mss):
147 |         path = ImageContent.get_fallback_image_path()
148 |         if path is None:
149 |             return None
150 |         # load data 
151 |         fd = open(path)
152 |         data = fd.read()
153 |         fd.close()
154 |         # the fallback image is a known GIF image.
155 |         thumb_mt = 'image/gif'
156 |         c = Content(media_type=thumb_mt)
157 |         c.set_body(data)
158 |         return c
159 | 


--------------------------------------------------------------------------------
/error.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | # Table mapping response codes to messages; entries have the
 16 | # form {code: (shortmessage, longmessage)}.
 17 | # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
 18 | from BaseHTTPServer import BaseHTTPRequestHandler
 19 | import traceback
 20 | from pymql.log import LOG
 21 | 
 22 | 
 23 | # HTTP error code messages
 24 | # XXX: We really really need to move to py2.5
 25 | def is_valid_HTTP_code(code):
 26 |   return code in BaseHTTPRequestHandler.responses.keys()
 27 | 
 28 | 
 29 | def get_HTTP_err(code):
 30 |   return '%d %s' % (code, BaseHTTPRequestHandler.responses[code][0])
 31 | 
 32 | 
 33 | class ParameterizedError(Exception):
 34 |   """
 35 |     This is a special Exception class that is used to format messages
 36 |     where the contents of the message itself are important. Use it
 37 |     exactly how you would use the python % format operator:
 38 | 
 39 |     class MyException(ParameterizedError):
 40 |         pass
 41 | 
 42 |     raise MyException('Got an error in query %(query)s', query=q)
 43 | 
 44 |     This will format the string appropriately, but allow exception
 45 |     handlers to unpack the relevant data and optionall reinsert it
 46 |     into the result string
 47 |     """
 48 |   DEF_PFX = '/api/status/error'
 49 |   DEF_ME_CODE = '/unknown/unknown'
 50 | 
 51 |   def __init__(self,
 52 |                msg,
 53 |                http_code=400,
 54 |                app_code=DEF_ME_CODE,
 55 |                inner_exc=None,
 56 |                **kwds):
 57 |     self.msg = msg
 58 |     Exception.__init__(self, msg)
 59 | 
 60 |     if not is_valid_HTTP_code(http_code):
 61 |       http_code = 500
 62 |     self.http_status = get_HTTP_err(http_code)
 63 |     self.http_code = http_code
 64 | 
 65 |     # app_code and and api code setup
 66 |     codes = app_code.split('/')
 67 |     if len(codes) < 3:
 68 |       codes = self.DEF_ME_CODE.split('/')
 69 |     self.comp_code = '%s/%s' % (self.DEF_PFX, codes[1])
 70 |     self.app_code = '%s' % '/'.join(codes[2:])
 71 |     self.messages = [self.gen_msgs(**kwds)]
 72 | 
 73 |     if not kwds.has_key('error'):
 74 |       # don't extract the current frame (__init__)
 75 |       stack = traceback.extract_stack()[:-1]
 76 |       kwds['traceback'] = '\r\n'.join(traceback.format_list(stack))
 77 | 
 78 |     # log inner exception or self
 79 |     exc = self
 80 |     if inner_exc:
 81 |       exc = inner_exc
 82 |     comp = app_code[1:].replace('/', '.')
 83 |     if exc == self:
 84 |       LOG.debug(comp, msg, **kwds)
 85 |     else:
 86 |       LOG.exception(msg, **kwds)
 87 |     self.kwds = kwds
 88 | 
 89 |   def gen_msgs(self, **kwds):
 90 |     return {
 91 |         'code': '%s/%s' % (self.DEF_PFX, self.app_code),
 92 |         'message': self.msg,
 93 |         'info': kwds.copy()
 94 |     }
 95 | 
 96 |   def get_err_dict(self):
 97 |     return {
 98 |         'status': self.http_status,
 99 |         'code': self.comp_code,
100 |         'messages': self.messages
101 |     }
102 | 
103 |   def __str__(self):
104 |     return str(self.get_err_dict())
105 | 
106 | 
# Concrete error types. None of the classes below adds any behavior of its
# own; each is a distinct ParameterizedError subclass, so code can raise and
# catch a specific kind of error by class.
class NetworkAddressError(ParameterizedError):
  pass


class ContentLoadError(ParameterizedError):
  pass


class TypeVerifyError(ParameterizedError):
  pass


class EmailError(ParameterizedError):
  pass


class SubscriptionError(ParameterizedError):
  pass


class MSSError(ParameterizedError):
  pass


class UserLookupError(ParameterizedError):
  pass


class UserAuthError(ParameterizedError):
  pass


class BlobError(ParameterizedError):
  pass


class BLOBClientError(ParameterizedError):
  pass


class RelevanceError(ParameterizedError):
  pass


class TextSearchError(ParameterizedError):
  pass


class AutocompleteError(ParameterizedError):
  pass


class EmptyResult(ParameterizedError):
  pass


class GraphConnectionError(ParameterizedError):
  pass


class FormattingError(ParameterizedError):
  pass


class SessionError(ParameterizedError):
  pass


class ConfigError(ParameterizedError):
  pass


class SanitizationError(ParameterizedError):
  pass


class BlurbError(ParameterizedError):
  pass


class DomainOperationError(ParameterizedError):
  pass


class GenericRuntimeError(ParameterizedError):
  pass


class OAuthDisabledError(ParameterizedError):
  pass
197 | 
198 | 
class RecaptchaError(ParameterizedError):
  """ParameterizedError that defaults to HTTP 500 and also sets .message."""

  def __init__(self,
               msg,
               http_code=500,
               app_code=ParameterizedError.DEF_ME_CODE,
               inner_exc=None,
               **kwds):
    # Expose the text as .message in addition to the base class's .msg.
    self.message = msg
    ParameterizedError.__init__(self, msg, http_code, app_code, inner_exc,
                                **kwds)
215 | 
216 | 
class ReadOnlyDatabaseError(ParameterizedError):
  """ParameterizedError with a stock message when none (or '') is given."""

  def __init__(self, msg=None, *args, **kwds):
    if not msg:
      msg = 'You cannot save right now. Please try again later'
    ParameterizedError.__init__(self, msg, *args, **kwds)
222 | 


--------------------------------------------------------------------------------
/mql/graph/conn_mock.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """Connector classes for mocked graphd query and response strings.
 15 | 
 16 | Use these connectors when using the pymql library.
 17 | See test/mql_fixture.py in pymql for a reference
 18 | of how to use the record and replay connectors.
 19 | """
 20 | 
 21 | __author__ = 'bneutra@google.com (Brendan Neutra)'
 22 | import sys
 23 | import hashlib
 24 | import re
 25 | import time
 26 | from pymql.mql import error
 27 | from pymql.mql.graph.connector import GraphConnector
 28 | from pymql.mql.grparse import ReplyParser
 29 | from absl import logging
 30 | 
 31 | 
class GraphMockException(Exception):
  """Raised by MockReplayConnector when a query has no mocked response."""
  pass
 34 | 
 35 | 
# Policy map that MockRecordConnector passes to GraphConnector when the
# caller supplies no (or an empty) 'policy_map'. Keys are policy names.
# NOTE(review): the numeric values are presumably seconds -- confirm against
# GraphConnector.
TIMEOUT_POLICIES = {
    'default': {
        'timeout': 8.0,
        'stubby_deadline': 10.0,
        'fail_fast': False,
    },
    'bootstrap': {
        'timeout': 2.0,
        'stubby_deadline': 4.0,
        'fail_fast': False,
    },
}
 48 | 
 49 | 
 50 | class MockRecordConnector(GraphConnector):
 51 |   """Mock connector for recording graphd responses.
 52 | 
 53 |   This class will append to the mockdata dictionary that it
 54 |   is handed. It interacts with a slightly modified
 55 |   live connector that you specify
 56 |   (e.g. the mock stubby connector)
 57 |   See test/mql_fixture.py for an implentation example.
 58 |   """
 59 | 
 60 |   def __init__(self, mockdata, connector, **kwargs):
 61 | 
 62 |     if not kwargs.get('policy_map', None):
 63 |       kwargs['policy_map'] = TIMEOUT_POLICIES
 64 |     GraphConnector.__init__(self, **kwargs)
 65 |     self.mockdata = mockdata
 66 |     self._conn = connector
 67 |     self._conn._save_raw_response = True
 68 |     self._mocked = {}
 69 | 
 70 |   def open(self, policy=None):
 71 | 
 72 |     self._conn.open(policy)
 73 | 
 74 |   def transmit_query(self, q, policy, deadline, **kwargs):
 75 | 
 76 |     try:
 77 |       result = self._conn.transmit_query(q, policy, deadline)
 78 |     except error.MQLTimeoutError:
 79 |       self.gen_mock_data(q, self._conn._raw_response)
 80 |       self.totalcost = self._conn.totalcost
 81 |       raise
 82 | 
 83 |     self.gen_mock_data(q, self._conn._raw_response)
 84 |     self.totalcost = self._conn.totalcost
 85 |     return result
 86 | 
 87 |   def reset_cost(self):
 88 |     if hasattr(self, '_conn'):
 89 |       self._conn.reset_cost()
 90 | 
 91 |   def gen_mock_data(self, q, result):
 92 | 
 93 |     k, hsh = strip_mock_query(q)
 94 |     if hsh in self._mocked:
 95 |       # if a query has been seen before, assume it needs another
 96 |       # version of the response mocked.
 97 |       self._mocked[hsh] += 1
 98 |       hsh = hsh + '_' + str(self._mocked[hsh])
 99 |     else:
100 |       self._mocked[hsh] = 0
101 |     self.mockdata[hsh] = [k, result]
102 | 
103 | 
class MockReplayConnector(GraphConnector):
  """Mock connector for replaying recorded graphd responses.

  This class will read from the mockdata dictionary that it
  is handed. It doesn't connect or interact with graphd.
  It's faster and more reliable than talking to a live db.
  See test/mql_fixture.py for an implementation example.
  """

  def __init__(self, mockdata):
    # don't connect to a graph, do not call __init__
    self.no_timeouts = False
    self.totalcost = {}
    self.mockdata = mockdata
    # query hash -> number of times that query has been repeated so far
    self._mocked = {}

  def open(self, policy=None):
    """Nothing to open: no graph connection is ever made."""
    pass

  def transmit_query(self, q, policy, deadline, **kwargs):
    """Return the parsed recorded reply for q.

    The query is reduced to its hash with strip_mock_query; a repeated
    query gets the same '_<n>' suffix that record mode gave it. Also sets
    self.dateline from the recorded response (None when it has none).

    Raises:
      GraphMockException: no response was recorded for this query.
    """
    start_time = time.time()
    logging.debug('mocking query: %s', q)
    k, hsh = strip_mock_query(q)

    if hsh in self._mocked:
      # we've seen this query before for this test
      # so increment as we did in record mode
      self._mocked[hsh] += 1
      hsh = hsh + '_' + str(self._mocked[hsh])
    else:
      self._mocked[hsh] = 0

    if hsh not in self.mockdata:
      msg = '%s NO MOCKED REPONSE for this query: %s' % (hsh, k)
      logging.error(msg)
      raise GraphMockException(msg)

    # m is the recorded [stripped query, raw response] pair
    m = self.mockdata[hsh]
    msg = 'mock query found %s: %s' % (hsh, m[0])
    logging.debug(msg)
    logging.debug('mock response found: %s', m[1])
    # raw string: '\S' is a regex escape, not a string escape
    rg = re.search(r' dateline="(\S+)" ', m[1])
    self.dateline = None
    if rg:
      self.dateline = rg.group(1)

    reply_parser = ReplyParser()
    reply_parser.parse_full_reply(m[1])
    ret = reply_parser.get_reply()
    dbtime = time.time() - start_time
    self.add_graph_costs(ret.cost, dbtime, tries=1)
    return ret

  def _get_policy(self, policy=None):
    """Always return None; this connector does not use policies."""
    return None
159 | 
160 | 
def strip_mock_query(q):
  """Normalize a graphd query and return (normalized query, sha1 hex digest).

  Normalization removes the first ' id=... ' directive and replaces every
  'timestamp>20..' value with one fixed timestamp, so that queries which
  differ only in those parts produce the same digest.

  Args:
    q: the query string.

  Returns:
    A (k, hsh) tuple: the normalized query and the hex SHA-1 of it.
  """
  # strip off the id
  # note the query may be spread over multiple lines
  # but the directives should be on the first one.
  k = re.sub(r' (id=\S+) ', ' ', q, count=1)

  # exception cases
  # timestamp stuff generated when creating mock responses is fine when it
  # comes time to replay, but mql does a scope query in realtime, not sure why
  # TODO(bneutra): why must MQL do this?
  # NOTE: the '.' before the fractional part is left as a wildcard, exactly
  # as before, so the digests of already-recorded mock data cannot change.
  p = re.compile(r'timestamp>20\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d+ ')
  if p.search(k):
    logging.debug('we saw a timestamp in the query %s', k)
    k = p.sub('timestamp>2010-09-23T00:00:00.000001 ', k)

  # hashlib needs bytes: encode text first. ASCII input hashes to the
  # same digest as before.
  data = k if isinstance(k, bytes) else k.encode('utf-8')
  hsh = hashlib.sha1(data).hexdigest()
  return k, hsh
180 | 


--------------------------------------------------------------------------------
/formats/contenttype.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | """
 17 | routines for working with content-type headers
 18 | and other sources of media_types and text_encodings.
 19 | 
 20 | """
 21 | 
 22 | import cgi
 23 | from mw.formats.uniqstr import UniqueStr
 24 | from mw.util import keyquote
 25 | 
 26 | class MediaType(UniqueStr):
 27 |     """
 28 |     this looks like an ordinary python str containing a media-type.
 29 |     it has some extra methods on it that are useful for the metaweb.
 30 |     """
 31 | 
 32 |     _valid_part0 = ('application', 'audio', 'image', 'message', 'model', 'multipart',
 33 |                     'text', 'text_encoding', 'video')
 34 | 
 35 |     @property
 36 |     def id(self):
 37 |         """the id property holds the metaweb id: value  """
 38 |         return '/media_type/%s' % '/'.join(keyquote.quotekey(part)
 39 |                                            for part in self.split('/'))
 40 | 
 41 |     metaweb_type = '/common/media_type'
 42 |     
 43 |     type = property(lambda self: str(self).split('/')[0].strip())
 44 |     subtype = property(lambda self: str(self).split('/')[1].strip())
 45 | 
 46 |     @classmethod
 47 |     def normalize(cls, s):
 48 |         s = UniqueStr.normalize(s)
 49 | 
 50 |         if len(s) > 128:
 51 |             raise ValueError('invalid media type "%s"' % s)
 52 | 
 53 |         parts = s.lower().split('/')
 54 |         if len(parts) != 2:
 55 |             raise ValueError('invalid media type "%s"' % s)
 56 | 
 57 |         if parts[0] not in cls._valid_part0:
 58 |             raise ValueError('invalid media type "%s"' % s)
 59 | 
 60 |         return s
 61 | 
 62 |     ###################################################
 63 | 
 64 |     @classmethod
 65 |     def from_id(cls, id):
 66 |         if id is None:
 67 |             return None
 68 |         assert id.startswith('/media_type/')
 69 |         idpath = id[len("/media_type/"):]
 70 | 
 71 |         return keyquote.unquote_id(idpath)
 72 | 
 73 | class TextEncoding(UniqueStr):
 74 |     """
 75 |     canonicalized text encoding string.
 76 | 
 77 |     # see http://WWW.IANA.ORG/assignments/character-sets
 78 |     """
 79 | 
 80 |     metaweb_type = '/common/text_encoding'
 81 | 
 82 |     @property
 83 |     def id(self):
 84 |         """the id property holds the metaweb id: value """
 85 |         return '/media_type/text_encoding/%s' % keyquote.quotekey(self.lower())
 86 | 
 87 |     @property
 88 |     def codec(self):
 89 |         """the codec property holds the python codec"""
 90 |         return self._codec
 91 | 
 92 |     @codec.setter
 93 |     def codec(self, value):
 94 |         self._codec = value
 95 | 
 96 |     @classmethod
 97 |     def normalize(cls, s):
 98 |         s = UniqueStr.normalize(s)
 99 | 
100 |         # XXX check for valid token
101 | 
102 |         if len(s) > 20:
103 |             raise ValueError, 'invalid charset "%s"' % s
104 | 
105 |         # STANDARDS PEOPLE DIG ALL CAPS.
106 |         return s.upper()
107 | 
108 |     @classmethod
109 |     def from_id(cls, id):
110 |         if id is None:
111 |             return None
112 | 
113 |         # better be ASCII, but make sure it's not unicode
114 |         id = str(id)
115 |         # XXX this is a bad namespace location!
116 |         assert id.startswith('/media_type/text_encoding/')
117 |         idpath = id[len('/media_type/text_encoding/'):]
118 |         return cls(keyquote.unquotekey(idpath))
119 | 
120 | 
121 | #
122 | #  for now we list (and preload) some text encoding names.
123 | #
124 | 
125 | # some well-known text-encodings
126 | #  official names from http://www.iana.org/assignments/character-sets
127 | #  python codec names are at .../lib/standard-encodings.html
# us-ascii, also registered under the alias 'ascii'
ascii = TextEncoding('us-ascii')
ascii.addalias('ascii')
ascii.codec = 'ascii'

# utf-8
utf8 = TextEncoding('utf-8')
utf8.codec = 'utf_8'

# utf-16
utf16 = TextEncoding('utf-16')
utf16.codec = 'utf_16'
137 | 
138 | # XXX fill in the rest of the character sets we care about and
139 | #  then turn on _exclusive
140 | #TextEncoding._exclusive = True
141 | 
142 | 
def ContentType(value):
    """Split a Content-Type header value into (MediaType, TextEncoding).

    The header is parsed with cgi.parse_header().  The second element of
    the returned tuple is None when the header has no charset parameter.
    """
    media_name, header_params = cgi.parse_header(value)
    media_type = MediaType(media_name)

    encoding = None
    charset_name = header_params.get('charset')
    if charset_name is not None:
        # XXX whatever this is for, it's ugly...
        # (single quotes are removed from the charset before lookup)
        encoding = TextEncoding(charset_name.replace("'", ''))

    return (media_type, encoding)
156 | 
class LanguageCode(UniqueStr):
    """
    normalized language code string.

    mumble rfc-3066 inspired but more about common
    practice and the content we have.

    normalization may do surprising things.
    "en-US" gets normalized to "en".
    """

    metaweb_type = '/type/lang'

    @property
    def id(self):
        """the metaweb id: '/lang/' followed by the key-quoted code"""
        return '/lang/%s' % keyquote.quotekey(self)

    @classmethod
    def normalize(cls, s):
        """Return the canonical form of the language code *s*.

        After UniqueStr.normalize(), an optional leading '/lang/' is
        removed and everything from the first '-' onwards is dropped.

        Raises:
            ValueError: if the normalized input is longer than 20
                characters (checked before the '/lang/' prefix is removed).
        """
        s = UniqueStr.normalize(s)

        if len(s) > 20:
            # call-style raise: the "raise E, msg" statement form is a
            # syntax error on Python 3 and deprecated on Python 2
            raise ValueError('invalid language code "%s"' % s)

        # XXX for now we accept but do not require a leading '/lang/'
        #  choose one, i think.
        if s.startswith('/lang/'):
            s = s[len('/lang/'):]

        # cut off anything following '-' (e.g. "en-US" -> "en")
        # XXX this should be specified and documented
        return s.split('-', 1)[0]

    @classmethod
    def from_id(cls, id):
        """Build an instance from a '/lang/<key>' id; None maps to None.

        An id without the '/lang/' prefix trips the assertion.
        """
        if id is None:
            return None

        # better be ASCII, but make sure it's not unicode
        id = str(id)
        assert id.startswith('/lang/')
        return cls(keyquote.unquotekey(id[len('/lang/'):]))
200 | 


--------------------------------------------------------------------------------
/emql/apikeys.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import hmac, hashlib
 16 | 
 17 | null = None
 18 | from mw.user.sqlmodel import mwOAuthProviderToken, get_sql_connection
 19 | from sqlobject import AND, IN
 20 | 
 21 | def get_context(mss):
 22 |     """
 23 |     Get a unique string representing the combined user/app
 24 |     context.
 25 | 
 26 |     Note that this depends on mss.authenticate() having been
 27 |     called, if appropriate. This allows the context to be null if the
 28 |     call did not require authentication.
 29 |     """
 30 |     user_id = mss.get_user_id() or ''
 31 |     app_id = mss.get_app_id() or ''
 32 | 
 33 |     if not user_id and not app_id:
 34 |         return None
 35 | 
 36 |     # user_id&app_id, user_id&, or &app_id
 37 |     context = "%s&%s" % (user_id, app_id)
 38 | 
 39 |     # hmac-sha1 just like oauth
 40 |     magic_secret = "Sup3rAuth3nticated!eMQL"
 41 |     signed_context = hmac.new(magic_secret, context, hashlib.sha1).hexdigest()
 42 | 
 43 |     return signed_context
 44 | 
 45 | def get_extension_api_query(extension_id=None, optional=False):
 46 |     result = [{"id": null,
 47 |                "type": "/freebase/foreign_api",
 48 |                "consumer_token": {"id": null,
 49 |                                   "optional": True},
 50 |                "access_token": {"id": null,
 51 |                                 "optional": True},
 52 |                "api_keys": [{
 53 |                    "id": null,
 54 |                    "optional": True
 55 |                    }]
 56 |                }]
 57 |     if extension_id:
 58 |         result[0]["extension"] = {"id": extension_id}
 59 | 
 60 |     if optional:
 61 |         result[0]["optional"] = True
 62 | 
 63 |     return result
 64 |         
 65 | 
 66 | def get_api_keys(mss, extension_id, apis=None):
 67 |     """
 68 |     For a given extension, get all the API keys out of the database
 69 | 
 70 |     `apis` is the result of something like get_extension_api_query() -
 71 |     if you don't provide it then mqlread will be run to fill it in for
 72 |     the given extension_id
 73 |     """
 74 | 
 75 |     # get a list of all keys that this extension needs, grouped by API
 76 |     # (because, in fact, an extension might use APIs that share
 77 |     # overlapping keys)
 78 | 
 79 |     if apis is None:
 80 |         q = get_extension_api_query(extension_id, optional=False)
 81 |         apis = mss.mqlread(q)
 82 | 
 83 |     if not apis:
 84 |         return None
 85 |     
 86 |     # ok, now authenticate
 87 |     mss.authenticate()
 88 |     context = get_context(mss)
 89 | 
 90 |     # to fetch them from the database, we want a flat list of all unique ids
 91 |     all_keys = set()
 92 |     for api in apis:
 93 |         for api_key in api["api_keys"]:
 94 |             all_keys.add(api_key)
 95 |         if api["access_token"]:
 96 |             all_keys.add(api["access_token"]["id"])
 97 |         if api["consumer_token"]:
 98 |             all_keys.add(api["consumer_token"]["id"])
 99 | 
100 |     conn = get_sql_connection(mss)
101 | 
102 |     # now query the provider database for all of these specific keys
103 |     foreign_key_list = mwOAuthProviderToken.select(
104 |         AND(mwOAuthProviderToken.q.context == context,
105 |             IN(mwOAuthProviderToken.q.apiKeyId, all_keys)),
106 |         connection=conn
107 |         )
108 | 
109 |     # generate a map of id->key data so we can access it below
110 |     foreign_keys = {}
111 |     for foreign_key in foreign_key_list:
112 |         info = {
113 |             "id" : foreign_key.apiKeyId,
114 |             "key": foreign_key.key
115 |             }
116 |         if foreign_key.secret:
117 |             info["secret"] = foreign_key.secret
118 |             
119 |         foreign_keys[foreign_key.apiKeyId] = info
120 | 
121 |     # now generate a datastructure similar to the mqlread
122 |     # something like
123 |     # [{ "id": "/netflix/queue_info",
124 |     #    "consumer_token": {
125 |     #        "id": "/netflix/consumer_token",
126 |     #        "key": "ccc",
127 |     #        "secret": "secretccc",
128 |     #    },
129 |     #    "access_token": {
130 |     #        "id": "/netflix/access_token",
131 |     #        "key": "aaa",
132 |     #        "secret": "secretaaa",
133 |     #    },
134 |     #  },
135 |     #  { "id": "/netflix/movie_info",
136 |     #    "consumer_token": {
137 |     #        "id": "/netflix/consumer_token",
138 |     #        "key": "ccc",
139 |     #        "secret": "secretccc",
140 |     #    },
141 |     #    "api_keys": [{
142 |     #        "id": "/netflix/affiliate_code",
143 |     #        "key": "fff"
144 |     #     }]
145 |     #  }]
146 |         
147 |     api_manifest = []
148 |     for api in apis:
149 |         api_info = {"id": api["id"]}
150 |         api_manifest.append(api_info)
151 | 
152 |         for special_key in ("consumer_token", "access_token"):
153 |             if api.get(special_key):
154 |                 # map "consumer_token" to "/netflix/consumer_token"
155 |                 special_key_id = api[special_key]["id"]
156 |                 
157 |                 # even if we dont' have the key, include dummy entry
158 |                 # meaning that the API requires the key
159 |                 api_info[special_key] = {
160 |                     "id": special_key_id
161 |                     }
162 |                 if special_key_id in foreign_keys:
163 |                     # key and secret MUST be there
164 |                     foreign_key = foreign_keys[special_key_id]
165 |                     api_info[special_key]["key"] = foreign_key["key"]
166 |                     api_info[special_key]["secret"] = foreign_key["secret"]
167 |             
168 |         for api_key in api["api_keys"]:
169 |             api_key_id = api_key["id"]
170 | 
171 |             # put a dummy entry in, meaning the API requires/expects
172 |             # the key
173 |             api_key_info = {
174 |                 "id": api_key_id,
175 |                 }
176 |             api_info.setdefault("api_keys",[]).append(api_key_info)
177 |             
178 |             if api_key_id in foreign_keys:
179 |                 
180 |                 foreign_key = foreign_keys[api_key_id]
181 |                 
182 |                 if foreign_key.get("key"):
183 |                     api_key_info["key"] = foreign_key["key"]
184 |                     
185 |                 if foreign_key.get("secret"):
186 |                     api_key_info["secret"] = foreign_key["secret"]
187 | 
188 |     return api_manifest
189 | 


--------------------------------------------------------------------------------
/api/op.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import os
 16 | from mw.log import LOG
 17 | import logging
 18 | 
 19 | from optparse import OptionParser
 20 | from ConfigParser import ConfigParser, NoSectionError, NoOptionError
 21 | from mw.user.cache import get_user_by_name
 22 | 
 23 | class OP(OptionParser):
 24 |     def __init__(self, *args, **kws):
 25 |         usage = kws.get('usage','')
 26 |         kws['usage'] = "%%prog  [-d] [-g HOST:PORT] %s [...]" % usage
 27 |         OptionParser.__init__(self, *args, **kws)
 28 | 
 29 |         config_file = None
 30 |         if 'ME_SITE_CONFIG' in os.environ:
 31 |             config_file = os.environ['ME_SITE_CONFIG']
 32 |             if not os.path.exists(config_file):
 33 |                 config_file = None
 34 | 
 35 |             
 36 |         if config_file == None:
 37 |             # default look in me/mwbuild/_site.cfg
 38 |             config_file = os.path.abspath(os.path.join(os.path.dirname(__file__),
 39 |                                                        '../../../mwbuild/_site.cfg'))
 40 | 
 41 |             # walk up the directory structure, stopping at project.mw4
 42 |             # (i.e. the root of whatever project we're in)
 43 |             path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 44 |             config_file = os.path.join(path, "_site.cfg")
 45 |             
 46 |             while (not os.path.exists(config_file) and
 47 |                    not os.path.exists(os.path.join(path, "project.mw4"))):
 48 |                 path = os.path.abspath(os.path.join(path, ".."))
 49 |                 config_file = os.path.join(path, "_site.cfg")
 50 |                 
 51 |             if not os.path.exists(config_file):
 52 |                 config_file = None
 53 | 
 54 | 
 55 |         self.add_option('-c', '--config', dest='config_file',
 56 |                         default=config_file,
 57 |                         help="location of _site.cfg with graph configuration")
 58 | 
 59 |         self.add_option('-d', '--debug', dest='debug',
 60 |                         default=False, action='store_true',
 61 |                         help="turn on debugging output")
 62 | 
 63 |         self.add_option('-l', '--loglevel', dest='loglevel',
 64 |                         default='WARNING', action='store',
 65 |                         help="set the log level")
 66 |         self.add_option('-g', '--graph', dest='graphd_addr',
 67 |                         metavar="HOST:PORT",
 68 |                         help="address of graphd in the form host:port")
 69 |         self.add_option('-b', '--blob', dest='blobd_addr',
 70 |                         metavar="HOST:PORT",
 71 |                         help="address of blobd in the form host:port")
 72 |         self.add_option('-D', '--define', dest='defines',
 73 |                         default=[], action='append',
 74 |                         help='override other site.cfg options in the form section.entry=value')
 75 |         self.add_option("-a", "--as_user", dest="as_user",
 76 |                         metavar="/user/USERID",
 77 |                         help="User ID to write with")
 78 | 
 79 |         self.add_option("-r", "--relevance", dest="relevance_addr",
 80 |                         metavar="HOST:PORT",
 81 |                         help="host:port of relevance server")
 82 |         self.add_option("-s", "--geo", dest="geo_addr",
 83 |                         metavar="HOST:PORT",
 84 |                         help="host:port of geo server")
 85 | 
 86 |         self.add_option("-T", "--no_timeouts", dest="no_timeouts",
 87 |                         default=False, action='store_true',
 88 |                         help="turn off socket timeouts (off by default)")
 89 | 
 90 |     def parse_args(self, *args, **kws):
 91 |         # this is an all-in-one function. It parses the args, loads the config and creates the session.
 92 |         # most of the time in simple scripts you don't need any more control than this.
 93 | 
 94 |         options, args = self.parse_args_only(*args,**kws)
 95 | 
 96 |         config = self.load_config(options)
 97 | 
 98 |         self.create_session(config,options)
 99 | 
100 |         return (options, args)
101 | 
102 |     def parse_args_only(self, *args, **kws):
103 |         # this strictly parses the args without loading the config or creating the session
104 |         return OptionParser.parse_args(self, *args, **kws)
105 | 
106 |     def load_config(self,options):
107 |         # this loads the configuration file without attempting to connect to any services
108 | 
109 |         from paste.deploy import appconfig
110 | 
111 |         config = {}
112 |         if options.config_file is not None:
113 |             LOG.debug("parse.args", "Trying to open %s" % options.config_file)
114 |             try:
115 |                 config = appconfig("config:%s" % options.config_file)
116 |             except LookupError as e:
117 |                 LOG.debug("parse.args", "Error loading config file, missing paste sections", options.config_file, e)
118 |                 # fall through
119 | 
120 |         for k,v in (li.split('=', 1)
121 |                     for li in options.defines):
122 |             config[k] = v
123 | 
124 |         loglevels = 'EMERG ALERT CRIT ERR WARNING NOTICE INFO DEBUG'.split()
125 |         if options.loglevel in loglevels:
126 |             LOG.setLevel(logging.getLevelName(options.loglevel))
127 |         else:
128 |             self.error('unknown log level %s\n  valid log levels are %s'
129 |                      % (options.loglevel, ', '.join(loglevels)))
130 |             sys.exit(1)
131 | 
132 |         # go through the config file for these options, keeps things
133 |         # simple
134 |         if options.graphd_addr:
135 |             config["graphd.address"] = options.graphd_addr
136 | 
137 |         if options.blobd_addr:
138 |             config["clobd.address"] = options.blobd_addr
139 |             config["clobd.masteraddress"] = options.blobd_addr
140 | 
141 |         if options.relevance_addr:
142 |             config["relevance.address"] = options.relevance_addr
143 | 
144 |         if options.geo_addr:
145 |             config["geo.address"] = options.geo_addr
146 | 
147 |         if options.no_timeouts:
148 |             config["debug.no_timeouts"] = options.no_timeouts and 'true'
149 | 
150 |         self.config = config
151 |         return config
152 | 
153 |     def create_session(self,config,options):
154 |         # this opens the connections to services
155 | 
156 |         from mw.api.service import ServiceContext, Session
157 |         self.ctx = ServiceContext()
158 | 
159 |         self.ctx.load_config(config)
160 |         self.ctx.connect()
161 | 
162 |         self.session = Session(self.ctx)
163 | 
164 |         # do further configuration of Session
165 | 
166 |         self.session.finish_init()
167 | 
168 |         if options.as_user:
169 |             if not options.as_user.startswith("/user/"):
170 |                 raise Exception("User must be in the form /user/USERID")
171 |             user_name = options.as_user[len("/user/"):]
172 |             self.session.push_variables(user=options.as_user)
173 |             self.session._signed_user = get_user_by_name(user_name)
174 |             self.session.get_user().validate(self.session)
175 | 
176 |         return self.session
177 | 


--------------------------------------------------------------------------------
/mql/pathexpr.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | #
 16 | #  code for dealing with metaweb path expressions
 17 | #
 18 | #  there will be a lot of variants of this language based
 19 | #    on embedding, it would be nice to have them all abstracted
 20 | #    out at some point but for now we're still figuring out
 21 | #    what the differences are.
 22 | #
 23 | 
 24 | import sys, os, re
 25 | 
if __name__ == '__main__':
  # when run directly as a script, extend the import path with the
  # directory two levels up; '../..' is resolved against the current
  # working directory, not against this file's location
  sys.path.append(os.path.abspath('../..'))
 28 | 
 29 | from pymql.log import LOG
 30 | from error import MQLInternalError
 31 | 
 32 | from pymql import json
 33 | from pymql.error import EmptyResult, ParameterizedError
 34 | 
 35 | 
 36 | class JSONResponse(object):
 37 | 
 38 |   def __init__(self, **kws):
 39 |     self.response = {
 40 |         'status': '500 Internal Server Error',
 41 |         'code': '/api/status/error/server',
 42 |         'messages': []
 43 |     }
 44 |     self.extend(**kws)
 45 | 
 46 |   def extend(self, **kws):
 47 |     for k, v in kws.iteritems():
 48 |       if k == 'messages':
 49 |         self.response[k] += v
 50 |       else:
 51 |         self.response[k] = v
 52 | 
 53 |   def log(self, text, **kws):
 54 |     kws['message'] = text
 55 |     self.response['messages'].append(kws)
 56 | 
 57 | 
 58 | #
 59 | #
 60 | #  snipped from mod_python 3.1.3 apache.py
 61 | #
 62 | #   - modified to format result as a json-like structure.
 63 | #
 64 | import traceback
 65 | 
 66 | 
 67 | def json_traceback(response=None, exception=None, **kws):
 68 |   """
 69 |     This function is only used when debugging is on.
 70 |     It sends the output similar to what you'd see
 71 |     when using Python interactively to the browser
 72 |     """
 73 | 
 74 |   debug = 1
 75 |   etype, evalue, etb = sys.exc_info()
 76 | 
 77 |   try:  # try/finally
 78 |     try:  # try/except
 79 | 
 80 |       if debug and etype is IOError and str(evalue)[:5] == 'Write':
 81 |         # if this is an IOError while writing to client,
 82 |         # it is probably better not to try to write to the cleint
 83 |         # even if debug is on.
 84 |         LOG.error('json_traceback', 'skipping error write to client')
 85 |         debug = 0
 86 | 
 87 |       # write to log
 88 |       for e in traceback.format_exception(etype, evalue, etb):
 89 |         s = '%s' % e[:-1]
 90 |         LOG.error('json_traceback', s)
 91 | 
 92 |       if response is None:
 93 |         response = JSONResponse(
 94 |             status='500 Internal Server Error', code='/api/status/error/server')
 95 |       response.extend(**kws)
 96 | 
 97 |       stack = [
 98 |           dict(zip('file,line,func,source'.split(','), quad))
 99 |           for quad in traceback.extract_tb(etb, None)
100 |       ]
101 | 
102 |       text = '%s: %s' % (etype, evalue)
103 |       response.log(text, stack=stack, level='error')
104 | 
105 |       return response.response
106 | 
107 |     except Exception, e:
108 |       # hit the backstop.  must be a bug in the normal exception handling code,
109 |       #  do something simple.
110 |       response = {
111 |           'status': '500 Internal Server Error',
112 |           'messages': [{
113 |               'level': 'error',
114 |               'text': traceback.format_exc()
115 |           }],
116 |       }
117 |       return response
118 | 
119 |   finally:
120 |     # erase the traceback
121 |     etb = None
122 | 
123 | 
def wrap_query(querier, sq, varenv=None, transaction_id=None):
  """
    Run a query with the given querier (usually something like
    ctx.low_querier.read) - performing appropriate envelope packing and
    unpacking, multiple queries, error handling, etc

    querier: callable invoked as querier(subquery, varenv).
    sq: the envelope, either a dict or a JSON string of one.
    varenv: optional dict handed to the querier; 'tid' and 'cursor'
      are filled in here (the dict is modified in place).
    transaction_id: used for varenv['tid'] when varenv has no 'tid'.

    Always returns a response dict; errors are reported through its
    'status' and 'messages' entries.  The one exception is a ValueError
    from JSON decoding whose text is not in the expected
    "<msg>: line N column M" form, which is re-raised.
    """

  LOG.error(
      'deprecated',
      'mw.mql.pathexpr.wrap_query() is DEPRECATED and will go away soon!')

  if isinstance(sq, basestring):
    # convert to json query
    try:
      # XXX should eventually use unicode, for now utf8
      sq = json.loads(sq, encoding='utf-8', result_encoding='utf-8')

    except ValueError as e:
      # debug ME-907
      LOG.exception('mql.pathexpr.wrap_query()', sq=sq, varenv=varenv)

      SIMPLEJSON_ERR_RE = re.compile(r'^(.+): line (\d+) column (\d+)')
      m = SIMPLEJSON_ERR_RE.match(str(e))
      if not m:
        raise
      response = JSONResponse(
          status='400 Bad Request', code='/api/status/error/request')
      text = 'json parse error: ' + m.group(1)
      response.log(
          text, line=int(m.group(2)), column=int(m.group(3)), level='error')
      return response.response

    except Exception as e:
      return json_traceback(
          exception=e,
          status='400 Bad Request',
          code='/api/status/error/request')

  if not isinstance(sq, dict):
    response = JSONResponse(
        status='400 Bad Request', code='/api/status/error/request')
    text = 'json type error: query was not a dictionary'
    response.log(text, level='error')
    return response.response

  if varenv is None:
    varenv = {}

  # backwards compatibility until we remove the transaction_id parameter
  if 'tid' not in varenv:
    varenv['tid'] = transaction_id

  if 'cursor' in sq:
    varenv['cursor'] = sq['cursor']

  # bound before the try so the outer handler can always refer to it;
  # json_traceback() creates its own response when given None
  response = None

  try:
    # should be JSONResponse(query=sq['query']) 'queries' to match
    # envelope spec
    response = JSONResponse(query=sq)
    results = {}

    # filter out these special keys for now - eventually some of
    # these will be filled in by the caller but only if we trust
    # them!
    reserved_names = ('request_id', 'cost', 'lang', 'transaction_id',
                      'permission', 'cursor', 'user')

    valid_queries = (
        (k, v) for k, v in sq.items() if k not in reserved_names)

    # make sure to copy the request_id
    if 'request_id' in sq:
      # JSONResponse has no item assignment; go through extend()
      response.extend(request_id=sq['request_id'])

    # name the querier for the log line.  A bound method is the usual
    # case, but other callables must not turn the request into a 500.
    querier_name = getattr(querier, '__name__', repr(querier))
    owner = getattr(querier, 'im_class', None)
    if owner is None and getattr(querier, '__self__', None) is not None:
      owner = querier.__self__.__class__
    if owner is not None:
      querier_name = '%s.%s' % (owner.__name__, querier_name)

    # should only looking either at sq['query'] for a single query or
    # sq['queries'] for multiple queries
    for id, subq in valid_queries:
      LOG.notice('Query', querier_name, subq=subq)
      try:
        results[id] = querier(subq, varenv)

        response.extend(status='200 OK')

      except EmptyResult as e:
        # NOTE(review): no entry is stored in results for this id
        LOG.info('emptyresult', '%s' % e)
        response.log('empty result for query %s' % subq)

      # exceptions should be packed into response['error']
      except ParameterizedError as e:
        if isinstance(e, MQLInternalError):
          response.extend(status='500 Internal Server Error')
        else:
          response.extend(status='400 Bad Request')

        # called for its side effects: logs the traceback and adds a
        # message with the stack to response
        json_traceback(response=response, exception=e)
        response.log('parse exception: %s' % e, level='error')
      except Exception as e:
        LOG.exception('python.exception')
        return json_traceback(response=response, exception=e)

    response.extend(result=results)
    if 'cursor' in varenv:
      response.extend(cursor=varenv['cursor'])

    return response.response

  except Exception as e:
    LOG.exception('python.exception')
    return json_traceback(response=response, exception=e)
240 | 


--------------------------------------------------------------------------------
/util/pattern.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | 
 16 | import re, zlib
 17 | from urlparse import urlparse
 18 | 
 19 | RE_KEY = re.compile('\$([0-9A-F][0-9A-F][0-9A-F][0-9A-F])')
 20 | RE_VARS = re.compile('{([^}]+)}')
 21 | RE_NS = re.compile('([^]]+)\[([^]]+)\]')
 22 | 
 23 | 
 24 | class Pattern(object):
 25 | 
 26 |     def __init__(self, pattern, guid=None, key=None, error=None):
 27 | 
 28 |         self.pattern = pattern
 29 |         self.guid = guid
 30 |         self.vars = dict((var, None) for var in RE_VARS.findall(pattern))
 31 |         self.error = error
 32 |         if 'key' in self.vars:
 33 |             self.vars['key'] = self.decode_key(key)
 34 | 
 35 |     # returns a utf-8 encoded string of the pattern with variables
 36 |     # whose value is not None expanded.
 37 |     # if error is not None, the entire pattern is replaced with error 
 38 |     # when a None variable value is encountered
 39 |     def __str__(self):
 40 | 
 41 |         string = self.pattern
 42 |         error = self.error
 43 | 
 44 |         if isinstance(string, unicode):
 45 |             for var, value in self.vars.iteritems():
 46 |                 if value is not None:
 47 |                     if isinstance(value, str):
 48 |                         value = unicode(value, 'utf-8')
 49 |                     elif not isinstance(value, unicode):
 50 |                         value = unicode(value)
 51 |                     string = string.replace(u'{%s}' %(var), value)
 52 |                 elif error is not None:
 53 |                     if isinstance(error, str):
 54 |                         string = unicode(error, 'utf-8')
 55 |                     elif not isinstance(error, unicode):
 56 |                         string = unicode(error)
 57 |                     else:
 58 |                         string = error
 59 |                     break
 60 |             string = string.encode('utf-8')
 61 |         else:
 62 |             for var, value in self.vars.iteritems():
 63 |                 if value is not None:
 64 |                     if isinstance(value, unicode):
 65 |                         value = value.encode('utf-8')
 66 |                     elif not isinstance(value, str):
 67 |                         value = str(value)
 68 |                     string = string.replace('{%s}' %(var), value)
 69 |                 elif error is not None:
 70 |                     if isinstance(error, unicode):
 71 |                         string = error.encode('utf-8')
 72 |                     elif not isinstance(error, str):
 73 |                         string = str(error)
 74 |                     else:
 75 |                         string = error
 76 |                     break
 77 | 
 78 |         return string
 79 | 
 80 |     # returns a unicode string of the pattern with variables
 81 |     # whose value is not None expanded.
 82 |     # if error is not None, the entire pattern is replaced with error 
 83 |     # when a None variable value is encountered
 84 |     def __unicode__(self):
 85 | 
 86 |         string = self.pattern
 87 |         error = self.error
 88 | 
 89 |         if isinstance(string, unicode):
 90 |             for var, value in self.vars.iteritems():
 91 |                 if value is not None:
 92 |                     if isinstance(value, str):
 93 |                         value = unicode(value, 'utf-8')
 94 |                     elif not isinstance(value, unicode):
 95 |                         value = unicode(value)
 96 |                     string = string.replace(u'{%s}' %(var), value)
 97 |                 elif error is not None:
 98 |                     if isinstance(error, str):
 99 |                         string = unicode(error, 'utf-8')
100 |                     elif not isinstance(error, unicode):
101 |                         string = unicode(error)
102 |                     else:
103 |                         string = error
104 |                     break
105 |         else:
106 |             for var, value in self.vars.iteritems():
107 |                 if value is not None:
108 |                     if isinstance(value, unicode):
109 |                         value = value.encode('utf-8')
110 |                     elif not isinstance(value, str):
111 |                         value = str(value)
112 |                     string = string.replace('{%s}' %(var), value)
113 |                 elif error is not None:
114 |                     if isinstance(error, unicode):
115 |                         string = error.encode('utf-8')
116 |                     elif not isinstance(error, str):
117 |                         string = str(error)
118 |                     else:
119 |                         string = error
120 |                     break
121 |             string = unicode(string, 'utf-8')
122 | 
123 |         return string
124 | 
125 |     def decode_key(self, key):
126 | 
127 |         value = key
128 |         if value is not None:
129 |             value = RE_KEY.sub('\\u\\1', value)
130 |             if value is not key:
131 |                 value = value.decode('unicode-escape').encode('utf-8')
132 | 
133 |         return value
134 | 
135 |     def _prop_name(self, prefix, var, prop):
136 | 
137 |         # use adler32 as it's shorter than hash on 64-bit and just as fast
138 |         return "%s_%x:%s" %(prefix or "p", zlib.adler32(var) & 0xffffffff, prop)
139 | 
140 |     def mql_query(self, prefix=None):
141 | 
142 |         query = {}
143 |         for var, value in self.vars.iteritems():
144 |             if var != 'key' and value is None:
145 |                 _query = prev = query
146 |                 for prop in var.split('.'):
147 |                     nsprop = RE_NS.search(prop)
148 |                     if nsprop is not None:
149 |                         prop, ns = nsprop.groups()
150 |                         prop = self._prop_name(prefix, var, prop)
151 |                         _query[prop] = {
152 |                             "key": [{
153 |                                 "limit": 1, "namespace": ns, "value": None
154 |                             }]
155 |                         }
156 |                         break
157 |                     else:
158 |                         prop = self._prop_name(prefix, var, prop)
159 |                         _query[prop] = [{"limit": 1}]
160 |                         prev = _query
161 |                         _query = _query[prop][0]
162 |                 else:
163 |                     # last prop is assumed to be prop: null compatible
164 |                     # so that name or literal queries require no hacks
165 |                     prev[prop] = None
166 | 
167 |         if query:
168 |             query["guid"] = self.guid
169 | 
170 |         return query
171 | 
172 |     def set_key(self, key):
173 | 
174 |         if 'key' in self.vars:
175 |             self.vars['key'] = self.decode_key(key)
176 | 
177 |         return self
178 | 
179 |     def set_mqlres(self, mqlres, prefix=None, clear=False):
180 | 
181 |         if clear:
182 |             for var in self.vars.iterkeys():
183 |                 if var != 'key': 
184 |                     self.vars[var] = None
185 | 
186 |         for var, value in self.vars.iteritems():
187 |             if var != 'key' and value is None:
188 |                 value = mqlres
189 |                 for prop in var.split('.'):
190 |                     nsprop = RE_NS.search(prop)
191 |                     try:
192 |                         if nsprop is not None:
193 |                             prop, ns = nsprop.groups()
194 |                             prop = self._prop_name(prefix, var, prop)
195 |                             value = value[prop]['key'][0]['value']
196 |                             break
197 |                         else:
198 |                             prop = self._prop_name(prefix, var, prop)
199 |                             value = value[prop]
200 |                             if isinstance(value, list):
201 |                                 value = value[0]
202 |                     except:
203 |                         value = None
204 |                         break
205 |     
206 |                 self.vars[var] = value
207 | 
208 |         return self
209 | 
210 |     def set_uri(self, uri):
211 | 
212 |         vars = self.vars
213 |         (vars['scheme'], vars['host'], vars['path'], x,
214 |          vars['query'], vars['fragment']) = urlparse(uri)
215 | 
216 |         return self
217 | 


--------------------------------------------------------------------------------
/mql/benchmark.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import os, sys, re
 16 | if __name__ == "__main__":
 17 |   sys.path.append(os.path.abspath("../.."))
 18 | 
 19 | from pymql.log import LOG
 20 | from pymql import json
 21 | import time
 22 | 
 23 | try:
 24 |   import cProfile
 25 |   profiler = "cProfile"
 26 | except ImportError, e:
 27 |   import hotshot
 28 |   profiler = "hotshot"
 29 | 
 30 | 
 31 | def wildcard_mql_query():
 32 |   return [{"id": None, "*": None}]
 33 | 
 34 | 
 35 | def get_all_domains_mql_query():
 36 |   return [{
 37 |       "id":
 38 |           None,
 39 |       "name":
 40 |           None,
 41 |       "type":
 42 |           "/type/domain",
 43 |       "key": {
 44 |           "value": None,
 45 |           "namespace": "/"
 46 |       },
 47 |       "/type/namespace/keys": [{
 48 |           "value": None,
 49 |           "type": None,
 50 |           "namespace": {
 51 |               "type":
 52 |                   "/type/type",
 53 |               "name":
 54 |                   None,
 55 |               "id":
 56 |                   None,
 57 |               "domain":
 58 |                   None,
 59 |               "/type/namespace/keys": [{
 60 |                   "value": None,
 61 |                   "type": None,
 62 |                   "namespace": {
 63 |                       "type": "/type/property",
 64 |                       "unique": None,
 65 |                       "id": None,
 66 |                       "schema": None,
 67 |                       "expected_type": None,
 68 |                       "master_property": None,
 69 |                       "name": None,
 70 |                       "reverse_property": []
 71 |                   }
 72 |               }]
 73 |           }
 74 |       }]
 75 |   }]
 76 | 
 77 | 
 78 | def get_domain_mql_query():
 79 |   return {
 80 |       "id":
 81 |           "/type",
 82 |       "name":
 83 |           None,
 84 |       "type":
 85 |           "/type/domain",
 86 |       "/type/namespace/keys": [{
 87 |           "value": None,
 88 |           "type": None,
 89 |           "namespace": {
 90 |               "type":
 91 |                   "/type/type",
 92 |               "name":
 93 |                   None,
 94 |               "id":
 95 |                   None,
 96 |               "domain":
 97 |                   None,
 98 |               "/type/namespace/keys": [{
 99 |                   "value": None,
100 |                   "type": None,
101 |                   "namespace": {
102 |                       "type": "/type/property",
103 |                       "unique": None,
104 |                       "id": None,
105 |                       "schema": None,
106 |                       "expected_type": None,
107 |                       "master_property": None,
108 |                       "name": None,
109 |                       "reverse_property": []
110 |                   }
111 |               }]
112 |           }
113 |       }]
114 |   }
115 | 
116 | 
def get_type_mql_query():
  """Return a new MQL query for the object with id "/type/object".

  The query asks for its types, name and /type/type/domain, plus the
  "/type/namespace/keys" entries whose namespace is of type
  /type/property, with their schema attributes.
  """
  property_namespace = {
      "type": "/type/property",
      "unique": None,
      "id": None,
      "schema": None,
      "expected_type": None,
      "master_property": None,
      "name": None,
      "reverse_property": []
  }
  key_clause = {
      "value": None,
      "type": None,
      "namespace": property_namespace
  }
  return {
      "type": [],
      "name": None,
      "id": "/type/object",
      "/type/type/domain": None,
      "/type/namespace/keys": [key_clause]
  }
141 | 
142 | 
def get_schema_query(guid):
  """Return a new graph-level query for the /type/type instance ``guid``.

  Args:
    guid: value stored under "@guid" at the top of the query.

  The query also carries optional "uses_properties_from" and
  "has_default_property_name" clauses and a "has_key" list clause for
  /type/property instances (schema, expected type, master property and
  uniqueness).  Every call builds fresh, unshared dicts and lists.
  """

  def optional_boolean():
    # built per use so the two occurrences never share one dict
    return {
        ":value": None,
        ":datatype": "boolean",
        ":optional": True
    }

  master_property = {
      ":optional": True,
      "@guid": None,
      "is_unique_property": optional_boolean()
  }

  property_key = {
      ":optional": True,
      "@guid": None,
      ":value": None,
      "has_schema": {
          "@guid": None,
      },
      "has_expected_concept_type": {
          ":optional": True,
          "@guid": None
      },
      "has_master_property": master_property,
      "is_unique_property": optional_boolean(),
      "is_instance_of": {
          "@id": "/type/property"
      }
  }

  return {
      "@guid": guid,
      "is_instance_of": {
          "@id": "/type/type"
      },
      "uses_properties_from": {
          "@guid": None,
          ":optional": True
      },
      "has_default_property_name": {
          ":value": None,
          ":optional": True
      },
      "has_key": [property_key]
  }
188 | 
189 | 
def get_object_query():
  """Return the schema query with no guid, addressed by "@id" /type/object."""
  schema_query = get_schema_query(None)
  schema_query.update({"@id": "/type/object"})
  return schema_query
194 | 
195 | 
def get_domain_query():
  """Return a graph-level query for the /type/domain instance "/type".

  Its single "has_key" clause is the schema query with no guid, extended
  with a ":value" of None and a "has_domain" clause pointing at "/type".
  """
  type_clause = get_schema_query(None)
  type_clause[":value"] = None
  type_clause["has_domain"] = {"@id": "/type"}

  return {
      "@id": "/type",
      "is_instance_of": {
          "@id": "/type/domain"
      },
      "has_key": [type_clause]
  }
207 | 
208 | 
def get_wildcard_query():
  """Return a new graph-level query with a single optional "*" clause.

  Both the outer clause and the "*" clause ask for "@guid"; the "*"
  clause also asks for ":guid" and ":value" and is marked ":optional".
  """
  star_clause = {
      "@guid": None,
      ":guid": None,
      ":value": None,
      ":optional": True
  }
  outer_clause = {
      "@guid": None,
      "*": [star_clause]
  }
  return [outer_clause]
219 | 
220 | 
def test_run(ctx, varenv, options, query):
  """Run ``query`` ``options.num`` times and record the elapsed time.

  Args:
    ctx: context providing ``gc`` (graph connection) and ``high_querier``.
    varenv: environment passed to the MQL read; its "tid" and "policy"
      entries are used for graph reads.
    options: parsed options; reads ``num``, ``flush`` and ``type``.
    query: the query to execute on every iteration.

  The cost counters of ``ctx.gc`` are reset before the loop and the wall
  clock duration of the whole loop is stored in ``ctx.gc.totalcost["dt"]``.
  When ``options.flush`` is set, the schema factory is flushed before each
  read.  ``options.type == "graph"`` reads through ``ctx.gc``; anything
  else reads through ``ctx.high_querier``.

  Returns:
    The result of the last read, or None when ``options.num`` is 0.
  """
  ctx.gc.reset_cost()

  result = None

  start_time = time.time()

  for _ in xrange(options.num):
    if options.flush:
      ctx.high_querier.schema_factory.flush("")

    if options.type == "graph":
      result = ctx.gc.read(
          query, transaction_id=varenv["tid"], policy=varenv["policy"])
    else:
      result = ctx.high_querier.read(query, varenv)

  stop_time = time.time()

  ctx.gc.totalcost["dt"] = stop_time - start_time

  return result
245 | 
246 | 
def cmdline_main():
  """Command-line entry point: run one query repeatedly and log its cost.

  Options (on top of whatever cmdline.OP defines itself):
    -n       number of iterations (default 1000)
    -P FILE  run under the profiler and write its output to FILE
    -c NAME  call the module-level function NAME to obtain the query
    -f FILE  read the query from FILE (newlines and tabs become spaces)
    --flush  flush the schema cache before every request
    -t TYPE  "mql" (default) or "graph"

  The query comes from -f, else -c, else the single positional argument.
  Costs are logged once after start-up and once after the run.
  """
  LOG.warning("benchmark", "test start")
  start_time = time.time()

  from mql.mql import cmdline
  op = cmdline.OP("testing")

  op.add_option(
      "-n", dest="num", default=1000, type="int", help="number of iterations")

  op.add_option(
      "-P",
      dest="profile",
      default=None,
      help="run profiler with output to file")

  op.add_option("-c", dest="call", default=None, help="function to call")

  op.add_option(
      "-f", dest="query_file", default=None, help="file containing query")

  op.add_option(
      "--flush",
      dest="flush",
      default=None,
      help="flush cache between every request")

  op.add_option("-t", dest="type", default="mql", help="graph or MQL query")

  options, args = op.parse_args()

  stop_time = time.time()
  op.ctx.gc.totalcost["dt"] = stop_time - start_time

  LOG.warning("start cost", {
      "nreqs": op.ctx.gc.nrequests,
      "cost": op.ctx.gc.totalcost
  })

  # NOTE(review): the arguments are parsed a second time, as before; this
  # looks redundant but parse_args may have side effects on op -- confirm
  # before removing.
  options, args = op.parse_args()

  queryfile = options.query_file
  if queryfile is not None:
    # the with-block closes the file even when reading fails
    with open(queryfile, "r") as qf:
      query = qf.read()
    query = re.sub("[\n\t]+", " ", query)
  elif options.call:
    query = globals()[options.call]()
  elif len(args) == 1:
    query = args[0]
  else:
    op.error("Must specify a query argument")

  if options.type == "mql":
    # XXX should eventually use unicode, for now utf8
    # NOTE(review): a query obtained through -c is already a Python
    # structure when it reaches json.loads -- verify that this is intended.
    query = json.loads(query, encoding="utf-8", result_encoding="utf-8")
  elif options.type == "graph":
    pass
  else:
    op.error("-t must be 'mql' or 'graph'")

  if options.profile:
    if profiler == "hotshot":
      profile = hotshot.Profile(options.profile)
      try:
        profile.runcall(test_run, op.ctx, op.varenv, options, query)
      finally:
        # closing the profiler closes its log file
        profile.close()
      LOG.warning(
          "benchmark",
          "Saving hotshot profile in Stats format to %s" % options.profile)

    elif profiler == "cProfile":
      profile = cProfile.Profile()
      profile.runcall(test_run, op.ctx, op.varenv, options, query)

      LOG.warning(
          "benchmark",
          "Saving cProfile data in kcachegrind format to %s" % options.profile)
      # get from http://jcalderone.livejournal.com/21124.html
      # and put in thirdparty/pyroot
      from mql.mql import lsprofcalltree
      k = lsprofcalltree.KCacheGrind(profile)
      # close the output file deterministically so the data is flushed
      with open(options.profile, "w") as outfile:
        k.output(outfile)
    else:
      LOG.warning("benchmark", "No profiler available, not running benchmark")
  else:
    test_run(op.ctx, op.varenv, options, query)

  LOG.warning("run cost", {
      "nreqs": op.ctx.gc.nrequests,
      "cost": op.ctx.gc.totalcost
  })
343 | 
344 | 
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
  cmdline_main()
347 | 


--------------------------------------------------------------------------------
/mql/grparse.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2020 Google LLC
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #      http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | """quick and dirty parsing of graphd query language strings into python lists.
 15 | """
 16 | import re
 17 | from grquoting import quote, unquote
 18 | 
 19 | from error import MQLGraphError, MQLDatelineInvalidError, MQLTimeoutError,\
 20 |     MQLCursorInvalidError, GraphIsSnapshottingError
 21 | 
 22 | from pymql.log import LOG
 23 | 
 24 | gstr_escape = quote
 25 | 
 26 | # there are several places in pymi where gstr_unescape is called on a string that
 27 | # is not escaped. One example is the result of result=(datatype) - a bareword
 28 | # like boolean is returned, not a quoted string.
 29 | #
 30 | # Rather than try and fix these cases, I've made gstr_unescape preserve
 31 | # this behaviour. Please use mw.mql.grquoting.unquote() instead.
 32 | 
 33 | 
 34 | def gstr_unescape(string):
 35 |   if string[0] == '"':
 36 |     return unquote(string)
 37 |   else:
 38 |     return string
 39 | 
 40 | 
# Table of the cost counters that can appear in a cost string.
# Each entry is (code, name, description); the code is the short key used
# in the cost string, name and description are human-readable text.
cost_parameters = [
    ('tr', 'time/real',
     'number of milliseconds graphd spent executing to answer this query in '
     'general. This number will get larger on a system that is busy with other'
     ' things, even if graphd isn\'t involved in them.'
    ),
    ('tu', 'time/user',
     'number of milliseconds graphd spent executing in user mode while '
     'computing the answer to this request.'
    ),
    ('ts', 'time/system',
     'number of milliseconds graphd spent executing in system mode while '
     'computing the answer to this requests. "Executing in system mode" almost'
     ' always means "reading a lot of data from disk".'
    ),
    ('pr', 'page reclaims',
     'a benevolent form of page fault that doesn\'t actually do any work '
     'because the page is still in the local cache.'
    ),
    ('pf', 'page faults',
     'the thing we\'re trying to minimize. Higher pf will usually be '
     'accompanied by a higher ts.'
    ),
    ('dw', 'primitive data writes',
     'Usually, these will be what you expect, except for queries that create '
     'implicit type links and type system fragments.'
    ),
    ('dr', 'primitive data reads',
     'how many single primitive structs were read from disk (for example, as '
     'part of dismissing them as candiates for a qualified search).'
    ),
    ('in', 'index size reads',
     'how many indices were looked up with their starting address and size.'),
    ('ir', 'index element reads', 'get one member of one index.'),
    ('iw', 'index element write', 'add an element to an index.'),
    ('va', 'value allocation',
     'allocate a (possibly temporary or transient) result data structure.'),
    ('te', 'time/overall',
     'number of milliseconds from receipt of this query by the graph, to the '
     'start of sending the response'
    ),
    ('tg', 'time/graph',
     'time me observes from sending the first byte of the request to receiving'
     ' the last byte'
    ),
    ('tf', 'time/formatted',
     'time me takes from sending the request to handing off the formatted '
     'response'
    ), ('tm', 'time/mql', 'time taken inside the MQL subroutines'),
    ('cr', 'cache/read', 'number of requests sent to memcache'),
    ('cm', 'cache/miss', 'number of memcache misses'),
    ('ch', 'cache/hit', 'number of memcache hits'),
    ('lr', 'lojson-cache/read', 'number of schema requests sent to memcache'),
    ('lm', 'lojson-cache/miss', 'number of schema memcache misses'),
    ('lh', 'lojson-cache/hit', 'number of schema memcache hits'),
    ('rt', 'relevance/time',
     'time taken inside the relevance server (as measured by ME)'),
    ('gcr', 'graph connect retries',
     'the number of times that ME tried to open a connection to a graph'),
    ('gqr', 'graph query retries',
     'the number of times that ME tried to service a query from a single graph')
]

# Lookup from cost code to its (name, description) pair.
costcode_dict = dict([(cc[0], (cc[1], cc[2])) for cc in cost_parameters])
105 | 
# one "code=number" item of a cost string, with any trailing whitespace
costitem_re = re.compile(r'([a-zA-Z]+)=(\d+)\s*')


def coststr_to_dict(coststr):
  """Parse a cost string such as "tr=5 tu=3" into {code: int}.

  Returns None for an empty or None cost string.  Text that does not form
  a ``letters=digits`` item is ignored; a repeated code keeps its last
  value.
  """
  if not coststr:
    return None

  costs = {}
  for code, amount in costitem_re.findall(coststr):
    costs[code] = int(amount)
  return costs
114 | 
115 | 
# Tokenizer for one reply line.  Each match is one of: "(" or ")", a single
# space, "->" or "<-", a lowercase word followed by "=", a bareword made of
# letters, digits and the characters "-", ":", "." and "_", or a
# double-quoted string in which a backslash may only be followed by a
# backslash, a double quote or "n".
graphresult_re = re.compile(
    r'(\(|\)| |\-\>|\<\-|[a-z]+\=|[\-\:\._A-Za-z0-9]+|\"(?:[^\"\\]|\\[\\\"n])*\")'
)
119 | 
120 | 
class GraphResult(list):
  """A list that can also carry attributes describing a reply.

  ReplyParser.get_reply stores the reply payload as the list items and
  sets attributes such as status, cost and dateline on the instance.
  """
123 | 
124 | 
class ReplyParser:
  """Splits graphd output into replies and parses them into nested lists.

  Text is fed in with parsestr(); every newline-terminated line is one
  reply.  Paren lists are broken up into python lists and all list
  elements are returned as strings.  Parsed replies wait in
  ``replyqueue`` until they are fetched with get_reply() or
  get_reply_raw().
  """

  def __init__(self):
    self.inbuf = []  # raw buffers collected by put_buf()
    self.replyqueue = []  # parsed replies, oldest first

    self.reset_parser()

  def reset_parser(self):
    """Reset the state kept for the reply currently being received."""
    self.instring = 0  # true if we have read an open " but no close
    self.escaped = 0  # true if we just read a backslash
    # if instring is 1, curstr is a list of characters that
    #  will be joined to make the string
    self.curstr = []
    # list of strings - join when ready to use (faster than string concat)
    self.curreply = []

  def parsestr(self, s):
    """Consume a chunk of text, parsing every line it completes.

    Text after the last newline (or the whole chunk when it has no
    newline) is kept in ``curreply`` until the rest of the line arrives.
    """
    if '\n' in s:
      # parse all of the 'completed' lines, and if there is an
      # uncompleted line at the end of s, leave it in curreply

      reply_list = s.split('\n')

      self.curreply.append(reply_list.pop(0))

      for reply in reply_list:

        # parse the previous reply
        replystr = ''.join(self.curreply)
        self.parse_full_reply(replystr)
        self.reset_parser()

        # now add the current line
        self.curreply.append(reply)

      # note that we're not processing the last line, because it is incomplete

    else:
      self.curreply.append(s)

  def parse_full_reply(self, replystr):
    """Parse one complete reply line and queue the result.

    The reply is tokenized and turned into a bunch of nested lists of
    tokens.  Results are in the form:
    [ 'ok', 'id=', '"me;..."', [[['010000..', '01...', ...]]]]

    Raises:
      MQLGraphError: if the parentheses in the reply are not balanced.
    """
    LOG.debug('graph.result', replystr)
    token_list = graphresult_re.findall(replystr)

    curlist = []
    stack = []  # enclosing lists of the list being filled

    for tok in token_list:
      if tok == '(':
        stack.append(curlist)
        curlist = []
      elif tok == ')':
        if not stack:
          # report a stray ")" as a graph error instead of letting
          # stack.pop() fail with a bare IndexError
          raise MQLGraphError(
              None,
              'unbalanced ")" in graph reply',
              reply=replystr,
              tokens=token_list)
        sublist = curlist
        curlist = stack.pop()
        curlist.append(sublist)
      elif tok == '\n':
        raise MQLGraphError(
            None,
            'Not allowed a newline in parse_full_reply',
            reply=replystr,
            tokens=token_list)
      elif tok == ' ' or tok == '':
        pass
      else:
        curlist.append(tok)

    # len() is the real token count and is defined for an empty reply too
    # (the loop index was one short and unset when there were no tokens)
    LOG.debug('graph.result.parsed', 'Parsed %d tokens' % len(token_list))
    if len(stack) != 0:
      raise MQLGraphError(
          None,
          'got linefeed in the middle of a reply?',
          reply=replystr,
          tokens=token_list,
          depth=len(stack))

    self.replyqueue.append(curlist)

  def get_reply_raw(self):
    """Remove and return the oldest parsed reply as a plain token list."""
    return self.replyqueue.pop(0)

  def get_reply(self):
    """Remove the oldest parsed reply and return it as a GraphResult.

    The first token becomes ``status``.  For 'ok' the last element is the
    payload and becomes the list content; for 'error' the next token
    becomes ``errcode`` and the unquoted last element ``errmsg``.  The
    remaining 'cost=', 'dateline=' and 'id=' modifiers are stored unquoted
    as attributes of the same name.

    Returns:
      The GraphResult for an 'ok' reply or an 'error' reply with code
      'EMPTY'.

    Raises:
      MQLCursorInvalidError: error code BADCURSOR.
      MQLDatelineInvalidError: error code DATELINE.
      GraphIsSnapshottingError: error code AGAIN.
      MQLTimeoutError: error code COST.
      MQLGraphError: any other error code except EMPTY, an unknown reply
        status, or an unknown response modifier.
    """
    l = self.get_reply_raw()
    result = GraphResult()
    result.status = l.pop(0)
    result.cost = None
    result.dateline = None

    if result.status == 'ok':
      result += l.pop()
    elif result.status == 'error':
      result.errcode = l.pop(0)
      result.errmsg = unquote(l.pop())
    else:
      # l may be empty once the status has been removed
      raise MQLGraphError(
          None,
          'grparse: unknown graphd reply type',
          header=l[0] if l else None,
          reply=l)

    # what's left is info messages from graphd
    li = 0
    while li < len(l):
      rv = l[li]
      if isinstance(rv, str) and rv in ('cost=', 'dateline=', 'id='):
        modifier = rv[:-1]
        setattr(result, modifier, unquote(l[li + 1]))
        li += 2
      else:
        raise MQLGraphError(
            None,
            'unknown response modifier from graphd',
            header=l[li],
            reply=l)

    if result.status == 'error':
      errcode = result.errcode
      if errcode == 'BADCURSOR':
        raise MQLCursorInvalidError(None, result.errmsg)
      if errcode == 'DATELINE':
        raise MQLDatelineInvalidError(None, result.errmsg)
      if errcode == 'AGAIN':
        raise GraphIsSnapshottingError(None, result.errmsg)
      if errcode == 'COST':
        raise MQLTimeoutError(None, 'Query too difficult.', cost=result.cost)
      if errcode != 'EMPTY':
        raise MQLGraphError(
            None,
            'error %(subclass)s: %(detail)s',
            detail=result.errmsg,
            subclass=result.errcode,
            dateline=result.dateline)
    return result

  def put_buf(self, buf):
    """Append a raw buffer to ``inbuf``."""
    self.inbuf.append(buf)

  def isready(self):
    """Return True when at least one parsed reply is waiting."""
    return len(self.replyqueue) > 0
274 | 
275 | 
276 | # this is different from a normal list printer because it
277 | #  assumes that any sublists will come at the end.
278 | # of course that's wrong.  hmmph.
279 | def print_result(l, indent=''):
280 |   if l is None:
281 |     print indent + 'None'
282 |     return
283 |   #print type(l)
284 |   if isinstance(l, list):
285 |     dangle = 0
286 |     for li in l:
287 |       if isinstance(li, list):
288 |         if dangle:
289 |           print
290 |           dangle = 0
291 |         print_result(li, indent + '    ')
292 |       else:
293 |         if not dangle:
294 |           print indent,
295 |           dangle = 1
296 |         print str(li),
297 |     if dangle:
298 |       print
299 | 


--------------------------------------------------------------------------------