├── bazel ├── BUILD └── six.BUILD ├── .gitignore ├── api ├── .gitignore ├── __init__.py ├── hicache.py └── op.py ├── util ├── .gitignore ├── __init__.py ├── misc.py ├── http.py ├── unionfind.py ├── attrib.py ├── dumper.py ├── keyquote.py ├── mwdatetime.py ├── parsedt.py └── pattern.py ├── formats ├── .gitignore ├── __init__.py ├── uniqstr.py ├── http.py ├── image.py └── contenttype.py ├── emql ├── .gitignore ├── adapters │ ├── test │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── test_nytimes.py │ │ ├── test_twitter.py │ │ └── test_weblink.py │ ├── .gitignore │ ├── __init__.py │ ├── lib.py │ ├── metacritic.py │ ├── stats.py │ ├── twitter.py │ ├── quote.py │ ├── text.py │ ├── nytimes.py │ └── search.py ├── __init__.py ├── docs │ └── documentation.css └── apikeys.py ├── mql ├── .gitignore ├── graph │ ├── __init__.py │ └── conn_mock.py ├── __init__.py ├── grquoting.py ├── mid.py ├── pathexpr.py ├── benchmark.py └── grparse.py ├── OWNERS ├── bootstrap ├── BUILD └── bootstrap.py ├── test ├── config.cfg ├── __init__.py ├── query_sort_test.py ├── regression_id_test.py ├── mql_exceptions_test.py ├── best_hrid_test.py ├── mql_fixture_test.py ├── BUILD ├── cost_test.py ├── return_test.py └── regression_misc_test.py ├── tid.py ├── log ├── __init__.py ├── log_util.py └── log.py ├── WORKSPACE ├── pymql_import_test.py ├── CONTRIBUTING.md ├── BUILD ├── mqlbin.py ├── README.md └── error.py /bazel/BUILD: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bazel-* 2 | -------------------------------------------------------------------------------- /api/.gitignore: -------------------------------------------------------------------------------- 1 | /*.pyc 2 | -------------------------------------------------------------------------------- /util/.gitignore: 
-------------------------------------------------------------------------------- 1 | /*.pyc 2 | -------------------------------------------------------------------------------- /formats/.gitignore: -------------------------------------------------------------------------------- 1 | /*.pyc 2 | -------------------------------------------------------------------------------- /emql/.gitignore: -------------------------------------------------------------------------------- 1 | /*.pyc 2 | /*.pyo 3 | -------------------------------------------------------------------------------- /emql/adapters/test/.gitignore: -------------------------------------------------------------------------------- 1 | /*.pyc 2 | -------------------------------------------------------------------------------- /emql/adapters/.gitignore: -------------------------------------------------------------------------------- 1 | /*.pyc 2 | /*.pyo 3 | -------------------------------------------------------------------------------- /mql/.gitignore: -------------------------------------------------------------------------------- 1 | /*.pyc 2 | /*.out 3 | /*.tmp 4 | /*.err 5 | -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | rtp 2 | warrenharris 3 | file://depot/google3/metaweb/freebase/OWNERS 4 | -------------------------------------------------------------------------------- /bootstrap/BUILD: -------------------------------------------------------------------------------- 1 | 2 | py_binary( 3 | name = "bootstrap", 4 | srcs = ["bootstrap.py"], 5 | python_version = "PY2", 6 | deps = [ 7 | "//:mql", 8 | ], 9 | ) 10 | -------------------------------------------------------------------------------- /test/config.cfg: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.4 2 | # 3 | # Author: bneutra@google.com (Brendan Neutra) 4 | # flags 
to run mql tests 5 | --graphd_addr=blade:freebase-graphd-sandbox 6 | # replay|record|nomock 7 | # NOTE: with the introduction of randomized hashing of dicts in 2.7 mocking no longer functions 8 | --mockmode=nomock 9 | -------------------------------------------------------------------------------- /bazel/six.BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # Six provides simple utilities for wrapping over differences between Python 2 3 | # and Python 3. 4 | 5 | licenses(["notice"]) # MIT 6 | 7 | exports_files(["LICENSE"]) 8 | 9 | py_library( 10 | name = "six", 11 | srcs = ["six.py"], 12 | visibility = ["//visibility:public"], 13 | ) -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /formats/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | -------------------------------------------------------------------------------- /emql/adapters/test/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /emql/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # emql package 16 | -------------------------------------------------------------------------------- /tid.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import re

# wsplit
#
# Pre-compiled pattern matching a run of one or more whitespace characters.
# Written as a raw string so that "\s" is handed to the regex engine verbatim
# instead of being an invalid string escape (which newer Pythons warn about).
wsplit_re = re.compile(r'\s+')


def wsplit(s):
    """Split a string into words on runs of whitespace.

    Leading and trailing whitespace is stripped first, so surrounding blanks
    do not produce empty leading/trailing items.

    Args:
      s: the string to split.

    Returns:
      A list of the whitespace-separated pieces of ``s``.  An empty (or
      all-whitespace) input yields ``['']`` -- the behaviour of ``re.split``
      on an empty string -- not ``[]``.
    """
    return wsplit_re.split(s.strip())
14 | 15 | # adapters package 16 | 17 | # do not add any import here since if it were to fail, all python adapters 18 | # would fail to load 19 | 20 | -------------------------------------------------------------------------------- /log/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.6 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Backward compatible support for mql LOG calls""" 17 | 18 | __author__ = 'bneutra@google.com (Brendan Neutra)' 19 | 20 | from log import * 21 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | workspace(name = "pymql") 2 | 3 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") 4 | 5 | # Abseil-py 6 | http_archive( 7 | name = "absl_py", 8 | sha256 = "fe3948746ca0543f01fb7767fb00bb739c7fe7e2514180c1575100b988b66542", 9 | strip_prefix = "abseil-py-master", 10 | urls = ["https://github.com/abseil/abseil-py/archive/master.zip"], 11 | ) 12 | 13 | http_archive( 14 | name = "six_archive", 15 | build_file = "@//bazel:six.BUILD", 16 | sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a", 17 | strip_prefix = "six-1.10.0", 18 | urls = [ 19 | "http://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz", 20 | 
"https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz", 21 | ], 22 | ) 23 | -------------------------------------------------------------------------------- /mql/graph/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.6 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # 17 | 18 | __author__ = 'nicholasv@google.com (Nicholas Veeser)' 19 | 20 | __all__ = ['TcpGraphConnector', 'MockRecordConnector', 'MockReplayConnector'] 21 | 22 | from conn_tcp import TcpGraphConnector 23 | from conn_mock import MockRecordConnector 24 | from conn_mock import MockReplayConnector 25 | -------------------------------------------------------------------------------- /log/log_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.6 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Backward compatible support for mql LOG calls, Levels.""" 16 | 17 | __author__ = 'bneutra@google.com (Brendan Neutra)' 18 | 19 | from absl import logging 20 | 21 | FATAL = logging.FATAL 22 | ERROR = logging.ERROR 23 | CRIT = ALERT = ERROR 24 | WARN = logging.WARN 25 | WARNING = WARN 26 | INFO = logging.INFO 27 | NOTICE = INFO 28 | DEBUG = logging.DEBUG 29 | SPEW = 2 # e.g. mql.utils.dumplog: for things that are expensive and verbose 30 | -------------------------------------------------------------------------------- /pymql_import_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.4 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Import unittest for pymql""" 17 | 18 | __author__ = 'rtp@google.com (Tyler Pirtle)' 19 | 20 | import google3 21 | from google3.testing.pybase import googletest 22 | 23 | 24 | class PymqlImportTest(googletest.TestCase): 25 | 26 | def canImport(self): 27 | import pymql 28 | 29 | def canInit(self): 30 | import pymql 31 | mql = pymql.MQLService(graphd_addrs=['localhost:8100']) 32 | 33 | def emqlCanImport(self): 34 | import pymql.emql.emql 35 | 36 | 37 | if __name__ == '__main__': 38 | googletest.main() 39 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement (CLA). You (or your employer) retain the copyright to your 10 | contribution; this simply gives us permission to use and redistribute your 11 | contributions as part of the project. Head over to 12 | <https://cla.developers.google.com/> to see your current agreements on file or 13 | to sign a new one. 14 | 15 | You generally only need to submit a CLA once, so if you've already submitted one 16 | (even if it was for a different project), you probably don't need to do it 17 | again. 18 | 19 | ## Code reviews 20 | 21 | All submissions, including submissions by project members, require review. We 22 | use GitHub pull requests for this purpose. Consult 23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 24 | information on using pull requests. 25 | 26 | ## Community Guidelines 27 | 28 | This project follows 29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
-------------------------------------------------------------------------------- /emql/docs/documentation.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | @import url(html4css1.css); 18 | 19 | html { 20 | color: black; 21 | background-color: white; 22 | } 23 | 24 | body { 25 | margin-left: 10ex; 26 | margin-top: 5ex; 27 | padding-left: 1ex; 28 | border-left: 1px solid #006; 29 | width: 75ex; 30 | background-color: white; 31 | } 32 | 33 | h1 { 34 | border-bottom: 2px solid #006; 35 | } 36 | 37 | dt { 38 | font-weight: bold; 39 | } 40 | 41 | h1, h2, h3, h4, h5, h6 { 42 | font-family: Helvetica, Arial, sans-serif; 43 | padding: 4px; 44 | font-size: 100%; 45 | } 46 | 47 | h1.title { 48 | font-size: 120%; 49 | background-color: orange; 50 | } 51 | -------------------------------------------------------------------------------- /BUILD: -------------------------------------------------------------------------------- 1 | # Author: rtp@google.com (Tyler Pirtle) 2 | # 3 | # Description: 4 | # mql - implementation(s) of the Metaweb Query Language 5 | 6 | package(default_visibility = ["//visibility:public"]) 7 | 8 | py_library( 9 | name = "mql", 10 | srcs = [ 11 | "__init__.py", 12 | "tid.py", 13 | "error.py", 14 | "api/__init__.py", 15 | "api/envelope.py", 16 | "formats/__init__.py", 17 | 
"formats/http.py", 18 | "util/__init__.py", 19 | "util/dumper.py", 20 | "util/keyquote.py", 21 | "util/mwdatetime.py", 22 | ] + glob([ 23 | "log/*.py", 24 | "mql/*.py", 25 | "mql/graph/*.py", 26 | ]), 27 | deps = [ 28 | "@absl_py//absl:app", 29 | "@absl_py//absl/flags", 30 | "@absl_py//absl/logging", 31 | ], 32 | ) 33 | 34 | #py_test( 35 | # name = "pymql_import_test", 36 | # size = "small", 37 | # srcs = ["pymql_import_test.py"], 38 | # deps = [ 39 | # ":mql", 40 | # "//pyglib", 41 | # "//testing/pybase", 42 | # ], 43 | #) 44 | 45 | py_binary( 46 | name = "mqlbin", 47 | srcs = ["mqlbin.py"], 48 | python_version = "PY2", 49 | deps = [ 50 | ":mql", 51 | ], 52 | ) 53 | 54 | test_suite( 55 | name = "AllTests", 56 | tests = [ 57 | "//third_party/py/pymql/test:AllTests", 58 | ], 59 | ) 60 | -------------------------------------------------------------------------------- /emql/adapters/lib.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import mw


def bdb_lookup(me, guid, bdb):
    """Fetch and JSON-decode the static blob stored for ``guid`` in ``bdb``.

    The guid has its '#' replaced by '/guid/', the blob client turns it into a
    relative URL, and that URL is requested with GET from the first address in
    ``me.mss.ctx.clobd_read_addrs`` through the session's ``http_connect``.

    Args:
      me: object exposing ``mss.ctx.clobd_read_addrs`` and ``get_session()``.
      guid: guid string; every '#' is rewritten to '/guid/'.
      bdb: passed through to ``BLOBClient.get_static_relative_url``.

    Returns:
      The decoded JSON body when the response status is 200; ``None`` for a
      404 and for any other status.
    """
    blob_id = guid.replace('#', '/guid/')
    path = mw.blob.blobclient.BLOBClient.get_static_relative_url(bdb, blob_id)

    # Only the first configured read address is ever used.
    host, port = me.mss.ctx.clobd_read_addrs[0]
    address = host + ':' + str(port)

    url, connection = me.get_session().http_connect(address, path)
    connection.request('GET', url)
    response = connection.getresponse()
    body = response.read()
    # TODO: how to do debugging? LOG if debug?
    # print "metacritic_adapter: result: %s" % body

    if response.status != 200:
        # 404 means "no such blob".
        # TODO: Log unexpected status from BDB (anything other than 404).
        return None
    return mw.json.loads(body)
15 | 16 | 17 | from service import Session, ServiceContext 18 | from content import Content, ContentWrapper, NEW_DOCUMENT 19 | from envelope import MQLEnvelope 20 | from mw.mql.error import (MQLError, MQLParseError, MQLInternalError, 21 | MQLTypeError, MQLResultError, MQLInternalParseError, 22 | NamespaceException) 23 | 24 | Session # PYFLAKES 25 | ServiceContext # PYFLAKES 26 | Content # PYFLAKES 27 | ContentWrapper # PYFLAKES 28 | NEW_DOCUMENT # PYFLAKES 29 | MQLEnvelope # PYFLAKES 30 | MQLError # PYFLAKES 31 | MQLParseError # PYFLAKES 32 | MQLInternalError # PYFLAKES 33 | MQLTypeError # PYFLAKES 34 | MQLResultError # PYFLAKES 35 | MQLInternalParseError # PYFLAKES 36 | NamespaceException # PYFLAKES 37 | -------------------------------------------------------------------------------- /util/http.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import re

try:
    import urllib2
except ImportError:
    # Python 3 keeps urlopen / ProxyHandler / build_opener in urllib.request.
    import urllib.request as urllib2

# Matcher for dotted-quad IPv4 literals such as "10.0.0.1".  Every dot is
# escaped: with the last one left as a bare "." the pattern also accepted
# non-addresses like "1.2.345" or "1.2.3x4".
ip_address = re.compile(r"^\d+\.\d+\.\d+\.\d+$").match


def parse_domain_from_host(host):
    """Return the domain part of a ``host[:port]`` string.

    Args:
      host: a host name or IPv4 address, optionally followed by ``:port``.

    Returns:
      The host itself when it is a dotted-quad IPv4 address; otherwise the
      last one or two dot-separated labels (``www.example.com`` ->
      ``example.com``, ``localhost`` -> ``localhost``).
    """
    # Drop an optional ":port" suffix.
    host = host.split(':')[0]

    if ip_address(host):
        return host

    # the domain is the last one or two dot-separated words
    return '.'.join(host.rsplit(".", 2)[-2:])


def get_http_proxy_opener(mss):
    """Lazily retrieve proxy info and return a matching ``urlopen`` callable.

    Args:
      mss: object whose ``full_config`` mapping may hold the
        ``me.external_proxy`` setting.

    Returns:
      ``urllib2.urlopen`` when no proxy is configured (missing or blank
      setting); otherwise the ``open`` method of an opener that routes
      ``http`` requests through the configured proxy.
    """
    config = mss.full_config

    proxy_addr = config.get('me.external_proxy', '').strip()
    if not proxy_addr:
        return urllib2.urlopen

    proxy_handler = urllib2.ProxyHandler({'http': proxy_addr})
    return urllib2.build_opener(proxy_handler).open


def proxied_urlopen(request, mss):
    """Open ``request`` through the proxy configured on ``mss``, if any."""
    opener = get_http_proxy_opener(mss)
    return opener(request)
15 | 16 | To use (using mql_read as an example) 17 | 18 | from mw.mql import mql_read, MiniContext 19 | 20 | query = { 21 | "query":[{ 22 | "id":"/common/topic", 23 | "type":"/type/type", 24 | "properties":[{}] 25 | }] 26 | } 27 | 28 | ctx = MiniContext(("localhost", 1234)) 29 | result = mql_read(ctx, query) 30 | 31 | """ 32 | 33 | #from pathexpr import wrap_query 34 | #from mw.log import LOG 35 | # 36 | #__all__ = ['mql_read', 'mql_write', 'MiniContext'] 37 | # 38 | #def mql_read(ctx, query, varenv=None, transaction_id=None): 39 | # LOG.error("deprecated", "mw.mql.mql_read()") 40 | # return wrap_query(ctx.high_querier.read, query, varenv, transaction_id) 41 | # 42 | #def mql_write(ctx, query, varenv=None, transaction_id=None): 43 | # LOG.error("deprecated", "mw.mql.mql_write()") 44 | # assert not ctx.gc.readonly, "Context must be created with readonly=False" 45 | # return wrap_query(ctx.high_querier.write, query, varenv, transaction_id) 46 | -------------------------------------------------------------------------------- /mqlbin.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
"""A simple wrapper to demonstrate basic mql reads and writes."""

__author__ = "bneutra@google.com (Brendan Neutra)"

import json

from absl import app
from absl import flags
from collections import OrderedDict
from pymql import MQLService
from pymql.mql.graph import TcpGraphConnector

FLAGS = flags.FLAGS
flags.DEFINE_string(
    "mqlenv", None, "a dict in the form of a string which "
    "contains valid mql env key/val pairs")

flags.DEFINE_string("mqlcmd", None, "'read' or 'write'")
flags.DEFINE_string("graphd_addr", "localhost:9100",
                    "host:port of graphd server")


def _parse_graphd_addr(addr):
    """Split a 'host:port' string into a (host, int(port)) tuple."""
    host, sep, port = addr.rpartition(":")
    if not sep or not host or not port.isdigit():
        raise app.UsageError(
            "--graphd_addr must look like host:port, got %r" % (addr,))
    return host, int(port)


def main(argv):
    """Run one MQL read or write against graphd and print the result.

    Args:
      argv: command-line arguments left over after flag parsing; argv[1] must
        be the query as a JSON string.

    Raises:
      Exception: if --graphd_addr is empty.
      app.UsageError: if --graphd_addr is not host:port or the query argument
        is missing.
    """
    if not FLAGS.graphd_addr:
        raise Exception("Must specify a --graphd_addr")
    if len(argv) < 2:
        raise app.UsageError("expected the MQL query (JSON) as an argument")

    # Connect to the address given by --graphd_addr.  This used to be
    # hard-coded to ("localhost", 8100), silently ignoring the flag.
    # NOTE(review): the flag's default is localhost:9100, so runs without the
    # flag now go to port 9100 rather than 8100 -- confirm the intended default.
    conn = TcpGraphConnector(addrs=[_parse_graphd_addr(FLAGS.graphd_addr)])
    mql = MQLService(connector=conn)

    # OrderedDict keeps the key order of the query as written.
    q = json.loads(argv[1], object_pairs_hook=OrderedDict)
    env = {}
    if FLAGS.mqlenv:
        env = json.loads(FLAGS.mqlenv)

    if FLAGS.mqlcmd == "read":
        print(mql.read(q, **env))
    elif FLAGS.mqlcmd == "write":
        print(mql.write(q, **env))
    else:
        print("you must provide a --mqlcmd, either 'read' or 'write'")


if __name__ == "__main__":
    app.run(main)
#
#
#  flexible union-find operations
#
#  you can use a particular attribute of the objects you're working
#  with as the union chain attribute, or supply chain_get / chain_put
#  callbacks that read and write a node's chain link.
#
#  a node is a root when its chain link compares equal (==) to itself.
#
#
#  NOT WELL-TESTED
#
#


def union(node1, node2, chainattr=None, chain_get=None, chain_put=None):
    """union-find: merge the sets containing ``node1`` and ``node2``.

    The first argument is favored as the new common root: the root of
    ``node2``'s set is re-pointed at the root of ``node1``'s set.

    Args:
      node1: a member of the set whose root is kept.
      node2: a member of the set that is merged into the first.
      chainattr: name of the attribute holding each node's chain link.
      chain_get: optional ``f(node) -> link`` accessor used instead of the
        attribute (same meaning as in ``find``).
      chain_put: optional ``f(node, link)`` setter used instead of the
        attribute (same meaning as in ``find``).

    Returns:
      None.  Nothing is changed when both nodes already share a root.
    """
    c1 = find(node1, chainattr, chain_get, chain_put)
    c2 = find(node2, chainattr, chain_get, chain_put)
    if c1 == c2:
        return
    if chain_put is None:
        setattr(c2, chainattr, c1)
    else:
        chain_put(c2, c1)


def find(node, chain_attr=None, chain_get=None, chain_put=None):
    """union-find: find the definitive member (root) of ``node``'s set.

    Lookup chains are collapsed along the way: every node visited between
    ``node`` and the root is re-pointed directly at the root.

    Args:
      node: the node to look up.
      chain_attr: name of the attribute holding each node's chain link; used
        for whichever of ``chain_get`` / ``chain_put`` is not supplied.
      chain_get: optional ``f(node) -> link`` accessor.
      chain_put: optional ``f(node, link)`` setter.

    Returns:
      The root of the set containing ``node``.

    Raises:
      TypeError: if neither ``chain_attr`` nor ``chain_get`` is given.
    """
    if chain_get is None:
        if chain_attr is None:
            # Fail with a readable message instead of getattr(p, None).
            raise TypeError("find() needs chain_attr or chain_get")
        chain_get = lambda p: getattr(p, chain_attr)
    if chain_put is None:
        chain_put = lambda p, v: setattr(p, chain_attr, v)

    #
    # find the root for this union
    #
    root = node
    while True:
        parent = chain_get(root)
        if root == parent:
            break
        root = parent

    #
    # collapse the chain from us to the root
    #
    current = node
    while True:
        parent = chain_get(current)
        if parent == root:
            break
        chain_put(current, root)
        current = parent

    return root
from mw.tests.helpers import TestFixture
from mw.emql import emql


class TestNytimes_adapter(TestFixture):
    """Exercises the /base/topics/news/nytimes eMQL property."""

    NYTIMES = "/base/topics/news/nytimes"

    def setUp(self):
        super(TestNytimes_adapter, self).setUp()
        self.cache = emql.emql_cache()

    def run_query(self, q):
        """Run q through emqlread with the nytimes API key; return results."""
        api_key = self.mss.ctx.config['extensions.nytimes_articles']
        debug, cursors, results = self.mss.emqlread(
            None, q, {'debug': True, 'cache': False},
            api_keys={'nytimes_articles': api_key},
            cache=self.cache)
        return results

    def test_stephen_colbert(self):
        r = self.run_query({"id": "/en/stephen_colbert",
                            "/base/topics/news/nytimes": [{'limit': 4}]})
        # assertTrue (not a bare assert) so the check survives python -O
        self.assertTrue(r[self.NYTIMES])
        self.assertEqual(len(r[self.NYTIMES]), 4)

    def test_us_presidents(self):
        results = self.run_query([{"id": None,
                                   "/base/topics/news/nytimes": [{"limit": 1}],
                                   "limit": 3,
                                   "/people/person/date_of_birth": None,
                                   "sort": "-/people/person/date_of_birth",
                                   "type": "/government/us_president"}])
        for r in results:
            self.assertTrue(r[self.NYTIMES])
            self.assertEqual(len(r[self.NYTIMES]), 1)
from mw.mql import scope
from mw.log import LOG


def set_oauth_attribution_if_needed(mss):
    """Point the session's `attribution` variable at an app attribution node.

    Does nothing when the session has no authorized application. Otherwise
    looks up the /freebase/written_by node created by the current user for
    the authorized application, creating one if the lookup returns nothing,
    and pushes its id (or None) as the `attribution` variable.

    NOTE(review): nesting was reconstructed from a flattened listing --
    confirm that taking the first match happens for every non-empty lookup
    and that the final push_variables call runs for both branches.
    """
    if not mss.authorized_app_id:
        return

    user_id = mss.get_user_id()

    lookup = [{
        "id": None,
        "creator": user_id,
        "type": "/freebase/written_by",
        "/freebase/written_by/application": {"id": mss.authorized_app_id}
    }]
    matches = mss.mqlread(lookup, cache=False)

    if not matches:
        creation = {
            "create": "unconditional",
            "id": None,
            "/freebase/written_by/application": {
                "connect": "insert",
                "id": mss.authorized_app_id
            },
            "type": ["/freebase/written_by", "/type/attribution"]
        }
        # the write runs with the oauth permission pushed for its duration
        with mss.push_variables(permission="/boot/oauth_permission",
                                privileged=scope.Privileged,
                                authority=None):
            attribution = mss.mqlwrite(creation)
    else:
        if len(matches) > 1:
            # somehow we manage to get multiple attributions -
            # fail gracefully and log an error
            LOG.warn("set_oauth_attribution_if_needed.duplicate",
                     "duplicate attributions for %s and %s" % (
                         mss.authorized_app_id, user_id),
                     application_id=mss.authorized_app_id,
                     user_id=user_id,
                     attributions=matches)
        attribution = matches[0]

    mss.push_variables(attribution=attribution['id'] if attribution else None)
"""Query sorting unittest for pymql."""

__author__ = 'bneutra@google.com (Brendan Neutra)'

import collections
import json

import google3
import pymql

from google3.testing.pybase import googletest

# Built from a list of pairs so the deliberately unsorted key order is
# actually preserved; a dict literal does not guarantee it.
testdictpart = collections.OrderedDict([
    ('propd', None),
    ('propc', 'foo'),
    ('propb', []),
    ('prope', {}),
    ('propf', 1.1),
    (11, False),
])

testdict = testdictpart.copy()

testdict['propa'] = testdictpart.copy()
testdict['propg'] = [testdictpart.copy(), testdictpart.copy()]
testdict['propg'][1]['propa'] = testdictpart.copy()


def IsSorted(part):
    """Check that all keys are sorted.

    Recurses through lists and dicts; any other value counts as sorted.

    Args:
      part: a (possibly nested) structure of dicts, lists and scalars.

    Returns:
      True if every dict reached iterates its keys in sorted order.
    """
    if isinstance(part, list):
        return all(IsSorted(p) for p in part)
    if isinstance(part, dict):
        # list(part) rather than part.keys(): a keys view never compares
        # equal to a list.
        keys = list(part)
        if sorted(keys) != keys:
            return False
        return all(IsSorted(v) for v in part.values())
    return True


class PymqlSortTest(googletest.TestCase):

    def testSorting(self):
        """basic sorting test."""
        sorted_dict = pymql.sort_query_keys(testdict)
        self.assertTrue(IsSorted(sorted_dict))
        self.assertFalse(IsSorted(testdict))

        # the dict should not change in meaning
        # need to convert to dict first.
        converted_dict = json.loads(json.dumps(testdict))
        converted_sorted_dict = json.loads(json.dumps(sorted_dict))
        # nice helper function that's order independent
        self.assertDictEqual(converted_sorted_dict, converted_dict)


if __name__ == '__main__':
    googletest.main()
import mw
from lib import bdb_lookup

#TODO: python docs
#TODO: log exceptions?


class metacritic_adapter(mw.emql.adapter.Adapter):
    """eMQL adapter returning metacritic keys and scores per guid."""

    # NOTE(review): shared secret kept in source; callers must send it as
    # query['secret'].
    SECRET = 'random_rodent'

    def make_result(self, key, scores):
        """Build the result dict for one topic from its key and scores."""
        url = 'http://www.metacritic.com/video/titles/%s' % key
        return {
            'key' : key,
            'url' : url,
            'score' : scores['metascore'],
            'userscore' : scores['userscore'],
            'attribution_html' : 'TODO'
        }

    def check_secret(self, params, guid, result):
        """Return True if the query carries the secret.

        On failure an error entry is stored in result[guid] and False is
        returned.
        """
        query = params.get('query')
        if query and query.get('secret') == self.SECRET:
            return True
        result[guid] = { 'error':'Invalid auth' }
        return False

    def get_key(self, me, guid):
        """Return the first 'source-metacritic-movie' entry, or None."""
        found = bdb_lookup(me, guid, 'source-metacritic-movie')
        return found[0] if found else None

    def get_scores(self, me, guid):
        """Return the 'metacritic-scores' lookup result for guid."""
        return bdb_lookup(me, guid, 'metacritic-scores')

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Map each guid in args to its metacritic result.

        Guids that fail the secret check get an error entry; guids without
        a key or without scores are left out.
        """
        result = {}
        for mqlres in args:
            guid = mqlres['guid']
            if self.check_secret(params, guid, result):
                key = self.get_key(me, guid)
                # scores are only looked up once a key is known
                scores = self.get_scores(me, guid) if key else None
                if scores:
                    result[guid] = self.make_result(key, scores)
        return result
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # -*- coding: utf-8 -*- 17 | # 18 | """test regressions around id resolution.""" 19 | 20 | __author__ = 'bneutra@google.com (Brendan Neutra)' 21 | 22 | import google3 23 | from pymql.mql import error 24 | from pymql.test import mql_fixture 25 | 26 | 27 | class MQLTest(mql_fixture.MQLTest): 28 | 29 | def setUp(self): 30 | self.SetMockPath('data/regression_id.yaml') 31 | super(MQLTest, self).setUp() 32 | self.env = {'as_of_time': '2009-10-01'} 33 | 34 | def testDeepId(self): 35 | # buganizer: 4363162 36 | query = """ 37 | {"id": 38 | "/en/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a"} 39 | """ 40 | self.DoQuery(query, exp_response='null') 41 | 42 | def testTooDeepId(self): 43 | # buganizer: 4363162 44 | # id path limit is 200 deep 45 | query = """ 46 | {"id": 47 | "/en/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a/a"} 48 | """ 49 
import pprint

try:
    # Python 2: text/byte strings and both integer types
    _ATOMIC_TYPES = (basestring, bool, int, float, long)
except NameError:
    # Python 3: basestring and long are gone
    _ATOMIC_TYPES = (str, bytes, bool, int, float)


def dump(object, depth=10, ctx=None):
    """Convert an object graph into plain dicts/lists suitable for pprint.

    Atomic values, None and empty dict/list/tuple are returned unchanged.
    Containers and other objects are labelled
    '<TypeName instance at 0x...>'; dicts and objects with a __dict__ become
    dicts carrying that label under '!!REPR!!', lists and tuples become lists
    whose first element is the label.

    Args:
      object: the value to dump.
      depth: how many levels to descend before emitting '!!DEPTH!!' markers.
      ctx: bookkeeping dict mapping id(obj) -> label for objects already
        seen (emitted as '!!REPEAT!!'). A type name present in ctx makes
        instances of that type come out as '!!SKIPPED!!', and an attribute
        name present in ctx is left out of object dumps.

    Returns:
      The plain-data rendering described above.
    """
    if ctx is None:
        ctx = {}

    # don't subclass these types.  Please!
    if object is None or isinstance(object, _ATOMIC_TYPES):
        return object

    # subclasses of these types are interesting.
    if (type(object) in [dict, list, tuple]) and len(object) == 0:
        return object

    oid = id(object)
    if oid in ctx:
        return "!!REPEAT!!" + ctx[oid]

    typename = type(object).__name__
    if typename == 'instance':
        # Python 2 old-style class instance: report the real class name
        typename = object.__class__.__name__
    label = '<' + typename + ' instance at ' + hex(oid) + '>'
    ctx[oid] = label

    if typename in ctx:
        return "!!SKIPPED!!" + label

    if depth < 0:
        return "!!DEPTH!!" + label

    if isinstance(object, dict):
        result = { '!!REPR!!' : label }
        for k in object:
            result[k] = dump(object[k], depth-1, ctx)
        return result

    if isinstance(object, (list, tuple)):
        result = [ label ]
        for item in object:
            result.append(dump(item, depth-1, ctx))
        return result

    result = { '!!REPR!!' : label }
    try:
        for key in object.__dict__:
            if key not in ctx:
                result[key] = dump(object.__dict__[key], depth-1, ctx)
    except Exception:
        # best effort: objects without a usable __dict__ keep just the
        # label. Unlike a bare except this lets KeyboardInterrupt and
        # SystemExit through.
        pass
    return result


def dumper(object, depth=10, ctx=None):
    """Pretty-print dump(object, depth, ctx) to stdout."""
    if ctx is None:
        ctx = {}
    pprint.pprint(dump(object, depth, ctx))


def dumps(object, **kws):
    """Return the pretty-printed dump of object as a string.

    Keyword arguments (depth, ctx) are passed through to dump().
    """
    return pprint.pformat(dump(object, **kws))
from mw.tests.helpers import TestFixture
from mw.emql import emql


class TestTwitter_adapter(TestFixture):
    """Exercises the /base/topics/news/twitter_from eMQL property."""

    TWITTER_FROM = "/base/topics/news/twitter_from"

    def setUp(self):
        super(TestTwitter_adapter, self).setUp()
        self.cache = emql.emql_cache()

    def run_query(self, q):
        """Run q through emqlread (debug on, cache off); return results."""
        debug, cursors, results = self.mss.emqlread(
            None, q, {'debug': True, 'cache': False},
            cache=self.cache)
        return results

    def test_stephen_colbert(self):
        r = self.run_query({"id": "/en/stephen_colbert",
                            "/base/topics/news/twitter_from": [{'limit': 4}]})
        # assertTrue/assertFalse replace bare asserts and the deprecated
        # failIf/failUnless aliases
        self.assertTrue(r[self.TWITTER_FROM])
        self.assertEqual(len(r[self.TWITTER_FROM]), 4)
        for tweet in r[self.TWITTER_FROM]:
            self.assertFalse('raw' in tweet)

        r = self.run_query({"id": "/en/stephen_colbert",
                            "/base/topics/news/twitter_from": [{'limit': 3,
                                                                'raw': True}]})
        self.assertTrue(r[self.TWITTER_FROM])
        self.assertEqual(len(r[self.TWITTER_FROM]), 3)
        for tweet in r[self.TWITTER_FROM]:
            self.assertTrue('raw' in tweet)

    def test_george_washington(self):
        r = self.run_query({"id": "/en/george_washington",
                            "/base/topics/news/twitter_from": None})
        self.assertFalse(r[self.TWITTER_FROM])
# Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import math 17 | from itertools import izip, chain 18 | 19 | from mw.emql.adapter import Adapter, AdapterUserError 20 | from mw.emql.emql import id_guid, formatted_id_guid 21 | 22 | 23 | class stats_adapter(Adapter): 24 | 25 | def reduce(self, tid, graph, mql, me, control, mqlres, params, api_keys): 26 | 27 | constraints = params.get('constraints') 28 | op = params.get('property') 29 | params = params.get('query') 30 | 31 | args = None 32 | if isinstance(params, dict): 33 | args = params.get('value', '').split('.') 34 | 35 | if not args: 36 | raise ValueError, "%s: missing 'value' argument" %(op) 37 | 38 | def get(res, prop): 39 | if isinstance(res, dict): 40 | return res[prop] 41 | else: 42 | value = res[0] 43 | if isinstance(value, dict): 44 | value = value[prop] 45 | return value 46 | 47 | values = [] 48 | for _mqlres in mqlres: 49 | value = reduce(get, args, _mqlres) 50 | if value is not None: 51 | values.append(value) 52 | 53 | if values: 54 | if op.startswith('@'): 55 | op = op[1:] 56 | 57 | try: 58 | if op == 'average': 59 | return dict(value=float(sum(values)) / len(values)) 60 | 61 | if op == 'median': 62 | values.sort() 63 | return dict(value=values[len(values) / 2]) 64 | 65 | if op == 'min': 66 | return dict(value=min(values)) 67 | 68 | if op == 'max': 69 | return dict(value=max(values)) 70 | 71 | if op == 'total': 72 | return dict(value=sum(values)) 73 | 74 | if op 
"""

UniqueStr is a base class for implementing enums
as strings.

see MediaType and TextEncoding subclasses for example
subclasses.

"""


class UniqueStr(str):
    """
    UniqueStr looks like an ascii str, but it has been normalized.

    It's a string that behaves like an enum: constructing the same
    (normalized) value twice returns the same object.

    Subclass this for values like media-types, charsets,
    language names, locale, etc.
    """

    # dictionary mapping names to known values.
    # multiple names may match to the same unique str if it has aliases.
    # this looks like a mapping from str to str but it's really a mapping
    # from str to UniqueStr.
    _known = dict()

    # if set, attempts to create new values will fail
    _exclusive = False

    def __new__(cls, s):
        """
        return the unique instance for s, creating it if needed.

        raises ValueError if the value is unknown and the class
        is marked _exclusive.
        """
        # make sure cls has its own _known and _exclusive, so that
        # subclasses do not share a registry with their parent.
        if '_known' not in cls.__dict__:
            cls._known = {}
            cls._exclusive = False

        s = cls.normalize(s)
        mt = cls._known.get(s)
        # compare against None: the interned empty string is falsy and
        # must not be treated as missing.
        if mt is None:
            if cls._exclusive:
                raise ValueError("Unknown unique string")

            mt = str.__new__(cls, s)
            cls._known[s] = mt
        return mt

    @classmethod
    def normalize(cls, s):
        """
        normalize a string before intern-ing it.

        this is useful when there are multiple values of a string
        that are acceptable but you want to convert them to a
        preferred format, e.g. using a particular capitalization
        style for case-insensitive identifiers.

        this is also an opportunity to reject (with ValueError)
        invalid values.

        the base implementation converts non-strings with str()
        and strips surrounding whitespace.
        """
        if not isinstance(s, str):
            s = str(s)

        return s.strip()

    def addalias(self, alias):
        """
        add an alias for this unique string.

        re-adding an existing alias of this same value is a no-op;
        raises ValueError if the alias already names another value.

        you can do more powerful things by overriding .normalize().
        """
        if alias in self._known:
            if self is not self._known[alias]:
                raise ValueError('attempt to change UniqueStr alias')
            # XXX should log a warning here, but it's safe to continue
            return
        self._known[alias] = self
# -*- coding: utf-8 -*-
#
"""Making sure new exceptions are called properly."""

__author__ = 'bobbyrullo@google.com (Bobby Rullo)'

import google3
import json
from pymql.mql import error
from pymql.test import mql_fixture


class MQLExceptionTest(mql_fixture.MQLTest):
    """Checks that specific MQL failures raise the dedicated error types."""

    def setUp(self):
        super(MQLExceptionTest, self).setUp()
        self.env = {'user': '/user/mw_brendan'}

    def getFuzzKey(self, test_id):
        """Return 'key_<fuzz>' using the part of the fuzz before any '.'."""
        fuzz = self.getFuzz(test_id)
        # partition keeps the whole value when there is no '.', whereas
        # slicing with find()'s -1 would drop the last character.
        return 'key_{0}'.format(fuzz.partition('.')[0])

    def newNode(self):
        """Create a fresh node with an unconditional write; return its id."""
        query = json.dumps({
            "id": None,
            "create": "unconditional",
        })

        self.DoQuery(query, mqlwrite=True)
        return self.mql_result.result['id']

    def testMQLValueAlreadyInUseError(self):
        key = self.getFuzzKey('alreadyInUse')

        new_id = self.newNode()

        query = {
            "id": new_id,
            "key": {
                "namespace": "/user/mw_brendan/default_domain",
                "value": key,
                "connect": "insert"
            }
        }

        self.DoQuery(json.dumps(query), mqlwrite=True)

        # attaching the same key to a second node must be rejected
        new_id = self.newNode()

        query['id'] = new_id

        self.DoQuery(json.dumps(query), mqlwrite=True,
                     exc_response=(
                         error.MQLValueAlreadyInUseError,
                         'This value is already in use. Please delete it first.'
                     ))

    def testMQLTooManyValuesForUniqueQuery(self):
        query = {
            "type": None,
            "id": "/en/sofia_coppola",
            "name": None
        }

        exc_response = (
            error.MQLTooManyValuesForUniqueQuery,
            "Unique query may have at most one result. Got 25"
        )
        self.DoQuery(json.dumps(query), exc_response=exc_response)

    def testMQLTooManyWrites(self):
        query = """
        {
          "create":"unconditional",
          "type":"/user/mw_brendan/default_domain/note",
          "name":"foobartoomanywrites",
          "id":null
        }
        """
        # a write limit of 0 makes the very first write exceed the quota
        self.env = {
            'user': '/user/mw_brendan',
            'max_writes': {
                'limit': 0,
                'guid': '9202a8c04000641f80000000011af200'
            }
        }
        exc_response = (
            error.MQLWriteQuotaError,
            'Daily write limit of 0 was exceeded.'
        )
        self.DoQuery(query, mqlwrite=True, exc_response=exc_response)


if __name__ == '__main__':
    mql_fixture.main()
import mw, urllib, rfc822, time, datetime
from collections import defaultdict
from mw.emql.adapter import Adapter


def rfc822_to_iso(d):
    """Convert an RFC 822 date string to a naive ISO 8601 string.

    The date and time fields are taken exactly as written in d; any
    timezone offset in d is ignored (rfc822.parsedate does not apply it).

    Raises:
      TypeError: if d cannot be parsed as a date.
    """
    parsed = rfc822.parsedate(d)
    if parsed is None:
        # same exception type the previous mktime(None) call produced
        raise TypeError("unparseable RFC 822 date: %r" % (d,))
    # Build the datetime straight from the parsed fields. Going through
    # time.mktime()/fromtimestamp() re-interprets them as local time and
    # can shift the result on hosts that observe DST.
    return datetime.datetime(*parsed[:6]).isoformat()


class tweets_from_adapter(Adapter):
    """eMQL adapter returning recent tweets for a topic's twitter_id."""

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Return the MQL fragment asking for the topic's twitter_id."""
        return {
            '/internet/social_network_user/twitter_id': {
                'value': None, 'limit': 1, 'optional': True
            },
            ':extras': {'foo': 'bar'}
        }

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Fetch up to `limit` tweets for every result that has a twitter_id.

        Query options (from params['query']): 'limit' (default 5) and 'raw',
        which attaches the unprocessed tweet under 'raw'.

        Returns:
          dict mapping guid -> list of tweet dicts.

        Raises:
          Exception: if the twitter response contains 'error'.
        """
        result = defaultdict(list)
        query = params['query'] or {}

        if isinstance(query, list):
            query = query[0]

        limit = query.get('limit', 5)
        raw = query.get('raw', None)
        twitter_id_prop = '/internet/social_network_user/twitter_id'

        for mqlres in args:
            if not mqlres[twitter_id_prop]:
                continue

            url, connection = me.get_session().http_connect(
                'twitter.com', "/statuses/user_timeline.json")
            qs = urllib.urlencode({
                'count': limit,
                'screen_name': mqlres[twitter_id_prop]['value']
            })
            connection.request('GET', "%s?%s" % (url, qs))
            response = connection.getresponse()
            payload = mw.json.loads(response.read())
            if 'error' in payload:
                me.log('error', 'emql.adapters.twitter', payload['error'],
                       response=payload)
                raise Exception(payload['error'])

            tweets = []
            for j in payload:
                screen_name = j['user']['screen_name']
                tweet = {
                    'timestamp': rfc822_to_iso(j['created_at']),
                    'key': j['id'],
                    'text': j['text'],
                    'user': {'name': j['user']['name'],
                             'profile_image_url': j['user']['profile_image_url'],
                             'screen_name': screen_name,
                             'url': 'http://twitter.com/%s' % screen_name},
                    'url': 'http://twitter.com/%s/status/%s' % (screen_name,
                                                               j['id'])
                }
                if raw:
                    tweet['raw'] = j
                tweets.append(tweet)

            result[mqlres['guid']].extend(tweets)

        # the same guid may appear in several args; cap the merged list
        return dict((k, v[:limit]) for k, v in result.items())

    def help(self, tid, graph, mql, me, control, params):
        """Return the (content type, text) pair documenting this adapter."""
        from docs import twitter_adapter_help

        return 'text/x-rst;', twitter_adapter_help
21 | """ 22 | __author__ = 'nix@google.com (Nick Thompson)' 23 | 24 | import json 25 | import random 26 | import string 27 | 28 | import google3 29 | from pymql.mql import error 30 | from pymql.test import mql_fixture 31 | 32 | class HRIDTest(mql_fixture.MQLTest): 33 | """Tests HRID queries using mqlread.""" 34 | 35 | def setUp(self): 36 | # NOTE: the mock graphd support is broken, so there is no best_hrid.yaml 37 | #self.SetMockPath('data/best_hrid.yaml') 38 | super(HRIDTest, self).setUp() 39 | self.env = {'user': '/user/mw_brendan'} 40 | 41 | def newNodeWithHRID(self, best_hrid): 42 | query = """ 43 | { 44 | "create":"unless_exists", 45 | "/freebase/object_hints/best_hrid": "%s", 46 | "guid":null 47 | } 48 | """ % best_hrid 49 | self.DoQuery(query, mqlwrite=True) 50 | self.assertEquals(self.mql_result.result["create"], 51 | "created") 52 | return self.mql_result.result["guid"] 53 | 54 | def query_assert(self, q, r, exc_response=None, type="mqlread", asof=None): 55 | self.env = {} 56 | if asof is not None: 57 | self.env["as_of_time"] = asof 58 | self.DoQuery(q, exp_response=r, exc_response=exc_response) 59 | 60 | def test_missing_hrid(self): 61 | """Test that MQL still finds an id even if best_hrid is not present""" 62 | q= '{"id":null, "guid":"#9202a8c04000641f8000000000092a01", "mid":null}' 63 | r= ('{"guid": "#9202a8c04000641f8000000000092a01",' 64 | '"id": "/en/sting","mid":"/m/0lbj1"}') 65 | self.query_assert(q,r) 66 | 67 | def test_good_hrid(self): 68 | """Test /type/type, a best_hrid that agrees with the MQL heuristics""" 69 | # /m/0j == /type/type 70 | q= '{"id":null, "mid":"/m/0j", "/freebase/object_hints/best_hrid":null}' 71 | r= ('{"id": "/type/type","mid":"/m/0j",' 72 | '"/freebase/object_hints/best_hrid":"/type/type"}') 73 | self.query_assert(q, r) 74 | 75 | def test_hrid_override(self): 76 | """Create a new node with a bogus best_hrid. 77 | 78 | The old MQL heuristics will fail; check that best_hrid works. 
from locache import LojsonCachePolicy
from mw.log import LOG


class LWTCachePolicy(LojsonCachePolicy):
    """
    Cache policy that reads its variable environment from an 'mss' object.

    Long run, we can probably factor 'mss' out of here completely, right?
    """
    cost_prefix = 'c'

    def __init__(self, mss, tag='mql'):
        # give fake ctx/varenv because we'll be overriding all uses
        # and want to make sure that any time LojsonCachePolicy tries
        # to access ctx/varenv, that it explodes loudly, rather than
        # silently using a bad value
        super(LWTCachePolicy, self).__init__(None, None, tag,
                                             start_time=mss.time_start)
        self.mss = mss

    def _get_varenv(self):
        return self.mss.varenv

    def _set_varenv(self, varenv):
        # this is a no-op because we're forwarding to self.mss.varenv
        pass

    # wrap the existing varenv
    varenv = property(_get_varenv, _set_varenv)

    def annotate_key_object(self, key_obj):
        """Wrap key_obj in an envelope of the varenv keys listed below."""
        envelope_keys = ("cursor", "macro", "escape",
                         "uniqueness_failure", "$lang",
                         "asof", "normalize_only", "unicode_text")
        return self.get_varenv_envelope(key_obj, envelope_keys)

    def annotate_result(self, result):
        """Add the transaction id, and the cursor if set, to the result."""
        annotated = super(LWTCachePolicy, self).annotate_result(result)
        annotated["tid"] = self.mss.transaction_id

        env = self.mss.varenv
        if 'cursor' in env:
            annotated['cursor'] = env['cursor']

        return annotated

    def extract_result(self, full_result):
        """Update mss.cache_age and the cursor, then unwrap the result."""
        # all of this should maybe be done in the mqlread itself?

        # set the age header to at least this old

        # this is the other place where the use of
        # mss.time_start is important
        age = self.start_time - full_result['time']
        self.mss.cache_age = max(self.mss.cache_age, age)

        if 'cursor' in full_result:
            self.mss.varenv['cursor'] = full_result['cursor']

        return super(LWTCachePolicy, self).extract_result(full_result)

    def add_cost(self, costkey, value=1):
        """Forward a cost to mss, prefixing the key with cost_prefix."""
        self.mss.add_cost(self.cost_prefix + costkey, value)

    def should_read_cache(self):
        """Return the 'cache' varenv setting, defaulting to True."""
        return self.varenv.get("cache", True)

    def should_write_cache(self):
        """Return False when a cache write must be skipped."""
        # allow certain reads to not write-through to the cache (for
        # instance, crawlers and results with cursors)
        if self.varenv.get('no_store_cache', False):
            return False

        # we don't cache past the first page in a cursor'ed query
        return not ('cursor' in self.varenv and
                    self.varenv['cursor'] != True)
14 | 15 | 16 | from itertools import izip, chain 17 | from urllib import urlencode 18 | 19 | from mw.emql.adapter import Adapter, REQUEST_HEADERS 20 | from mw.emql.emql import id_guid, formatted_id_guid 21 | 22 | 23 | class quote_adapter(Adapter): 24 | 25 | ticker = "/business/stock_ticker_symbol/ticker_symbol" 26 | 27 | def pre(self, tid, graph, mql, me, control, parent, params, api_keys): 28 | 29 | return {self.ticker: None} 30 | 31 | def fetch(self, tid, graph, mql, me, control, args, params, api_keys): 32 | 33 | query = params.get('query') 34 | results = {} 35 | 36 | format = '' 37 | keys = [] 38 | 39 | # format documented at http://alexle.net/archives/196 40 | if isinstance(query, dict): 41 | for key in query.iterkeys(): 42 | if key == 'volume': 43 | format += 'v' 44 | keys.append(key) 45 | elif key == 'price': 46 | format += 'l1' 47 | keys.append(key) 48 | elif key == 'ticker': 49 | pass 50 | elif key == 'high': 51 | keys.append(key) 52 | format += 'h' 53 | elif key == 'low': 54 | keys.append(key) 55 | format += 'g' 56 | else: 57 | raise ValueError, key 58 | else: 59 | format = 'l1' 60 | keys = ['price'] 61 | 62 | url, connection = me.get_session().http_connect('download.finance.yahoo.com', '/d/quotes.csv') 63 | connection.request('POST', url, 64 | urlencode({'s': ','.join(mqlres[self.ticker] 65 | for mqlres in args), 66 | 'f': format }), 67 | REQUEST_HEADERS) 68 | response = connection.getresponse() 69 | response = response.read() 70 | 71 | results = {} 72 | for mqlres, values in izip(args, response.rstrip().split('\r\n')): 73 | if query is None: 74 | results[mqlres['guid']] = values 75 | else: 76 | result = {} 77 | for key, value in izip(keys, values.split(',')): 78 | if value == "N/A": 79 | value = None 80 | elif key in ('high', 'low', 'price'): 81 | value = float(value) 82 | elif key == 'volume': 83 | value = long(value) 84 | result[key] = value 85 | if 'ticker' in query: 86 | result['ticker'] = mqlres[self.ticker] 87 | results[mqlres['guid']] = result 88 
| 89 | return results 90 | 91 | def help(self, tid, graph, mql, me, control, params): 92 | from docs import quote_adapter_help 93 | 94 | return 'text/x-rst;', quote_adapter_help 95 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MQL, the Metaweb Query Language 2 | 3 | This repository contains the original implementation of the Metaweb Query 4 | Language, written in Python. 5 | 6 | ## Building / Using MQL 7 | 8 | Even though MQL is written in Python, this particular version of it requires 9 | [Bazel](https://bazel.build) to operate properly. You can build the simple 10 | command-line MQL query tool like so: 11 | 12 | ``` 13 | [¬º-°]¬ bazel build :mqlbin 14 | INFO: Analyzed target //:mqlbin (6 packages loaded, 36 targets configured). 15 | INFO: Found 1 target... 16 | Target //:mqlbin up-to-date: 17 | bazel-out/k8-py2-fastbuild/bin/mqlbin 18 | INFO: Elapsed time: 0.771s, Critical Path: 0.02s 19 | INFO: 0 processes. 20 | INFO: Build completed successfully, 1 total action 21 | ``` 22 | 23 | Then, it can be executed out of the bazel build directory: 24 | 25 | *IMPORTANT!* This will only work if you have an 26 | instance of [graphd](https://github.com/google/graphd) running and it has 27 | been properly **bootstrapped** for MQL (see below).
28 | 29 | ``` 30 | [¬º-°]¬ bazel-out/k8-py2-fastbuild/bin/mqlbin --graphd_addr=localhost:8100 --mqlcmd=read '{"id": "/type/object/type", "guid": null}' 31 | 32 | MQLResult(result={'guid': '#d119a8c0400062d1800000000000000c', 'id': '/type/object/type'}, cost=defaultdict(, {'pr': 0.0, 'va': 38742.0, 'tu': 22.0, 'in': 3975.0, 'ir': 0.0, 'tr': 23.0, 'ts': 0.0, 'iw': 0.0, 'te': 26.0, 'mql_utime': 0.047658000000000006, 'mql_dbreqs': 11, 'dw': 0.0, 'tg': 0.030711889266967773, 'tf': 0.04290890693664551, 'pf': 0.0, 'mql_rtime': 1.1784470081329346, 'dr': 5619.0, 'gqr': 0, 'mql_stime': 0.0009940000000000018}), dateline=None, cursor=None) 33 | 34 | ``` 35 | 36 | ## Bootstrapping a graphd for MQL 37 | 38 | PyMQL comes with a graphd bootstrap program that you can use to bootstrap an 39 | empty graphd for use with MQL. The bootstrap program itself writes the set of 40 | core types required for MQL to operate. 41 | 42 | First, ensure you have a graphd running: 43 | 44 | ``` 45 | [¬º-°]¬ git clone https://github.com/google/graphd 46 | Cloning into 'graphd'... 47 | remote: Enumerating objects: 1259, done. 48 | remote: Total 1259 (delta 0), reused 0 (delta 0), pack-reused 1259 49 | Receiving objects: 100% (1259/1259), 2.57 MiB | 14.95 MiB/s, done. 50 | Resolving deltas: 100% (482/482), done. 51 | [¬º-°]¬ cd graphd 52 | [¬º-°]¬ bazel build graphd 53 | ...(graphd builds) 54 | Target //graphd:graphd up-to-date: 55 | bazel-bin/graphd/graphd 56 | INFO: Elapsed time: 29.584s, Critical Path: 0.87s 57 | INFO: 373 processes: 373 linux-sandbox. 58 | INFO: Build completed successfully, 377 total actions 59 | [¬º-°]¬ bazel-bin/graphd/graphd -d /tmp/data-dir -p /tmp/graphd.pid -n 60 | 61 | ``` 62 | 63 | In another terminal, run the bootstrap: 64 | 65 | ``` 66 | [¬º-°]¬ ./bazel-out/k8-py2-fastbuild/bin/bootstrap/bootstrap --load bootstrap/otg.bootstrap 67 | ``` 68 | 69 | The bootstrap takes a few minutes to run and you'll see lots of 70 | `graphd.request.start` and `graphd.request.end` lines. 
This is normal. 71 | 72 | After this is done, you can run MQL queries via mqlbin. 73 | 74 | ## History 75 | 76 | This code was originally authored by Tim Sturge, then maintained by Warren 77 | Harris after his departure. 78 | 79 | Dime ("2 MQL's") was the implementation written by Warren in OCaml that offered 80 | significant improvements over this initial implementation. However, when Metaweb 81 | was acquired by Google nearing the end of the productionization of Dime, it was 82 | only used partially until Freebase was turned down a few years later. In the 83 | meantime, Warren had gone on to develop other tools used during the early days 84 | of the Knowledge Graph projects at Google. 85 | -------------------------------------------------------------------------------- /log/log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.6 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Backward compatible support for mql LOG calls.""" 16 | 17 | __author__ = 'bneutra@google.com (Brendan Neutra)' 18 | 19 | import datetime 20 | import os 21 | import socket 22 | from pymql.log import log_util 23 | from pymql.util import dumper 24 | from absl import logging 25 | 26 | __all__ = ['generate_tid', 'LOG', 'pprintlog', 'dumplog'] 27 | 28 | # allow google logging to discover the caller 29 | # i.e. 
ignore these local functions 30 | skip = [ 31 | '_logit', 'fatal', 'error', 'warn', 'info', 'debug', 'spew', 'exception', 32 | 'warning', 'alert', 'notice', 'log', 'pprintlog', 'dumplog' 33 | ] 34 | 35 | 36 | def _logit(level, s, args=None, kwargs=None): 37 | # let's not waste any cycles 38 | if level > logging.get_verbosity(): 39 | return 40 | msg = '' 41 | if args: 42 | msg += '\t'.join(str(arg) for arg in args) 43 | if kwargs: 44 | msg += '\t'.join('%s=%s' % (pair) for pair in kwargs.iteritems()) 45 | logging.vlog(level, '%s %s' % (s, msg)) 46 | 47 | 48 | class LOG(object): 49 | 50 | @staticmethod 51 | def fatal(s, *args, **kwargs): 52 | _logit(logging.FATAL, s, args, kwargs) 53 | 54 | @staticmethod 55 | def error(s, *args, **kwargs): 56 | _logit(logging.ERROR, s, args, kwargs) 57 | 58 | @staticmethod 59 | def warn(s, *args, **kwargs): 60 | _logit(logging.WARN, s, args, kwargs) 61 | 62 | @staticmethod 63 | def info(s, *args, **kwargs): 64 | _logit(logging.INFO, s, args, kwargs) 65 | 66 | @staticmethod 67 | def debug(s, *args, **kwargs): 68 | _logit(logging.DEBUG, s, args, kwargs) 69 | 70 | @staticmethod 71 | def spew(s, *args, **kwargs): 72 | _logit(log_util.SPEW, s, args, kwargs) 73 | 74 | @staticmethod 75 | def log(level, s, *args, **kwargs): 76 | _logit(level, s, args, kwargs) 77 | 78 | exception = fatal 79 | notice = info 80 | warning = warn 81 | alert = warn 82 | 83 | 84 | def dumplog(string, obj, level=log_util.SPEW): 85 | if level <= logging.get_verbosity(): 86 | LOG.log(level, string, dumper.dumps(obj)) 87 | 88 | 89 | def pprintlog(string, obj, level=log_util.DEBUG, **kwargs): 90 | if level <= logging.get_verbosity(): 91 | LOG.log(level, string, repr(obj)) 92 | 93 | 94 | tid_seqno = 0 95 | hostname = socket.getfqdn() 96 | del socket 97 | pid = os.getpid() 98 | 99 | 100 | def generate_tid(token=None, hostport=None): 101 | global tid_seqno 102 | 103 | # can't determine port without looking at WSGI environ or apache 104 | # config? 
perhaps we could read this from a config file? 105 | if not hostport: 106 | hostport = '%s:0' % hostname 107 | # hostport could be just a port, we prefix it with hostname then 108 | elif isinstance(hostport, (int, long)): 109 | hostport = '%s:%d' % (hostname, hostport) 110 | elif ':' not in hostport: 111 | hostport = '%s:%s' % (hostname, hostport) 112 | 113 | if not token: 114 | token = 'me' 115 | 116 | # small race condition here 117 | tid_seqno += 1 118 | 119 | return ('%s;%s;%05d;%sZ;%04d' % 120 | (token, hostport, pid, datetime.datetime.utcnow().isoformat('T'), 121 | tid_seqno)) 122 | -------------------------------------------------------------------------------- /test/mql_fixture_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.4 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | # -*- coding: utf-8 -*- 17 | # 18 | """test the test fixture.""" 19 | 20 | __author__ = 'bneutra@google.com (Brendan Neutra)' 21 | 22 | import sys 23 | import google3 24 | from pymql.mql import error 25 | from pymql.test import mql_fixture 26 | from google3.pyglib import logging 27 | 28 | 29 | class MQLTest(mql_fixture.MQLTest): 30 | """for testing basic mqlread queries.""" 31 | 32 | def setUp(self): 33 | self.SetMockPath('data/mql_fixture.yaml') 34 | super(MQLTest, self).setUp() 35 | self.env = {'as_of_time': '2009-10-01'} 36 | 37 | def DoQueryException(self, query, expected, **kwargs): 38 | """expect a failure.""" 39 | try: 40 | self.DoQuery(query, **kwargs) 41 | except AssertionError: 42 | msg = str(sys.exc_info()[1]) 43 | if not expected in msg: 44 | self.fail('expected: %s\ngot: %s' % (expected, msg)) 45 | else: 46 | logging.debug('assertion raised, as expected! got: %s', expected) 47 | 48 | def testPositive(self): 49 | query = """ 50 | { 51 | "id": "/en/bob_dylan" 52 | } 53 | """ 54 | exp_response = """ 55 | { 56 | "id": "/en/bob_dylan" 57 | } 58 | """ 59 | self.DoQuery(query, exp_response=exp_response) 60 | 61 | def testUnexpectedResponse(self): 62 | query = """ 63 | { 64 | "id": "/en/bob_dylan" 65 | } 66 | """ 67 | exp_response = """ 68 | { 69 | "id": "/n/bob_dylan" 70 | } 71 | """ 72 | self.DoQueryException( 73 | query, 74 | '!=', 75 | exp_response=exp_response 76 | ) 77 | 78 | def testUnexpectedError(self): 79 | query = """ 80 | { 81 | "invalidkey": "/en/bob_dylan" 82 | } 83 | """ 84 | exp_response = """ 85 | { 86 | "id": "/n/bob_dylan" 87 | } 88 | """ 89 | self.DoQueryException( 90 | query, 91 | 'exception. 
was not expected', 92 | exp_response=exp_response 93 | ) 94 | 95 | def testExpectError(self): 96 | query = """ 97 | { 98 | "guid": "#9202a8c04000641f8000000003abd178", 99 | "id": "/en/bob_dylan" 100 | } 101 | """ 102 | exc_response = ( 103 | error.MQLParseError, 104 | "Can't specify an id more than once in a single clause" 105 | ) 106 | self.DoQuery(query, exc_response=exc_response) 107 | 108 | def testExpectNoError(self): 109 | query = """ 110 | { 111 | "guid": "#9202a8c04000641f8000000003abd178", 112 | "id": "/en/bob_dylan" 113 | } 114 | """ 115 | self.DoQueryException( 116 | query, 117 | 'exception. was not expected', 118 | exp_response='whatev' 119 | ) 120 | 121 | def testExpectOtherError(self): 122 | query = """ 123 | { 124 | "guid": "#9202a8c04000641f8000000003abd178", 125 | "id": "/en/bob_dylan" 126 | } 127 | """ 128 | self.DoQueryException( 129 | query, 130 | "MQLParseError'> != ", 131 | exc_response=(KeyError, 'whatev') 132 | ) 133 | 134 | if __name__ == '__main__': 135 | mql_fixture.main() 136 | -------------------------------------------------------------------------------- /mql/grquoting.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import re 16 | from xml.sax import saxutils 17 | import urllib 18 | import cgi 19 | 20 | from pymql.error import FormattingError 21 | 22 | ###################################################################### 23 | 24 | # quoting rules 25 | _internal_quoting_rules = [ 26 | ('\"', '\\\"'), 27 | ('\\', '\\\\'), 28 | ('\n', '\\n'), 29 | ] 30 | 31 | _internal_to_quote = dict(_internal_quoting_rules) 32 | _internal_from_quote = dict([(a, b) for b, a in _internal_quoting_rules]) 33 | _internal_from_quote['\''] = '' 34 | _internal_from_quote['\"'] = '' 35 | 36 | # I love REs (aka read it and weep) 37 | re_quoted_string_text = '^\"((?:[^\\\\\"]|\\\\[\\\\\"n])*)\"$' 38 | re_quoted_string_part = '\\\\[\\\\\"n]' 39 | # everything matches this, so we don't test (ie. all unquoted strings are legal) 40 | re_unquoted_string_text = '^(?:[^\\\\\n\"]|([\\\\\n\"]))*$' 41 | re_unquoted_string_part = '[\\\\\n\"]' 42 | 43 | re_qs = re.compile(re_quoted_string_text) 44 | re_qs_part = re.compile(re_quoted_string_part) 45 | re_us_part = re.compile(re_unquoted_string_part) 46 | 47 | 48 | def _internal_quote_sub(m): 49 | return _internal_to_quote[m.group()] 50 | 51 | 52 | def _internal_unquote_sub(m): 53 | return _internal_from_quote[m.group()] 54 | 55 | 56 | def _internal_leading_trailing(m): 57 | return 58 | 59 | 60 | ###################################################################### 61 | 62 | 63 | def quote(string): 64 | return '"' + re_us_part.sub(_internal_quote_sub, string) + '"' 65 | 66 | 67 | def unquote(string): 68 | middlem = re_qs.match(string) 69 | if middlem is None: 70 | raise FormattingError('Badly formatted quoted string %s ' % string) 71 | return re_qs_part.sub(_internal_unquote_sub, middlem.group(1)) 72 | 73 | 74 | ###################################################################### 75 | 76 | # 77 | # html escaping 78 | # url escaping 79 | # 80 | # originally from mw/client/escaping.py 81 | # 82 | 83 | 84 | def escapeAttribute(data): 85 | """ 86 | Prepares 
data to be used as an attribute value. The return value 87 | is a quoted version of data. The resulting string can be used 88 | directly as an attribute value: 89 | >>> print "" % quoteattr("ab ' cd \" ef") 90 | 91 | """ 92 | return (saxutils.quoteattr(data)) 93 | 94 | 95 | def escapeUrl(data): 96 | """ 97 | Replace special characters in string using the "%xx" 98 | escape. Letters, digits, and the characters "/_.-" are never 99 | escaped. 100 | """ 101 | return (urllib.quote(data)) 102 | 103 | 104 | def escapeMarkup(data): 105 | """ 106 | Convert the characters "&", "<" and ">" in data to HTML-safe 107 | sequences. 108 | """ 109 | return (cgi.escape(data)) 110 | 111 | 112 | ###################################################################### 113 | 114 | if __name__ == '__main__': 115 | print quote("\n\r\t\"\\foo\\\"") # result is "\n\r\t\"\\foo\\\"" (duh) 116 | print unquote( 117 | "\"foo\\n\\\"\\\\\"" 118 | ) # result is foo"\ -- note that python sees "foo\n\"\\" 119 | print unquote( 120 | "\"foo\\\"\\\"") # should die with an "illegal quoted string" exception 121 | -------------------------------------------------------------------------------- /emql/adapters/text.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | from itertools import izip, chain 17 | 18 | from mw.emql.adapter import Adapter 19 | from mw.emql.emql import id_guid, formatted_id_guid 20 | 21 | 22 | class text_adapter(Adapter): 23 | 24 | def pre(self, tid, graph, mql, me, control, parent, params, api_keys): 25 | 26 | return {"/common/document/content": 27 | {"optional": True, "blob_id": None, "media_type": None}, 28 | "/common/document/source_uri": None, 29 | "guid": None} 30 | 31 | def fetch(self, tid, graph, mql, me, control, args, params, api_keys): 32 | 33 | params = params.get('query') 34 | results = {} 35 | 36 | for mqlres in args: 37 | guid = mqlres['guid'] 38 | content = mqlres["/common/document/content"] 39 | if content is not None: 40 | mediatype = content["media_type"] 41 | if mediatype and mediatype.startswith("/media_type/text"): 42 | blob_id = content["blob_id"] 43 | if blob_id: 44 | chars = me.get_session().fetch_blob(tid, blob_id) 45 | try: 46 | chars = unicode(chars, 'utf-8') 47 | except: 48 | pass 49 | 50 | if params is None: 51 | results[guid] = chars 52 | else: 53 | results[guid] = result = params.copy() 54 | if 'maxlength' in result: 55 | chars = chars[:result['maxlength']] 56 | if 'chars' in result: 57 | result['chars'] = chars 58 | if 'length' in result: 59 | result['length'] = len(chars) 60 | 61 | elif mqlres["/common/document/source_uri"] is not None: 62 | if params is None: 63 | maxlength = None 64 | mode = 'blurb' 65 | else: 66 | maxlength = params.get('maxlength') 67 | mode = params.get('mode', 'blurb') 68 | if mode not in ('blurb', 'raw'): 69 | raise ValueError, "invalid mode: '%s'" %(mode) 70 | 71 | query = '/guid/%s' %(guid[1:]) 72 | if maxlength: 73 | query += '?maxlength=%d' %(maxlength) 74 | 75 | url, connection = me.get_session().http_connect('api.freebase.com', '/api/trans/%s' %(mode) + query) 76 | connection.request('GET', url) 77 | response = connection.getresponse() 78 | chars = response.read() 79 | 80 | if params is None: 81 | results[guid] = chars 82 | 
else: 83 | results[guid] = result = params.copy() 84 | if 'chars' in result: 85 | result['chars'] = chars 86 | if 'length' in result: 87 | result['length'] = len(chars) 88 | 89 | return results 90 | 91 | def help(self, tid, graph, mql, me, control, params): 92 | from docs import text_adapter_help 93 | 94 | return 'text/x-rst;', text_adapter_help 95 | -------------------------------------------------------------------------------- /test/BUILD: -------------------------------------------------------------------------------- 1 | # Author: bneutra@google.com (Brendan Neutra) 2 | # 3 | # Description: mql query language tests. 4 | # 5 | 6 | package(default_visibility = ["//visibility:public"]) 7 | 8 | licenses(["unencumbered"]) # Google acquisition 9 | 10 | exports_files(["LICENSE"]) 11 | 12 | py_library( 13 | name = "testing_deps", 14 | testonly = 1, 15 | srcs = [ 16 | "mql_fixture.py", 17 | ], 18 | data = [ 19 | ":config.cfg", 20 | ] + glob([ 21 | "data/*.yaml", 22 | ]), 23 | deps = [ 24 | "//base", 25 | "//loadbalancer/gslb/client/public:pywrapgslbchannel", 26 | "//metaweb/graphd/server:graphd_py_pb2", 27 | "//pyglib", 28 | "//testing/pybase", 29 | "//third_party/py/pymql:mql", 30 | "//third_party/py/simplejson:simplejson_fast", 31 | "//third_party/py/yaml", 32 | ], 33 | ) 34 | 35 | py_test( 36 | name = "type_link_test", 37 | size = "large", 38 | srcs = [ 39 | "type_link_test.py", 40 | ], 41 | deps = [ 42 | ":testing_deps", 43 | ], 44 | ) 45 | 46 | py_test( 47 | name = "cost_test", 48 | size = "large", 49 | srcs = [ 50 | "cost_test.py", 51 | ], 52 | deps = [ 53 | ":testing_deps", 54 | ], 55 | ) 56 | 57 | py_test( 58 | name = "sort_test", 59 | size = "large", 60 | srcs = [ 61 | "sort_test.py", 62 | ], 63 | deps = [ 64 | ":testing_deps", 65 | ], 66 | ) 67 | 68 | py_test( 69 | name = "basic_mql_test", 70 | size = "large", 71 | srcs = [ 72 | "basic_mql_test.py", 73 | ], 74 | deps = [ 75 | ":testing_deps", 76 | ], 77 | ) 78 | 79 | py_test( 80 | name = "mids_test", 81 | 
size = "large", 82 | srcs = [ 83 | "mids_test.py", 84 | ], 85 | deps = [ 86 | ":testing_deps", 87 | ], 88 | ) 89 | 90 | py_test( 91 | name = "best_hrid_test", 92 | size = "medium", 93 | srcs = [ 94 | "best_hrid_test.py", 95 | ], 96 | deps = [ 97 | ":testing_deps", 98 | ], 99 | ) 100 | 101 | py_test( 102 | name = "mql_fixture_test", 103 | size = "large", 104 | srcs = [ 105 | "mql_fixture_test.py", 106 | ], 107 | deps = [ 108 | ":testing_deps", 109 | ], 110 | ) 111 | 112 | py_test( 113 | name = "regression_id_test", 114 | size = "large", 115 | srcs = [ 116 | "regression_id_test.py", 117 | ], 118 | deps = [ 119 | ":testing_deps", 120 | ], 121 | ) 122 | 123 | py_test( 124 | name = "regression_misc_test", 125 | size = "large", 126 | srcs = [ 127 | "regression_misc_test.py", 128 | ], 129 | deps = [ 130 | ":testing_deps", 131 | ], 132 | ) 133 | 134 | py_test( 135 | name = "mql_manual_test", 136 | size = "large", 137 | srcs = [ 138 | "mql_manual_test.py", 139 | ], 140 | deps = [ 141 | ":testing_deps", 142 | ], 143 | ) 144 | 145 | py_test( 146 | name = "mql_manual_two_test", 147 | size = "large", 148 | srcs = [ 149 | "mql_manual_two_test.py", 150 | ], 151 | deps = [ 152 | ":testing_deps", 153 | ], 154 | ) 155 | 156 | py_test( 157 | name = "mql_manual_write_test", 158 | size = "large", 159 | srcs = [ 160 | "mql_manual_write_test.py", 161 | ], 162 | deps = [ 163 | ":testing_deps", 164 | ], 165 | ) 166 | 167 | py_test( 168 | name = "return_test", 169 | size = "large", 170 | srcs = [ 171 | "return_test.py", 172 | ], 173 | deps = [ 174 | ":testing_deps", 175 | ], 176 | ) 177 | 178 | py_test( 179 | name = "mql_exceptions_test", 180 | size = "large", 181 | srcs = [ 182 | "mql_exceptions_test.py", 183 | ], 184 | deps = [ 185 | ":testing_deps", 186 | ], 187 | ) 188 | 189 | py_test( 190 | name = "query_sort_test", 191 | size = "small", 192 | srcs = [ 193 | "query_sort_test.py", 194 | ], 195 | deps = [ 196 | ":testing_deps", 197 | ], 198 | ) 199 | 
-------------------------------------------------------------------------------- /bootstrap/bootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """bootstrap -- dump and load a bootstrap from an existing graph.""" 3 | 4 | import json 5 | import re 6 | import sys 7 | 8 | from absl import app 9 | from absl import flags 10 | from absl import logging 11 | 12 | from pymql import MQLService 13 | 14 | from pymql.mql import graph 15 | from pymql.mql import lojson 16 | 17 | FLAGS = flags.FLAGS 18 | flags.DEFINE_string( 19 | 'mqlenv', None, 'a dict in the form of a string which ' 20 | 'contains valid mql env key/val pairs') 21 | flags.DEFINE_string('graphd_addr', 'localhost:9100', 22 | 'host:port of graphd server') 23 | flags.DEFINE_string('load', '', 'load bootstrap from given file') 24 | 25 | 26 | class BootstrapError(Exception): 27 | pass 28 | 29 | 30 | class Bootstrap(object): 31 | version = 1 32 | 33 | def __init__(self, gc): 34 | self.gc = gc 35 | 36 | def load_from_file(self, filename): 37 | loadfile = open(filename, 'r') 38 | data = ''.join(loadfile.readlines()) 39 | regex = re.compile('[\n\t]+') 40 | data = regex.sub(' ', data) 41 | loadfile.close() 42 | 43 | d = json.loads(data) 44 | if d['0_version'] != self.version: 45 | raise BootstrapError('version mismatch') 46 | 47 | self.bootstrap = d['1_bootstrap'] 48 | self.nodes = d['2_nodes'] 49 | self.links = d['3_links'] 50 | 51 | def mkprim(self, **kwds): 52 | if 'scope' not in kwds and self.root_user: 53 | kwds['scope'] = self.root_user 54 | params = ' '.join(['%s=%s' % (k, v) for (k, v) in kwds.items()]) 55 | result = self.gc.write_varenv('(%s)' % params, {}) 56 | return result[0] 57 | 58 | def load_bootstrap(self): 59 | self.xlate = {} 60 | self.xlate_link = {} 61 | 62 | if len(self.gc.read_varenv('(pagesize=1 result=(guid))', {})): 63 | logging.fatal("Can't bootstrap a non-empty graph") 64 | 65 | self.root_user = None # avoid forward ref in 
mkprim 66 | self.root_user = self.mkprim(name='"ROOT_USER"') 67 | self.root_namespace = self.mkprim(name='"ROOT_NAMESPACE"') 68 | self.has_key = self.mkprim(name='"HAS_KEY"') 69 | 70 | self.xlate[self.bootstrap['ROOT_USER']] = self.root_user 71 | self.xlate[self.bootstrap['ROOT_NAMESPACE']] = self.root_namespace 72 | self.xlate[self.bootstrap['HAS_KEY']] = self.has_key 73 | 74 | def load_root_user(self): 75 | # we dumped them separately, but we want to load them together... 76 | node_pos = 0 77 | link_pos = 0 78 | while node_pos < len(self.nodes) or link_pos < len(self.links): 79 | if link_pos >= len( 80 | self.links) or (node_pos < len(self.nodes) and 81 | self.nodes[node_pos] < self.links[link_pos]['guid']): 82 | # we will do the next node 83 | node = self.nodes[node_pos] 84 | self.write_node(node) 85 | node_pos += 1 86 | else: 87 | link = self.links[link_pos] 88 | self.write_link(link) 89 | link_pos += 1 90 | 91 | def write_node(self, node): 92 | if node not in self.xlate: 93 | self.xlate[node] = self.mkprim() 94 | 95 | def write_link(self, link): 96 | new_link = {'datatype': link['datatype'], 'value': link['value']} 97 | for ptr in ('left', 'right', 'scope', 'typeguid'): 98 | # translate the link 99 | if ptr in link: 100 | if link[ptr] == 'null': 101 | new_link[ptr] = 'null' 102 | elif link[ptr] not in self.xlate: 103 | raise BootstrapError('Saw dangling link %s' % repr(link)) 104 | else: 105 | new_link[ptr] = self.xlate[link[ptr]] 106 | new_link['guid'] = self.mkprim(**new_link) 107 | 108 | self.xlate_link[link['guid']] = new_link 109 | 110 | 111 | def main(argv): 112 | if not FLAGS.graphd_addr: 113 | raise Exception('Must specify a --graphd_addr') 114 | 115 | conn = graph.TcpGraphConnector(addrs=[('localhost', 8100)]) 116 | 117 | bootstrap = Bootstrap(conn) 118 | bootstrap.load_from_file(FLAGS.load) 119 | bootstrap.load_bootstrap() 120 | bootstrap.load_root_user() 121 | 122 | 123 | if __name__ == '__main__': 124 | app.run(main) 125 | 
def quotekey(ustr):
    """
    quote a unicode string to turn it into a valid namespace key

    Letters, digits and '_' are kept everywhere; '-' is kept only in
    interior positions (never first or last).  Every other character is
    replaced by '$' followed by its code point as uppercase hex, zero
    padded to four digits (for instance '+' becomes '$002B').

    ustr may be a unicode object or a utf-8 encoded byte string; any
    other type raises ValueError.  The result is passed through str().
    """
    valid_always = string.ascii_letters + string.digits + '_'
    valid_interior_only = valid_always + '-'

    # test for unicode first: the two types are disjoint on Python 2, so
    # the order does not matter there, and text never gets re-decoded.
    if isinstance(ustr, unicode):
        s = ustr
    elif isinstance(ustr, str):
        s = unicode(ustr, 'utf-8')
    else:
        raise ValueError('quotekey() expects utf-8 string or unicode')

    if len(s) == 0:
        return str(s)

    last = len(s) - 1
    output = []
    for pos, c in enumerate(s):
        # the first and the last character obey the stricter rule
        if pos == 0 or pos == last:
            allowed = valid_always
        else:
            allowed = valid_interior_only

        if c in allowed:
            output.append(c)
        else:
            output.append('$%04X' % ord(c))

    return str(''.join(output))


def unquotekey(key, encoding=None):
    """
    unquote a namespace key and turn it into a unicode string

    This is the inverse of quotekey(): '$XXXX' escapes are decoded and
    '-' is accepted only in interior positions.  If encoding is given
    the result is encoded with it, otherwise unicode is returned.

    Raises error.MQLInternalError on a character that is not valid at
    its position, and ValueError if a '$' escape is not valid hex.
    """

    valid_always = string.ascii_letters + string.digits + "_"

    last = len(key) - 1
    output = []
    i = 0
    while i < len(key):
        c = key[i]
        if c in valid_always:
            output.append(c)
            i += 1
        elif c == '-' and i != 0 and i != last:
            # '-' may not start or end a key.  (The previous test compared
            # i with len(key), which can never be equal inside this loop,
            # so a trailing '-' slipped through.)
            output.append(c)
            i += 1
        elif c == '$' and i+4 < len(key):
            # may raise ValueError if there are invalid characters
            output.append(unichr(int(key[i+1:i+5], 16)))
            i += 5
        else:
            msg = "key %s has invalid character %s at position %d" % (
                key,
                c,
                i
            )
            raise error.MQLInternalError(None, msg)

    ustr = u''.join(output)

    if encoding is None:
        return ustr

    return ustr.encode(encoding)


def unquote_id(id):
    """
    Turn an id into a user-readable string, for instance turning
    /media_type/application/rss$002Bxml into
    /media_type/application/rss+xml

    Each '/'-separated component is unquoted with unquotekey(), so the
    same exceptions apply.
    """

    if '/' not in id:
        return unquotekey(id)

    return '/'.join(unquotekey(k) for k in id.split('/'))


def id_to_urlid(id):
    """
    convert a mql id to an id suitable for embedding in a url path.

    '~...' ids are returned unchanged, '#...' ids get the '#' replaced
    by '%23', and '/a/b' slash paths are unquoted per component,
    url-encoded and re-joined without the leading slash.  Any other
    form raises ValueError.
    """

    # XXX shouldn't be in metaweb.api!
    from mw.formats.http import urlencode_pathseg

    # validate before calling any string method, so a bad argument
    # produces the 'bad id' message rather than an AttributeError
    assert isinstance(id, str) and id != '', 'bad id "%s"' % id

    segs = id.split('/')

    if id[0] == '~':
        assert len(segs) == 1
        # assume valid, should check
        return id

    if id[0] == '#':
        assert len(segs) == 1
        # assume valid, should check
        return '%23' + id[1:]

    if id[0] != '/':
        raise ValueError('unknown id format %s' % id)

    # ok, we have a slash-path
    # requote components as keys and rejoin.
    # urlids do not have leading slashes!!!
    return '/'.join(urlencode_pathseg(unquotekey(seg)) for seg in segs[1:])
# and some other useful methods:
# groups: 1-3 date only (year, month, day); 4-6 date part of a date/time;
# 7-9 hour, minute, second; 10 fraction; 11 timezone; 12 timezone minutes
__datetime_re = re.compile(r'^(?:(?:(-?\d{4})(?:-(\d\d)(?:-(\d\d))?)?)|(?:(-?\d{4})-(\d\d)-(\d\d)T)?(\d\d)(?:\:(\d\d)(?:\:(\d\d)(?:\.(\d{1,6}))?)?)?(Z|[-+](?:0\d|1[0-4])\:(00|15|30|45))?)$')


# returns the graph format datetime (like ISO except for a leading T on times)
def coerce_datetime(dt):
    """
    Validate dt and return it in graph format, or None if it is invalid.

    '__now__' and '__today__' expand to the current UTC timestamp / date.
    Dates and date/times are returned unchanged; a bare time gets a 'T'
    prefix.  Non-string input yields None (the TypeError is swallowed).
    """
    try:
        if dt == '__now__':
            return datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        if dt == '__today__':
            return datetime.datetime.utcnow().strftime("%Y-%m-%d")

        match = __datetime_re.match(dt)
        if not match:
            return None
        elif match.group(1):
            # a plain date: yyyy, yyyy-mm or yyyy-mm-dd
            if check_date(*match.group(1, 2, 3)):
                return dt
            else:
                return None
        elif match.group(4):
            # a date/time
            # we don't do subseconds as python thinks that '2' is "2 microseconds" not "2 deciseconds".
            if not check_date(*match.group(4, 5, 6)):
                return None
            if not check_time(*match.group(7, 8, 9)):
                return None

            return dt

        elif match.group(7):
            # a bare time
            if not check_time(*match.group(7, 8, 9)):
                return None

            return 'T' + dt
        else:
            # no idea what the problem is, but it is invalid
            return None

    except TypeError:
        return None
    except ValueError:
        return None


def check_date(year, month, day):
    """
    Return True if year / month / day form a valid date.

    Arguments are strings or None, as captured by the regex; month and
    day may be None when missing.  Years from -9999 to 9999 are accepted.
    """
    # returns true or false depending on whether the day is valid
    # handles strings and nulls
    fakeyear = int(year)
    if fakeyear > 9999 or fakeyear < -9999:
        return False

    if month is None:
        return True
    elif int(month) < 1 or int(month) > 12:
        return False
    elif day is None:
        return True
    else:
        # datetime.date only supports years 1..9999, so shift years <= 0
        # up in steps of 8000 before validating the day.  8000 is a
        # multiple of 400, so the shift does not change leap years.
        while fakeyear <= 0:
            fakeyear += 8000

        try:
            datetime.date(fakeyear, int(month), int(day))
            return True
        except ValueError:
            return False


def check_time(hour, minute, second):
    """
    Return True if hour / minute / second form a valid time.

    Arguments are strings or None; hour is required, minute and second
    may be None when missing.
    """
    if hour is None:
        return False
    elif int(hour) < 0 or int(hour) > 23:
        return False
    elif minute is None:
        return True
    elif int(minute) < 0 or int(minute) > 59:
        return False
    elif second is None:
        return True
    elif int(second) < 0 or int(second) > 59:
        return False
    else:
        return True


def uncoerce_datetime(graphdt):
    """
    Undo coerce_datetime(): drop the leading 'T' of a graph format time.

    Anything else, including the empty string, is returned unchanged.
    """
    # startswith() rather than graphdt[0], so '' does not raise IndexError
    if graphdt.startswith('T'):
        return graphdt[1:]
    else:
        return graphdt
# JSON-style aliases so the query literals below read like MQL
null = None
true = True
false = False
WEBLINK = "/common/topic/weblink"


class TestWeblinks_adapter(TestFixture):
    """Checks of the /common/topic/weblink emql adapter."""

    def setUp(self):
        super(TestWeblinks_adapter, self).setUp()
        self.cache = emql.emql_cache()

    def run_query(self, q):
        """Run q through emqlread and return only the results part."""
        options = {'debug': True, 'cache': False}
        _debug, _cursors, results = self.mss.emqlread(None, q, options,
                                                      cache=self.cache)
        return results

    def test_bob_dylan(self):
        query = {
            "id": "/en/bob_dylan",
            WEBLINK: []
        }
        links = self.run_query(query)[WEBLINK]
        self.assert_(links, "Basic sanity test - make sure there are some weblinks returning which indiciate that at least emql is working and that the weblinks adapter is returning results.")

        #XXXXXX UNCOMMENT AFTER https://bugs.freebase.com/browse/DA-1093 ######

        #self.assert_("http://www.bobdylan.com/" in weblinks, "Test a key hanging off of a resource")

        self.assert_("http://en.wikipedia.org/wiki/Bob_Dylan" in links, "Test a key hanging off a topic")

    def test_list_shape(self):
        """
        Let's test to make sure weblink works with just a [] shape, in which case it should
        just return a list of strings
        """
        query = {
            "id": "/en/migraine",
            "/common/topic/weblink": []
        }
        links = self.run_query(query)[WEBLINK]
        self.assert_(len(links), "there should be some weblinks in here!")

        for link in links:
            self.assert_(isinstance(link, str))

    def test_topic_with_all_types_of_weblinks(self):
        """
        This particular topic has a weblinks generated from keys in all three
        places - off the topic, off the annotation, off the resource
        """
        template_shape = {
            "id": null,
            "template": null,
            "ns": null
        }
        category_shape = {
            "id": null,
            "name": null,
            "optional": true
        }
        q = {
            "id": "/en/royal_mail",
            WEBLINK: [{
                "url": null,
                "template": template_shape,
                "category": category_shape,
                "key": null
            }]
        }
        r = self.run_query(q)

        # index the returned weblinks by url
        by_url = dict((w['url'], w) for w in r[WEBLINK])

        official_link = by_url.get("http://www.royalmailgroup.com/")
        self.assert_(official_link, "The official link for royal mail is present. Key Hangs off resource.")
        self.assert_(official_link['category']['name'] == "Official Website", "Official Website category is....Official Website")

        guardian_link = by_url.get("http://www.guardian.co.uk//uk/post")
        self.assert_(guardian_link, "Guardian link is present. Key hangs off annotation.")
        self.assert_(guardian_link['category']['name'] == "Tag", "Category is Tag")

        wiki_link = by_url.get("http://en.wikipedia.org/wiki/index.html?curid=349823")
        self.assert_(wiki_link, "Wiki link is present. Key hangs off topic itself.")
################################################################################
## version 1 constants
# Plain integer literals are used throughout: Python 2 promotes to long
# automatically, and the "L" suffix is a syntax error on Python 3.
VERSION = 1
MAX_BITS = 40
VERSION_BITS = 2
OBJID_BITS = 34
GRAPHID_BITS = MAX_BITS - VERSION_BITS - OBJID_BITS
GRAPHID0 = 0x9202a8c04000641f
GUID_BASE = 0x8000000000000000

VERSION_MASK = (1 << VERSION_BITS) - 1
MAX_GRAPHS = 1 << GRAPHID_BITS
GRAPHID_MASK = MAX_GRAPHS - 1
OBJID_MASK = (1 << OBJID_BITS) - 1

VERSION_LEFT = (VERSION - 1) << 38
VERSION_RIGHT = (VERSION - 1) << 3

################################################################################
## Exceptions


class InvalidMunch(Exception):
  pass


class InvalidGraphID(Exception):
  pass


class UnknownGraphID(Exception):
  pass


class InvalidMIDVersion(Exception):
  pass


class InvalidMID(Exception):
  pass


class InvalidObjID(Exception):
  pass


# lookup table from character code (0..255) to munch value, -1 if invalid
munch_map = [-1] * 256
for i, c in enumerate("0123456789bcdfghjklmnpqrstvwxyz_"):
  munch_map[ord(c)] = i


## a Munch (copyright W. Harris, 2010) is 5 bits.
def char_of_munch(c):
  """Return the character for the 5-bit value c; InvalidMunch if out of range."""
  if not 0 <= c <= 31:
    raise InvalidMunch(c)
  return "0123456789bcdfghjklmnpqrstvwxyz_"[c]


def munch_of_char(c):
  """Return the 5-bit value of the character c; InvalidMunch if not a munch."""
  code = ord(c)
  # munch_map only covers codes 0..255; anything above is not a munch
  # (previously this surfaced as an IndexError)
  if code >= len(munch_map):
    raise InvalidMunch(c)
  value = munch_map[code]
  if value == -1:
    raise InvalidMunch(c)
  return value


def munchstr_of_int(n):
  """Encode the non-negative integer n as munch characters, most significant
  first.  0 encodes to the empty string.  At most 16 munches (80 bits) fit
  in the buffer."""
  buf = [""] * 16  #....

  def loop(i, n):
    # fills buf from the right, five bits per step
    if n == 0:
      return "".join(buf[16 - i:])
    buf[15 - i] = char_of_munch(n & 0x1f)
    return loop(i + 1, n >> 5)

  return loop(0, n)


def int_of_munchstr(str, ofs, l):
  """Decode l munch characters of str starting at offset ofs into an integer."""
  rv = 0
  i = ofs
  while i <= (ofs + l) - 1:
    v = munch_of_char(str[i])
    rv = rv << 5 | v
    i += 1

  return rv


def graphid_of_guid(guid):
  """Return the graph number encoded in the hex string guid.

  Raises UnknownGraphID unless the first 64 bits are within MAX_GRAPHS of
  GRAPHID0 and the following 32 bits (low two bits ignored) are 0x80000000.
  """
  graphid = int(guid[:16], 16)
  ms_crap = int(guid[16:24], 16) & 0xfffffffc
  n = graphid - GRAPHID0
  if 0 <= n < MAX_GRAPHS and ms_crap == 0x80000000:
    return n
  else:
    raise UnknownGraphID(n)


def objid_of_guid(guid):
  """Return the low OBJID_BITS bits of the hex string guid."""
  return int(guid[23:32], 16) & OBJID_MASK


def of_guid(guid):
  """Convert a 32 hex digit guid into a "/m/..." mid."""
  graphid = graphid_of_guid(guid)
  objid = objid_of_guid(guid)
  n = VERSION_LEFT | graphid << 34 | objid
  version_munch = VERSION_RIGHT << 3 | graphid
  version_str = char_of_munch(version_munch)
  return "".join(("/m/", version_str, munchstr_of_int(n)))


def to_guid(mid):
  """Convert a "/m/..." mid into its guid as a lowercase hex string.

  Raises InvalidMID if mid does not start with "/m/" or is not between 4
  and 11 characters long, InvalidMunch on a character outside the munch
  alphabet, and InvalidMIDVersion if the version munch is not version 1.
  """
  len_mid = len(mid)
  # both conditions must hold: with "or", "/m" alone passed the check and
  # then failed with an IndexError on mid[3]
  if not (4 <= len_mid <= 11 and mid.startswith("/m/")):
    raise InvalidMID(mid)

  version_munch = munch_of_char(mid[3])
  # NOTE(review): only a version munch of 0 passes this test; of_guid()
  # stores the graph id in the same munch, so non-zero graph ids look like
  # they cannot round-trip -- confirm the intended bit layout.
  ver = (version_munch << 3) + 1
  if ver != VERSION:
    raise InvalidMIDVersion(mid)

  graphid = GRAPHID0 | version_munch & GRAPHID_MASK
  graphid = graphid << 64
  objid = GUID_BASE | int_of_munchstr(mid, 4, len_mid - 4)
  guid = graphid | objid
  # '%x' gives the bare hex digits; hex()[2:-1] depended on the "0x"
  # prefix and the trailing "L" that Python 2 puts on longs
  return '%x' % guid


if __name__ == "__main__":
  #o_guid = "9202a8c04000641f800000000172fcb8"
  #o_guid = "9202a8c04000641f800000000164382e"
  #o_guid = "9202a8c04000641f800000000172fcb8"
  o_guid = "9202a8c04000641f80000000013e068e"

  if len(sys.argv) < 2:
    print("usage: mid.py <mid>")
    sys.exit(1)

  mid = sys.argv[1]
  print(to_guid(mid))
class nytimes_articles_adapter(Adapter):
    """emql adapter returning New York Times articles about a topic.

    Topics are matched through their keys in the /user/jamie/nytdataid
    namespace; each key is looked up on data.nytimes.com to obtain the
    article search query for that topic.
    """

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """Return the MQL clause that fetches up to 10 NYT data ids per topic."""
        return {
            "key": [{
                "optional": True,
                "value": None,
                "namespace" : "/user/jamie/nytdataid",
                "limit": 10
            }]
        }

    def get_articles(self, me, nytd_key, api_keys):
        """Return a list of article dicts for the NYT data id nytd_key.

        Fetches the RDF description of the id, takes the article search
        query it advertises and runs it with api_keys['nytimes_articles'].
        Returns [] when the RDF carries no search query.
        """
        url, connection = me.get_session().http_connect('data.nytimes.com', "/%s.rdf" % nytd_key)
        connection.request('GET', url)
        response = connection.getresponse()
        rdf = response.read()
        rdf = etree.fromstring(rdf)

        # Grab the search api call
        search_url = rdf.xpath("//nyt:search_api_query", namespaces=rdf.nsmap)
        if not search_url:
            return []

        search_url = urlparse.urlparse(search_url[0].text)
        params = urlparse.parse_qs(search_url.query)
        params['api-key'] = api_keys['nytimes_articles']
        params['fields'] = ','.join([
            'date',
            'url',
            'nytd_lead_paragraph',
            'nytd_title',
            'byline',
            'nytd_byline',
            'small_image_url',
            'small_image_height',
            'small_image_width',
            'source_facet'
        ])

        # build the actual query
        url, connection = me.get_session().http_connect(search_url.hostname, search_url.path)
        qs = urllib.urlencode(params, doseq=True)
        connection.request('GET', "%s?%s" % (url, qs))

        response = connection.getresponse()
        payload = mw.json.loads(response.read())

        articles = [{
            'headline': j['nytd_title'],
            'text': j['nytd_lead_paragraph'],
            'byline': j.get('nytd_byline', j.get('byline', None)),
            'source': j.get('source_facet', None),
            'date': datetime.strptime(j['date'], '%Y%m%d').isoformat(),
            # the field requested above is 'small_image_width'; reading
            # 'small_image_widget' always produced None for the width
            'img': ({'url': j['small_image_url'],
                     'height': j.get('small_image_height') or None,
                     'width': j.get('small_image_width') or None}
                    if j.get('small_image_url')
                    else None),
            'url': j['url']
        } for j in payload['results']]

        return articles

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """Return {guid: [article, ...]} for every result in args that has keys.

        The per-topic list is cut to the query's 'limit' (default 5).
        Raises Exception when no 'nytimes_articles' API key is supplied.
        """
        result = defaultdict(list)
        query = params['query'] or {}

        if isinstance(query, list):
            query = query[0]

        limit = query.get('limit', 5)

        if not (api_keys and api_keys.get('nytimes_articles')):
            raise Exception('This property requires a New York Times API key. '
                            'Get one here: http://developer.nytimes.com/apps/register')

        for mqlres in args:
            if not mqlres['key']:
                continue

            for key in mqlres['key']:
                articles = self.get_articles(me, key['value'], api_keys)
                result[mqlres['guid']].extend(articles)

        return dict((k, v[:limit]) for k,v in result.iteritems())

    def help(self, tid, graph, mql, me, control, params):
        """Return the (content type, text) pair documenting this adapter."""
        from docs import nytimes_adapter_help

        return 'text/x-rst;', nytimes_adapter_help
# The named groups must match LABELS below ('bc' is looked up separately);
# parse_isodate() reads the match through groupdict().
ISO8601_TIME_PATTERN = r"(?P<hour>[0-9]{2})(:(?P<minute>[0-9]{2}))?(:(?P<second>[0-9]{2})(.(?P<fraction>[0-9]+))?)?"

ISO8601_TIME_REGEX = re.compile(ISO8601_TIME_PATTERN)

ISO8601_REGEX = \
    re.compile(r"(?P<bc>-)?(?P<year>[0-9]{4})(-(?P<month>[0-9]{1,2})(-(?P<day>[0-9]{1,2})"
               r"((?P<separator>.)" + ISO8601_TIME_PATTERN +
               r"(?P<timezone>Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?")

LABELS = ('year', 'month', 'day', 'hour', 'minute', 'second')

# This essentially maps the number of date components to a format,
# Especially annoying: these can't be unicode, strftime doesn't like that
FORMATS = [
    "%Y",                    # year only
    "%b %Y",                 # year, month
    "%b %e, %Y",             # year, month, day
    "%b %e, %Y %l%p",        # year, month, day, hour
    "%b %e, %Y %l:%M%p",     # year, month, day, hour, minute
    "%b %e, %Y %l:%M:%S%p",  # year, month, day, hour, minute, second
]
BC_FORMATS = [fmt.replace("%Y", "%Y B.C.E.") for fmt in FORMATS]
CE_FORMATS = [fmt.replace("%Y", "%Y C.E.") for fmt in FORMATS]


def parse_isodate(iso_date):
    """
    Given an iso8601-formatted string (or fraction thereof) return a
    tuple containing a python datetime object and a format string that
    should be used to display it. The format is passible to strftime()
    and should be locale-sensitive about ordering (though today it is
    not)

    A bare time is combined with today's date.  Components missing from
    the input are filled with 1 and left out of the format.  Returns
    (None, None) if the string cannot be parsed or is not a valid date.
    """

    m = ISO8601_REGEX.match(iso_date)
    time_only = False
    if not m:
        m = ISO8601_TIME_REGEX.match(iso_date)
        if not m:  # bad data in the graph
            return None, None
        time_only = True

    values = m.groupdict()

    if time_only:
        today = datetime.date.today()
        args = [today.year, today.month, today.day]
    else:
        args = []

    # count = number of components actually present; it selects the format
    start = len(args)
    count = start
    for label in LABELS[start:]:
        value = values[label]
        if value is None:
            args.append(1)
        else:
            count += 1
            args.append(int(value))

    try:
        d = datetime.datetime(*args)
    except ValueError:
        return None, None

    if values.get('bc'):
        fmt = BC_FORMATS[count - 1]
    elif 0 <= d.year < 1000:
        fmt = CE_FORMATS[count - 1]
    else:
        fmt = FORMATS[count - 1]
    if time_only:
        # drop the 10-character "%b %e, %Y " date prefix
        fmt = fmt[10:]

    if iso_date.endswith('Z'):
        fmt += ' UTC'

    return d, fmt


def format_isodate(iso_date):
    """
    Given an iso8601 formatted string (or fraction thereof) return
    a timezone-independent display of the string.

    Returns None if the string cannot be parsed.
    """

    d, fmt = parse_isodate(iso_date)
    if d is None:
        return None

    if d.year >= 1900:
        result = d.strftime(fmt)
    else:
        # make sure to pick something that is a leapyear, so that
        # 29-Feb is available! Note that 1900 is NOT a leapyear
        d_1904 = d.replace(year=1904)
        result = d_1904.strftime(fmt).replace("1904", str(d.year))

    if fmt.endswith("%p"):
        result = result[:-2] + result[-2:].lower()

    # %e and %l pad single digits with a space; collapse the resulting
    # double space and drop the leading one of a bare time
    return result.replace("  ", " ").lstrip()


if __name__ == "__main__":
    import sys
    print(format_isodate(sys.argv[1]))
# important note: in mock replay mode, stored graph response costs
# are tallied. But mql_[x]time will be calculated in realtime
# so those costs will be quite different than when the mock was
# recorded (they will be smaller, kinda the point of mocking)

class MQLTest(mql_fixture.MQLTest):
  """mql cost tests."""

  def setUp(self):
    # the mock path is set before the base setUp runs; every query in
    # this class is evaluated as of 2010-05-01
    self.SetMockPath('data/cost.yaml')
    super(MQLTest, self).setUp()
    self.env = {'as_of_time': '2010-05-01'}

  def testCost(self):
    """simple positive test."""

    query = """
    {
      "/people/person/place_of_birth": null,
      "id": "/en/bob_dylan"
    }
    """
    exp_response = """
    {
      "/people/person/place_of_birth": "Duluth",
      "id": "/en/bob_dylan"
    }
    """
    self.DoQuery(query, exp_response=exp_response)
    # on success the cost dictionary is read from the result object
    cost = self.mql_result.cost
    self.costs_exist(cost)
    self.assertGreater(cost['te'], 10, 'te cost should be something')
    self.assertEqual(cost['mql_dbreqs'], 4, 'four graphd requests')

  def testCostError(self):
    """a query that gets a GQL error."""

    query = """
    {
      "guid": "foobar"
    }
    """
    exc_response = (
        error.MQLParseError,
        'Can only use a hexadecimal guid here'
    )
    self.DoQuery(query, exc_response=exc_response)
    # when an exception is expected the cost is read from the service
    cost = self.mql_service.get_cost()
    self.costs_exist(cost)
    self.assertEqual(cost['mql_dbreqs'], 1, 'only one graphd request')

  def testCostComplex(self):
    """query that does a lot of GQL."""

    query = """
    [{
      "/people/person/date_of_birth" : [],
      "/music/artist/album" : [],
      "/film/actor/film" : [],
      "/film/director/film" : [],
      "/film/producer/film" : [],
      "/tv/tv_actor/starring_roles" : [],
      "/tv/tv_producer/programs_produced" : [],
      "type": "/music/artist",
      "b:type": "/film/actor",
      "c:type": "/film/director",
      "d:type": "/film/producer",
      "e:type": "/tv/tv_actor",
      "f:type": "/tv/tv_producer",
      "id": null
    }]
    """
    self.DoQuery(query)
    cost = self.mql_result.cost
    self.costs_exist(cost)
    self.assertEqual(cost['mql_dbreqs'], 12, '12 graphd requests')
    self.assertGreater(cost['tu'], 100, 'tu cost should be something')


  def testQueryTimeout(self):
    """a query expected to exceed the query_timeout_tu budget."""

    self.env['query_timeout_tu'] = 50
    query = """
    [{
      "type": "/people/person",
      "date_of_birth": null,
      "sort": "date_of_birth"
    }]
    """
    exc_response = (
        error.MQLTimeoutError,
        'Query too difficult.'
    )
    self.DoQuery(query, exc_response=exc_response)
    cost = self.mql_service.get_cost()
    self.costs_exist(cost)

  def testQueryTimeoutFloat(self):
    """same as testQueryTimeout, with a float budget."""

    # float is allowed
    self.env['query_timeout_tu'] = 50.1
    query = """
    [{
      "type": "/people/person",
      "date_of_birth": null,
      "sort": "date_of_birth"
    }]
    """
    exc_response = (
        error.MQLTimeoutError,
        'Query too difficult.'
    )
    self.DoQuery(query, exc_response=exc_response)
    cost = self.mql_service.get_cost()
    self.costs_exist(cost)

  def costs_exist(self, cost):
    """assert every key in FLOAT_COSTS / INT_COSTS is in cost with that type."""
    for c in FLOAT_COSTS:
      self.assertIsInstance(cost[c], float, 'cost %s exists' % c)
    for c in INT_COSTS:
      self.assertIsInstance(cost[c], int, 'cost %s exists' % c)

if __name__ == '__main__':
  mql_fixture.main()
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | # 17 | # utilities for dealing with http 18 | # 19 | # url escaping 20 | # content-type parsing and graph lookup 21 | # 22 | # originally from mw/client/escaping.py 23 | # duplicated in mw/mql/grquoting.py 24 | # 25 | 26 | 27 | import urllib 28 | 29 | # Table mapping response codes to messages; entries have the 30 | # form {code: (shortmessage, longmessage)}. 31 | # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html 32 | from BaseHTTPServer import BaseHTTPRequestHandler 33 | http_status_codes = BaseHTTPRequestHandler.responses 34 | 35 | 36 | # some useful uri splitting code in the "urischemes" thirdparty module. 37 | # 38 | # later i found that the most complete uri manipulation module 39 | # seems to be in 4Suite: 40 | # 41 | # from Ft.Lib import Uri, Iri 42 | 43 | 44 | # 45 | # 46 | # ALLOW: 47 | # 48 | # '~' is in the unreserved set, so they should be available like "_.-" 49 | # ':' is in pchar 50 | # '@' is in pchar (though naive text parsers may think it's an email address) 51 | # 52 | # "$" is a valid sub-delim 53 | # "!" 
# "!" is a valid sub-delim
# "*" is a valid sub-delim
# "," is a valid sub-delim
# ";" is a valid sub-delim
#
# GENERALLY DISALLOW:
#
# "&" is in sub-delims but has special meaning to form parsers
# "=" is in sub-delims but excluded to avoid any possible confusion
# "+" is in sub-delims but excluded to avoid any possible confusion
#     with form-encoded queries

# ALWAYS DISALLOW
#
# "'" is in sub-delims but likely to confuse
# "(" is in sub-delims but definitely confuses email text parsers
# ")" is in sub-delims but definitely confuses email text parsers

# [A-Za-z0-9] and "_.-" are always safe in urllib.quote
# additionally, we allow:
our_safe = "~:@$!*,;"

# this handles unicode
def base_urlencode(data, safe):
    '''
    percent-encode data, leaving the characters in safe untouched.

    unicode input is encoded as utf-8 first.
    '''
    if isinstance(data, unicode):
        data = data.encode('utf_8')
    return urllib.quote(data, safe)


def urlencode(data):
    '''
    default url-encoder - please shift to one of the more
    specific versions, depending on whether you're quoting
    a path segment or a query arg.
    '''
    # "_.-" are always untouched
    return base_urlencode(data, ',')



# within path segments (between slashes) we don't need
# to follow the same rules as for forms parsing.
#
# "=" is only special to form parsers
# "&" is only special to form parsers
# "+" is only special to form parsers
def urlencode_pathseg(data):
    '''
    urlencode for placement between slashes in an url.
    '''
    return base_urlencode(data, our_safe + "=&+")


# "/" is allowed in query but reserved in path segments
# "?" is allowed in query but reserved in path segments
def urlencode_querykey(data):
    '''
    encode for placement before '=' in a query argument

    this allows '/?'
    '''
    return base_urlencode(data, our_safe + '/?')


# "/" is allowed in query but reserved in path segments
# "?" is allowed in query but reserved in path segments
# "=" should be allowed by form parsers after the key=
def urlencode_queryvalue(data):
    '''
    encode for placement after '=' in a query argument

    this allows '/?='
    '''
    # '=' is part of the safe set here, as documented above; it used to
    # be left out, which made this identical to urlencode_querykey()
    return base_urlencode(data, our_safe + '/?=')


# "/" is allowed in query but reserved in path segments
# "?" is allowed in query but reserved in path segments
# "=" is only special to form parsers
# "&" is only special to form parsers
# "+" is only special to form parsers
def urlencode_fragment(data):
    '''
    encode for placement after '#' as the fragment of an url

    this allows '/?=&+'
    '''
    return base_urlencode(data, our_safe + '/?=&+')

#
# who knows what browsers do? it ain't rfc3986 that's for sure.
#
def urlencode_formtext(data):
    '''
    encode a form key or value, pretending to be a browser.

    this version encodes space as '+' rather than as '%20',
    which is used when you are pretending to be a browser form
    submit.
    '''
    if isinstance(data, unicode):
        data = data.encode('utf_8')
    return urllib.quote_plus(data, our_safe)


def urldecode(data):
    '''
    replace "%xx" with character equivalent
    '''
    return urllib.unquote(data)
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # -*- coding: utf-8 -*- 17 | # 18 | """mql return directive.""" 19 | 20 | __author__ = 'bneutra@google.com (Brendan Neutra)' 21 | 22 | # thanks warren for these dimetests 23 | 24 | import google3 25 | from pymql.mql import error 26 | from pymql.test import mql_fixture 27 | 28 | class MQLTest(mql_fixture.MQLTest): 29 | """mql return directive.""" 30 | 31 | def setUp(self): 32 | self.SetMockPath('data/return.yaml') 33 | super(MQLTest, self).setUp() 34 | self.env = {'as_of_time': '2010-05-01'} 35 | 36 | 37 | def testReturnCountOfObject(self): 38 | """return count of object.""" 39 | 40 | query = """ 41 | { 42 | "/people/person/children": { 43 | "count": null, 44 | "return": "count" 45 | }, 46 | "id": "/en/bob_dylan" 47 | } 48 | """ 49 | exp_response = """ 50 | { 51 | "/people/person/children": 6, 52 | "id": "/en/bob_dylan" 53 | } 54 | """ 55 | self.DoQuery(query, exp_response=exp_response) 56 | 57 | def testReturnCountOfArray(self): 58 | """return count of array.""" 59 | 60 | query = """ 61 | { 62 | "/people/person/children": [ 63 | { 64 | "count": null, 65 | "return": "count" 66 | } 67 | ], 68 | "id": "/en/bob_dylan" 69 | } 70 | """ 71 | exp_response = """ 72 | { 73 | "/people/person/children": [ 74 | 6 75 | ], 76 | "id": "/en/bob_dylan" 77 | } 78 | """ 79 | self.DoQuery(query, exp_response=exp_response) 80 | 81 | def testReturnEstimateCountOfArray(self): 82 | """return estimate-count of array.""" 83 | 84 | query = """ 85 | { 86 | "/people/person/children": [ 87 | { 88 | "return": "estimate-count", 89 | 
"estimate-count": null 90 | } 91 | ], 92 | "id": "/en/bob_dylan" 93 | } 94 | """ 95 | exp_response = """ 96 | { 97 | "/people/person/children": [ 98 | 6 99 | ], 100 | "id": "/en/bob_dylan" 101 | } 102 | """ 103 | self.DoQuery(query, exp_response=exp_response) 104 | 105 | def testReturnCountNullWhenNone(self): 106 | """return count null when none.""" 107 | 108 | query = """ 109 | { 110 | "album": { 111 | "return": "count", 112 | "name": "Arrested" 113 | }, 114 | "type": "/music/artist", 115 | "name": "The Police" 116 | } 117 | """ 118 | exp_response = """ 119 | null 120 | """ 121 | self.DoQuery(query, exp_response=exp_response) 122 | 123 | def testReturnCount0WhenNoneAndOptional(self): 124 | """return count 0 when none and optional.""" 125 | 126 | query = """ 127 | { 128 | "album": { 129 | "optional": true, 130 | "return": "count", 131 | "name": "Arrested" 132 | }, 133 | "type": "/music/artist", 134 | "name": "The Police" 135 | } 136 | """ 137 | exp_response = """ 138 | { 139 | "album": 0, 140 | "type": "/music/artist", 141 | "name": "The Police" 142 | } 143 | """ 144 | self.DoQuery(query, exp_response=exp_response) 145 | 146 | def testReturnIgnoresOtherResultValues(self): 147 | """return ignores other result values.""" 148 | 149 | query = """ 150 | { 151 | "/people/person/children": [ 152 | { 153 | "count": null, 154 | "nationality": { 155 | "id": "/en/united_states", 156 | "name": null 157 | }, 158 | "return": "count", 159 | "id": null 160 | } 161 | ], 162 | "id": "/en/bob_dylan" 163 | } 164 | """ 165 | exp_response = """ 166 | { 167 | "/people/person/children": [ 168 | 2 169 | ], 170 | "id": "/en/bob_dylan" 171 | } 172 | """ 173 | self.DoQuery(query, exp_response=exp_response) 174 | 175 | def testReturnImplicitCount(self): 176 | """return implicit count.""" 177 | 178 | query = """ 179 | { 180 | "/people/person/children": { 181 | "return": "count", 182 | "id": null 183 | }, 184 | "id": "/en/bob_dylan" 185 | } 186 | """ 187 | exp_response = """ 188 | { 189 | 
"/people/person/children": 6, 190 | "id": "/en/bob_dylan" 191 | } 192 | """ 193 | self.DoQuery(query, exp_response=exp_response) 194 | 195 | def testReturnIdFail(self): 196 | """return id.""" 197 | 198 | query = """ 199 | { 200 | "/people/person/children": { 201 | "date_of_birth": null, 202 | "return": "id", 203 | "id": null 204 | }, 205 | "id": "/en/bob_dylan" 206 | } 207 | """ 208 | exc_response = ( 209 | error.MQLParseError, 210 | "'return' currently only supports 'count' and 'estimate-count'" 211 | ) 212 | self.DoQuery(query, exc_response=exc_response) 213 | 214 | if __name__ == '__main__': 215 | mql_fixture.main() 216 | -------------------------------------------------------------------------------- /emql/adapters/search.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
from itertools import izip, chain

from mw.emql.adapter import Adapter
from mw.emql.emql import id_guid, formatted_id_guid, MQL_LIMIT


class search_adapter(Adapter):
    """
    emql adapter backed by the session's relevance_query().

    pre() turns the search parameters into a 'guid|=' constraint;
    fetch() maps every guid returned by MQL back to its result value.
    """

    # parameter names that are forwarded to relevance_query() as keyword
    # arguments; anything else (except 'score') is passed through to the
    # dict returned by pre().
    _SEARCH_ARGS = frozenset((
        'query', 'prefix', 'prefixed',
        'type', 'type_strict', 'domain', 'domain_strict',
        'type_exclude', 'type_exclude_strict',
        'domain_exclude', 'domain_exclude_strict',
        'limit', 'denylist', 'related', 'property',
        'mql_filter', 'geo_filter', 'as_of_time', 'timeout',
    ))

    # sentinel returned by _fuzzy_constraint() when there is no '~='
    # constraint; needed because the constraint value itself may be None.
    _NO_CONSTRAINT = object()

    @classmethod
    def _fuzzy_constraint(cls, constraints):
        """Return the value of the first '~=' constraint, or _NO_CONSTRAINT."""
        if constraints is not None:
            for operator, value in constraints:
                if operator == '~=':
                    return value
        return cls._NO_CONSTRAINT

    @staticmethod
    def _base_name(arg):
        """Return arg as a str with a trailing '|=' operator removed."""
        if arg.endswith('|='):
            return str(arg[:-2])
        return str(arg)

    def pre(self, tid, graph, mql, me, control, parent, params, api_keys):
        """
        run the relevance query and build the constraint for the MQL query.

        params['query'] may be a string, a dict or a list (only the first
        element of a list is used).  a missing query, or a dict without a
        'query' key, is filled from the first '~=' entry of
        params['constraints'].

        returns a dict holding the pass-through keys, a 'guid|=' list of
        '#'-prefixed guids and, when 'score' was requested, an ':extras'
        entry mapping guid -> score for fetch().

        raises ValueError('no query') when no query text can be found.
        """
        constraints = params.get('constraints')
        params = params.get('query')

        if params is None:
            fuzzy = self._fuzzy_constraint(constraints)
            if fuzzy is not self._NO_CONSTRAINT:
                params = fuzzy

        if isinstance(params, dict) and params.get('query') is None:
            fuzzy = self._fuzzy_constraint(constraints)
            if fuzzy is not self._NO_CONSTRAINT:
                # mutates the caller's dict in place
                params['query'] = fuzzy

        if isinstance(params, list):
            params = params[0] if params else None

        if isinstance(params, (str, unicode)):
            params = {'query': params}
        elif params is None or params.get('query') is None:
            raise ValueError('no query')

        args = {}
        result = {}

        for arg, value in params.iteritems():
            name = self._base_name(arg)
            if name in self._SEARCH_ARGS:
                args[name] = value
            elif name != 'score':
                result[name] = value

        # inherit 'limit' and a plain-string 'type' from the parent query
        # unless the search parameters already set them
        for arg, value in parent.iteritems():
            name = self._base_name(arg)
            if name not in args:
                if name == 'limit':
                    args[name] = value
                elif name == 'type' and isinstance(value, basestring):
                    args['type_strict'] = 'any'
                    args[name] = value

        if 'limit' not in args:
            args['limit'] = MQL_LIMIT  # plug-in default MQL limit

        want_scores = 'score' in params
        session = me.get_session()
        if want_scores:
            matches = session.relevance_query(tid, format='ac', **args)
            guids = ['#' + match['guid'] for match in matches]
        else:
            matches = session.relevance_query(tid, format='guids', **args)
            guids = ['#' + guid for guid in matches]

        if guids:
            result['guid|='] = guids
        else:
            # all-zero guid so that an empty search result matches nothing
            result['guid|='] = ['#00000000000000000000000000000000']

        if want_scores:
            result[':extras'] = {
                "fetch-data": dict((match['guid'], match['score'])
                                   for match in matches)
            }

        return result

    def fetch(self, tid, graph, mql, me, control, args, params, api_keys):
        """
        map each MQL result in args to the value reported for its guid.

        for a string query the value is the query string itself.
        otherwise each result dict gets 'query' (and 'score', when pre()
        stored scores in ':extras') added and is returned as-is, wrapped
        in a list if the query was given as a list.  'guid' is removed
        from the result dict unless the query asked for it.

        raises ValueError('no query') when no query can be resolved
        (previously this surfaced as a TypeError on None).
        """
        constraints = params.get('constraints')
        scores = params.get(':extras', {}).get('fetch-data')
        params = params.get('query')

        was_list = False
        if isinstance(params, list):
            if params:
                params = params[0]
                was_list = True
            else:
                params = None

        if params is None:
            fuzzy = self._fuzzy_constraint(constraints)
            if fuzzy is not self._NO_CONSTRAINT:
                params = fuzzy

        if isinstance(params, (str, unicode)):
            return dict((mqlres['guid'], params) for mqlres in args)

        if params is None:
            raise ValueError('no query')

        if scores is not None:
            for mqlres in args:
                # scores are keyed by guid without the leading '#'
                mqlres['score'] = scores[mqlres['guid'][1:]]

        # keep 'guid' in the result only if the query asked for it
        if 'guid' in params:
            fn = dict.get
        else:
            fn = dict.pop

        results = {}
        for mqlres in args:
            mqlres['query'] = params['query']
            results[fn(mqlres, 'guid')] = [mqlres] if was_list else mqlres

        return results

    def help(self, tid, graph, mql, me, control, params):
        """return the (content-type, text) pair documenting this adapter."""
        from docs import search_adapter_help

        return 'text/x-rst;', search_adapter_help
-------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # 16 | """Test misc. regressions.""" 17 | 18 | __author__ = 'bneutra@google.com (Brendan Neutra)' 19 | 20 | import google3 21 | from pymql.test import mql_fixture 22 | 23 | 24 | class MQLTest(mql_fixture.MQLTest): 25 | 26 | def setUp(self): 27 | self.SetMockPath('data/regression_misc.yaml') 28 | super(MQLTest, self).setUp() 29 | self.env = {'as_of_time': '2009-10-01'} 30 | 31 | def testUtf8(self): 32 | """Regression test for issue 4970606.""" 33 | 34 | query = u""" 35 | [{"name":"Beyonc\u00e9", "id": null}] 36 | """ 37 | exp_response = u""" 38 | [ 39 | { 40 | "id": "/en/beyonce", 41 | "name": "Beyonc\u00e9" 42 | }, 43 | { 44 | "id": "/m/07ldnn6", 45 | "name": "Beyonc\u00e9" 46 | } 47 | ] 48 | """ 49 | self.DoQuery(query.encode('utf-8'), 50 | exp_response=exp_response.encode('utf-8')) 51 | 52 | def testCursor(self): 53 | """JIRA API-62 bug.""" 54 | 55 | # not sure the bug is valid but I just wanted to capture 56 | # this style of query. 
the bug was that it timed out 57 | # but i can't reproduce that -brendan 58 | 59 | query = """ 60 | [ 61 | { 62 | "attribution": { 63 | "guid": null, 64 | "optional": true, 65 | "id": null 66 | }, 67 | "reverse": null, 68 | "creator": { 69 | "guid": null, 70 | "optional": true, 71 | "id": null 72 | }, 73 | "timestamp": null, 74 | "timestamp>=": "2012-01-01T20", 75 | "source": { 76 | "guid": null, 77 | "optional": true, 78 | "id": null 79 | }, 80 | "valid": null, 81 | "limit": 1000, 82 | "master_property": null, 83 | "operation": null, 84 | "type": "/type/link", 85 | "target_value": null, 86 | "target": { 87 | "guid": null, 88 | "optional": true, 89 | "id": null 90 | } 91 | } 92 | ] 93 | """ 94 | cursor = True 95 | while 1: 96 | self.env = {'cursor': cursor, 'as_of_time': '2012-01-02'} 97 | self.MQLQuerier(query) 98 | cursor = self.mql_result.cursor 99 | if cursor is False: break 100 | 101 | def testCursorComplex(self): 102 | """random hash ordering cursor bug b/8323666.""" 103 | # TODO(bneutra) how to repro the bug, testing in process 104 | # doesn't tickle it. 
105 | 106 | query = """ 107 | [ 108 | { 109 | "sort": "-timestamp", 110 | "type": "/type/link", 111 | "reverse": null, 112 | "creator": null, 113 | "timestamp": null, 114 | "source": { 115 | "mid": null 116 | }, 117 | "a:creator": { 118 | "type": "/dataworld/provenance", 119 | "optional": "forbidden" 120 | }, 121 | "valid": null, 122 | "limit": 10, 123 | "master_property": null, 124 | "operation": null, 125 | "target": { 126 | "mid": null 127 | }, 128 | "target_value": null, 129 | "b:creator": { 130 | "usergroup": { 131 | "id|=": [ 132 | "/freebase/bots", 133 | "/en/metaweb_staff", 134 | "/en/current_metaweb_staff" 135 | ], 136 | "optional": "forbidden" 137 | } 138 | } 139 | } 140 | ] 141 | """ 142 | cursor = True 143 | i = 0 144 | while i < 30: 145 | i+=1 146 | self.env = {'cursor': cursor} 147 | self.MQLQuerier(query) 148 | self.assertEquals(len(self.mql_result.result), 10) 149 | # we should have a new cursor 150 | self.assertNotEquals(cursor, self.mql_result.cursor) 151 | cursor = self.mql_result.cursor 152 | # it should be a cursor 153 | self.assertNotEquals(cursor, False) 154 | 155 | def testCursorComplex2(self): 156 | """random hash ordering cursor bug b/8323666 freeq.""" 157 | 158 | # TODO(bneutra) how to repro the bug, testing in process 159 | # doesn't tickle it. 
160 | 161 | query = """ 162 | [ 163 | { 164 | "master_property": { 165 | "id": null, 166 | "reverse_property": null 167 | }, 168 | "limit": 3, 169 | "type": "/type/link", 170 | "target": { 171 | "guid": null, 172 | "type": [], 173 | "id": "#9202a8c04000641f8000000003b50f85" 174 | }, 175 | "source": { 176 | "guid": null, 177 | "type": [], 178 | "id": null 179 | } 180 | } 181 | ] 182 | """ 183 | cursor = True 184 | i = 0 185 | while i < 30: 186 | i+=1 187 | self.env = {'cursor': cursor, 'as_of_time': '2013-03-01'} 188 | self.MQLQuerier(query) 189 | self.assertEquals(len(self.mql_result.result), 3) 190 | # we should have a new cursor 191 | self.assertNotEquals(cursor, self.mql_result.cursor) 192 | cursor = self.mql_result.cursor 193 | # it should be a cursor 194 | self.assertNotEquals(cursor, False) 195 | 196 | 197 | if __name__ == '__main__': 198 | mql_fixture.main() 199 | -------------------------------------------------------------------------------- /formats/image.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | 17 | functions for manipulating image content 18 | 19 | everything is done in memory, we assume images 20 | aren't too large. 
import os
import contenttype
from StringIO import StringIO
from mw.log import LOG

from mw.api.content import Content, ContentWrapper
from mw.error import ContentLoadError
import mw.siteconfig
from mw.mql import scope

TN_MODES = ['fit', 'fill', 'fillcrop', 'fillcropmid']
DEF_TN_MODE = 'fit'

class ImageContent(ContentWrapper):
    """
    methods for dealing with image content
    """

    # ie6 uses some bizarre content_types for PNG and JPEG images
    # XXX it would be nice to fix the content_type in the
    #  /type/content object, but it may already have been uploaded.
    #  so for now, images uploaded from ie6 will have the "wrong"
    #  content-type and we'll need to garden them.
    remap_dumb_ie_mime_types = {
        'image/pjpeg': contenttype.MediaType('image/jpeg'),
        'image/x-png': contenttype.MediaType('image/png')
    }


    @classmethod
    def match(cls, c):
        """
        true if this ContentWrapper subclass applies to the content argument.

        the ie6 media types are remapped first; only gif, png, jpeg and
        x-icon images are accepted.
        """
        media_type = cls.remap_dumb_ie_mime_types.get(c.media_type, c.media_type)
        if not media_type.startswith('image/'):
            return False

        subtype = media_type.split('/')[1]

        return subtype in ('gif', 'png', 'jpeg', 'x-icon')

    def __init__(self, content):
        super(ImageContent, self).__init__(content)
        # (x, y) tuple once known, filled in by load() or parse()
        self.size = None

    def load(self, mss):
        """
        read the /common/image size of the content from the graph.

        leaves self.size untouched when mqlread returns None.
        """
        result = mss.mqlread(dict(id=self.content.content_id,
                                  type='/common/image',
                                  size=dict(x=None, y=None)))

        if result is None:
            return

        self.size = (result['size']['x'], result['size']['y'])

    def upload(self, mss):
        """
        add a /common/image facet to the type/content

        the size comes from the graph if it is already recorded there,
        otherwise from parsing the image body.  the size link is only
        written when both dimensions are non-zero.
        """
        self.load(mss)
        if self.size is None:
            self.parse(mss)

        w = { 'id': self.content.content_id,
              'type': { 'connect': 'insert',
                        'id': '/common/image' }}
        if self.size[0] and self.size[1]:
            w['/common/image/size'] = { 'create': 'unless_exists',
                                        'type': '/measurement_unit/rect_size',
                                        'x': self.size[0],
                                        'y': self.size[1] }

        with mss.push_variables(authority="/user/content_administrator",
                                privileged=scope.Authority):
            mss.mqlwrite(w)

    def parse(self, mss):
        """
        extract data from the image

        fetches the content body and sets self.size from what PIL
        reports.

        raises ImportError (after logging) when PIL is not installed,
        and ContentLoadError when the body cannot be parsed as an image.
        """
        # exif tags from digital cameras?

        self.content.fetch_body(mss)
        try:
            # XXXarielb move to pygmy as soon as pygmy doesn't crash within threads
            from PIL import Image
            img = Image.open(StringIO(self.content.body))
            # believe the image parser over anything in the graph
            self.size = img.size
        except ImportError as e:
            # this handler used to bind the exception as 'ie' but log
            # 'e', so a missing PIL surfaced as an unbound-name error
            # instead of the ImportError.
            LOG.error("format.image.no_pil", str(e))
            raise
        except Exception as e:
            LOG.error("format.image.parse", str(e))
            # NOTE(review): this app_code has no leading '/'.  if
            # mw.error splits app_code on '/' and expects a leading
            # empty component, this code is silently replaced by the
            # default -- confirm against mw.error.
            raise ContentLoadError('Invalid image file',
                                   app_code="upload/invalid_image_data",
                                   error=e)

    def update_content(self):
        """
        replace an ie6 media type on the content with the standard one.

        media types without a remapping are left as they are.
        """
        old_type = self.content.media_type
        new_type = self.remap_dumb_ie_mime_types.get(old_type, old_type)
        # log the value that is actually assigned (this used to log
        # None whenever there was nothing to remap)
        LOG.info('update_content', "Image Updating content from %s to %s" % (old_type,
                                                                             new_type))
        self.content.media_type = new_type

    @classmethod
    def get_fallback_image_path(cls):
        """
        return the configured 'me.img_thumb_fallback' path if it exists
        on disk; otherwise log an error and return None.
        """
        try:
            config = mw.siteconfig.get_config2()
            path = config.get('me.img_thumb_fallback')
            if path and os.path.exists(path):
                return path
        except KeyError:
            # treated the same as "not configured"; reported below
            pass

        LOG.error("image.thumb", "Could not find fallback image for thumbnailing service.")
        return None


    # failover for thumbnailing operation in the event that
    # the image is too large to thumbnail
    def thumb_fallback(self, mss):
        """
        return a Content holding the fallback thumbnail image, or None
        when no fallback image is available.
        """
        path = ImageContent.get_fallback_image_path()
        if path is None:
            return None
        # load data: binary mode because this is image data, and 'with'
        # so the handle is closed even if read() raises
        with open(path, 'rb') as fd:
            data = fd.read()
        # the fallback image is a known GIF image.
        thumb_mt = 'image/gif'
        c = Content(media_type=thumb_mt)
        c.set_body(data)
        return c
Use it 37 | exactly how you would use the python % format operator: 38 | 39 | class MyException(ParameterizedError): 40 | pass 41 | 42 | raise MyException('Got an error in query %(query)s', query=q) 43 | 44 | This will format the string appropriately, but allow exception 45 | handlers to unpack the relevant data and optionall reinsert it 46 | into the result string 47 | """ 48 | DEF_PFX = '/api/status/error' 49 | DEF_ME_CODE = '/unknown/unknown' 50 | 51 | def __init__(self, 52 | msg, 53 | http_code=400, 54 | app_code=DEF_ME_CODE, 55 | inner_exc=None, 56 | **kwds): 57 | self.msg = msg 58 | Exception.__init__(self, msg) 59 | 60 | if not is_valid_HTTP_code(http_code): 61 | http_code = 500 62 | self.http_status = get_HTTP_err(http_code) 63 | self.http_code = http_code 64 | 65 | # app_code and and api code setup 66 | codes = app_code.split('/') 67 | if len(codes) < 3: 68 | codes = self.DEF_ME_CODE.split('/') 69 | self.comp_code = '%s/%s' % (self.DEF_PFX, codes[1]) 70 | self.app_code = '%s' % '/'.join(codes[2:]) 71 | self.messages = [self.gen_msgs(**kwds)] 72 | 73 | if not kwds.has_key('error'): 74 | # don't extract the current frame (__init__) 75 | stack = traceback.extract_stack()[:-1] 76 | kwds['traceback'] = '\r\n'.join(traceback.format_list(stack)) 77 | 78 | # log inner exception or self 79 | exc = self 80 | if inner_exc: 81 | exc = inner_exc 82 | comp = app_code[1:].replace('/', '.') 83 | if exc == self: 84 | LOG.debug(comp, msg, **kwds) 85 | else: 86 | LOG.exception(msg, **kwds) 87 | self.kwds = kwds 88 | 89 | def gen_msgs(self, **kwds): 90 | return { 91 | 'code': '%s/%s' % (self.DEF_PFX, self.app_code), 92 | 'message': self.msg, 93 | 'info': kwds.copy() 94 | } 95 | 96 | def get_err_dict(self): 97 | return { 98 | 'status': self.http_status, 99 | 'code': self.comp_code, 100 | 'messages': self.messages 101 | } 102 | 103 | def __str__(self): 104 | return str(self.get_err_dict()) 105 | 106 | 107 | class NetworkAddressError(ParameterizedError): 108 | pass 109 | 110 | 
111 | class ContentLoadError(ParameterizedError): 112 | pass 113 | 114 | 115 | class TypeVerifyError(ParameterizedError): 116 | pass 117 | 118 | 119 | class EmailError(ParameterizedError): 120 | pass 121 | 122 | 123 | class SubscriptionError(ParameterizedError): 124 | pass 125 | 126 | 127 | class MSSError(ParameterizedError): 128 | pass 129 | 130 | 131 | class UserLookupError(ParameterizedError): 132 | pass 133 | 134 | 135 | class UserAuthError(ParameterizedError): 136 | pass 137 | 138 | 139 | class BlobError(ParameterizedError): 140 | pass 141 | 142 | 143 | class BLOBClientError(ParameterizedError): 144 | pass 145 | 146 | 147 | class RelevanceError(ParameterizedError): 148 | pass 149 | 150 | 151 | class TextSearchError(ParameterizedError): 152 | pass 153 | 154 | 155 | class AutocompleteError(ParameterizedError): 156 | pass 157 | 158 | 159 | class EmptyResult(ParameterizedError): 160 | pass 161 | 162 | 163 | class GraphConnectionError(ParameterizedError): 164 | pass 165 | 166 | 167 | class FormattingError(ParameterizedError): 168 | pass 169 | 170 | 171 | class SessionError(ParameterizedError): 172 | pass 173 | 174 | 175 | class ConfigError(ParameterizedError): 176 | pass 177 | 178 | 179 | class SanitizationError(ParameterizedError): 180 | pass 181 | 182 | 183 | class BlurbError(ParameterizedError): 184 | pass 185 | 186 | 187 | class DomainOperationError(ParameterizedError): 188 | pass 189 | 190 | 191 | class GenericRuntimeError(ParameterizedError): 192 | pass 193 | 194 | 195 | class OAuthDisabledError(ParameterizedError): 196 | pass 197 | 198 | 199 | class RecaptchaError(ParameterizedError): 200 | 201 | def __init__(self, 202 | msg, 203 | http_code=500, 204 | app_code=ParameterizedError.DEF_ME_CODE, 205 | inner_exc=None, 206 | **kwds): 207 | self.message = msg 208 | ParameterizedError.__init__( 209 | self, 210 | msg, 211 | http_code=http_code, 212 | app_code=app_code, 213 | inner_exc=inner_exc, 214 | **kwds) 215 | 216 | 217 | class 
ReadOnlyDatabaseError(ParameterizedError): 218 | 219 | def __init__(self, msg=None, *args, **kwds): 220 | msg = msg or 'You cannot save right now. Please try again later' 221 | ParameterizedError.__init__(self, msg, *args, **kwds) 222 | -------------------------------------------------------------------------------- /mql/graph/conn_mock.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Connector classes for mocked graphd query and response strings. 15 | 16 | Use these connectors when using the pymql library. 17 | See test/mql_fixture.py in pymql for a reference 18 | of how to use the record and replay connectors. 
__author__ = 'bneutra@google.com (Brendan Neutra)'
import sys
import hashlib
import re
import time
from pymql.mql import error
from pymql.mql.graph.connector import GraphConnector
from pymql.mql.grparse import ReplyParser
from absl import logging


class GraphMockException(Exception):
  """Raised in replay mode when no mocked response exists for a query."""
  pass


TIMEOUT_POLICIES = {
    'default': {
        'timeout': 8.0,
        'stubby_deadline': 10.0,
        'fail_fast': False,
    },
    'bootstrap': {
        'timeout': 2.0,
        'stubby_deadline': 4.0,
        'fail_fast': False,
    },
}

# Patterns are raw strings (the previous literals relied on invalid string
# escapes such as '\=' and '\S') and are compiled once rather than per query.
_QUERY_ID_RE = re.compile(r' (id=\S+) ')
_TIMESTAMP_RE = re.compile(r'timestamp>20\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d+ ')
_DATELINE_RE = re.compile(r' dateline="(\S+)" ')


def _next_mock_key(seen, hsh):
  """Returns the mockdata key for the next occurrence of query hash hsh.

  The first occurrence uses the bare hash; every repeat gets a '_<n>'
  suffix (n = 1, 2, ...). 'seen' maps hash -> repeat count and is updated
  in place. Record and replay must use the same scheme so keys line up.
  """
  if hsh in seen:
    seen[hsh] += 1
    return hsh + '_' + str(seen[hsh])
  seen[hsh] = 0
  return hsh


class MockRecordConnector(GraphConnector):
  """Mock connector for recording graphd responses.

  This class will append to the mockdata dictionary that it
  is handed. It interacts with a slightly modified
  live connector that you specify
  (e.g. the mock stubby connector)
  See test/mql_fixture.py for an implementation example.
  """

  def __init__(self, mockdata, connector, **kwargs):
    """Wraps a live connector and records into mockdata.

    Args:
      mockdata: dict that receives {key: [stripped query, raw response]}.
      connector: live connector; it is told to keep its raw responses.
      **kwargs: passed to GraphConnector.__init__; 'policy_map' defaults
        to TIMEOUT_POLICIES when missing or empty.
    """
    if not kwargs.get('policy_map', None):
      kwargs['policy_map'] = TIMEOUT_POLICIES
    GraphConnector.__init__(self, **kwargs)
    self.mockdata = mockdata
    self._conn = connector
    self._conn._save_raw_response = True
    # query hash -> number of repeats seen so far
    self._mocked = {}

  def open(self, policy=None):
    self._conn.open(policy)

  def transmit_query(self, q, policy, deadline, **kwargs):
    """Sends q through the live connector and records the raw response.

    A timeout is recorded as well (so it can be replayed) and re-raised.
    Other exceptions propagate without recording anything.
    """
    try:
      result = self._conn.transmit_query(q, policy, deadline)
    except error.MQLTimeoutError:
      self._record(q)
      raise

    self._record(q)
    return result

  def _record(self, q):
    """Stores the live connector's last raw response and copies its cost."""
    self.gen_mock_data(q, self._conn._raw_response)
    self.totalcost = self._conn.totalcost

  def reset_cost(self):
    # presumably reachable before __init__ has assigned _conn (e.g. from
    # the base class constructor), hence the guard -- TODO confirm
    if hasattr(self, '_conn'):
      self._conn.reset_cost()

  def gen_mock_data(self, q, result):
    """Adds [stripped query, result] to mockdata under the query's key."""
    k, hsh = strip_mock_query(q)
    # if a query has been seen before, assume it needs another
    # version of the response mocked.
    self.mockdata[_next_mock_key(self._mocked, hsh)] = [k, result]


class MockReplayConnector(GraphConnector):
  """Mock connector for replaying recorded graphd responses.

  This class will read from the mockdata dictionary that it
  is handed. It doesn't connect or interact with graphd.
  It's faster and more reliable than talking to a live db.
  See test/mql_fixture.py for an implementation example.
  """

  def __init__(self, mockdata):
    # don't connect to a graph, do not call __init__
    self.no_timeouts = False
    self.totalcost = {}
    self.mockdata = mockdata
    # query hash -> number of repeats seen so far
    self._mocked = {}

  def open(self, policy=None):
    pass

  def transmit_query(self, q, policy, deadline, **kwargs):
    """Returns the parsed recorded reply for q.

    Also sets self.dateline from the recorded response (None when the
    response carries no dateline) and accounts the replay time as cost.

    Raises:
      GraphMockException: no response was recorded for this query.
    """
    start_time = time.time()
    logging.debug('mocking query: %s', q)
    k, hsh = strip_mock_query(q)

    # repeats of a query within a test are numbered as in record mode
    hsh = _next_mock_key(self._mocked, hsh)

    if hsh not in self.mockdata:
      msg = '%s NO MOCKED REPONSE for this query: %s' % (hsh, k)
      logging.error(msg)
      raise GraphMockException(msg)

    m = self.mockdata[hsh]
    msg = 'mock query found %s: %s' % (hsh, m[0])
    logging.debug(msg)
    logging.debug('mock response found: %s', m[1])
    rg = _DATELINE_RE.search(m[1])
    self.dateline = rg.group(1) if rg else None

    reply_parser = ReplyParser()
    reply_parser.parse_full_reply(m[1])
    ret = reply_parser.get_reply()
    dbtime = time.time() - start_time
    self.add_graph_costs(ret.cost, dbtime, tries=1)
    return ret

  def _get_policy(self, policy=None):
    return None


def strip_mock_query(q):
  """Normalizes a graph query and returns (normalized query, sha1 hex digest).

  The first ' id=... ' directive is removed and any 'timestamp>20...' value
  is replaced with a fixed timestamp, so that the same logical query hashes
  identically at record time and at replay time.
  """
  # strip off the id
  # note the query may be spread over multiple lines
  # but the directives should be on the first one.
  k = _QUERY_ID_RE.sub(' ', q, count=1)

  # exception cases
  # timestamp stuff generated when creating mock responses is fine when it
  # comes time to replay, but mql does a scope query in realtime, not sure why
  # TODO(bneutra): why must MQL do this?
  if _TIMESTAMP_RE.search(k):
    logging.debug('we saw a timestamp in the query %s', k)
    k = _TIMESTAMP_RE.sub('timestamp>2010-09-23T00:00:00.000001 ', k)

  # sha1 needs bytes: encode text explicitly so a non-ASCII unicode query
  # does not fail; byte strings and ASCII text hash exactly as before.
  data = k if isinstance(k, bytes) else k.encode('utf-8')
  hsh = hashlib.sha1(data).hexdigest()
  return k, hsh
class MediaType(UniqueStr):
    """
    this looks like an ordinary python str containing a media-type.
    it has some extra methods on it that are useful for the metaweb.
    """

    # the only top-level types that normalize() accepts
    _valid_part0 = ('application', 'audio', 'image', 'message', 'model', 'multipart',
                    'text', 'text_encoding', 'video')

    @property
    def id(self):
        """the id property holds the metaweb id: value """
        return '/media_type/%s' % '/'.join(keyquote.quotekey(part)
                                          for part in self.split('/'))

    metaweb_type = '/common/media_type'

    # the two halves of "type/subtype", stripped of surrounding whitespace
    type = property(lambda self: str(self).split('/')[0].strip())
    subtype = property(lambda self: str(self).split('/')[1].strip())

    @classmethod
    def normalize(cls, s):
        """Validate a media type string and return its normalized form.

        Raises ValueError when the string is longer than 128 characters,
        is not of the form "type/subtype", or has an unknown top-level type.
        """
        s = UniqueStr.normalize(s)

        if len(s) > 128:
            raise ValueError('invalid media type "%s"' % s)

        # validation is case-insensitive, but the value returned below is
        # whatever UniqueStr.normalize produced
        parts = s.lower().split('/')
        if len(parts) != 2:
            raise ValueError('invalid media type "%s"' % s)

        if parts[0] not in cls._valid_part0:
            raise ValueError('invalid media type "%s"' % s)

        return s

    ###################################################

    @classmethod
    def from_id(cls, id):
        """Map a '/media_type/...' metaweb id back to a media type value.

        Returns None when id is None.
        """
        if id is None:
            return None
        assert id.startswith('/media_type/')
        idpath = id[len("/media_type/"):]

        # NOTE(review): unlike TextEncoding.from_id this returns the unquoted
        # value without wrapping it in cls() -- confirm that is intended.
        return keyquote.unquote_id(idpath)


class TextEncoding(UniqueStr):
    """
    canonicalized text encoding string.

    # see http://WWW.IANA.ORG/assignments/character-sets
    """

    metaweb_type = '/common/text_encoding'

    @property
    def id(self):
        """the id property holds the metaweb id: value """
        return '/media_type/text_encoding/%s' % keyquote.quotekey(self.lower())

    @property
    def codec(self):
        """the codec property holds the python codec"""
        # only available once the setter below has been used on this instance
        return self._codec

    @codec.setter
    def codec(self, value):
        self._codec = value

    @classmethod
    def normalize(cls, s):
        """Validate a charset name and return it upper-cased.

        Raises ValueError when the name is longer than 20 characters.
        """
        s = UniqueStr.normalize(s)

        # XXX check for valid token

        if len(s) > 20:
            # call form of raise, matching MediaType.normalize; the
            # "raise X, msg" statement form is Python 2 only
            raise ValueError('invalid charset "%s"' % s)

        # STANDARDS PEOPLE DIG ALL CAPS.
        return s.upper()

    @classmethod
    def from_id(cls, id):
        """Build a TextEncoding from a '/media_type/text_encoding/...' id.

        Returns None when id is None.
        """
        if id is None:
            return None

        # better be ASCII, but make sure it's not unicode
        id = str(id)
        # XXX this is a bad namespace location!
        assert id.startswith('/media_type/text_encoding/')
        idpath = id[len('/media_type/text_encoding/'):]
        return cls(keyquote.unquotekey(idpath))
# some well-known text-encodings
# official names from http://www.iana.org/assignments/character-sets
# python codec names are at .../lib/standard-encodings.html
ascii = TextEncoding('us-ascii')
ascii.addalias('ascii')
ascii.codec = 'ascii'

utf8 = TextEncoding('utf-8')
utf8.codec = 'utf_8'

utf16 = TextEncoding('utf-16')
utf16.codec = 'utf_16'

# XXX fill in the rest of the character sets we care about and
#  then turn on _exclusive
#TextEncoding._exclusive = True


def ContentType(value):
    """Split a content-type header value into its media type and charset.

    Returns a (MediaType, TextEncoding) tuple; the second element is None
    when the header carries no charset parameter.
    """
    mt, params = cgi.parse_header(value)
    mt = MediaType(mt)

    charset = params.get('charset')
    if charset is not None:
        # XXX whatever this is for, it's ugly...
        # (drops any single quotes found in the charset value)
        charset = charset.replace("'", '')
        te = TextEncoding(charset)
    else:
        te = None

    return (mt, te)


class LanguageCode(UniqueStr):
    """
    normalized language code string.

    mumble rfc-3066 inspired but more about common
    practice and the content we have.

    normalization may do surprising things.
    "en-US" gets normalized to "en".
    """

    metaweb_type = '/type/lang'

    @property
    def id(self):
        """the id property holds the metaweb id: value """
        return '/lang/%s' % keyquote.quotekey(self)

    @classmethod
    def normalize(cls, s):
        """Validate a language code and reduce it to its primary subtag.

        A leading '/lang/' is accepted and removed, and everything from the
        first '-' onwards is dropped.  Raises ValueError when the input is
        longer than 20 characters.
        """
        s = UniqueStr.normalize(s)

        if len(s) > 20:
            # call form of raise, matching MediaType.normalize; the
            # "raise X, msg" statement form is Python 2 only
            raise ValueError('invalid language code "%s"' % s)

        # XXX for now we accept but do not require a leading '/lang/'
        #  choose one, i think.
        if s.startswith('/lang/'):
            s = s[len('/lang/'):]

        # cut off anything following '-' (e.g. "en-US" -> "en")
        # XXX this should be specified and documented
        return s.split('-', 1)[0]

    @classmethod
    def from_id(cls, id):
        """Build a LanguageCode from a '/lang/...' metaweb id.

        Returns None when id is None.
        """
        if id is None:
            return None

        # better be ASCII, but make sure it's not unicode
        id = str(id)
        assert id.startswith('/lang/')
        return cls(keyquote.unquotekey(id[len('/lang/'):]))
def get_context(mss):
    """
    Get a unique string representing the combined user/app
    context.

    Note that this depends on mss.authenticate() having been
    called, if appropriate. This allows the context to be null if the
    call did not require authentication.

    Returns the hex HMAC-SHA1 digest of "user_id&app_id", or None when
    neither a user id nor an app id is available.
    """
    user_id = mss.get_user_id() or ''
    app_id = mss.get_app_id() or ''

    if not user_id and not app_id:
        return None

    # user_id&app_id, user_id&, or &app_id
    context = "%s&%s" % (user_id, app_id)

    # hmac-sha1 just like oauth
    # NOTE(review): the signing secret is hard-coded in source.  It is left
    # unchanged here because previously computed contexts depend on it, but
    # it should come from configuration.
    magic_secret = b"Sup3rAuth3nticated!eMQL"

    # hmac works on bytes; encode explicitly so that non-ASCII ids do not
    # fail on an implicit ASCII conversion
    if not isinstance(context, bytes):
        context = context.encode('utf-8')
    signed_context = hmac.new(magic_secret, context, hashlib.sha1).hexdigest()

    return signed_context


def get_extension_api_query(extension_id=None, optional=False):
    """Build the MQL query listing the foreign APIs and the keys they use.

    extension_id, when given, restricts the query to that extension.
    optional, when true, marks the top-level clause as optional.
    """
    result = [{"id": null,
               "type": "/freebase/foreign_api",
               "consumer_token": {"id": null,
                                  "optional": True},
               "access_token": {"id": null,
                                "optional": True},
               "api_keys": [{
                   "id": null,
                   "optional": True
               }]
               }]
    if extension_id:
        result[0]["extension"] = {"id": extension_id}

    if optional:
        result[0]["optional"] = True

    return result


def get_api_keys(mss, extension_id, apis=None):
    """
    For a given extension, get all the API keys out of the database

    `apis` is the result of something like get_extension_api_query() -
    if you don't provide it then mqlread will be run to fill it in for
    the given extension_id

    Returns None when there are no APIs, otherwise a list with one entry
    per API (see the example structure in the body).
    """

    # get a list of all keys that this extension needs, grouped by API
    # (because, in fact, an extension might use APIs that share
    # overlapping keys)

    if apis is None:
        q = get_extension_api_query(extension_id, optional=False)
        apis = mss.mqlread(q)

    if not apis:
        return None

    # ok, now authenticate
    mss.authenticate()
    context = get_context(mss)

    # to fetch them from the database, we want a flat list of all unique ids
    all_keys = set()
    for api in apis:
        for api_key in api.get("api_keys") or ():
            # collect the key's id: the clause itself is a dict, which
            # cannot be stored in a set
            all_keys.add(api_key["id"])
        # tokens are optional in the query, so use .get() exactly like the
        # manifest loop below does
        for special_key in ("access_token", "consumer_token"):
            token = api.get(special_key)
            if token:
                all_keys.add(token["id"])

    conn = get_sql_connection(mss)

    # now query the provider database for all of these specific keys
    foreign_key_list = mwOAuthProviderToken.select(
        AND(mwOAuthProviderToken.q.context == context,
            IN(mwOAuthProviderToken.q.apiKeyId, all_keys)),
        connection=conn
        )

    # generate a map of id->key data so we can access it below
    foreign_keys = {}
    for foreign_key in foreign_key_list:
        info = {
            "id" : foreign_key.apiKeyId,
            "key": foreign_key.key
            }
        if foreign_key.secret:
            info["secret"] = foreign_key.secret

        foreign_keys[foreign_key.apiKeyId] = info

    # now generate a datastructure similar to the mqlread
    # something like
    # [{ "id": "/netflix/queue_info",
    #    "consumer_token": {
    #        "id": "/netflix/consumer_token",
    #        "key": "ccc",
    #        "secret": "secretccc",
    #    },
    #    "access_token": {
    #        "id": "/netflix/access_token",
    #        "key": "aaa",
    #        "secret": "secretaaa",
    #    },
    #  },
    #  { "id": "/netflix/movie_info",
    #    "consumer_token": {
    #        "id": "/netflix/consumer_token",
    #        "key": "ccc",
    #        "secret": "secretccc",
    #    },
    #    "api_keys": [{
    #        "id": "/netflix/affiliate_code",
    #        "key": "fff"
    #    }]
    #  }]

    api_manifest = []
    for api in apis:
        api_info = {"id": api["id"]}
        api_manifest.append(api_info)

        for special_key in ("consumer_token", "access_token"):
            if api.get(special_key):
                # map "consumer_token" to "/netflix/consumer_token"
                special_key_id = api[special_key]["id"]

                # even if we don't have the key, include dummy entry
                # meaning that the API requires the key
                api_info[special_key] = {
                    "id": special_key_id
                    }
                if special_key_id in foreign_keys:
                    # key and secret MUST be there
                    foreign_key = foreign_keys[special_key_id]
                    api_info[special_key]["key"] = foreign_key["key"]
                    api_info[special_key]["secret"] = foreign_key["secret"]

        for api_key in api.get("api_keys") or ():
            api_key_id = api_key["id"]

            # put a dummy entry in, meaning the API requires/expects
            # the key
            api_key_info = {
                "id": api_key_id,
                }
            api_info.setdefault("api_keys",[]).append(api_key_info)

            if api_key_id in foreign_keys:

                foreign_key = foreign_keys[api_key_id]

                if foreign_key.get("key"):
                    api_key_info["key"] = foreign_key["key"]

                if foreign_key.get("secret"):
                    api_key_info["secret"] = foreign_key["secret"]

    return api_manifest
class OP(OptionParser):
    """OptionParser preloaded with the options shared by metaweb scripts.

    parse_args() is the all-in-one entry point: it parses the command line,
    loads the site configuration and opens a session.  parse_args_only(),
    load_config() and create_session() expose the same steps individually.
    """

    def __init__(self, *args, **kws):
        usage = kws.get('usage', '')
        kws['usage'] = "%%prog [-d] [-g HOST:PORT] %s [...]" % usage
        OptionParser.__init__(self, *args, **kws)

        # an explicit ME_SITE_CONFIG wins, but only if the file exists
        config_file = os.environ.get('ME_SITE_CONFIG')
        if config_file is not None and not os.path.exists(config_file):
            config_file = None

        if config_file is None:
            # walk up the directory structure, stopping at project.mw4
            # (i.e. the root of whatever project we're in)
            start = os.path.abspath(
                os.path.join(os.path.dirname(__file__), ".."))
            config_file = self._find_site_config(start)

        self.add_option('-c', '--config', dest='config_file',
                        default=config_file,
                        help="location of _site.cfg with graph configuration")

        self.add_option('-d', '--debug', dest='debug',
                        default=False, action='store_true',
                        help="turn on debugging output")

        self.add_option('-l', '--loglevel', dest='loglevel',
                        default='WARNING', action='store',
                        help="set the log level")
        self.add_option('-g', '--graph', dest='graphd_addr',
                        metavar="HOST:PORT",
                        help="address of graphd in the form host:port")
        self.add_option('-b', '--blob', dest='blobd_addr',
                        metavar="HOST:PORT",
                        help="address of blobd in the form host:port")
        self.add_option('-D', '--define', dest='defines',
                        default=[], action='append',
                        help='override other site.cfg options in the form section.entry=value')
        self.add_option("-a", "--as_user", dest="as_user",
                        metavar="/user/USERID",
                        help="User ID to write with")

        self.add_option("-r", "--relevance", dest="relevance_addr",
                        metavar="HOST:PORT",
                        help="host:port of relevance server")
        self.add_option("-s", "--geo", dest="geo_addr",
                        metavar="HOST:PORT",
                        help="host:port of geo server")

        self.add_option("-T", "--no_timeouts", dest="no_timeouts",
                        default=False, action='store_true',
                        help="turn off socket timeouts (off by default)")

    def _find_site_config(self, start):
        """Search `start` and its ancestors for a _site.cfg file.

        Returns the path of the first _site.cfg found.  Returns None when a
        directory containing project.mw4 is reached without one, or when the
        filesystem root is reached (which previously looped forever).
        """
        path = start
        while True:
            candidate = os.path.join(path, "_site.cfg")
            if os.path.exists(candidate):
                return candidate
            if os.path.exists(os.path.join(path, "project.mw4")):
                return None
            parent = os.path.abspath(os.path.join(path, ".."))
            if parent == path:
                # at the filesystem root there is nowhere left to look
                return None
            path = parent

    def parse_args(self, *args, **kws):
        """Parse the arguments, load the config and create the session.

        Most simple scripts need no more control than this.  Returns the
        (options, args) pair produced by parse_args_only().
        """
        options, args = self.parse_args_only(*args, **kws)

        config = self.load_config(options)

        self.create_session(config, options)

        return (options, args)

    def parse_args_only(self, *args, **kws):
        """Parse the arguments without loading config or creating a session."""
        return OptionParser.parse_args(self, *args, **kws)

    def load_config(self, options):
        """Build the configuration mapping from the config file and options.

        No service is contacted.  The result is stored on self.config and
        returned.  An unknown log level or a malformed -D value is reported
        through self.error(), which prints usage and exits.
        """
        from paste.deploy import appconfig

        config = {}
        if options.config_file is not None:
            LOG.debug("parse.args", "Trying to open %s" % options.config_file)
            try:
                config = appconfig("config:%s" % options.config_file)
            except LookupError as e:
                LOG.debug("parse.args", "Error loading config file, missing paste sections", options.config_file, e)
                # fall through

        for define in options.defines:
            if '=' not in define:
                # report a usage error rather than an unpacking traceback
                self.error('-D expects section.entry=value, got %r' % define)
            k, v = define.split('=', 1)
            config[k] = v

        loglevels = 'EMERG ALERT CRIT ERR WARNING NOTICE INFO DEBUG'.split()
        if options.loglevel in loglevels:
            LOG.setLevel(logging.getLevelName(options.loglevel))
        else:
            # OptionParser.error() exits by itself, so nothing follows it
            self.error('unknown log level %s\n  valid log levels are %s'
                       % (options.loglevel, ', '.join(loglevels)))

        # go through the config file for these options, keeps things
        # simple
        if options.graphd_addr:
            config["graphd.address"] = options.graphd_addr

        if options.blobd_addr:
            config["clobd.address"] = options.blobd_addr
            config["clobd.masteraddress"] = options.blobd_addr

        if options.relevance_addr:
            config["relevance.address"] = options.relevance_addr

        if options.geo_addr:
            config["geo.address"] = options.geo_addr

        if options.no_timeouts:
            config["debug.no_timeouts"] = 'true'

        self.config = config
        return config

    def create_session(self, config, options):
        """Open the service connections and create the session.

        Sets self.ctx and self.session and returns the session.  When
        --as_user is given it must have the form /user/USERID; that user is
        installed on the session and validated.
        """
        from mw.api.service import ServiceContext, Session
        self.ctx = ServiceContext()

        self.ctx.load_config(config)
        self.ctx.connect()

        self.session = Session(self.ctx)

        # do further configuration of Session

        self.session.finish_init()

        if options.as_user:
            if not options.as_user.startswith("/user/"):
                raise Exception("User must be in the form /user/USERID")
            user_name = options.as_user[len("/user/"):]
            self.session.push_variables(user=options.as_user)
            self.session._signed_user = get_user_by_name(user_name)
            self.session.get_user().validate(self.session)

        return self.session
class JSONResponse(object):
  """Accumulates the envelope dictionary returned to an API client.

  The envelope starts out as a 500 server error with no messages; callers
  overwrite fields with extend() and append messages with log().
  """

  def __init__(self, **kws):
    self.response = {
        'status': '500 Internal Server Error',
        'code': '/api/status/error/server',
        'messages': []
    }
    self.extend(**kws)

  def extend(self, **kws):
    """Merge kws into the envelope.

    'messages' is appended to the existing message list; every other key
    replaces the current value.
    """
    # items() instead of iteritems() so this runs on Python 2 and 3 alike
    for k, v in kws.items():
      if k == 'messages':
        self.response[k] += v
      else:
        self.response[k] = v

  def log(self, text, **kws):
    """Append a message entry; kws become extra fields next to 'message'."""
    kws['message'] = text
    self.response['messages'].append(kws)


#
#
# snipped from mod_python 3.1.3 apache.py
#
#  - modified to format result as a json-like structure.
#
import traceback


def json_traceback(response=None, exception=None, **kws):
  """
    This function is only used when debugging is on.
    It sends the output similar to what you'd see
    when using Python interactively to the browser

    The exception being handled is taken from sys.exc_info(); the
    `exception` argument is accepted but not used.  The traceback is
    logged, then recorded as an error message on `response` (a new
    JSONResponse when None), and the envelope dict is returned.  Extra
    keyword arguments are merged into the envelope first.
    """

  etype, evalue, etb = sys.exc_info()

  try:  # try/finally
    try:  # try/except

      if etype is IOError and str(evalue)[:5] == 'Write':
        # an IOError while writing to the client: note it in the log
        LOG.error('json_traceback', 'skipping error write to client')

      # write to log
      for e in traceback.format_exception(etype, evalue, etb):
        # drop the final character (the trailing newline) of each chunk
        s = '%s' % e[:-1]
        LOG.error('json_traceback', s)

      if response is None:
        response = JSONResponse(
            status='500 Internal Server Error', code='/api/status/error/server')
      response.extend(**kws)

      stack = [
          dict(zip('file,line,func,source'.split(','), quad))
          for quad in traceback.extract_tb(etb, None)
      ]

      text = '%s: %s' % (etype, evalue)
      response.log(text, stack=stack, level='error')

      return response.response

    except Exception:
      # hit the backstop.  must be a bug in the normal exception handling code,
      # do something simple.
      response = {
          'status': '500 Internal Server Error',
          'messages': [{
              'level': 'error',
              'text': traceback.format_exc()
          }],
      }
      return response

  finally:
    # erase the traceback
    etb = None


def wrap_query(querier, sq, varenv=None, transaction_id=None):
  """
    Run a query with the given querier (usually something like
    ctx.low_querier.read) - performing appropriate envelope packing and
    unpacking, multiple queries, error handling, etc

    sq is either a JSON string or an already-decoded dict.  Every key of
    sq that is not a reserved envelope name is run as a separate query via
    querier(subq, varenv).  Always returns an envelope dict; failures are
    reported inside it.  The one exception that propagates is a ValueError
    from JSON decoding whose message does not have the expected
    "...: line N column M" form.
    """

  LOG.error(
      'deprecated',
      'mw.mql.pathexpr.wrap_query() is DEPRECATED and will go away soon!')

  if isinstance(sq, basestring):
    # convert to json query
    try:
      # XXX should eventually use unicode, for now utf8
      sq = json.loads(sq, encoding='utf-8', result_encoding='utf-8')

    except ValueError as e:
      # debug ME-907
      LOG.exception('mql.pathexpr.wrap_query()', sq=sq, varenv=varenv)

      SIMPLEJSON_ERR_RE = re.compile(r'^(.+): line (\d+) column (\d+)')
      m = SIMPLEJSON_ERR_RE.match(str(e))
      if not m:
        raise
      response = JSONResponse(
          status='400 Bad Request', code='/api/status/error/request')
      text = 'json parse error: ' + m.group(1)
      response.log(
          text, line=int(m.group(2)), column=int(m.group(3)), level='error')
      return response.response

    except Exception as e:
      return json_traceback(
          exception=e,
          status='400 Bad Request',
          code='/api/status/error/request')

  if not isinstance(sq, dict):
    response = JSONResponse(
        status='400 Bad Request', code='/api/status/error/request')
    text = 'json type error: query was not a dictionary'
    response.log(text, level='error')
    return response.response

  if varenv is None:
    varenv = {}

  # backwards compatibility until we remove the transaction_id parameter
  if 'tid' not in varenv:
    varenv['tid'] = transaction_id

  if 'cursor' in sq:
    varenv['cursor'] = sq['cursor']

  # bound before the try so the outer handler can always refer to it
  response = None
  try:
    # should be JSONResponse(query=sq['query']) 'queries' to match
    # envelope spec
    response = JSONResponse(query=sq)
    results = {}

    # filter out these special keys for now - eventually some of
    # these will be filled in by the caller but only if we trust
    # them!
    reserved_names = ('request_id', 'cost', 'lang', 'transaction_id',
                      'permission', 'cursor', 'user')

    valid_queries = (
        (k, v) for k, v in sq.items() if k not in reserved_names)

    # make sure to copy the request_id
    if 'request_id' in sq:
      # JSONResponse is not subscriptable; fields are set through extend()
      response.extend(request_id=sq['request_id'])

    # label used in the log line; querier is normally a bound method, but
    # do not fail the request when it is some other callable
    querier_cls = getattr(querier, 'im_class', None)
    if querier_cls is None:
      querier_cls = type(getattr(querier, '__self__', None))
    querier_label = '%s.%s' % (querier_cls.__name__,
                               getattr(querier, '__name__', repr(querier)))

    # should only looking either at sq['query'] for a single query or
    # sq['queries'] for multiple queries
    for query_id, subq in valid_queries:
      LOG.notice('Query', querier_label, subq=subq)
      try:
        results[query_id] = querier(subq, varenv)

        response.extend(status='200 OK')

      except EmptyResult as e:
        LOG.info('emptyresult', '%s' % e)
        response.log('empty result for query %s' % subq)

      # exceptions should be packed into response['error']
      except ParameterizedError as e:
        if isinstance(e, MQLInternalError):
          response.extend(status='500 Internal Server Error')
        else:
          response.extend(status='400 Bad Request')

        # called for its side effect: it records the stack on `response`
        json_traceback(response=response, exception=e)
        response.log('parse exception: %s' % e, level='error')
      except Exception as e:
        LOG.exception('python.exception')
        return json_traceback(response=response, exception=e)

    response.extend(result=results)
    if 'cursor' in varenv:
      response.extend(cursor=varenv['cursor'])

    return response.response

  except Exception as e:
    LOG.exception('python.exception')
    return json_traceback(response=response, exception=e)
class Pattern(object):
    """A string template with {variable} placeholders.

    The variables are discovered from the pattern text and stored in
    self.vars, initially all None.  They are filled from a key (set_key),
    from an MQL result (set_mqlres) or from a URI (set_uri), and the
    pattern is expanded by str() / unicode().
    """

    def __init__(self, pattern, guid=None, key=None, error=None):

        self.pattern = pattern
        self.guid = guid
        self.vars = dict((var, None) for var in RE_VARS.findall(pattern))
        self.error = error
        if 'key' in self.vars:
            self.vars['key'] = self.decode_key(key)

    def _expand(self):
        """Expand the pattern, keeping the string type of self.pattern.

        Variables whose value is not None are substituted.  When a None
        value is met and self.error is not None, the whole result is
        replaced with self.error and expansion stops.  Values and the error
        are converted to the pattern's own type first (unicode for a
        unicode pattern, utf-8 encoded str otherwise).
        """
        string = self.pattern
        error = self.error

        if isinstance(string, unicode):
            placeholder = u'{%s}'

            def coerce(value):
                if isinstance(value, str):
                    return unicode(value, 'utf-8')
                if not isinstance(value, unicode):
                    return unicode(value)
                return value
        else:
            placeholder = '{%s}'

            def coerce(value):
                if isinstance(value, unicode):
                    return value.encode('utf-8')
                if not isinstance(value, str):
                    return str(value)
                return value

        for var, value in self.vars.iteritems():
            if value is not None:
                string = string.replace(placeholder % (var), coerce(value))
            elif error is not None:
                string = coerce(error)
                break

        return string

    # returns a utf-8 encoded string of the pattern with variables
    # whose value is not None expanded.
    # if error is not None, the entire pattern is replaced with error
    # when a None variable value is encountered
    def __str__(self):

        string = self._expand()
        if isinstance(self.pattern, unicode):
            string = string.encode('utf-8')

        return string

    # returns a unicode string of the pattern with variables
    # whose value is not None expanded.
    # if error is not None, the entire pattern is replaced with error
    # when a None variable value is encountered
    def __unicode__(self):

        string = self._expand()
        if not isinstance(self.pattern, unicode):
            string = unicode(string, 'utf-8')

        return string

    def decode_key(self, key):
        """Turn $XXXX (four upper-case hex digits) escapes in key into utf-8.

        Returns key unchanged when it is None.
        """
        value = key
        if value is not None:
            value = RE_KEY.sub('\\u\\1', value)
            # NOTE(review): the identity test presumably relies on re.sub
            # handing back the same object when nothing matched -- confirm.
            if value is not key:
                value = value.decode('unicode-escape').encode('utf-8')

        return value

    def _prop_name(self, prefix, var, prop):
        """Build the "<prefix>_<hex checksum of var>:<prop>" query key."""
        # use adler32 as it's shorter than hash on 64-bit and just as fast
        return "%s_%x:%s" %(prefix or "p", zlib.adler32(var) & 0xffffffff, prop)

    def mql_query(self, prefix=None):
        """Build the MQL query that fetches every still-unset variable.

        'key' is never queried.  A dotted variable becomes one nested clause
        per path segment; a segment of the form prop[namespace] becomes a
        key lookup in that namespace and ends the path.  Returns {} when
        there is nothing to fetch; otherwise the query also carries "guid".
        """
        query = {}
        for var, value in self.vars.iteritems():
            if var != 'key' and value is None:
                _query = prev = query
                for prop in var.split('.'):
                    nsprop = RE_NS.search(prop)
                    if nsprop is not None:
                        prop, ns = nsprop.groups()
                        prop = self._prop_name(prefix, var, prop)
                        _query[prop] = {
                            "key": [{
                                "limit": 1, "namespace": ns, "value": None
                            }]
                        }
                        break
                    else:
                        prop = self._prop_name(prefix, var, prop)
                        _query[prop] = [{"limit": 1}]
                        prev = _query
                        _query = _query[prop][0]
                else:
                    # last prop is assumed to be prop: null compatible
                    # so that name or literal queries require no hacks
                    prev[prop] = None

        if query:
            query["guid"] = self.guid

        return query

    def set_key(self, key):
        """Set the 'key' variable, if the pattern uses one.  Returns self."""
        if 'key' in self.vars:
            self.vars['key'] = self.decode_key(key)

        return self

    def set_mqlres(self, mqlres, prefix=None, clear=False):
        """Fill the unset variables from the result of mql_query().

        With clear=True every variable except 'key' is reset to None first.
        A variable whose path cannot be followed in mqlres is left as None.
        Returns self.
        """
        if clear:
            for var in self.vars.iterkeys():
                if var != 'key':
                    self.vars[var] = None

        for var, value in self.vars.iteritems():
            if var != 'key' and value is None:
                value = mqlres
                for prop in var.split('.'):
                    nsprop = RE_NS.search(prop)
                    try:
                        if nsprop is not None:
                            prop, ns = nsprop.groups()
                            prop = self._prop_name(prefix, var, prop)
                            value = value[prop]['key'][0]['value']
                            break
                        else:
                            prop = self._prop_name(prefix, var, prop)
                            value = value[prop]
                            if isinstance(value, list):
                                value = value[0]
                    except (LookupError, TypeError):
                        # missing key, empty list, or a None / scalar where a
                        # container was expected: the value is simply absent.
                        # Narrowed from a bare except so that interrupts and
                        # unrelated errors are no longer swallowed.
                        value = None
                        break

                self.vars[var] = value

        return self

    def set_uri(self, uri):
        """Set scheme, host, path, query and fragment from uri.

        The params component of the parsed URI is ignored.  Returns self.
        """
        vars = self.vars
        (vars['scheme'], vars['host'], vars['path'], x,
         vars['query'], vars['fragment']) = urlparse(uri)

        return self
def _property_keys_clause():
  """Return a fresh keys clause selecting the /type/property entries."""
  property_clause = {
      "type": "/type/property",
      "unique": None,
      "id": None,
      "schema": None,
      "expected_type": None,
      "master_property": None,
      "name": None,
      "reverse_property": []
  }
  return [{"value": None, "type": None, "namespace": property_clause}]


def _type_keys_clause():
  """Return a fresh keys clause selecting /type/type entries and their properties."""
  type_clause = {
      "type": "/type/type",
      "name": None,
      "id": None,
      "domain": None,
      "/type/namespace/keys": _property_keys_clause()
  }
  return [{"value": None, "type": None, "namespace": type_clause}]


def wildcard_mql_query():
  """MQL query asking for the id and every property ("*") of each match."""
  query = {"id": None, "*": None}
  return [query]


def get_all_domains_mql_query():
  """MQL query for every domain keyed in "/", with its types and properties."""
  domain = {
      "id": None,
      "name": None,
      "type": "/type/domain",
      "key": {"value": None, "namespace": "/"},
      "/type/namespace/keys": _type_keys_clause()
  }
  return [domain]


def get_domain_mql_query():
  """MQL query for the "/type" domain, with its types and their properties."""
  domain = {
      "id": "/type",
      "name": None,
      "type": "/type/domain",
      "/type/namespace/keys": _type_keys_clause()
  }
  return domain


def get_type_mql_query():
  """MQL query for "/type/object" and the properties in its namespace."""
  type_query = {
      "type": [],
      "name": None,
      "id": "/type/object",
      "/type/type/domain": None,
      "/type/namespace/keys": _property_keys_clause()
  }
  return type_query


def get_schema_query(guid):
  """Low-level query for the schema of the type identified by guid.

  The returned structure is built afresh on every call, so callers may
  modify it.
  """

  def optional_unique_flag():
    # two separate copies are needed below, so build one per call
    return {":value": None, ":datatype": "boolean", ":optional": True}

  master_property = {
      ":optional": True,
      "@guid": None,
      "is_unique_property": optional_unique_flag()
  }
  key_clause = {
      ":optional": True,
      "@guid": None,
      ":value": None,
      "has_schema": {"@guid": None},
      "has_expected_concept_type": {":optional": True, "@guid": None},
      "has_master_property": master_property,
      "is_unique_property": optional_unique_flag(),
      "is_instance_of": {"@id": "/type/property"}
  }
  schema = {
      "@guid": guid,
      "is_instance_of": {"@id": "/type/type"},
      "uses_properties_from": {"@guid": None, ":optional": True},
      "has_default_property_name": {":value": None, ":optional": True},
      "has_key": [key_clause]
  }
  return schema
def get_object_query():
  """Return the graph schema query addressed at "/type/object".

  Starts from get_schema_query(None), so "@guid" is None, and adds an "@id"
  constraint of "/type/object".
  """
  q = get_schema_query(None)
  q["@id"] = "/type/object"
  return q


def get_domain_query():
  """Return a graph query for "/type" as a domain plus the schema of its keys.

  The single "has_key" clause is the schema query from get_schema_query(None)
  with ":value" set to None and a "has_domain" constraint of "/type".
  """
  ns_query = {
      "@id": "/type",
      "is_instance_of": {
          "@id": "/type/domain"
      },
      "has_key": [get_schema_query(None)]
  }
  key_clause = ns_query["has_key"][0]
  key_clause[":value"] = None
  key_clause["has_domain"] = {"@id": "/type"}
  return ns_query


def get_wildcard_query():
  """Return a one-clause graph query whose "*" entry is marked optional."""
  return [{
      "@guid": None,
      "*": [{
          "@guid": None,
          ":guid": None,
          ":value": None,
          ":optional": True
      }]
  }]


def test_run(ctx, varenv, options, query):
  """Run `query` options.num times and return the result of the last run.

  Args:
    ctx: context providing `gc` (graph connection) and `high_querier`.
    varenv: mapping; "tid" and "policy" are read for graph queries.
    options: parsed command-line options (`num`, `flush`, `type`).
    query: the query, passed through unchanged to the reader.

  Returns:
    The value returned by the final read, or None when options.num is 0.

  Side effects:
    Resets the cost counters on ctx.gc before the loop and stores the elapsed
    wall-clock time in ctx.gc.totalcost["dt"] afterwards.
  """
  ctx.gc.reset_cost()

  result = None

  start_time = time.time()

  for _ in xrange(options.num):
    if options.flush:
      ctx.high_querier.schema_factory.flush("")

    if options.type == "graph":
      result = ctx.gc.read(
          query, transaction_id=varenv["tid"], policy=varenv["policy"])
    else:
      result = ctx.high_querier.read(query, varenv)

  stop_time = time.time()

  ctx.gc.totalcost["dt"] = stop_time - start_time

  return result


def cmdline_main():
  """Command-line entry point for the benchmark.

  The query is taken, in order of precedence, from the file given with -f,
  from the module-level function named with -c, or from the single
  positional argument.  It is then run through test_run(), optionally under
  the profiler selected by the module-level `profiler` name, and the
  accumulated cost is logged.
  """
  LOG.warning("benchmark", "test start")
  start_time = time.time()

  from mql.mql import cmdline
  op = cmdline.OP("testing")

  op.add_option(
      "-n", dest="num", default=1000, type="int", help="number of iterations")

  op.add_option(
      "-P",
      dest="profile",
      default=None,
      help="run profiler with output to file")

  op.add_option("-c", dest="call", default=None, help="function to call")

  op.add_option(
      "-f", dest="query_file", default=None, help="file containing query")

  op.add_option(
      "--flush",
      dest="flush",
      default=None,
      help="flush cache between every request")

  op.add_option("-t", dest="type", default="mql", help="graph or MQL query")

  # Parse once; the options and arguments are reused for the rest of the run.
  options, args = op.parse_args()

  stop_time = time.time()
  op.ctx.gc.totalcost["dt"] = stop_time - start_time

  LOG.warning("start cost", {
      "nreqs": op.ctx.gc.nrequests,
      "cost": op.ctx.gc.totalcost
  })

  queryfile = options.query_file
  if queryfile is not None:
    # "with" closes the file even if reading fails.
    with open(queryfile, "r") as qf:
      query = qf.read()
    query = re.sub("[\n\t]+", " ", query)
  elif options.call:
    query_func = globals().get(options.call)
    if not callable(query_func):
      op.error("-c must name a function in this module: %s" % options.call)
    query = query_func()
  elif len(args) == 1:
    query = args[0]
  else:
    op.error("Must specify a query argument")

  if options.type == "mql":
    # A query produced by -c is already a python structure; only query text
    # (from -f or the command line) needs decoding.
    if isinstance(query, basestring):
      # XXX should eventually use unicode, for now utf8
      query = json.loads(query, encoding="utf-8", result_encoding="utf-8")
  elif options.type != "graph":
    op.error("-t must be 'mql' or 'graph'")

  if options.profile:
    if profiler == "hotshot":
      profile = hotshot.Profile(options.profile)
      profile.runcall(test_run, op.ctx, op.varenv, options, query)
      # Close the profile so the log file is complete on disk.
      profile.close()
      LOG.warning(
          "benchmark",
          "Saving hotshot profile in Stats format to %s" % options.profile)

    elif profiler == "cProfile":
      profile = cProfile.Profile()
      profile.runcall(test_run, op.ctx, op.varenv, options, query)

      LOG.warning(
          "benchmark",
          "Saving cProfile data in kcachegrind format to %s" % options.profile)
      # get from http://jcalderone.livejournal.com/21124.html
      # and put in thirdparty/pyroot
      from mql.mql import lsprofcalltree
      k = lsprofcalltree.KCacheGrind(profile)
      with open(options.profile, "w") as profile_out:
        k.output(profile_out)
    else:
      LOG.warning("benchmark", "No profiler available, not running benchmark")
  else:
    test_run(op.ctx, op.varenv, options, query)

  LOG.warning("run cost", {
      "nreqs": op.ctx.gc.nrequests,
      "cost": op.ctx.gc.totalcost
  })


if __name__ == "__main__":
  cmdline_main()

# --------------------------------------------------------------------------------
# /mql/grparse.py:
# --------------------------------------------------------------------------------
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""quick and dirty parsing of graphd query language strings into python lists.
"""
import re
from grquoting import quote, unquote

from error import MQLGraphError, MQLDatelineInvalidError, MQLTimeoutError,\
     MQLCursorInvalidError, GraphIsSnapshottingError

from pymql.log import LOG

gstr_escape = quote

# there are several places in pymi where gstr_unescape is called on a string that
# is not escaped. One example is the result of result=(datatype) - a bareword
# like boolean is returned, not a quoted string.
#
# Rather than try and fix these cases, I've made gstr_unescape preserve
# this behaviour. Please use mw.mql.grquoting.unquote() instead.
def gstr_unescape(string):
  """Unquote `string` if it starts with a double quote, else return it as is.

  Barewords and the empty string are passed through unchanged.
  """
  # startswith() instead of string[0] so that '' does not raise IndexError.
  if string.startswith('"'):
    return unquote(string)
  return string


# (cost code, name, description) for every cost counter known to this module.
cost_parameters = [
    ('tr', 'time/real',
     'number of milliseconds graphd spent executing to answer this query in '
     'general. This number will get larger on a system that is busy with other'
     ' things, even if graphd isn\'t involved in them.'
    ),
    ('tu', 'time/user',
     'number of milliseconds graphd spent executing in user mode while '
     'computing the answer to this request.'
    ),
    ('ts', 'time/system',
     'number of milliseconds graphd spent executing in system mode while '
     'computing the answer to this requests. "Executing in system mode" almost'
     ' always means "reading a lot of data from disk".'
    ),
    ('pr', 'page reclaims',
     'a benevolent form of page fault that doesn\'t actually do any work '
     'because the page is still in the local cache.'
    ),
    ('pf', 'page faults',
     'the thing we\'re trying to minimize. Higher pf will usually be '
     'accompanied by a higher ts.'
    ),
    ('dw', 'primitive data writes',
     'Usually, these will be what you expect, except for queries that create '
     'implicit type links and type system fragments.'
    ),
    ('dr', 'primitive data reads',
     'how many single primitive structs were read from disk (for example, as '
     'part of dismissing them as candiates for a qualified search).'
    ),
    ('in', 'index size reads',
     'how many indices were looked up with their starting address and size.'),
    ('ir', 'index element reads', 'get one member of one index.'),
    ('iw', 'index element write', 'add an element to an index.'),
    ('va', 'value allocation',
     'allocate a (possibly temporary or transient) result data structure.'),
    ('te', 'time/overall',
     'number of milliseconds from receipt of this query by the graph, to the '
     'start of sending the response'
    ),
    ('tg', 'time/graph',
     'time me observes from sending the first byte of the request to receiving'
     ' the last byte'
    ),
    ('tf', 'time/formatted',
     'time me takes from sending the request to handing off the formatted '
     'response'
    ),
    ('tm', 'time/mql', 'time taken inside the MQL subroutines'),
    ('cr', 'cache/read', 'number of requests sent to memcache'),
    ('cm', 'cache/miss', 'number of memcache misses'),
    ('ch', 'cache/hit', 'number of memcache hits'),
    ('lr', 'lojson-cache/read', 'number of schema requests sent to memcache'),
    ('lm', 'lojson-cache/miss', 'number of schema memcache misses'),
    ('lh', 'lojson-cache/hit', 'number of schema memcache hits'),
    ('rt', 'relevance/time',
     'time taken inside the relevance server (as measured by ME)'),
    ('gcr', 'graph connect retries',
     'the number of times that ME tried to open a connection to a graph'),
    ('gqr', 'graph query retries',
     'the number of times that ME tried to service a query from a single graph')
]

# cost code -> (name, description)
costcode_dict = dict(
    (code, (name, description)) for code, name, description in cost_parameters)

costitem_re = re.compile(r'([a-zA-Z]+)=(\d+)\s*')


def coststr_to_dict(coststr):
  """Parse a cost string such as 'tu=3 ts=0' into {'tu': 3, 'ts': 0}.

  Returns None when `coststr` is empty or None.
  """
  if not coststr:
    return None
  return dict((code, int(value)) for code, value in costitem_re.findall(coststr))


graphresult_re = re.compile(
    r'(\(|\)| |\-\>|\<\-|[a-z]+\=|[\-\:\._A-Za-z0-9]+|\"(?:[^\"\\]|\\[\\\"n])*\")'
)


class GraphResult(list):
  """A list that get_reply() also decorates with status/cost/dateline attributes."""
  pass


class ReplyParser:
  """Splits graphd output into newline-terminated replies and tokenizes them.

  Each complete reply is tokenized with graphresult_re; paren lists are broken
  up into python lists and all list elements are kept as strings.  Parsed
  replies accumulate in `replyqueue` until fetched with get_reply() or
  get_reply_raw().
  """

  def __init__(self):
    self.inbuf = []
    self.replyqueue = []

    self.reset_parser()

  def reset_parser(self):
    """Clear the per-reply parser state."""
    # parser state

    self.instring = 0  # true if we have read an open " but no close
    self.escaped = 0  # true if we just read a backslash
    # if instring is 1, curstr is a list of characters that
    # will be joined to make the string
    self.curstr = []
    # list of strings - join when ready to use (faster than string concat)
    self.curreply = []

  def parsestr(self, s):
    """Feed a chunk of graphd output to the parser.

    Every line completed by a newline in `s` is parsed and queued; text after
    the last newline is kept in `curreply` until the rest of it arrives.
    """
    if '\n' not in s:
      self.curreply.append(s)
      return

    # parse all of the 'completed' lines, and if there is an
    # uncompleted line at the end of s, leave it in curreply
    reply_list = s.split('\n')

    self.curreply.append(reply_list.pop(0))

    for reply in reply_list:
      # parse the previous reply
      replystr = ''.join(self.curreply)
      self.parse_full_reply(replystr)
      self.reset_parser()

      # now add the current line
      self.curreply.append(reply)

    # note that we're not processing the last line, because it is incomplete

  def parse_full_reply(self, replystr):
    """Tokenize one complete reply line and append the result to replyqueue.

    parse the given reply string from the graph into a bunch of
    nested lists of tokens. Results are in the form:
    [ 'ok', 'id=', '"me;..."', [[['010000..', '01...', ...]]]]

    Raises:
      MQLGraphError: if a newline token is found or the parentheses in
        `replystr` do not balance.
    """
    LOG.debug('graph.result', replystr)
    token_list = graphresult_re.findall(replystr)

    curlist = []
    stack = []

    for tok in token_list:
      if tok == '(':
        stack.append(curlist)
        curlist = []
      elif tok == ')':
        if not stack:
          # A stray ")" would otherwise surface as a bare IndexError.
          raise MQLGraphError(
              None,
              'unbalanced ")" in graphd reply',
              reply=replystr,
              tokens=token_list)
        sublist = curlist
        curlist = stack.pop()
        curlist.append(sublist)
      elif tok == '\n':
        raise MQLGraphError(
            None,
            'Not allowed a newline in parse_full_reply',
            reply=replystr,
            tokens=token_list)
      elif tok == ' ' or tok == '':
        pass
      else:
        curlist.append(tok)

    # len() is defined for an empty token list and is the real token count
    # (the last enumerate() index is one short, and unbound when empty).
    LOG.debug('graph.result.parsed', 'Parsed %d tokens' % len(token_list))
    if len(stack) != 0:
      raise MQLGraphError(
          None,
          'got linefeed in the middle of a reply?',
          reply=replystr,
          tokens=token_list,
          depth=len(stack))

    self.replyqueue.append(curlist)

  def get_reply_raw(self):
    """Remove and return the oldest parsed reply as a nested token list."""
    return self.replyqueue.pop(0)

  def get_reply(self):
    """Remove the oldest parsed reply and return it as a GraphResult.

    The result carries `status`, `cost` and `dateline` attributes (plus `id`
    when graphd sent one); an 'error' reply with code 'EMPTY' is returned
    with `errcode` and `errmsg` set instead of being raised.

    Raises:
      MQLCursorInvalidError: error code 'BADCURSOR'.
      MQLDatelineInvalidError: error code 'DATELINE'.
      GraphIsSnapshottingError: error code 'AGAIN'.
      MQLTimeoutError: error code 'COST'.
      MQLGraphError: any other error code except 'EMPTY', an empty reply, an
        unknown status, or an unknown or value-less response modifier.
    """
    l = self.get_reply_raw()
    if not l:
      raise MQLGraphError(None, 'grparse: empty graphd reply', reply=l)

    result = GraphResult()
    result.status = l.pop(0)
    result.cost = None
    result.dateline = None

    if result.status == 'ok':
      result += l.pop()
    elif result.status == 'error':
      result.errcode = l.pop(0)
      result.errmsg = unquote(l.pop())
    else:
      raise MQLGraphError(
          None,
          'grparse: unknown graphd reply type',
          header=l[0] if l else None,
          reply=l)

    # what's left is info messages from graphd
    li = 0
    while li < len(l):
      rv = l[li]
      # A modifier must be followed by its value; anything else is reported
      # as a graph error rather than an IndexError.
      if (isinstance(rv, str) and rv in ('cost=', 'dateline=', 'id=') and
          li + 1 < len(l)):
        modifier = rv[:-1]
        setattr(result, modifier, unquote(l[li + 1]))
        li += 2
      else:
        raise MQLGraphError(
            None,
            'unknown response modifier from graphd',
            header=l[li],
            reply=l)

    if result.status == 'error':
      errcode = result.errcode
      if errcode == 'BADCURSOR':
        raise MQLCursorInvalidError(None, result.errmsg)
      if errcode == 'DATELINE':
        raise MQLDatelineInvalidError(None, result.errmsg)
      if errcode == 'AGAIN':
        raise GraphIsSnapshottingError(None, result.errmsg)
      if errcode == 'COST':
        raise MQLTimeoutError(None, 'Query too difficult.', cost=result.cost)
      if errcode != 'EMPTY':
        raise MQLGraphError(
            None,
            'error %(subclass)s: %(detail)s',
            detail=result.errmsg,
            subclass=errcode,
            dateline=result.dateline)
    return result

  def put_buf(self, buf):
    """Append `buf` to `inbuf`."""
    self.inbuf.append(buf)

  def isready(self):
    """Return True when at least one parsed reply is waiting in replyqueue."""
    return len(self.replyqueue) > 0


# this is different from a normal list printer because it
# assumes that any sublists will come at the end.
# of course that's wrong. hmmph.
def print_result(l, indent=''):
  """Write a nested result list to stdout, one line per run of scalars.

  Consecutive non-list items go on one line as `indent` followed by a space
  and each item; every nested list is printed with one more space of indent.
  None is printed as `indent` + 'None'.  Anything else that is not a list
  prints nothing.
  """
  import sys
  write = sys.stdout.write

  if l is None:
    write(indent + 'None\n')
    return
  if isinstance(l, list):
    dangle = 0  # true while a line of scalars is still open
    for li in l:
      if isinstance(li, list):
        if dangle:
          write('\n')
          dangle = 0
        print_result(li, indent + ' ')
      else:
        if not dangle:
          write(indent)
          dangle = 1
        write(' ' + str(li))
    if dangle:
      write('\n')

# --------------------------------------------------------------------------------