tag with class "links_main". Links in
924 | # abstract are ignored as they are available within
tag.
925 | #
926 | #
927 | #
928 | #
929 | #
930 | # - Each title looks like
931 | #
932 | #
945 |
def __init__(self, offset=0):
    """Initialise parser state.

    ``offset`` seeds ``self.index``, the counter that is incremented
    for every result appended to ``self.results``.
    """
    html.parser.HTMLParser.__init__(self)

    # Field registers for the result currently being assembled
    self.title = self.url = self.abstract = self.filetype = ''

    # Data collected over the whole page
    self.results = []
    self.index = offset
    self.click_result = ''

    # Scratch state shared by the tag and text handlers
    self.textbuf = ''
    self.tag_annotations = {}

    # Values read from the navigation form
    self.np_prev_button = ''
    self.np_next_button = ''
    self.npfound = False  # First next params found
    # vqd returned from button, required for search query to get next set of results
    self.vqd = ''

    # Parsing always starts in the 'root' scope
    self.set_handlers_to('root')
964 |
965 | # Tag handlers
966 |
@annotate_tag
def root_start(self, tag, attrs):
    """Start-tag handler for the 'root' scope.

    Only div tags are of interest here. Depending on the CSS class the
    parser starts capturing the instant answer, enters the 'result'
    scope, or enters the 'input' scope. The returned string is the
    annotation for this tag ('' when the tag is not interesting).
    """
    if tag != 'div':
        return ''

    css_classes = self.classes(attrs)

    if 'zci__result' in css_classes:
        self.start_populating_textbuf()
        return 'click_result'

    if 'links_main' in css_classes:
        # Initialize result field registers
        self.title = self.url = self.abstract = self.filetype = ''
        self.set_handlers_to('result')
        return 'result'

    if 'nav-link' in css_classes:
        self.set_handlers_to('input')
        return 'input'

    return ''
988 |
@retrieve_tag_annotation
def root_end(self, tag, annotation):
    """End-tag handler for the 'root' scope.

    When the closing tag was annotated 'click_result', stop capturing
    text and store what was captured as the instant answer.
    """
    if annotation != 'click_result':
        return

    self.stop_populating_textbuf()
    self.click_result = self.pop_textbuf()
    self.set_handlers_to('root')
995 |
@annotate_tag
def result_start(self, tag, attrs):
    """Start-tag handler for the 'result' scope.

    An h2 with class 'result__title' switches to the 'title' scope; an
    anchor with class 'result__snippet' and an href starts capturing the
    abstract text. Returns the annotation for the tag, '' otherwise.
    """
    css_classes = self.classes(attrs)

    if tag == 'h2' and 'result__title' in css_classes:
        self.set_handlers_to('title')
        return 'title'

    is_snippet_link = tag == 'a' and 'result__snippet' in css_classes and 'href' in attrs
    if is_snippet_link:
        self.start_populating_textbuf()
        return 'abstract'

    return ''
1007 |
@retrieve_tag_annotation
def result_end(self, tag, annotation):
    """End-tag handler for the 'result' scope.

    Closing an 'abstract' tag stores the captured abstract. Closing the
    'result' tag itself emits a Result (only when a URL was found) and
    returns to the 'root' scope.
    """
    if annotation == 'abstract':
        self.stop_populating_textbuf()
        self.abstract = self.pop_textbuf()
        return

    if annotation != 'result':
        return

    if self.url:
        self.index += 1
        self.results.append(
            Result(self.index, self.title, self.url, self.abstract, None)
        )
    self.set_handlers_to('root')
1019 |
@annotate_tag
def title_start(self, tag, attrs):
    """Start-tag handler for the 'title' scope.

    A span starts capturing the filetype indicator; an anchor with an
    href records the result URL and starts capturing the title text.
    Returns the annotation for the tag, '' when the tag is ignored.
    """
    if tag == 'span':
        # The filetype indicator is recorded wrapped in square brackets
        self.start_populating_textbuf(lambda text: '[' + text + ']')
        return 'title_filetype'

    if tag != 'a' or 'href' not in attrs:
        return ''

    href = attrs['href']
    # Skip 'News for', 'Images for' search links
    if href.startswith('/search'):
        return ''

    self.url = href
    marker = '?q='
    try:
        # If the href is a redirect of the form ...?q=TARGET&sa=...,
        # keep only the decoded TARGET; otherwise keep the href as is.
        begin = self.url.index(marker) + len(marker)
        finish = self.url.index('&sa=', begin)
        self.url = urllib.parse.unquote_plus(self.url[begin:finish])
    except ValueError:
        pass

    self.start_populating_textbuf()
    return 'title_link'
1043 |
@retrieve_tag_annotation
def title_end(self, tag, annotation):
    """End-tag handler for the 'title' scope.

    Stores the captured filetype indicator or title text, and returns
    to the 'result' scope once the title tag itself is closed.
    """
    if annotation == 'title':
        self.set_handlers_to('result')
        return

    if annotation == 'title_filetype':
        self.stop_populating_textbuf()
        self.filetype = self.pop_textbuf()
        # Resume verbatim capture for whatever follows the indicator
        self.start_populating_textbuf()
        return

    if annotation == 'title_link':
        self.stop_populating_textbuf()
        captured = self.pop_textbuf()
        # Prefix the title with the filetype indicator when there is one
        self.title = self.filetype + captured if self.filetype != '' else captured
1057 |
@annotate_tag
def abstract_start(self, tag, attrs):
    """Start-tag handler for the 'abstract' scope.

    Starts capturing text when a span with class 'st' opens and
    annotates it 'abstract_text'; every other tag is annotated ''.
    """
    is_abstract_span = tag == 'span' and 'st' in self.classes(attrs)
    if not is_abstract_span:
        return ''

    self.start_populating_textbuf()
    return 'abstract_text'
1064 |
@retrieve_tag_annotation
def abstract_end(self, tag, annotation):
    """End-tag handler for the 'abstract' scope.

    Stores the captured abstract text, and returns to the 'result'
    scope once the abstract tag itself is closed.
    """
    if annotation == 'abstract':
        self.set_handlers_to('result')
    elif annotation == 'abstract_text':
        self.stop_populating_textbuf()
        self.abstract = self.pop_textbuf()
1072 |
@annotate_tag
def input_start(self, tag, attrs):
    """Start-tag handler for the 'input' scope.

    Collects the pagination form values: the 'nextParams' inputs feed
    ``np_prev_button`` / ``np_next_button`` and a non-empty 'vqd' input
    feeds ``vqd``. Always returns None.

    An input without a ``value`` attribute (or with a valueless one,
    which the HTML parser reports as None) is treated as having an
    empty value instead of raising.
    """
    if tag != 'input':
        return

    name = attrs.get('name')
    value = attrs.get('value') or ''

    if name == 'nextParams':
        # The previous button always shows before next button
        # If there's only 1 button (page 1), it's the next button
        if self.npfound:
            self.np_prev_button = self.np_next_button
        else:
            self.npfound = True
        self.np_next_button = value
    elif name == 'vqd' and value:
        # vqd required to be passed for next/previous search page
        self.vqd = value
1090 |
@retrieve_tag_annotation
def input_end(self, tag, annotation):
    """End-tag handler for the 'input' scope; intentionally does nothing."""
    return None
1094 |
1095 | # Generic methods
1096 |
def set_handlers_to(self, scope):
    """Switch the active tag handlers to those of ``scope``.

    ``handle_starttag`` becomes ``<scope>_start`` and ``handle_endtag``
    becomes ``<scope>_end``.
    """
    start_name = scope + '_start'
    end_name = scope + '_end'
    self.handle_starttag = getattr(self, start_name)
    self.handle_endtag = getattr(self, end_name)
1101 |
def insert_annotation(self, tag, annotation):
    """Append ``annotation`` to the list of annotations kept for ``tag``."""
    self.tag_annotations.setdefault(tag, []).append(annotation)
1106 |
def start_populating_textbuf(self, data_transformer=None):
    """Begin appending parsed text to ``self.textbuf``.

    Without ``data_transformer`` text data is recorded verbatim;
    otherwise every chunk of text data is passed through it first.
    Entity and character references are always recorded via
    ``record_entityref`` / ``record_charref``.
    """
    def record_transformed_data(data):
        self.textbuf += data_transformer(data)

    if data_transformer is None:
        # Record data verbatim
        data_handler = self.record_data
    else:
        data_handler = record_transformed_data

    self.handle_data = data_handler
    self.handle_entityref = self.record_entityref
    self.handle_charref = self.record_charref
1119 |
def pop_textbuf(self):
    """Return the text captured so far and reset the buffer to ''."""
    captured, self.textbuf = self.textbuf, ''
    return captured
1124 |
def stop_populating_textbuf(self):
    """Stop capturing: text data and references are discarded from now on."""
    def discard(_ignored):
        return None

    self.handle_data = discard
    self.handle_entityref = discard
    self.handle_charref = discard
1129 |
def record_data(self, data):
    """Append a chunk of text data to ``self.textbuf`` unchanged."""
    self.textbuf = self.textbuf + data
1132 |
def record_entityref(self, ref):
    """Append the character named by entity ``ref`` to ``self.textbuf``.

    Unknown entity names are recorded literally as '&' + ref.
    """
    codepoint = html.entities.name2codepoint.get(ref)
    if codepoint is not None:
        self.textbuf += chr(codepoint)
        return

    # Entity name not found; most likely rather sloppy HTML where a
    # literal ampersand is not escaped. For instance, in the text
    #
    #     expected market return s&p 500
    #
    # "&p" is reported as an entity reference, so put the original
    # text back instead of dropping it.
    self.textbuf += '&' + ref
1146 |
def record_charref(self, ref):
    """Append the character for numeric reference ``ref`` to ``self.textbuf``.

    ``ref`` is the text between '&#' and ';': decimal digits, or hex
    digits prefixed with 'x' or 'X' (both spellings are valid HTML).
    """
    if ref.startswith(('x', 'X')):
        char = chr(int(ref[1:], 16))
    else:
        char = chr(int(ref))
    self.textbuf += char
1153 |
1154 | @staticmethod
1155 | def classes(attrs):
1156 | """Get tag's classes from its attribute dict."""
1157 | return attrs.get('class', '').split()
1158 |
def error(self, message):
    """Always raise NotImplementedError; ``message`` is not used."""
    reason = "subclasses of ParserBase must override error()"
    raise NotImplementedError(reason)
1161 |
1162 |
# One color string per output element, plus the reset sequence.
Colors = collections.namedtuple(
    'Colors',
    ['index', 'title', 'url', 'metadata', 'abstract', 'prompt', 'reset'],
)
1164 |
1165 |
class Result:
    """
    A single search result together with the helpers that render it.

    Parameters
    ----------
    index : int or str
    title : str
    url : str
    abstract : str
    metadata : str, optional
        Only applicable to DuckDuckGo News results, with publisher name and
        publishing time.

    Attributes
    ----------
    index : str
    title : str
    url : str
    abstract : str
    metadata : str or None

    Class Variables
    ---------------
    colors : object or None
        Palette providing ``index``, ``title``, ``url``, ``metadata``,
        ``abstract`` and ``reset`` strings; output is uncolored when falsy.
    urlexpand : bool
        When False only the bracketed network location of the URL is
        printed, on the same line as the title.

    Methods
    -------
    print()
    print_paginated(display_index)
    jsonizable_object()
    urltable()

    """

    # Class variables
    colors = None
    urlexpand = False

    def __init__(self, index, title, url, abstract, metadata=None):
        self.index = str(index)
        self.title = title
        self.url = url
        self.abstract = abstract
        self.metadata = metadata

        self._urltable = {self.index: url}

    def _print_title_and_url(self, index, title, url):
        """Print the index, the title and the (possibly shortened) URL."""
        palette = self.colors
        expand = self.urlexpand
        label_width = INDENT - 2

        if not expand:
            url = '[' + urllib.parse.urlparse(url).netloc + ']'

        if not palette:
            if expand:
                print(' %-*s %s' % (label_width, index + '.', title))
                print(' %s%s' % (' ' * (label_width + 1), url))
            else:
                print(' %-*s %s %s' % (label_width, index + '.', title, url))
            return

        # Adjust index to print result index clearly
        print(" %s%-*s%s" % (palette.index, label_width, index + '.', palette.reset), end='')
        colored_title = ' ' + palette.title + title + palette.reset
        colored_url = palette.url + url + palette.reset
        if expand:
            print(colored_title)
            print(' ' * (INDENT) + colored_url)
        else:
            print(colored_title + ' ' + colored_url)

    def _print_metadata_and_abstract(self, abstract, metadata=None):
        """Print the optional metadata line followed by the abstract."""
        palette = self.colors
        try:
            columns = os.get_terminal_size()[0]
        except OSError:
            # No terminal to query; fall back to unwrapped output
            columns = 0

        if metadata:
            metadata_line = (palette.metadata + metadata + palette.reset) if palette else metadata
            print(' ' * INDENT + metadata_line)

        if palette:
            print(palette.abstract, end='')

        if columns > INDENT + 1:
            # Try to fill to columns
            wrapped = textwrap.wrap(abstract.replace('\n', ''), width=columns - INDENT - 1)
            for piece in wrapped:
                print('%s%s' % (' ' * INDENT, piece))
            print('')
        else:
            print('%s\n' % abstract.replace('\n', ' '))

        if palette:
            print(palette.reset, end='')

    def print(self):
        """Print the result entry under its own index."""
        self._print_title_and_url(self.index, self.title, self.url)
        self._print_metadata_and_abstract(self.abstract, metadata=self.metadata)

    def print_paginated(self, display_index):
        """Print the result entry, labelled with ``display_index`` instead."""
        self._print_title_and_url(display_index, self.title, self.url)
        self._print_metadata_and_abstract(self.abstract, metadata=self.metadata)

    def jsonizable_object(self):
        """Return a JSON-serializable dict representing the result entry.

        The 'metadata' key is present only when metadata is truthy.
        """
        payload = dict(title=self.title, url=self.url, abstract=self.abstract)
        if self.metadata:
            payload['metadata'] = self.metadata
        return payload

    def urltable(self):
        """Return the index-to-URL table for this result.

        Returns
        -------
        dict
            A dict mapping indices (strs) to URLs (also strs); built in
            the constructor with this result's single entry.

        """
        return self._urltable
1298 |
1299 |
class DdgCmdException(Exception):
    """Base class for exceptions raised while executing ddgr commands."""
1302 |
1303 |
class NoKeywordsException(DdgCmdException):
    """Raised when a command that needs search keywords runs without any."""
1306 |
1307 |
def require_keywords(method):
    """Decorate a DdgCmd method so that it refuses to run without keywords.

    The wrapper raises NoKeywordsException when ``self.keywords`` is
    falsy; otherwise it calls ``method`` and passes its return value
    back to the caller.
    """
    @functools.wraps(method)
    def enforced_method(self, *args, **kwargs):
        if not self.keywords:
            raise NoKeywordsException('No keywords.')
        return method(self, *args, **kwargs)

    return enforced_method
1318 |
1319 |
def no_argument(method):
    """Adapt a do_* method that takes no argument to the one-argument form.

    The wrapper accepts ``arg``; a non-empty ``arg`` is ignored after a
    warning naming the command (the method name without its 'do_'
    prefix) has been logged. ``method`` is then called with ``self`` only.
    """
    @functools.wraps(method)
    def enforced_method(self, arg):
        if arg:
            # The command name comes from the wrapped method, not from
            # the (string) argument the user typed.
            method_name = method.__name__
            command_name = method_name[3:] if method_name.startswith('do_') else method_name
            LOGGER.warning("Argument to the '%s' command ignored.", command_name)
        method(self)

    return enforced_method
1333 |
1334 |
class DdgCmd:
    """
    Command line interpreter and executor class for ddgr.

    Inspired by PSL cmd.Cmd.

    Parameters
    ----------
    opts : argparse.Namespace
        Options and/or arguments.
    ua
        Passed through unchanged to ``DdgConnection`` as ``ua``.

    Attributes
    ----------
    options : argparse.Namespace
        Options that are currently in effect. Read-only attribute.
    keywords : str or list of strs
        Current keywords. Read-only attribute

    Class Variables
    ---------------
    colors : object or None
        Palette providing ``prompt``, ``abstract`` and ``reset`` strings;
        prompt and instant answer are uncolored when falsy.

    Methods
    -------
    fetch()
    display_instant_answer(json_output=False)
    display_results(prelude='\\n', json_output=False)
    fetch_and_display(prelude='\\n', json_output=False)
    read_next_command()
    help()
    cmdloop()

    """

    # Class variables
    # Read by __init__ and display_instant_answer; must exist even when
    # nobody assigns a palette.
    colors = None

    def __init__(self, opts, ua):
        super().__init__()
        self.cmd = ''
        self.index = 0
        self._opts = opts

        self._ddg_url = DdgUrl(opts)
        proxy = opts.proxy if hasattr(opts, 'proxy') else None
        self._conn = DdgConnection(proxy=proxy, ua=ua)

        self.results = []
        self.instant_answer = ''
        self._urltable = {}

        colors = self.colors
        message = 'ddgr (? for help)'
        self.prompt = ((colors.prompt + message + colors.reset + ' ')
                       if (colors and os.getenv('DISABLE_PROMPT_COLOR') is None) else (message + ': '))

    @property
    def options(self):
        """Current options."""
        return self._opts

    @property
    def keywords(self):
        """Current keywords."""
        return self._ddg_url.keywords

    @require_keywords
    def fetch(self):
        """Fetch a page and parse for results.

        Results are stored in ``self.results``.
        Instant answer is stored in ``self.instant_answer``.

        Raises
        ------
        DDGConnectionError

        See Also
        --------
        fetch_and_display

        """
        # This method also sets self._urltable.
        page = self._conn.fetch_page(self._ddg_url)

        if page is None:
            return

        if LOGGER.isEnabledFor(logging.DEBUG):
            fd, tmpfile = tempfile.mkstemp(prefix='ddgr-response-')
            os.close(fd)
            with open(tmpfile, 'w', encoding='utf-8') as fp:
                fp.write(page)
            LOGDBG("Response body written to '%s'.", tmpfile)

        if self._opts.num:
            # Paginated mode accumulates results, so numbering continues
            _index = len(self._urltable)
        else:
            _index = 0
            self._urltable = {}

        parser = DdgParser(offset=_index)
        parser.feed(page)

        if self._opts.num:
            self.results.extend(parser.results)
        else:
            self.results = parser.results

        for r in parser.results:
            self._urltable.update(r.urltable())

        self._ddg_url.np_prev = parser.np_prev_button
        self._ddg_url.np_next = parser.np_next_button
        self._ddg_url.vqd = parser.vqd

        if parser.click_result:
            self.instant_answer = parser.click_result.strip()
        else:
            self.instant_answer = ''

        self._ddg_url.update_num(len(parser.results))

    @require_keywords
    def display_instant_answer(self, json_output=False):
        """Display instant answer stored in ``self.instant_answer``.

        Nothing is printed unless ``self.index`` is 0, there is an
        instant answer, and ``json_output`` is false.

        Parameters
        ----------
        json_output : bool, optional
            Whether to dump results in JSON format. Default is False.

        See Also
        --------
        fetch_and_display

        """
        if self.index != 0 or not self.instant_answer or json_output:
            return

        if self.colors:
            print(self.colors.abstract)

        try:
            columns, _ = os.get_terminal_size()
        except OSError:
            columns = 0

        fillwidth = columns - INDENT
        if fillwidth > 0:
            for line in textwrap.wrap(self.instant_answer, width=fillwidth):
                print('%s%s' % (' ' * INDENT, line))
        else:
            # No usable terminal width (e.g. output is not a terminal):
            # textwrap.wrap() rejects non-positive widths, so print unwrapped.
            print('%s%s' % (' ' * INDENT, self.instant_answer))

        if self.colors:
            print(self.colors.reset, end='')

        if self._opts.reverse:
            print('')

    @require_keywords
    def display_results(self, prelude='\n', json_output=False):
        """Display results stored in ``self.results``.

        Parameters
        ----------
        See `fetch_and_display`.

        """

        if self._opts.num:
            results = self.results[self.index:(self.index + self._opts.num)]
        else:
            results = self.results

        if self._opts.reverse:
            # Reverse a copy: reversing self.results in place would flip
            # the order again on every redisplay.
            results = results[::-1]

        if json_output:
            # JSON output
            ##
            # variables renamed, JSON structure changed and instant_answer
            # added by @kometenstaub on 2023-08-03,
            # same license as original code applies (GPL-3; see LICENSE file)
            json_result = [r.jsonizable_object() for r in results]
            results_object = {
                "results": json_result,
            }
            if self.instant_answer:
                results_object["instant_answer"] = self.instant_answer
            print(json.dumps(results_object, indent=2, sort_keys=True, ensure_ascii=False))
        elif not results:
            print('No results.', file=sys.stderr)
        else:
            sys.stderr.write(prelude)

            if self._opts.num:  # Paginated output
                for i, r in enumerate(results):
                    if self._opts.reverse:
                        r.print_paginated(str(len(results) - i))
                    else:
                        r.print_paginated(str(i + 1))
            else:  # Regular output
                for r in results:
                    r.print()

    @require_keywords
    def fetch_and_display(self, prelude='\n', json_output=False):
        """Fetch a page and display results.

        Results are stored in ``self.results``.

        Parameters
        ----------
        prelude : str, optional
            A string that is written to stderr before showing actual results,
            usually serving as a separator. Default is an empty line.
        json_output : bool, optional
            Whether to dump results in JSON format. Default is False.

        Raises
        ------
        DDGConnectionError

        See Also
        --------
        fetch
        display_instant_answer
        display_results

        """
        self.fetch()
        # json_output is forwarded to both helpers so that no plain-text
        # instant answer is mixed into JSON output.
        if not self._opts.reverse:
            self.display_instant_answer(json_output=json_output)
            self.display_results(prelude=prelude, json_output=json_output)
        else:
            self.display_results(prelude=prelude, json_output=json_output)
            self.display_instant_answer(json_output=json_output)

    def read_next_command(self):
        """Show omniprompt and read user command line.

        Command line is always stripped, and each consecutive group of
        whitespace is replaced with a single space character. If the
        command line is empty after stripping, then ignore it and keep
        reading. Exit with status 0 if we get EOF or an empty line
        (pre-strip, that is, a bare Enter) twice in a row.

        The new command line (non-empty) is stored in ``self.cmd``.

        """
        enter_count = 0
        while True:
            try:
                cmd = input(self.prompt)
            except EOFError:
                sys.exit(0)

            if not cmd:
                enter_count += 1
                if enter_count == 2:
                    # Double Enter
                    sys.exit(0)
            else:
                enter_count = 0

            cmd = ' '.join(cmd.split())
            if cmd:
                self.cmd = cmd
                break

    @staticmethod
    def help():
        """Write the omniprompt help followed by an empty line to stderr."""
        DdgArgumentParser.print_omniprompt_help(sys.stderr)
        printerr('')

    @require_keywords
    @no_argument
    def do_first(self):
        """Show the first page of results."""
        if self._opts.num:
            if self.index < self._opts.num:
                print('Already at the first page.', file=sys.stderr)
            else:
                self.index = 0
                self.display_results()
            return

        try:
            self._ddg_url.first_page()
        except ValueError as e:
            print(e, file=sys.stderr)
            return

        self.fetch_and_display()

    def do_ddg(self, arg):
        """Start a new search for ``arg`` with the current options."""
        if self._opts.num:
            self.index = 0
            self.results = []
            self._urltable = {}
        # Update keywords and reconstruct URL
        self._opts.keywords = arg
        self._ddg_url = DdgUrl(self._opts)
        # If there is a Bang, let DuckDuckGo do the work
        # (slicing keeps this safe for empty or one-character input)
        if arg[:1] == '!' or arg[1:2] == '!':
            open_url(self._ddg_url.full())
        else:
            self.fetch_and_display()

    @require_keywords
    @no_argument
    def do_next(self):
        """Show the next page of results, fetching more when needed."""
        if self._opts.num:
            count = len(self.results)
            if self._ddg_url._qrycnt == 0 and self.index >= count:
                print('No results.', file=sys.stderr)
                return

            self.index += self._opts.num
            if count - self.index < self._opts.num:
                # Not enough cached results left for a full page
                self._ddg_url.next_page()
                self.fetch_and_display()
            else:
                self.display_results()
        elif self._ddg_url._qrycnt == 0:
            # If no results were fetched last time, we have hit the last page already
            print('No results.', file=sys.stderr)
        else:
            self._ddg_url.next_page()
            self.fetch_and_display()

    def handle_range(self, nav, low, high):
        """Open every result in the index range ``nav`` ('A-B').

        In paginated mode the bounds are relative to the current page
        and only indices within ``range(low, high)`` are accepted.
        Problems are reported on stderr; nothing is raised for a
        malformed range.
        """
        try:
            if self._opts.num:
                vals = [int(x) + self.index for x in nav.split('-')]
            else:
                vals = [int(x) for x in nav.split('-')]

            if len(vals) != 2:
                printerr('Invalid range %s.' % nav)
                return

            if vals[0] > vals[1]:
                vals[0], vals[1] = vals[1], vals[0]

            for _id in range(vals[0], vals[1] + 1):
                if self._opts.num and _id not in range(low, high):
                    printerr('Invalid index %s.' % (_id - self.index))
                    continue

                if str(_id) in self._urltable:
                    open_url(self._urltable[str(_id)])
                else:
                    printerr('Invalid index %s.' % _id)
        except ValueError:
            printerr('Invalid range %s.' % nav)

    @require_keywords
    def do_open(self, low, high, *args):
        """Open results given as indices, ranges ('A-B') or 'a' for all.

        In paginated mode only indices within ``range(low, high)`` are
        opened. Invalid entries are reported on stderr and skipped.
        """
        if not args:
            printerr('Index or range missing.')
            return

        for nav in args:
            if nav == 'a':
                # Keys are stringified integers; sort numerically so
                # that '10' comes after '2'.
                for key in sorted(self._urltable, key=int):
                    if self._opts.num and int(key) not in range(low, high):
                        continue
                    open_url(self._urltable[key])
            elif nav in self._urltable:
                if self._opts.num:
                    nav = str(int(nav) + self.index)
                    if int(nav) not in range(low, high):
                        printerr('Invalid index %s.' % (int(nav) - self.index))
                        continue
                open_url(self._urltable[nav])
            elif '-' in nav:
                self.handle_range(nav, low, high)
            else:
                printerr('Invalid index %s.' % nav)

    @require_keywords
    @no_argument
    def do_previous(self):
        """Show the previous page of results."""
        if self._opts.num:
            if self.index < self._opts.num:
                print('Already at the first page.', file=sys.stderr)
            else:
                self.index -= self._opts.num
                self.display_results()
            return

        try:
            self._ddg_url.prev_page()
        except ValueError as e:
            print(e, file=sys.stderr)
            return

        self.fetch_and_display()

    def copy_url(self, idx):
        """Copy the URL stored under index ``idx`` to a clipboard.

        Native clipboard utilities are tried first, then the tmux buffer,
        then the GNU Screen paste buffer.

        Raises
        ------
        NoKeywordsException
            On any failure, including an unknown index.

        """
        try:
            content = self._urltable[str(idx)].encode('utf-8')

            # try copying the url to clipboard using native utilities
            copier_params = []
            if sys.platform.startswith(('linux', 'freebsd', 'openbsd')):
                if shutil.which('xsel') is not None:
                    copier_params = ['xsel', '-b', '-i']
                elif shutil.which('xclip') is not None:
                    copier_params = ['xclip', '-selection', 'clipboard']
                elif shutil.which('wl-copy') is not None:
                    copier_params = ['wl-copy']
                # If we're using Termux (Android) use its 'termux-api'
                # add-on to set device clipboard.
                elif shutil.which('termux-clipboard-set') is not None:
                    copier_params = ['termux-clipboard-set']
            elif sys.platform == 'darwin':
                copier_params = ['pbcopy']
            elif sys.platform == 'win32':
                copier_params = ['clip']
            elif sys.platform.startswith('haiku'):
                copier_params = ['clipboard', '-i']

            if copier_params:
                Popen(copier_params, stdin=PIPE, stdout=DEVNULL, stderr=DEVNULL).communicate(content)
                return

            # If native clipboard utilities are absent, try to use terminal multiplexers
            # tmux
            if os.getenv('TMUX_PANE'):
                copier_params = ['tmux', 'set-buffer']
                Popen(copier_params + [content], stdin=DEVNULL, stdout=DEVNULL, stderr=DEVNULL).communicate()
                print('URL copied to tmux buffer.')
                return

            # GNU Screen paste buffer
            if os.getenv('STY'):
                copier_params = ['screen', '-X', 'readbuf', '-e', 'utf8']
                tmpfd, tmppath = tempfile.mkstemp()
                try:
                    with os.fdopen(tmpfd, 'wb') as fp:
                        fp.write(content)
                    copier_params.append(tmppath)
                    Popen(copier_params, stdin=DEVNULL, stdout=DEVNULL, stderr=DEVNULL).communicate()
                finally:
                    os.unlink(tmppath)
                return

            printerr('failed to locate suitable clipboard utility')
        except Exception as e:
            raise NoKeywordsException from e

    def cmdloop(self):
        """Run REPL."""
        if self.keywords:
            first_keyword = self.keywords[0]
            # Bang queries are handed straight to DuckDuckGo
            if first_keyword[:1] == '!' or first_keyword[1:2] == '!':
                open_url(self._ddg_url.full())
            else:
                self.fetch_and_display()

        while True:
            self.read_next_command()
            # Automatic dispatcher
            #
            # We can't write a dispatcher for now because that could
            # change behaviour of the prompt. However, we have already
            # laid a lot of ground work for the dispatcher, e.g., the
            # `no_argument' decorator.

            _num = self._opts.num
            try:
                cmd = self.cmd
                if cmd == 'f':
                    self.do_first('')
                elif cmd.startswith('d '):
                    self.do_ddg(cmd[2:])
                elif cmd == 'n':
                    self.do_next('')
                elif cmd.startswith('o '):
                    self.do_open(self.index + 1, self.index + self._opts.num + 1, *cmd[2:].split())
                elif cmd.startswith('O '):
                    open_url.override_text_browser = True
                    try:
                        self.do_open(self.index + 1, self.index + self._opts.num + 1, *cmd[2:].split())
                    finally:
                        # Never leave the override switched on, even if
                        # opening fails.
                        open_url.override_text_browser = False
                elif cmd == 'p':
                    self.do_previous('')
                elif cmd == 'q':
                    break
                elif cmd == '?':
                    self.help()
                elif _num and cmd.isdigit() and int(cmd) in range(1, _num + 1):
                    open_url(self._urltable[str(int(cmd) + self.index)])
                elif _num == 0 and cmd in self._urltable:
                    open_url(self._urltable[cmd])
                elif self.keywords and cmd.isdigit() and int(cmd) < 100:
                    printerr('Index out of bound. To search for the number, use d.')
                elif cmd == 'x':
                    Result.urlexpand = not Result.urlexpand
                    self.display_results()
                elif cmd.startswith('c ') and cmd[2:].isdigit():
                    idx = int(cmd[2:])
                    # Without pagination (num == 0) every known index is
                    # valid; otherwise only those on the current page.
                    known = len(self._urltable)
                    upper = min(self._opts.num, known) if self._opts.num else known
                    if 0 < idx <= upper:
                        self.copy_url(int(self.index) + idx)
                    else:
                        printerr("invalid index")
                else:
                    self.do_ddg(cmd)
            except KeyError:
                printerr('Index out of bound. To search for the number, use d.')
            except NoKeywordsException:
                printerr('Initiate a query first.')
1838 |
1839 |
class DdgArgumentParser(argparse.ArgumentParser):
    """Custom argument parser for ddgr.

    Besides the usual synopsis, help output includes the omniprompt
    key reference and general program information, and the full help
    is printed on any argument error. The static ``positive_int``,
    ``nonnegative_int``, ``is_duration`` and ``is_colorstr`` methods
    are type guards that raise ``argparse.ArgumentTypeError`` on
    invalid input.
    """

    # Print omniprompt help
    @staticmethod
    def print_omniprompt_help(file=None):
        """Write the omniprompt key reference to ``file`` (default: stderr)."""
        file = sys.stderr if file is None else file
        file.write(textwrap.dedent("""
        omniprompt keys:
        n, p, f fetch the next, prev or first set of search results
        index open the result corresponding to index in browser
        o [index|range|a ...] open space-separated result indices, ranges or all
        O [index|range|a ...] like key 'o', but try to open in a GUI browser
        d keywords new DDG search for 'keywords' with original options
        should be used to search omniprompt keys and indices
        x toggle url expansion
        c index copy url to clipboard
        q, ^D, double Enter exit ddgr
        ? show omniprompt help
        * other inputs are considered as new search keywords
        """))

    # Print information on ddgr
    @staticmethod
    def print_general_info(file=None):
        """Write version, copyright, license and webpage to ``file`` (default: stderr)."""
        file = sys.stderr if file is None else file
        file.write(textwrap.dedent("""
        Version %s
        Copyright © 2016-2023 Arun Prakash Jana
        License: GPLv3
        Webpage: https://github.com/jarun/ddgr
        """ % _VERSION_))

    # Augment print_help to print more than synopsis and options
    def print_help(self, file=None):
        """Print the standard help, then omniprompt help and general info."""
        super().print_help(file)
        self.print_omniprompt_help(file)
        self.print_general_info(file)

    # Automatically print full help text on error
    def error(self, message):
        """Report ``message``, print the full help to stderr and exit with status 2."""
        sys.stderr.write('%s: error: %s\n\n' % (self.prog, message))
        self.print_help(sys.stderr)
        self.exit(2)

    # Type guards
    #
    # Explicit comparisons are used instead of assert statements, which
    # are removed when Python runs with -O and would then let invalid
    # values through.
    @staticmethod
    def positive_int(arg):
        """Try to convert a string into a positive integer."""
        try:
            n = int(arg)
            if n <= 0:
                raise ValueError('not positive')
            return n
        except ValueError as e:
            raise argparse.ArgumentTypeError('%s is not a positive integer' % arg) from e

    @staticmethod
    def nonnegative_int(arg):
        """Try to convert a string into a nonnegative integer <= 25."""
        try:
            n = int(arg)
            if not 0 <= n <= 25:
                raise ValueError('out of range')
            return n
        except ValueError as e:
            raise argparse.ArgumentTypeError('%s is not a non-negative integer <= 25' % arg) from e

    @staticmethod
    def is_duration(arg):
        """Check if a string is a valid duration accepted by DuckDuckGo.

        A valid duration is of the form dNUM, where d is a single letter h
        (hour), d (day), w (week), m (month), or y (year), and NUM is a
        non-negative integer.
        """
        try:
            if arg[0] not in ('h', 'd', 'w', 'm', 'y') or int(arg[1:]) < 0:
                raise ValueError
        except (TypeError, IndexError, ValueError) as e:
            raise argparse.ArgumentTypeError('%s is not a valid duration' % arg) from e
        return arg

    @staticmethod
    def is_colorstr(arg):
        """Check if a string is a valid color string.

        A valid color string has exactly six characters, each of which
        is a member of ``COLORMAP``.
        """
        if len(arg) != 6 or any(c not in COLORMAP for c in arg):
            raise argparse.ArgumentTypeError('%s is not a valid color string' % arg)
        return arg
1932 |
1933 |
1934 | # Miscellaneous functions
1935 |
def python_version():
    """Return the running interpreter's version as 'major.minor.micro'."""
    major_minor_micro = sys.version_info[:3]
    return '.'.join(str(part) for part in major_minor_micro)
1938 |
1939 |
def get_colorize(colorize):
    """Translate the colorize setting into a bool.

    'always' gives True, 'auto' gives whether stdout is a terminal, and
    anything else (i.e. 'never') gives False.
    """
    if colorize == 'auto':
        return sys.stdout.isatty()

    # colorize = 'never' (or anything unrecognised) disables colors
    return colorize == 'always'
1949 |
1950 |
def set_win_console_mode():
    """Best-effort: enable VT100 escape sequence processing on Windows 10.

    Does nothing unless ``platform.release()`` is '10'. Any failure
    (not running on Windows, no console attached, API call failing) is
    deliberately ignored.
    """
    # VT100 control sequences are supported on Windows 10 Anniversary Update and later.
    # https://docs.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences
    # https://docs.microsoft.com/en-us/windows/console/setconsolemode
    if platform.release() == '10':
        STD_OUTPUT_HANDLE = -11
        STD_ERROR_HANDLE = -12
        ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
        try:
            from ctypes import windll, wintypes, byref
            kernel32 = windll.kernel32
            for nhandle in (STD_OUTPUT_HANDLE, STD_ERROR_HANDLE):
                handle = kernel32.GetStdHandle(nhandle)
                old_mode = wintypes.DWORD()
                if not kernel32.GetConsoleMode(handle, byref(old_mode)):
                    raise RuntimeError('GetConsoleMode failed')
                # Integer bitwise OR: add the flag to the current mode
                new_mode = old_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING
                if not kernel32.SetConsoleMode(handle, new_mode):
                    raise RuntimeError('SetConsoleMode failed')
                # Note: No need to restore at exit. SetConsoleMode seems to
                # be limited to the calling process.
        except Exception:
            # Colors are a nicety; never fail startup because of them.
            pass
1974 |
1975 |
1976 | # Query autocompleter
1977 |
1978 | # This function is largely experimental and could raise any exception;
1979 | # you should be prepared to catch anything. When it works though, it
1980 | # returns a list of strings the prefix could autocomplete to (however,
1981 | # it is not guaranteed that they start with the specified prefix; for
1982 | # instance, they won't if the specified prefix ends in a punctuation
1983 | # mark.)
def completer_fetch_completions(prefix):
    """Fetch autocomplete suggestions for ``prefix`` from DuckDuckGo.

    ``prefix`` is percent-encoded (no characters left as safe) and sent
    to the ``/ac/`` endpoint; the JSON response is decoded as UTF-8 and
    the 'phrase' field of every entry is returned as a list of strings.

    Network, decoding and JSON errors are not handled here and propagate
    to the caller.
    """
    # One can pass the 'hl' query param to specify the language. We
    # ignore that for now.
    api_url = ('https://duckduckgo.com/ac/?q=%s&kl=wt-wt' %
               urllib.parse.quote(prefix, safe=''))
    # A timeout of 3 seconds seems to be overly generous already.
    with urllib.request.urlopen(api_url, timeout=3) as resp:
        payload = resp.read().decode('utf-8')

    # json.loads returns a plain object, not a context manager, so it must
    # not be used in a ``with`` statement.
    respobj = json.loads(payload)
    return [entry['phrase'] for entry in respobj]
1993 |
1994 |
def completer_run(prefix):
    """Print completions for ``prefix`` (one per line), then exit with status 0.

    Whitespace-separated words of a non-empty prefix are joined with '+'
    before being handed to completer_fetch_completions(). An empty prefix,
    or an empty completion list, prints nothing. The process always exits
    via sys.exit(0).
    """
    if prefix:
        query = '+'.join(prefix.split())
        suggestions = completer_fetch_completions(query)
        if suggestions:
            print('\n'.join(suggestions))
    sys.exit(0)
2001 |
2002 |
def parse_args(args=None, namespace=None):
    """Build the ddgr command-line parser and parse arguments with it.

    Parameters
    ----------
    args : list, optional
        Arguments to parse; passed straight through to the parser.
        When None the parser uses its own default (the process
        command line).
    namespace : argparse.Namespace, optional
        Namespace to populate; passed straight through to the parser.

    Returns
    -------
    argparse.Namespace
        The parsed options. If -C/--nocolor was given, ``colorize`` is
        forced to 'never'.

    """

    # The DDGR_COLORS environment variable, when set and non-empty,
    # supplies the default for --colors.
    env_colors = os.getenv('DDGR_COLORS')

    parser = DdgArgumentParser(description='DuckDuckGo from the terminal.')
    add = parser.add_argument

    add('-n', '--num', type=parser.nonnegative_int, default=10, metavar='N',
        help='show N (0<=N<=25) results per page (default 10); N=0 shows actual number of results fetched per page')
    add('-r', '--reg', dest='region', default='us-en', metavar='REG',
        help="region-specific search e.g. 'us-en' for US (default); visit https://duckduckgo.com/params")
    add('--colorize', nargs='?', choices=['auto', 'always', 'never'],
        const='always', default='auto',
        help="""whether to colorize output; defaults to 'auto', which enables
        color when stdout is a tty device; using --colorize without an argument
        is equivalent to --colorize=always""")
    add('-C', '--nocolor', action='store_true',
        help='equivalent to --colorize=never')
    add('--colors', dest='colorstr', type=parser.is_colorstr,
        default=env_colors or 'oCdgxy', metavar='COLORS',
        help='set output colors (see man page for details)')
    add('-j', '--ducky', action='store_true',
        help='open the first result in a web browser; implies --np')
    add('-t', '--time', dest='duration', metavar='SPAN', default='',
        choices=('d', 'w', 'm', 'y'),
        help='time limit search '
             '[d (1 day), w (1 wk), m (1 month), y (1 year)]')
    add('-w', '--site', dest='sites', action='append', metavar='SITE',
        help='search sites using DuckDuckGo')
    add('-x', '--expand', action='store_true',
        help='Show complete url in search results')
    add('-p', '--proxy', metavar='URI',
        help='tunnel traffic through an HTTPS proxy; URI format: [http[s]://][user:pwd@]host[:port]')
    add('--unsafe', action='store_true', help='disable safe search')
    add('--noua', action='store_true', help='disable user agent')
    add('--json', action='store_true',
        help='output in JSON format; implies --np')
    add('--gb', '--gui-browser', dest='gui_browser', action='store_true',
        help='open a bang directly in gui browser')
    add('--np', '--noprompt', dest='noninteractive', action='store_true',
        help='perform search and exit, do not prompt')
    add('--rev', '--reverse', dest='reverse', action='store_true',
        help='list entries in reversed order')
    add('--url-handler', metavar='UTIL',
        help='custom script or cli utility to open results')
    add('--show-browser-logs', action='store_true',
        help='do not suppress browser output (stdout and stderr)')
    add('-v', '--version', action='version', version=_VERSION_)
    add('-d', '--debug', action='store_true', help='enable debugging')
    add('keywords', nargs='*', metavar='KEYWORD', help='search keywords')
    # Hidden option (suppressed from --help).
    add('--complete', help=argparse.SUPPRESS)

    result = parser.parse_args(args, namespace)

    # -C is shorthand for --colorize=never and wins over --colorize.
    if result.nocolor:
        result.colorize = 'never'

    return result
2060 |
2061 |
def main():
    """Program entry point: parse options, configure globals, run the search.

    After option parsing this sets up logging, colors and URL-opening
    behavior, then either runs one non-interactive search (for --json,
    --ducky or --np) and exits with status 0, or enters the interactive
    command loop. Exceptions raised while searching are logged and turned
    into exit status 1, unless debug logging is enabled, in which case
    they propagate for a traceback.
    """
    opts = parse_args()

    # Set logging level
    if opts.debug:
        LOGGER.setLevel(logging.DEBUG)
        LOGDBG('ddgr version %s Python version %s', _VERSION_, python_version())

    # Handle query completer (completer_run exits the process)
    if opts.complete is not None:
        completer_run(opts.complete)

    check_stdout_encoding()

    # Add cmdline args to readline history; failure here is not fatal
    if opts.keywords:
        try:
            readline.add_history(' '.join(opts.keywords))
        except Exception:
            pass

    # Set colors
    colorize = get_colorize(opts.colorize)

    if colorize:
        colors = Colors(*[COLORMAP[c] for c in opts.colorstr], reset=COLORMAP['x'])
    else:
        colors = None
    Result.colors = colors
    Result.urlexpand = opts.expand
    DdgCmd.colors = colors

    # Try to enable ANSI color support in cmd or PowerShell on Windows 10
    if sys.platform == 'win32' and sys.stdout.isatty() and colorize:
        set_win_console_mode()

    if opts.url_handler is None:
        open_url.override_text_browser = bool(opts.gui_browser)

        # Handle browser output suppression
        open_url.suppress_browser_output = (
            not opts.show_browser_logs and os.getenv('BROWSER') not in TEXT_BROWSERS
        )
    else:
        open_url.url_handler = opts.url_handler

    try:
        repl = DdgCmd(opts, '' if opts.noua else USER_AGENT)

        interactive = not (opts.json or opts.ducky or opts.noninteractive)
        if interactive:
            # Interactive mode
            repl.cmdloop()
            return

        # Non-interactive mode
        keywords = repl.keywords
        if keywords and (
                keywords[0][0] == '!' or
                (len(keywords[0]) > 1 and keywords[0][1] == '!')
        ):
            # Handle bangs: '!' as the first or second character of the
            # first keyword
            open_url(repl._ddg_url.full())
        else:
            repl.fetch()
            if not opts.ducky:
                # --rev swaps the order of the instant answer and the results
                if opts.reverse:
                    repl.display_results(prelude='', json_output=opts.json)
                    repl.display_instant_answer(json_output=opts.json)
                else:
                    repl.display_instant_answer(json_output=opts.json)
                    repl.display_results(prelude='', json_output=opts.json)
            elif repl.results:
                open_url(repl.results[0].url)
            else:
                print('No results.', file=sys.stderr)

        sys.exit(0)
    except Exception as e:
        # Outside debugging mode only the error message is printed; in
        # debugging mode the exception is let through for a traceback.
        if not LOGGER.isEnabledFor(logging.DEBUG):
            LOGERR(e)
            sys.exit(1)

        raise
2141 |
2142 |
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
2145 |
--------------------------------------------------------------------------------