├── .gitignore
├── LICENSE.txt
├── README.md
├── apple-technotes-scrape.py
├── daily-puppy-scrape.py
├── gocomics-scrape.py
└── openai.py

/.gitignore:
--------------------------------------------------------------------------------
apple-technotes-dates.json

--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.
   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## apple-technotes-scrape.py

In early 2022 Apple launched [a new technotes site](https://developer.apple.com/documentation/Technotes). It's updated regularly, but does not have a feed. This script scrapes the JSON metadata for the catalog and generates an Atom feed from it. Publication dates for the technotes are inferred from the first time that a given note is seen, as sketched below.

The scraped feed is published at [https://persistent.info/scraped/apple-technotes.xml](https://persistent.info/scraped/apple-technotes.xml).
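The catalog JSON doesn't expose publication dates, so the only way to date an entry is to remember when it first appeared. A minimal sketch of that idea (the real logic lives in `fetch_technotes()` below and persists to `apple-technotes-dates.json`; the `seen.json` path here is just a placeholder):

    import datetime, json

    def first_seen_date(identifier, path="seen.json"):
        try:
            with open(path) as f:
                seen = json.load(f)
        except FileNotFoundError:
            seen = {}
        if identifier not in seen:
            # First sighting: record "now" as the inferred publication date.
            seen[identifier] = datetime.datetime.now().isoformat()
            with open(path, "w") as f:
                json.dump(seen, f, indent=2)
        return datetime.datetime.fromisoformat(seen[identifier])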
## gocomics-scrape.py

GoComics.com (which recently merged with Comics.com) has stopped linking to or updating its RSS feeds.

This script fetches a GoComics.com strip homepage, generates the strip URLs for the last two weeks, and for each one looks up the actual comic image, outputting a minimal Atom feed with the images inline. Sample usage:

    python3 gocomics-scrape.py frazz > ~/www/scraped/frazz.xml

I've put something like that in a cron job that runs once an hour.
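Concretely, the crontab entry looks something like this (the paths are illustrative):

    # Regenerate the Frazz feed at the top of every hour.
    0 * * * * python3 /home/feeds/gocomics-scrape.py frazz > /home/feeds/www/scraped/frazz.xml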
Incidentally, Frazz and Calvin and Hobbes are the comics that I wanted this for, so if you're looking for full-content feeds for them, they can be found at [https://persistent.info/scraped/frazz.xml](https://persistent.info/scraped/frazz.xml) and [https://persistent.info/scraped/calvinandhobbes.xml](https://persistent.info/scraped/calvinandhobbes.xml).

## daily-puppy-scrape.py

[The Daily Puppy](http://www.dailypuppy.com/) ostensibly [has an RSS feed](http://feeds.feedburner.com/TheDailyPuppy). However, it has not worked since early January 2014 (the contents are empty). Given that the site also has references to iGoogle ([shut down](https://support.google.com/websearch/answer/2664197?hl=en) on November 1, 2013), it doesn't seem to be maintained from a technical perspective anymore. This script scrapes the 10 most recent puppies and generates a (full-content) feed for them; it uses the same XML endpoints as [the iOS app](https://itunes.apple.com/app/id305199217), shown below.

The result is placed at [http://persistent.info/scraped/daily-puppy.xml](http://persistent.info/scraped/daily-puppy.xml). If the site fixes its official feed, I will redirect this URL back to the official feed.
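For reference, that endpoint can be fetched directly; a quick sketch, using Python 3 for brevity even though the script itself is Python 2 (and assuming the endpoint still responds at all, given the feed's state):

    import urllib.request

    URL = 'http://www.dailypuppy.com/mobile/featured-puppy.php?startIndex=0&count=10'
    req = urllib.request.Request(
        URL, headers={'User-Agent': 'Mozilla/5.0 (compatible; Feedbot/1.0)'})
    # Prints the raw XML <item> list that get_puppies() parses with minidom.
    print(urllib.request.urlopen(req).read().decode('utf-8', 'replace'))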
--------------------------------------------------------------------------------
/apple-technotes-scrape.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import datetime
import json
import os
import typing
import urllib.request
from xml.sax.saxutils import escape as xml_escape

XHTML_NS = 'http://www.w3.org/1999/xhtml'

# First-seen dates for technotes are persisted here between runs.
DATES_PATH = os.path.join(os.path.dirname(__file__), 'apple-technotes-dates.json')

class Technote(typing.NamedTuple):
    title: str
    url: str
    summary: str
    published_date: datetime.datetime

def open_url(url):
    req = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (compatible; Feedbot/1.0)'})

    return urllib.request.urlopen(req)

def get_segment_text(s):
    # Abstract segments are either plain text or inline code.
    return s.get("text") or s.get("code") or ""

def fetch_technotes():
    with open_url("https://developer.apple.com/tutorials/data/documentation/Technotes.json") as f:
        technotes_json = json.load(f)

    try:
        with open(DATES_PATH) as f:
            dates_json = json.load(f)
    except FileNotFoundError:
        dates_json = {}

    technotes = []
    for reference_json in technotes_json.get("references", {}).values():
        if reference_json.get("role") != "article":
            continue
        technote_json = reference_json
        published_date_str = dates_json.get(technote_json["identifier"])
        if published_date_str:
            published_date = datetime.datetime.fromisoformat(published_date_str)
        else:
            # First time we've seen this technote; treat it as published now.
            published_date = datetime.datetime.now()
            dates_json[technote_json["identifier"]] = published_date.isoformat()
        technote = Technote(
            title=technote_json["title"],
            url=f"https://developer.apple.com{technote_json['url']}",
            summary="".join([get_segment_text(a) for a in technote_json["abstract"]]),
            published_date=published_date,
        )
        technotes.append(technote)

    with open(DATES_PATH, "w") as f:
        json.dump(dates_json, f, indent=2)
    return sorted(technotes, key=lambda t: t.published_date, reverse=True)


print('<?xml version="1.0" encoding="utf-8"?>')
print('<feed xmlns="http://www.w3.org/2005/Atom">')
print('<title>Apple Technotes (Unofficial)</title>')
print('<link rel="alternate" href="https://developer.apple.com/documentation/Technotes"/>')

for technote in fetch_technotes():
    print('<entry>')
    print('  <title>%s</title>' % xml_escape(technote.title))
    print('  <id>%s</id>' % xml_escape(technote.url))
    print('  <updated>%sT12:00:00.000Z</updated>' % technote.published_date.date().isoformat())
    print('  <link rel="alternate" href="%s"/>' % xml_escape(technote.url))
    print('  <content type="xhtml">')
    print('    <div xmlns="%s">%s</div>' % (XHTML_NS, xml_escape(technote.summary)))
    print('  </content>')
    print('</entry>')

print('</feed>')
--------------------------------------------------------------------------------
/daily-puppy-scrape.py:
--------------------------------------------------------------------------------
#!/usr/local/bin/python

# Unlike the other scripts in this repository, this one is Python 2.

import datetime
import re
import time
import urllib
import xml.dom.minidom
from xml.sax.saxutils import escape as xml_escape

_BASE_URL = "http://www.dailypuppy.com/mobile/featured-puppy.php"
_XHTML_NS = "http://www.w3.org/1999/xhtml"
_DATE_RE = re.compile(r"(\d{4})-(\d{2})-(\d{2})$")

class Puppy(object):
    def __init__(self, title, data_url):
        self.title = title
        self.data_url = data_url
        self.date = None
        self.html_url = None
        self.description = None
        self.pictures = []

def open_url(url):
    class Opener(urllib.FancyURLopener):
        version = "Mozilla/5.0 (compatible; Feedbot/1.0)"
    return Opener().open(url)

def _get_node_text(node):
    return node.firstChild.data if node.firstChild else ""

def get_puppies():
    puppies_file = open_url(_BASE_URL + "?startIndex=0&count=10")
    puppies_dom = xml.dom.minidom.parse(puppies_file)

    puppies = []
    puppy_nodes = puppies_dom.getElementsByTagName("item")
    for puppy_node in puppy_nodes:
        puppies.append(Puppy(
            title=_get_node_text(puppy_node.getElementsByTagName("title")[0]),
            data_url=_get_node_text(puppy_node.getElementsByTagName("link")[0])))

    puppies_file.close()

    return puppies

def fetch_puppy_data(puppy):
    puppy_file = open_url(puppy.data_url)
    puppy_dom = xml.dom.minidom.parse(puppy_file)

    puppy.html_url = _get_node_text(
        puppy_dom.getElementsByTagName("shareLink")[0])
    # The share link ends in the publication date (YYYY-MM-DD).
    date_match = _DATE_RE.search(puppy.html_url)
    puppy.date = datetime.datetime(
        year=int(date_match.group(1)),
        month=int(date_match.group(2)),
        day=int(date_match.group(3)),
        hour=12)
    puppy.description = _get_node_text(
        puppy_dom.getElementsByTagName("description")[0])
    picture_nodes = puppy_dom.getElementsByTagName("fullres")
    for picture_node in picture_nodes:
        puppy.pictures.append(_get_node_text(picture_node))

    puppy_file.close()

puppies = get_puppies()
for puppy in puppies:
    fetch_puppy_data(puppy)

feed = u'<?xml version="1.0" encoding="utf-8"?>\n'
feed += u'<feed xmlns="http://www.w3.org/2005/Atom">\n'
feed += u'<title>The Daily Puppy (Unofficial)</title>\n'
feed += u'<link rel="alternate" type="text/html" ' \
    u'href="http://www.dailypuppy.com/"/>\n'

if puppies:
    for puppy in puppies:
        feed += u'<entry>\n'
        feed += u'  <title>%s</title>\n' % xml_escape(puppy.title)
        feed += u'  <id>%s</id>\n' % xml_escape(puppy.data_url)
        feed += u'  <link rel="alternate" href="%s"/>\n' % \
            xml_escape(puppy.html_url)
        feed += u'  <updated>%sZ</updated>\n' % xml_escape(puppy.date.isoformat())
        feed += u'  <content type="xhtml">\n'
        feed += u'    <div xmlns="%s">\n' % _XHTML_NS
        feed += u'      <p><img src="%s"/></p>\n' % xml_escape(puppy.pictures[0])
        feed += u'      <p>%s</p>\n' % xml_escape(puppy.description)
        for picture in puppy.pictures[1:]:
            feed += u'      <p><img src="%s"/></p>\n' % xml_escape(picture)
        feed += u'    </div>\n'
        feed += u'  </content>\n'
        feed += u'</entry>\n'
else:
    # Emit a single error entry so that feed readers surface the breakage.
    feed += u'<entry>\n'
    feed += u'  <title>Could not scrape feed</title>\n'
    feed += u'  <id>tag:persistent.info,2013:daily-puppy-scrape-%d</id>\n' % \
        int(time.time())
    feed += u'  <updated>%sZ</updated>\n' % \
        datetime.datetime.now().isoformat()
    feed += u'  <content type="xhtml">\n'
    feed += u'    <div xmlns="%s">Could not scrape the feed. Check the GitHub ' \
        u'repository for updates.</div>\n' % _XHTML_NS
    feed += u'  </content>\n'
    feed += u'</entry>\n'

feed += u'</feed>\n'
print feed.encode("utf-8")

--------------------------------------------------------------------------------
/gocomics-scrape.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import datetime
import html.parser
import logging
import re
import sys
import urllib.request
from xml.sax.saxutils import escape as xml_escape

XHTML_NS = 'http://www.w3.org/1999/xhtml'

def open_url(url):
    req = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (compatible; Feedbot/1.0)'})

    return urllib.request.urlopen(req)


def get_homepage_data(strip_id):
    class HomepageParser(html.parser.HTMLParser):
        def __init__(self):
            super().__init__()
            self.title = ''
            self.in_title = False

        def handle_starttag(self, tag, attrs):
            if tag == 'title' and not self.title:
                self.in_title = True

        def handle_endtag(self, tag):
            if tag == 'title':
                self.in_title = False

        def handle_data(self, data):
            if self.in_title:
                self.title += data
                # Trim the "| GoComics"-style suffix from the page title.
                self.title = re.sub(r"\s*\|.*$", "", self.title)

    homepage_url = 'https://www.gocomics.com/%s' % strip_id
    homepage_file = open_url(homepage_url)
    parser = HomepageParser()
    parser.feed(homepage_file.read().decode())
    parser.close()
    homepage_file.close()

    if not parser.title:
        return None, []

    # Strip pages have predictable per-day URLs, so generate the last two
    # weeks' worth instead of crawling for them.
    today = datetime.date.today()
    strips = []
    for i in range(0, 14):
        strip_date = today - datetime.timedelta(days=i)
        strip_url = '%s/%s' % (homepage_url, strip_date.strftime('%Y/%m/%d'))
        strips.append((strip_date, strip_url))

    return parser.title, strips


def get_strip_image_url(strip_url):
    class ImageParser(html.parser.HTMLParser):
        def __init__(self):
            super().__init__()
            self.image_url = None

        def handle_startendtag(self, tag, attrs):
            if tag == "meta":
                attrs_dict = dict(attrs)
                if attrs_dict.get('property') == 'og:image' and attrs_dict.get('content'):
                    if self.image_url:
                        logging.warning("Multiple image tags found")
                    self.image_url = attrs_dict["content"]

    parser = ImageParser()
    try:
        strip_file = open_url(strip_url)
    except IOError as e:
        if getattr(e, 'code', None) == 302:
            # Skip over strip URLs that generate redirects, they must not have
            # existed.
            return None
        else:
            logging.warning("Could not extract strip URL", exc_info=True)
            return None
    parser.feed(strip_file.read().decode())
    parser.close()
    strip_file.close()

    return parser.image_url


title, strips = get_homepage_data(sys.argv[1])

print('<?xml version="1.0" encoding="utf-8"?>')
print('<feed xmlns="http://www.w3.org/2005/Atom">')
print('<title>%s</title>' % xml_escape(title))

strip_count = 0
for strip_date, strip_url in strips:
    strip_image_url = get_strip_image_url(strip_url)
    if not strip_image_url:
        continue
    strip_count += 1
    print('<entry>')
    print('  <title>%s</title>' % xml_escape(strip_date.strftime('%A, %B %d, %Y')))
    print('  <id>%s</id>' % xml_escape(strip_url))
    print('  <updated>%sT12:00:00.000Z</updated>' % strip_date.isoformat())
    print('  <link rel="alternate" href="%s"/>' % xml_escape(strip_url))
    print('  <content type="xhtml">')
    print('    <div xmlns="%s"><img src="%s"/></div>' % (XHTML_NS, xml_escape(strip_image_url)))
    print('  </content>')
    print('</entry>')

if not strip_count:
    # Emit a single error entry so that feed readers surface the breakage.
    print('<entry>')
    print('  <title>Could not scrape feed</title>')
    print('  <id>tag:persistent.info,2013:gocomics-scrape-error-%s</id>' % datetime.date.today().isoformat())
    print('  <updated>%sT12:00:00.000Z</updated>' % datetime.date.today().isoformat())
    print('  <content type="xhtml">')
    print('    <div xmlns="%s">Could not scrape the feed. Check the GitHub repository for updates.</div>' % XHTML_NS)
    print('  </content>')
    print('</entry>')


print('</feed>')

--------------------------------------------------------------------------------
/openai.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import datetime
import json
import subprocess
import typing
from xml.sax.saxutils import escape as xml_escape

XHTML_NS = 'http://www.w3.org/1999/xhtml'

class Article(typing.NamedTuple):
    title: str
    url: str
    category: str
    published_date: datetime.datetime

def fetch_articles():
    # We need to shell out to curl because the OpenAI website rejects HTTP 1.1
    # requests (curl, unlike urllib, can negotiate a newer protocol).
    result = subprocess.run([
        'curl', 'https://openai.com/backend/pages/?limit=12&sort=new&type=Article',
        '-H', 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        '-H', 'accept-language: en-US,en;q=0.9,ro;q=0.8',
        '-H', 'priority: u=0, i',
        '-H', 'sec-ch-ua: "Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
        '-H', 'sec-ch-ua-platform: "macOS"',
        '-H', 'sec-fetch-dest: document',
        '-H', 'sec-fetch-mode: navigate',
        '-H', 'sec-fetch-site: none',
        '-H', 'upgrade-insecure-requests: 1',
        '-H', 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True)

    if result.returncode != 0:
        raise Exception(result.stderr)

    # An HTML document instead of JSON means that we got an error page.
    if result.stdout.startswith("<"):
        raise Exception("OpenAI website rejected the request")

    articles_json = json.loads(result.stdout)
    articles = []
    for article_json in articles_json.get("items", []):
        if article_json.get("pageType") != "Article":
            continue
        published_date_str = article_json["publicationDate"]
        published_date = datetime.datetime.fromisoformat(published_date_str)

        article = Article(
            title=article_json["title"],
            url=f"https://openai.com{article_json['slug']}",
            published_date=published_date,
            category=article_json.get("category", {}).get("name"),
        )
        articles.append(article)

    return sorted(articles, key=lambda t: t.published_date, reverse=True)


print('<?xml version="1.0" encoding="utf-8"?>')
print('<feed xmlns="http://www.w3.org/2005/Atom">')
print('<title>OpenAI News (Unofficial)</title>')
print('<link rel="alternate" href="https://openai.com/news/"/>')

for article in fetch_articles():
    print('<entry>')
    print('  <title>%s</title>' % xml_escape(article.title))
    print('  <id>%s</id>' % xml_escape(article.url))
    print('  <updated>%sT12:00:00.000Z</updated>' % article.published_date.date().isoformat())
    print('  <link rel="alternate" href="%s"/>' % xml_escape(article.url))
    if article.category:
        print('  <category term="%s"/>' % xml_escape(article.category))
    print('</entry>')

print('</feed>')
--------------------------------------------------------------------------------