', 'html.parser')
5 | tab = tabsoup.table
6 | for row in seq_of_rows:
7 | tr = tabsoup.new_tag('tr')
8 | tab.append(tr)
9 | for item in row:
10 | td = tabsoup.new_tag('td')
11 | tr.append(td)
12 | td.string = str(item)
13 | return tab
14 |
15 |
16 | # Here is an example using the function we just defined:
17 | example = (
18 | ('foo', 'g>h', 'g&h'),
19 | ('zip', 'zap', 'zop'),
20 | )
21 |
22 | print(mktable_with_bs4(example))
23 | # prints:
24 | #
25 |
--------------------------------------------------------------------------------
/22_Structured_Text_HTML/bs4_css_selectors.py:
--------------------------------------------------------------------------------
1 | import bs4
2 |
def foo_child_of_bar(t):
    """Tell whether tag *t* is a 'foo' tag sitting directly under a 'bar' tag.

    Intended as a filter function for ``find_all``.  Always returns a real
    bool: a tag without a parent yields ``False`` instead of leaking ``None``.
    """
    return (t.name == 'foo'
            and t.parent is not None
            and t.parent.name == 'bar')
5 |
6 | soup = bs4.BeautifulSoup('Plain bold')
7 |
8 | # return tags with name 'foo' children of tags with name 'bar'
9 | soup.find_all(foo_child_of_bar)
10 |
11 | # exactly equivalent, with no custom filter function needed
12 | soup.select('bar > foo')
13 |
14 |
15 | ###
16 | # code to execute the above search statements and print the results of each
17 | with open(__file__) as source:
18 | for line in source:
19 | line = line.rstrip()
20 | if line == "###":
21 | break
22 |
23 | print(line)
24 | if line.startswith("soup."):
25 | statement = line.partition("#")[0]
26 | exec(f"print({statement})", globals())
27 |
--------------------------------------------------------------------------------
/22_Structured_Text_HTML/bs4_editing_and_creating_html.py:
--------------------------------------------------------------------------------
1 | import bs4
2 |
3 | # Building and adding new nodes
4 |
5 | s = bs4.NavigableString(' some text ')
6 |
7 | soup = bs4.BeautifulSoup()
8 | t = soup.new_tag('foo', bar='baz')
9 | print(t)
10 |
11 | t.append(s)
12 | print(t)
13 |
14 | print(t.string.wrap(soup.new_tag('moo', zip='zaap')))
15 | print(t)
16 |
17 |
18 | # Replacing and removing nodes
19 |
20 | soup = bs4.BeautifulSoup(
21 | 'first second third
')
22 | i = soup.i.replace_with('last')
23 | soup.b.append(i)
24 | print(soup)
25 |
26 | empty_i = soup.i.unwrap()
27 | print(soup.b.wrap(empty_i))
28 | print(soup.body)
29 |
30 | soup.i.clear()
31 | print(soup)
32 |
33 | soup.p.decompose()
34 | print(soup)
35 |
36 | soup.body.decompose()
37 | print(soup)
38 |
--------------------------------------------------------------------------------
/22_Structured_Text_HTML/bs4_getting_an_actual_string.py:
--------------------------------------------------------------------------------
1 | """
2 | >>> import bs4
3 | >>> soup = bs4.BeautifulSoup('Plain bold
')
4 | >>> print(soup.p.string)
5 | None
6 | >>> print(soup.p.b.string)
7 | bold
8 | >>> print(soup.get_text())
9 | Plain bold
10 | >>> print(soup.text)
11 | Plain bold
12 | >>> print(soup.get_text(strip=True))
13 | Plainbold
14 | """
15 |
16 | if __name__ == '__main__':
17 | # use doctest to simulate console sessions
18 | import doctest
19 | doctest.testmod(verbose=True, exclude_empty=True)
20 |
--------------------------------------------------------------------------------
/22_Structured_Text_HTML/bs4_html_parsing_example.py:
--------------------------------------------------------------------------------
1 | import urllib.request, urllib.parse, bs4
2 |
# The HTTP response is a context manager; leaving the block closes the
# connection as soon as the document has been parsed.
with urllib.request.urlopen('http://www.python.org') as f:
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    b = bs4.BeautifulSoup(f, 'html.parser')

# Print every distinct absolute http:// link found in the page, once each.
seen = set()
for anchor in b('a'):
    url = anchor.get('href')
    if url is None or url in seen:
        continue
    seen.add(url)
    pieces = urllib.parse.urlparse(url)
    if pieces.scheme == 'http':
        print(urllib.parse.urlunparse(pieces))
15 |
--------------------------------------------------------------------------------
/22_Structured_Text_HTML/bs4_indexing_instances_of_tag.py:
--------------------------------------------------------------------------------
1 | """
2 | >>> import bs4
3 |
4 | >>> s = bs4.BeautifulSoup('baz')
5 | >>> s.get('foo')
6 | >>> s.p.get('foo')
7 | 'bar'
8 | >>> s.p.attrs
9 | {'foo': 'bar', 'class': ['ic']}
10 |
11 | """
12 |
13 | if __name__ == '__main__':
14 | # use doctest to simulate console sessions
15 | import doctest
16 | doctest.testmod(verbose=True, exclude_empty=True)
17 |
--------------------------------------------------------------------------------
/22_Structured_Text_HTML/bs4_search_methods.py:
--------------------------------------------------------------------------------
1 | import bs4
2 | import re
3 |
4 | """
5 | For any Tag instance t and any group of positional and named arguments
6 | represented by ... the following equivalence always holds:
7 |
8 | just_one = t.find(...)
9 | other_way_list = t.find_all(..., limit=1)
10 | other_way = other_way_list[0] if other_way_list else None
11 | assert just_one == other_way
12 | """
13 |
14 | soup = bs4.BeautifulSoup('''\
15 | a B tag
16 | with inner abahh
17 |
18 | and bb
19 |
20 |
21 | foo foo other
22 | ''')
23 |
def child_of_foo(tag):
    """Tell whether *tag*'s direct parent is a tag named 'foo'.

    Intended as a filter function for ``find_all``.  A tag that has no
    parent simply does not match, rather than raising AttributeError.
    """
    parent = tag.parent
    return parent is not None and parent.name == 'foo'
26 |
27 | # search method arguments: name
28 |
29 | # return all instances of Tag 'b' in the document
30 | soup.find_all('b') # or soup.find_all(name='b')
31 |
32 | # return all instances of Tags 'b' and 'bah' in the document
33 | soup.find_all(['b', 'bah'])
34 |
35 | # return all instances of Tags starting with 'b' in the document
36 | soup.find_all(re.compile(r'^b'))
37 |
38 | # return all instances of Tags including string 'bah' in the document
39 | soup.find_all(re.compile(r'bah'))
40 |
41 | # return all instances of Tags whose parent's name is 'foo'
42 | soup.find_all(child_of_foo)
43 |
44 |
45 | # search method arguments: string
46 |
47 | # return all instances of NavigableString whose text is 'foo'
48 | soup.find_all(string='foo')
49 |
50 | # return all instances of Tag 'b' whose .string's text is 'foo'
51 | soup.find_all('b', string='foo')
52 |
53 |
54 | # search method arguments: attrs
55 |
56 | # return all instances of Tag 'b' w/an attribute 'foo' and no 'bar'
57 | soup.find_all('b', {'foo': True, 'bar': None})
58 |
59 |
60 | ###
61 | # code to execute the above search statements and print the results of each
62 | with open(__file__) as source:
63 | for line in source:
64 | line = line.rstrip()
65 | if line == "###":
66 | break
67 |
68 | print(line)
69 | if line.startswith("soup."):
70 | statement = line.partition("#")[0]
71 | exec(f"print({statement})", globals())
72 |
--------------------------------------------------------------------------------
/22_Structured_Text_HTML/bs4_unicode_and_encoding.py:
--------------------------------------------------------------------------------
1 | """
2 | >>> import bs4
3 |
4 | >>> s = bs4.BeautifulSoup('
hello', 'html.parser')
5 | >>> print(s.prettify())
6 |
7 | hello
8 |
9 | >>> print(s.decode())
10 | hello
11 | >>> print(s.encode())
12 | b'hello
'
13 |
14 | """
15 |
16 | if __name__ == '__main__':
17 | # use doctest to simulate console sessions
18 | import doctest
19 | doctest.testmod(verbose=True, exclude_empty=True)
20 |
--------------------------------------------------------------------------------
/22_Structured_Text_HTML/bs4_which_parser.py:
--------------------------------------------------------------------------------
1 | """
2 | >>> import bs4
3 | >>> s = bs4.BeautifulSoup('hello', 'html.parser')
4 | >>> # requires lxml be installed
5 | >>> sx = bs4.BeautifulSoup('
hello', 'xml')
6 | >>> sl = bs4.BeautifulSoup('
hello', 'lxml')
7 | >>> s5 = bs4.BeautifulSoup('
hello', 'html5lib')
8 | >>> print(s, s.is_xml)
9 |
hello
False
10 | >>> print(sx, sx.is_xml)
11 | hello
True
12 | >>> print(sl, sl.is_xml)
13 | hello
False
14 | >>> print(s5, s5.is_xml)
15 | hello
False
16 | """
17 |
18 | if __name__ == '__main__':
19 | # use doctest to simulate console sessions
20 | import doctest
21 | doctest.testmod(verbose=True, exclude_empty=True)
22 |
--------------------------------------------------------------------------------
/22_Structured_Text_HTML/jinja2_building_html.py:
--------------------------------------------------------------------------------
1 | import jinja2
2 |
3 | TABLE_TEMPLATE = '''\
4 |
5 | {% for s in s_of_s %}
6 |
7 | {% for item in s %}
8 | {{item}} |
9 | {% endfor %}
10 |
11 | {% endfor %}
12 |
'''
13 |
def mktable_with_jinja2(s_of_s):
    """Render the sequence of sequences *s_of_s* through TABLE_TEMPLATE.

    The environment autoescapes rendered values and trims the whitespace
    that the template's block tags would otherwise leave behind.
    """
    options = dict(trim_blocks=True, lstrip_blocks=True, autoescape=True)
    template = jinja2.Environment(**options).from_string(TABLE_TEMPLATE)
    return template.render(s_of_s=s_of_s)
21 |
22 | example = (
23 | ('foo', 'g>h', 'g&h'),
24 | ('zip', 'zap', 'zop'),
25 | )
26 | print(mktable_with_jinja2(example))
27 |
--------------------------------------------------------------------------------
/23_Structured_Text_XML/building_an_elementtree_from_scratch.py:
--------------------------------------------------------------------------------
1 | import csv
2 | from xml.etree import ElementTree as et
3 |
# Build <menu><food><calories/><name/></food>...</menu> from the rows of
# menu.csv (one "calories,name" pair per row) and save it as menu.xml.
menu = et.Element('menu')
tree = et.ElementTree(menu)
# The csv module asks for newline='' so that it can do its own newline
# handling (quoted fields may legitimately contain line breaks).
with open('menu.csv', newline='') as f:
    r = csv.reader(f)
    for row in r:
        if not row:
            # csv.reader yields [] for a blank line; skip it instead of
            # failing the two-name unpacking below
            continue
        calories, namestr = row
        food = et.SubElement(menu, 'food')
        cals = et.SubElement(food, 'calories')
        cals.text = calories
        name = et.SubElement(food, 'name')
        name.text = namestr

tree.write('menu.xml')
16 |
--------------------------------------------------------------------------------
/23_Structured_Text_XML/menu.csv:
--------------------------------------------------------------------------------
1 | 600,French Toast
2 | 650,Belgian Waffles
3 | 900,Berry-Berry Belgian Waffles
4 | 900,Strawberry Belgian Waffles
5 | 950,Homestyle Breakfast
6 |
--------------------------------------------------------------------------------
/23_Structured_Text_XML/parsing_xml_iteratively_1.py:
--------------------------------------------------------------------------------
1 | import heapq
2 | from xml.etree import ElementTree as et
3 |
4 |
def cals_and_name():
    """Generate a (calories, name) tuple for each food element of menu.xml.

    Calories are converted to int; the name is the element's text.
    """
    for _event, node in et.iterparse('menu.xml'):
        # only the default events are requested, so a food element seen
        # here already has its calories and name children available
        if node.tag == 'food':
            calories = int(node.find('calories').text)
            label = node.find('name').text
            yield (calories, label)
14 |
15 |
16 | lowest10 = heapq.nsmallest(10, cals_and_name())
17 |
18 | for cals, name in lowest10:
19 | print(cals, name)
20 |
--------------------------------------------------------------------------------
/23_Structured_Text_XML/parsing_xml_iteratively_2.py:
--------------------------------------------------------------------------------
1 | import heapq
2 | from xml.etree import ElementTree as et
3 |
4 |
def cals_and_name():
    """Generate (calories, name) tuples from menu.xml, discarding as it goes.

    The first element whose start is reported is remembered as the root;
    each food element is detached from that root once its pair has been
    consumed, so already-processed foods do not stay attached to the tree.
    """
    top = None
    for kind, node in et.iterparse('menu.xml', ['start', 'end']):
        if kind == 'end':
            if node.tag == 'food':
                # the food is complete: report it, then drop it from the root
                calories = int(node.find('calories').text)
                label = node.find('name').text
                yield (calories, label)
                top.remove(node)
        elif top is None:
            # very first 'start' event: this element is the document root
            top = node
20 |
21 |
22 | lowest10 = heapq.nsmallest(10, cals_and_name())
23 |
24 | for cals, name in lowest10:
25 | print(cals, name)
26 |
27 |
28 |
--------------------------------------------------------------------------------
/23_Structured_Text_XML/parsing_xml_with_elementtree_parse.py:
--------------------------------------------------------------------------------
1 | from urllib import request
2 | from xml.etree import ElementTree as et
# Parse the remote menu inside a with-block so the HTTP response is closed
# as soon as the tree has been built, instead of being left open.
with request.urlopen('http://www.w3schools.com/xml/simple.xml') as content:
    tree = et.parse(content)
5 |
6 |
def bycal_and_name(e):
    """Sort key for a food element: (calories as int, name text)."""
    calories = int(e.find('calories').text)
    label = e.find('name').text
    return calories, label
9 | for e in sorted(tree.findall('food'), key=bycal_and_name):
10 | print(f"{e.find('calories').text} {e.find('name').text}")
11 |
12 | print()
13 |
14 | # add Buttered Toast to the menu
15 | menu = tree.getroot()
16 | toast = et.SubElement(menu, 'food')
17 | tcals = et.SubElement(toast, 'calories')
18 | tcals.text = '180'
19 | tname = et.SubElement(toast, 'name')
20 | tname.text = 'Buttered Toast'
21 | # remove anything related to 'berry' from the menu
22 | for e in menu.findall('food'):
23 | name = e.find('name').text
24 | if 'berry' in name.lower():
25 | menu.remove(e)
26 |
27 | for e in sorted(tree.findall('food'), key=bycal_and_name):
28 | print(f"{e.find('calories').text} {e.find('name').text}")
29 |
30 |
--------------------------------------------------------------------------------
/23_Structured_Text_XML/simple.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | Belgian Waffles
4 | 650
5 |
6 |
7 | Strawberry Belgian Waffles
8 | 900
9 |
10 |
11 | Berry-Berry Belgian Waffles
12 | 900
13 |
14 |
15 | French Toast
16 | 600
17 |
18 |
19 | Homestyle Breakfast
20 | 950
21 |
22 |
23 |
--------------------------------------------------------------------------------
/24_Distributing_Extensions_and_Programs/flask_setup.py:
--------------------------------------------------------------------------------
1 | """__doc__ for long_description goes here; omitted. """
2 | import re
3 | import ast
4 |
5 | from setuptools import setup
6 |
_version_re = re.compile(r'__version__\s+=\s+(.*)')

# Read the version from the package source rather than importing the package.
# The package listed in packages=[...] is 'flask', so its initializer is
# flask/__init__.py (two trailing underscores).
with open('flask/__init__.py', 'rb') as f:
    _version_match = _version_re.search(f.read().decode('utf-8'))
if _version_match is None:
    # fail with a clear message instead of an AttributeError on None
    raise RuntimeError('Cannot find __version__ in flask/__init__.py')
# the right-hand side is a Python literal; evaluate it safely
version = str(ast.literal_eval(_version_match.group(1)))
12 |
13 | setup(
14 | name='Flask',
15 | version=version,
16 | url='http://github.com/pallets/flask/',
17 | license='BSD',
18 | author='Armin Ronacher',
19 | author_email='armin.ronacher@active-4.com',
20 | description='A microframework based on Werkzeug, Jinja2 '
21 | 'and good intentions',
22 | long_description=__doc__,
23 | packages=['flask', 'flask.ext'],
24 | include_package_data=True,
25 | zip_safe=False,
26 | platforms='any',
27 | install_requires=[
28 | 'Werkzeug>=0.7',
29 | 'Jinja2>=2.4',
30 | 'itsdangerous>=0.21',
31 | 'click>=2.0',
32 | ],
33 | classifiers=[
34 | 'Development Status :: 4 - Beta',
35 | 'Environment :: Web Environment',
36 | 'Intended Audience :: Developers',
37 | 'License :: OSI Approved :: BSD License',
38 | 'Operating System :: OS Independent',
39 | 'Programming Language :: Python',
40 | 'Programming Language :: Python :: 2',
41 | 'Programming Language :: Python :: 2.6',
42 | 'Programming Language :: Python :: 2.7',
43 | 'Programming Language :: Python :: 3',
44 | 'Programming Language :: Python :: 3.3',
45 | 'Programming Language :: Python :: 3.4',
46 | 'Programming Language :: Python :: 3.5',
47 | 'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
48 | 'Topic :: Software Development :: Libraries :: Python Modules'
49 | ],
50 | entry_points='''
51 | [console_scripts]
52 | flask=flask.cli:main
53 | '''
54 | )
55 |
--------------------------------------------------------------------------------
/25_Extending_and_Embedding_Classic_Python/hello/hello.c:
--------------------------------------------------------------------------------
1 | #include
2 | static PyObject*
3 | hello(PyObject* self)
4 | {
5 | return Py_BuildValue("s", "Hello, Python extensions world!");
6 | }
7 | static char hello_docs[] =
8 | "hello(): return a popular greeting phrase\n";
9 | static PyMethodDef hello_funcs[] = {
10 | {"helloworld", (PyCFunction)hello, METH_NOARGS, hello_docs},
11 | {NULL}
12 | };
13 | static struct PyModuleDef hello_module = {
14 | PyModuleDef_HEAD_INIT,
15 | "hello",
16 | hello_docs,
17 | -1,
18 | hello_funcs
19 | };
20 |
21 | PyMODINIT_FUNC
22 | PyInit_hello(void)
23 | {
24 | return PyModule_Create(&hello_module);
25 | }
26 |
--------------------------------------------------------------------------------
/25_Extending_and_Embedding_Classic_Python/hello/hello_demo.py:
--------------------------------------------------------------------------------
1 | import hello
2 |
3 | print(hello.helloworld())
4 |
--------------------------------------------------------------------------------
/25_Extending_and_Embedding_Classic_Python/hello/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, Extension
2 | setup(name='hello',
3 | ext_modules=[Extension('hello',sources=['hello.c'])])
4 |
--------------------------------------------------------------------------------
/25_Extending_and_Embedding_Classic_Python/intpair/intpair_demo.py:
--------------------------------------------------------------------------------
1 | import intpair
2 |
3 | x = intpair.intpair(1.2, 3.4)
4 | print(x)
5 | print(x.first, x.second)
6 |
--------------------------------------------------------------------------------
/25_Extending_and_Embedding_Classic_Python/intpair/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, Extension
2 | setup(name='intpair',
3 | ext_modules=[Extension('intpair',sources=['intpair.c'])])
4 |
--------------------------------------------------------------------------------
/25_Extending_and_Embedding_Classic_Python/merge/merge.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | static PyObject*
4 | merge(PyObject* self, PyObject* args, PyObject* kwds)
5 | {
6 | static char* argnames[] = {"x","y","override",NULL};
7 | PyObject *x, *y;
8 | int override = 0;
9 | if(!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|i", argnames,
10 | &PyDict_Type, &x, &y, &override))
11 | return NULL;
12 | if(-1 == PyDict_Merge(x, y, override)) {
13 | if(!PyErr_ExceptionMatches(PyExc_AttributeError))
14 | return NULL;
15 | PyErr_Clear();
16 | if(-1 == PyDict_MergeFromSeq2(x, y, override))
17 | return NULL;
18 | }
19 | return Py_BuildValue("");
20 | }
21 | static char merge_docs[] = "\
22 | merge(x,y,override=False): merge into dict x the items of dict y (or\n\
23 | the pairs that are the items of y, if y is a sequence), with\n\
24 | optional override. Alters dict x directly, returns None.\n\
25 | ";
26 | static PyObject*
27 | mergenew(PyObject* self, PyObject* args, PyObject* kwds)
28 | {
29 | static char* argnames[] = {"x","y","override",NULL};
30 | PyObject *x, *y, *result;
31 | int override = 0;
32 | if(!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|i", argnames,
33 | &PyDict_Type, &x, &y, &override))
34 | return NULL;
35 | result = PyObject_CallMethod(x, "copy", "");
36 | if(! result)
37 | return NULL;
38 | if(-1 == PyDict_Merge(result, y, override)) {
39 | if(!PyErr_ExceptionMatches(PyExc_AttributeError))
40 | return NULL;
41 | PyErr_Clear();
42 | if(-1 == PyDict_MergeFromSeq2(result, y, override))
43 | return NULL;
44 | }
45 | return result;
46 | }
47 | static char mergenew_docs[] = "\
48 | mergenew(x,y,override=False): merge into dict x the items of dict y\n\
49 | (or the pairs that are the items of y, if y is a sequence), with\n\
50 | optional override. Does NOT alter x, but rather returns the\n\
51 | modified copy as the function's result.\n\
52 | ";
53 | static PyMethodDef merge_funcs[] = {
54 | {"merge", (PyCFunction)merge, METH_VARARGS | METH_KEYWORDS, merge_docs},
55 | {"mergenew", (PyCFunction)mergenew, METH_VARARGS | METH_KEYWORDS, mergenew_docs},
56 | {NULL}
57 | };
58 | static char merge_module_docs[] = "Example extension module";
59 | static struct PyModuleDef merge_module = {
60 | PyModuleDef_HEAD_INIT,
61 | "merge",
62 | merge_module_docs,
63 | -1,
64 | merge_funcs
65 | };
66 |
67 | PyMODINIT_FUNC
68 | PyInit_merge(void)
69 | {
70 |
71 | return PyModule_Create(&merge_module);
72 | }
73 |
--------------------------------------------------------------------------------
/25_Extending_and_Embedding_Classic_Python/merge/merge_demo.py:
--------------------------------------------------------------------------------
1 | import merge
2 | x = {'a':1,'b':2 }
3 | merge.merge(x,[['b',3],['c',4]])
4 | print(x) # prints: {'a':1, 'b':2, 'c':4 }
5 | print(merge.mergenew(x,{'a':5,'d':6},override=1)) # prints: {'a':5, 'b':2, 'c':4, 'd':6 }
6 | print(x) # prints: {'a':1, 'b':2, 'c':4 }
7 |
--------------------------------------------------------------------------------
/25_Extending_and_Embedding_Classic_Python/merge/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, Extension
2 | setup(name='merge',
3 | ext_modules=[Extension('merge',sources=['merge.c'])])
4 |
--------------------------------------------------------------------------------
/chapters/24 Packaging Programs and Extensions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pynutshell/pynut4/8692766c98aecf564bad3ad886daa803ab23d0e9/chapters/24 Packaging Programs and Extensions.pdf
--------------------------------------------------------------------------------
/chapters/25 Extending and Embedding Classic Python.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pynutshell/pynut4/8692766c98aecf564bad3ad886daa803ab23d0e9/chapters/25 Extending and Embedding Classic Python.pdf
--------------------------------------------------------------------------------
/static/Pian_cover2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pynutshell/pynut4/8692766c98aecf564bad3ad886daa803ab23d0e9/static/Pian_cover2.jpg
--------------------------------------------------------------------------------
/test/check_snippets.py:
--------------------------------------------------------------------------------
1 | #
2 | # check_snippets.py
3 | #
4 |
5 | from pathlib import Path
6 | import sys
7 | print(f"Python version: {sys.version}\n")
8 |
test_dir = Path(__file__).parent
project_dir = test_dir.parent
# rglob() yields paths in whatever order the filesystem lists them; sorting
# keeps the report identical from run to run and across platforms.
snippet_files = sorted(project_dir.rglob("*.py"))

# do they compile?
total_files = len(snippet_files)
fail_snip_files = []
for snip_file in snippet_files:
    log_snip = str(snip_file.relative_to(project_dir))
    try:
        compile(snip_file.read_text(encoding="UTF-8"), str(snip_file), "exec")
    except Exception as exc:
        # deliberately broad: any failure to read or compile a snippet is
        # recorded and reported, and the remaining files are still checked
        print(f"Failed to compile {log_snip}")
        print(exc)
        fail_snip_files.append((snip_file, str(exc)))
    else:
        print(f"Successfully compiled {log_snip}")

# summary of the run
print()
if fail_snip_files:
    print(f"{len(fail_snip_files)} of {total_files} files failed to compile:")
    for snip_file, exc_msg in fail_snip_files:
        print(f"- {snip_file}\n  {exc_msg}\n")
else:
    print(f"All {total_files} .py files compiled")

# exit status: 0 when everything compiled, 1 otherwise
sys.exit(1 if fail_snip_files else 0)
37 |
--------------------------------------------------------------------------------