├── .github └── workflows │ └── publish-to-github-pages.yml ├── .gitignore ├── Book ├── Makefile ├── TODO │ ├── constants.rst │ ├── creating_ext.rst │ ├── final.rst │ ├── functions.rst │ ├── ini.rst │ ├── managing_memory.rst │ ├── resources.rst │ ├── sapis.rst │ ├── streams.rst │ └── zend_engine.rst ├── _static │ ├── .gitignore │ └── style.css ├── _templates │ ├── .gitignore │ └── layout.html ├── conf.py ├── index.rst ├── php5 │ ├── build_system.rst │ ├── build_system │ │ ├── building_extensions.rst │ │ └── building_php.rst │ ├── classes_objects.rst │ ├── classes_objects │ │ ├── custom_object_storage.rst │ │ ├── implementing_typed_arrays.rst │ │ ├── internal_structures_and_implementation.rst │ │ ├── iterators.rst │ │ ├── magic_interfaces_comparable.rst │ │ ├── object_handlers.rst │ │ ├── serialization.rst │ │ └── simple_classes.rst │ ├── hashtables.rst │ ├── hashtables │ │ ├── array_api.rst │ │ ├── basic_structure.rst │ │ ├── hash_algorithm.rst │ │ ├── hashtable_api.rst │ │ └── images │ │ │ ├── basic_hashtable.svg │ │ │ ├── doubly_linked_hashtable.svg │ │ │ └── ordered_hashtable.svg │ ├── introduction.rst │ ├── zvals.rst │ └── zvals │ │ ├── basic_structure.rst │ │ ├── casts_and_operations.rst │ │ └── memory_management.rst ├── php7 │ ├── build_system.rst │ ├── build_system │ │ ├── building_extensions.rst │ │ └── building_php.rst │ ├── classes_objects.rst │ ├── classes_objects │ │ └── object_handlers.rst │ ├── debugging.rst │ ├── extensions_design.rst │ ├── extensions_design │ │ ├── extension_infos.rst │ │ ├── extension_skeleton.rst │ │ ├── globals_management.rst │ │ ├── hooks.rst │ │ ├── images │ │ │ ├── php_classic_lifetime.png │ │ │ ├── php_extensions_ini.png │ │ │ ├── php_extensions_lifecycle.odg │ │ │ ├── php_extensions_lifecycle.png │ │ │ ├── php_extensions_lifecycle_full.odg │ │ │ ├── php_extensions_lifecycle_full.png │ │ │ ├── php_lifetime.png │ │ │ ├── php_lifetime_process.png │ │ │ ├── php_lifetime_thread.png │ │ │ └── php_minfo.png │ │ ├── ini_settings.rst 
│ │ ├── php_functions.rst │ │ ├── php_lifecycle.rst │ │ └── zend_extensions.rst │ ├── internal_types.rst │ ├── internal_types │ │ ├── functions.rst │ │ ├── functions │ │ │ └── callables.rst │ │ ├── hashtables.rst │ │ ├── strings.rst │ │ ├── strings │ │ │ ├── images │ │ │ │ └── zend_string_memory_layout.png │ │ │ ├── printing_functions.rst │ │ │ ├── smart_str.rst │ │ │ └── zend_strings.rst │ │ └── zend_resources.rst │ ├── introduction.rst │ ├── memory_management.rst │ ├── memory_management │ │ ├── memory_debugging.rst │ │ └── zend_memory_manager.rst │ ├── zend_engine.rst │ ├── zend_engine │ │ ├── zend_compiler.rst │ │ ├── zend_executor.rst │ │ └── zend_opcache.rst │ ├── zvals.rst │ └── zvals │ │ ├── basic_structure.rst │ │ ├── casts_and_operations.rst │ │ ├── memory_management.rst │ │ └── references.rst └── tests │ ├── echo_basic.phpt │ ├── examining_failed_test_output.rst │ ├── introduction.rst │ ├── overview.rst │ ├── phpt_file_structure.rst │ └── running_the_test_suite.rst ├── LICENSE.md ├── README.md ├── build_html.sh ├── build_release_html.sh ├── build_release_latex.sh └── generate_php5_redirects.php /.github/workflows/publish-to-github-pages.yml: -------------------------------------------------------------------------------- 1 | name: Publish to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@v2 14 | 15 | - name: Install rsync 16 | run: sudo apt-get update && sudo apt-get install -y rsync 17 | 18 | - name: Setup Python 3.x 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: '3.x' 22 | 23 | - name: Install Sphinx 24 | run: pip install -U Sphinx 25 | 26 | - name: Setup PHP 8.1 27 | uses: shivammathur/setup-php@v2 28 | with: 29 | php-version: '8.1' 30 | coverage: none 31 | extensions: none 32 | tools: none 33 | 34 | - name: Generate HTML release 35 | run: ./build_release_html.sh 36 | 37 | - name: Publish generated 
content to GitHub Pages 38 | uses: JamesIves/github-pages-deploy-action@v4.2.2 39 | with: 40 | folder: BookHTML 41 | branch: gh-pages 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | doctrees/ 2 | BookHTML/ 3 | BookLatex/ 4 | *.swp 5 | -------------------------------------------------------------------------------- /Book/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = ../BookHTML 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PHPInternalsBook.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PHPInternalsBook.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/PHPInternalsBook" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PHPInternalsBook" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 
108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /Book/TODO/constants.rst: -------------------------------------------------------------------------------- 1 | Constants 2 | ========= 3 | 4 | You've learnt how to deal with variables; here we'll introduce an important 5 | concept: constants. Fortunately, they are much easier to master than 6 | variables. You should know the concepts behind constants as they are widely 7 | used in PHP. 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | .. 15 | constants/structure.rst 16 | constants/main_api.rst 17 | constants/compile_time_substitution.rst 18 | -------------------------------------------------------------------------------- /Book/TODO/creating_ext.rst: -------------------------------------------------------------------------------- 1 | Creating PHP extensions 2 | ======================= 3 | 4 | You now have enough knowledge to begin creating your first extension. We’ll 5 | reuse all you’ve learnt so far and turn it into a practical case. We’ll then 6 | start writing new PHP functions in C and then cover the PHP functions scope from 7 | an internal point of view. 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | .. 15 | creating_ext/zendext_vs_ext.rst 16 | creating_ext/load_mechanism.rst 17 | creating_ext/automatic_tools.rst 18 | creating_ext/building_activating.rst 19 | creating_ext/lifecycle.rst 20 | creating_ext/my_first_ext.rst 21 | -------------------------------------------------------------------------------- /Book/TODO/final.rst: -------------------------------------------------------------------------------- 1 | Final thoughts 2 | ============== 3 | 4 | Here is the content we couldn't find a dedicated chapter for. We'll remind 5 | you of PHP's history, the wonderful path it's drawn, the different 6 | contributors that helped with it... 
Then we'll help you to get involved in 7 | PHP internal development by giving you hints on how to help us make 8 | this awesome project grow and master the Web. 9 | 10 | Contents: 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | .. 16 | final/history.rst 17 | final/contributors.rst 18 | final/getting_help.rst 19 | final/contributing.rst 20 | -------------------------------------------------------------------------------- /Book/TODO/functions.rst: -------------------------------------------------------------------------------- 1 | Functions 2 | ========= 3 | 4 | You’ve just built your first extension, and it works! That’s great! Now we 5 | will dive deeper into a very important concept: PHP functions. We’ll cover the 6 | subject showing how to declare functions, how to accept parameters in them and 7 | how to make them return values. 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | .. 15 | functions/structure.rst 16 | functions/arguments.rst 17 | functions/param_parsing.rst 18 | functions/return_values.rst 19 | functions/error_handling.rst 20 | -------------------------------------------------------------------------------- /Book/TODO/ini.rst: -------------------------------------------------------------------------------- 1 | INI and configuration management 2 | ================================ 3 | 4 | A very big part of PHP behavior is driven by configuration parameters you 5 | should have heard about as ini settings. This chapter will teach you how to 6 | ask for some ini file parsing as well as how to cope with engine parameters, be 7 | it in your extension or more globally in the engine itself. 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | .. 
15 | ini/getting_config.rst 16 | ini/read_and_display.rst 17 | ini/adding_params.rst 18 | ini/ini_parser.rst 19 | -------------------------------------------------------------------------------- /Book/TODO/managing_memory.rst: -------------------------------------------------------------------------------- 1 | First step into code : Managing memory 2 | ====================================== 3 | 4 | Nobody can start programming inside PHP without knowing this layer. So we’ll 5 | start by introducing how to allocate/free/debug memory within PHP using the 6 | Zend Memory Manager layer. 7 | 8 | Contents: 9 | 10 | .. toctree:: 11 | :maxdepth: 2 12 | 13 | .. 14 | managing_memory/zendmm_intro.rst 15 | managing_memory/zendmm_config.rst 16 | managing_memory/useful_functions.rst 17 | managing_memory/persistent_mem.rst 18 | managing_memory/zendmm_internals.rst 19 | managing_memory/zendmm_in_phpland.rst 20 | managing_memory/phpext_examples.rst 21 | -------------------------------------------------------------------------------- /Book/TODO/resources.rst: -------------------------------------------------------------------------------- 1 | Resources 2 | ========= 3 | 4 | Resources may appear as a strange concept from PHP land. In fact, resources hold 5 | any data that cannot be held by other types, as they represent a sort of binding 6 | with an underlying OS structure, such as a file handle. Here, we'll detail how this 7 | type has been designed internally and how to deal with it. 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | .. 
15 | resources/definition.rst 16 | resources/zend_list.rst 17 | resources/basic_api.rst 18 | resources/persistence.rst 19 | -------------------------------------------------------------------------------- /Book/TODO/sapis.rst: -------------------------------------------------------------------------------- 1 | Server Application Programming Interfaces : SAPI 2 | ================================================ 3 | 4 | When you want to ask PHP something, you need to contact it. This sounds silly, 5 | but there are different ways of contacting PHP, different entry points. These 6 | are called SAPIs. This chapter will help you understand how they work, what 7 | their responsibilities are and finally, how to create your own, should you 8 | need it. 9 | 10 | Contents: 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | .. 16 | sapis/entry_point.rst 17 | sapis/list.rst 18 | sapis/structures.rst 19 | sapis/api.rst 20 | sapis/output_buffering.rst 21 | sapis/creation.rst 22 | -------------------------------------------------------------------------------- /Book/TODO/streams.rst: -------------------------------------------------------------------------------- 1 | Network streams 2 | =============== 3 | 4 | As soon as you want to play with the network, you need OS-specific calls such as 5 | sockets. PHP internals has abstracted all network calls in a multi-OS compatible 6 | complex layer that we will now detail for you. 7 | 8 | Contents: 9 | 10 | .. toctree:: 11 | :maxdepth: 2 12 | -------------------------------------------------------------------------------- /Book/TODO/zend_engine.rst: -------------------------------------------------------------------------------- 1 | Zend Engine 2 | =========== 3 | 4 | The big part, Zend Engine is PHP's heart. It's composed of many different pieces, 5 | each one having a responsibility. They all play together to make PHP alive. 
6 | Here you'll mainly dive into the virtual machine, and learn how PHP 7 | understands its own code, how it executes it on the fly and how it makes a 8 | response to your request. 9 | 10 | Contents: 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | .. 16 | zend_engine/lexer.rst 17 | zend_engine/parser.rst 18 | zend_engine/compiler.rst 19 | zend_engine/vm.rst 20 | zend_engine/vm_details.rst 21 | zend_engine/function_calls.rst 22 | -------------------------------------------------------------------------------- /Book/_static/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Book/_static/style.css: -------------------------------------------------------------------------------- 1 | div.content { 2 | font-size: 1em; 3 | } 4 | 5 | body { 6 | min-width: 45em; 7 | max-width: 60em; 8 | } 9 | -------------------------------------------------------------------------------- /Book/_templates/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Book/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block footer %} 3 | 12 | 21 | {% endblock %} -------------------------------------------------------------------------------- /Book/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # PHPInternalsBook documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Oct 26 14:36:43 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 
10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | from sphinx.highlighting import lexers 16 | from pygments.lexers.web import PhpLexer 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- PHP code highlights configuration ----------------------------------------- 24 | 25 | lexers['php'] = PhpLexer(startinline=True) 26 | lexers['php-annotations'] = PhpLexer(startinline=True) 27 | 28 | # -- General configuration ----------------------------------------------------- 29 | 30 | # If your documentation needs a minimal Sphinx version, state it here. 31 | #needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be extensions 34 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 35 | extensions = ['sphinx.ext.todo', 'sphinx.ext.ifconfig'] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # Add static stylesheets 41 | def setup(app): 42 | app.add_css_file('style.css') 43 | 44 | # The suffix of source filenames. 45 | source_suffix = '.rst' 46 | 47 | # The encoding of source files. 48 | #source_encoding = 'utf-8-sig' 49 | 50 | # The master toctree document. 51 | master_doc = 'index' 52 | 53 | # General information about the project. 54 | project = u'PHPInternalsBook' 55 | copyright = u'2013, Julien Pauli - Anthony Ferrara - Nikita Popov' 56 | 57 | # The version info for the project you're documenting, acts as replacement for 58 | # |version| and |release|, also used in various other places throughout the 59 | # built documents. 60 | # 61 | # The short X.Y version. 
62 | version = '1.0' 63 | # The full version, including alpha/beta/rc tags. 64 | release = '1.0' 65 | 66 | # The language for content autogenerated by Sphinx. Refer to documentation 67 | # for a list of supported languages. 68 | #language = None 69 | 70 | # There are two options for replacing |today|: either, you set today to some 71 | # non-false value, then it is used: 72 | #today = '' 73 | # Else, today_fmt is used as the format for a strftime call. 74 | #today_fmt = '%B %d, %Y' 75 | 76 | # List of patterns, relative to source directory, that match files and 77 | # directories to ignore when looking for source files. 78 | exclude_patterns = ['TODO'] 79 | 80 | # The reST default role (used for this markup: `text`) to use for all documents. 81 | #default_role = None 82 | 83 | # If true, '()' will be appended to :func: etc. cross-reference text. 84 | #add_function_parentheses = True 85 | 86 | # If true, the current module name will be prepended to all description 87 | # unit titles (such as .. function::). 88 | #add_module_names = True 89 | 90 | # If true, sectionauthor and moduleauthor directives will be shown in the 91 | # output. They are ignored by default. 92 | #show_authors = False 93 | 94 | # Code included with :: will be C (and not Python) 95 | highlight_language = 'c' 96 | 97 | # The name of the Pygments (syntax highlighting) style to use. 98 | pygments_style = 'sphinx' 99 | 100 | # A list of ignored prefixes for module index sorting. 101 | #modindex_common_prefix = [] 102 | 103 | 104 | # -- Options for HTML output --------------------------------------------------- 105 | 106 | # The theme to use for HTML and HTML Help pages. See the documentation for 107 | # a list of builtin themes. 108 | html_theme = 'haiku' 109 | 110 | # Theme options are theme-specific and customize the look and feel of a theme 111 | # further. For a list of options available for each theme, see the 112 | # documentation. 
113 | #html_theme_options = {} 114 | 115 | # Add any paths that contain custom themes here, relative to this directory. 116 | #html_theme_path = [] 117 | 118 | # The name for this set of Sphinx documents. If None, it defaults to 119 | # " v documentation". 120 | html_title = 'PHP Internals Book' 121 | 122 | # A shorter title for the navigation bar. Default is the same as html_title. 123 | #html_short_title = None 124 | 125 | # The name of an image file (relative to this directory) to place at the top 126 | # of the sidebar. 127 | #html_logo = None 128 | 129 | # The name of an image file (within the static path) to use as favicon of the 130 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 131 | # pixels large. 132 | #html_favicon = None 133 | 134 | # Add any paths that contain custom static files (such as style sheets) here, 135 | # relative to this directory. They are copied after the builtin static files, 136 | # so a file named "default.css" will overwrite the builtin "default.css". 137 | html_static_path = ['_static'] 138 | 139 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 140 | # using the given strftime format. 141 | #html_last_updated_fmt = '%b %d, %Y' 142 | 143 | # If true, SmartyPants will be used to convert quotes and dashes to 144 | # typographically correct entities. 145 | #html_use_smartypants = True 146 | 147 | # Custom sidebar templates, maps document names to template names. 148 | #html_sidebars = {} 149 | 150 | # Additional templates that should be rendered to pages, maps page names to 151 | # template names. 152 | #html_additional_pages = {} 153 | 154 | # If false, no module index is generated. 155 | #html_domain_indices = True 156 | 157 | # If false, no index is generated. 158 | #html_use_index = True 159 | 160 | # If true, the index is split into individual pages for each letter. 161 | #html_split_index = False 162 | 163 | # If true, links to the reST sources are added to the pages. 
164 | #html_show_sourcelink = True 165 | 166 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 167 | html_show_sphinx = False 168 | 169 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 170 | #html_show_copyright = True 171 | 172 | # If true, an OpenSearch description file will be output, and all pages will 173 | # contain a tag referring to it. The value of this option must be the 174 | # base URL from which the finished HTML is served. 175 | #html_use_opensearch = '' 176 | 177 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 178 | #html_file_suffix = None 179 | 180 | # Output file base name for HTML help builder. 181 | htmlhelp_basename = 'PHPInternalsBookdoc' 182 | 183 | 184 | # -- Options for LaTeX output -------------------------------------------------- 185 | 186 | # Use smaller font for code listings in PDF, so that there is about 100 chars per line 187 | from sphinx.highlighting import PygmentsBridge 188 | from pygments.formatters.latex import LatexFormatter 189 | 190 | class CustomLatexFormatter(LatexFormatter): 191 | def __init__(self, **options): 192 | super(CustomLatexFormatter, self).__init__(**options) 193 | self.verboptions = r"formatcom=\footnotesize" 194 | 195 | PygmentsBridge.latex_formatter = CustomLatexFormatter 196 | 197 | latex_elements = { 198 | # The paper size ('letterpaper' or 'a4paper'). 199 | #'papersize': 'letterpaper', 200 | 201 | # The font size ('10pt', '11pt' or '12pt'). 202 | #'pointsize': '10pt', 203 | 204 | # Additional stuff for the LaTeX preamble. 205 | #'preamble': '', 206 | } 207 | 208 | # Grouping the document tree into LaTeX files. List of tuples 209 | # (source start file, target name, title, author, documentclass [howto/manual]). 
210 | latex_documents = [ 211 | ('index', 'PHPInternalsBook.tex', u'PHPInternalsBook Documentation', 212 | u'Julien Pauli - Anthony Ferrara - Nikita Popov', 'manual'), 213 | ] 214 | 215 | # The name of an image file (relative to this directory) to place at the top of 216 | # the title page. 217 | #latex_logo = None 218 | 219 | # For "manual" documents, if this is true, then toplevel headings are parts, 220 | # not chapters. 221 | #latex_use_parts = False 222 | 223 | # If true, show page references after internal links. 224 | #latex_show_pagerefs = False 225 | 226 | # If true, show URL addresses after external links. 227 | #latex_show_urls = False 228 | 229 | # Documents to append as an appendix to all manuals. 230 | #latex_appendices = [] 231 | 232 | # If false, no module index is generated. 233 | #latex_domain_indices = True 234 | 235 | 236 | # -- Options for manual page output -------------------------------------------- 237 | 238 | # One entry per manual page. List of tuples 239 | # (source start file, name, description, authors, manual section). 240 | man_pages = [ 241 | ('index', 'phpinternalsbook', u'PHPInternalsBook Documentation', 242 | [u'Julien Pauli - Anthony Ferrara - Nikita Popov'], 1) 243 | ] 244 | 245 | # If true, show URL addresses after external links. 246 | #man_show_urls = False 247 | 248 | 249 | # -- Options for Texinfo output ------------------------------------------------ 250 | 251 | # Grouping the document tree into Texinfo files. List of tuples 252 | # (source start file, target name, title, author, 253 | # dir menu entry, description, category) 254 | texinfo_documents = [ 255 | ('index', 'PHPInternalsBook', u'PHPInternalsBook Documentation', 256 | u'Julien Pauli - Anthony Ferrara - Nikita Popov', 'PHPInternalsBook', 'One line description of project.', 257 | 'Miscellaneous'), 258 | ] 259 | 260 | # Documents to append as an appendix to all manuals. 261 | #texinfo_appendices = [] 262 | 263 | # If false, no module index is generated. 
264 | #texinfo_domain_indices = True 265 | 266 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 267 | #texinfo_show_urls = 'footnote' 268 | 269 | 270 | # -- Options for Epub output --------------------------------------------------- 271 | 272 | # Bibliographic Dublin Core info. 273 | epub_title = u'PHPInternalsBook' 274 | epub_author = u'Julien Pauli - Anthony Ferrara - Nikita Popov' 275 | epub_publisher = u'Julien Pauli - Anthony Ferrara - Nikita Popov' 276 | epub_copyright = u'2012, Julien Pauli - Anthony Ferrara - Nikita Popov' 277 | 278 | # The language of the text. It defaults to the language option 279 | # or en if the language is not set. 280 | #epub_language = '' 281 | 282 | # The scheme of the identifier. Typical schemes are ISBN or URL. 283 | #epub_scheme = '' 284 | 285 | # The unique identifier of the text. This can be an ISBN number 286 | # or the project homepage. 287 | #epub_identifier = '' 288 | 289 | # A unique identification for the text. 290 | #epub_uid = '' 291 | 292 | # A tuple containing the cover image and cover page html template filenames. 293 | #epub_cover = () 294 | 295 | # HTML files that should be inserted before the pages created by sphinx. 296 | # The format is a list of tuples containing the path and title. 297 | #epub_pre_files = [] 298 | 299 | # HTML files that should be inserted after the pages created by sphinx. 300 | # The format is a list of tuples containing the path and title. 301 | #epub_post_files = [] 302 | 303 | # A list of files that should not be packed into the epub file. 304 | #epub_exclude_files = [] 305 | 306 | # The depth of the table of contents in toc.ncx. 307 | #epub_tocdepth = 3 308 | 309 | # Allow duplicate toc entries. 
310 | #epub_tocdup = True 311 | -------------------------------------------------------------------------------- /Book/index.rst: -------------------------------------------------------------------------------- 1 | Table Of Contents 2 | ================= 3 | 4 | PHP 7 and PHP 8 5 | --------------- 6 | 7 | .. toctree:: 8 | :maxdepth: 3 9 | 10 | php7/introduction.rst 11 | php7/build_system.rst 12 | php7/zvals.rst 13 | php7/internal_types.rst 14 | php7/classes_objects.rst 15 | php7/extensions_design.rst 16 | php7/memory_management.rst 17 | php7/zend_engine.rst 18 | php7/debugging.rst 19 | 20 | .. 21 | php7/hashtables.rst 22 | php7/prerequisites.rst 23 | php7/php_first_look.rst 24 | php7/managing_memory.rst 25 | php7/creating_ext.rst 26 | php7/functions.rst 27 | php7/resources.rst 28 | php7/streams.rst 29 | php7/constants.rst 30 | php7/ini.rst 31 | php7/sapis.rst 32 | php7/zend_engine.rst 33 | php7/final.rst 34 | 35 | PHP 5 36 | ----- 37 | 38 | .. toctree:: 39 | :maxdepth: 2 40 | 41 | php5/introduction.rst 42 | php5/build_system.rst 43 | 44 | * Creating PHP extensions 45 | 46 | .. toctree:: 47 | :maxdepth: 2 48 | 49 | php5/zvals.rst 50 | 51 | * Implementing functions 52 | 53 | .. toctree:: 54 | :maxdepth: 2 55 | 56 | php5/hashtables.rst 57 | php5/classes_objects.rst 58 | 59 | Testing PHP Source 60 | ------------------ 61 | 62 | Writing tests applies to both PHP 5 & PHP 7. 63 | 64 | .. 
toctree:: 65 | :maxdepth: 3 66 | 67 | tests/introduction.rst 68 | tests/overview.rst 69 | tests/running_the_test_suite.rst 70 | tests/phpt_file_structure.rst 71 | tests/examining_failed_test_output.rst 72 | 73 | Index and search 74 | ================ 75 | 76 | * :ref:`genindex` 77 | * :ref:`search` 78 | -------------------------------------------------------------------------------- /Book/php5/build_system.rst: -------------------------------------------------------------------------------- 1 | Using the PHP build system 2 | ========================== 3 | 4 | In this chapter we'll explain how to use the PHP build system to compile both itself and additional extensions. This 5 | chapter will not yet be concerned with writing your own autoconf build instructions and only explain how to use the 6 | tooling. 7 | 8 | Contents: 9 | 10 | .. toctree:: 11 | :maxdepth: 2 12 | 13 | build_system/building_php.rst 14 | build_system/building_extensions.rst 15 | -------------------------------------------------------------------------------- /Book/php5/classes_objects.rst: -------------------------------------------------------------------------------- 1 | Classes and objects 2 | =================== 3 | 4 | In recent years PHP has been turning more and more from a procedural language to an object-oriented one. Even though 5 | the fundamentals are still of procedural nature (in particular large parts of the standard library) most library code 6 | nowadays is developed in terms of classes and objects. This chapter covers the rather complex internals of PHP's object 7 | orientation system. 8 | 9 | Contents: 10 | 11 | .. 
toctree:: 12 | :maxdepth: 2 13 | 14 | classes_objects/simple_classes.rst 15 | classes_objects/custom_object_storage.rst 16 | classes_objects/implementing_typed_arrays.rst 17 | classes_objects/object_handlers.rst 18 | classes_objects/iterators.rst 19 | classes_objects/serialization.rst 20 | classes_objects/magic_interfaces_comparable.rst 21 | classes_objects/internal_structures_and_implementation.rst 22 | 23 | .. todo:: 24 | * __construct is not always called 25 | * verify that ctors don't segfault or leak on manual call -------------------------------------------------------------------------------- /Book/php5/classes_objects/internal_structures_and_implementation.rst: -------------------------------------------------------------------------------- 1 | Internal structures and implementation 2 | ====================================== 3 | 4 | In this (last) section on object orientation in PHP we'll have a look at some of the internal structures that were 5 | previously only mentioned in passing. In particular we'll look more thoroughly at the default object structure and the object 6 | store. 7 | 8 | Object properties 9 | ----------------- 10 | 11 | Probably the most complicated part of PHP's object orientation system by far is the handling of object properties. In 12 | the following we'll take a look at some of its parts in more detail. 13 | 14 | Property storage 15 | ~~~~~~~~~~~~~~~~ 16 | 17 | In PHP object properties can be declared, but don't have to be. How can one efficiently handle such a situation? To find 18 | out let's recall the standard ``zend_object`` structure:: 19 | 20 | typedef struct _zend_object { 21 | zend_class_entry *ce; 22 | HashTable *properties; 23 | zval **properties_table; 24 | HashTable *guards; 25 | } zend_object; 26 | 27 | This structure contains two fields for storing properties: The ``properties`` hash table and the ``properties_table`` 28 | array of ``zval`` pointers.
Two separate fields are used to best handle both declared and dynamic properties: For the 29 | latter, i.e. properties that have not been declared in the class, there is no way around using the ``properties`` 30 | hash table (which uses a simple property name => value mapping). 31 | 32 | For declared properties on the other hand storing them in a hashtable would be overly wasteful: PHP's hash tables 33 | have a very high per-element overhead (of nearly one hundred bytes), but the only thing that really needs to be stored 34 | is a ``zval`` pointer for the value. For this reason PHP employs a small trick: The properties are stored in a normal 35 | C array and accessed using their offset. The offset for each property name is stored in a (global) hashtable in the 36 | class entry. Thus the property lookup happens with one additional level of indirection, i.e. rather than directly 37 | fetching the property value, first the property offset is fetched and that offset is then used to fetch the actual 38 | value. 39 | 40 | Property information (including the storage offset) is stored in ``class_entry->properties_info``. This hash table 41 | is a map of property names to ``zend_property_info`` structs:: 42 | 43 | typedef struct _zend_property_info { 44 | zend_uint flags; 45 | const char *name; 46 | int name_length; 47 | ulong h; /* hash of name */ 48 | int offset; /* storage offset */ 49 | const char *doc_comment; 50 | int doc_comment_len; 51 | zend_class_entry *ce; /* CE of declaring class */ 52 | } zend_property_info; 53 | 54 | One remaining question is what happens when both types of properties exist. In this case both structures will be used 55 | simultaneously: All properties will be written into the ``properties`` hashtable, but ``properties_table`` will still 56 | contain pointers to them. Note though that if both are used the properties table holds ``zval**`` values rather than 57 | ``zval*`` values. 
58 | 59 | Sometimes PHP needs the properties as a hashtable even if they are all declared, e.g. when the ``get_properties`` 60 | handler is used. In this case PHP also switches to using ``properties`` (or rather the hybrid approach described above). 61 | This is done using the ``rebuild_object_properties`` function:: 62 | 63 | ZEND_API HashTable *zend_std_get_properties(zval *object TSRMLS_DC) 64 | { 65 | zend_object *zobj; 66 | zobj = Z_OBJ_P(object); 67 | if (!zobj->properties) { 68 | rebuild_object_properties(zobj); 69 | } 70 | return zobj->properties; 71 | } 72 | 73 | Property name mangling 74 | ~~~~~~~~~~~~~~~~~~~~~~ 75 | 76 | Consider the following code snippet: 77 | 78 | .. code-block:: php 79 | 80 | class A { 81 | private $prop = 'A'; 82 | } 83 | 84 | class B extends A { 85 | private $prop = 'B'; 86 | } 87 | 88 | class C extends B { 89 | protected $prop = 'C'; 90 | } 91 | 92 | var_dump(new C); 93 | 94 | // Output: 95 | object(C)#1 (3) { 96 | ["prop":protected]=> 97 | string(1) "C" 98 | ["prop":"B":private]=> 99 | string(1) "B" 100 | ["prop":"A":private]=> 101 | string(1) "A" 102 | } 103 | 104 | In the above example you can see the "same" property ``$prop`` being defined three times: Once as a private property of 105 | ``A``, once as a private property of ``B`` and once as a protected property of ``C``. Even though these three properties 106 | have the same name they are still distinct properties and require separate storage. 107 | 108 | In order to support this situation PHP "mangles" the property name by including the type of the property and the 109 | defining class: 110 | 111 | .. 
code-block:: none 112 | 113 | class Foo { private $prop; } => "\0Foo\0prop" 114 | class Bar { private $prop; } => "\0Bar\0prop" 115 | class Rab { protected $prop; } => "\0*\0prop" 116 | class Oof { public $prop; } => "prop" 117 | 118 | As you can see public properties have "normal" names, protected ones get a ``\0*\0`` prefix (where ``\0`` are NUL bytes) 119 | and private ones start with ``\0ClassName\0``. 120 | 121 | Most of the time PHP does a good job hiding the mangled names from userland. You only get to see them in some rare 122 | cases, e.g. if you cast an object to array or look at serialization output. Internally you usually don't need to care 123 | about mangled names either, e.g. when using the ``zend_declare_property`` APIs the mangling is automatically done for 124 | you. 125 | 126 | The only places where you have to look out for mangled names are when you access the ``property_info->name`` field or when 127 | you try to directly access the ``zobj->properties`` hash. In these cases you can use the 128 | ``zend_(un)mangle_property_name`` APIs:: 129 | 130 | // Unmangling 131 | const char *class_name, *property_name; 132 | int property_name_len; 133 | 134 | if (zend_unmangle_property_name_ex( 135 | mangled_property_name, mangled_property_name_len, 136 | &class_name, &property_name, &property_name_len 137 | ) == SUCCESS) { 138 | // ... 139 | } 140 | 141 | // Mangling 142 | char *mangled_property_name; 143 | int mangled_property_name_len; 144 | 145 | zend_mangle_property_name( 146 | &mangled_property_name, &mangled_property_name_len, 147 | class_name, class_name_len, property_name, property_name_len, 148 | should_do_persistent_alloc ? 1 : 0 149 | ); 150 | 151 | Property recursion guards 152 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 153 | 154 | The last member in ``zend_object`` is the ``HashTable *guards`` field. To find out what it is used for, consider what 155 | happens in the following code using magic ``__set`` properties: 156 | 157 | ..
code-block:: php 158 | 159 | class Foo { 160 | public function __set($name, $value) { 161 | $this->$name = $value; 162 | } 163 | } 164 | 165 | $foo = new Foo; 166 | $foo->bar = 'baz'; 167 | var_dump($foo->bar); 168 | 169 | The ``$foo->bar = 'baz'`` assignment in the script will call ``$foo->__set('bar', 'baz')`` as the ``$bar`` property is 170 | not defined. The ``$this->$name = $value`` line in the method body in this case would become ``$foo->bar = 'baz'``. 171 | Once again ``$bar`` is an undefined property. So, does that mean that the ``__set`` method will be (recursively) called 172 | again? 173 | 174 | That's not what happens. Rather PHP sees that it is already within ``__set`` and does *not* do a recursive call. Instead 175 | it actually creates the new ``$bar`` property. In order to implement this behavior PHP uses recursion guards which 176 | remember whether PHP is already in ``__set`` etc for a certain property. These guards are stored in the ``guards`` hash 177 | table, which maps property names to ``zend_guard`` structures:: 178 | 179 | typedef struct _zend_guard { 180 | zend_bool in_get; 181 | zend_bool in_set; 182 | zend_bool in_unset; 183 | zend_bool in_isset; 184 | zend_bool dummy; /* sizeof(zend_guard) must not be equal to sizeof(void*) */ 185 | } zend_guard; 186 | 187 | Object store 188 | ------------ 189 | 190 | We already made a lot of use of the object store, so let's have a closer look at it now:: 191 | 192 | typedef struct _zend_objects_store { 193 | zend_object_store_bucket *object_buckets; 194 | zend_uint top; 195 | zend_uint size; 196 | int free_list_head; 197 | } zend_objects_store; 198 | 199 | The object store is basically a dynamically resized array of ``object_buckets``. ``size`` specifies the size of the 200 | allocation, whereas ``top`` is the next object handle to be used. Handles are counted starting from 1, to ensure that 201 | all handles are "truthy". 
Thus if ``top == 1`` the next object will get ``handle = 1``, but will be put at position 202 | ``object_buckets[0]``. 203 | 204 | The ``free_list_head`` is the head of a linked list of unused buckets. Whenever an object is destroyed it leaves behind 205 | an unused bucket, which is then put in this list. If a new object is created and such a bucket exists (i.e. 206 | ``free_list_head`` is not ``-1``), then this bucket is used instead of the ``top`` one. 207 | 208 | To see how this linked list is maintained have a look at the ``zend_object_store_bucket`` structure:: 209 | 210 | typedef struct _zend_object_store_bucket { 211 | zend_bool destructor_called; 212 | zend_bool valid; 213 | zend_uchar apply_count; 214 | union _store_bucket { 215 | struct _store_object { 216 | void *object; 217 | zend_objects_store_dtor_t dtor; 218 | zend_objects_free_object_storage_t free_storage; 219 | zend_objects_store_clone_t clone; 220 | const zend_object_handlers *handlers; 221 | zend_uint refcount; 222 | gc_root_buffer *buffered; 223 | } obj; 224 | struct { 225 | int next; 226 | } free_list; 227 | } bucket; 228 | } zend_object_store_bucket; 229 | 230 | If the bucket is in use (i.e. stores an object), then the ``valid`` member will be 1. In this case the 231 | ``struct _store_object`` part of the union will be used. If the bucket is not used, then ``valid`` will be 0 and PHP 232 | will make use of ``free_list.next``. 233 | 234 | This reclaiming of unused object handles can be shown with a small script: 235 | 236 | .. 
code-block:: php 237 | 238 | var_dump($a = new stdClass); // object(stdClass)#1 (0) {} 239 | var_dump($b = new stdClass); // object(stdClass)#2 (0) {} 240 | var_dump($c = new stdClass); // object(stdClass)#3 (0) {} 241 | 242 | unset($b); // free handle 2 243 | unset($a); // free handle 1 244 | 245 | var_dump($e = new stdClass); // object(stdClass)#1 (0) {} 246 | var_dump($f = new stdClass); // object(stdClass)#2 (0) {} 247 | 248 | As you can see the handles of ``$b`` and ``$a`` are reused in reverse order of destruction. 249 | 250 | Apart from ``valid`` the bucket structure also contains a ``destructor_called`` flag. This flag is needed for PHP's 251 | two-phase object destruction process: As already outlined previously PHP has distinct dtor (can run userland code, isn't 252 | always run) and free (must not run userland code, is always executed) phases. After the dtor handler has been called, 253 | the ``destructor_called`` flag is set to 1, so that the dtor is not run again when the object is freed. 254 | 255 | The ``apply_count`` member serves the same role as the ``nApplyCount`` member of ``HashTable``: It protects against 256 | infinite recursion. It is used via the macros ``Z_OBJ_UNPROTECT_RECURSION(zval_ptr)`` (leave recursion) and 257 | ``Z_OBJ_PROTECT_RECURSION(zval_ptr)`` (enter recursion). The latter will throw an error if the nesting level for an 258 | object is 3 or larger. Currently this protection mechanism is only used in the object comparison handler. 259 | 260 | The ``handlers`` member in the ``_store_object`` struct is also required for destruction. The reason for this is that 261 | the ``dtor`` handler only gets passed the stored object and its handle:: 262 | 263 | typedef void (*zend_objects_store_dtor_t)(void *object, zend_object_handle handle TSRMLS_DC); 264 | 265 | But in order to call ``__destruct`` PHP needs a zval. 
Thus it creates a temporary zval using the passed object handle 266 | and the object handlers stored in ``bucket.obj.handlers``. The issue is that this member can only be set if the object 267 | is destructed through ``zval_ptr_dtor`` or some other method where the zval (and as such the object handlers) is known. 268 | 269 | If on the other hand the object is destroyed during shutdown (using ``zend_objects_store_call_destructors``) the zval 270 | is *not* known. In this case ``bucket.obj.handlers`` will be ``NULL`` and PHP falls back to the default object handlers. 271 | Thus it can sometimes happen that overloaded object behavior is not available in ``__destruct``. An example: 272 | 273 | .. code-block:: php 274 | 275 | class DLL extends SplDoublyLinkedList { 276 | public function __destruct() { 277 | var_dump($this); 278 | } 279 | } 280 | 281 | $dll = new DLL; 282 | $dll->push(1); 283 | $dll->push(2); 284 | $dll->push(3); 285 | 286 | var_dump($dll); 287 | 288 | set_error_handler(function() use ($dll) {}); 289 | 290 | This code snippet adds a ``__destruct`` method to ``SplDoublyLinkedList`` and then forces the destructor to be called 291 | during shutdown by binding it to the error handler (the error handler is one of the last things that is freed during 292 | shutdown.) This will produce the following output: 293 | 294 | .. code-block:: none 295 | 296 | object(DLL)#1 (2) { 297 | ["flags":"SplDoublyLinkedList":private]=> 298 | int(0) 299 | ["dllist":"SplDoublyLinkedList":private]=> 300 | array(3) { 301 | [0]=> 302 | int(1) 303 | [1]=> 304 | int(2) 305 | [2]=> 306 | int(3) 307 | } 308 | } 309 | object(DLL)#1 (0) { 310 | } 311 | 312 | For the ``var_dump`` outside the destructor ``get_debug_info`` is invoked and you get meaningful debugging output. 313 | Inside the destructor PHP uses the default object handlers and as such you don't get anything apart from the class 314 | name. The same also applies to other handlers, e.g. 
things like cloning, comparison, etc will not work properly. 315 | 316 | This concludes the chapter on object orientation. You should now have a good understanding of how the object orientation 317 | system in PHP works and how extensions can make use of it. -------------------------------------------------------------------------------- /Book/php5/classes_objects/iterators.rst: -------------------------------------------------------------------------------- 1 | Iterators 2 | ========= 3 | 4 | In the last section we implemented a few object handlers to improve integration of typed arrays into the language. One 5 | aspect is still missing though: Iteration. In this section we'll look at how iterators are implemented internally and 6 | how we can make use of them. Once again typed arrays will serve as the example. 7 | 8 | The ``get_iterator`` handler 9 | ---------------------------- 10 | 11 | Internally iteration works very similar to the userland ``IteratorAggregate`` interface. The class has a 12 | ``get_iterator`` handler that returns a ``zend_object_iterator*``, which looks as follows:: 13 | 14 | struct _zend_object_iterator { 15 | void *data; 16 | zend_object_iterator_funcs *funcs; 17 | ulong index; /* private to fe_reset/fe_fetch opcodes */ 18 | }; 19 | 20 | The ``index`` member is used internally by the ``foreach`` implementation. It is incremented on each iteration and is 21 | used for the keys if you don't specify a custom key function. 
The ``funcs`` member contains handlers for the different 22 | iteration actions:: 23 | 24 | typedef struct _zend_object_iterator_funcs { 25 | /* release all resources associated with this iterator instance */ 26 | void (*dtor)(zend_object_iterator *iter TSRMLS_DC); 27 | 28 | /* check for end of iteration (FAILURE or SUCCESS if data is valid) */ 29 | int (*valid)(zend_object_iterator *iter TSRMLS_DC); 30 | 31 | /* fetch the item data for the current element */ 32 | void (*get_current_data)(zend_object_iterator *iter, zval ***data TSRMLS_DC); 33 | 34 | /* fetch the key for the current element (optional, may be NULL) */ 35 | void (*get_current_key)(zend_object_iterator *iter, zval *key TSRMLS_DC); 36 | 37 | /* step forwards to next element */ 38 | void (*move_forward)(zend_object_iterator *iter TSRMLS_DC); 39 | 40 | /* rewind to start of data (optional, may be NULL) */ 41 | void (*rewind)(zend_object_iterator *iter TSRMLS_DC); 42 | 43 | /* invalidate current value/key (optional, may be NULL) */ 44 | void (*invalidate_current)(zend_object_iterator *iter TSRMLS_DC); 45 | } zend_object_iterator_funcs; 46 | 47 | The handlers are pretty similar to the ``Iterator`` interface, only with slightly different names. The only handler 48 | that has no correspondence in userland is ``invalidate_current``, which can be used to destroy the current key/value. 49 | The handler is largely unused though, in particular ``foreach`` won't even call it. 50 | 51 | The remaining member of ``zend_object_iterator`` is ``data``, which can be used to carry around some custom data. Usually this one slot 52 | isn't enough though, so instead the structure is extended, similarly to what you have already seen with 53 | ``zend_object``. 54 | 55 | In order to iterate typed arrays we'll have to store a few things: First of all, we need to hold a reference to the 56 | buffer view object (otherwise it may be destroyed during iteration). We can store this in the ``data`` member.
57 | Furthermore we should keep around the ``buffer_view_object`` so we don't have to refetch it on every handler call. 58 | Additionally we'll have to store the current iteration ``offset`` and the ``zval*`` of the current element (you'll see 59 | a bit later why we need to do this):: 60 | 61 | typedef struct _buffer_view_iterator { 62 | zend_object_iterator intern; 63 | buffer_view_object *view; 64 | size_t offset; 65 | zval *current; 66 | } buffer_view_iterator; 67 | 68 | Let's also declare a dummy ``zend_object_iterator_funcs`` structure so we have something to work on:: 69 | 70 | static zend_object_iterator_funcs buffer_view_iterator_funcs = { 71 | buffer_view_iterator_dtor, 72 | buffer_view_iterator_valid, 73 | buffer_view_iterator_get_current_data, 74 | buffer_view_iterator_get_current_key, 75 | buffer_view_iterator_move_forward, 76 | buffer_view_iterator_rewind 77 | }; 78 | 79 | Now we can implement the ``get_iterator`` handler. This handler receives the class entry, the object and whether the 80 | iteration is done by reference and returns a ``zend_object_iterator*``. 
All we have to do is allocate the iterator and 81 | set the respective members:: 82 | 83 | zend_object_iterator *buffer_view_get_iterator( 84 | zend_class_entry *ce, zval *object, int by_ref TSRMLS_DC 85 | ) { 86 | buffer_view_iterator *iter; 87 | 88 | if (by_ref) { 89 | zend_throw_exception(NULL, "Cannot iterate buffer view by reference", 0 TSRMLS_CC); 90 | return NULL; 91 | } 92 | 93 | iter = emalloc(sizeof(buffer_view_iterator)); 94 | iter->intern.funcs = &buffer_view_iterator_funcs; 95 | 96 | iter->intern.data = object; 97 | Z_ADDREF_P(object); 98 | 99 | iter->view = zend_object_store_get_object(object TSRMLS_CC); 100 | iter->offset = 0; 101 | iter->current = NULL; 102 | 103 | return (zend_object_iterator *) iter; 104 | } 105 | 106 | Finally we have to adjust the macro for registering buffer view classes:: 107 | 108 | #define DEFINE_ARRAY_BUFFER_VIEW_CLASS(class_name, type) \ 109 | INIT_CLASS_ENTRY(tmp_ce, #class_name, array_buffer_view_functions); \ 110 | type##_array_ce = zend_register_internal_class(&tmp_ce TSRMLS_CC); \ 111 | type##_array_ce->create_object = array_buffer_view_create_object; \ 112 | type##_array_ce->get_iterator = buffer_view_get_iterator; \ 113 | type##_array_ce->iterator_funcs.funcs = &buffer_view_iterator_funcs; \ 114 | zend_class_implements(type##_array_ce TSRMLS_CC, 2, \ 115 | zend_ce_arrayaccess, zend_ce_traversable); 116 | 117 | The new things are the assignment to the ``get_iterator`` and ``iterator_funcs.funcs`` as well as the implementation 118 | of the ``Traversable`` interface. 
119 | 120 | Iterator functions 121 | ------------------ 122 | 123 | Now let's actually implement the ``buffer_view_iterator_funcs`` that we specified above:: 124 | 125 | static void buffer_view_iterator_dtor(zend_object_iterator *intern TSRMLS_DC) 126 | { 127 | buffer_view_iterator *iter = (buffer_view_iterator *) intern; 128 | 129 | if (iter->current) { 130 | zval_ptr_dtor(&iter->current); 131 | } 132 | 133 | zval_ptr_dtor((zval **) &intern->data); 134 | efree(iter); 135 | } 136 | 137 | static int buffer_view_iterator_valid(zend_object_iterator *intern TSRMLS_DC) 138 | { 139 | buffer_view_iterator *iter = (buffer_view_iterator *) intern; 140 | 141 | return iter->offset < iter->view->length ? SUCCESS : FAILURE; 142 | } 143 | 144 | static void buffer_view_iterator_get_current_data( 145 | zend_object_iterator *intern, zval ***data TSRMLS_DC 146 | ) { 147 | buffer_view_iterator *iter = (buffer_view_iterator *) intern; 148 | 149 | if (iter->current) { 150 | zval_ptr_dtor(&iter->current); 151 | } 152 | 153 | if (iter->offset < iter->view->length) { 154 | iter->current = buffer_view_offset_get(iter->view, iter->offset); 155 | *data = &iter->current; 156 | } else { 157 | *data = NULL; 158 | } 159 | } 160 | 161 | #if ZEND_MODULE_API_NO >= 20121212 162 | static void buffer_view_iterator_get_current_key( 163 | zend_object_iterator *intern, zval *key TSRMLS_DC 164 | ) { 165 | buffer_view_iterator *iter = (buffer_view_iterator *) intern; 166 | ZVAL_LONG(key, iter->offset); 167 | } 168 | #else 169 | static int buffer_view_iterator_get_current_key( 170 | zend_object_iterator *intern, char **str_key, uint *str_key_len, ulong *int_key TSRMLS_DC 171 | ) { 172 | buffer_view_iterator *iter = (buffer_view_iterator *) intern; 173 | 174 | *int_key = (ulong) iter->offset; 175 | return HASH_KEY_IS_LONG; 176 | } 177 | #endif 178 | 179 | static void buffer_view_iterator_move_forward(zend_object_iterator *intern TSRMLS_DC) 180 | { 181 | buffer_view_iterator *iter = (buffer_view_iterator *) 
intern; 182 | 183 | iter->offset++; 184 | } 185 | 186 | static void buffer_view_iterator_rewind(zend_object_iterator *intern TSRMLS_DC) 187 | { 188 | buffer_view_iterator *iter = (buffer_view_iterator *) intern; 189 | 190 | iter->offset = 0; 191 | iter->current = NULL; 192 | } 193 | 194 | The functions should be rather straightforward, so only a few comments: 195 | 196 | ``get_current_data`` gets a ``zval*** data`` as the parameter and expects us to write a ``zval**`` into it using 197 | ``*data = ...``. The ``zval**`` is required because iteration can also happen by reference, in which case ``zval*`` 198 | won't suffice. The ``zval**`` is the reason why we have to store the current ``zval*`` in the iterator. 199 | 200 | What the ``get_current_key`` handler looks like depends on the PHP version: With PHP 5.5 and later you simply have to write the 201 | key into the passed ``key`` variable using one of the ``ZVAL_*`` macros. 202 | 203 | On older versions of PHP the ``get_current_key`` handler takes three parameters that can be set depending on which key 204 | type is returned. If you return ``HASH_KEY_NON_EXISTENT`` the resulting key will be ``null`` and you don't have to set 205 | any of them. For ``HASH_KEY_IS_LONG`` you set the ``int_key`` argument. For ``HASH_KEY_IS_STRING`` you have to set 206 | ``str_key`` and ``str_key_len``. Note that here ``str_key_len`` is the string length plus one (similar to how it is done 207 | in the ``zend_hash`` APIs). 208 | 209 | Honoring inheritance 210 | -------------------- 211 | 212 | Once again we need to consider what happens when the user extends the class and wants to change the iteration behavior. 213 | Right now they would have to reimplement the iteration mechanism manually, because the individual iteration handlers are 214 | not exposed to userland (only through foreach). 215 | 216 | As with the object handlers we'll solve this by also implementing the normal ``Iterator`` interface.
This time 217 | we won't need special handling to ensure that PHP actually calls the overridden methods: PHP will automatically use the 218 | fast internal handlers when the class is used directly, but will use the ``Iterator`` methods if the class is extended. 219 | 220 | In order to implement the ``Iterator`` methods we have to add a new ``size_t current_offset`` member to 221 | ``buffer_view_object``, which stores the current offset for the iteration methods (and is completely separate from the 222 | iteration state used by ``get_iterator``-style iterators). The methods themselves are for the most part just argument 223 | checking boilerplate:: 224 | 225 | PHP_FUNCTION(array_buffer_view_rewind) 226 | { 227 | buffer_view_object *intern; 228 | 229 | if (zend_parse_parameters_none() == FAILURE) { 230 | return; 231 | } 232 | 233 | intern = zend_object_store_get_object(getThis() TSRMLS_CC); 234 | intern->current_offset = 0; 235 | } 236 | 237 | PHP_FUNCTION(array_buffer_view_next) 238 | { 239 | buffer_view_object *intern; 240 | 241 | if (zend_parse_parameters_none() == FAILURE) { 242 | return; 243 | } 244 | 245 | intern = zend_object_store_get_object(getThis() TSRMLS_CC); 246 | intern->current_offset++; 247 | } 248 | 249 | PHP_FUNCTION(array_buffer_view_valid) 250 | { 251 | buffer_view_object *intern; 252 | 253 | if (zend_parse_parameters_none() == FAILURE) { 254 | return; 255 | } 256 | 257 | intern = zend_object_store_get_object(getThis() TSRMLS_CC); 258 | RETURN_BOOL(intern->current_offset < intern->length); 259 | } 260 | 261 | PHP_FUNCTION(array_buffer_view_key) 262 | { 263 | buffer_view_object *intern; 264 | 265 | if (zend_parse_parameters_none() == FAILURE) { 266 | return; 267 | } 268 | 269 | intern = zend_object_store_get_object(getThis() TSRMLS_CC); 270 | RETURN_LONG((long) intern->current_offset); 271 | } 272 | 273 | PHP_FUNCTION(array_buffer_view_current) 274 | { 275 | buffer_view_object *intern; 276 | zval *value; 277 | 278 | if (zend_parse_parameters_none() ==
FAILURE) { 279 | return; 280 | } 281 | 282 | intern = zend_object_store_get_object(getThis() TSRMLS_CC); 283 | value = buffer_view_offset_get(intern, intern->current_offset); 284 | RETURN_ZVAL(value, 1, 1); 285 | } 286 | 287 | /* ... */ 288 | 289 | ZEND_BEGIN_ARG_INFO_EX(arginfo_buffer_view_void, 0, 0, 0) 290 | ZEND_END_ARG_INFO() 291 | 292 | /* ... */ 293 | 294 | PHP_ME_MAPPING(rewind, array_buffer_view_rewind, arginfo_buffer_view_void, ZEND_ACC_PUBLIC) 295 | PHP_ME_MAPPING(next, array_buffer_view_next, arginfo_buffer_view_void, ZEND_ACC_PUBLIC) 296 | PHP_ME_MAPPING(valid, array_buffer_view_valid, arginfo_buffer_view_void, ZEND_ACC_PUBLIC) 297 | PHP_ME_MAPPING(key, array_buffer_view_key, arginfo_buffer_view_void, ZEND_ACC_PUBLIC) 298 | PHP_ME_MAPPING(current, array_buffer_view_current, arginfo_buffer_view_void, ZEND_ACC_PUBLIC) 299 | 300 | Obviously we now should also implement ``Iterator`` rather than ``Traversable``:: 301 | 302 | #define DEFINE_ARRAY_BUFFER_VIEW_CLASS(class_name, type) \ 303 | INIT_CLASS_ENTRY(tmp_ce, #class_name, array_buffer_view_functions); \ 304 | type##_array_ce = zend_register_internal_class(&tmp_ce TSRMLS_CC); \ 305 | type##_array_ce->create_object = array_buffer_view_create_object; \ 306 | type##_array_ce->get_iterator = buffer_view_get_iterator; \ 307 | type##_array_ce->iterator_funcs.funcs = &buffer_view_iterator_funcs; \ 308 | zend_class_implements(type##_array_ce TSRMLS_CC, 2, \ 309 | zend_ce_arrayaccess, zend_ce_iterator); 310 | 311 | One last consideration regarding this: In general it is always better to implement ``IteratorAggregate`` rather than 312 | ``Iterator``, because ``IteratorAggregate`` decouples the iterator state from the main object. This is obviously simply 313 | better design, but also allows things like independent nested iteration. 
I still chose to implement ``Iterator`` here, 314 | because aggregates have a higher implementational overhead (as they require a separate class that has to interact with 315 | an independent object). -------------------------------------------------------------------------------- /Book/php5/classes_objects/magic_interfaces_comparable.rst: -------------------------------------------------------------------------------- 1 | Magic interfaces - Comparable 2 | ============================= 3 | 4 | Internal interfaces in PHP are very similar to their userland equivalents. The only notable difference is that internal 5 | interfaces have the additional possibility of specifying a handler that is executed when the interface is implemented. 6 | This feature can be used for various purposes like enforcing additional constraints or replacing handlers. We'll make 7 | use of it to implement a "magic" ``Comparable`` interface, which exposes the internal ``compare_objects`` handler to 8 | userland. 9 | 10 | The interface itself will look as follows: 11 | 12 | .. code-block:: php 13 | 14 | interface Comparable { 15 | static function compare($left, $right); 16 | } 17 | 18 | First, let's register this new interface in ``MINIT``:: 19 | 20 | zend_class_entry *comparable_ce; 21 | 22 | ZEND_BEGIN_ARG_INFO_EX(arginfo_comparable, 0, 0, 2) 23 | ZEND_ARG_INFO(0, obj1) 24 | ZEND_ARG_INFO(0, obj2) 25 | ZEND_END_ARG_INFO() 26 | 27 | const zend_function_entry comparable_functions[] = { 28 | ZEND_FENTRY( 29 | compare, NULL, arginfo_comparable, ZEND_ACC_PUBLIC|ZEND_ACC_ABSTRACT|ZEND_ACC_STATIC 30 | ) 31 | PHP_FE_END 32 | }; 33 | 34 | PHP_MINIT_FUNCTION(comparable) 35 | { 36 | zend_class_entry tmp_ce; 37 | INIT_CLASS_ENTRY(tmp_ce, "Comparable", comparable_functions); 38 | comparable_ce = zend_register_internal_interface(&tmp_ce TSRMLS_CC); 39 | 40 | return SUCCESS; 41 | } 42 | 43 | Note that in this case we can't use ``PHP_ABSTRACT_ME``, because it does not support static abstract methods. 
Instead 44 | we have to use the low-level ``ZEND_FENTRY`` macro. 45 | 46 | Next we implement the ``interface_gets_implemented`` handler:: 47 | 48 | static int implement_comparable(zend_class_entry *interface, zend_class_entry *ce TSRMLS_DC) 49 | { 50 | if (ce->create_object != NULL) { 51 | zend_error(E_ERROR, "Comparable interface can only be used on userland classes"); 52 | } 53 | 54 | ce->create_object = comparable_create_object_override; 55 | 56 | return SUCCESS; 57 | } 58 | 59 | // in MINIT 60 | comparable_ce->interface_gets_implemented = implement_comparable; 61 | 62 | When the interface is implemented the ``implement_comparable`` function will be called. In this function we override the 63 | class's ``create_object`` handler. To simplify things we only allow the interface to be used when ``create_object`` 64 | was ``NULL`` previously (i.e. it is a "normal" userland class). We could obviously also make this work with arbitrary 65 | classes by backing up the old ``create_object`` handler somewhere. 66 | 67 | In our ``create_object`` override we create the object as usual but assign our own handlers structure with a custom 68 | ``compare_objects`` handler:: 69 | 70 | static zend_object_handlers comparable_handlers; 71 | 72 | static zend_object_value comparable_create_object_override(zend_class_entry *ce TSRMLS_DC) 73 | { 74 | zend_object *object; 75 | zend_object_value retval; 76 | 77 | retval = zend_objects_new(&object, ce TSRMLS_CC); 78 | object_properties_init(object, ce); 79 | 80 | retval.handlers = &comparable_handlers; 81 | 82 | return retval; 83 | } 84 | 85 | // In MINIT 86 | memcpy(&comparable_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); 87 | comparable_handlers.compare_objects = comparable_compare_objects; 88 | 89 | Lastly we have to implement the custom comparison handler. It will call the ``compare`` method using the 90 | ``zend_call_method_with_2_params`` macro, which is defined in ``zend_interfaces.h``. 
One question that arises is which 91 | class the method should be called on. For this implementation we'll simply use the first passed object, though this is 92 | just an arbitrary choice. In practice this means that for ``$left < $right`` the class of ``$left`` will be used, but 93 | for ``$left > $right`` the class of ``$right`` is used (because PHP transforms the ``>`` to a ``<`` operation). 94 | 95 | :: 96 | 97 | #include "zend_interfaces.h" 98 | 99 | static int comparable_compare_objects(zval *obj1, zval *obj2 TSRMLS_DC) 100 | { 101 | zval *retval = NULL; 102 | int result; 103 | 104 | zend_call_method_with_2_params(NULL, Z_OBJCE_P(obj1), NULL, "compare", &retval, obj1, obj2); 105 | 106 | if (!retval || Z_TYPE_P(retval) == IS_NULL) { 107 | if (retval) { 108 | zval_ptr_dtor(&retval); 109 | } 110 | return zend_get_std_object_handlers()->compare_objects(obj1, obj2 TSRMLS_CC); 111 | } 112 | 113 | convert_to_long_ex(&retval); 114 | result = ZEND_NORMALIZE_BOOL(Z_LVAL_P(retval)); 115 | zval_ptr_dtor(&retval); 116 | 117 | return result; 118 | } 119 | 120 | The ``ZEND_NORMALIZE_BOOL`` macro used above normalizes the returned integer to ``-1``, ``0`` and ``1``. 121 | 122 | And that's all it takes. Now we can try out the new interface (sorry if the example doesn't make particularly much 123 | sense): 124 | 125 | .. 
code-block:: php 126 | 127 | class Point implements Comparable { 128 | protected $x, $y, $z; 129 | 130 | public function __construct($x, $y, $z) { 131 | $this->x = $x; $this->y = $y; $this->z = $z; 132 | } 133 | 134 | /* We assume a point is smaller/greater if all its components are smaller/greater */ 135 | public static function compare($p1, $p2) { 136 | if ($p1->x == $p2->x && $p1->y == $p2->y && $p1->z == $p2->z) { 137 | return 0; 138 | } 139 | 140 | if ($p1->x < $p2->x && $p1->y < $p2->y && $p1->z < $p2->z) { 141 | return -1; 142 | } 143 | 144 | if ($p1->x > $p2->x && $p1->y > $p2->y && $p1->z > $p2->z) { 145 | return 1; 146 | } 147 | 148 | // not comparable 149 | return 1; 150 | } 151 | } 152 | 153 | $p1 = new Point(1, 1, 1); 154 | $p2 = new Point(2, 2, 2); 155 | $p3 = new Point(1, 0, 2); 156 | 157 | var_dump($p1 < $p2, $p1 > $p2, $p1 == $p2); // true, false, false 158 | 159 | var_dump($p1 == $p1); // true 160 | 161 | var_dump($p1 < $p3, $p1 > $p3, $p1 == $p3); // false, false, false 162 | 163 | -------------------------------------------------------------------------------- /Book/php5/hashtables.rst: -------------------------------------------------------------------------------- 1 | Hashtables 2 | ========== 3 | 4 | Hashtables are one of the most important structures used by PHP. They form the basis for arrays, object properties, 5 | symbol tables and have countless other applications throughout the engine. This chapter will introduce how hashtables 6 | work and cover the related APIs in detail. 7 | 8 | Contents: 9 | 10 | .. 
toctree:: 11 | :maxdepth: 2 12 | 13 | hashtables/basic_structure.rst 14 | hashtables/hashtable_api.rst 15 | hashtables/array_api.rst 16 | hashtables/hash_algorithm.rst 17 | -------------------------------------------------------------------------------- /Book/php5/hashtables/array_api.rst: -------------------------------------------------------------------------------- 1 | Symtable and array API 2 | ====================== 3 | 4 | The hashtable API allows you to work with values of any type, but in the vast majority of cases the values will be 5 | zvals. Using the ``zend_hash`` API with zvals can often be somewhat cumbersome, as you need to handle zval allocation 6 | and initialization yourself. This is why PHP provides a second set of APIs specifically aimed at this use case. Before 7 | introducing these simplified APIs we will have a look at a special kind of hashtable that PHP arrays make use of. 8 | 9 | Symtables 10 | --------- 11 | 12 | One of the core concepts behind the design of PHP is that integers and strings containing integers should be 13 | interchangeable. This also applies to arrays where the keys ``42`` and ``"42"`` should be considered the same. This is 14 | not the case though with ordinary hashtables: They strictly distinguish the key types and it's okay to have both the 15 | key ``42`` and ``"42"`` in the same table (with different values). 16 | 17 | This is why there is an additional *symtable* (symbol table) API, which is a thin wrapper around some hashtable 18 | functions which converts integral string keys to actual integer keys. 
For example, this is how the 19 | ``zend_symtable_find()`` function is defined:: 20 | 21 | static inline int zend_symtable_find( 22 | HashTable *ht, const char *arKey, uint nKeyLength, void **pData 23 | ) { 24 | ZEND_HANDLE_NUMERIC(arKey, nKeyLength, zend_hash_index_find(ht, idx, pData)); 25 | return zend_hash_find(ht, arKey, nKeyLength, pData); 26 | } 27 | 28 | The implementation of the ``ZEND_HANDLE_NUMERIC()`` macro will not be considered in detail here, only the functionality 29 | behind it is important: If ``arKey`` contains a decimal integer between ``LONG_MIN`` and ``LONG_MAX``, then that 30 | integer is written into ``idx`` and ``zend_hash_index_find()`` is called with it. In all other cases the code will 31 | continue to the next line, where ``zend_hash_find()`` will be invoked. 32 | 33 | Apart from ``zend_symtable_find()`` the following functions are part of the symtable API, again with the same behavior 34 | as their hashtable counterparts, but including string to integer normalization:: 35 | 36 | static inline int zend_symtable_exists(HashTable *ht, const char *arKey, uint nKeyLength); 37 | static inline int zend_symtable_del(HashTable *ht, const char *arKey, uint nKeyLength); 38 | static inline int zend_symtable_update( 39 | HashTable *ht, const char *arKey, uint nKeyLength, void *pData, uint nDataSize, void **pDest 40 | ); 41 | static inline int zend_symtable_update_current_key_ex( 42 | HashTable *ht, const char *arKey, uint nKeyLength, int mode, HashPosition *pos 43 | ); 44 | 45 | Additionally there are two macros for creating symtables:: 46 | 47 | #define ZEND_INIT_SYMTABLE_EX(ht, n, persistent) \ 48 | zend_hash_init(ht, n, NULL, ZVAL_PTR_DTOR, persistent) 49 | 50 | #define ZEND_INIT_SYMTABLE(ht) \ 51 | ZEND_INIT_SYMTABLE_EX(ht, 2, 0) 52 | 53 | As you can see these macros are just ``zend_hash_init()`` calls using ``ZVAL_PTR_DTOR`` as the destructor. 
As such 54 | these macros are not directly related to the string to integer casting behavior described above. 55 | 56 | Let's give this new set of functions a try:: 57 | 58 | HashTable *myht; 59 | zval *zv1, *zv2; 60 | zval **zv_dest; 61 | 62 | ALLOC_HASHTABLE(myht); 63 | ZEND_INIT_SYMTABLE(myht); 64 | 65 | MAKE_STD_ZVAL(zv1); 66 | ZVAL_STRING(zv1, "zv1", 1); 67 | 68 | MAKE_STD_ZVAL(zv2); 69 | ZVAL_STRING(zv2, "zv2", 1); 70 | 71 | zend_hash_index_update(myht, 42, &zv1, sizeof(zval *), NULL); 72 | zend_symtable_update(myht, "42", sizeof("42"), &zv2, sizeof(zval *), NULL); 73 | 74 | if (zend_hash_index_find(myht, 42, (void **) &zv_dest) == SUCCESS) { 75 | php_printf("Value at key 42 is %Z\n", *zv_dest); 76 | } 77 | 78 | if (zend_symtable_find(myht, "42", sizeof("42"), (void **) &zv_dest) == SUCCESS) { 79 | php_printf("Value at key \"42\" is %Z\n", *zv_dest); 80 | } 81 | 82 | zend_hash_destroy(myht); 83 | FREE_HASHTABLE(myht); 84 | 85 | This code will print: 86 | 87 | .. code-block:: none 88 | 89 | Value at key 42 is zv2 90 | Value at key "42" is zv2 91 | 92 | Thus both ``update`` calls wrote to the same element (the second one overwriting the first one) and both ``find`` calls 93 | also found the same element. 94 | 95 | Array API 96 | --------- 97 | 98 | Now we have all the prerequisites to look at the array API. This API no longer works directly on hashtables, but rather 99 | accepts zvals from which the hashtable is extracted using ``Z_ARRVAL_P()``. 100 | 101 | The first two functions from this API are ``array_init()`` and ``array_init_size()``, which initialize a hashtable 102 | into a zval. 
The former function takes only the target zval, whereas the latter takes an additional size hint:: 103 | 104 | /* Create empty array into return_value */ 105 | array_init(return_value); 106 | 107 | /* Create empty array with expected size 1000000 into return_value */ 108 | array_init_size(return_value, 1000000); 109 | 110 | The remaining functions of this API all deal with inserting values into an array. There are four families of functions 111 | which look as follows:: 112 | 113 | /* Insert at next index */ 114 | int add_next_index_*(zval *arg, ...); 115 | /* Insert at specific index */ 116 | int add_index_*(zval *arg, ulong idx, ...); 117 | /* Insert at specific key */ 118 | int add_assoc_*(zval *arg, const char *key, ...); 119 | /* Insert at specific key of length key_len (for binary safety) */ 120 | int add_assoc_*_ex(zval *arg, const char *key, uint key_len, ...); 121 | 122 | Here ``*`` is a placeholder for a type and ``...`` a placeholder for the type-specific arguments. The valid values for 123 | them are listed in the following table: 124 | 125 | .. 
list-table:: 126 | :header-rows: 1 127 | :widths: 8 20 128 | 129 | * - Type 130 | - Additional arguments 131 | * - ``null`` 132 | - none 133 | * - ``bool`` 134 | - ``int b`` 135 | * - ``long`` 136 | - ``long n`` 137 | * - ``double`` 138 | - ``double d`` 139 | * - ``string`` 140 | - ``const char *str, int duplicate`` 141 | * - ``stringl`` 142 | - ``const char *str, uint length, int duplicate`` 143 | * - ``resource`` 144 | - ``int r`` 145 | * - ``zval`` 146 | - ``zval *value`` 147 | 148 | As an example for the usage of these functions, let's just create a dummy array with elements of various types:: 149 | 150 | PHP_FUNCTION(make_array) { 151 | zval *zv; 152 | 153 | array_init(return_value); 154 | 155 | add_index_long(return_value, 10, 100); 156 | add_index_double(return_value, 20, 3.141); 157 | add_index_string(return_value, 30, "foo", 1); 158 | 159 | add_next_index_bool(return_value, 1); 160 | add_next_index_stringl(return_value, "\0bar", sizeof("\0bar")-1, 1); 161 | 162 | add_assoc_null(return_value, "foo"); 163 | add_assoc_long(return_value, "bar", 42); 164 | 165 | add_assoc_double_ex(return_value, "\0bar", sizeof("\0bar"), 1.61); 166 | 167 | /* For some things you still have to manually create a zval... */ 168 | MAKE_STD_ZVAL(zv); 169 | object_init(zv); 170 | add_next_index_zval(return_value, zv); 171 | } 172 | 173 | The ``var_dump()`` output of this array looks as follows (with NUL-bytes replaced by ``\0``): 174 | 175 | .. 
code-block:: none 176 | 177 | array(9) { 178 | [10]=> 179 | int(100) 180 | [20]=> 181 | float(3.141) 182 | [30]=> 183 | string(3) "foo" 184 | [31]=> 185 | bool(true) 186 | [32]=> 187 | string(4) "\0bar" 188 | ["foo"]=> 189 | NULL 190 | ["bar"]=> 191 | int(42) 192 | ["\0bar"]=> 193 | float(1.61) 194 | [33]=> 195 | object(stdClass)#1 (0) { 196 | } 197 | } 198 | 199 | Looking at the above code you may notice that the array API is even more inconsistent in regard to string lengths: The 200 | key length passed to the ``_ex`` functions *includes* the terminating NUL-byte, whereas the string length passed to the 201 | ``stringl`` functions *excludes* the NUL-byte. 202 | 203 | Furthermore it should be noted that while these functions start with ``add`` they behave like ``update`` functions in 204 | that they overwrite previously existing keys. 205 | 206 | There are several additional ``add_get`` functions which both insert a value and fetch it again (analogous to the last 207 | parameter of the ``zend_hash_update`` functions). As they are virtually never used they will not be discussed here and 208 | are mentioned only for the sake of completeness. 209 | 210 | This concludes our walk through the hashtable, symtable and array APIs. -------------------------------------------------------------------------------- /Book/php5/hashtables/basic_structure.rst: -------------------------------------------------------------------------------- 1 | Basic structure 2 | =============== 3 | 4 | Basic concepts 5 | -------------- 6 | 7 | Arrays in C are just regions of memory that can be accessed by offset. This implies that keys have to be integers and 8 | need to be continuous. For example, if you have the keys 0, 1 and 2, then the next key has to be 3 and can't be 9 | 214678462. PHP arrays are very different: They support both string keys and non-continuous integer keys and even allow 10 | mixing both. 
11 | 12 | To implement such a structure in C there are two approaches: The first is using a binary search tree, where both lookup 13 | and insert have complexity ``O(log n)`` (where ``n`` is the number of elements in the table). The second is a hashtable, 14 | which has an average lookup/insert complexity of ``O(1)``, i.e. elements can be inserted and retrieved in constant time. 15 | As such hashtables are preferable in most cases and are also the technique that PHP uses. 16 | 17 | The idea behind a hashtable is very simple: A complex key value (like a string) is converted into an integer using a 18 | hash function. This integer can then be used as an offset into a normal C array. The issue is that the number of 19 | integers (``2^32`` or ``2^64``) is much smaller than the number of strings (of which there are infinitely many). As such 20 | the hash function will have collisions, i.e. cases where two strings have the same hash value. 21 | 22 | As such some kind of collision resolution has to take place. There are basically two solutions to this problem, the 23 | first being *open addressing* (which is not covered here). The second one is *chaining* and is employed by PHP. This 24 | method simply stores all elements having the same hash in a linked list. When a key is looked up PHP will calculate the 25 | hash and then go through the linked list of "possible" values until it finds the matching entry. Here is an 26 | illustration of chaining collision resolution: 27 | 28 | .. image:: ./images/basic_hashtable.* 29 | :align: center 30 | :height: 265 31 | 32 | The elements of the linked list are called ``Bucket``\s and the C array containing the heads of the linked lists is 33 | called ``arBuckets``. 34 | 35 | Consider how you would delete an element from such a structure: Say you have a pointer to the bucket of ``"c"`` and want 36 | to remove it. To do this you'd have to set the pointer coming from ``"a"`` to ``NULL``. 
Thus you need to retrieve the 37 | bucket of ``"a"`` which you can do either by traversing the linked list for the hash value or by additionally storing 38 | pointers in the reverse direction. The latter is what PHP does: Every bucket contains both a pointer to the next bucket 39 | (``pNext``) and the previous bucket (``pLast``). This is illustrated in the following graphic: 40 | 41 | .. image:: ./images/doubly_linked_hashtable.* 42 | :align: center 43 | :height: 250 44 | 45 | Furthermore PHP hashtables are *ordered*: If you traverse an array you'll get the elements in the same order in which you 46 | inserted them. To support this the buckets have to be part of another linked list which specifies the order. This is 47 | once again a doubly linked list, for the same reasons as outlined above (and to support traversal in reverse order). 48 | The forward pointers are stored in ``pListNext``, the backward pointers in ``pListLast``. Additionally the hashtable 49 | structure has a pointer to the start of the list (``pListHead``) and the end of the list (``pListTail``). Here is an 50 | example of what this linked list could look like for the elements ``"a"``, ``"b"``, ``"c"`` (in that order): 51 | 52 | .. image:: ./images/ordered_hashtable.* 53 | :align: center 54 | :height: 250 55 | 56 | The HashTable and Bucket structures 57 | ----------------------------------- 58 | 59 | To implement hashtables PHP uses two structures, which can be found in the ``zend_hash.h`` file. We'll first have a look 60 | at the ``Bucket`` struct:: 61 | 62 | typedef struct bucket { 63 | ulong h; 64 | uint nKeyLength; 65 | void *pData; 66 | void *pDataPtr; 67 | struct bucket *pListNext; 68 | struct bucket *pListLast; 69 | struct bucket *pNext; 70 | struct bucket *pLast; 71 | char *arKey; 72 | } Bucket; 73 | 74 | You already know what the ``pNext``, ``pLast``, ``pListNext`` and ``pListLast`` pointers are for. 
Let's quickly go 75 | through the remaining members: 76 | 77 | ``h`` is the hash of the key. If the key is an integer, then ``h`` will be that integer (for integers the hash function 78 | doesn't do anything) and ``nKeyLength`` will be 0. For string keys ``h`` will be the result of ``zend_hash_func()``, 79 | ``arKey`` will hold the string and ``nKeyLength`` its length. 80 | 81 | ``pData`` is a pointer to the stored value. The stored value will not be the same as the one passed to the insertion 82 | function, rather it will be a copy of it (which is allocated separately from the bucket). As this would be very 83 | inefficient when the stored value is a pointer PHP employs a small trick: Instead of storing the pointer in a separate 84 | allocation it is put into the ``pDataPtr`` member. ``pData`` then points to that member (``pData = &pDataPtr``). 85 | 86 | Let's have a look at the main ``HashTable`` struct now:: 87 | 88 | typedef struct _hashtable { 89 | uint nTableSize; 90 | uint nTableMask; 91 | uint nNumOfElements; 92 | ulong nNextFreeElement; 93 | Bucket *pInternalPointer; 94 | Bucket *pListHead; 95 | Bucket *pListTail; 96 | Bucket **arBuckets; 97 | dtor_func_t pDestructor; 98 | zend_bool persistent; 99 | unsigned char nApplyCount; 100 | zend_bool bApplyProtection; 101 | #if ZEND_DEBUG 102 | int inconsistent; 103 | #endif 104 | } HashTable; 105 | 106 | You already know that ``arBuckets`` is the C array that contains the linked bucket lists and is indexed by the hash of 107 | the key. As PHP arrays don't have a fixed size ``arBuckets`` has to be dynamically resized when the number of elements 108 | in the table (``nNumOfElements``) surpasses the current size of the ``arBuckets`` allocation (``nTableSize``). We could 109 | of course store more than ``nTableSize`` elements in the hashtable, but this would increase the number of collisions 110 | and thus degrade performance. 
111 | 112 | ``nTableSize`` is always a power of two, so if you have 12 elements in a hashtable the actual table size will be 16. 113 | Note though that while the ``arBuckets`` array automatically grows, it will *not* shrink when you remove elements. If 114 | you first insert 1000000 elements into the hashtable and then remove all of them again the ``nTableSize`` will still 115 | be 1048576. 116 | 117 | The result of the hash function is a ``ulong``, but the ``nTableSize`` will usually be a lot smaller than that. Thus 118 | the hash can not be directly used to index into the ``arBuckets`` array. Instead ``nIndex = h % nTableSize`` is used. 119 | As the table size is always a power of two this expression is equivalent to ``nIndex = h & (nTableSize - 1)``. To see 120 | why let's see how ``nTableSize - 1`` changes the value: 121 | 122 | .. code-block:: none 123 | 124 | nTableSize = 128 = 0b00000000.00000000.00000000.10000000 125 | nTableSize - 1 = 127 = 0b00000000.00000000.00000000.01111111 126 | 127 | ``nTableSize - 1`` has all bits below the table size set. Thus doing ``h & (nTableSize - 1)`` will only keep the bits 128 | of the hash that are lower than ``nTableSize``, which is the same thing ``h % nTableSize`` does. 129 | 130 | The value ``nTableSize - 1`` is called the table mask and stored in the ``nTableMask`` member. Using a masking operation 131 | instead of a modulus is just a performance optimization. 132 | 133 | The ``nNextFreeElement`` member specifies the next integer key that will be used when you insert an element using 134 | ``$array[] = $value``. It will be one larger than the largest integer key that was ever used in this hashtable. 135 | 136 | You already know the role of the ``pListHead`` and ``pListTail`` pointers (they are the head/tail of the doubly linked 137 | list specifying the order). The ``pInternalPointer`` is used for iteration and points to the "current" bucket. 
138 | 139 | When an item is deleted from the hashtable a destruction function can be called for it, which is stored in the 140 | ``pDestructor`` member. For example, if you are storing ``zval *`` items in the hashtable, you will probably want 141 | ``zval_ptr_dtor`` to be called when an element is removed. 142 | 143 | The ``persistent`` flag specifies whether the buckets (and their values) should use persistent allocation. For most 144 | use cases this will be ``0`` as the hashtable is not supposed to live longer than one request. The ``bApplyProtection`` 145 | flag specifies whether the hashtable should use recursion protection (defaults to 1). Recursion protection will throw 146 | an error if the recursion depth (stored in ``nApplyCount``) reaches a certain level. The protection is used for 147 | hashtable comparisons and for the ``zend_hash_apply`` functions. 148 | 149 | The last member ``inconsistent`` is only used in debug builds and stores information on the current state of the 150 | hashtable. This is used to throw errors for some incorrect usages of the hashtable, e.g. if you access a hashtable that 151 | is in the process of being destroyed. 152 | -------------------------------------------------------------------------------- /Book/php5/hashtables/hash_algorithm.rst: -------------------------------------------------------------------------------- 1 | Hash algorithm and collisions 2 | ============================= 3 | 4 | In this final section on hashtables, we'll have a closer look at worst-case collision scenarios and some properties of 5 | the hashing function that PHP employs. While this knowledge is not necessary for the usage of the hashtable APIs it 6 | should give you a better understanding of the hashtable structure and its limitations. 
7 | 8 | Analyzing collisions 9 | -------------------- 10 | 11 | In order to simplify collision analysis, let's first write a helper function ``array_collision_info()`` which will 12 | take an array and tell us which keys collide into which index. In order to do so we'll go through the ``arBuckets`` and 13 | for every index create an array that contains some information about all buckets at that index:: 14 | 15 | PHP_FUNCTION(array_collision_info) { 16 | HashTable *hash; 17 | zend_uint i; 18 | 19 | if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "h", &hash) == FAILURE) { 20 | return; 21 | } 22 | 23 | array_init(return_value); 24 | 25 | /* Empty hashtables may not yet be initialized */ 26 | if (hash->nNumOfElements == 0) { 27 | return; 28 | } 29 | 30 | for (i = 0; i < hash->nTableSize; ++i) { 31 | /* Create array of elements at this nIndex */ 32 | zval *elements; 33 | Bucket *bucket; 34 | 35 | MAKE_STD_ZVAL(elements); 36 | array_init(elements); 37 | add_next_index_zval(return_value, elements); 38 | 39 | bucket = hash->arBuckets[i]; 40 | while (bucket != NULL) { 41 | zval *element; 42 | 43 | MAKE_STD_ZVAL(element); 44 | array_init(element); 45 | add_next_index_zval(elements, element); 46 | 47 | add_assoc_long(element, "hash", bucket->h); 48 | 49 | if (bucket->nKeyLength == 0) { 50 | add_assoc_long(element, "key", bucket->h); 51 | } else { 52 | add_assoc_stringl( 53 | element, "key", (char *) bucket->arKey, bucket->nKeyLength - 1, 1 54 | ); 55 | } 56 | 57 | { 58 | zval **data = (zval **) bucket->pData; 59 | Z_ADDREF_PP(data); 60 | add_assoc_zval(element, "value", *data); 61 | } 62 | 63 | bucket = bucket->pNext; 64 | } 65 | } 66 | } 67 | 68 | The code is also a nice usage example for the ``add_`` functions from the previous section. 
Let's try the function out:: 69 | 70 | var_dump(array_collision_info([2 => 0, 5 => 1, 10 => 2])); 71 | 72 | // Output (reformatted a bit): 73 | 74 | array(8) { 75 | [0] => array(0) {} 76 | [1] => array(0) {} 77 | [2] => array(2) { 78 | [0] => array(3) { 79 | ["hash"] => int(10) 80 | ["key"] => int(10) 81 | ["value"] => int(2) 82 | } 83 | [1] => array(3) { 84 | ["hash"] => int(2) 85 | ["key"] => int(2) 86 | ["value"] => int(0) 87 | } 88 | } 89 | [3] => array(0) {} 90 | [4] => array(0) {} 91 | [5] => array(1) { 92 | [0] => array(3) { 93 | ["hash"] => int(5) 94 | ["key"] => int(5) 95 | ["value"] => int(1) 96 | } 97 | } 98 | [6] => array(0) {} 99 | [7] => array(0) {} 100 | } 101 | 102 | There are several things you can see from this output (most of which you should already be aware of): 103 | 104 | * The outer array has 8 elements, even though only 3 were inserted. This is because 8 is the default initial table 105 | size. 106 | * For integers the hash and the key are always the same. 107 | * Even though the hashes are all different, we still have a collision at ``nIndex == 2`` because 2 % 8 is 2, but 108 | 10 % 8 is also 2. 109 | * The linked collision resolution lists contain the elements in reverse order of insertion. (This is the easiest way 110 | to implement it.) 111 | 112 | Index collisions 113 | ---------------- 114 | 115 | The goal now is to create a worst-case collision scenario where *all* hash keys collide. There are two ways to 116 | accomplish this and we'll start with the easier one: Rather than creating collisions in the hash function, we'll 117 | create the collisions in the index (which is the hash modulo the table size). 118 | 119 | For integer keys this is particularly easy, because no real hashing operation is applied to them. The index will simply 120 | be ``key % nTableSize``. Finding collisions for this expression is trivial, e.g. any key that is a multiple of the 121 | table size will collide. 
If the table size is 8, then the indices will be 0 % 8 = 0, 8 % 8 = 0, 16 % 8 = 0, 24 % 8 = 0, 122 | etc. 123 | 124 | Here is a PHP script demonstrating this scenario: 125 | 126 | .. code-block:: php 127 | 128 | $size = pow(2, 16); // any power of 2 will do 129 | 130 | $startTime = microtime(true); 131 | 132 | // Insert keys [0, $size, 2 * $size, 3 * $size, ..., ($size - 1) * $size] 133 | 134 | $array = array(); 135 | for ($key = 0, $maxKey = ($size - 1) * $size; $key <= $maxKey; $key += $size) { 136 | $array[$key] = 0; 137 | } 138 | 139 | $endTime = microtime(true); 140 | 141 | printf("Inserted %d elements in %.2f seconds\n", $size, $endTime - $startTime); 142 | printf("There are %d collisions at index 0\n", count(array_collision_info($array)[0])); 143 | 144 | This is the output I get (the results will be different for your machine, but should have the same order of magnitude): 145 | 146 | .. code-block:: none 147 | 148 | Inserted 65536 elements in 34.05 seconds 149 | There are 65536 collisions at index 0 150 | 151 | Of course thirty seconds to insert a handful of elements is *very* slow. What happened? As we have constructed a 152 | scenario where all hash keys collide the performance of inserts degenerates from O(1) to O(n): On every insert PHP has 153 | to walk the collision list for the index in order to check whether an element with the same key already exists. Usually 154 | this is not a problem as the collision list contains only one or two buckets. In the degenerate case on the other hand 155 | *all* elements will be in that list. 156 | 157 | As such PHP has to perform n inserts with O(n) time, which gives a total execution time of O(n^2). Thus instead of doing 158 | 2^16 operations about 2^32 will have to be done. 159 | 160 | Hash collisions 161 | --------------- 162 | 163 | Now that we successfully created a worst-case scenario using index collisions, let's do the same using actual hash 164 | collisions. 
As this is not possible using integer keys, we'll have to take a look at PHP's string hashing function, 165 | which is defined as follows:: 166 | 167 | static inline ulong zend_inline_hash_func(const char *arKey, uint nKeyLength) 168 | { 169 | register ulong hash = 5381; 170 | 171 | /* variant with the hash unrolled eight times */ 172 | for (; nKeyLength >= 8; nKeyLength -= 8) { 173 | hash = ((hash << 5) + hash) + *arKey++; 174 | hash = ((hash << 5) + hash) + *arKey++; 175 | hash = ((hash << 5) + hash) + *arKey++; 176 | hash = ((hash << 5) + hash) + *arKey++; 177 | hash = ((hash << 5) + hash) + *arKey++; 178 | hash = ((hash << 5) + hash) + *arKey++; 179 | hash = ((hash << 5) + hash) + *arKey++; 180 | hash = ((hash << 5) + hash) + *arKey++; 181 | } 182 | switch (nKeyLength) { 183 | case 7: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 184 | case 6: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 185 | case 5: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 186 | case 4: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 187 | case 3: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 188 | case 2: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */ 189 | case 1: hash = ((hash << 5) + hash) + *arKey++; break; 190 | case 0: break; 191 | EMPTY_SWITCH_DEFAULT_CASE() 192 | } 193 | return hash; 194 | } 195 | 196 | After removing the manual loop-unrolling the function will look like this:: 197 | 198 | static inline ulong zend_inline_hash_func(const char *arKey, uint nKeyLength) 199 | { 200 | register ulong hash = 5381; 201 | 202 | for (uint i = 0; i < nKeyLength; ++i) { 203 | hash = ((hash << 5) + hash) + arKey[i]; 204 | } 205 | 206 | return hash; 207 | } 208 | 209 | The ``(hash << 5) + hash`` expression is the same as ``hash * 32 + hash`` or just ``hash * 33``. 
Using this we can further 210 | simplify the function:: 211 | 212 | static inline ulong zend_inline_hash_func(const char *arKey, uint nKeyLength) 213 | { 214 | register ulong hash = 5381; 215 | 216 | for (uint i = 0; i < nKeyLength; ++i) { 217 | hash = hash * 33 + arKey[i]; 218 | } 219 | 220 | return hash; 221 | } 222 | 223 | This hash function is called *DJBX33A*, which stands for "Daniel J. Bernstein, Times 33 with Addition". It is one of the 224 | simplest (and as such also one of the fastest) string hashing functions there is. 225 | 226 | Thanks to the simplicity of the hash function finding collisions is not hard. We'll start with two-character collisions, 227 | i.e. we are looking for two strings ``ab`` and ``cd``, which have the same hash: 228 | 229 | .. code-block:: none 230 | 231 | hash(ab) = hash(cd) 232 | <=> (5381 * 33 + a) * 33 + b = (5381 * 33 + c) * 33 + d 233 | <=> a * 33 + b = c * 33 + d 234 | <=> c = a + n 235 | d = b - 33 * n 236 | where n is an integer 237 | 238 | This tells us that we can get a collision by taking a two-char string, incrementing the first char by one and 239 | decrementing the second char by 33. Using this technique we can create groups of 8 strings which all collide. Here is 240 | an example of such a collision group: 241 | 242 | .. code-block:: php 243 | 244 | $array = [ 245 | "E" . chr(122) => 0, 246 | "F" . chr(89) => 1, 247 | "G" . chr(56) => 2, 248 | "H" . chr(23) => 3, 249 | "I" . chr(-10) => 4, 250 | "J" . chr(-43) => 5, 251 | "K" . chr(-76) => 6, 252 | "L" . 
chr(-109) => 7, 253 | ]; 254 | 255 | var_dump(array_collision_info($array)); 256 | 257 | The output shows that indeed all the keys collide with hash ``193456164``:: 258 | 259 | array(8) { 260 | [0] => array(0) {} 261 | [1] => array(0) {} 262 | [2] => array(0) {} 263 | [3] => array(0) {} 264 | [4] => array(8) { 265 | [0] => array(3) { 266 | ["hash"] => int(193456164) 267 | ["key"] => string(2) "L\x93" 268 | ["value"] => int(7) 269 | } 270 | [1] => array(3) { 271 | ["hash"] => int(193456164) 272 | ["key"] => string(2) "K´" 273 | ["value"] => int(6) 274 | } 275 | [2] => array(3) { 276 | ["hash"] => int(193456164) 277 | ["key"] => string(2) "JÕ" 278 | ["value"] => int(5) 279 | } 280 | [3] => array(3) { 281 | ["hash"] => int(193456164) 282 | ["key"] => string(2) "Iö" 283 | ["value"] => int(4) 284 | } 285 | [4] => array(3) { 286 | ["hash"] => int(193456164) 287 | ["key"] => string(2) "H\x17" 288 | ["value"] => int(3) 289 | } 290 | [5] => array(3) { 291 | ["hash"] => int(193456164) 292 | ["key"] => string(2) "G8" 293 | ["value"] => int(2) 294 | } 295 | [6] => array(3) { 296 | ["hash"] => int(193456164) 297 | ["key"] => string(2) "FY" 298 | ["value"] => int(1) 299 | } 300 | [7] => array(3) { 301 | ["hash"] => int(193456164) 302 | ["key"] => string(2) "Ez" 303 | ["value"] => int(0) 304 | } 305 | } 306 | [5] => array(0) {} 307 | [6] => array(0) {} 308 | [7] => array(0) {} 309 | } 310 | 311 | Once we got one collision group, constructing more collisions is even easier. To do so we make use of the following 312 | property of DJBX33A: If two equal-length strings ``$str1`` and ``$str2`` collide, then ``$prefix.$str1.$postfix`` and 313 | ``$prefix.$str2.$postfix`` will collide as well. It's easy to prove that this is indeed true: 314 | 315 | .. code-block:: none 316 | 317 | hash(prefix . str1 . postfix) 318 | = hash(prefix) * 33^a + hash(str1) * 33^b + hash(postfix) 319 | = hash(prefix) * 33^a + hash(str2) * 33^b + hash(postfix) 320 | = hash(prefix . str2 . 
postfix) 321 | 322 | where a = strlen(str1 . postfix) and b = strlen(postfix) 323 | 324 | Thus, if ``Ez`` and ``FY`` collide, so will ``abcEzefg`` and ``abcFYefg``. This is also the reason why we could ignore 325 | the trailing NUL-byte that is also part of the hash in the previous considerations: It would result in a different hash, 326 | but the collisions would still be present. 327 | 328 | Using this property large sets of collisions can be created by taking a known set of collisions and concatenating them 329 | in every possible way. E.g. if we know that ``Ez`` and ``FY`` collide, then we also know that all of ``EzEzEz``, 330 | ``EzEzFY``, ``EzFYEz``, ``EzFYFY``, ``FYEzEz``, ``FYEzFY``, ``FYFYEz`` and ``FYFYFY`` will collide. With this method we 331 | can create arbitrarily large sets of collisions. -------------------------------------------------------------------------------- /Book/php5/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | This book is a collaborative effort between several PHP developers to better document and describe how PHP works 5 | internally. 6 | 7 | There are three primary goals of this book: 8 | 9 | * Document and describe how PHP internals work. 10 | * Document and describe how to extend the language with extensions. 11 | * Document and describe how you can interact with the community to develop PHP itself. 12 | 13 | This book is primarily targeted at developers who have experience in the C programming language. However, wherever 14 | possible we will attempt to distill the information and summarize it so that developers who don't know C well will 15 | still be able to understand the content. 16 | 17 | .. 
note:: Some of the introductory chapters have not been written yet (in particular the ones on creating basic 18 | extensions and declaring functions), so if you're completely new to PHP extension development you'll have to wait 19 | until the remaining introductory chapters are published or start off with other 20 | `resources on the topic `_. 21 | 22 | The repository for this book is available on GitHub_. Please report issues and provide feedback on the `issue tracker`_. 23 | 24 | .. _GitHub: https://github.com/phpinternalsbook/PHP-Internals-Book 25 | .. _issue tracker: https://github.com/phpinternalsbook/PHP-Internals-Book/issues -------------------------------------------------------------------------------- /Book/php5/zvals.rst: -------------------------------------------------------------------------------- 1 | Zvals 2 | ===== 3 | 4 | .. 5 | Writing this I'm assuming that in the previous chapter basic extension syntax was introduced und people know how to 6 | define a function without arginfo or zpp. So this chapter can have PHP_FUNCTION examples, just without heavy zpp 7 | usage 8 | 9 | In this chapter the "zval" data structure, which is used to represent PHP values, is introduced. We explain the concepts 10 | behind zvals and how to use them in extension code. 11 | 12 | Contents: 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | 17 | zvals/basic_structure.rst 18 | zvals/memory_management.rst 19 | zvals/casts_and_operations.rst -------------------------------------------------------------------------------- /Book/php7/build_system.rst: -------------------------------------------------------------------------------- 1 | Using the PHP build system 2 | ========================== 3 | 4 | In this chapter we'll explain how to use the PHP build system to compile both itself and additional extensions. This 5 | chapter will not yet be concerned with writing your own autoconf build instructions and only explain how to use the 6 | tooling. 7 | 8 | Contents: 9 | 10 | .. 
toctree:: 11 | :maxdepth: 2 12 | 13 | build_system/building_php.rst 14 | build_system/building_extensions.rst 15 | -------------------------------------------------------------------------------- /Book/php7/build_system/building_extensions.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: bash 2 | 3 | Building PHP extensions 4 | ======================= 5 | 6 | Now that you know how to compile PHP itself, we'll move on to compiling additional extensions. We'll discuss how the 7 | build process works and what different options are available. 8 | 9 | Loading shared extensions 10 | ------------------------- 11 | 12 | As you already know from the previous section, PHP extensions can be either built statically into the PHP binary, or 13 | compiled into a shared object (``.so``). Static linkage is the default for most of the bundled extensions, whereas 14 | shared objects can be created by explicitly passing ``--enable-EXTNAME=shared`` or ``--with-EXTNAME=shared`` to 15 | ``./configure``. 16 | 17 | While static extensions will always be available, shared extensions need to be loaded using the ``extension`` or 18 | ``zend_extension`` ini options. Both options take either an absolute path to the ``.so`` file or a path relative to 19 | the ``extension_dir`` setting. 20 | 21 | As an example, consider a PHP build compiled using this configure line:: 22 | 23 | ~/php-src> ./configure --prefix=$HOME/myphp \ 24 | --enable-debug --enable-maintainer-zts \ 25 | --enable-opcache --with-gmp=shared 26 | 27 | In this case both the opcache extension and GMP extension are compiled into shared objects located in the ``modules/`` 28 | directory. 
You can load both either by changing the ``extension_dir`` or by passing absolute paths:: 29 | 30 | ~/php-src> sapi/cli/php -dzend_extension=`pwd`/modules/opcache.so \ 31 | -dextension=`pwd`/modules/gmp.so 32 | # or 33 | ~/php-src> sapi/cli/php -dextension_dir=`pwd`/modules \ 34 | -dzend_extension=opcache.so -dextension=gmp.so 35 | 36 | # or (since PHP 7.2 the .so is optional) 37 | ~/php-src> sapi/cli/php -dextension_dir=`pwd`/modules \ 38 | -dzend_extension=opcache -dextension=gmp 39 | 40 | During the ``make install`` step, both ``.so`` files will be moved into the extension directory of your PHP installation, 41 | which you may find using the ``php-config --extension-dir`` command. For the above build options it will be 42 | ``/home/myuser/myphp/lib/php/extensions/debug-zts-MODULE_API``. This value will also be the default of the 43 | ``extension_dir`` ini option, so you won't have to specify it explicitly and can load the extensions directly:: 44 | 45 | ~/myphp> bin/php -dzend_extension=opcache -dextension=gmp 46 | 47 | This leaves us with one question: Which mechanism should you use? Shared objects allow you to have a base PHP binary and 48 | load additional extensions through the php.ini. Distributions make use of this by providing a bare PHP package and 49 | distributing the extensions as separate packages. On the other hand, if you are compiling your own PHP binary, you 50 | likely have no need for this, because you already know which extensions you need. 51 | 52 | As a rule of thumb, you'll use static linkage for the extensions bundled by PHP itself and use shared extensions for 53 | everything else. The reason is simply that building external extensions as shared objects is easier (or at least less 54 | intrusive), as you will see in a moment. Another benefit is that you can update the extension without rebuilding PHP. 55 | 56 | .. 
note:: If you need information about the difference between extensions and Zend extensions, you :doc:`may have a 57 | look at the dedicated chapter <../extensions_design/zend_extensions>`. 58 | 59 | Installing extensions from PECL 60 | ------------------------------- 61 | 62 | PECL_, the *PHP Extension Community Library*, offers a large number of extensions for PHP. When extensions are removed 63 | from the main PHP distribution, they usually continue to exist in PECL. Similarly, many extensions that are now bundled 64 | with PHP were previously PECL extensions. 65 | 66 | If you specified ``--with-pear`` during the configuration stage of your PHP build, ``make install`` will download 67 | and install PECL as a part of PEAR. You will find the ``pecl`` script in the ``$PREFIX/bin`` directory. Installing 68 | extensions is now as simple as running ``pecl install EXTNAME``, e.g.:: 69 | 70 | ~/myphp> bin/pecl install apcu 71 | 72 | This command will download, compile and install the APCu_ extension. The result will be a ``apcu.so`` file in your 73 | extension directory, which can then be loaded by passing the ``extension=apcu`` ini option. 74 | 75 | While ``pecl install`` is very handy for the end-user, it is of little interest to extension developers. In the 76 | following, we'll describe two ways to manually build extensions: Either by importing it into the main PHP source tree 77 | (this allows static linkage) or by doing an external build (only shared). 78 | 79 | .. _PECL: http://pecl.php.net 80 | .. _APCu: http://pecl.php.net/package/APCu 81 | 82 | Adding extensions to the PHP source tree 83 | ---------------------------------------- 84 | 85 | There is no fundamental difference between a third-party extension and an extension bundled with PHP. As such you can 86 | build an external extension simply by copying it into the PHP source tree and then using the usual build procedure. 87 | We'll demonstrate this using APCu as an example. 
88 | 89 | First of all, you'll have to place the source code of the extension into the ``ext/EXTNAME`` directory of your PHP 90 | source tree. If the extension is available via git, this is as simple as cloning the repository from within ``ext/``:: 91 | 92 | ~/php-src/ext> git clone https://github.com/krakjoe/apcu.git 93 | 94 | Alternatively you can also download a source tarball and extract it:: 95 | 96 | /tmp> wget http://pecl.php.net/get/apcu-4.0.2.tgz 97 | /tmp> tar xzf apcu-4.0.2.tgz 98 | /tmp> mkdir ~/php-src/ext/apcu 99 | /tmp> cp -r apcu-4.0.2/. ~/php-src/ext/apcu 100 | 101 | The extension will contain a ``config.m4`` file, which specifies extension-specific build instructions for use by 102 | autoconf. To incorporate them into the ``./configure`` script, you'll have to run ``./buildconf`` again. To ensure that 103 | the configure file is really regenerated, it is recommended to delete it beforehand:: 104 | 105 | ~/php-src> rm configure && ./buildconf 106 | 107 | You can now use the ``./config.nice`` script to add APCu to your existing configuration or start over with a completely 108 | new configure line:: 109 | 110 | ~/php-src> ./config.nice --enable-apcu 111 | # or 112 | ~/php-src> ./configure --enable-apcu # --other-options 113 | 114 | Finally run ``make -jN`` to perform the actual build. As we didn't use ``--enable-apcu=shared`` the extension is 115 | statically linked into the PHP binary, i.e. no additional actions are needed to make use of it. Obviously you can also 116 | use ``make install`` to install the resulting binaries. 117 | 118 | Building extensions using ``phpize`` 119 | ------------------------------------ 120 | 121 | It is also possible to build extensions separately from PHP by making use of the ``phpize`` script that was already 122 | mentioned in the :ref:`building_php` section. 
123 | 124 | ``phpize`` plays a similar role as the ``./buildconf`` script used for PHP builds: First it will import the PHP build 125 | system into your extension by copying files from ``$PREFIX/lib/php/build``. Among these files are ``php.m4`` 126 | (PHP's M4 macros), ``phpize.m4`` (which will be renamed to ``configure.ac`` in your extension and contains the main 127 | build instructions) and ``run-tests.php``. 128 | 129 | Then ``phpize`` will invoke autoconf to generate a ``./configure`` file, which can be used to customize the extension 130 | build. Note that it is not necessary to pass ``--enable-apcu`` to it, as this is implicitly assumed. Instead you should 131 | use ``--with-php-config`` to specify the path to your ``php-config`` script:: 132 | 133 | /tmp/apcu-4.0.2> ~/myphp/bin/phpize 134 | Configuring for: 135 | PHP Api Version: 20121113 136 | Zend Module Api No: 20121113 137 | Zend Extension Api No: 220121113 138 | 139 | /tmp/apcu-4.0.2> ./configure --with-php-config=$HOME/myphp/bin/php-config 140 | /tmp/apcu-4.0.2> make -jN && make install 141 | 142 | You should always specify the ``--with-php-config`` option when building extensions (unless you have only a single, 143 | global installation of PHP), otherwise ``./configure`` will not be able to correctly determine what PHP version and 144 | flags to build against. Specifying the ``php-config`` script also ensures that ``make install`` will move the generated 145 | ``.so`` file (which can be found in the ``modules/`` directory) to the right extension directory. 146 | 147 | As the ``run-tests.php`` file was also copied during the ``phpize`` stage, you can run the extension tests using 148 | ``make test`` (or an explicit call to ``run-tests.php``). 149 | 150 | The ``make clean`` target for removing compiled objects is also available and allows you to force a full rebuild of 151 | the extension, should the incremental build fail after a change. 
Additionally phpize provides a cleaning option via 152 | ``phpize --clean``. This will remove all the files imported by ``phpize``, as well as the files generated by the 153 | ``./configure`` script. 154 | 155 | Displaying information about extensions 156 | --------------------------------------- 157 | 158 | The PHP CLI binary provides several options to display information about extensions. You already know ``-m``, which will 159 | list all loaded extensions. You can use it to verify that an extension was loaded correctly:: 160 | 161 | ~/myphp/bin> ./php -dextension=apcu -m | grep apcu 162 | apcu 163 | 164 | There are several further switches beginning with ``--r`` that expose Reflection functionality. For example you can use 165 | ``--ri`` to display the configuration of an extension:: 166 | 167 | ~/myphp/bin> ./php -dextension=apcu --ri apcu 168 | apcu 169 | 170 | APCu Support => disabled 171 | Version => 4.0.2 172 | APCu Debugging => Disabled 173 | MMAP Support => Enabled 174 | MMAP File Mask => 175 | Serialization Support => broken 176 | Revision => $Revision: 328290 $ 177 | Build Date => Jan 1 2014 16:40:00 178 | 179 | Directive => Local Value => Master Value 180 | apc.enabled => On => On 181 | apc.shm_segments => 1 => 1 182 | apc.shm_size => 32M => 32M 183 | apc.entries_hint => 4096 => 4096 184 | apc.gc_ttl => 3600 => 3600 185 | apc.ttl => 0 => 0 186 | # ... 187 | 188 | The ``--re`` switch lists all ini settings, constants, functions and classes added by an extension: 189 | 190 | .. code-block:: none 191 | 192 | ~/myphp/bin> ./php -dextension=apcu --re apcu 193 | Extension [ extension #27 apcu version 4.0.2 ] { 194 | - INI { 195 | Entry [ apc.enabled ] 196 | Current = '1' 197 | } 198 | Entry [ apc.shm_segments ] 199 | Current = '1' 200 | } 201 | # ... 
202 | } 203 | 204 | - Constants [1] { 205 | Constant [ boolean APCU_APC_FULL_BC ] { 1 } 206 | } 207 | 208 | - Functions { 209 | Function [ function apcu_cache_info ] { 210 | 211 | - Parameters [2] { 212 | Parameter #0 [ $type ] 213 | Parameter #1 [ $limited ] 214 | } 215 | } 216 | # ... 217 | } 218 | } 219 | 220 | The ``--re`` switch only works for normal extensions, Zend extensions use ``--rz`` instead. You can try this on 221 | opcache:: 222 | 223 | ~/myphp/bin> ./php -dzend_extension=opcache --rz "Zend OPcache" 224 | Zend Extension [ Zend OPcache 7.0.3-dev Copyright (c) 1999-2013 by Zend Technologies ] 225 | 226 | As you can see, this doesn't display any useful information. The reason is that opcache registers both a normal 227 | extension and a Zend extension, where the former contains all ini settings, constants and functions. So in this 228 | particular case you still need to use ``--re``. Other Zend extensions make their information available via ``--rz`` 229 | though. 230 | -------------------------------------------------------------------------------- /Book/php7/classes_objects.rst: -------------------------------------------------------------------------------- 1 | Classes and objects 2 | =================== 3 | 4 | This chapter covers the rather complex internals of PHP's object orientation system. 5 | 6 | Contents: 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | classes_objects/object_handlers.rst 12 | -------------------------------------------------------------------------------- /Book/php7/classes_objects/object_handlers.rst: -------------------------------------------------------------------------------- 1 | Object handlers 2 | =============== 3 | 4 | Nearly all operations on objects in PHP go through object handlers and every magic method or magic interface is 5 | implemented with an object or class handler internally. Furthermore there are quite a few handlers which are not exposed 6 | to userland PHP. 
For example internal classes can have custom comparison and cast behavior. 7 | 8 | An Overview 9 | ----------- 10 | 11 | Here are all the object handlers with their signature and a small description. 12 | 13 | .. c:member:: 14 | zval *read_property(zend_object *object, zend_string *member, int type, void **cache_slot, zval *rv) 15 | zval *write_property(zend_object *object, zend_string *member, zval *value, void **cache_slot) 16 | int has_property(zend_object *zobj, zend_string *name, int has_set_exists, void **cache_slot) 17 | void unset_property(zend_object *zobj, zend_string *name, void **cache_slot) 18 | zval *get_property_ptr_ptr(zend_object *zobj, zend_string *name, int type, void **cache_slot) 19 | 20 | These handlers correspond to the ``__get``, ``__set``, ``__isset`` and ``__unset`` methods. ``get_property_ptr_ptr`` 21 | is the internal equivalent of ``__get`` returning by reference. ``cache_slot`` is used to store the property 22 | offset and ``zend_property_info``. ``read_property`` may directly return a zval owned by the object, in which case 23 | its reference count should not be modified by ``read_property``, and the caller should not release it. 24 | Alternatively, it may return ``rv`` for temporary zvals (e.g. result of call to ``__get``), in which case the 25 | refcount should be incremented, and the caller is responsible for releasing the value. 26 | 27 | .. c:member:: 28 | zval *read_dimension(zend_object *object, zval *offset, int type, zval *rv) 29 | void write_dimension(zend_object *object, zval *offset, zval *value) 30 | int has_dimension(zend_object *object, zval *offset, int check_empty) 31 | void unset_dimension(zend_object *object, zval *offset) 32 | 33 | This set of handlers is the internal representation of the ``ArrayAccess`` interface. ``zval *rv`` in 34 | ``read_dimension`` is used for temporary values returned from ``offsetGet`` and ``offsetExists``. 35 | 36 | .. 
c:member:: 37 | HashTable *get_properties(zend_object *zobj) 38 | HashTable *get_debug_info(zend_object *object, int *is_temp) 39 | 40 | Used to get the object properties as a hashtable. The former is more general purpose, for example it is also used 41 | for the ``get_object_vars`` function. The latter on the other hand is used exclusively to display properties in 42 | debugging functions like ``var_dump``. So even if your object does not provide any formal properties you can still 43 | have a meaningful debug output. 44 | 45 | .. c:member:: 46 | zend_function *get_method(zend_object **obj_ptr, zend_string *method_name, const zval *key) 47 | 48 | The ``get_method`` handler fetches the ``zend_function`` used to call a certain method. Optionally ``key`` can be 49 | passed as an optimization to avoid lowercasing ``method_name`` in case it is already present. 50 | 51 | .. c:member:: 52 | zend_function *get_constructor(zend_object *zobj) 53 | 54 | Like ``get_method``, but getting the constructor function. The most common reason to override this handler is to 55 | disallow manual construction by throwing an error in the handler. 56 | 57 | .. c:member:: 58 | zend_result count_elements(zend_object *object, zend_long *count) 59 | 60 | This is just the internal way of implementing the ``Countable::count`` method. The function returns a 61 | ``zend_result`` and assigns the value to the ``zend_long *count`` pointer. 62 | 63 | .. FIXME: Change return type of count_elements to zend_result to make it more obvious the count is not returned? 64 | 65 | .. c:member:: 66 | int compare(zval *o1, zval *o2) 67 | 68 | The ``compare`` handler is a required handler that computes equality of the given object and another value. Note 69 | that the other value isn't necessarily an object of the same class, or even an object at all. The handler should 70 | return a negative number if the lhs is smaller, 0 if they are equal, or a positive number if the lhs is larger. 
If 71 | the values are uncomparable ``ZEND_UNCOMPARABLE`` should be returned. 72 | 73 | .. c:member:: 74 | zend_result cast_object(zend_object *readobj, zval *writeobj, int type) 75 | 76 | Internal classes have the ability to implement a custom compare behavior and override casting behavior for all 77 | types. Userland classes on the other hand only have the ability to override object to string casting through 78 | ``__toString``. 79 | 80 | .. c:member:: 81 | zend_result get_closure(zend_object *obj, zend_class_entry **ce_ptr, zend_function **fptr_ptr, zend_object **obj_ptr, bool check_only) 82 | 83 | This handler is invoked when the object is used as a function, i.e. it is the internal version of ``__invoke``. 84 | The name derives from the fact that its main use is for the implementation of closures (the ``Closure`` class). 85 | 86 | .. c:member:: 87 | zend_string *get_class_name(const zend_object *zobj) 88 | 89 | This handler is used to get the class name from an object for debugging contexts. There should be little reason to 90 | overwrite it. 91 | 92 | .. c:member:: 93 | zend_object *clone_obj(zend_object *old_object) 94 | 95 | The ``clone_obj`` handler is called when executing ``clone $old_object``. By default PHP performs a shallow clone 96 | on objects, which means properties containing objects are not cloned but both the old and new object will point 97 | to the same object. The ``clone_obj`` handler allows for this behavior to be customized. It's also used to inhibit ``clone`` 98 | altogether. 99 | 100 | .. c:member:: 101 | HashTable *get_gc(zend_object *zobj, zval **table, int *n) 102 | 103 | The ``get_gc`` handler should return all variables that are held by the object, so cyclic dependencies can be 104 | properly collected. If the object doesn't maintain a property hashmap (because it doesn't store any dynamic 105 | properties) it can use ``table`` to store a pointer directly into the list of zvals, along with a count of 106 | properties. 107 | 108 | .. 
c:member:: 109 | void dtor_obj(zend_object *object) 110 | void free_obj(zend_object *object) 111 | 112 | ``dtor_obj`` is called before ``free_obj``. The object must remain in a valid state after dtor_obj finishes running. 113 | Unlike ``free_obj``, it is run prior to deactivation of the executor during shutdown, which allows user code to run. 114 | This handler is not guaranteed to be called (e.g. on fatal error), and as such should not be used to release 115 | resources or deallocate memory. Furthermore, releasing resources in this handler can break detection of memory 116 | leaks, as cycles may be broken early. ``dtor_obj`` should be used only to call user destruction hooks, such as 117 | ``__destruct``. 118 | 119 | ``free_obj`` should release any resources the object holds, without freeing the object structure itself. The object 120 | does not need to be in a valid state after ``free_obj`` finishes running. ``free_obj`` will always be invoked, even 121 | if the object leaks or a fatal error occurs. However, during shutdown it may be called once the executor is no 122 | longer active, in which case execution of user code may be skipped. 123 | 124 | .. c:member:: 125 | zend_result do_operation(zend_uchar opcode, zval *result, zval *op1, zval *op2) 126 | 127 | ``do_operation`` is an optional handler that will be invoked for various arithmetic and binary operations on 128 | instances of the given class. This allows for operator overloading semantics to be implemented for custom classes. 129 | Examples for overloadable operators are ``+``, ``-``, ``*``, ``/``, ``++``, ``--``, ``!``. 130 | 131 | .. c:member:: 132 | zend_array *get_properties_for(zend_object *object, zend_prop_purpose purpose) 133 | 134 | The ``get_properties_for`` can be used to customize the list of object properties returned for various purposes. 
135 | The purposes are defined in ``zend_prop_purpose``, which currently entails ``print_r``, ``var_dump``, the 136 | ``(array)`` cast, ``serialize``, ``var_export`` and ``json_encode``. 137 | -------------------------------------------------------------------------------- /Book/php7/debugging.rst: -------------------------------------------------------------------------------- 1 | Debugging with GDB 2 | ================== 3 | 4 | This chapter will introduce you to the GNU debugger, aka GDB. When a crash happens, you usually have to find the 5 | guilty part in thousands of lines. You need tools for that, and GDB is the most commonly used debugger on Unix 6 | platforms. Here we'll give you an introduction to GDB and how to practice with it against the PHP source code. 7 | 8 | Debug symbols 9 | ------------- 10 | 11 | GDB requires debug symbols to map the memory addresses in your binary to the original position in your source code. To 12 | generate debug symbols you need to pass the ``--enable-debug`` flag to the ``./configure`` script. To get even more 13 | debugging information you may add the ``CFLAGS="-ggdb3"`` flag which will add support for macros. 14 | 15 | Debugging the VM 16 | ---------------- 17 | 18 | The VM lives in the big ``zend_vm_execute.h`` file that is generated by ``zend_vm_gen.php`` from ``zend_vm_def.h``. 19 | Debugging this file can be tedious because it is very large, and it's often not obvious which specialized handler 20 | will run. Luckily, it is possible to debug ``zend_vm_def.h`` directly by generating the VM using the 21 | ``php Zend/zend_vm_gen.php --with-lines`` command. This will annotate the code with the ``#line`` preprocessor directive 22 | to allow the debugger to know the origin of the generated instructions. 
23 | -------------------------------------------------------------------------------- /Book/php7/extensions_design.rst: -------------------------------------------------------------------------------- 1 | Extensions design 2 | ================= 3 | 4 | In this chapter you'll learn how to design PHP extensions. You'll learn about the PHP lifecycle, how and when to manage 5 | memory, the different hooks you can use or the different function pointers you can replace to actively change PHP's 6 | internal machinery. You'll design PHP functions and classes to provide them through your extension, and you'll play 7 | with INI settings. 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | extensions_design/php_lifecycle.rst 15 | extensions_design/extension_skeleton.rst 16 | extensions_design/php_functions.rst 17 | extensions_design/globals_management.rst 18 | extensions_design/extension_infos.rst 19 | extensions_design/hooks.rst 20 | extensions_design/ini_settings.rst 21 | extensions_design/zend_extensions.rst 22 | 23 | -------------------------------------------------------------------------------- /Book/php7/extensions_design/extension_infos.rst: -------------------------------------------------------------------------------- 1 | Publishing extension information 2 | ================================= 3 | 4 | Extensions can publish information asked by ``phpinfo()`` or the Reflection API. Let's see that together. 5 | 6 | This chapter won't be too long as there is really no difficulty. 7 | 8 | MINFO() hook 9 | ------------ 10 | 11 | Everything takes place in the ``MINFO()`` hook you declared, if you declared one. If you declared none, then the engine 12 | will run a default function to print information about your extension. That function will only print the version of 13 | your extension and the :doc:`INI entries ` you eventually declared. If you want to hook into such 14 | process, you must declare an ``MINFO()`` :doc:`hook ` in your extension structure. 
15 | 16 | .. note:: Everything takes place in `ext/standard/info.c `_ , you may read that file. Printing 18 | information about PHP extensions is done by the engine by calling `php_info_print_module() 19 | `_ 20 | 21 | Here is a simple ``MINFO()`` example:: 22 | 23 | #include "php/main/SAPI.h" 24 | #include "ext/standard/info.h" 25 | 26 | #define PIB_TXT "PHPInternalsBook Authors" 27 | #define PIB_HTML "

" PIB_TXT "

" 28 | 29 | PHP_MINFO_FUNCTION(pib) 30 | { 31 | time_t t; 32 | char cur_time[32]; 33 | 34 | time(&t); 35 | php_asctime_r(localtime(&t), cur_time); 36 | 37 | php_info_print_table_start(); 38 | php_info_print_table_colspan_header(2, "PHPInternalsBook"); 39 | php_info_print_table_row(2, "Current time", cur_time); 40 | php_info_print_table_end(); 41 | 42 | php_info_print_box_start(0); 43 | if (!sapi_module.phpinfo_as_text) { 44 | php_write(PIB_HTML, strlen(PIB_HTML)); 45 | } else { 46 | php_write(PIB_TXT, strlen(PIB_TXT)); 47 | } 48 | php_info_print_box_end(); 49 | } 50 | 51 | zend_module_entry pib_module_entry = { 52 | STANDARD_MODULE_HEADER, 53 | "pib", 54 | NULL, /* Function entries */ 55 | NULL, /* Module init */ 56 | NULL, /* Module shutdown */ 57 | NULL, /* Request init */ 58 | NULL, /* Request shutdown */ 59 | PHP_MINFO(pib), /* Module information */ 60 | "0.1", /* Replace with version number for your extension */ 61 | STANDARD_MODULE_PROPERTIES 62 | }; 63 | 64 | .. image:: ./images/php_minfo.png 65 | :align: center 66 | 67 | What you basically have to do is to deal with ``php_info_print_*()`` API, that allows to print into the output stream 68 | that is generated. If you want to print some raw information, a simple ``php_write()`` is enough. ``php_write()`` just 69 | writes what you pass as argument onto the SAPI output stream, whereas ``php_info_print_*()`` API does as well, but 70 | before formats the content using HTML *table-tr-td* tags if the output is expected to be HTML, or simple spaces if not. 71 | 72 | Like you can see, you need to include *ext/standard/info.h* to access the ``php_info_print_*()`` API, and you will need 73 | *php/main/SAPI.h* to access the ``sapi_module`` symbol. That symbol is global, it represents the current *SAPI* used by 74 | the PHP process. The ``phpinfo_as_text`` field inform if you are going to write in a "Web" SAPI like *php-fpm* f.e, or 75 | in a "text" one, like *php-cli*. 
76 | 77 | The following will trigger your ``MINFO()`` hook: 78 | 79 | * Calls to userland ``phpinfo()`` function 80 | * ``php -i``, ``php-cgi -i``, ``php-fpm -i``. More generally ``<SAPI_binary> -i`` 81 | * ``php --ri`` or userland ``ReflectionExtension::info()`` 82 | 83 | .. note:: Take care of the output formatting. Probe for ``sapi_module.phpinfo_as_text`` if you need to change between 84 | text and HTML formatting. You don't know how your extensions' infos will be called by userland. 85 | 86 | If you need to display your INI settings, just call the ``DISPLAY_INI_ENTRIES()`` macro in your ``MINFO()``. This 87 | macro resolves to `display_ini_entries() 88 | `_. 89 | 90 | A note about the Reflection API 91 | ------------------------------- 92 | 93 | The Reflection API heavily uses your ``zend_module_entry`` structure. For example, when you call 94 | ``ReflectionExtension::getVersion()``, the API just reads the version field of your ``zend_module_entry`` structure. 95 | 96 | The same goes for discovering functions: your ``zend_module_entry`` has a ``const struct _zend_function_entry *functions`` member 97 | which is used to register PHP functions. 98 | 99 | Basically, the PHP userland Reflection API just reads your ``zend_module_entry`` structure and publishes that 100 | information. It may also use your ``module_number`` to gather back information your extension registered at different 101 | locations against the engine. For example, ``ReflectionExtension::getINIentries()`` or 102 | ``ReflectionExtension::getClasses()`` use this. 
103 | -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_classic_lifetime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_classic_lifetime.png -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_extensions_ini.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_extensions_ini.png -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_extensions_lifecycle.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_extensions_lifecycle.odg -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_extensions_lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_extensions_lifecycle.png -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_extensions_lifecycle_full.odg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_extensions_lifecycle_full.odg -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_extensions_lifecycle_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_extensions_lifecycle_full.png -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_lifetime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_lifetime.png -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_lifetime_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_lifetime_process.png -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_lifetime_thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_lifetime_thread.png -------------------------------------------------------------------------------- /Book/php7/extensions_design/images/php_minfo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/extensions_design/images/php_minfo.png -------------------------------------------------------------------------------- /Book/php7/internal_types.rst: -------------------------------------------------------------------------------- 1 | Internal types 2 | ============== 3 | 4 | In this chapter we will detail the special types used internally by PHP. 5 | 6 | Contents: 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | internal_types/strings.rst 12 | internal_types/zend_resources.rst 13 | internal_types/hashtables.rst 14 | internal_types/functions.rst 15 | -------------------------------------------------------------------------------- /Book/php7/internal_types/functions.rst: -------------------------------------------------------------------------------- 1 | Functions 2 | ======================== 3 | 4 | The body of PHP functions are represented with the ``zend_function`` structure. 5 | However, handling them is rarely done as they are solely needed for the VM. 6 | In general PHP ``callable`` s are what will need to be dealt with, which are represented by the pair of 7 | ``zend_fcall_info``/``zend_fcall_info_cache`` structures. 8 | 9 | 10 | TODO: Detail ``zend_function`` 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | functions/callables.rst -------------------------------------------------------------------------------- /Book/php7/internal_types/functions/callables.rst: -------------------------------------------------------------------------------- 1 | PHP Callables 2 | =================== 3 | 4 | Dealing with PHP functions in C requires the knowledge of the following two structures 5 | ``zend_fcall_info``/``zend_fcall_info_cache``. The first one necessarily contains the information for calling 6 | the function, such as arguments and the return value, but may also include the actual callable. 7 | The latter *only* contains the callable. 
We will use the common abbreviations FCI and FCC when talking about 8 | ``zend_fcall_info`` and ``zend_fcall_info_cache`` respectively. 9 | You will most likely encounter those when using the ZPP ``f`` argument flag, or when you need to call a PHP function 10 | or method from within an extension. 11 | 12 | Structure of ``zend_fcall_info`` 13 | -------------------------------- 14 | 15 | .. warning:: The implementation of ``zend_fcall_info`` is widely different prior to PHP 7.1.0. 16 | 17 | As of PHP 8.0.0, ``zend_fcall_info`` has the following structure: 18 | 19 | :: 20 | 21 | struct _zend_fcall_info { 22 | size_t size; 23 | zval function_name; 24 | zval *retval; 25 | zval *params; 26 | zend_object *object; 27 | uint32_t param_count; 28 | /* This hashtable can also contain positional arguments (with integer keys), 29 | * which will be appended to the normal params[]. This makes it easier to 30 | * integrate APIs like call_user_func_array(). The usual restriction that 31 | * there may not be position arguments after named arguments applies. */ 32 | HashTable *named_params; 33 | } zend_fcall_info; 34 | 35 | 36 | Let us detail the various FCI fields: 37 | 38 | ``size``: 39 | Mandatory field, which is the size of an FCI structure, thus always: ``sizeof(zend_fcall_info)`` 40 | ``function_name``: 41 | Mandatory field, the actual callable. Do not be fooled by the name of this field, as it is a leftover from when 42 | PHP didn't have objects and class methods. It must be a string zval or an array following the same rules as 43 | callables in PHP, namely the first index is a class or instance object, and the second one is the method name. 44 | It can also be undefined if, and only if, an initialized FCC is provided. 
45 | ``retval``: 46 | Mandatory field, which will contain the result of the PHP function. 47 | ``param_count``: 48 | Mandatory field, the number of arguments that will be provided to this call to the function. 49 | ``params``: 50 | Contains the positional arguments that will be provided to this call to the function. 51 | If ``param_count = 0``, it can be ``NULL``. 52 | ``object``: 53 | The object on which to call the method name stored in ``function_name``, or ``NULL`` if no objects are involved. 54 | ``named_params``: 55 | A HashTable containing named (or positional) arguments. 56 | 57 | .. note:: Prior to PHP 8.0.0, the ``named_params`` field did not exist. However, a ``zend_bool no_separation;`` 58 | field existed which specified if array arguments should be separated or not. 59 | 60 | Structure of ``zend_fcall_info_cache`` 61 | -------------------------------------- 62 | 63 | A ``zend_fcall_info_cache`` has the following structure: 64 | 65 | :: 66 | 67 | typedef struct _zend_fcall_info_cache { 68 | zend_function *function_handler; 69 | zend_class_entry *calling_scope; 70 | zend_class_entry *called_scope; 71 | zend_object *object; 72 | } zend_fcall_info_cache; 73 | 74 | Let us detail the various FCC fields: 75 | 76 | ``function_handler``: 77 | The actual body of a PHP function that will be used by the VM, can be retrieved from the global function table 78 | or a class function table (``zend_class_entry->function_table``). 79 | ``object``: 80 | If the function is an object method, this field is the relevant object. 81 | ``called_scope``: 82 | The scope in which to call the method, generally it's ``object->ce``. 83 | ``calling_scope``: 84 | The scope in which this call is made, only used by the VM. 85 | 86 | .. warning:: Prior to PHP 7.3.0 there existed an ``initialized`` field. Now an FCC is considered initialized when 87 | ``function_handler`` is set to a non-null pointer. 
88 | 89 | The *only* case where an FCC will be uninitialized is if the function is a trampoline, i.e. when the method 90 | of a class does not exist but is handled by the magic methods ``__call()``/``__callStatic()``. 91 | This is because a trampoline is freed by ZPP as it is a newly allocated ``zend_function`` struct with the 92 | op array copied, and is freed when called. To retrieve it manually use ``zend_is_callable_ex()``. 93 | 94 | .. warning:: It is not sufficient to just store the FCC to be able to call a user function at a later stage. 95 | If the callable zval from the FCI is an object (because it has an ``__invoke`` method, is a ``Closure``, 96 | or a trampoline) then a reference to the ``zend_object`` must also be stored, the refcount incremented, 97 | and released as needed. Moreover, if the callable is a trampoline the ``function_handler`` must be copied 98 | to be persisted between calls (see how SPL implements the storage of autoloading functions). 99 | 100 | .. note:: To determine that two user functions are equal, comparing the ``function_handler``, ``object``, 101 | ``called_scope``, ``calling_scope``, and the pointer to the ``zend_object`` for closures is generally sufficient. 102 | The exception is when the user function is a trampoline: because the ``function_handler`` is reallocated for every 103 | call, one needs to compare the ``function_handler->common.function_name`` field using 104 | ``zend_string_equals()`` instead of comparing the pointers of the function handler directly. 105 | 106 | .. note:: In most cases an FCC does not need to be released. The exception is if the FCC may hold a trampoline, 107 | in which case the ``void zend_release_fcall_info_cache(zend_fcall_info_cache *fcc)`` function should be used to release it. 108 | Moreover, if a reference to the closure is kept, this must be called *prior* to freeing the closure, 109 | as the trampoline will partially refer to a ``zend_function *`` entry in the closure CE. 110 | 111 | .. 
112 | This API is still being worked on and won't be usable for a year 113 | note:: As of PHP 8.3.0, the FCC holds a ``closure`` field and a dedicated API to handle storing userland callables. 114 | 115 | Zend Engine API for callables 116 | ----------------------------- 117 | 118 | The API is spread across various locations in the ``Zend_API.h`` header file. 119 | We will describe the various APIs needed to deal with callables in PHP. 120 | 121 | First of all, to check if an FCI is initialized use the ``ZEND_FCI_INITIALIZED(fci)`` macro. 122 | 123 | .. And, as of PHP 8.3.0, the ``ZEND_FCC_INITIALIZED(fcc)`` macro to check if an FCC is initialized. 124 | 125 | If you have a correctly initialized and set up FCI/FCC pair for a callable you can call it directly by using the 126 | ``zend_call_function(zend_fcall_info *fci, zend_fcall_info_cache *fci_cache)`` function. 127 | 128 | .. warning:: The ``zend_fcall_info_arg*()`` and ``zend_fcall_info_call()`` APIs should not be used. 129 | The ``zval *args`` parameter does *not* set the ``params`` field of the FCI directly. 130 | Instead, it is expected to be a PHP array (IS_ARRAY zval) containing positional arguments, which will be reallocated 131 | into a new C array. As the ``named_params`` field accepts positional arguments, it is generally better to simply 132 | assign the HashTable pointer of this argument to this field. 133 | Moreover, as arguments to a userland call are predetermined and stack allocated it is better to assign the 134 | ``params`` and ``param_count`` fields directly. 135 | 136 | .. 137 | note:: As of PHP 8.3.0, the ``zend_call_function_with_return_value(*fci, *fcc, zval *retval)`` function has 138 | been added to replace the usage of ``zend_fcall_info_call(fci, fcc, retval, NULL)``. 139 | 140 | In the more likely case where you just have a callable zval, you have the choice of a couple of different options 141 | depending on the use case. 
142 | 143 | For a one-off call the ``call_user_function(function_table, object, function_name, retval_ptr, param_count, params)`` 144 | and ``call_user_function_named(function_table, object, function_name, retval_ptr, param_count, params, named_params)`` 145 | macro-functions will do the trick. 146 | 147 | .. note:: As of PHP 7.1.0, the ``function_table`` argument is not used and should always be ``NULL``. 148 | 149 | The drawback of those functions is that they will verify the zval is indeed callable, and create a FCI/FCC pair on 150 | every call. If you know you will need to call these functions multiple time it's best to create a FCI/FCC pair yourself 151 | by using the ``zend_result zend_fcall_info_init(zval *callable, uint32_t check_flags, zend_fcall_info *fci, 152 | zend_fcall_info_cache *fcc, zend_string **callable_name, char **error)`` function. 153 | If this function returns ``FAILURE``, then the zval is not a proper callable. 154 | ``check_flags`` is forwarded to ``zend_is_callable_ex()``, generally you don't want to pass any modifying flags, 155 | however ``IS_CALLABLE_SUPPRESS_DEPRECATIONS`` might be useful in certain cases. 156 | 157 | In case you just have an FCC (or a combination of ``zend_function`` and ``zend_object``) you can use the following 158 | functions:: 159 | 160 | /* Call the provided zend_function with the given params. 161 | * If retval_ptr is NULL, the return value is discarded. 162 | * If object is NULL, this must be a free function or static call. 163 | * called_scope must be provided for instance and static method calls. */ 164 | ZEND_API void zend_call_known_function( 165 | zend_function *fn, zend_object *object, zend_class_entry *called_scope, zval *retval_ptr, 166 | uint32_t param_count, zval *params, HashTable *named_params); 167 | 168 | /* Call the provided zend_function instance method on an object. 
*/ 169 | static zend_always_inline void zend_call_known_instance_method( 170 | zend_function *fn, zend_object *object, zval *retval_ptr, 171 | uint32_t param_count, zval *params) 172 | { 173 | zend_call_known_function(fn, object, object->ce, retval_ptr, param_count, params, NULL); 174 | } 175 | 176 | And specific parameter number variations for the latter. 177 | 178 | .. note:: If you want to call a method on an object if it exists use the ``zend_call_method_if_exists()`` function. 179 | -------------------------------------------------------------------------------- /Book/php7/internal_types/hashtables.rst: -------------------------------------------------------------------------------- 1 | HashTables: zend_array 2 | ======================= 3 | 4 | 5 | -------------------------------------------------------------------------------- /Book/php7/internal_types/strings.rst: -------------------------------------------------------------------------------- 1 | Strings 2 | ======= 3 | 4 | Rather than using a plain ``char *`` pointer, PHP uses a custom ``zend_string`` type to 5 | represent strings. This chapter discusses how to work with this structure, as well as various 6 | string-related utilities. 7 | 8 | .. 
toctree:: 9 | :maxdepth: 2 10 | 11 | strings/zend_strings.rst 12 | strings/smart_str.rst 13 | strings/printing_functions.rst 14 | -------------------------------------------------------------------------------- /Book/php7/internal_types/strings/images/zend_string_memory_layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phpinternalsbook/PHP-Internals-Book/93565dad5f95517f7a87c54805d31008150cf670/Book/php7/internal_types/strings/images/zend_string_memory_layout.png -------------------------------------------------------------------------------- /Book/php7/internal_types/strings/printing_functions.rst: -------------------------------------------------------------------------------- 1 | PHP's custom printf functions 2 | ============================= 3 | 4 | You all know libc's ``printf()`` and family. This chapter will detail those many clones PHP declares and use, what's 5 | their goal, why use them and when to use them. 6 | 7 | .. note:: Libc's documentation about ``printf()`` and friends 8 | `is located here `_ 9 | 10 | You know that those functions are useful, but sometimes don't provide enough functionalities. 11 | Also, you know that 12 | `adding format strings `_ to ``printf()`` 13 | family is not trivial, not portable and security risky. 14 | 15 | PHP adds its own printf-like functions to replace libc ones and to be used by the internal developer. 16 | They will mainly add new formats, play with :doc:`zend_string` instead of 17 | ``char *``, etc... Let's see them together. 18 | 19 | .. warning:: You must master your libc default ``printf()`` formats. Read 20 | `their documentation here `_. 21 | 22 | .. note:: Those functions are added **to replace** libc ones, that means that if you use ``sprintf()`` f.e, that won't 23 | lead to libc's ``sprintf()``, but to PHP replacement. Except the traditional ``printf()``, everything else 24 | is replaced. 
25 | 26 | Traditional use 27 | *************** 28 | 29 | First of all, you should not use ``sprintf()``, as that function doesn't perform any check and allows many buffer 30 | overflow errors. Please, try to avoid using it. 31 | 32 | .. warning:: Please try to avoid using ``sprintf()`` as much as possible. 33 | 34 | Then, you have some choice. 35 | 36 | You know your result buffer size 37 | -------------------------------- 38 | 39 | If you know your buffer size, ``snprintf()`` or ``slprintf()`` will do the job for you. There is a difference in what 40 | those functions return, but not in what those functions do. 41 | 42 | They both print according to the formats passed, and they both terminate your buffer by a ``NUL`` byte *'\\0'* whatever 43 | happens. However, ``snprintf()`` returns the number of characters that could have been used, whereas ``slprintf()`` 44 | returns the number of characters that have effectively been used, thus making it possible to detect too-small buffers and string 45 | truncation. This count does not include the final *'\\0'*. 46 | 47 | Here is an example so that you fully understand:: 48 | 49 | char foo[8]; /* 8-char large buffer */ 50 | const char str[] = "Hello world"; /* 12 chars including \0 in count */ 51 | int r; 52 | 53 | r = snprintf(foo, sizeof(foo), "%s", str); 54 | /* r = 11 here even if only 7 printable chars were written in foo */ 55 | 56 | /* foo value is now 'H' 'e' 'l' 'l' 'o' ' ' 'w' '\0' */ 57 | 58 | ``snprintf()`` is not a good function to use, as it does not allow you to detect a possible string truncation. 59 | As you can see from the example above, "Hello world\\0" doesn't fit in an eight-byte buffer, that's obvious, but 60 | ``snprintf()`` still returns you 11, which is ``strlen("Hello world\0")``. You have no way to detect that the string 61 | got truncated. 
62 | 63 | Here is ``slprintf()``:: 64 | 65 | char foo[8]; /* 8-char large buffer */ 66 | const char str[] = "Hello world"; /* 12 chars including \0 in count */ 67 | int r; 68 | 69 | r = slprintf(foo, sizeof(foo), "%s", str); 70 | /* r = 7 here , because 7 printable chars were written in foo */ 71 | 72 | /* foo value is now 'H' 'e' 'l' 'l' 'o' ' ' 'w' '\0' */ 73 | 74 | With ``slprintf()``, the result buffer ``foo`` contains the exact same string, but the returned value is now 7. 7 is 75 | less than the 11 chars from the *"Hello world"* string, thus you can detect that it got truncated:: 76 | 77 | if (slprintf(foo, sizeof(foo), "%s", str) < strlen(str)) { 78 | /* A string truncation occurred */ 79 | } 80 | 81 | Remember: 82 | 83 | * Those two functions always ``NUL`` terminate the string, truncation or not. Result strings are then safe C strings. 84 | * Only ``slprintf()`` allows you to detect a string truncation. 85 | 86 | Those two functions are defined in 87 | `main/snprintf.c `_ 88 | 89 | You don't know your buffer size 90 | ------------------------------- 91 | 92 | Now if you don't know your result buffer size, you need a dynamically allocated one, and then you'll use ``spprintf()``. 93 | Remember that **you'll have to free** the buffer by yourself ! 94 | 95 | Here is an example:: 96 | 97 | #include <time.h> 98 | 99 | char *result; 100 | int r; 101 | 102 | time_t timestamp = time(NULL); 103 | 104 | r = spprintf(&result, 0, "Here is the date: %s", asctime(localtime(&timestamp))); 105 | 106 | /* now use result that contains something like "Here is the date: Thu Jun 15 19:12:51 2017\n" */ 107 | 108 | efree(result); 109 | 110 | ``spprintf()`` returns the number of characters that've been printed into the result buffer, not counting the final 111 | *'\\0'*, hence you know the number of bytes that got allocated for you (minus one). 
112 | 113 | Please, note that the allocation is done using ZendMM (request allocation), and should thus be used as part of a 114 | request and freed using ``efree()`` and not ``free()``. 115 | 116 | .. note:: :doc:`The chapter about Zend Memory Manager <../../memory_management/zend_memory_manager>` (ZendMM) details 117 | how dynamic memory is allocated through PHP. 118 | 119 | If you want to limit the buffer size, you pass that limit as the second argument, if you pass *0*, that means 120 | unlimited:: 121 | 122 | #include <time.h> 123 | 124 | char *result; 125 | int r; 126 | 127 | time_t timestamp = time(NULL); 128 | 129 | /* Do not print more than 10 bytes || allocate more than 11 bytes */ 130 | r = spprintf(&result, 10, "Here is the date: %s", asctime(localtime(&timestamp))); 131 | 132 | /* r == 10 here, and 11 bytes were allocated into result */ 133 | 134 | efree(result); 135 | 136 | .. note:: Whenever possible, try not to use dynamic memory allocations. That impacts performance. If you have the 137 | choice, go for the static stack allocated buffer. 138 | 139 | ``spprintf()`` is written in 140 | `main/spprintf.c `_. 141 | 142 | What about printf() ? 143 | --------------------- 144 | 145 | If you need to ``printf()``, aka to print formatted to the output stream, use ``php_printf()``. That function 146 | internally uses ``spprintf()``, and thus performs a dynamic allocation that it frees itself just after having sent it 147 | to the SAPI output, aka stdout in case of CLI, or the output buffer (CGI buffer f.e) for other SAPIs. 148 | 149 | Special PHP printf formats 150 | -------------------------- 151 | 152 | Remember that PHP replaces most of libc's ``printf()`` functions with implementations of its own design. You can have a look at 153 | the argument parsing API which is easy to understand `from reading the source 154 | `_. 155 | 156 | What that means is that arguments parsing algo has been fully rewritten, and may differ from what you're used to in libc. 
157 | F.e, the libc locale is not taken care of in most cases. 158 | 159 | Special formats may be used, like *"%I64"* to explicitly print to an int64, or *"%I32"*. 160 | You can also use *"%Z"* to make a zval printable (according to PHP cast rules to string), that one is a great addition. 161 | 162 | The formatter will also recognize infinite numbers and print "INF", or "NAN" for not-a-number. 163 | 164 | If you make a mistake, and ask the formatter to print a ``NULL`` pointer, where libc will crash for sure, PHP will 165 | return *"(null)"* as a result string. 166 | 167 | .. note:: If in a printf you see a magic *"(null)"* appearing, that means you passed a NULL pointer to one of PHP 168 | printf family functions. 169 | 170 | 171 | Printf()ing into zend_strings 172 | ----------------------------- 173 | 174 | As :doc:`zend_string ` are a very common structure into PHP source, you may need to ``printf()`` into a 175 | ``zend_string`` instead of a traditional C ``char *``. For this, use ``strpprintf()``. 176 | 177 | The API is ``zend_string *strpprintf(size_t max_len, const char *format, ...)`` that means that the ``zend_string`` is 178 | returned to you, and not the number of printed chars as you may expect. You can limit that number though, using the 179 | first parameter (pass 0 to mean infinite); and you must remember that the ``zend_string`` will be allocated using the 180 | Zend Memory Manager, and thus bound to the current request. 181 | 182 | Obviously, the format API is shared with the one seen above. 183 | 184 | Here is a quick example:: 185 | 186 | zend_string *result; 187 | 188 | result = strpprintf(0, "You are using PHP %s", PHP_VERSION); 189 | 190 | /* Do something with result */ 191 | 192 | zend_string_release(result); 193 | 194 | A note on ``zend_`` API 195 | ----------------------- 196 | 197 | You may meet ``zend_spprintf()``, or ``zend_strpprintf()`` functions. Those are the exact same as the ones seen above. 
198 | 199 | They are just here as part of the separation between the Zend Engine and PHP Core, a detail that is not important for 200 | us, as into the source code, everything gets mixed together. 201 | -------------------------------------------------------------------------------- /Book/php7/internal_types/strings/smart_str.rst: -------------------------------------------------------------------------------- 1 | smart_str API 2 | ============= 3 | 4 | That may seem strange, but the C language offers nearly nothing to play with strings (build, concatenate, shrink, 5 | expand, transform, etc...). C is a low level general purpose language one can use to build APIs to deal with more 6 | specific tasks, such as string constructions. 7 | 8 | .. note:: Obviously you all got that we talk about ASCII strings, aka bytes. No Unicode in there. 9 | 10 | PHP's ``smart_str`` is an API that will help you build strings and especially concatenate chunks of bytes into strings. 11 | This API seats next to :doc:`PHP's special printf() APIs` and :doc:`zend_string ` to 12 | help with strings management. 13 | 14 | smart_str VS smart_string 15 | ************************* 16 | 17 | Here are the two structures:: 18 | 19 | typedef struct { 20 | char *c; 21 | size_t len; 22 | size_t a; 23 | } smart_string; 24 | 25 | typedef struct { 26 | zend_string *s; 27 | size_t a; 28 | } smart_str; 29 | 30 | Like you can see, one will work with traditional C strings (as ``char*/size_t``) and the other will make use of the 31 | PHP's specific ``zend_string`` structure. 32 | 33 | We will detail the latter: ``smart_str``, that works with :doc:`zend_strings `. Both APIs are exactly the 34 | same, simply note that one (the one we'll detail here) starts by ``smart_str_**()`` and the other by 35 | ``smart_string_***()``. Don't confuse ! 36 | 37 | The ``smart_str`` API is detailed into `Zend/zend_smart_str.h 38 | `_ (also the .c 39 | file). 40 | 41 | .. 
warning:: ``smart_str`` is not to be confused with ``smart_string``. 42 | 43 | Basic API usage 44 | *************** 45 | 46 | So far so good, that API is really easy to manage. You basically stack-allocate a ``smart_str``, and pass its pointer to 47 | ``smart_str_***()`` API functions that manage the embedded ``zend_string`` for you. You build your string, use it, and 48 | then you free it. Nothing very strong in there right ? 49 | 50 | The embedded ``zend_string`` will be allocated whether 51 | :doc:`permanently or request-bound <../../memory_management/zend_memory_manager>`, that depends on the last extended API 52 | parameter you'll use:: 53 | 54 | smart_str my_str = {0}; 55 | 56 | smart_str_appends(&my_str, "Hello, you are using PHP version "); 57 | smart_str_appends(&my_str, PHP_VERSION); 58 | 59 | smart_str_appendc(&my_str, '\n'); 60 | 61 | smart_str_appends(&my_str, "You are using "); 62 | smart_str_append_unsigned(&my_str, zend_hash_num_elements(CG(function_table))); 63 | smart_str_appends(&my_str, " PHP functions"); 64 | 65 | smart_str_0(&my_str); 66 | 67 | /* Use my_str now */ 68 | PHPWRITE(ZSTR_VAL(my_str.s), ZSTR_LEN(my_str.s)); 69 | 70 | /* Don't forget to release/free it */ 71 | smart_str_free(&my_str); 72 | 73 | We can also use the embedded ``zend_string`` independently of the ``smart_str``:: 74 | 75 | smart_str my_str = {0}; 76 | 77 | smart_str_appends(&my_str, "Hello, you are using PHP version "); 78 | smart_str_appends(&my_str, PHP_VERSION); 79 | 80 | zend_string *str = smart_str_extract(my_str); 81 | RETURN_STR(str); 82 | 83 | /* We must not free my_str in this case */ 84 | 85 | ``smart_str_extract()`` returns a pre-allocated empty string if ``smart_str.s`` 86 | is ``NULL``. Otherwise, it adds a trailing *NUL* byte and trims the allocated 87 | memory to the string size. 
88 | 89 | We used here the simple API, the extended one ends with ``_ex()``, and allows you to tell if you want a persistent or 90 | a request-bound allocation for the underlying ``zend_string``. Example:: 91 | 92 | smart_str my_str = {0}; 93 | 94 | smart_str_appends_ex(&my_str, "Hello world", 1); /* 1 means persistent allocation */ 95 | 96 | Then, depending on what you want to append, you'll use the right API call. If you append a classical C string, you can 97 | use ``smart_str_appends(smart_str *dst, const char *src)``. If you make use of a binary string, and thus know its 98 | length, then use ``smart_str_appendl(smart_str *dst, const char *src, size_t len)``. 99 | 100 | The less specific ``smart_str_append(smart_str *dest, const zend_string *src)`` simply appends a ``zend_string`` to 101 | your ``smart_str`` string. And if you come to play with others ``smart_str``, use 102 | ``smart_str_append_smart_str(smart_str *dst, const smart_str *src)`` to combine them together. 103 | 104 | smart_str specific tricks 105 | ************************* 106 | 107 | * Never forget to finish your string with a call to ``smart_str_0()``. That puts a *NUL* char at the end of the embed 108 | string and make it compatible with libc string functions. 109 | * Never forget to free your string, with ``smart_str_free()``, once you're done with it. 110 | * Use ``smart_str_extract()`` to get a standalone ``zend_string`` when you have 111 | finished building the string. This takes care of calling ``smart_str_0()``, 112 | and of optimizing allocations. In this case, calling ``smart_str_free()`` is 113 | not necessary. 114 | * You can share the standalone ``zend_string`` later elsewhere playing with its reference 115 | counter. Please, visit the :doc:`zend_string dedicated chapter ` to know more about it. 116 | * You can play with ``smart_str`` allocations. Look at ``smart_str_alloc()`` and friends. 117 | * ``smart_str`` is heavily used into PHP's heart. 
For example, PHP's 118 | :doc:`specific printf() functions <printing_functions>` internally use a ``smart_str`` buffer. 119 | * ``smart_str`` is definitely an easy structure you need to master. 120 | 121 | -------------------------------------------------------------------------------- /Book/php7/internal_types/zend_resources.rst: -------------------------------------------------------------------------------- 1 | The Resource type: zend_resource 2 | ================================ 3 | 4 | Even though PHP could really get rid of the "resource" type, because custom object storage allows building a PHP 5 | representation of any abstract kind of data, that resource type still exists in the current version of PHP, and you may 6 | need to deal with it. 7 | 8 | If you need to create resources, we really would like to push you not to, but instead use objects and their 9 | custom storage management. Objects are the PHP type that can embed 10 | anything of any type. However, for historical reasons, PHP still knows about that special type "Resource", and still 11 | makes use of it in its heart or in some extensions. Let's see that type together. Beware however, it is really cryptic 12 | and suffers from a long past history, so don't be surprised about its design especially when reading the source code 13 | about it. 14 | 15 | What is the "Resource" type? 16 | ---------------------------- 17 | 18 | Easy enough: you already know about it. We are talking about this here: 19 | 20 | .. code-block:: php 21 | 22 | $fp = fopen('/proc/cpuinfo', 'r'); 23 | var_dump($fp); /* resource(2) of type (stream) */ 24 | 25 | Internally, a resource is bound to the ``zend_resource`` structure type:: 26 | 27 | struct _zend_resource { 28 | zend_refcounted_h gc; 29 | int handle; 30 | int type; 31 | void *ptr; 32 | }; 33 | 34 | We find the traditional ``zend_refcounted_h`` header, meaning that resources are reference countable. 
35 | 36 | The ``handle`` is an integer that is used internally by the engine to locate the resource in an internal resource 37 | table. It is used as the key for such a table. 38 | 39 | The ``type`` is used to group resources of the same type together. This is about the way resources get destroyed and 40 | how they are fetched back from their handle. 41 | 42 | Finally, the ``ptr`` field in ``zend_resource`` is your abstract data. Remember resources are about storing an abstract 43 | data that cannot fit in any data type PHP can represent natively (but objects could, like we said earlier). 44 | 45 | Resource types and resource destruction 46 | --------------------------------------- 47 | 48 | Resources must register a destructor. When users use resources in PHP userland, they usually don't bother cleaning 49 | those when they don't make use of them anymore. For example, it is not uncommon to see an ``fopen()`` call, and not see 50 | the matching ``fclose()`` call. Using the C language, this would be at best a bad idea, at worst a disaster. But using a 51 | high level language like PHP, you ease things. 52 | 53 | You, as an internal developer, must be prepared for the fact that users will create a lot of the resources you allow 54 | them to use, without properly cleaning them up and releasing memory/OS resources. You hence must register a destructor that 55 | will be called anytime the engine is about to destroy a resource of that type. 56 | 57 | Destructors are grouped by types, so are resources themselves. You won't apply the same destructor to a resource of type 58 | 'database' as to a resource of type 'file'. 59 | 60 | There also exist two kinds of resources, here again differentiated by their lifetime. 61 | 62 | * Classical resources, the most used ones, do not persist across several requests, their destructor is called at 63 | request shutdown. 
64 | * Persistent resources will persist across several requests and will only get destroyed when the PHP process dies. 65 | 66 | .. note:: You may be interested in :doc:`the PHP lifecycle <../extensions_design/php_lifecycle>` chapter that shows you 67 | the different steps occurring in PHP's process life. Also, the 68 | :doc:`Zend Memory Manager chapter <../memory_management/zend_memory_manager>` may help in understanding 69 | concepts of persistent and request-bound memory allocations. 70 | 71 | Playing with resources 72 | ---------------------- 73 | 74 | The resources related API can be found in 75 | `zend/zend_list.c <https://github.com/php/php-src/blob/master/Zend/zend_list.c>`_. 76 | You may find some inconsistencies in it, like talking about "lists" for "resources". 77 | 78 | Creating resources 79 | ****************** 80 | 81 | To create a resource, one must first register a destructor for it and associate it to a resource type name using 82 | ``zend_register_list_destructors_ex()``. That call will return an integer that represents the type of resource you 83 | register. You must remember that integer because you will need it later-on to fetch back your resource from the user. 84 | 85 | After that, you can register a new resource using ``zend_register_resource()``. That one will return you a 86 | ``zend_resource``. Let's see together a simple use-case example:: 87 | 88 | #include <stdio.h> 89 | 90 | int res_num; 91 | FILE *fp; 92 | zend_resource *my_res; 93 | zval my_val; 94 | 95 | static void my_res_dtor(zend_resource *rsrc) 96 | { 97 | fclose((FILE *)rsrc->ptr); 98 | } 99 | 100 | /* module_number should be your PHP extension number here */ 101 | res_num = zend_register_list_destructors_ex(my_res_dtor, NULL, "my_res", module_number); 102 | fp = fopen("/proc/cpuinfo", "r"); 103 | my_res = zend_register_resource((void *)fp, res_num); 104 | 105 | ZVAL_RES(&my_val, my_res); 106 | 107 | What we do in the code above, is that we open a file using libc's ``fopen()``, and store the returned pointer into a 108 | resource. 
Before that, we registered a destructor which when called will use libc's ``fclose()`` on the pointer. Then, 109 | we register the resource against the engine, and we pass the resource into a ``zval`` container that could get returned 110 | to userland. 111 | 112 | .. note:: Zvals chapter can be found :ref:`here <zvals>`. 113 | 114 | What must be remembered is the resource type. Here, we register a resource of type *"my_res"*. This is the type name. The 115 | engine does not really care about the type name, but about the type identifier, the integer returned by 116 | ``zend_register_list_destructors_ex()``. You should remember it somewhere, like we do in the ``res_num`` variable. 117 | 118 | Fetching back resources 119 | *********************** 120 | 121 | Now that we registered a resource and put it in a ``zval`` for an example, we should learn how to fetch back that 122 | resource from the userland. Remember, the resource is stored into the ``zval``. Into the resource is stored the resource 123 | type number (in the ``type`` field). Thus, to be given back our resource from the user, we must extract the 124 | ``zend_resource`` from the ``zval``, and call ``zend_fetch_resource()`` to get back our ``FILE *`` pointer:: 125 | 126 | /* ... later on ... */ 127 | 128 | zval *user_zval = /* fetch zval from userland, assume type IS_RESOURCE */; 129 | 130 | ZEND_ASSERT(Z_TYPE_P(user_zval) == IS_RESOURCE); /* just a check to be sure */ 131 | 132 | fp = (FILE *)zend_fetch_resource(Z_RES_P(user_zval), "my_res", res_num); 133 | 134 | Like we said: get back a zval from the user (of type ``IS_RESOURCE``), and fetch the resource pointer back from it by 135 | calling ``zend_fetch_resource()``. 136 | 137 | That function will check if the type of the resource is of the type you pass as third parameter (``res_num`` here). 138 | If yes, it extracts back the ``void *`` resource pointer you need and we are done. 
If not, then it throws a warning like 139 | *"supplied resource is not a valid {type name} resource"*. 140 | This could happen if for example you expect a resource of type "my_res", and you are given a zval with a resource of 141 | type "gzip", like one returned by ``gzopen()`` PHP function. 142 | 143 | Resource types are just a way for the engine to mix different kind of resources (of type "file", "gzip" or even "mysql 144 | connection") into the same resource table. Resource types have names, so that those can be used in error messages or in 145 | debug statement (like a ``var_dump($my_resource)``), and they also are represented as an integer used internally to 146 | fetch back the resource pointer from it, and to register a destructor with the resource type. 147 | 148 | .. note:: Like you can see, if we would have used objects, those represent types by themselves, and there wouldn't have 149 | to happen that step of fetching back a resource from its identifier verifying its type. Objects are 150 | self-describing types. But resources are still a valid data type for the current PHP version. 151 | 152 | Reference counting resources 153 | ---------------------------- 154 | 155 | Like many other types, ``zend_resource`` is reference counted. We can see its ``zend_refcounted_h`` header. Here is the 156 | API to play with reference counting, if you need it (you shouldn't really need it on an average): 157 | 158 | * ``zend_list_delete(zend_resource *res)`` decrements refcount and destroys resource if drops to zero 159 | * ``zend_list_free(zend_resource *res)`` checks if refcount is zero, and destroys the resource if true. 160 | * ``zend_list_close(zend_resource *res)`` calls the resource destructor whatever the conditions 161 | 162 | Persistent resources 163 | -------------------- 164 | 165 | Persistent resources don't get destroyed at the end of the request. The classical use-case for that are persistent 166 | database connections. 
Those are connections that are recycled from request to request (with all the bullshit that will 167 | bring). 168 | 169 | Traditionally, you should not be using persistent resources, as one request will be different from the other. Reusing 170 | the same resource should really be thoughtful before going this way. 171 | 172 | To register a persistent resource, use a persistent destructor instead of a classical one. This is done in the call 173 | to ``zend_register_list_destructors_ex()``, which API is like:: 174 | 175 | zend_register_list_destructors_ex(rsrc_dtor_func_t destructor, rsrc_dtor_func_t persistent_destructor, 176 | const char *type_name, int module_number); 177 | -------------------------------------------------------------------------------- /Book/php7/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | This book is a collaborative effort between several developers of the PHP language to better document and describe how 5 | PHP works internally. 6 | 7 | There are three primary goals of this book: 8 | 9 | * Document and describe how PHP internals work. 10 | * Document and describe how to extend the language with extensions. 11 | * Document and describe how you can interact with the community to develop PHP itself. 12 | 13 | This book is primarily targeted at developers who have experience in the C programming language. However, wherever 14 | possible we will attempt to distill the information and summarize it so that developers who don't know C well, will 15 | still be able to understand the content. 16 | 17 | However, let us insist. You won't be able to achieve something productive, stable (crash free under any platform), 18 | performant and useful, if you don't know the C language. 
Here are some pretty nice online resources about the C 19 | language itself, its ecosystem and build tools, and Operating System APIs: 20 | 21 | * http://www.tenouk.com/ 22 | * https://en.wikibooks.org/wiki/C_Programming 23 | * http://c-faq.com/ 24 | * https://www.gnu.org/software/libc/ 25 | * http://www.faqs.org/docs/Linux-HOWTO/Program-Library-HOWTO.html 26 | 27 | We also highly recommend you some books. You'll learn with them how to efficiently use the C language, and how to 28 | make it translate to efficient CPU instructions so that you can design strong/fast/reliable and secure programs. 29 | 30 | * The C Programming Language (Ritchie & Kernighan) 31 | * Advanced Topics in C Core Concepts in Data Structures 32 | * Learn C the Hard Way 33 | * The Art of Debugging with GDB DDD and Eclipse 34 | * The Linux Programming Interface 35 | * Advanced Linux Programming 36 | * Hackers Delight 37 | * Write Great Code (2 Volumes) 38 | 39 | .. note:: This book is Work-In-Progress and some chapters have not been written yet. We don't pay attention to a 40 | specific order, but add content as we feel. 41 | 42 | The repository for this book is available on GitHub_. Please report issues and provide feedback on the `issue tracker`_. 43 | 44 | .. _GitHub: https://github.com/phpinternalsbook/PHP-Internals-Book 45 | .. _issue tracker: https://github.com/phpinternalsbook/PHP-Internals-Book/issues 46 | -------------------------------------------------------------------------------- /Book/php7/memory_management.rst: -------------------------------------------------------------------------------- 1 | Memory management 2 | ================= 3 | 4 | C programmers usually have to deal with memory by hand. With dynamic memory, the programmer allocates memory when 5 | needing and frees it when finished. Failing to free dynamic memory leads to a "memory leak", which may or may not be a 6 | bad thing. 
In the case of PHP, as the process could live for a virtually infinite amount of time, creating a memory leak 7 | will be dramatic. In any situation, leaking memory really translates to poorly and badly designed programs that cannot 8 | be trusted. 9 | Memory leaking is easy to understand. You ask the OS to book some part of the main machine memory for you, but you never 10 | tell it to release it back for other processes usage : you are not alone on the machine, other processes need some 11 | memory, and the OS itself as well. 12 | 13 | Also, in C, memory areas are clearly bound. Reading or writing before or after the bounds is a very nasty operation. 14 | That will lead for sure to a crash, or worse an exploitable security issue. There are no magical things like 15 | auto-resizeable areas with the C language. You must clearly tell the machine (and the CPU) what you want it to do. No 16 | guess, no magic, no automation of any kind (like garbage collection). 17 | 18 | PHP's got a very specific memory model, and provides its own layer over the traditional libc's dynamic memory 19 | allocator. This layer is called **Zend Memory Manager**. 20 | 21 | This chapter explains you what Zend Memory Manager is, how you must use it, and what you must/must not do with it. 22 | After that, we'll quickly introduce you to specific tools used in the C area to debug dynamic memory. 23 | 24 | .. note:: If you need, please get some (possibly strong) knowledge about C memory allocation classes (static vs 25 | dynamic memory), and about libc's allocator. 26 | 27 | Contents: 28 | 29 | .. 
toctree:: 30 | :maxdepth: 2 31 | 32 | memory_management/zend_memory_manager.rst 33 | memory_management/memory_debugging.rst 34 | -------------------------------------------------------------------------------- /Book/php7/zend_engine.rst: -------------------------------------------------------------------------------- 1 | Zend engine 2 | =========== 3 | 4 | The Zend engine is a set of components that make PHP what it is. The most important Zend engine component is the 5 | *Zend Virtual Machine*, which is composed of the *Zend Compiler* and the *Zend Executor* components. We could also add 6 | the opcache zend extension in such category. Those three components are the heart of PHP (or the brain, you choose), 7 | they are critical and they are the most complex parts of all the PHP source code. In the current chapter, we'll try to 8 | open them and detail them. 9 | 10 | Contents: 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | zend_engine/zend_compiler.rst 16 | zend_engine/zend_executor.rst 17 | zend_engine/zend_opcache.rst 18 | 19 | -------------------------------------------------------------------------------- /Book/php7/zend_engine/zend_compiler.rst: -------------------------------------------------------------------------------- 1 | Zend Compiler 2 | ============= 3 | 4 | An example of how to add a syntactic feature to PHP (for updating lexing, adding to PHP's context free grammar, and adding a new opcode) is https://phpinternals.net/articles/implementing_a_range_operator_into_php. 5 | 6 | A good introduction to the internals of the Zend virtual machine is https://nikic.github.io/2017/04/14/PHP-7-Virtual-machine.html. This describes the output of the compiler, as well as the way the executor(virtual machine) works. 
7 | -------------------------------------------------------------------------------- /Book/php7/zend_engine/zend_executor.rst: -------------------------------------------------------------------------------- 1 | Zend Executor 2 | ============= 3 | 4 | A good introduction to the internals of the zend executor (Virtual Machine) is https://nikic.github.io/2017/04/14/PHP-7-Virtual-machine.html 5 | -------------------------------------------------------------------------------- /Book/php7/zend_engine/zend_opcache.rst: -------------------------------------------------------------------------------- 1 | Zend opcache 2 | ============ 3 | -------------------------------------------------------------------------------- /Book/php7/zvals.rst: -------------------------------------------------------------------------------- 1 | .. _zvals: 2 | 3 | Zvals 4 | ===== 5 | 6 | In this chapter the ``zval`` data structure, which is used to represent arbitrary PHP values, is introduced. We 7 | explain the concepts behind zvals and how to use them in extension code. 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | zvals/basic_structure.rst 15 | zvals/memory_management.rst 16 | zvals/references.rst 17 | zvals/casts_and_operations.rst 18 | -------------------------------------------------------------------------------- /Book/php7/zvals/casts_and_operations.rst: -------------------------------------------------------------------------------- 1 | Casts and operations 2 | ==================== 3 | 4 | Casts 5 | ----- 6 | 7 | In many situations, you expect to receive a zval of a specific type. In this case, you could strictly check for the 8 | desired type:: 9 | 10 | if (Z_TYPE_P(val) != IS_STRING) { 11 | zend_type_error("Expected string"); 12 | return; 13 | } 14 | 15 | Alternatively, you can perform a cast to the desired type. 
There are two ways in which casts can be performed: The first 16 | is to actually change the type of the zval using one of the ``convert_to_*`` functions:: 17 | 18 | convert_to_string(val); 19 | // Z_TYPE_P(val) == IS_STRING is guaranteed here. 20 | 21 | Similar functions exist for all the other types that have a meaningful type cast:: 22 | 23 | void convert_to_null(zval *op); 24 | void convert_to_boolean(zval *op); 25 | void convert_to_long(zval *op); 26 | void convert_to_double(zval *op); 27 | void convert_to_string(zval *op); 28 | void convert_to_array(zval *op); 29 | void convert_to_object(zval *op); 30 | 31 | In addition, the ``convert_scalar_to_number()`` function can be used to convert the zval into either an integer or a 32 | float, with the caveat that arrays stay as arrays:: 33 | 34 | convert_scalar_to_number(val); 35 | switch (Z_TYPE_P(val)) { 36 | case IS_LONG: 37 | php_printf("Long: " ZEND_LONG_FMT "\n", Z_LVAL_P(val)); 38 | break; 39 | case IS_DOUBLE: 40 | php_printf("Double: %H\n", Z_DVAL_P(val)); 41 | break; 42 | case IS_ARRAY: 43 | php_printf("Array\n"); 44 | break; 45 | ZEND_EMPTY_SWITCH_DEFAULT_CASE() 46 | } 47 | 48 | Because ``convert_to_*`` modifies zvals in-place, care is needed to maintain copy-on-write semantics. A common mistake 49 | is to write code like the following:: 50 | 51 | zval *val; 52 | ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(array), val) { 53 | convert_to_string(val); 54 | // Use val as string. 55 | } ZEND_HASH_FOREACH_END(); 56 | 57 | Here, we want to iterate over an array and treat all elements as strings. However, as ``convert_to_string()`` operates 58 | in-place, this means that the array actually gets modified. As such, this code is only legal if you own the array 59 | uniquely. Otherwise, it would be necessary to perform a separation first:: 60 | 61 | zval *val; 62 | SEPARATE_ARRAY(array); 63 | ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(array), val) { 64 | convert_to_string(val); 65 | // Use val as string. 
66 | } ZEND_HASH_FOREACH_END(); 67 | 68 | The second set of cast APIs avoids this issue by returning the converted value instead of changing the type of the zval 69 | itself. In the cases where it can be used, this is usually more convenient, more efficient and safer (with regard to 70 | copy-on-write). When converting to booleans, integers and floats, we simply receive a ``bool``, ``zend_long``, or 71 | ``double`` result and are done:: 72 | 73 | bool b = zend_is_true(val); 74 | zend_long l = zval_get_long(val); 75 | double d = zval_get_double(val); 76 | 77 | For strings, we receive a ``zend_string *`` result, which we must release afterwards. If the value is already a string, 78 | this will simply increment the refcount. If it's not a string, it will either return an existing interned string, or 79 | allocate a new one:: 80 | 81 | zend_string *str = zval_get_string(val); 82 | // Do something with str. 83 | zend_string_release(str); 84 | 85 | For this kind of temporary usage, where we don't retain a long-term reference to ``str``, an additional optimized API 86 | exists:: 87 | 88 | zend_string *tmp_str; 89 | zend_string *str = zval_get_tmp_string(val, &tmp_str); 90 | // Do something with str. 91 | zend_tmp_string_release(tmp_str); 92 | 93 | This API works the same way as ``zval_get_string()``, but avoids a refcount increment and decrement for the common 94 | case where the value is already a string. 95 | 96 | When it comes to conversions to strings in particular, there is one additional issue to consider: ``__toString()`` 97 | methods can throw (actually, conversions to int and float can throw as well, but this issue is usually ignored). This 98 | can be handled by checking ``EG(exception)`` after a string conversion:: 99 | 100 | zend_string *str = zval_get_string(val); 101 | if (EG(exception)) { 102 | // zend_string_release(str) is safe, but not necessary here. 
103 | return; 104 | } 105 | zend_string_release(str); 106 | 107 | However, the more idiomatic and efficient way to handle this situation is to use ``try`` variants of these functions 108 | instead, which will indicate whether an exception has been thrown in their return value:: 109 | 110 | if (!try_convert_to_string(val)) { 111 | // Exception thrown. 112 | return; 113 | } 114 | 115 | zend_string *str = zval_try_get_string(val); 116 | if (!str) { 117 | // Exception thrown. 118 | return; 119 | } 120 | zend_string_release(str); 121 | 122 | zend_string *tmp_str; 123 | zend_string *str = zval_try_get_tmp_string(val, &tmp_str); 124 | if (!str) { 125 | // Exception thrown. 126 | return; 127 | } 128 | zend_tmp_string_release(tmp_str); 129 | 130 | Operations 131 | ---------- 132 | 133 | Userland operations like ``$op1 + $op2`` are implemented through corresponding functions like ``add_function()`` 134 | internally, which accept a result out-parameter, followed by the input operands:: 135 | 136 | zval *op1 = /* ... */, *op2 = /* ... */; 137 | zval result; 138 | if (add_function(&result, op1, op2) == FAILURE) { 139 | // Exception thrown. 140 | return; 141 | } 142 | // Do something with result. 143 | zval_ptr_dtor(&result); 144 | 145 | It should be noted that these functions are rather rarely used in practice, as most code works with zvals of specific 146 | types, rather than operating on completely arbitrary values. 
The full set of functions is:: 147 | 148 | zend_result add_function(zval *result, zval *op1, zval *op2); /* $result = $op1 + $op2 */ 149 | zend_result sub_function(zval *result, zval *op1, zval *op2); /* $result = $op1 - $op2 */ 150 | zend_result mul_function(zval *result, zval *op1, zval *op2); /* $result = $op1 * $op2 */ 151 | zend_result pow_function(zval *result, zval *op1, zval *op2); /* $result = $op1 ** $op2 */ 152 | zend_result div_function(zval *result, zval *op1, zval *op2); /* $result = $op1 / $op2 */ 153 | zend_result mod_function(zval *result, zval *op1, zval *op2); /* $result = $op1 % $op2 */ 154 | zend_result bitwise_or_function(zval *result, zval *op1, zval *op2); /* $result = $op1 | $op2 */ 155 | zend_result bitwise_and_function(zval *result, zval *op1, zval *op2); /* $result = $op1 & $op2 */ 156 | zend_result bitwise_xor_function(zval *result, zval *op1, zval *op2); /* $result = $op1 ^ $op2 */ 157 | zend_result boolean_xor_function(zval *result, zval *op1, zval *op2); /* $result = $op1 xor $op2 */ 158 | zend_result shift_left_function(zval *result, zval *op1, zval *op2); /* $result = $op1 << $op2 */ 159 | zend_result shift_right_function(zval *result, zval *op1, zval *op2); /* $result = $op1 >> $op2 */ 160 | zend_result concat_function(zval *result, zval *op1, zval *op2); /* $result = $op1 . 
$op2 */ 161 | 162 | zend_result bitwise_not_function(zval *result, zval *op1); /* $result = ~$op1 */ 163 | zend_result boolean_not_function(zval *result, zval *op1); /* $result = !$op1 */ 164 | 165 | zend_result increment_function(zval *op); /* ++$op */ 166 | zend_result decrement_function(zval *op); /* --$op */ 167 | 168 | zend_result compare_function(zval *result, zval *op1, zval *op2); /* $result = $op1 <=> $op2 */ 169 | zend_result is_equal_function(zval *result, zval *op1, zval *op2); /* $result = $op1 == $op2 */ 170 | zend_result is_not_equal_function(zval *result, zval *op1, zval *op2); /* $result = $op1 != $op2 */ 171 | zend_result is_identical_function(zval *result, zval *op1, zval *op2); /* $result = $op1 === $op2 */ 172 | zend_result is_not_identical_function(zval *result, zval *op1, zval *op2); /* $result = $op1 !== $op2 */ 173 | zend_result is_smaller_function(zval *result, zval *op1, zval *op2); /* $result = $op1 < $op2 */ 174 | zend_result is_smaller_or_equal_function(zval *result, zval *op1, zval *op2); /* $result = $op1 <= $op2 */ 175 | /* $op1 > $op2 is same as $op2 < $op1 */ 176 | /* $op1 >= $op2 is same as $op2 <= $op1 */ 177 | 178 | For comparisons, there are two more variants that return the comparison result, instead of placing it in a zval:: 179 | 180 | bool zend_is_identical(zval *op1, zval *op2); 181 | int zend_compare(zval *op1, zval *op2); 182 | 183 | ``zend_compare()`` returns a 3-way comparison result like the ``<=>`` operator in PHP, which is less than, equal to, 184 | or greater than zero depending on whether ``op1`` is smaller, equal to, or greater than ``op2``. 185 | 186 | Finally, there are a number of variants that have a ``fast_`` prefix. These are optimized implementations that 187 | restrict the arguments to certain types, or inline part of the implementation and/or implement it using inline 188 | assembly:: 189 | 190 | /* op1 must have type IS_LONG, implementation uses inline assembly. 
*/ 191 | static zend_always_inline void fast_long_increment_function(zval *op1); 192 | static zend_always_inline void fast_long_decrement_function(zval *op1); 193 | /* op1 and op2 must have type IS_LONG, implementation uses inline assembly. */ 194 | static zend_always_inline void fast_long_add_function(zval *result, zval *op1, zval *op2); 195 | static zend_always_inline void fast_long_sub_function(zval *result, zval *op1, zval *op2); 196 | /* op1, op2 may have any type, but IS_LONG and IS_DOUBLE addition is inlined. */ 197 | static zend_always_inline zend_result fast_add_function(zval *result, zval *op1, zval *op2); 198 | /* op1, op2 may have any type, but IS_LONG, IS_DOUBLE and IS_STRING equality is inlined. */ 199 | static zend_always_inline bool fast_equal_check_function(zval *op1, zval *op2); 200 | /* op1 must have type IS_LONG, op2 can have any type. */ 201 | static zend_always_inline bool fast_equal_check_long(zval *op1, zval *op2); 202 | /* op1 must have type IS_STRING, op2 can have any type. */ 203 | static zend_always_inline bool fast_equal_check_string(zval *op1, zval *op2); 204 | /* op1, op2 may have any type, but part of the implementation is inlined. */ 205 | static zend_always_inline bool fast_is_identical_function(zval *op1, zval *op2); 206 | static zend_always_inline bool fast_is_not_identical_function(zval *op1, zval *op2); 207 | -------------------------------------------------------------------------------- /Book/php7/zvals/references.rst: -------------------------------------------------------------------------------- 1 | References 2 | ========== 3 | 4 | PHP references (in the sense of the ``&`` symbol) are mostly transparent to userland code, but require consistent 5 | special handling in the implementation. This chapter discusses how references are represented, and how internal code 6 | should deal with them. 
7 | 8 | Reference semantics 9 | ------------------- 10 | 11 | Before going into the internal representation of PHP references, it may be helpful to clarify some common 12 | misconceptions about the semantics of references in PHP. Consider this basic example: 13 | 14 | .. code-block:: php 15 | 16 | $a = 0; 17 | $b =& $a; 18 | $a++; 19 | $b++; 20 | var_dump($a); // int(2) 21 | var_dump($b); // int(2) 22 | 23 | People will commonly say that "``$b`` is a reference to ``$a``". However, this is not quite correct, in that 24 | references in PHP have no concept of directionality. After ``$b =& $a``, both ``$a`` and ``$b`` reference a common 25 | value, and neither of the variables is privileged in any way. 26 | 27 | This becomes particularly problematic when we consider the interaction of references and array copies: 28 | 29 | .. code-block:: php 30 | 31 | $array = [0]; 32 | $ref =& $array[0]; 33 | $array2 = $array; 34 | $array2[] = 42; // Triggering copy-on-write makes no difference here. 35 | $ref++; 36 | var_dump($array[0]); // int(1) 37 | var_dump($array2[0]); // int(1) 38 | 39 | The ``$ref =& $array[0]`` line creates a reference between ``$ref`` and ``$array[0]``. When the array is subsequently 40 | copied, it becomes a reference between ``$ref``, ``$array[0]`` and ``$array2[0]``, as the reference is also copied. 41 | 42 | Intuitively this behavior is wrong. There's two reasons why it happens: The first one is the aforementioned lack 43 | of directionality. This behavior *would* make sense if we had written ``$array[0] =& $ref``. In this case it would be 44 | expected that a copy of ``$array2[0]`` also points to ``$ref``. However, we cannot actually distinguish these two 45 | cases. 46 | 47 | The second and more important reason is a more technical one: ``$array2 = $array`` only performs a refcount increment, 48 | which means we wouldn't have a chance to drop the reference even if we wanted to. 
49 | 50 | Representation 51 | -------------- 52 | 53 | References are represented using an ``IS_REFERENCE`` zval that points to a ``zend_reference`` structure:: 54 | 55 | struct _zend_reference { 56 | zend_refcounted_h gc; 57 | zval val; 58 | zend_property_info_source_list sources; 59 | }; 60 | 61 | Zvals themselves do not have a reference count, and cannot be shared. The ``zend_reference`` structure essentially 62 | represents a reference-counted zval that *can* be shared. Multiple zvals can point to the same ``zend_reference``, 63 | and any change to the ``val`` it contains will be observable from all sources. 64 | 65 | Type sources 66 | ~~~~~~~~~~~~ 67 | 68 | Normally, PHP does not track who or what makes use of a given reference. The only knowledge that is stored is how many 69 | users there are (through the refcount), so that the reference may be destroyed in time. 70 | 71 | However, due to the introduction of typed properties in PHP 7.4, we do need to keep track of which typed properties make 72 | use of a certain reference, in order to enforce property types for indirect modifications through references: 73 | 74 | .. code-block:: php 75 | 76 | class Test { 77 | public int $prop = 42; 78 | } 79 | $test = new Test; 80 | $ref =& $test->prop; 81 | $ref = "string"; // TypeError 82 | 83 | The ``sources`` member of ``zend_reference`` stores a list of ``zend_property_info`` pointers to track typed properties 84 | that use the reference. Macros like ``ZEND_REF_HAS_TYPE_SOURCES()``, ``ZEND_REF_ADD_TYPE_SOURCE()``, and 85 | ``ZEND_REF_DEL_TYPE_SOURCE()`` are used to manage this source list, but typically only engine code needs to deal with 86 | this. 87 | 88 | Initializing references 89 | ----------------------- 90 | 91 | Just like other zvals, references are initialized through a set of macros. 
The most basic one accepts an already 92 | created ``zend_reference`` pointer:: 93 | 94 | zval ref; 95 | ZVAL_REF(ref, zend_reference_ptr); 96 | 97 | To create a reference from scratch, ``ZVAL_NEW_REF()`` can be used:: 98 | 99 | zval ref; 100 | zval initial_val; 101 | ZVAL_STRING(initial_val, "test"); 102 | ZVAL_NEW_REF(&ref, &initial_val); 103 | 104 | This macro accepts an initial value for the reference. Note that it is *moved* into the reference using 105 | ``ZVAL_COPY_VALUE``; the refcount is not incremented. Alternatively, ``ZVAL_NEW_EMPTY_REF()`` leaves the value 106 | uninitialized:: 107 | 108 | zval ref; 109 | ZVAL_NEW_EMPTY_REF(&ref); 110 | ZVAL_STRING(Z_REFVAL(ref), "test"); 111 | 112 | Here we create an empty reference and then initialize the reference value ``Z_REFVAL(ref)`` directly. Finally, 113 | ``ZVAL_MAKE_REF()`` can be used to promote an existing zval into a reference:: 114 | 115 | zval *zv = /* ... */; 116 | ZVAL_MAKE_REF(zv); 117 | 118 | If ``zv`` was already a reference, this does nothing. If it wasn't a reference yet, this will change ``zv`` into a 119 | reference and set its initial value to the old value of ``zv``. 120 | 121 | Dereferencing and unwrapping 122 | ---------------------------- 123 | 124 | Most code does not want to handle references in any special way, and simply wants to look through to the underlying 125 | value:: 126 | 127 | zval *zv = /* ... */; 128 | if (Z_ISREF_P(zv)) { 129 | zv = Z_REFVAL_P(zv); 130 | } 131 | 132 | If the value is a reference (``Z_ISREF``), we switch to looking at the value it contains. This operation is called 133 | "dereferencing" and is more compactly written as ``ZVAL_DEREF(zv)``. It is extremely common and should be applied 134 | essentially at any point where reference zvals might occur.
For example, this is how a typical loop over an array 135 | might look like:: 136 | 137 | zval *val; 138 | ZEND_HASH_FOREACH_VAL(ht, val) { 139 | ZVAL_DEREF(val); 140 | 141 | /* Do something with val, now a guaranteed non-reference. */ 142 | } ZEND_HASH_FOREACH_END(); 143 | 144 | The ``ZVAL_COPY_DEREF(target, source)`` macro is a combined form of ``ZVAL_COPY`` and ``ZVAL_DEREF``. It copies the 145 | dereferenced value of ``source`` into ``target``. 146 | 147 | Dereferencing simply moves a pointer from the outer to the inner zval, without changing either. It is also possible 148 | to actually remove the reference wrapper by performing an unwrap. It is probably easiest to understand this operation 149 | by looking at its implementation:: 150 | 151 | static zend_always_inline void zend_unwrap_reference(zval *op) { 152 | if (Z_REFCOUNT_P(op) == 1) { 153 | ZVAL_UNREF(op); 154 | } else { 155 | Z_DELREF_P(op); 156 | ZVAL_COPY(op, Z_REFVAL_P(op)); 157 | } 158 | } 159 | 160 | If the refcount is 1, then the inner value is moved into ``op`` and the reference wrapper is destroyed. This is what 161 | ``ZVAL_UNREF()`` does. If the refcount is greater than one, then we decrement the refcount of the reference wrapper, 162 | and copy (with refcount increase) the inner value into ``op``. This means that an unwrap operation does not necessarily 163 | destroy the reference (if it has other users), but will remove one particular use. 164 | 165 | Indirect zvals 166 | -------------- 167 | 168 | Next to references, PHP also has a more direct mechanism to share zvals. The ``IS_INDIRECT`` type stores a direct 169 | pointer to another zval:: 170 | 171 | zval val1; 172 | ZVAL_LONG(&val1, 42); 173 | 174 | zval val2; 175 | ZVAL_INDIRECT(&val2, &val1); 176 | 177 | ZEND_ASSERT(Z_INDIRECT(val2) == &val1); 178 | 179 | While there is some surface similarity to references, this mechanism is not generally usable, because nothing ensures 180 | that the pointed-to zval isn't deallocated. 
For this reason, indirect zvals can only be used in controlled situations, 181 | for example to point from a property hash table to a property slot table. This is possible, because we know that the 182 | property slot table is not reallocated during the lifetime of an object, and the property hash table and property slot 183 | table are deallocated at the same time, so no dangling pointers are left behind. 184 | 185 | As such, indirect zvals can only occur in specific situations, and cannot be stored in general-purpose userland-exposed 186 | zvals. 187 | -------------------------------------------------------------------------------- /Book/tests/echo_basic.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | echo - basic test for echo language construct 3 | --FILE-- 4 | 7 | --EXPECT-- 8 | This works and takes args! -------------------------------------------------------------------------------- /Book/tests/examining_failed_test_output.rst: -------------------------------------------------------------------------------- 1 | .. _examining_failed_test_output: 2 | 3 | Examining failed test output 4 | ============================ 5 | 6 | We've learned how to create and run tests and have had great success with passing tests, but what happens when things go 7 | wrong? We'll examine how to help ourselves debug failed tests using the failed-test output files. 8 | 9 | Failed test output files 10 | ------------------------ 11 | 12 | If a test would fail, the test results will be dumped in the directory where the test file resides. This happens for 13 | each failed test. 
For example for a failed test ``001.phpt`` you will get this output: 14 | 15 | * ``001.log`` The output of this one test 16 | * ``001.exp`` The expected test result 17 | * ``001.out`` The actual test result 18 | * ``001.diff`` The difference between the expected and actual result 19 | * ``001.php`` A PHP snippet that contains the code of the failed test 20 | * ``001.sh`` A script to run the PHP snippet of the failed test 21 | 22 | The ``.sh`` bash script is a wrapper around the failed php script that makes it easy to re-run the failed script with 23 | the same runtime conditions as the test run. With all this information at hand, you can start debugging your code. 24 | 25 | Examine failed test output inline 26 | --------------------------------- 27 | 28 | Having all these files on your filesystem is not always convenient. Sometimes you might just want to see the failed 29 | test output on your screen, inline with the failed test. The ``run-tests.php`` script accepts flags to enable such 30 | behavior ``--show-[all|php|exp|diff|out]``. For example if you want to see the diffs inline: 31 | 32 | .. code-block:: bash 33 | 34 | ./run-tests.php --show-diff 35 | ... 36 | TEST 613/14433 [tests/lang/operators/nan-comparison-false.phpt] 37 | ========DIFF======== 38 | 002+ bool(true) 39 | 003+ bool(true) 40 | 004+ bool(true) 41 | 005+ bool(true) 42 | 002- bool(false) 43 | 003- bool(false) 44 | 004- bool(false) 45 | 005- bool(false) 46 | ========DONE======== 47 | ... 48 | 49 | In case of diff they are only printed if there is an actual difference. There are also flags to print the skip 50 | criteria or the clean scripts ``--show-[skip|clean]``. For example: 51 | 52 | .. code-block:: bash 53 | 54 | ./run-tests.php --show-clean 55 | ... 
56 | PASS Test for buffering in core functions with implicit flush on [tests/func/009.phpt] 57 | TEST 399/14433 [tests/func/010.phpt] 58 | ========CLEAN======== 59 | 62 | ========DONE======== 63 | PASS function with many parameters [tests/func/010.phpt] 64 | PASS Test bitwise AND, OR, XOR, NOT and logical NOT in INI via error_reporting [tests/func/011.phpt] 65 | ... 66 | 67 | There even is a flag to show slow tests ``--show-slow`` that accepts a number of milliseconds. At the end of the test 68 | run, the tests that ran slower are reported. Say you want to inspect tests in ``ext/standard/tests/file`` that run 69 | more than 10 seconds: 70 | 71 | .. code-block:: bash 72 | 73 | ./run-tests.php --show-slow 10000 ext/standard/tests/file 74 | ===================================================================== 75 | SLOW TEST SUMMARY 76 | --------------------------------------------------------------------- 77 | (28.043 s) Test fileatime(), filemtime(), filectime() & touch() functions : usage variation [ext/standard/tests/file/005_variation.phpt] 78 | ===================================================================== 79 | 80 | Unfortunately the make script does not have environment variables to activate these flags. But you can use them anyway 81 | by abusing the ``TESTS`` variable instead: 82 | 83 | .. code-block:: bash 84 | 85 | make test TESTS=--show-all 86 | -------------------------------------------------------------------------------- /Book/tests/introduction.rst: -------------------------------------------------------------------------------- 1 | .. _tests_introduction: 2 | 3 | Testing with ``.phpt`` files 4 | ============================ 5 | 6 | Tests are a great way to get your foot in the door of PHP internals. You don't even need to know C in order to get 7 | started because all the tests are written in PHP. 8 | 9 | In this chapter we'll explore the test suite that covers the internal functionality of PHP. 
We'll cover everything from 10 | running the test suite with PHP's custom black-box testing tool called ``run-tests.php`` to creating new tests. 11 | 12 | @TODO 13 | ----- 14 | 15 | The following sections still need to be written: 16 | 17 | * Finding untested code (finding_untested_code) 18 | * Environment variables (advanced/environment_variables) 19 | * Redirect Tests (advanced/redirect_tests) 20 | * Step debugging with GDB (advanced/step_debugging_with_gdb) -------------------------------------------------------------------------------- /Book/tests/overview.rst: -------------------------------------------------------------------------------- 1 | .. _overview: 2 | 3 | Testing overview 4 | ================ 5 | 6 | PHP has an extensive test suite with over 15,000 individual test files. The test files are run with PHP's black-box 7 | testing tool called `run-tests.php`_ which can be found in the root directory of the php source code. 8 | 9 | "But wait!" you say, "I heard that PHP source doesn't have any unit tests." You are correct. The PHP source code has 10 | zero unit tests. But it does have `functional tests`_ and lucky for us, these particular functional tests are written in 11 | PHP. The test files have a ``.phpt`` file extension and can be run just like any normal PHP file. 12 | 13 | The official documentation for writing phpt tests lives at `qa.php.net`_. 14 | 15 | .. _run-tests.php: https://github.com/php/php-src/blob/master/run-tests.php 16 | .. _`functional tests`: https://en.wikipedia.org/wiki/Functional_testing 17 | .. _`qa.php.net`: http://qa.php.net/write-test.php 18 | 19 | Black-box testing 20 | ----------------- 21 | 22 | In a nutshell, `black-box testing`_ sends input to some function and examines the output after the function has finished 23 | execution. If the output matches what we were expecting, then the test has passed. Black-box testing doesn't care *how* 24 | something is done, it only cares about the end result. 
This is exactly how ``run-tests.php`` works; it takes a set of 25 | inputs, runs some PHP code and then examines the output. If the output matches what is expected, then the test passes. 26 | 27 | .. _black-box testing: https://en.wikipedia.org/wiki/Black-box_testing 28 | 29 | Where the test files live 30 | ------------------------- 31 | 32 | The test files live in several different places throughout the codebase in folders named ``tests``. Each test folder 33 | contains ``.phpt`` files pertaining to its containing folder's code. 34 | 35 | * ``ext/{extension-name}/tests/`` Extension tests 36 | * ``sapi/{sapi-name}/tests/`` SAPI tests 37 | * ``Zend/tests/`` Zend engine tests 38 | * ``tests/`` More Zend engine tests 39 | -------------------------------------------------------------------------------- /Book/tests/phpt_file_structure.rst: -------------------------------------------------------------------------------- 1 | .. _phpt_file_structure: 2 | 3 | The ``.phpt`` file structure 4 | ============================ 5 | 6 | Now that we know how to run the tests with run-tests, let's dive into a phpt file in more detail. A phpt file is just a 7 | normal PHP file but it contains a number of different sections which run-tests supports. 8 | 9 | A basic test example 10 | -------------------- 11 | 12 | Here's a basic example of a PHP source test that tests the ``echo`` construct. 13 | 14 | .. literalinclude:: echo_basic.phpt 15 | :language: php 16 | 17 | Did you know that `echo can take a list of arguments`_? Well you do now. 18 | 19 | There are `many more sections`_ that are available to us in a phpt file, but these three are the bare-minimum required. 20 | The ``--EXPECT--`` section has a few variations but we'll get into describing the sections in just a bit. 21 | 22 | Notice that of the three sections, we have everything we need to run a black-box test. We have a name for the test, a 23 | bit of code and the expected output. 
Again, black-box testing doesn't care *how* the code runs, it only concerns itself 24 | with the end result. 25 | 26 | .. _echo can take a list of arguments: http://php.net/manual/en/function.echo.php 27 | .. _many more sections: http://qa.php.net/phpt_details.php 28 | 29 | Some notable sections 30 | --------------------- 31 | 32 | Now that we've seen the three required sections for every ``.phpt`` file, let's take a look at a few other common sections 33 | we'll no doubt encounter. 34 | 35 | ``--TEST--`` : The name of the test 36 | The `--TEST-- section`_ just describes the test (for humans) in one line. This will be displayed in the console when 37 | the test is run, so it's good to be descriptive but not overly verbose. If your test needs a longer description, add 38 | a `--DESCRIPTION-- section`_. 39 | 40 | .. code-block:: php 41 | 42 | --TEST-- 43 | json_decode() with large integers 44 | 45 | .. note:: The ``--TEST--`` section must be the very first line of the phpt file. Otherwise run-tests will not 46 | consider it to be a valid test file and mark the test as "borked". 47 | 48 | ``--FILE--`` : The PHP code to run 49 | The `--FILE-- section`_ is the PHP code that we want to test. In our above example we're making sure the ``echo`` 50 | construct can take a list of arguments and concatenate them into standard out. 51 | 52 | .. code-block:: php 53 | 54 | --FILE-- 55 | largenum); 59 | $x = json_decode($json, false, 512, JSON_BIGINT_AS_STRING); 60 | var_dump($x->largenum); 61 | echo "Done\n"; 62 | ?> 63 | 64 | .. note:: Although it is considered a best-practice to leave off the closing PHP tag (``?>``) in userland, this is 65 | not the case with a phpt file. If you leave off the closing PHP tag, run-tests will have no trouble 66 | running your test, but your test will no longer be able to run as a normal PHP file. It will also make 67 | your IDE go bonkers. So always remember to include the closing PHP tag in every ``--FILE--`` section.
68 | 69 | ``--EXPECT--`` : The expected output 70 | The `--EXPECT-- section`_ contains exactly what we would expect to see from standard output. If you're expecting 71 | fancy assertions like you get in `PHPUnit`_, you won't get any here. Remember, these are `functional tests`_ so we 72 | just examine the output after providing inputs. 73 | 74 | .. code-block:: php 75 | 76 | --EXPECT-- 77 | float(1.2345678901235E+29) 78 | string(30) "123456789012345678901234567890" 79 | Done 80 | 81 | .. note:: Trailing new lines are trimmed off by run-tests for both the expected and actual output so you don't have 82 | to worry about adding or removing trailing new lines at the end of the ``--EXPECT--`` section. 83 | 84 | ``--EXPECTF--`` : The expected output with substitution 85 | Because the tests need to run on a multitude of environments, we oftentimes may not know what the actual output 86 | of a script will be. Or perhaps the functionality that you're testing is nondeterministic. For this use case we have 87 | the `--EXPECTF-- section`_ which allows us to substitute sections of output with substitution characters much 88 | like the `sprintf() function`_ in PHP. 89 | 90 | .. code-block:: php 91 | 92 | --EXPECTF-- 93 | string(%d) "%s" 94 | Done 95 | 96 | This is particularly handy when creating error-case tests that output the absolute path to the PHP file; something 97 | that would vary from environment to environment. 98 | 99 | Below is an abbreviated error-case example taken from `a real test`_ of the `password hashing functions`_ which 100 | makes use of the ``--EXPECTF--`` section. 101 | 102 | ..
code-block:: php 103 | 104 | --TEST-- 105 | Test error operation of password_hash() with bcrypt hashing 106 | --FILE-- 107 | 3))); 109 | ?> 110 | --EXPECTF-- 111 | Warning: password_hash(): Invalid bcrypt cost parameter specified: 3 in %s on line %d 112 | NULL 113 | 114 | ``--SKIPIF--`` : Conditions that a test should be skipped 115 | Since PHP can be configured with myriad options, the build of PHP that you're running might not be compiled with the 116 | required dependencies that are needed to run a test. The case where this is most common is the extension tests. 117 | 118 | If a test needs an extension installed in order to run the test will have a `--SKIPIF-- section`_ which checks that 119 | the extension is indeed installed. 120 | 121 | .. code-block:: php 122 | 123 | --SKIPIF-- 124 | 125 | 126 | Any tests that meet the ``--SKIPIF--`` condition will be marked as "skipped" by run-tests and continue on to the 127 | next test in the queue. Any text after the word "skip" will be returned in the output when you run the test from 128 | run-tests as the reason why the test was skipped. 129 | 130 | Many of the tests will halt the script execution with `die()`_ or `exit()`_ if the ``--SKIPIF--`` condition is met 131 | as in the example above. It is important to understand that just because you ``die()`` in a ``--SKIPIF--`` section, 132 | that does not mean run-tests will skip your test. Run-tests simply examines the output of ``--SKIPIF--`` and looks 133 | for the word "skip" as the first four characters. If the first word is not "skip", the test will not be skipped. 134 | 135 | In fact, you don't have to halt execution at all as long as "skip" is the first word of the output. 136 | 137 | The following example will skip a test. Note how we didn't halt the script execution. 138 | 139 | .. code-block:: php 140 | 141 | --SKIPIF-- 142 | 143 | 144 | By contrast, examine the following example. 
Notice how it halts script execution, but since the word "skip" isn't the 145 | first word in the output, run-tests will still happily run the test without skipping it. 146 | 147 | .. code-block:: php 148 | 149 | --SKIPIF-- 150 | 151 | 152 | .. note:: Although it is not required to halt script execution in the ``--SKIPIF--`` section, it is always highly 153 | recommended so that you can still run the phpt file as a normal php file and see a nice message like "skip 154 | ext/json must be installed" instead of getting a ton of random errors. 155 | 156 | ``--INI--`` 157 | Sometimes tests rely on having very specific INI settings set. In this case you can define any INI settings with the 158 | `--INI-- section`_. Each INI setting is placed on a new line within the section. 159 | 160 | .. code-block:: php 161 | 162 | --INI-- 163 | date.timezone=America/Chicago 164 | 165 | Run-tests does all the magic involved with setting the INI configuration for you. 166 | 167 | .. _--TEST-- section: http://qa.php.net/phpt_details.php#test_section 168 | .. _--DESCRIPTION-- section: http://qa.php.net/phpt_details.php#description_section 169 | .. _--FILE-- section: http://qa.php.net/phpt_details.php#file_section 170 | .. _--EXPECT-- section: http://qa.php.net/phpt_details.php#expect_section 171 | .. _PHPUnit: https://phpunit.de/ 172 | .. _functional tests: https://en.wikipedia.org/wiki/Functional_testing 173 | .. _--EXPECTF-- section: http://qa.php.net/phpt_details.php#expectf_section 174 | .. _sprintf() function: http://php.net/sprintf 175 | .. _a real test: https://github.com/php/php-src/blob/master/ext/standard/tests/password/password_bcrypt_errors.phpt 176 | .. _password hashing functions: http://php.net/password 177 | .. _--SKIPIF-- section: http://qa.php.net/phpt_details.php#skipif_section 178 | .. _die(): http://php.net/die 179 | .. _exit(): http://php.net/exit 180 | ..
_--INI-- section: http://qa.php.net/phpt_details.php#ini_section 181 | 182 | Writing a simple test 183 | --------------------- 184 | 185 | Let's write our first test just to get familiar with the process. 186 | 187 | Typically tests are stored in a ``tests/`` directory that lives near the code we want to test. For example, the `PDO 188 | extension`_ is found at ``ext/pdo`` in the PHP source code. If you open that directory, you'll see a `tests/ directory`_ 189 | with lots of ``.phpt`` files in it. All the other extensions are set up the same way. There are also tests for the Zend 190 | engine which are located in `Zend/tests/`_. 191 | 192 | For this example, we'll just temporarily create a test in the root ``php-src`` directory. Create and open a new file 193 | with your favorite editor. 194 | 195 | .. code-block:: bash 196 | 197 | $ vi echo_basic.phpt 198 | 199 | .. note:: If you've never used vim before, you'll probably be stuck after running the command above. Just press 200 | ```` a bunch of times and then type ``:q!`` and it should poop you back out to the terminal. You can just 201 | use your favorite editor for this part instead of vim. And then when you get an extra second later on, `learn 202 | vim`_. 203 | 204 | Now copy and paste the example test from above into the new test file. Here's the test file again to save you some 205 | scrolling around. 206 | 207 | .. literalinclude:: echo_basic.phpt 208 | :language: php 209 | 210 | After you save the file as ``echo_basic.phpt`` in the root of the PHP source code and exit your editor, run the example 211 | test with make. 212 | 213 | .. code-block:: bash 214 | 215 | $ make test TESTS=echo_basic.phpt 216 | 217 | If everything worked, you'll see the following passing test summary. 218 | 219 | .. code-block:: bash 220 | 221 | ===================================================================== 222 | Running selected tests. 
223 | PASS echo - basic test for echo language construct [echo_basic.phpt] 224 | ===================================================================== 225 | Number of tests : 1 1 226 | Tests skipped : 0 ( 0.0%) -------- 227 | Tests warned : 0 ( 0.0%) ( 0.0%) 228 | Tests failed : 0 ( 0.0%) ( 0.0%) 229 | Expected fail : 0 ( 0.0%) ( 0.0%) 230 | Tests passed : 1 (100.0%) (100.0%) 231 | --------------------------------------------------------------------- 232 | Time taken : 0 seconds 233 | ===================================================================== 234 | 235 | Notice how text from the ``--TEST--`` section of the test is being displayed in the console: 236 | 237 | .. code-block:: bash 238 | 239 | PASS echo - basic test for echo language construct [echo_basic.phpt] 240 | 241 | To illustrate the point that black-box testing only cares about the output, let's change the PHP code in the 242 | ``--FILE--`` section and keep everything else the same. 243 | 244 | .. code-block:: php 245 | 246 | 251 | 252 | Now let's run the test again. 253 | 254 | .. code-block:: bash 255 | 256 | $ make test TESTS=echo_basic.phpt 257 | 258 | The test should still pass because the expected output is still the same as it was before. Let's try another example. 259 | Replace the PHP code in the ``--FILE--`` section of the test with the following code and then run the test again. 260 | 261 | .. code-block:: php 262 | 263 | 268 | 269 | Although this one looks obscure, I set up a `Gist with the expected output`_ and we're just dumping the body of an HTTP 270 | request to that Gist. Unless there are network connection issues or if the gist gets deleted, this will produce the same 271 | output as the other bits of code and the test will still pass. This will fail if you don't have the `ext/openssl`_ 272 | extension installed since the Gist is behind https. 273 | 274 | Let's try one more example. Replace the PHP code in the ``--FILE--`` section with the following. 275 | 276 | .. 
code-block:: php 277 | 278 | 294 | 295 | Crazy, right? This will take a few seconds just to output a simple string and you'd never do this in real life, but the 296 | test will still pass. Run-tests does not care that your code is slow [#]_ or inefficient or just terrible; if the 297 | expected output matches the actual output, your test will be in the green. 298 | 299 | .. _PDO extension: http://php.net/pdo 300 | .. _tests/ directory: https://github.com/php/php-src/tree/master/ext/pdo/tests 301 | .. _Zend/tests/: https://github.com/php/php-src/tree/master/Zend/tests 302 | .. _learn vim: https://www.google.com/search?q=learn+vim 303 | .. _Gist with the expected output: https://gist.githubusercontent.com/SammyK/9c7bf6acdc5bcaa2cfbb404adc61abe6/raw/04af30473fc78033f7d8941ecd567934b0f804c0/foo-phpt-output.txt 304 | .. _ext/openssl: http://php.net/openssl 305 | .. [#] **Timeouts:** The default timeout for run-tests is 60 seconds (or 300 seconds when testing for memory leaks) but you can specify a different timeout using the ``--set-timeout`` flag. 306 | -------------------------------------------------------------------------------- /Book/tests/running_the_test_suite.rst: -------------------------------------------------------------------------------- 1 | .. _running_the_test_suite: 2 | 3 | Running the test suite 4 | ====================== 5 | 6 | Now that we have a basic understanding of what kind of testing we're doing, let's run the test suite. Before we can run 7 | the tests, make sure you have successfully built php from source (see :ref:`building_php`) and you have an executable 8 | at ``sapi/cli/php``. 9 | 10 | There are two ways to run the test suite. 11 | 12 | Running tests directly with ``run-tests.php`` 13 | --------------------------------------------- 14 | 15 | You can run the test suite directly with run-tests.php. At minimum you'll need to specify the php executable that you 16 | wish to test against which you can do with the ``-p`` flag.
Note that in order to run all tests correctly this should be 17 | an absolute path to the PHP executable. 18 | 19 | .. code-block:: bash 20 | 21 | ~/php-src> sapi/cli/php run-tests.php -p `pwd`/sapi/cli/php 22 | 23 | A shortcut to tell run-tests to test against the PHP executable that is currently invoked is 24 | the ``-P`` flag. 25 | 26 | .. code-block:: bash 27 | 28 | ~/php-src> sapi/cli/php run-tests.php -P 29 | 30 | If you don't want to have to set the ``-p`` or ``-P`` flag every time, you could specify the php executable with the 31 | ``TEST_PHP_EXECUTABLE`` environment variable which can be set with ``export`` on Linux machines. 32 | 33 | .. code-block:: bash 34 | 35 | ~/php-src> export TEST_PHP_EXECUTABLE=sapi/cli/php 36 | ~/php-src> sapi/cli/php run-tests.php 37 | 38 | On Windows you can set the environment variable using ``set``. 39 | 40 | .. code-block:: bash 41 | 42 | C:\php-src> set TEST_PHP_EXECUTABLE=sapi/cli/php 43 | C:\php-src> sapi/cli/php run-tests.php 44 | 45 | By default ``run-tests.php`` will start running all 15,000+ tests in the test suite which would take forever-ever. You 46 | can specify a target folder of tests to run or even a single test. The following example will run all the tests 47 | associated with `PHP 7's CSPRNG`_. 48 | 49 | .. code-block:: bash 50 | 51 | ~/php-src> sapi/cli/php run-tests.php -P ext/standard/tests/random 52 | 53 | You can also specify multiple target folders or files to run. 54 | 55 | .. code-block:: bash 56 | 57 | ~/php-src> sapi/cli/php run-tests.php -P Zend/ ext/reflection/ ext/standard/tests/array/ 58 | 59 | If you run the full test suite, and there are failing tests, the script will ask if you want to send a report to the 60 | PHP QA team. This can be annoying sometimes. It is possible to suppress this question by setting the ``-q`` flag, or by 61 | setting the environment variable ``NO_INTERACTION=1``. 62 | 63 | ..
_PHP 7's CSPRNG: http://php.net/csprng 64 | 65 | Running tests via the ``Makefile`` (recommended) 66 | ------------------------------------------------ 67 | 68 | The recommended way of running the test suite is via the ``test`` target which is defined in the ``Makefile``. The 69 | ``test`` target does all the hard work of specifying the PHP executable for you (the one you compiled), setting up some 70 | default INI settings and specifying the best run-tests flags for you. You needn't worry about setting environment 71 | variables or setting additional flags so the command is very simple. 72 | 73 | .. code-block:: bash 74 | 75 | ~/php-src> make test 76 | 77 | As before, ``run-tests.php`` will start running the entire test suite. To specify a folder or single test to execute you 78 | can pass ``make`` a ``TESTS`` variable. The following example will test that PHP can handle binary literals properly. 79 | 80 | .. code-block:: bash 81 | 82 | ~/php-src> make test TESTS=Zend/tests/binary.phpt 83 | 84 | You can specify multiple target folders or files to run by separating each path with a space in the ``TESTS`` variable. 85 | 86 | .. code-block:: bash 87 | 88 | ~/php-src> make test TESTS="Zend/ ext/reflection/ ext/standard/tests/array/" 89 | 90 | Executing tests in parallel 91 | --------------------------- 92 | 93 | Since PHP 7.4 you are able to run tests in parallel by passing a ``-jN`` flag to ``run-tests.php``, where ``N`` is the number of parallel processes you want. For example, ``-j4`` would run 4 tests at once. 94 | 95 | Example for running tests through run-tests.php: 96 | 97 | .. code-block:: bash 98 | 99 | run-tests.php -j8 100 | 101 | Example for running tests through make: 102 | 103 | .. code-block:: bash 104 | 105 | make test TESTS="-j8" 106 | 107 | More options 108 | ------------ 109 | 110 | Tests that passed are usually not that interesting. You can filter what tests to show with the ``-g`` flag.
Possible 111 | values are ``PASS``, ``FAIL``, ``XFAIL``, ``SKIP``, ``BORK``, ``WARN``, ``LEAK``, ``REDIRECT`` For example to only 112 | show failing tests: 113 | 114 | .. code-block:: bash 115 | 116 | ~/php-src> run-tests.php -g FAIL 117 | 118 | For a full list of supported options that run-tests supports, just run it with ``--help``. 119 | 120 | .. code-block:: bash 121 | 122 | ~/php-src> sapi/cli/php run-tests.php --help 123 | 124 | For example ``-x`` to skip slow tests, ``--offline`` to skip online tests or ``-m`` for testing memory leaks with 125 | Valgrind can be very useful. 126 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PHP-Internals-Book 2 | ================== 3 | 4 | Document format: RST 5 | -------------------- 6 | 7 | The book is written using ReStructured Text and generated using Sphinx. 8 | 9 | * RST manual: http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html 10 | * RST quickref: http://docutils.sourceforge.net/docs/user/rst/quickref.html 11 | * Sphinx manual: https://www.sphinx-doc.org/en/master/ 12 | 13 | Coding style 14 | ------------ 15 | 16 | The following "coding style" applies to the written text, not to the included code. 17 | 18 | * The maximum line-width for text is 120 characters. 19 | * The maximum line-width for code is 98 characters. Including the four space indentation this would be a limit of 102 characters. This is a hard limit that prevents overflows in the PDF version. 20 | * Indentation uses four spaces. 21 | * Lines should not have trailing whitespace. 22 | * Punctuation like `?`, `!` or `:` should directly follow after the word (e.g. `foo:` rather than `foo :`). 
23 | 24 | Domains 25 | ------- 26 | 27 | These domains have been bought by Anthony to publish info about the book: 28 | 29 | * phpinternalsbook.com 30 | * phpcorebook.com 31 | * insidephpbook.com 32 | 33 | Only the first domain is currently in use. 34 | 35 | Authors 36 | ------- 37 | 38 | * Julien Pauli: jpauli@php.net 39 | * Nikita Popov: nikic@php.net 40 | * Anthony Ferrara: ircmaxell@php.net 41 | -------------------------------------------------------------------------------- /build_html.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # https://sipb.mit.edu/doc/safe-shell/ 4 | set -eufo pipefail 5 | 6 | shopt -s failglob 7 | 8 | sphinx-build -b html -d doctrees Book BookHTML 9 | php generate_php5_redirects.php 10 | -------------------------------------------------------------------------------- /build_release_html.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # https://sipb.mit.edu/doc/safe-shell/ 4 | set -eufo pipefail 5 | 6 | shopt -s failglob 7 | 8 | # get rid of old files, so we don't keep them around in the git repo 9 | # when a file or directory was renamed 10 | rm -rf BookHTML/*/ 11 | rm -f BookHTML/*.html 12 | rm -f BookHTML/.buildinfo 13 | 14 | sphinx-build -b html -d doctrees -a Book BookHTML 15 | php generate_php5_redirects.php 16 | -------------------------------------------------------------------------------- /build_release_latex.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # https://sipb.mit.edu/doc/safe-shell/ 4 | set -eufo pipefail 5 | 6 | shopt -s failglob 7 | 8 | rm -rf BookLatex 9 | # inkscape Book/hashtables/images/basic_hashtable.svg -D -A Book/hashtables/images/basic_hashtable.pdf 10 | # inkscape Book/hashtables/images/doubly_linked_hashtable.svg -D -A Book/hashtables/images/doubly_linked_hashtable.pdf 11 | # inkscape 
Book/hashtables/images/ordered_hashtable.svg -D -A Book/hashtables/images/ordered_hashtable.pdf 12 | sphinx-build -b latex -d doctrees -a Book BookLatex 13 | cd BookLatex 14 | pdflatex PHPInternalsBook.tex 15 | pdflatex PHPInternalsBook.tex 16 | cd .. 17 | -------------------------------------------------------------------------------- /generate_php5_redirects.php: -------------------------------------------------------------------------------- 1 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 41 | 42 | Page Redirection 43 | 44 | If you are not redirected automatically, follow this link. 45 | 46 | '; 47 | 48 | $basePath = __DIR__ . '/BookHTML/'; 49 | $baseURL = '/php5/'; 50 | 51 | foreach ($files as $file) { 52 | $fileName = $basePath . $file; 53 | if (!file_exists(dirname($fileName))) { 54 | mkdir(dirname($fileName)); 55 | } 56 | 57 | $content = str_replace('_URL_', $baseURL . $file, $template); 58 | file_put_contents($fileName, $content); 59 | } 60 | --------------------------------------------------------------------------------