├── MANIFEST.in
├── recipe
    ├── build.sh
    ├── bld.bat
    ├── meta.yaml
    └── README.md
├── .gitignore
├── LICENSE.txt
├── pyproject.toml
├── setupOLD.py
├── src
    └── ipython_memory_usage
    │   ├── __init__.py
    │   ├── perf_process.py
    │   ├── ipython_memory_usage_perf.py
    │   ├── ipython_memory_usage.py
    │   └── examples
    │       └── example_usage_np_pd.ipynb
└── README.md


/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | 


--------------------------------------------------------------------------------
/recipe/build.sh:
--------------------------------------------------------------------------------
1 | $PYTHON setup.py install
2 | 


--------------------------------------------------------------------------------
/recipe/bld.bat:
--------------------------------------------------------------------------------
1 | "%PYTHON%" setup.py install
2 | if errorlevel 1 exit 1
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | bin/
12 | build/
13 | develop-eggs/
14 | dist/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | 
25 | # Installer logs
26 | pip-log.txt
27 | pip-delete-this-directory.txt
28 | 
29 | # Unit test / coverage reports
30 | htmlcov/
31 | .tox/
32 | .coverage
33 | .cache
34 | nosetests.xml
35 | coverage.xml
36 | 
37 | # Translations
38 | *.mo
39 | 
40 | # Mr Developer
41 | .mr.developer.cfg
42 | .project
43 | .pydevproject
44 | 
45 | # Rope
46 | .ropeproject
47 | 
48 | # Django stuff:
49 | *.log
50 | *.pot
51 | 
52 | # Sphinx documentation
53 | docs/_build/
54 | 
55 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | Copyright 2023 Ian Ozsvald
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 6 | 
 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 8 | 
 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | 
 3 | name = "ipython_memory_usage"
 4 | 
 5 | version = "1.8.3"
 6 | # https://sethmlarson.dev/pep-440 notes on writing a version number
 7 | 
 8 | # description
 9 | description = "A Jupyter/IPYthon cell based memory and CPU profiler"
10 | readme="README.md"
11 | 
12 | requires-python = ">=3.7"
13 | license = {file = "LICENSE.txt"}
14 | 
15 | authors = [
16 |   { name="Ian Ozsvald", email="ian@ianozsvald.com" },
17 | ]
18 | 
19 | #  url="https://github.com/ianozsvald/ipython_memory_usage",
20 | #   author="Ian Ozsvald",
21 | #   author_email="ian@ianozsvald.com",
22 | #   maintainer="Ian Ozsvald",
23 | #   maintainer_email="ian@ianozsvald.com",
24 | 
25 | dependencies = [ "IPython>=7.0", "memory_profiler", "psutil"]
26 | 
27 | classifiers = [
28 |     "Programming Language :: Python :: 3",
29 |     "License :: OSI Approved :: MIT License",
30 |     "Operating System :: OS Independent",
31 | ]
32 | 
33 | 
34 | [project.urls]
35 | Homepage = "https://github.com/ianozsvald/ipython_memory_usage"
36 | Issues = "https://github.com/ianozsvald/ipython_memory_usage"
37 | 
38 | 
39 | [build-system]
40 | requires = ["setuptools>=61.0", "build", "twine"]
41 | build-backend = "setuptools.build_meta"
42 | 
43 | 


--------------------------------------------------------------------------------
/recipe/meta.yaml:
--------------------------------------------------------------------------------
 1 | package:
 2 |   name: ipython_memory_usage
 3 |   version: 1.1
 4 | 
 5 | source:
 6 |   url: https://files.pythonhosted.org/packages/ed/8a/38fa2249179df377477a6967caf027de0ae93c6813c4e664f517da90f9e9/ipython_memory_usage-1.1.tar.gz
 7 |   sha256: 1f4697210257f853fea74de0cf3fae60a32e550e578bac6f46de9b40c550422b
 8 | 
 9 | build:
10 |   number: 0
11 | 
12 | requirements:
13 |   build:
14 |     - python
15 |     - setuptools
16 |     - memory_profiler
17 |     - IPython >=2.1
18 | 
19 |   run:
20 |     - python
21 |     - memory_profiler
22 |     - IPython >=2.1
23 | 
24 | test:
25 |   imports:
26 |    - ipython_memory_usage
27 | 
28 | about:
29 |   home: https://github.com/ianozsvald/ipython_memory_usage
30 |   license: BSD2
31 |   license_family: BSD
32 |   license_file: LICENSE
33 |   summary:
34 |     - IPython tool to report memory usage deltas for every command you type. If you are running out of RAM then use this tool to understand what’s happening. It also records the time spent running each command.
35 |     - In [3]: arr=np.random.uniform(size=int(1e7))
36 |     -   ‘arr=np.random.uniform(size=int(1e7))’ used 76.2578 MiB RAM in 0.33s, peaked 0.00 MiB above current, total RAM usage 107.37 MiB
37 | 
38 | extra:
39 |   recipe-maintainers:
40 |     - Ian Ozsvald
41 | 


--------------------------------------------------------------------------------
/setupOLD.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """ipython_memory_usage: display memory usage during IPython execution
 3 | 
 4 | ipython_memory_usage is an IPython tool to report memory usage deltas for every command you type.
 5 | """
 6 | 
 7 | doclines = __doc__.split("\n")
 8 | 
 9 | # Chosen from http://www.python.org/pypi?:action=list_classifiers
10 | classifiers = """\
11 | Development Status :: 5 - Production/Stable
12 | Environment :: Console
13 | Intended Audience :: Science/Research
14 | Intended Audience :: Developers
15 | License :: Free To Use But Restricted
16 | Natural Language :: English
17 | Operating System :: OS Independent
18 | Programming Language :: Python
19 | Topic :: Software Development :: Libraries :: Python Modules
20 | Topic :: Software Development :: Testing
21 | """
22 | 
23 | from setuptools import setup, find_packages
24 | setup(
25 |     name="ipython_memory_usage",
26 |     version="1.1",
27 |     url="https://github.com/ianozsvald/ipython_memory_usage",
28 |     author="Ian Ozsvald",
29 |     author_email="ian@ianozsvald.com",
30 |     maintainer="Ian Ozsvald",
31 |     maintainer_email="ian@ianozsvald.com",
32 |     description=doclines[0],
33 |     long_description = """IPython tool to report memory usage deltas for every command you type. If you are running out of RAM then use this tool to understand what's happening. It also records the time spent running each command. \n
34 | 
35 |         In [3]: arr=np.random.uniform(size=int(1e7))\n
36 |         'arr=np.random.uniform(size=int(1e7))' used 76.2578 MiB RAM in 0.33s, peaked 0.00 MiB above current, total RAM usage 107.37 MiB
37 |     """,
38 |     long_description_content_type='text/markdown',
39 |     classifiers=filter(None, classifiers.split("\n")),
40 |     platforms=["Any."],
41 |     packages=['ipython_memory_usage'],
42 |     package_dir={'': 'src'},
43 |     install_requires=['IPython>=2.1', 'memory_profiler']
44 | )
45 | 


--------------------------------------------------------------------------------
/recipe/README.md:
--------------------------------------------------------------------------------
 1 | # Conda Build integration
 2 | 
 3 | Conda build requires three files.
 4 | * meta.yaml - Definition file.
 5 | * bld.bat   - Script file with instructions for Windows builds.
 6 | * build.sh  - Script file with instructions for *nix builds.
 7 | 
 8 | ## Build Package Locally
 9 | Relevant documentation: https://docs.conda.io/projects/conda-build/en/latest/user-guide/tutorials/build-pkgs.html and https://docs.conda.io/projects/conda-build/en/stable/resources/define-metadata.html
10 | 
11 |     # Clone repo
12 |     git clone git@github.com:ianozsvald/ipython_memory_usage.git
13 |     
14 |     # Enter repo
15 |     cd ipython_memory_usage
16 |     
17 |     # Build package locally
18 |     conda-build recipe/meta.yaml
19 |     
20 |     # Check Smithy [Requires conda-smithy: `conda install -n root -c conda-forge conda-smithy`]
21 |     cd recipe
22 |     conda smithy recipe-lint
23 | 
24 |     # Test package locally
25 |     conda install --use-local ipython_memory_usage
26 | 
27 | 
28 | ## Change the build source
29 | Conda build supports a number of sources (source:) from which to prepare packages.
30 | 
31 | **Locally**
32 | 
33 |     # Set absolute or relative path
34 |     path: ..
35 | 
36 | **Git**
37 | 
38 |     # Pin to a revision or version
39 |     git_rev: b36bcc2f85a49dc33eec125d541fe8bee6b67cfd
40 |     git_url: https://github.com/ianozsvald/ipython_memory_usage
41 | 
42 | **Remote package**
43 | 
44 |     # Set the URL and Signature (on ubuntu you can use `sha256sum source_file.tar.gz`)
45 |     url: https://files.pythonhosted.org/packages/ed/8a/38fa2249179df377477a6967caf027de0ae93c6813c4e664f517da90f9e9/ipython_memory_usage-1.1.tar.gz
46 |     sha256: 1f4697210257f853fea74de0cf3fae60a32e550e578bac6f46de9b40c550422b
47 | 
48 | 
49 | ## Updating the package
50 | Make sure to update the version number and if not building locally pin the requsite source (git version or pypi package)
51 | 
52 | 
53 | ## Deploying to Conda Forge
54 | In order to deploy to conda forge, follow the instructions on https://conda-forge.org/#contribute
55 | 
56 | Fork https://github.com/conda-forge/staged-recipes
57 | 
58 | Add meta.yaml to recipes/ipython_memory_usage .
59 | 
60 | Make sure meta-yaml uses a source package (url: .... tar.gz), not git!
61 | 
62 | Create a pull request.
63 | 


--------------------------------------------------------------------------------
/src/ipython_memory_usage/__init__.py:
--------------------------------------------------------------------------------
 1 | """Profile mem usage envelope of IPython commands and report interactively
 2 | 
 3 | Use 
 4 | In[] %load_ext ipython_memory_usage
 5 | In[] %imu_start # invoke magic-based tracking and
 6 | # %imu_stop to disable
 7 | """
 8 | 
 9 | import ipython_memory_usage.ipython_memory_usage as imu
10 | 
11 | from importlib.metadata import version
12 | __version__ = version("ipython_memory_usage") # -> e.g. "1.2"
13 | 
14 | 
15 | #from IPython.core.magic import (
16 | #    register_cell_magic, register_line_cell_magic
17 | #)
18 | 
19 | #@register_line_cell_magic
20 | #def ipython_memory_usage_start(line, cell=None):
21 | #    imu.start_watching_memory()
22 | #    return 'memory profile enabled'
23 | 
24 | #@register_line_cell_magic
25 | #def ipython_memory_usage_stop(line, cell=None):
26 | #    imu.stop_watching_memory()
27 | #    return 'memory profile disabled'
28 | 
29 | from IPython.core.magic import (Magics, magics_class, line_magic,
30 |                                 cell_magic, line_cell_magic)
31 | 
32 | # The class MUST call this class decorator at creation time
33 | # https://ipython.readthedocs.io/en/stable/config/custommagics.html
34 | @magics_class
35 | class IPythonMemoryUsageMagics(Magics):
36 | 
37 |     @line_magic
38 |     def lmagic(self, line):
39 |         "my line magic"
40 |         print("Full access to the main IPython object:", self.shell)
41 |         print("Variables in the user namespace:", list(self.shell.user_ns.keys()))
42 |         return line
43 | 
44 |     #@cell_magic
45 |     #def cmagic(self, line, cell):
46 |     #    "my cell magic"
47 |     #    return line, cell
48 | 
49 |     @line_magic
50 |     def imu_start(self, line):
51 |         """Start CPU & memory profiling for IPython Memory Usage"""
52 |         imu.start_watching_memory()
53 |         return "IPython Memory Usage started"
54 | 
55 |     @line_magic
56 |     def imu_stop(self, line):
57 |         """End profiling for IPython Memory Usage"""
58 |         imu.stop_watching_memory()
59 |         return "IPython Memory Usage stopped"
60 | 
61 | 
62 | def load_ipython_extension(ipython):
63 |     """
64 |     Any module file that define a function named `load_ipython_extension`
65 |     can be loaded via `%load_ext module.path` or be configured to be
66 |     autoloaded by IPython at startup time.
67 |     """
68 |     # You can register the class itself without instantiating it.  IPython will
69 |     # call the default constructor on it.
70 |     print("Enabling IPython Memory Usage, use %imu_start to begin, %imu_stop to end")
71 |     ipython.register_magics(IPythonMemoryUsageMagics)
72 | 


--------------------------------------------------------------------------------
/src/ipython_memory_usage/perf_process.py:
--------------------------------------------------------------------------------
  1 | """"""
  2 | from __future__ import division  # 1/2 == 0.5, as in Py3
  3 | from __future__ import absolute_import  # avoid hiding global modules with locals
  4 | from __future__ import print_function  # force use of print("hello")
  5 | from __future__ import unicode_literals  # force unadorned strings "" to be unicode without prepending u""
  6 | import subprocess
  7 | import unittest
  8 | import os
  9 | 
 10 | FIXTURE0 = """     0.100167119              3,183 cache-misses   """
 11 | ANSWER0 = 3183
 12 | FIXTURE1 = """#           time             counts events\n     0.100167119              3,183 cache-misses             \n     0.200354348              4,045 cache-misses             \n     """
 13 | ANSWER1 = [3183, 4045]
 14 | FIXTURE2 = """     3.501390851        471,219,787 stalled-cycles-frontend\n  14.005319456          2,249,115 stalled-cycles-frontend  """
 15 | ANSWER2 = [471219787,  2249115]
 16 | 
 17 | 
 18 | EVENT_TYPE_CM = "cache-misses"
 19 | EVENT_TYPE_SCF = "stalled-cycles-frontend"
 20 | EVENT_TYPE_I = "instructions"
 21 | EVENT_TYPES = set([EVENT_TYPE_CM, EVENT_TYPE_SCF, EVENT_TYPE_I])
 22 | EVENT_TYPE = EVENT_TYPE_CM
 23 | 
 24 | def process_line(line):
 25 |     """Process a single output line from perf-stat, extract only a value (skip help lines)"""
 26 |     line_bits = line.split()
 27 |     #print(line_bits)
 28 |     try:
 29 |         value = float(line_bits[1].replace(',', ''))
 30 |     except ValueError:
 31 |         if line_bits[2] in EVENT_TYPES:
 32 |             # we only get here if we've got a value and a key
 33 |             key = line_bits[2]
 34 |         value = None
 35 |     except IndexError:
 36 |         value = None
 37 |     return value
 38 | 
 39 | 
 40 | def process_lines(lines):
 41 |     """Process many lines of perf-stat output, extract the values"""
 42 |     # we're assuming we have \n as line endings in this long string
 43 |     values = []
 44 |     for line in lines.split('\n'):
 45 |         value = process_line(line)
 46 |         if value:
 47 |             values.append(value)
 48 |     return values
 49 | 
 50 | 
 51 | class Test(unittest.TestCase):
 52 |     def test1(self):
 53 |         answer0 = process_line(FIXTURE0)
 54 |         self.assertEqual(ANSWER0, answer0)
 55 | 
 56 |     def test_process_lines(self):
 57 |         values = process_lines(FIXTURE0)
 58 |         self.assertEqual(values, [ANSWER0])
 59 | 
 60 |     def test_process_lines2(self):
 61 |         # check we can process the cache-misses messages
 62 |         values = process_lines(FIXTURE1)
 63 |         self.assertEqual(values, ANSWER1)
 64 | 
 65 |         # check that if we have repeated help messages, we still extract the
 66 |         # values we expect
 67 |         values = process_lines(FIXTURE1+FIXTURE1)
 68 |         self.assertEqual(values, ANSWER1+ANSWER1)
 69 | 
 70 |     def test_process_lines3(self):
 71 |         # check we can process stalled-cycles-frontend messages
 72 |         values = process_lines(FIXTURE2)
 73 |         self.assertEqual(values, ANSWER2)
 74 | 
 75 | 
 76 | def run_capture_perf(pid):
 77 |     """Start a perf stat process monitoring pid every 100ms"""
 78 |     cmd = "perf stat --pid {pid} --event {event_type} -I 100".format(pid=pid, event_type=EVENT_TYPE)
 79 |     #print("run_capture_perf running:", cmd)  # debug message
 80 |     proc = subprocess.Popen(cmd.split(), stderr=subprocess.PIPE)
 81 |     return proc
 82 | 
 83 | 
 84 | def finish_perf(proc):
 85 |     """Finish collecting data, parse and return"""
 86 |     # once the job has finished, kill recording
 87 |     proc.kill()
 88 |     # now block to gather all output data
 89 |     (stdoutdata, stderrdata) = proc.communicate()
 90 |     # example stderrdata output:
 91 |     # #           time             counts events
 92 |     # 0.100173796              2,761 cache-misses
 93 |     # 0.200387519              4,232 cache-misses
 94 |     # 0.300540762              5,277 cache-misses
 95 |     # 0.400778748              3,916 cache-misses
 96 |     stderrdata = stderrdata.decode('ascii')  # assume ascii
 97 |     values = process_lines(stderrdata)
 98 |     return values
 99 | 
100 | 
101 | if __name__ == "__main__":
102 |     # simple test for a hardcoded pid gathered over 0.5 seconds
103 |     pid = os.getpid()
104 |     print("Using pid:", pid)
105 |     proc = run_capture_perf(pid)
106 |     import time
107 |     time.sleep(0.5)
108 |     values = finish_perf(proc)
109 |     print(values)
110 | 


--------------------------------------------------------------------------------
/src/ipython_memory_usage/ipython_memory_usage_perf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """Use Linux perf tool to interrogate the CPU's performance counters"""
  4 | from __future__ import division  # 1/2 == 0.5, as in Py3
  5 | from __future__ import absolute_import  # avoid hiding global modules with locals
  6 | from __future__ import print_function  # force use of print("hello")
  7 | from __future__ import unicode_literals  # force unadorned strings "" to be unicode without prepending u""
  8 | import os
  9 | import time
 10 | import memory_profiler
 11 | from IPython import get_ipython
 12 | import perf_process
 13 | 
 14 | # keep a global accounting for the last known memory usage
 15 | # which is the reference point for the memory delta calculation
 16 | previous_call_memory_usage = memory_profiler.memory_usage()[0]
 17 | t1 = time.time() # will be set to current time later
 18 | keep_watching = True
 19 | peak_memory_usage = -1
 20 | perf_proc = None
 21 | 
 22 | watching_memory = True
 23 | input_cells = get_ipython().user_ns['In']
 24 | 
 25 | 
 26 | def start_watching_memory():
 27 |     """Register memory profiling tools to IPython instance."""
 28 |     global watching_memory
 29 |     watching_memory = True
 30 |     ip = get_ipython()
 31 |     ip.events.register("post_run_cell", watch_memory)
 32 |     ip.events.register("pre_run_cell", pre_run_cell)
 33 | 
 34 | 
 35 | def stop_watching_memory():
 36 |     """Unregister memory profiling tools from IPython instance."""
 37 |     global watching_memory
 38 |     watching_memory = False
 39 |     ip = get_ipython()
 40 |     try:
 41 |         ip.events.unregister("post_run_cell", watch_memory)
 42 |     except ValueError:
 43 |         pass
 44 |     try:
 45 |         ip.events.unregister("pre_run_cell", pre_run_cell)
 46 |     except ValueError:
 47 |         pass
 48 | 
 49 | def watch_memory():
 50 |     import time
 51 |     # bring in the global memory usage value from the previous iteration
 52 |     global previous_call_memory_usage, peak_memory_usage, keep_watching, perf_proc, \
 53 |            watching_memory, input_cells
 54 |     #nbr_commands = len(In)
 55 |     new_memory_usage = memory_profiler.memory_usage()[0]
 56 |     memory_delta = new_memory_usage - previous_call_memory_usage
 57 |     keep_watching = False
 58 |     peaked_memory_usage = max(0, peak_memory_usage - new_memory_usage)
 59 |     # calculate time delta using global t1 (from the pre-run event) and current
 60 |     # time
 61 |     time_delta_secs = time.time() - t1
 62 |     perf_values = []
 63 |     if perf_proc:
 64 |         # pause if necessary to attempt to make sure we get a sample from perf...
 65 |         # as the 100ms min sample time and flushing oddness means I don't get a
 66 |         # sample very quickly for short-running tasks
 67 |         MIN_TIME_TO_GET_PERF_SAMPLE = 0.2
 68 |         if time_delta_secs < MIN_TIME_TO_GET_PERF_SAMPLE:
 69 |             print("PAUSING to get perf sample for {}s".format(MIN_TIME_TO_GET_PERF_SAMPLE))
 70 |             time.sleep(MIN_TIME_TO_GET_PERF_SAMPLE)  # pause until at least 0.1s has passed
 71 |         # if we have a valid perf running then capture that information
 72 |         perf_values = perf_process.finish_perf(perf_proc)
 73 |     
 74 |     cmd = ""  #In[nbr_commands-1]
 75 |     # convert the results into a pretty string
 76 |     #output_template = "'{cmd}' used {memory_delta:0.4f} MiB RAM in {time_delta:0.2f}s, peaked {peaked_memory_usage:0.2f} MiB above current, total RAM usage {memory_usage:0.2f} MiB"
 77 |     output_template = "Used {memory_delta:0.4f} MiB RAM in {time_delta:0.2f}s, peaked {peaked_memory_usage:0.2f} MiB above current, total RAM usage {memory_usage:0.2f} MiB"
 78 |     output = output_template.format(time_delta=time_delta_secs,
 79 |                                     cmd=cmd,
 80 |                                     memory_delta=memory_delta,
 81 |                                     peaked_memory_usage=peaked_memory_usage,
 82 |                                     memory_usage=new_memory_usage)
 83 |     print(str(output))
 84 |     if perf_values:
 85 |         perf_average = int(sum(perf_values) / float(time_delta_secs))
 86 |         #print("perf value for {} averages to {:,}/second, raw samples:".format(perf_process.EVENT_TYPE, perf_average), perf_values)
 87 |         print("perf value for {} averages to {:,}/second".format(perf_process.EVENT_TYPE, perf_average))
 88 |     else:
 89 |         print("perf - no results to report, possibly the collection time was too short?")
 90 |     previous_call_memory_usage = new_memory_usage
 91 | 
 92 | 
 93 | def during_execution_memory_sampler():
 94 |     import time
 95 |     import memory_profiler
 96 |     global keep_watching, peak_memory_usage
 97 |     peak_memory_usage = -1
 98 |     keep_watching = True
 99 | 
100 |     n = 0
101 |     WAIT_BETWEEN_SAMPLES_SECS = 0.001
102 |     MAX_ITERATIONS = 60.0 / WAIT_BETWEEN_SAMPLES_SECS
103 |     while True:
104 |         mem_usage = memory_profiler.memory_usage()[0]
105 |         peak_memory_usage = max(mem_usage, peak_memory_usage)
106 |         time.sleep(WAIT_BETWEEN_SAMPLES_SECS)
107 |         if not keep_watching or n > MAX_ITERATIONS:
108 |             # exit if we've been told our command has finished or if it has run
109 |             # for more than a sane amount of time (e.g. maybe something crashed
110 |             # and we don't want this to carry on running)
111 |             if n > MAX_ITERATIONS:
112 |                 print("{} SOMETHING WEIRD HAPPENED AND THIS RAN FOR TOO LONG, THIS THREAD IS KILLING ITSELF".format(__file__))
113 |             break
114 |         n += 1
115 | 
116 | 
117 | def pre_run_cell():
118 |     """Capture current time before we execute the current command"""
119 |     import time
120 |     import os
121 |     global perf_proc, t1
122 |     # t1 records the start time of this execution cycle
123 |     t1 = time.time()
124 | 
125 |     # start a thread that samples RAM usage until the current command finishes
126 |     import threading
127 |     ipython_memory_usage_thread = threading.Thread(target=during_execution_memory_sampler)
128 |     ipython_memory_usage_thread.daemon = True
129 |     ipython_memory_usage_thread.start()
130 | 
131 |     pid = os.getpid()
132 |     perf_proc = perf_process.run_capture_perf(pid)


--------------------------------------------------------------------------------
/src/ipython_memory_usage/ipython_memory_usage.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """Profile mem usage envelope of IPython commands and report interactively"""
  4 | import time
  5 | import memory_profiler
  6 | from IPython import get_ipython
  7 | import psutil  # ADDED
  8 | 
  9 | # __version__ = 1.1  # set to desired value.
 10 | # Disabled for now, I'll use pyproject.toml for this
 11 | # we could use:
 12 | # from importlib.metadata import version
 13 | # version("ipython_memory_usage") -> "1.2"
 14 | # I've moved the above to __init__.py
 15 | 
 16 | # To run: %run -i ipython_memory_usage.py
 17 | # but there will be no output
 18 | 
 19 | # keep a global accounting for the last known memory usage
 20 | # which is the reference point for the memory delta calculation
 21 | previous_call_memory_usage = memory_profiler.memory_usage()[0]
 22 | t1 = time.time()  # will be set to current time later
 23 | keep_watching = True
 24 | peak_memory_usage = -1
 25 | cpu_utilisation_list = []
 26 | watching_memory = True
 27 | input_cells = get_ipython().user_ns["In"]
 28 | 
 29 | 
 30 | def start_watching_memory():
 31 |     """Register memory profiling tools to IPython instance."""
 32 |     global watching_memory
 33 | 
 34 |     # Just in case start is called more than once, stop watching. Hence unregister events
 35 |     stop_watching_memory()
 36 | 
 37 |     watching_memory = True
 38 |     ip = get_ipython()
 39 |     ip.events.register("post_run_cell", watch_memory)
 40 |     ip.events.register("pre_run_cell", pre_run_cell)
 41 | 
 42 | 
 43 | def stop_watching_memory():
 44 |     """Unregister memory profiling tools from IPython instance."""
 45 |     global watching_memory
 46 |     watching_memory = False
 47 |     ip = get_ipython()
 48 |     try:
 49 |         ip.events.unregister("post_run_cell", watch_memory)
 50 |     except ValueError:
 51 |         pass
 52 |     try:
 53 |         ip.events.unregister("pre_run_cell", pre_run_cell)
 54 |     except ValueError:
 55 |         pass
 56 | 
 57 | 
 58 | def watch_memory(execution_result):
 59 |     """Prints the memory usage if watching the memory"""
 60 |     # print(type(execution_result)) # <class 'IPython.core.interactiveshell.ExecutionResult'>
 61 |     # bring in the global memory usage value from the previous iteration
 62 |     global previous_call_memory_usage, peak_memory_usage, keep_watching, watching_memory, input_cells
 63 |     new_memory_usage = memory_profiler.memory_usage()[0]
 64 |     memory_delta = new_memory_usage - previous_call_memory_usage
 65 |     keep_watching = False
 66 |     peaked_memory_usage = max(0, peak_memory_usage - new_memory_usage)
 67 |     # calculate time delta using global t1 (from the pre-run event) and current
 68 |     # time
 69 |     time_delta_secs = time.time() - t1
 70 |     num_commands = len(input_cells) - 1
 71 |     cmd = "In [{}]".format(num_commands)
 72 | 
 73 |     # summarise cpu utililisation
 74 |     cpu_means = []
 75 |     cpu_max = 0
 76 |     cpu_mean = 0
 77 |     for row in cpu_utilisation_list:
 78 |         mean = sum(row) / len(row)
 79 |         cpu_means.append(mean)
 80 |         cpu_max = max(cpu_max, max(row))
 81 |     if len(cpu_utilisation_list) > 0:
 82 |         cpu_mean = sum(cpu_means) / len(cpu_means)
 83 | 
 84 |     # convert the results into a pretty string
 85 |     output_template = (
 86 |         "{cmd} used {memory_delta:0.1f} MiB RAM in "
 87 |         "{time_delta:0.2f}s (system mean cpu {cpu_mean:0.0f}%, single max cpu {cpu_max:0.0f}%), peaked {peaked_memory_usage:0.1f} "
 88 |         "MiB above final usage, current RAM usage now "
 89 |         "{memory_usage:0.1f} MiB"
 90 |     )
 91 |     output = output_template.format(
 92 |         time_delta=time_delta_secs,
 93 |         cmd=cmd,
 94 |         memory_delta=memory_delta,
 95 |         peaked_memory_usage=peaked_memory_usage,
 96 |         memory_usage=new_memory_usage,
 97 |         cpu_mean=cpu_mean,
 98 |         cpu_max=cpu_max,
 99 |     )
100 |     if watching_memory:
101 |         print(str(output))
102 |     previous_call_memory_usage = new_memory_usage
103 | 
104 | 
105 | def during_execution_memory_sampler():
106 |     """Thread to sample memory usage"""
107 |     import time
108 |     import memory_profiler
109 | 
110 |     global keep_watching, peak_memory_usage, cpu_utilisation_list
111 |     peak_memory_usage = -1
112 |     cpu_utilisation_list = []
113 |     psutil.cpu_percent()  # must call it once to clear the built-in history
114 |     keep_watching = True
115 | 
116 |     n = 0
117 |     WAIT_BETWEEN_SAMPLES_SECS = 0.001
118 |     MAX_ITERATIONS = 60.0 / WAIT_BETWEEN_SAMPLES_SECS
119 |     while True:
120 |         # get memory details
121 |         mem_usage = memory_profiler.memory_usage()[0]
122 |         peak_memory_usage = max(mem_usage, peak_memory_usage)
123 | 
124 |         # get cpu usage details
125 |         this_cpu_utilisation = psutil.cpu_percent(
126 |             percpu=True
127 |         )  # get cpu utilisation per cpu
128 |         cpu_utilisation_list.append(this_cpu_utilisation)
129 | 
130 |         time.sleep(WAIT_BETWEEN_SAMPLES_SECS)
131 |         if not keep_watching or n > MAX_ITERATIONS:
132 |             # exit if we've been told our command has finished or if it has run
133 |             # for more than a sane amount of time (e.g. maybe something crashed
134 |             # and we don't want this to carry on running)
135 |             if n > MAX_ITERATIONS:
136 |                 print(
137 |                     "{} SOMETHING WEIRD HAPPENED AND THIS RAN FOR TOO LONG, THIS THREAD IS KILLING ITSELF".format(
138 |                         __file__
139 |                     )
140 |                 )
141 |             break
142 |         n += 1
143 | 
144 | 
145 | def pre_run_cell(execution_result):
146 |     """Capture current time before we execute the current command"""
147 |     import time
148 | 
149 |     global t1
150 |     t1 = time.time()
151 | 
152 |     # start a thread that samples RAM usage until the current command finishes
153 |     import threading
154 | 
155 |     ipython_memory_usage_thread = threading.Thread(
156 |         target=during_execution_memory_sampler
157 |     )
158 |     ipython_memory_usage_thread.daemon = True
159 |     ipython_memory_usage_thread.start()
160 | 
161 | 
162 | def expensive_fn():
163 |     """test fn to make the machine do some work"""
164 |     # import math
165 |     for _ in range(10):
166 |         nbr = [n for n in range(1_000_000)]
167 |         max_nbr = max(nbr)
168 |     return max_nbr
169 | 
170 | 
171 | if __name__ == "__main__":
172 |     # if we e.g. %run -i cell_profiler.py from IPython
173 |     start_watching_memory()
174 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ipython_memory_usage
  2 | ====================
  3 | 
  4 | IPython tool to report memory usage deltas for every command you type. If you are running out of RAM then use this tool to understand what's happening. It also records the time spent running each command.
  5 | 
  6 | > "ipython-memory-usage is very useful. As an ML engineer, many times I have to work with large datasets that occupy several GBs of RAM. And to get a ballpark of mem usage of each large object/df and entire app as a whole I had to do free -g or -m iteratively which is not very efficient. But with this I can easily get RAM occupied within my jupyter cell. Shared this broadly with my team." - Vishal Mittal, ML Engineer at Goldman Sachs
  7 | 
  8 | This tool helps you to figure out which commands use a lot of RAM and take a long time to run, this is very useful if you're working with large numpy matrices. In addition it reports the peak memory usage whilst a command is running which might be higher (due to temporary objects) than the final RAM usage. Built on @fabianp's `memory_profiler`.
  9 | 
 10 | As a simple example - make 10,000,000,000 "ones", report that it costs 57GB of RAM and took 35 seconds to execute, with 1 CPU at 100% for at least some of the time and the overall CPU usage being 10% (as only 1 core was running):
 11 | 
 12 | ```
 13 | $ ipython
 14 | ...
 15 | In [1]: %load_ext ipython_memory_usage
 16 | Enabling IPython Memory Usage, use %imu_start to begin, %imu_stop to end
 17 | 
 18 | In [2]: %imu_start
 19 | Out[2]: 'IPython Memory Usage started'
 20 | In [2] used 1.0 MiB RAM in 8.02s (system mean cpu 0%, single max cpu 0%), peaked 0.0 MiB above final usage, current RAM usage now 55.1 MiB
 21 | 
 22 | In [3]: import numpy as np
 23 | In [3] used 14.6 MiB RAM in 0.25s (system mean cpu 15%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 69.8 MiB
 24 | 
 25 | In [4]: np.ones(int(1e10));
 26 | In [4] used -27.9 MiB RAM in 35.58s (system mean cpu 10%, single max cpu 100%), peaked 57458.3 MiB above final usage, current RAM usage now 54.1 MiB
 27 | 
 28 | 
 29 | ```
 30 | 
 31 | Francesc Alted has a fork with more memory delta details, see it here: https://github.com/FrancescAlted/ipython_memwatcher
 32 | 
 33 | For a demo using numpy and Pandas take a look at [examples/example_usage_np_pd.ipynb](https://github.com/ianozsvald/ipython_memory_usage/blob/master/src/ipython_memory_usage/examples/example_usage_np_pd.ipynb).
 34 | 
 35 | Setup
 36 | =====
 37 | 
 38 | Supported: Python 3.8+ and IPython 7.9+
 39 | 
 40 | Simple: 
 41 | 
 42 | `$ pip install ipython_memory_usage` 
 43 | 
 44 | via https://pypi.org/project/ipython-memory-usage/
 45 | 
 46 | OR
 47 | 
 48 | Take a copy of the code or fork from https://github.com/ianozsvald/ipython_memory_usage and then:
 49 | 
 50 |     $ python setup.py install
 51 | 
 52 | If you pull it from github and you want to develop on it, it is easier to make a link in `site-packages` and develop it locally with:
 53 | 
 54 |     $ pip install -e .
 55 | 
 56 | To uninstall:
 57 | 
 58 |     $ pip uninstall ipython_memory_usage
 59 | 
 60 | Example usage
 61 | =============
 62 | 
 63 | We can measure on every line how large array operations allocate and deallocate memory:
 64 | 
 65 | For the beginner with numpy it can be easy to work on copies of matrices which use a large amount of RAM. The following example sets the scene and then shows an in-place low-RAM variant.
 66 | 
 67 | First we make a random square array and modify it twice using copies taking 2.3GB RAM:
 68 | ```    
 69 | In [1]: %load_ext ipython_memory_usage
 70 | Enabling IPython Memory Usage, use %imu_start to begin, %imu_stop to end
 71 | In [2]: %imu_start
 72 | 
 73 | In [3]: a = np.random.random((int(1e4),int(1e4)))
 74 | In [3] used 763.3 MiB RAM in 1.82s (system mean cpu 7%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 832.5 MiB
 75 | 
 76 | In [4]: b = a*2
 77 | In [4] used 762.9 MiB RAM in 0.32s (system mean cpu 6%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 1595.5 MiB
 78 | 
 79 | In [5]: c = np.sqrt(b)
 80 | In [5] used 762.8 MiB RAM in 0.39s (system mean cpu 7%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 2358.3 MiB
 81 | ```
 82 | 
 83 | 
 84 | Now we do the same operations but in-place on `a`, using 813MB RAM in total:
 85 | ```
 86 | In [3]: a = np.random.random((int(1e4),int(1e4)))
 87 | In [3] used 0.1 MiB RAM in 0.92s (system mean cpu 7%, single max cpu 100%), peaked 761.9 MiB above final usage, current RAM usage now 832.5 MiB
 88 | 
 89 | In [4]: a *= 2
 90 | In [4] used 0.1 MiB RAM in 0.18s (system mean cpu 6%, single max cpu 16%), peaked 0.0 MiB above final usage, current RAM usage now 832.6 MiB
 91 | 
 92 | In [5]: a = np.sqrt(a, out=a)
 93 | In [5] used 0.0 MiB RAM in 0.25s (system mean cpu 4%, single max cpu 50%), peaked 0.0 MiB above final usage, current RAM usage now 832.6 MiB
 94 | ```
 95 | 
 96 | Lots of `numpy` functions have in-place operations that can assign their result back into themselves (see the `out` argument): http://docs.scipy.org/doc/numpy/reference/ufuncs.html#available-ufuncs
 97 | 
 98 | 
 99 | Newer (2020+) versions of Numpy use temporary objects which provide memory optimisation, see https://docs.scipy.org/doc/numpy-1.13.0/release.html
100 | 
101 | `a` and `b` are multiplied into a temprorary, then the same temporary is re-used for the addition with `c` which is then assigned to `c` so only one circa 700MB array is created. In older versions of numpy several arrays could be created during the same operation.
102 | 
103 | ```
104 | In [3]: a = np.ones(int(1e8)); b = np.ones(int(1e8)); c = np.ones(int(1e8))
105 | In [3] used 2288.8 MiB RAM in 0.61s (system mean cpu 6%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 2358.4 MiB
106 | 
107 | In [4]: d = a * b + c
108 | In [4] used 763.3 MiB RAM in 0.53s (system mean cpu 5%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 3121.7 MiB
109 | ```
110 | 
111 | Important RAM usage note
112 | ========================
113 | 
114 | It is much easier to debug RAM situations with a fresh IPython shell. The longer you use your current shell, the more objects remain inside it and the more RAM the Operating System may have reserved. RAM is returned to the OS slowly, so you can end up with a large process with plenty of spare internal RAM (which will be allocated to your large objects), so this tool (via memory_profiler) reports 0MB RAM usage. If you get confused or don't trust the results, quit IPython and start a fresh shell, then run the fewest commands you need to understand how RAM is added to the process.
115 | 
116 | 
117 | Experimental perf stat report to monitor caching
118 | ================================================
119 | 
120 | __Totally out of date for 2023, this needs a refresh__
121 | 
122 | I've added experimental support for the `perf stat` tool on Linux. To use it make sure that `perf stat` runs at the command line first. Experimental support of the `cache-misses` event is enabled in this variant script (to use this `cd src/ipython_memory_usage` first):
123 | ```
124 | Python 3.4.3 |Anaconda 2.3.0 (64-bit)| (default, Jun  4 2015, 15:29:08) 
125 | IPython 3.2.0 -- An enhanced Interactive Python.
126 | In [1]: %run -i ipython_memory_usage_perf.py
127 | In [2]: start_watching_memory()
128 | ```
129 | 
130 | Here's an example that builds on the previous ones. We build a square matrix with C ordering, we also need a 1D vector of the same size:
131 | ```
132 | In [3]: ones_c = np.ones((int(1e4),int(1e4)))
133 | In [4]: v = np.ones(int(1e4))
134 | ```
135 | 
136 | Next we run `%timeit` using all the data in row 0. The data will reasonably fit into a cache as `v.nbytes == 80000` (80 kilobytes) and my L3 cache is 6MB. The report `perf value for cache-misses averages to 8,823/second` shows an average of 8k cache misses per seconds during this operation (followed by all the raw sampled events for reference). `%timeit` shows that this operation cost 14 microseconds per loop:
137 | 
138 |     In [5]: %timeit v * ones_c[0, :]
139 |     run_capture_perf running: perf stat --pid 4978 --event cache-misses -I 100
140 |     100000 loops, best of 3: 14.9 µs per loop
141 |     In [6] used 0.1875 MiB RAM in 6.27s, peaked 0.00 MiB above current, total RAM usage 812.54 MiB
142 |     perf value for cache-misses averages to 8,823/second, raw samples: [6273.0, 382.0, 441.0, 1103.0, 632.0, 1314.0, 180.0, 451.0, 189.0, 540.0, 159.0, 1632.0, 285.0, 949.0, 408.0, 79.0, 448.0, 1167.0, 505.0, 350.0, 79.0, 172.0, 683.0, 2185.0, 1151.0, 170.0, 716.0, 2224.0, 572.0, 1708.0, 314.0, 572.0, 21.0, 209.0, 498.0, 839.0, 955.0, 233.0, 202.0, 797.0, 88.0, 185.0, 1663.0, 450.0, 352.0, 739.0, 4413.0, 1810.0, 1852.0, 550.0, 135.0, 389.0, 334.0, 235.0, 1922.0, 658.0, 233.0, 266.0, 170.0, 2198.0, 222.0, 4702.0]
143 | 
144 | We can run the same code using alternative indexing - for column 0 we get all the row elements, this means we have to fetch the column but it is stored in row-order, so each long row goes into the cache to use just one element. Now `%timeit` reports 210 microseconds per loop which is an order of magnitude slower than before, on average we have 474k cache misses per second. This column-ordered method of indexing the data is far less cache-friendly than the previous (row-ordered) method.
145 | 
146 |     In [5]: %timeit v * ones_c[:, 0]
147 |     run_capture_perf running: perf stat --pid 4978 --event cache-misses -I 100
148 |     1000 loops, best of 3: 210 µs per loop
149 |     In [5] used 0.0156 MiB RAM in 1.01s, peaked 0.00 MiB above current, total RAM usage 812.55 MiB
150 |     perf value for cache-misses averages to 474,771/second, raw samples: [77253.0, 49168.0, 48660.0, 53147.0, 52532.0, 56546.0, 50128.0, 48890.0, 43623.0]
151 | 
152 | If the sample-gathering happens too quickly then an artifical pause is added, this means that IPython can pause for a fraction of a second which inevitably causes cache misses (as the CPU is being using and IPython is running an event loop). You can witness the baseline cache misses using `pass`:
153 | 
154 |     In [9]: pass
155 |     run_capture_perf running: perf stat --pid 4978 --event cache-misses -I 100
156 |     PAUSING to get perf sample for 0.3s
157 |     In [9] used 0.0039 MiB RAM in 0.13s, peaked 0.00 MiB above current, total RAM usage 812.57 MiB
158 |     perf value for cache-misses averages to 131,611/second, raw samples: [14111.0, 3481.0]
159 | 
160 | NOTE that this is experimental, it is only known to work on Ian's laptop using Ubuntu Linux (`perf` doesn't exist on Mac or Windows). There are some tests for the `perf` parsing code, run `nosetests perf_process.py` to confirm these work ok and validate with your own `perf` output. I'm using `perf` version 3.11.0-12. Inside `perf_process.py` the `EVENT_TYPE` can be substituted to other events like `stalled-cycles-frontend` (exit IPython and restart to make sure the run-time is good - this code is hacky!).
161 | 
162 | To trial the code run `$ python perf_process.py`, this is useful for interactive development.
163 | 
164 | Requirements
165 | ============
166 | 
167 |  * `memory_profiler` https://github.com/fabianp/memory_profiler   (`pip install memory_profiler`)
168 |  * `psutil` for cpu tracking
169 |  * `perf stat` (Linux only, installed outside of Python using e.g. Synaptic, apt-get etc)
170 | 
171 | Tested on
172 | =========
173 | 
174 | * IPython 8.17 with Python 3.12 on Linux 64bit (2023-11)
175 | 
176 | 
177 | Developer installation notes
178 | ============================
179 | 
180 | Ian's 2023 update
181 | 
182 | ```
183 | conda create -n ipython_memory_usage python=3.12
184 | conda activate ipython_memory_usage
185 | pip install numpy
186 | pip install ipython
187 | 
188 | pip install -e . # editable installation
189 | 
190 | ipython
191 | import ipython_memory_usage
192 | %ipython_memory_usage_start
193 | 
194 | python -m build # builds an installable
195 | ```
196 | 
197 | 
198 | These notes are for the Man AHL 2019 Hackathon.
199 | 
200 | ```
201 | conda create -n hackathon_ipython_memory_usage python=3.7
202 | conda activate hackathon_ipython_memory_usage
203 | conda install ipython numpy memory_profiler
204 | 
205 | mkdir hackathon_ipython_memory_usage
206 | cd hackathon_ipython_memory_usage/
207 | git clone git@github.com:ianozsvald/ipython_memory_usage.git
208 | 
209 | # note "develop" and not the usual "install" here, to make the local folder editable!
210 | python setup.py develop 
211 | 
212 | # now run ipython and follow the examples from further above in this README
213 | ```
214 | 
215 | ```
216 | # make a development environment
217 | $ mkdir ipython_memory_usage_dev
218 | $ cd ipython_memory_usage_dev/
219 | $ conda create -n ipython_memory_usage_dev python=3.9 ipython jupyter memory_profiler numpy pandas
220 | $ conda activate ipython_memory_usage_dev
221 | git clone git@github.com:ianozsvald/ipython_memory_usage.git
222 | 
223 | # note "develop" and not the usual "install" here, to make the local folder editable!
224 | $ python setup.py develop 
225 | 
226 | # now run ipython and follow the examples from further above in this README
227 | ```
228 | 
229 |  Acknowledgements
230 |  ================
231 |  
232 |  Many thanks to https://github.com/manahl/ for hosting their 2019-11 hackathon. Here we removed old Python 2.x code, added an IPython magic, validated that Python 3.8 is supported and (very nearly) have a working conda recipe. Thanks to my colleagues:
233 |  * https://github.com/ps-git
234 |  * https://github.com/erdincmutlu
235 |  * https://github.com/Stefannn
236 |  * https://github.com/valmal
237 |  * https://github.com/PauleGef
238 |  * Elices 
239 | 
240 |  Many thanks to https://github.com/manahl/ for hosting a hackathon (2018-04) that led to us publishing `ipython_memory_usage` to PyPi: https://pypi.org/project/ipython-memory-usage/ . Props to my colleagues for helping me fix the docs and upload to PyPI:
241 | * https://github.com/pawellee
242 | * https://github.com/takumab
243 | * https://github.com/Hexal7785 (Hetal)
244 | * https://github.com/robmarkcole
245 | * https://github.com/pmalde
246 | * https://github.com/LucijaGregov
247 | * https://github.com/xhochy
248 | 
249 | TO FIX
250 | ======
251 | 
252 |  * merge perf variation into the main variation as some sort of plugin (so it doesn't interfere if per not installed or available)
253 |  * possibly try to add a counter for the size of the garbage collector, to see how many temp objects are made (disable gc first) on each command?
254 |  * conda installation is really out of date `$ conda install -c conda-forge ipython_memory_usage` via https://anaconda.org/conda-forge/ipython_memory_usage
255 | * Should I keep __version__ in ipython_memory_usage.py in addition to pyproject.toml?
256 |   * https://stackoverflow.com/questions/72167802/adding-version-attribute-to-python-module
257 | * Should twine be in the build dependencies in .toml?
258 | * How to add developer dependencies like black?
259 | * For conda how do I specify the source package when it isn't uploaded yet?
260 | 
261 | 
262 | Problems
263 | ========
264 | 
265 |  * I can't figure out how to hook into live In prompt (at least - I can for static output, not for a dynamic output - see the code and the commented out blocks referring to `watch_memory_prompt`)
266 |  
267 | Notes to Ian
268 | ============
269 | 
270 | To push to PyPI I need to follow https://packaging.python.org/en/latest/tutorials/packaging-projects/:
271 | * pip install -U twine
272 | * update version in pyproject.toml
273 | * python -m build
274 | * check the dist/ folder only has the current build, 1 whl and 1 zip
275 | * python -m twine upload --repository testpypi dist/*
276 |   * username is __token__, password is the token in my passwords
277 | * python -m twine upload --repository pypi dist/* # when ready
278 |   
279 | 


--------------------------------------------------------------------------------
/src/ipython_memory_usage/examples/example_usage_np_pd.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "25531fd7",
   6 |    "metadata": {},
   7 |    "source": [
   8 |     "# Short demo of using `ipython_memory_usage` to diagnose numpy and Pandas RAM usage\n",
   9 |     "\n",
  10 |     "Author Ian uses this tool in his Higher Performance Python training (https://ianozsvald.com/training/) and it is mentioned in his High Performance Python (2nd ed, O'Reilly) technical book.\n",
  11 |     "\n",
  12 |     "We can use it to understand how much RAM we're currently using and which of several alternate ways to solve a problem in complex tools might be the most RAM efficient solutions.\n",
  13 |     "\n",
  14 |     "* `total RAM usage` is the current RAM usage at the end of that cell's execution\n",
  15 |     "* `used` shows the difference between the _last_ `total RAM usage` and this one\n",
  16 |     "* `peaked` shows any during-execution peak _above_ the resulting `total RAM usage` (i.e. hidden RAM usage that might catch you out)"
  17 |    ]
  18 |   },
  19 |   {
  20 |    "cell_type": "code",
  21 |    "execution_count": 1,
  22 |    "id": "b2954dc0",
  23 |    "metadata": {},
  24 |    "outputs": [
  25 |     {
  26 |      "name": "stdout",
  27 |      "output_type": "stream",
  28 |      "text": [
  29 |       "Help on package ipython_memory_usage:\n",
  30 |       "\n",
  31 |       "NAME\n",
  32 |       "    ipython_memory_usage - Profile mem usage envelope of IPython commands and report interactively\n",
  33 |       "\n",
  34 |       "DESCRIPTION\n",
  35 |       "    Use \n",
  36 |       "    In[] %load_ext ipython_memory_usage\n",
  37 |       "    In[] %imu_start # invoke magic-based tracking and\n",
  38 |       "    # %imu_stop to disable\n",
  39 |       "\n",
  40 |       "PACKAGE CONTENTS\n",
  41 |       "    ipython_memory_usage\n",
  42 |       "    ipython_memory_usage_perf\n",
  43 |       "    perf_process\n",
  44 |       "\n",
  45 |       "SUBMODULES\n",
  46 |       "    imu\n",
  47 |       "\n",
  48 |       "CLASSES\n",
  49 |       "    IPython.core.magic.Magics(traitlets.config.configurable.Configurable)\n",
  50 |       "        IPythonMemoryUsageMagics\n",
  51 |       "    \n",
  52 |       "    class IPythonMemoryUsageMagics(IPython.core.magic.Magics)\n",
  53 |       "     |  IPythonMemoryUsageMagics(shell=None, **kwargs)\n",
  54 |       "     |  \n",
  55 |       "     |  # The class MUST call this class decorator at creation time\n",
  56 |       "     |  # https://ipython.readthedocs.io/en/stable/config/custommagics.html\n",
  57 |       "     |  \n",
  58 |       "     |  Method resolution order:\n",
  59 |       "     |      IPythonMemoryUsageMagics\n",
  60 |       "     |      IPython.core.magic.Magics\n",
  61 |       "     |      traitlets.config.configurable.Configurable\n",
  62 |       "     |      traitlets.traitlets.HasTraits\n",
  63 |       "     |      traitlets.traitlets.HasDescriptors\n",
  64 |       "     |      builtins.object\n",
  65 |       "     |  \n",
  66 |       "     |  Methods defined here:\n",
  67 |       "     |  \n",
  68 |       "     |  imu_start(self, line)\n",
  69 |       "     |      Start CPU & memory profiling for IPython Memory Usage\n",
  70 |       "     |  \n",
  71 |       "     |  imu_stop(self, line)\n",
  72 |       "     |      End profiling for IPython Memory Usage\n",
  73 |       "     |  \n",
  74 |       "     |  lmagic(self, line)\n",
  75 |       "     |      my line magic\n",
  76 |       "     |  \n",
  77 |       "     |  ----------------------------------------------------------------------\n",
  78 |       "     |  Data and other attributes defined here:\n",
  79 |       "     |  \n",
  80 |       "     |  __annotations__ = {}\n",
  81 |       "     |  \n",
  82 |       "     |  magics = {'cell': {}, 'line': {'imu_start': 'imu_start', 'imu_stop': '...\n",
  83 |       "     |  \n",
  84 |       "     |  registered = True\n",
  85 |       "     |  \n",
  86 |       "     |  ----------------------------------------------------------------------\n",
  87 |       "     |  Methods inherited from IPython.core.magic.Magics:\n",
  88 |       "     |  \n",
  89 |       "     |  __init__(self, shell=None, **kwargs)\n",
  90 |       "     |      Create a configurable given a config config.\n",
  91 |       "     |      \n",
  92 |       "     |      Parameters\n",
  93 |       "     |      ----------\n",
  94 |       "     |      config : Config\n",
  95 |       "     |          If this is empty, default values are used. If config is a\n",
  96 |       "     |          :class:`Config` instance, it will be used to configure the\n",
  97 |       "     |          instance.\n",
  98 |       "     |      parent : Configurable instance, optional\n",
  99 |       "     |          The parent Configurable instance of this object.\n",
 100 |       "     |      \n",
 101 |       "     |      Notes\n",
 102 |       "     |      -----\n",
 103 |       "     |      Subclasses of Configurable must call the :meth:`__init__` method of\n",
 104 |       "     |      :class:`Configurable` *before* doing anything else and using\n",
 105 |       "     |      :func:`super`::\n",
 106 |       "     |      \n",
 107 |       "     |          class MyConfigurable(Configurable):\n",
 108 |       "     |              def __init__(self, config=None):\n",
 109 |       "     |                  super(MyConfigurable, self).__init__(config=config)\n",
 110 |       "     |                  # Then any other code you need to finish initialization.\n",
 111 |       "     |      \n",
 112 |       "     |      This ensures that instances will be configured properly.\n",
 113 |       "     |  \n",
 114 |       "     |  arg_err(self, func)\n",
 115 |       "     |      Print docstring if incorrect arguments were passed\n",
 116 |       "     |  \n",
 117 |       "     |  default_option(self, fn, optstr)\n",
 118 |       "     |      Make an entry in the options_table for fn, with value optstr\n",
 119 |       "     |  \n",
 120 |       "     |  format_latex(self, strng)\n",
 121 |       "     |      Format a string for latex inclusion.\n",
 122 |       "     |  \n",
 123 |       "     |  parse_options(self, arg_str, opt_str, *long_opts, **kw)\n",
 124 |       "     |      Parse options passed to an argument string.\n",
 125 |       "     |      \n",
 126 |       "     |      The interface is similar to that of :func:`getopt.getopt`, but it\n",
 127 |       "     |      returns a :class:`~IPython.utils.struct.Struct` with the options as keys\n",
 128 |       "     |      and the stripped argument string still as a string.\n",
 129 |       "     |      \n",
 130 |       "     |      arg_str is quoted as a true sys.argv vector by using shlex.split.\n",
 131 |       "     |      This allows us to easily expand variables, glob files, quote\n",
 132 |       "     |      arguments, etc.\n",
 133 |       "     |      \n",
 134 |       "     |      Parameters\n",
 135 |       "     |      ----------\n",
 136 |       "     |      arg_str : str\n",
 137 |       "     |          The arguments to parse.\n",
 138 |       "     |      opt_str : str\n",
 139 |       "     |          The options specification.\n",
 140 |       "     |      mode : str, default 'string'\n",
 141 |       "     |          If given as 'list', the argument string is returned as a list (split\n",
 142 |       "     |          on whitespace) instead of a string.\n",
 143 |       "     |      list_all : bool, default False\n",
 144 |       "     |          Put all option values in lists. Normally only options\n",
 145 |       "     |          appearing more than once are put in a list.\n",
 146 |       "     |      posix : bool, default True\n",
 147 |       "     |          Whether to split the input line in POSIX mode or not, as per the\n",
 148 |       "     |          conventions outlined in the :mod:`shlex` module from the standard\n",
 149 |       "     |          library.\n",
 150 |       "     |  \n",
 151 |       "     |  ----------------------------------------------------------------------\n",
 152 |       "     |  Data and other attributes inherited from IPython.core.magic.Magics:\n",
 153 |       "     |  \n",
 154 |       "     |  options_table = None\n",
 155 |       "     |  \n",
 156 |       "     |  shell = None\n",
 157 |       "     |  \n",
 158 |       "     |  ----------------------------------------------------------------------\n",
 159 |       "     |  Methods inherited from traitlets.config.configurable.Configurable:\n",
 160 |       "     |  \n",
 161 |       "     |  update_config(self, config: 'Config') -> 'None'\n",
 162 |       "     |      Update config and load the new values\n",
 163 |       "     |  \n",
 164 |       "     |  ----------------------------------------------------------------------\n",
 165 |       "     |  Class methods inherited from traitlets.config.configurable.Configurable:\n",
 166 |       "     |  \n",
 167 |       "     |  class_config_rst_doc() -> 'str' from traitlets.traitlets.MetaHasTraits\n",
 168 |       "     |      Generate rST documentation for this class' config options.\n",
 169 |       "     |      \n",
 170 |       "     |      Excludes traits defined on parent classes.\n",
 171 |       "     |  \n",
 172 |       "     |  class_config_section(classes: 't.Sequence[type[HasTraits]] | None' = None) -> 'str' from traitlets.traitlets.MetaHasTraits\n",
 173 |       "     |      Get the config section for this class.\n",
 174 |       "     |      \n",
 175 |       "     |      Parameters\n",
 176 |       "     |      ----------\n",
 177 |       "     |      classes : list, optional\n",
 178 |       "     |          The list of other classes in the config file.\n",
 179 |       "     |          Used to reduce redundant information.\n",
 180 |       "     |  \n",
 181 |       "     |  class_get_help(inst: 'HasTraits | None' = None) -> 'str' from traitlets.traitlets.MetaHasTraits\n",
 182 |       "     |      Get the help string for this class in ReST format.\n",
 183 |       "     |      \n",
 184 |       "     |      If `inst` is given, its current trait values will be used in place of\n",
 185 |       "     |      class defaults.\n",
 186 |       "     |  \n",
 187 |       "     |  class_get_trait_help(trait: 'TraitType[t.Any, t.Any]', inst: 'HasTraits | None' = None, helptext: 'str | None' = None) -> 'str' from traitlets.traitlets.MetaHasTraits\n",
 188 |       "     |      Get the helptext string for a single trait.\n",
 189 |       "     |      \n",
 190 |       "     |      :param inst:\n",
 191 |       "     |          If given, its current trait values will be used in place of\n",
 192 |       "     |          the class default.\n",
 193 |       "     |      :param helptext:\n",
 194 |       "     |          If not given, uses the `help` attribute of the current trait.\n",
 195 |       "     |  \n",
 196 |       "     |  class_print_help(inst: 'HasTraits | None' = None) -> 'None' from traitlets.traitlets.MetaHasTraits\n",
 197 |       "     |      Get the help string for a single trait and print it.\n",
 198 |       "     |  \n",
 199 |       "     |  section_names() -> 'list[str]' from traitlets.traitlets.MetaHasTraits\n",
 200 |       "     |      return section names as a list\n",
 201 |       "     |  \n",
 202 |       "     |  ----------------------------------------------------------------------\n",
 203 |       "     |  Data descriptors inherited from traitlets.config.configurable.Configurable:\n",
 204 |       "     |  \n",
 205 |       "     |  config\n",
 206 |       "     |  \n",
 207 |       "     |  parent\n",
 208 |       "     |  \n",
 209 |       "     |  ----------------------------------------------------------------------\n",
 210 |       "     |  Methods inherited from traitlets.traitlets.HasTraits:\n",
 211 |       "     |  \n",
 212 |       "     |  __getstate__(self) -> 'dict[str, t.Any]'\n",
 213 |       "     |      Helper for pickle.\n",
 214 |       "     |  \n",
 215 |       "     |  __setstate__(self, state: 'dict[str, t.Any]') -> 'None'\n",
 216 |       "     |  \n",
 217 |       "     |  add_traits(self, **traits: 't.Any') -> 'None'\n",
 218 |       "     |      Dynamically add trait attributes to the HasTraits instance.\n",
 219 |       "     |  \n",
 220 |       "     |  has_trait(self, name: 'str') -> 'bool'\n",
 221 |       "     |      Returns True if the object has a trait with the specified name.\n",
 222 |       "     |  \n",
 223 |       "     |  hold_trait_notifications(self) -> 't.Any'\n",
 224 |       "     |      Context manager for bundling trait change notifications and cross\n",
 225 |       "     |      validation.\n",
 226 |       "     |      \n",
 227 |       "     |      Use this when doing multiple trait assignments (init, config), to avoid\n",
 228 |       "     |      race conditions in trait notifiers requesting other trait values.\n",
 229 |       "     |      All trait notifications will fire after all values have been assigned.\n",
 230 |       "     |  \n",
 231 |       "     |  notify_change(self, change: 'Bunch') -> 'None'\n",
 232 |       "     |      Notify observers of a change event\n",
 233 |       "     |  \n",
 234 |       "     |  observe(self, handler: 't.Callable[..., t.Any]', names: 'Sentinel | str | t.Iterable[Sentinel | str]' = traitlets.All, type: 'Sentinel | str' = 'change') -> 'None'\n",
 235 |       "     |      Setup a handler to be called when a trait changes.\n",
 236 |       "     |      \n",
 237 |       "     |      This is used to setup dynamic notifications of trait changes.\n",
 238 |       "     |      \n",
 239 |       "     |      Parameters\n",
 240 |       "     |      ----------\n",
 241 |       "     |      handler : callable\n",
 242 |       "     |          A callable that is called when a trait changes. Its\n",
 243 |       "     |          signature should be ``handler(change)``, where ``change`` is a\n",
 244 |       "     |          dictionary. The change dictionary at least holds a 'type' key.\n",
 245 |       "     |          * ``type``: the type of notification.\n",
 246 |       "     |          Other keys may be passed depending on the value of 'type'. In the\n",
 247 |       "     |          case where type is 'change', we also have the following keys:\n",
 248 |       "     |          * ``owner`` : the HasTraits instance\n",
 249 |       "     |          * ``old`` : the old value of the modified trait attribute\n",
 250 |       "     |          * ``new`` : the new value of the modified trait attribute\n",
 251 |       "     |          * ``name`` : the name of the modified trait attribute.\n",
 252 |       "     |      names : list, str, All\n",
 253 |       "     |          If names is All, the handler will apply to all traits.  If a list\n",
 254 |       "     |          of str, handler will apply to all names in the list.  If a\n",
 255 |       "     |          str, the handler will apply just to that name.\n",
 256 |       "     |      type : str, All (default: 'change')\n",
 257 |       "     |          The type of notification to filter by. If equal to All, then all\n",
 258 |       "     |          notifications are passed to the observe handler.\n",
 259 |       "     |  \n",
 260 |       "     |  on_trait_change(self, handler: 'EventHandler | None' = None, name: 'Sentinel | str | None' = None, remove: 'bool' = False) -> 'None'\n",
 261 |       "     |      DEPRECATED: Setup a handler to be called when a trait changes.\n",
 262 |       "     |      \n",
 263 |       "     |      This is used to setup dynamic notifications of trait changes.\n",
 264 |       "     |      \n",
 265 |       "     |      Static handlers can be created by creating methods on a HasTraits\n",
 266 |       "     |      subclass with the naming convention '_[traitname]_changed'.  Thus,\n",
 267 |       "     |      to create static handler for the trait 'a', create the method\n",
 268 |       "     |      _a_changed(self, name, old, new) (fewer arguments can be used, see\n",
 269 |       "     |      below).\n",
 270 |       "     |      \n",
 271 |       "     |      If `remove` is True and `handler` is not specified, all change\n",
 272 |       "     |      handlers for the specified name are uninstalled.\n",
 273 |       "     |      \n",
 274 |       "     |      Parameters\n",
 275 |       "     |      ----------\n",
 276 |       "     |      handler : callable, None\n",
 277 |       "     |          A callable that is called when a trait changes.  Its\n",
 278 |       "     |          signature can be handler(), handler(name), handler(name, new),\n",
 279 |       "     |          handler(name, old, new), or handler(name, old, new, self).\n",
 280 |       "     |      name : list, str, None\n",
 281 |       "     |          If None, the handler will apply to all traits.  If a list\n",
 282 |       "     |          of str, handler will apply to all names in the list.  If a\n",
 283 |       "     |          str, the handler will apply just to that name.\n",
 284 |       "     |      remove : bool\n",
 285 |       "     |          If False (the default), then install the handler.  If True\n",
 286 |       "     |          then unintall it.\n",
 287 |       "     |  \n",
 288 |       "     |  set_trait(self, name: 'str', value: 't.Any') -> 'None'\n",
 289 |       "     |      Forcibly sets trait attribute, including read-only attributes.\n",
 290 |       "     |  \n",
 291 |       "     |  setup_instance(*args: 't.Any', **kwargs: 't.Any') -> 'None'\n",
 292 |       "     |      This is called **before** self.__init__ is called.\n",
 293 |       "     |  \n",
 294 |       "     |  trait_defaults(self, *names: 'str', **metadata: 't.Any') -> 'dict[str, t.Any] | Sentinel'\n",
 295 |       "     |      Return a trait's default value or a dictionary of them\n",
 296 |       "     |      \n",
 297 |       "     |      Notes\n",
 298 |       "     |      -----\n",
 299 |       "     |      Dynamically generated default values may\n",
 300 |       "     |      depend on the current state of the object.\n",
 301 |       "     |  \n",
 302 |       "     |  trait_has_value(self, name: 'str') -> 'bool'\n",
 303 |       "     |      Returns True if the specified trait has a value.\n",
 304 |       "     |      \n",
 305 |       "     |      This will return false even if ``getattr`` would return a\n",
 306 |       "     |      dynamically generated default value. These default values\n",
 307 |       "     |      will be recognized as existing only after they have been\n",
 308 |       "     |      generated.\n",
 309 |       "     |      \n",
 310 |       "     |      Example\n",
 311 |       "     |      \n",
 312 |       "     |      .. code-block:: python\n",
 313 |       "     |      \n",
 314 |       "     |          class MyClass(HasTraits):\n",
 315 |       "     |              i = Int()\n",
 316 |       "     |      \n",
 317 |       "     |      \n",
 318 |       "     |          mc = MyClass()\n",
 319 |       "     |          assert not mc.trait_has_value(\"i\")\n",
 320 |       "     |          mc.i  # generates a default value\n",
 321 |       "     |          assert mc.trait_has_value(\"i\")\n",
 322 |       "     |  \n",
 323 |       "     |  trait_metadata(self, traitname: 'str', key: 'str', default: 't.Any' = None) -> 't.Any'\n",
 324 |       "     |      Get metadata values for trait by key.\n",
 325 |       "     |  \n",
 326 |       "     |  trait_names(self, **metadata: 't.Any') -> 'list[str]'\n",
 327 |       "     |      Get a list of all the names of this class' traits.\n",
 328 |       "     |  \n",
 329 |       "     |  trait_values(self, **metadata: 't.Any') -> 'dict[str, t.Any]'\n",
 330 |       "     |      A ``dict`` of trait names and their values.\n",
 331 |       "     |      \n",
 332 |       "     |      The metadata kwargs allow functions to be passed in which\n",
 333 |       "     |      filter traits based on metadata values.  The functions should\n",
 334 |       "     |      take a single value as an argument and return a boolean.  If\n",
 335 |       "     |      any function returns False, then the trait is not included in\n",
 336 |       "     |      the output.  If a metadata key doesn't exist, None will be passed\n",
 337 |       "     |      to the function.\n",
 338 |       "     |      \n",
 339 |       "     |      Returns\n",
 340 |       "     |      -------\n",
 341 |       "     |      A ``dict`` of trait names and their values.\n",
 342 |       "     |      \n",
 343 |       "     |      Notes\n",
 344 |       "     |      -----\n",
 345 |       "     |      Trait values are retrieved via ``getattr``, any exceptions raised\n",
 346 |       "     |      by traits or the operations they may trigger will result in the\n",
 347 |       "     |      absence of a trait value in the result ``dict``.\n",
 348 |       "     |  \n",
 349 |       "     |  traits(self, **metadata: 't.Any') -> 'dict[str, TraitType[t.Any, t.Any]]'\n",
 350 |       "     |      Get a ``dict`` of all the traits of this class.  The dictionary\n",
 351 |       "     |      is keyed on the name and the values are the TraitType objects.\n",
 352 |       "     |      \n",
 353 |       "     |      The TraitTypes returned don't know anything about the values\n",
 354 |       "     |      that the various HasTrait's instances are holding.\n",
 355 |       "     |      \n",
 356 |       "     |      The metadata kwargs allow functions to be passed in which\n",
 357 |       "     |      filter traits based on metadata values.  The functions should\n",
 358 |       "     |      take a single value as an argument and return a boolean.  If\n",
 359 |       "     |      any function returns False, then the trait is not included in\n",
 360 |       "     |      the output.  If a metadata key doesn't exist, None will be passed\n",
 361 |       "     |      to the function.\n",
 362 |       "     |  \n",
 363 |       "     |  unobserve(self, handler: 't.Callable[..., t.Any]', names: 'Sentinel | str | t.Iterable[Sentinel | str]' = traitlets.All, type: 'Sentinel | str' = 'change') -> 'None'\n",
 364 |       "     |      Remove a trait change handler.\n",
 365 |       "     |      \n",
 366 |       "     |      This is used to unregister handlers to trait change notifications.\n",
 367 |       "     |      \n",
 368 |       "     |      Parameters\n",
 369 |       "     |      ----------\n",
 370 |       "     |      handler : callable\n",
 371 |       "     |          The callable called when a trait attribute changes.\n",
 372 |       "     |      names : list, str, All (default: All)\n",
 373 |       "     |          The names of the traits for which the specified handler should be\n",
 374 |       "     |          uninstalled. If names is All, the specified handler is uninstalled\n",
 375 |       "     |          from the list of notifiers corresponding to all changes.\n",
 376 |       "     |      type : str or All (default: 'change')\n",
 377 |       "     |          The type of notification to filter by. If All, the specified handler\n",
 378 |       "     |          is uninstalled from the list of notifiers corresponding to all types.\n",
 379 |       "     |  \n",
 380 |       "     |  unobserve_all(self, name: 'str | t.Any' = traitlets.All) -> 'None'\n",
 381 |       "     |      Remove trait change handlers of any type for the specified name.\n",
 382 |       "     |      If name is not specified, removes all trait notifiers.\n",
 383 |       "     |  \n",
 384 |       "     |  ----------------------------------------------------------------------\n",
 385 |       "     |  Class methods inherited from traitlets.traitlets.HasTraits:\n",
 386 |       "     |  \n",
 387 |       "     |  class_own_trait_events(name: 'str') -> 'dict[str, EventHandler]' from traitlets.traitlets.MetaHasTraits\n",
 388 |       "     |      Get a dict of all event handlers defined on this class, not a parent.\n",
 389 |       "     |      \n",
 390 |       "     |      Works like ``event_handlers``, except for excluding traits from parents.\n",
 391 |       "     |  \n",
 392 |       "     |  class_own_traits(**metadata: 't.Any') -> 'dict[str, TraitType[t.Any, t.Any]]' from traitlets.traitlets.MetaHasTraits\n",
 393 |       "     |      Get a dict of all the traitlets defined on this class, not a parent.\n",
 394 |       "     |      \n",
 395 |       "     |      Works like `class_traits`, except for excluding traits from parents.\n",
 396 |       "     |  \n",
 397 |       "     |  class_trait_names(**metadata: 't.Any') -> 'list[str]' from traitlets.traitlets.MetaHasTraits\n",
 398 |       "     |      Get a list of all the names of this class' traits.\n",
 399 |       "     |      \n",
 400 |       "     |      This method is just like the :meth:`trait_names` method,\n",
 401 |       "     |      but is unbound.\n",
 402 |       "     |  \n",
 403 |       "     |  class_traits(**metadata: 't.Any') -> 'dict[str, TraitType[t.Any, t.Any]]' from traitlets.traitlets.MetaHasTraits\n",
 404 |       "     |      Get a ``dict`` of all the traits of this class.  The dictionary\n",
 405 |       "     |      is keyed on the name and the values are the TraitType objects.\n",
 406 |       "     |      \n",
 407 |       "     |      This method is just like the :meth:`traits` method, but is unbound.\n",
 408 |       "     |      \n",
 409 |       "     |      The TraitTypes returned don't know anything about the values\n",
 410 |       "     |      that the various HasTrait's instances are holding.\n",
 411 |       "     |      \n",
 412 |       "     |      The metadata kwargs allow functions to be passed in which\n",
 413 |       "     |      filter traits based on metadata values.  The functions should\n",
 414 |       "     |      take a single value as an argument and return a boolean.  If\n",
 415 |       "     |      any function returns False, then the trait is not included in\n",
 416 |       "     |      the output.  If a metadata key doesn't exist, None will be passed\n",
 417 |       "     |      to the function.\n",
 418 |       "     |  \n",
 419 |       "     |  trait_events(name: 'str | None' = None) -> 'dict[str, EventHandler]' from traitlets.traitlets.MetaHasTraits\n",
 420 |       "     |      Get a ``dict`` of all the event handlers of this class.\n",
 421 |       "     |      \n",
 422 |       "     |      Parameters\n",
 423 |       "     |      ----------\n",
 424 |       "     |      name : str (default: None)\n",
 425 |       "     |          The name of a trait of this class. If name is ``None`` then all\n",
 426 |       "     |          the event handlers of this class will be returned instead.\n",
 427 |       "     |      \n",
 428 |       "     |      Returns\n",
 429 |       "     |      -------\n",
 430 |       "     |      The event handlers associated with a trait name, or all event handlers.\n",
 431 |       "     |  \n",
 432 |       "     |  ----------------------------------------------------------------------\n",
 433 |       "     |  Readonly properties inherited from traitlets.traitlets.HasTraits:\n",
 434 |       "     |  \n",
 435 |       "     |  cross_validation_lock\n",
 436 |       "     |      A contextmanager for running a block with our cross validation lock set\n",
 437 |       "     |      to True.\n",
 438 |       "     |      \n",
 439 |       "     |      At the end of the block, the lock's value is restored to its value\n",
 440 |       "     |      prior to entering the block.\n",
 441 |       "     |  \n",
 442 |       "     |  ----------------------------------------------------------------------\n",
 443 |       "     |  Static methods inherited from traitlets.traitlets.HasDescriptors:\n",
 444 |       "     |  \n",
 445 |       "     |  __new__(*args: 't.Any', **kwargs: 't.Any') -> 't.Any'\n",
 446 |       "     |      Create and return a new object.  See help(type) for accurate signature.\n",
 447 |       "     |  \n",
 448 |       "     |  ----------------------------------------------------------------------\n",
 449 |       "     |  Data descriptors inherited from traitlets.traitlets.HasDescriptors:\n",
 450 |       "     |  \n",
 451 |       "     |  __dict__\n",
 452 |       "     |      dictionary for instance variables (if defined)\n",
 453 |       "     |  \n",
 454 |       "     |  __weakref__\n",
 455 |       "     |      list of weak references to the object (if defined)\n",
 456 |       "\n",
 457 |       "FUNCTIONS\n",
 458 |       "    load_ipython_extension(ipython)\n",
 459 |       "        Any module file that define a function named `load_ipython_extension`\n",
 460 |       "        can be loaded via `%load_ext module.path` or be configured to be\n",
 461 |       "        autoloaded by IPython at startup time.\n",
 462 |       "\n",
 463 |       "VERSION\n",
 464 |       "    1.8.2\n",
 465 |       "\n",
 466 |       "FILE\n",
 467 |       "    /home/ian/miniconda3/envs/coursehpp/lib/python3.11/site-packages/ipython_memory_usage/__init__.py\n",
 468 |       "\n",
 469 |       "\n"
 470 |      ]
 471 |     }
 472 |    ],
 473 |    "source": [
 474 |     "import ipython_memory_usage\n",
 475 |     "help(ipython_memory_usage) # or ipython_memory_usage?"
 476 |    ]
 477 |   },
 478 |   {
 479 |    "cell_type": "code",
 480 |    "execution_count": 2,
 481 |    "id": "3faa440f",
 482 |    "metadata": {},
 483 |    "outputs": [
 484 |     {
 485 |      "name": "stdout",
 486 |      "output_type": "stream",
 487 |      "text": [
 488 |       "Enabling IPython Memory Usage, use %imu_start to begin, %imu_stop to end\n"
 489 |      ]
 490 |     },
 491 |     {
 492 |      "data": {
 493 |       "text/plain": [
 494 |        "'IPython Memory Usage started'"
 495 |       ]
 496 |      },
 497 |      "execution_count": 2,
 498 |      "metadata": {},
 499 |      "output_type": "execute_result"
 500 |     },
 501 |     {
 502 |      "name": "stdout",
 503 |      "output_type": "stream",
 504 |      "text": [
 505 |       "In [2] used 0.9 MiB RAM in 0.13s (system mean cpu 0%, single max cpu 0%), peaked 0.0 MiB above final usage, current RAM usage now 66.1 MiB\n"
 506 |      ]
 507 |     }
 508 |    ],
 509 |    "source": [
 510 |     "%load_ext ipython_memory_usage\n",
 511 |     "%imu_start"
 512 |    ]
 513 |   },
 514 |   {
 515 |    "cell_type": "markdown",
 516 |    "id": "fc9ce9d2",
 517 |    "metadata": {},
 518 |    "source": [
 519 |     "# Importing packages uses some RAM"
 520 |    ]
 521 |   },
 522 |   {
 523 |    "cell_type": "code",
 524 |    "execution_count": 3,
 525 |    "id": "f65a2110",
 526 |    "metadata": {},
 527 |    "outputs": [
 528 |     {
 529 |      "name": "stdout",
 530 |      "output_type": "stream",
 531 |      "text": [
 532 |       "In [3] used 15.3 MiB RAM in 0.19s (system mean cpu 0%, single max cpu 0%), peaked 0.0 MiB above final usage, current RAM usage now 81.4 MiB\n"
 533 |      ]
 534 |     }
 535 |    ],
 536 |    "source": [
 537 |     "import numpy as np # note that importing a package will increase total RAM usage a little"
 538 |    ]
 539 |   },
 540 |   {
 541 |    "cell_type": "code",
 542 |    "execution_count": 4,
 543 |    "id": "78d9ac64",
 544 |    "metadata": {},
 545 |    "outputs": [
 546 |     {
 547 |      "name": "stdout",
 548 |      "output_type": "stream",
 549 |      "text": [
 550 |       "In [4] used 54.1 MiB RAM in 0.39s (system mean cpu 8%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 135.5 MiB\n"
 551 |      ]
 552 |     }
 553 |    ],
 554 |    "source": [
 555 |     "import pandas as pd # note that importing Pandas uses more RAM than importing numpy"
 556 |    ]
 557 |   },
 558 |   {
 559 |    "cell_type": "code",
 560 |    "execution_count": 5,
 561 |    "id": "84ca123c",
 562 |    "metadata": {},
 563 |    "outputs": [
 564 |     {
 565 |      "name": "stdout",
 566 |      "output_type": "stream",
 567 |      "text": [
 568 |       "In [5] used 0.0 MiB RAM in 0.10s (system mean cpu 7%, single max cpu 27%), peaked 0.0 MiB above final usage, current RAM usage now 135.5 MiB\n"
 569 |      ]
 570 |     }
 571 |    ],
 572 |    "source": [
 573 |     "import string"
 574 |    ]
 575 |   },
 576 |   {
 577 |    "cell_type": "markdown",
 578 |    "id": "7fa3e401",
 579 |    "metadata": {},
 580 |    "source": [
 581 |     "# Making a large array uses a predictable amount of RAM"
 582 |    ]
 583 |   },
 584 |   {
 585 |    "cell_type": "code",
 586 |    "execution_count": 6,
 587 |    "id": "9663af97",
 588 |    "metadata": {},
 589 |    "outputs": [
 590 |     {
 591 |      "name": "stdout",
 592 |      "output_type": "stream",
 593 |      "text": [
 594 |       "In [6] used 763.2 MiB RAM in 0.28s (system mean cpu 8%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 898.7 MiB\n"
 595 |      ]
 596 |     }
 597 |    ],
 598 |    "source": [
 599 |     "# if we make a big array - 100M items * 8 byte floats, this cell\n",
 600 |     "# uses circa 800MB (often 760 MiB - note mibi-bytes as used in the underlying memory_profiler tool)\n",
 601 |     "# The total RAM usage grows by roughly this amount\n",
 602 |     "arr = np.ones(100_000_000) "
 603 |    ]
 604 |   },
 605 |   {
 606 |    "cell_type": "code",
 607 |    "execution_count": 7,
 608 |    "id": "0d23b888",
 609 |    "metadata": {},
 610 |    "outputs": [
 611 |     {
 612 |      "name": "stdout",
 613 |      "output_type": "stream",
 614 |      "text": [
 615 |       "In [7] used -762.9 MiB RAM in 0.10s (system mean cpu 8%, single max cpu 20%), peaked 762.9 MiB above final usage, current RAM usage now 135.7 MiB\n"
 616 |      ]
 617 |     }
 618 |    ],
 619 |    "source": [
 620 |     "# deleting arr reduces RAM usage by roughly the expected amount and\n",
 621 |     "# total RAM usage should drop back down\n",
 622 |     "del arr"
 623 |    ]
 624 |   },
 625 |   {
 626 |    "cell_type": "code",
 627 |    "execution_count": 8,
 628 |    "id": "5d3b516f",
 629 |    "metadata": {},
 630 |    "outputs": [
 631 |     {
 632 |      "name": "stdout",
 633 |      "output_type": "stream",
 634 |      "text": [
 635 |       "In [8] used 763.0 MiB RAM in 0.27s (system mean cpu 8%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 898.8 MiB\n"
 636 |      ]
 637 |     }
 638 |    ],
 639 |    "source": [
 640 |     "# if we make it again, RAM usage goes up again\n",
 641 |     "arr = np.ones(100_000_000) "
 642 |    ]
 643 |   },
 644 |   {
 645 |    "cell_type": "code",
 646 |    "execution_count": 9,
 647 |    "id": "c1bbaff3",
 648 |    "metadata": {},
 649 |    "outputs": [
 650 |     {
 651 |      "name": "stdout",
 652 |      "output_type": "stream",
 653 |      "text": [
 654 |       "In [9] used -762.9 MiB RAM in 0.10s (system mean cpu 5%, single max cpu 20%), peaked 762.9 MiB above final usage, current RAM usage now 135.8 MiB\n"
 655 |      ]
 656 |     }
 657 |    ],
 658 |    "source": [
 659 |     "del arr"
 660 |    ]
 661 |   },
 662 |   {
 663 |    "cell_type": "markdown",
 664 |    "id": "3a733e50",
 665 |    "metadata": {},
 666 |    "source": [
 667 |     "# Making a big random array takes RAM + time"
 668 |    ]
 669 |   },
 670 |   {
 671 |    "cell_type": "code",
 672 |    "execution_count": 10,
 673 |    "id": "57518265",
 674 |    "metadata": {},
 675 |    "outputs": [
 676 |     {
 677 |      "name": "stdout",
 678 |      "output_type": "stream",
 679 |      "text": [
 680 |       "[ 1.11397493 -0.19617328  1.41230994  0.72710602  0.6042617 ] float64\n",
 681 |       "In [10] used 763.1 MiB RAM in 3.25s (system mean cpu 10%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 898.9 MiB\n"
 682 |      ]
 683 |     }
 684 |    ],
 685 |    "source": [
 686 |     "# creating random items takes some time, after \"used ... RAM\" note \"3s\" or so for several seconds\n",
 687 |     "arr = np.random.normal(size=100_000_000)\n",
 688 |     "print(arr[:5], arr.dtype)"
 689 |    ]
 690 |   },
 691 |   {
 692 |    "cell_type": "markdown",
 693 |    "id": "7ced0369",
 694 |    "metadata": {},
 695 |    "source": [
 696 |     "# Intermediate calculations can cost additional temporary RAM \n",
 697 |     "\n",
 698 |     "**NOTE** this section may work different if you're on Windows (if so - please report back to Ian by raising a bug and noting the difference.\n",
 699 |     "\n",
 700 |     "On some platforms, e.g. Linux as used here, temporary intermediates can be reused in-place reducing the overall memory allocation: https://docs.scipy.org/doc/numpy-1.13.0/release.html#highlights"
 701 |    ]
 702 |   },
 703 |   {
 704 |    "cell_type": "code",
 705 |    "execution_count": 11,
 706 |    "id": "84828ac9",
 707 |    "metadata": {},
 708 |    "outputs": [
 709 |     {
 710 |      "name": "stdout",
 711 |      "output_type": "stream",
 712 |      "text": [
 713 |       "In [11] used 0.1 MiB RAM in 0.10s (system mean cpu 5%, single max cpu 40%), peaked 0.0 MiB above final usage, current RAM usage now 899.0 MiB\n"
 714 |      ]
 715 |     }
 716 |    ],
 717 |    "source": [
 718 |     "pass"
 719 |    ]
 720 |   },
 721 |   {
 722 |    "cell_type": "code",
 723 |    "execution_count": 12,
 724 |    "id": "9c96ec84",
 725 |    "metadata": {},
 726 |    "outputs": [
 727 |     {
 728 |      "name": "stdout",
 729 |      "output_type": "stream",
 730 |      "text": [
 731 |       "In [12] used 763.6 MiB RAM in 0.76s (system mean cpu 10%, single max cpu 100%), peaked 762.9 MiB above final usage, current RAM usage now 1662.6 MiB\n"
 732 |      ]
 733 |     }
 734 |    ],
 735 |    "source": [
 736 |     "# (arr * 2) will allocate a new 762MiB array\n",
 737 |     "# (arr * 3) will also allocate another 762MiB (so +1.4GB in total)\n",
 738 |     "# the (arr * 2) array can be overwritten in-place for the division, so \n",
 739 |     "# a third temporary is _not_ needed\n",
 740 |     "# the final result (costing 762MiB) is assigned to arr_result\n",
 741 |     "# therefore we report \"used 762MiB\" at the end of the cell's execution\n",
 742 |     "# plus \"peaked 762MiB above final usage\" due to the second temporary,\n",
 743 |     "# so 1.4GB max was used during execution.\n",
 744 |     "arr_result = (arr * 2) / (arr * 3)"
 745 |    ]
 746 |   },
 747 |   {
 748 |    "cell_type": "code",
 749 |    "execution_count": 13,
 750 |    "id": "ccfd36b5",
 751 |    "metadata": {},
 752 |    "outputs": [
 753 |     {
 754 |      "name": "stdout",
 755 |      "output_type": "stream",
 756 |      "text": [
 757 |       "In [13] used -762.9 MiB RAM in 0.11s (system mean cpu 9%, single max cpu 27%), peaked 762.9 MiB above final usage, current RAM usage now 899.7 MiB\n"
 758 |      ]
 759 |     }
 760 |    ],
 761 |    "source": [
 762 |     "del arr"
 763 |    ]
 764 |   },
 765 |   {
 766 |    "cell_type": "code",
 767 |    "execution_count": 14,
 768 |    "id": "9d0937da",
 769 |    "metadata": {},
 770 |    "outputs": [
 771 |     {
 772 |      "name": "stdout",
 773 |      "output_type": "stream",
 774 |      "text": [
 775 |       "In [14] used -762.9 MiB RAM in 0.11s (system mean cpu 6%, single max cpu 30%), peaked 762.9 MiB above final usage, current RAM usage now 136.8 MiB\n"
 776 |      ]
 777 |     }
 778 |    ],
 779 |    "source": [
 780 |     "del arr_result"
 781 |    ]
 782 |   },
 783 |   {
 784 |    "cell_type": "markdown",
 785 |    "id": "e0f96940",
 786 |    "metadata": {},
 787 |    "source": [
 788 |     "# Pandas DataFrames can be costly on RAM"
 789 |    ]
 790 |   },
 791 |   {
 792 |    "cell_type": "markdown",
 793 |    "id": "9e6bbc35",
 794 |    "metadata": {},
 795 |    "source": [
 796 |     "## Example with deleting columns\n",
 797 |     "\n",
 798 |     "Props to Jamie Brunning for this example"
 799 |    ]
 800 |   },
 801 |   {
 802 |    "cell_type": "code",
 803 |    "execution_count": 15,
 804 |    "id": "f285be45",
 805 |    "metadata": {},
 806 |    "outputs": [
 807 |     {
 808 |      "name": "stdout",
 809 |      "output_type": "stream",
 810 |      "text": [
 811 |       "In [15] used 0.0 MiB RAM in 0.10s (system mean cpu 8%, single max cpu 36%), peaked 0.0 MiB above final usage, current RAM usage now 136.8 MiB\n"
 812 |      ]
 813 |     }
 814 |    ],
 815 |    "source": [
 816 |     "pass"
 817 |    ]
 818 |   },
 819 |   {
 820 |    "cell_type": "code",
 821 |    "execution_count": 16,
 822 |    "id": "0a365395",
 823 |    "metadata": {},
 824 |    "outputs": [
 825 |     {
 826 |      "name": "stdout",
 827 |      "output_type": "stream",
 828 |      "text": [
 829 |       "In [16] used 3051.8 MiB RAM in 12.30s (system mean cpu 9%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 3188.6 MiB\n"
 830 |      ]
 831 |     }
 832 |    ],
 833 |    "source": [
 834 |     "arr_several_cols = np.random.normal(size=(100_000_000, 4))"
 835 |    ]
 836 |   },
 837 |   {
 838 |    "cell_type": "code",
 839 |    "execution_count": 17,
 840 |    "id": "b1751039",
 841 |    "metadata": {},
 842 |    "outputs": [
 843 |     {
 844 |      "data": {
 845 |       "text/plain": [
 846 |        "(100000000, 4)"
 847 |       ]
 848 |      },
 849 |      "execution_count": 17,
 850 |      "metadata": {},
 851 |      "output_type": "execute_result"
 852 |     },
 853 |     {
 854 |      "name": "stdout",
 855 |      "output_type": "stream",
 856 |      "text": [
 857 |       "In [17] used 0.0 MiB RAM in 0.10s (system mean cpu 7%, single max cpu 44%), peaked 0.0 MiB above final usage, current RAM usage now 3188.6 MiB\n"
 858 |      ]
 859 |     }
 860 |    ],
 861 |    "source": [
 862 |     "arr_several_cols.shape"
 863 |    ]
 864 |   },
 865 |   {
 866 |    "cell_type": "code",
 867 |    "execution_count": 18,
 868 |    "id": "27cb29ed",
 869 |    "metadata": {},
 870 |    "outputs": [
 871 |     {
 872 |      "data": {
 873 |       "text/plain": [
 874 |        "'Cost per column 800,000,000 bytes'"
 875 |       ]
 876 |      },
 877 |      "execution_count": 18,
 878 |      "metadata": {},
 879 |      "output_type": "execute_result"
 880 |     },
 881 |     {
 882 |      "name": "stdout",
 883 |      "output_type": "stream",
 884 |      "text": [
 885 |       "In [18] used 0.0 MiB RAM in 0.10s (system mean cpu 10%, single max cpu 50%), peaked 0.0 MiB above final usage, current RAM usage now 3188.6 MiB\n"
 886 |      ]
 887 |     }
 888 |    ],
 889 |    "source": [
 890 |     "f\"Cost per column {int(arr_several_cols.data.nbytes / arr_several_cols.shape[1]):,} bytes\""
 891 |    ]
 892 |   },
 893 |   {
 894 |    "cell_type": "code",
 895 |    "execution_count": 19,
 896 |    "id": "cb36e2b6",
 897 |    "metadata": {},
 898 |    "outputs": [
 899 |     {
 900 |      "name": "stdout",
 901 |      "output_type": "stream",
 902 |      "text": [
 903 |       "<class 'pandas.core.frame.DataFrame'>\n",
 904 |       "RangeIndex: 100000000 entries, 0 to 99999999\n",
 905 |       "Data columns (total 4 columns):\n",
 906 |       " #   Column  Dtype  \n",
 907 |       "---  ------  -----  \n",
 908 |       " 0   a       float64\n",
 909 |       " 1   b       float64\n",
 910 |       " 2   c       float64\n",
 911 |       " 3   d       float64\n",
 912 |       "dtypes: float64(4)\n",
 913 |       "memory usage: 3.0 GB\n",
 914 |       "In [19] used 2.1 MiB RAM in 0.11s (system mean cpu 8%, single max cpu 33%), peaked 0.0 MiB above final usage, current RAM usage now 3190.7 MiB\n"
 915 |      ]
 916 |     }
 917 |    ],
 918 |    "source": [
 919 |     "# The DataFrame in this case is a thin wrapper over the numpy array\n",
 920 |     "# and costs little extra RAM\n",
 921 |     "df = pd.DataFrame(arr_several_cols, columns=list(string.ascii_lowercase)[:arr_several_cols.shape[1]])\n",
 922 |     "df.info()"
 923 |    ]
 924 |   },
 925 |   {
 926 |    "cell_type": "code",
 927 |    "execution_count": 20,
 928 |    "id": "4579f39b",
 929 |    "metadata": {},
 930 |    "outputs": [
 931 |     {
 932 |      "name": "stdout",
 933 |      "output_type": "stream",
 934 |      "text": [
 935 |       "In [20] used 0.0 MiB RAM in 0.10s (system mean cpu 9%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 3190.7 MiB\n"
 936 |      ]
 937 |     }
 938 |    ],
 939 |    "source": [
 940 |     "# use Jupyter's xdel to remove all references of our expensive array, just in case\n",
 941 |     "# (but not in this case) it is also referred to in an Out[] history item\n",
 942 |     "%xdel arr_several_cols"
 943 |    ]
 944 |   },
 945 |   {
 946 |    "cell_type": "code",
 947 |    "execution_count": 21,
 948 |    "id": "a42d2330",
 949 |    "metadata": {},
 950 |    "outputs": [
 951 |     {
 952 |      "name": "stdout",
 953 |      "output_type": "stream",
 954 |      "text": [
 955 |       "<class 'pandas.core.frame.DataFrame'>\n",
 956 |       "RangeIndex: 100000000 entries, 0 to 99999999\n",
 957 |       "Data columns (total 4 columns):\n",
 958 |       " #   Column  Dtype  \n",
 959 |       "---  ------  -----  \n",
 960 |       " 0   a       float64\n",
 961 |       " 1   b       float64\n",
 962 |       " 2   c       float64\n",
 963 |       " 3   d       float64\n",
 964 |       "dtypes: float64(4)\n",
 965 |       "memory usage: 3.0 GB\n",
 966 |       "In [21] used 0.0 MiB RAM in 0.11s (system mean cpu 11%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 3190.7 MiB\n"
 967 |      ]
 968 |     }
 969 |    ],
 970 |    "source": [
 971 |     "df.info()"
 972 |    ]
 973 |   },
 974 |   {
 975 |    "cell_type": "code",
 976 |    "execution_count": 22,
 977 |    "id": "810fe2a7",
 978 |    "metadata": {},
 979 |    "outputs": [
 980 |     {
 981 |      "name": "stdout",
 982 |      "output_type": "stream",
 983 |      "text": [
 984 |       "In [22] used 0.0 MiB RAM in 0.10s (system mean cpu 9%, single max cpu 30%), peaked 0.0 MiB above final usage, current RAM usage now 3190.7 MiB\n"
 985 |      ]
 986 |     }
 987 |    ],
 988 |    "source": [
 989 |     "# deleting a column \n",
 990 |     "# note that no RAM is freed up!\n",
 991 |     "del df['a']"
 992 |    ]
 993 |   },
 994 |   {
 995 |    "cell_type": "code",
 996 |    "execution_count": 23,
 997 |    "id": "a10f1792-9bbe-4ced-85cf-09fa566a055b",
 998 |    "metadata": {},
 999 |    "outputs": [
1000 |     {
1001 |      "name": "stdout",
1002 |      "output_type": "stream",
1003 |      "text": [
1004 |       "<class 'pandas.core.frame.DataFrame'>\n",
1005 |       "RangeIndex: 100000000 entries, 0 to 99999999\n",
1006 |       "Data columns (total 3 columns):\n",
1007 |       " #   Column  Dtype  \n",
1008 |       "---  ------  -----  \n",
1009 |       " 0   b       float64\n",
1010 |       " 1   c       float64\n",
1011 |       " 2   d       float64\n",
1012 |       "dtypes: float64(3)\n",
1013 |       "memory usage: 2.2 GB\n",
1014 |       "In [23] used 0.0 MiB RAM in 0.11s (system mean cpu 12%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 3190.7 MiB\n"
1015 |      ]
1016 |     }
1017 |    ],
1018 |    "source": [
1019 |     "df.info()"
1020 |    ]
1021 |   },
1022 |   {
1023 |    "cell_type": "code",
1024 |    "execution_count": 24,
1025 |    "id": "e3f87a79",
1026 |    "metadata": {},
1027 |    "outputs": [
1028 |     {
1029 |      "data": {
1030 |       "text/plain": [
1031 |        "0"
1032 |       ]
1033 |      },
1034 |      "execution_count": 24,
1035 |      "metadata": {},
1036 |      "output_type": "execute_result"
1037 |     },
1038 |     {
1039 |      "name": "stdout",
1040 |      "output_type": "stream",
1041 |      "text": [
1042 |       "In [24] used 0.0 MiB RAM in 0.14s (system mean cpu 11%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 3190.7 MiB\n"
1043 |      ]
1044 |     }
1045 |    ],
1046 |    "source": [
1047 |     "# we get no benefit by forcing a collection\n",
1048 |     "import gc\n",
1049 |     "gc.collect()"
1050 |    ]
1051 |   },
1052 |   {
1053 |    "cell_type": "code",
1054 |    "execution_count": 25,
1055 |    "id": "0f33f9d6",
1056 |    "metadata": {},
1057 |    "outputs": [
1058 |     {
1059 |      "name": "stdout",
1060 |      "output_type": "stream",
1061 |      "text": [
1062 |       "<class 'pandas.core.frame.DataFrame'>\n",
1063 |       "RangeIndex: 100000000 entries, 0 to 99999999\n",
1064 |       "Data columns (total 3 columns):\n",
1065 |       " #   Column  Dtype  \n",
1066 |       "---  ------  -----  \n",
1067 |       " 0   b       float64\n",
1068 |       " 1   c       float64\n",
1069 |       " 2   d       float64\n",
1070 |       "dtypes: float64(3)\n",
1071 |       "memory usage: 2.2 GB\n",
1072 |       "In [25] used 0.0 MiB RAM in 0.11s (system mean cpu 12%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 3190.7 MiB\n"
1073 |      ]
1074 |     }
1075 |    ],
1076 |    "source": [
1077 |     "df.info()"
1078 |    ]
1079 |   },
1080 |   {
1081 |    "cell_type": "code",
1082 |    "execution_count": 26,
1083 |    "id": "b20a4003",
1084 |    "metadata": {},
1085 |    "outputs": [
1086 |     {
1087 |      "name": "stdout",
1088 |      "output_type": "stream",
1089 |      "text": [
1090 |       "In [26] used 0.0 MiB RAM in 0.10s (system mean cpu 11%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 3190.7 MiB\n"
1091 |      ]
1092 |     }
1093 |    ],
1094 |    "source": [
1095 |     "pass"
1096 |    ]
1097 |   },
1098 |   {
1099 |    "cell_type": "code",
1100 |    "execution_count": 27,
1101 |    "id": "5c2dfe69",
1102 |    "metadata": {},
1103 |    "outputs": [
1104 |     {
1105 |      "name": "stdout",
1106 |      "output_type": "stream",
1107 |      "text": [
1108 |       "In [27] used -1525.7 MiB RAM in 1.00s (system mean cpu 17%, single max cpu 100%), peaked 2937.5 MiB above final usage, current RAM usage now 1665.0 MiB\n"
1109 |      ]
1110 |     }
1111 |    ],
1112 |    "source": [
1113 |     "# using drop with inplace=False (the default) returns a copied DataFrame, if you don't use\n",
1114 |     "# this then maybe you end up with multiple DataFrames consuming RAM in a confusing fashion\n",
1115 |     "# e.g. you might have done `df2 = df.drop...` and then you've got the unmodified original\n",
1116 |     "# plus the modified df2 in the local namespace\n",
1117 |     "# We see total RAM usage drop by circa 800MB, the cost of 1 column, plus the other column (a)\n",
1118 |     "# maybe the usage of drop forces a flush on any internal caching in pandas?\n",
1119 |     "df = df.drop(columns=['b'])"
1120 |    ]
1121 |   },
1122 |   {
1123 |    "cell_type": "code",
1124 |    "execution_count": 28,
1125 |    "id": "e8fa1677",
1126 |    "metadata": {},
1127 |    "outputs": [
1128 |     {
1129 |      "name": "stdout",
1130 |      "output_type": "stream",
1131 |      "text": [
1132 |       "<class 'pandas.core.frame.DataFrame'>\n",
1133 |       "RangeIndex: 100000000 entries, 0 to 99999999\n",
1134 |       "Data columns (total 2 columns):\n",
1135 |       " #   Column  Dtype  \n",
1136 |       "---  ------  -----  \n",
1137 |       " 0   c       float64\n",
1138 |       " 1   d       float64\n",
1139 |       "dtypes: float64(2)\n",
1140 |       "memory usage: 1.5 GB\n",
1141 |       "In [28] used 0.0 MiB RAM in 0.11s (system mean cpu 11%, single max cpu 100%), peaked 0.0 MiB above final usage, current RAM usage now 1665.0 MiB\n"
1142 |      ]
1143 |     }
1144 |    ],
1145 |    "source": [
1146 |     "df.info()"
1147 |    ]
1148 |   },
1149 |   {
1150 |    "cell_type": "code",
1151 |    "execution_count": 29,
1152 |    "id": "c5891371",
1153 |    "metadata": {},
1154 |    "outputs": [
1155 |     {
1156 |      "name": "stdout",
1157 |      "output_type": "stream",
1158 |      "text": [
1159 |       "In [29] used -762.9 MiB RAM in 0.44s (system mean cpu 14%, single max cpu 80%), peaked 762.9 MiB above final usage, current RAM usage now 902.1 MiB\n"
1160 |      ]
1161 |     }
1162 |    ],
1163 |    "source": [
1164 |     "# dropping in-place is probably more sensible, we recover another circa 800MB\n",
1165 |     "df.drop(columns=['c'], inplace=True)"
1166 |    ]
1167 |   },
1168 |   {
1169 |    "cell_type": "code",
1170 |    "execution_count": 30,
1171 |    "id": "2c0453e8",
1172 |    "metadata": {},
1173 |    "outputs": [
1174 |     {
1175 |      "name": "stdout",
1176 |      "output_type": "stream",
1177 |      "text": [
1178 |       "<class 'pandas.core.frame.DataFrame'>\n",
1179 |       "RangeIndex: 100000000 entries, 0 to 99999999\n",
1180 |       "Data columns (total 1 columns):\n",
1181 |       " #   Column  Dtype  \n",
1182 |       "---  ------  -----  \n",
1183 |       " 0   d       float64\n",
1184 |       "dtypes: float64(1)\n",
1185 |       "memory usage: 762.9 MB\n",
1186 |       "In [30] used 0.0 MiB RAM in 0.11s (system mean cpu 0%, single max cpu 0%), peaked 0.0 MiB above final usage, current RAM usage now 902.1 MiB\n"
1187 |      ]
1188 |     }
1189 |    ],
1190 |    "source": [
1191 |     "df.info()"
1192 |    ]
1193 |   },
1194 |   {
1195 |    "cell_type": "code",
1196 |    "execution_count": 31,
1197 |    "id": "50ba3e81",
1198 |    "metadata": {},
1199 |    "outputs": [
1200 |     {
1201 |      "name": "stdout",
1202 |      "output_type": "stream",
1203 |      "text": [
1204 |       "In [31] used 0.0 MiB RAM in 0.10s (system mean cpu 0%, single max cpu 0%), peaked 0.0 MiB above final usage, current RAM usage now 902.1 MiB\n"
1205 |      ]
1206 |     }
1207 |    ],
1208 |    "source": [
1209 |     "pass"
1210 |    ]
1211 |   },
1212 |   {
1213 |    "cell_type": "code",
1214 |    "execution_count": 32,
1215 |    "id": "59e0c880",
1216 |    "metadata": {},
1217 |    "outputs": [
1218 |     {
1219 |      "name": "stdout",
1220 |      "output_type": "stream",
1221 |      "text": [
1222 |       "In [32] used -762.9 MiB RAM in 0.11s (system mean cpu 0%, single max cpu 0%), peaked 0.0 MiB above final usage, current RAM usage now 139.1 MiB\n"
1223 |      ]
1224 |     }
1225 |    ],
1226 |    "source": [
1227 |     "# now we get back to where we were before we made the DataFrame and the array\n",
1228 |     "df.drop(columns=['d'], inplace=True)"
1229 |    ]
1230 |   },
1231 |   {
1232 |    "cell_type": "markdown",
1233 |    "id": "31fed661",
1234 |    "metadata": {},
1235 |    "source": [
1236 |     "# Diagnostics\n",
1237 |     "\n",
1238 |     "`%xdel my_df` will delete all references of `my_df` from the namespace including those in the Out[] history buffer, this does more cleaning than just using `del my_df`.\n",
1239 |     "\n",
1240 |     "`%reset` will reset all variables and imported modules, it is like starting a new kernel."
1241 |    ]
1242 |   },
1243 |   {
1244 |    "cell_type": "code",
1245 |    "execution_count": 33,
1246 |    "id": "5a6a2f31",
1247 |    "metadata": {},
1248 |    "outputs": [
1249 |     {
1250 |      "name": "stdout",
1251 |      "output_type": "stream",
1252 |      "text": [
1253 |       "Variable               Type         Data/Info\n",
1254 |       "---------------------------------------------\n",
1255 |       "df                     DataFrame    Empty DataFrame\\nColumns:<...>0000000 rows x 0 columns]\n",
1256 |       "gc                     module       <module 'gc' (built-in)>\n",
1257 |       "ipython_memory_usage   module       <module 'ipython_memory_u<...>emory_usage/__init__.py'>\n",
1258 |       "np                     module       <module 'numpy' from '/ho<...>kages/numpy/__init__.py'>\n",
1259 |       "pd                     module       <module 'pandas' from '/h<...>ages/pandas/__init__.py'>\n",
1260 |       "string                 module       <module 'string' from '/h<...>ib/python3.11/string.py'>\n",
1261 |       "In [33] used 0.0 MiB RAM in 0.10s (system mean cpu 6%, single max cpu 22%), peaked 0.0 MiB above final usage, current RAM usage now 139.1 MiB\n"
1262 |      ]
1263 |     }
1264 |    ],
1265 |    "source": [
1266 |     "# %whos shows what's in the local namespace\n",
1267 |     "%whos"
1268 |    ]
1269 |   },
1270 |   {
1271 |    "cell_type": "code",
1272 |    "execution_count": 34,
1273 |    "id": "542391ed",
1274 |    "metadata": {},
1275 |    "outputs": [
1276 |     {
1277 |      "name": "stdout",
1278 |      "output_type": "stream",
1279 |      "text": [
1280 |       "In [34] used 0.0 MiB RAM in 0.10s (system mean cpu 5%, single max cpu 25%), peaked 0.0 MiB above final usage, current RAM usage now 139.1 MiB\n"
1281 |      ]
1282 |     }
1283 |    ],
1284 |    "source": [
1285 |     "# we can use %xdel to safely remove all references including those that might be (but not in this case)\n",
1286 |     "# in the Out[] history buffer\n",
1287 |     "%xdel df"
1288 |    ]
1289 |   },
1290 |   {
1291 |    "cell_type": "code",
1292 |    "execution_count": 35,
1293 |    "id": "3b1fb436",
1294 |    "metadata": {},
1295 |    "outputs": [
1296 |     {
1297 |      "name": "stdout",
1298 |      "output_type": "stream",
1299 |      "text": [
1300 |       "In [35] used 0.0 MiB RAM in 0.10s (system mean cpu 6%, single max cpu 20%), peaked 0.0 MiB above final usage, current RAM usage now 139.1 MiB\n"
1301 |      ]
1302 |     }
1303 |    ],
1304 |    "source": [
1305 |     "#%imu_stop\n",
1306 |     "#'IPython Memory Usage stopped'"
1307 |    ]
1308 |   },
1309 |   {
1310 |    "cell_type": "code",
1311 |    "execution_count": null,
1312 |    "id": "0ba1c17a",
1313 |    "metadata": {},
1314 |    "outputs": [],
1315 |    "source": []
1316 |   }
1317 |  ],
1318 |  "metadata": {
1319 |   "kernelspec": {
1320 |    "display_name": "Python 3 (ipykernel)",
1321 |    "language": "python",
1322 |    "name": "python3"
1323 |   },
1324 |   "language_info": {
1325 |    "codemirror_mode": {
1326 |     "name": "ipython",
1327 |     "version": 3
1328 |    },
1329 |    "file_extension": ".py",
1330 |    "mimetype": "text/x-python",
1331 |    "name": "python",
1332 |    "nbconvert_exporter": "python",
1333 |    "pygments_lexer": "ipython3",
1334 |    "version": "3.11.5"
1335 |   }
1336 |  },
1337 |  "nbformat": 4,
1338 |  "nbformat_minor": 5
1339 | }
1340 | 


--------------------------------------------------------------------------------